napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import pathlib
6
+ import re
7
+ from pydantic import BaseModel
8
+ from typing import Optional
9
+
10
+ from napistu import utils
11
+ from napistu.gcs.constants import GCS_ASSETS
12
+ from napistu.gcs.constants import INIT_DATA_DIR_MSG
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def load_public_cpr_asset(
    asset: str,
    data_dir: str,
    subasset: str | None = None,
    init_msg: str = INIT_DATA_DIR_MSG,
) -> str:
    """
    Load Public CPR Asset

    Download the `asset` asset to `data_dir` if it doesn't
    already exist and return a path to the local file.

    Args:
        asset (str): name of the asset to load; must be a key of
            GCS_ASSETS.ASSETS.
        data_dir (str): the local directory where assets should be stored.
            It is created if it does not exist.
        subasset (str | None): the name of a subasset to load from within
            the asset bundle. Must be provided when the asset defines
            subassets.
        init_msg (str): message to display if data_dir does not exist.

    Returns:
        str: the path to a local file (the asset itself, or the requested
            subasset inside the extracted bundle).

    Raises:
        ValueError: if `asset` or `subasset` fails validation.
        FileNotFoundError: if the expected file is still missing after the
            download (and extraction, when applicable).
    """

    # validate the data directory and the requested asset/subasset
    _initialize_data_dir(data_dir, init_msg)
    _validate_gcs_asset(asset)
    _validate_gcs_subasset(asset, subasset)

    # get the path for the asset (which may have been downloaded in a tar-ball)
    asset_path = os.path.join(data_dir, _get_gcs_asset_path(asset, subasset))
    if os.path.isfile(asset_path):
        return asset_path

    # reuse a previously downloaded file/archive instead of fetching it again
    download_path = os.path.join(
        data_dir, os.path.basename(GCS_ASSETS.ASSETS[asset]["file"])
    )
    if not os.path.isfile(download_path):
        download_public_cpr_asset(asset, download_path)

    # extract if the download is an archive or is compressed; a single raw
    # pattern with every dot escaped replaces four partly-redundant searches
    extn = utils.get_extn_from_url(download_path)
    if re.search(r"\.(?:tar\.gz|tgz|zip|gz)$", extn):
        utils.extract(download_path)

    # check that the asset_path exists
    if not os.path.isfile(asset_path):
        raise FileNotFoundError(
            f"Something went wrong and {asset_path} was not created."
        )

    return asset_path
71
+
72
+
73
def download_public_cpr_asset(asset: str, out_path: str) -> None:
    """
    Download Public CPR Asset

    Args:
        asset (str): The name of a CPR public asset stored in Google Cloud
            Storage (GCS); must be a key of GCS_ASSETS.ASSETS.
        out_path (str): Local location where the file should be saved.

    Returns:
        None
    """

    _validate_gcs_asset(asset)

    # every registry entry carries the public URL to fetch the asset from
    asset_url = GCS_ASSETS.ASSETS[asset]["public_url"]
    url_basename = os.path.basename(asset_url)
    logger.info(f"Downloading {url_basename} to {out_path}")

    utils.download_wget(asset_url, out_path)
93
+
94
+
95
def _initialize_data_dir(data_dir: str, init_msg: str = INIT_DATA_DIR_MSG) -> None:
    """
    Create a data directory if it doesn't exist.

    Args:
        data_dir (str): directory to check; when absent it is created
            together with any missing parent directories.
        init_msg (str): warning template logged before the directory is
            created. It is formatted with `data_dir=...`, so it may contain
            a `{data_dir}` placeholder.

    Returns:
        None
    """

    if os.path.isdir(data_dir):
        return None

    # honor the caller-supplied message rather than always using the default
    logger.warning(init_msg.format(data_dir=data_dir))

    logger.warning(f"Trying to create {data_dir}")
    pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)

    return None
107
+
108
+
109
def _validate_gcs_asset(asset: str) -> None:
    """
    Validate a GCS asset by name.

    The asset registry (GCS_ASSETS.ASSETS) is first passed through
    _CprAssetsValidator, then `asset` is checked against its keys.

    Raises:
        ValueError: if `asset` is not one of the registry keys.
    """

    validated_registry = _CprAssetsValidator(assets=GCS_ASSETS.ASSETS)
    valid_gcs_assets = validated_registry.assets.keys()

    if asset in valid_gcs_assets:
        return None

    raise ValueError(
        f"asset was {asset} and must be one of the keys in GCS_ASSETS.ASSETS: {', '.join(valid_gcs_assets)}"
    )
120
+
121
+
122
def _validate_gcs_subasset(asset: str, subasset: str | None) -> None:
    """
    Validate a subasset as belonging to a given asset.

    Args:
        asset (str): name of the asset; used as a key into GCS_ASSETS.ASSETS.
        subasset (str | None): name of the requested subasset, or None when
            no subasset was requested.

    Returns:
        None

    Raises:
        ValueError: if the asset defines subassets and `subasset` is None
            or is not one of them.
    """

    valid_subassets = GCS_ASSETS.ASSETS[asset]["subassets"]

    if valid_subassets is None:
        # a subasset passed for an asset without subassets is only warned about
        if subasset is not None:
            logger.warning(
                f"subasset was not None but asset {asset} does not have subassets. Ignoring subasset."
            )

        return None

    if subasset is None:
        raise ValueError(
            f"subasset was None and must be one of {', '.join(valid_subassets)}"
        )

    if subasset not in valid_subassets:
        raise ValueError(
            f"subasset, {subasset}, was not found in asset {asset}. Valid subassets are {', '.join(valid_subassets)}"
        )

    return None
146
+
147
+
148
def _get_gcs_asset_path(asset: str, subasset: Optional[str] = None) -> str:
    """
    Get the path of an asset, or of one of its subassets.

    Parameters
    ----------
    asset : str
        The name of the asset (a key of GCS_ASSETS.ASSETS).
    subasset : Optional[str]
        The name of the subasset. Only used when the asset defines subassets.

    Returns
    -------
    str
        The asset's "file" entry when the asset has no subassets; otherwise
        the subasset's file name joined onto a directory named after the
        asset's "file" entry up to its first ".".
    """
    entry = GCS_ASSETS.ASSETS[asset]
    subasset_files = entry["subassets"]

    if subasset_files is None:
        return entry["file"]

    # the bundle directory is the archive name with everything from the
    # first "." onward dropped
    bundle_dir = entry["file"].partition(".")[0]
    return os.path.join(bundle_dir, subasset_files[subasset])
171
+
172
+
173
class _CprAssetValidator(BaseModel):
    """Schema for a single entry of the asset registry."""

    # name of the asset's file
    file: str
    # mapping of subasset name -> file name, or None if there are no subassets
    subassets: Optional[dict[str, str]]
    # URL the asset is downloaded from
    public_url: str
177
+
178
+
179
class _CprAssetsValidator(BaseModel):
    """Schema for the whole asset registry, keyed by asset name."""

    # asset name -> validated registry entry
    assets: dict[str, _CprAssetValidator]