napistu 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +12 -0
- napistu/__main__.py +867 -0
- napistu/consensus.py +1557 -0
- napistu/constants.py +500 -0
- napistu/gcs/__init__.py +10 -0
- napistu/gcs/constants.py +69 -0
- napistu/gcs/downloads.py +180 -0
- napistu/identifiers.py +805 -0
- napistu/indices.py +227 -0
- napistu/ingestion/__init__.py +10 -0
- napistu/ingestion/bigg.py +146 -0
- napistu/ingestion/constants.py +296 -0
- napistu/ingestion/cpr_edgelist.py +106 -0
- napistu/ingestion/identifiers_etl.py +148 -0
- napistu/ingestion/obo.py +268 -0
- napistu/ingestion/psi_mi.py +276 -0
- napistu/ingestion/reactome.py +218 -0
- napistu/ingestion/sbml.py +621 -0
- napistu/ingestion/string.py +356 -0
- napistu/ingestion/trrust.py +285 -0
- napistu/ingestion/yeast.py +147 -0
- napistu/mechanism_matching.py +597 -0
- napistu/modify/__init__.py +10 -0
- napistu/modify/constants.py +86 -0
- napistu/modify/curation.py +628 -0
- napistu/modify/gaps.py +635 -0
- napistu/modify/pathwayannot.py +1381 -0
- napistu/modify/uncompartmentalize.py +264 -0
- napistu/network/__init__.py +10 -0
- napistu/network/constants.py +117 -0
- napistu/network/neighborhoods.py +1594 -0
- napistu/network/net_create.py +1647 -0
- napistu/network/net_utils.py +652 -0
- napistu/network/paths.py +500 -0
- napistu/network/precompute.py +221 -0
- napistu/rpy2/__init__.py +127 -0
- napistu/rpy2/callr.py +168 -0
- napistu/rpy2/constants.py +101 -0
- napistu/rpy2/netcontextr.py +464 -0
- napistu/rpy2/rids.py +697 -0
- napistu/sbml_dfs_core.py +2216 -0
- napistu/sbml_dfs_utils.py +304 -0
- napistu/source.py +394 -0
- napistu/utils.py +943 -0
- napistu-0.1.0.dist-info/METADATA +56 -0
- napistu-0.1.0.dist-info/RECORD +77 -0
- napistu-0.1.0.dist-info/WHEEL +5 -0
- napistu-0.1.0.dist-info/entry_points.txt +2 -0
- napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
- napistu-0.1.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/conftest.py +83 -0
- tests/test_consensus.py +255 -0
- tests/test_constants.py +20 -0
- tests/test_curation.py +134 -0
- tests/test_data/__init__.py +0 -0
- tests/test_edgelist.py +20 -0
- tests/test_gcs.py +23 -0
- tests/test_identifiers.py +151 -0
- tests/test_igraph.py +353 -0
- tests/test_indices.py +88 -0
- tests/test_mechanism_matching.py +126 -0
- tests/test_net_utils.py +66 -0
- tests/test_netcontextr.py +105 -0
- tests/test_obo.py +34 -0
- tests/test_pathwayannot.py +95 -0
- tests/test_precomputed_distances.py +222 -0
- tests/test_rpy2.py +61 -0
- tests/test_sbml.py +46 -0
- tests/test_sbml_dfs_create.py +307 -0
- tests/test_sbml_dfs_utils.py +22 -0
- tests/test_sbo.py +11 -0
- tests/test_set_coverage.py +50 -0
- tests/test_source.py +67 -0
- tests/test_uncompartmentalize.py +40 -0
- tests/test_utils.py +487 -0
- tests/utils.py +30 -0
napistu/gcs/downloads.py
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
import os
|
5
|
+
import pathlib
|
6
|
+
import re
|
7
|
+
from pydantic import BaseModel
|
8
|
+
from typing import Optional
|
9
|
+
|
10
|
+
from napistu import utils
|
11
|
+
from napistu.gcs.constants import GCS_ASSETS
|
12
|
+
from napistu.gcs.constants import INIT_DATA_DIR_MSG
|
13
|
+
|
14
|
+
logger = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
|
17
|
+
def load_public_cpr_asset(
    asset: str,
    data_dir: str,
    subasset: str | None = None,
    init_msg: str = INIT_DATA_DIR_MSG,
) -> str:
    """
    Load Public CPR Asset

    Download the `asset` asset to `data_dir` if it doesn't
    already exist and return a path

    asset: the file to download (which will be unpacked if it is an archive:
        .tar.gz, .tgz, .zip or .gz)
    data_dir: the local directory where assets should be stored
    subasset: the name of a subasset to load from within the asset bundle
    init_msg: message to display if data_dir does not exist

    returns:
        asset_path: the path to a local file

    raises:
        ValueError: if `asset` or `subasset` is not a recognized name
        FileNotFoundError: if the expected file is still missing after the
            download (and extraction, when applicable)
    """

    # make sure the data directory exists and the request is well-formed
    _initialize_data_dir(data_dir, init_msg)
    _validate_gcs_asset(asset)
    _validate_gcs_subasset(asset, subasset)

    # get the path for the asset (which may have been downloaded in a tar-ball)
    asset_path = os.path.join(data_dir, _get_gcs_asset_path(asset, subasset))
    if os.path.isfile(asset_path):
        # already available locally; nothing to download
        return asset_path

    download_path = os.path.join(
        data_dir, os.path.basename(GCS_ASSETS.ASSETS[asset]["file"])
    )
    if not os.path.isfile(download_path):
        download_public_cpr_asset(asset, download_path)

    # unpack if the download is an archive; a single anchored raw-string
    # pattern with every dot escaped replaces the four separate searches
    extn = utils.get_extn_from_url(download_path)
    if re.search(r"\.(?:tar\.gz|tgz|zip|gz)$", extn):
        utils.extract(download_path)

    # check that the asset_path exists
    if not os.path.isfile(asset_path):
        raise FileNotFoundError(
            f"Something went wrong and {asset_path} was not created."
        )

    return asset_path
|
71
|
+
|
72
|
+
|
73
|
+
def download_public_cpr_asset(asset: str, out_path: str) -> None:
    """
    Download Public CPR Asset

    Args:
        asset (str): The name of a CPR public asset; it must be one of the
            keys of GCS_ASSETS.ASSETS.
        out_path (str): Local destination handed to `utils.download_wget`.

    Returns:
        None

    Raises:
        ValueError: if `asset` is not a recognized asset name.
    """

    _validate_gcs_asset(asset)

    # the public URL registered for this asset is the download source
    source_url = GCS_ASSETS.ASSETS[asset]["public_url"]
    remote_name = os.path.basename(source_url)
    logger.info(f"Downloading {remote_name} to {out_path}")

    utils.download_wget(source_url, out_path)
|
93
|
+
|
94
|
+
|
95
|
+
def _initialize_data_dir(data_dir: str, init_msg: str = INIT_DATA_DIR_MSG) -> None:
    """Create a data directory if it doesn't exist.

    data_dir: the directory to check for and, if missing, create (including
        any missing parent directories)
    init_msg: warning logged when `data_dir` is missing; it is formatted with
        the keyword `data_dir`, so it may contain a `{data_dir}` placeholder

    returns:
        None
    """

    if os.path.isdir(data_dir):
        return None

    # honor the caller-supplied message rather than always using the default
    try:
        message = init_msg.format(data_dir=data_dir)
    except (KeyError, IndexError, ValueError):
        # the template has unrelated or malformed braces; a log message
        # should never prevent the directory from being created
        message = init_msg

    logger.warning(message)
    logger.warning(f"Trying to create {data_dir}")
    pathlib.Path(data_dir).mkdir(parents=True, exist_ok=True)

    return None
|
107
|
+
|
108
|
+
|
109
|
+
def _validate_gcs_asset(asset: str) -> None:
    """Check that `asset` is one of the keys of GCS_ASSETS.ASSETS.

    The asset table is first passed through `_CprAssetsValidator`; the
    name is then looked up among the validated entries.

    Raises:
        ValueError: if `asset` is not a key of the asset table.
    """

    validated = _CprAssetsValidator(assets=GCS_ASSETS.ASSETS)
    known_assets = validated.assets.keys()

    if asset in known_assets:
        return None

    raise ValueError(
        f"asset was {asset} and must be one of the keys in GCS_ASSETS.ASSETS: {', '.join(known_assets)}"
    )
|
120
|
+
|
121
|
+
|
122
|
+
def _validate_gcs_subasset(asset: str, subasset: str | None) -> None:
    """Validate a subasset as belonging to a given asset.

    asset: a key of GCS_ASSETS.ASSETS (this function does not check the key
        itself; an unknown asset raises KeyError)
    subasset: the requested subasset name, or None when none was requested

    If the asset has no subassets, a supplied `subasset` is ignored with a
    warning. If the asset has subassets, `subasset` is required and must be
    one of them.

    raises:
        ValueError: if the asset has subassets and `subasset` is None or is
            not one of the asset's subassets
    """

    # look the entry up once; None means the asset is not a bundle
    valid_subassets = GCS_ASSETS.ASSETS[asset]["subassets"]

    if valid_subassets is None:
        if subasset is not None:
            logger.warning(
                f"subasset was not None but asset {asset} does not have subassets. Ignoring subasset."
            )

        return None

    if subasset is None:
        raise ValueError(
            f"subasset was None and must be one of {', '.join(valid_subassets)}"
        )

    if subasset not in valid_subassets:
        raise ValueError(
            f"subasset, {subasset}, was not found in asset {asset}. Valid subassets are {', '.join(valid_subassets)}"
        )

    return None
|
146
|
+
|
147
|
+
|
148
|
+
def _get_gcs_asset_path(asset: str, subasset: Optional[str] = None) -> str:
    """
    Build the relative path of an asset, or of a subasset within its bundle.

    Parameters
    ----------
    asset : str
        The name of the asset (a key of GCS_ASSETS.ASSETS).
    subasset : Optional[str]
        The name of the subasset; only used when the asset defines subassets.

    Returns
    -------
    str
        The asset's "file" entry when it has no subassets. Otherwise the
        subasset's file name joined onto the part of the "file" entry that
        precedes its first ".".
    """
    entry = GCS_ASSETS.ASSETS[asset]
    subasset_files = entry["subassets"]

    if subasset_files is None:
        return entry["file"]

    # the directory is named after the bundle file with its extension(s) removed
    bundle_dir = entry["file"].split(".")[0]
    return os.path.join(bundle_dir, subasset_files[subasset])
|
171
|
+
|
172
|
+
|
173
|
+
class _CprAssetValidator(BaseModel):
    """Expected shape of a single entry in the asset table."""

    # file name of the asset
    file: str
    # subasset name -> file name, or None when the asset has no subassets
    subassets: Optional[dict[str, str]]
    # URL the asset is downloaded from
    public_url: str
|
177
|
+
|
178
|
+
|
179
|
+
class _CprAssetsValidator(BaseModel):
    """Expected shape of the whole asset table, keyed by asset name."""

    # asset name -> validated description of that asset
    assets: dict[str, _CprAssetValidator]
|