napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
napistu/indices.py ADDED
@@ -0,0 +1,227 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ import os
5
+ import re
6
+ from os import PathLike
7
+ from typing import Iterable
8
+
9
+ from fs import open_fs
10
+ import pandas as pd
11
+
12
+ from napistu.utils import path_exists
13
+ from napistu.constants import EXPECTED_PW_INDEX_COLUMNS
14
+ from napistu.constants import SOURCE_SPEC
15
+
16
+
17
class PWIndex:
    """
    Pathway Index

    Organizing metadata (and optionally paths) of individual pathway representations

    Attributes
    ----------
    index : pd.DataFrame
        A table describing the location and contents of pathway files.
    base_path : str or None
        Path to the directory of indexed files. None when the index was
        created with ``validate_paths=False``.

    Methods
    -------
    filter(sources, species)
        Filter index based on pathway source and/or species
    search(query)
        Filter index to pathways matching the search query
    """

    def __init__(
        self,
        pw_index: PathLike[str] | str | pd.DataFrame,
        pw_index_base_path=None,
        validate_paths=True,
    ) -> None:
        """
        Tracks pathway file locations and contents.

        Parameters
        ----------
        pw_index : PathLike, str or pd.DataFrame
            Path to index file or a pd.DataFrame containing the contents of PWIndex.index
        pw_index_base_path : str or None
            A Path that relative paths in pw_index will reference
        validate_paths : bool
            If True then paths constructed from base_path + file will be tested for existence.
            If False then paths will not be validated and base_path attribute will be set to None

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If pw_index has an unsupported type, the index columns differ from
            EXPECTED_PW_INDEX_COLUMNS, pathway ids are duplicated, or
            validate_paths is True but no base path can be determined.
        TypeError
            If pw_index_base_path is not a str (when provided) or
            validate_paths is not a bool.
        FileNotFoundError
            If the base path or any indexed file does not exist.
        """

        # read index either directly from pandas or from a file
        if isinstance(pw_index, pd.DataFrame):
            self.index = pw_index
        elif isinstance(pw_index, (PathLike, str)):
            base_path = os.path.dirname(pw_index)
            file_name = os.path.basename(pw_index)
            with open_fs(base_path) as base_fs:
                with base_fs.open(file_name) as f:
                    self.index = pd.read_table(f)
        else:
            raise ValueError(
                f"pw_index needs to be of type PathLike[str] | str | pd.DataFrame but was {type(pw_index).__name__}"
            )

        # format option arguments
        if (pw_index_base_path is not None) and (
            not isinstance(pw_index_base_path, str)
        ):
            raise TypeError(
                f"pw_index_base_path was as {type(pw_index_base_path).__name__} and must be a str if provided"
            )

        if not isinstance(validate_paths, bool):
            raise TypeError(
                f"validate_paths was as {type(validate_paths).__name__} and must be a bool"
            )

        # verify that the index is syntactically correct
        observed_columns = set(self.index.columns.to_list())

        if EXPECTED_PW_INDEX_COLUMNS != observed_columns:
            missing = ", ".join(EXPECTED_PW_INDEX_COLUMNS.difference(observed_columns))
            extra = ", ".join(observed_columns.difference(EXPECTED_PW_INDEX_COLUMNS))
            raise ValueError(
                f"Observed pw_index columns did not match expected columns:\n"
                f"Missing columns: {missing}\nExtra columns: {extra}"
            )

        # verify that all pathway_ids are unique
        pathway_ids = self.index[SOURCE_SPEC.PATHWAY_ID]
        duplicated_pathway_ids = list(pathway_ids[pathway_ids.duplicated()])
        if duplicated_pathway_ids:
            # str() so that non-string ids cannot break the error message itself
            path_str = "\n".join(map(str, duplicated_pathway_ids))
            raise ValueError(
                f"{len(duplicated_pathway_ids)} pathway_ids were duplicated:\n{path_str}"
            )

        # always define the attribute so that it is None (not missing) when
        # paths are not validated, as documented above
        self.base_path = None

        if validate_paths:
            if pw_index_base_path is not None:
                self.base_path = pw_index_base_path
            elif isinstance(pw_index, (PathLike, str)):
                self.base_path = os.path.dirname(pw_index)
            else:
                raise ValueError(
                    "validate_paths was True but neither pw_index_base_path "
                    "nor an index path were provided. Please provide "
                    "pw_index_base_path if you intend to verify that "
                    "the files present in pw_index exist"
                )

            if path_exists(self.base_path) is False:
                raise FileNotFoundError(
                    f"base_path at {self.base_path} is not a valid directory"
                )

            # verify that pathway files exist
            self._check_files()

        elif pw_index_base_path is not None:
            print(
                "validate_paths is False so pw_index_base_path will be ignored and paths will not be validated"
            )

    def _check_files(self):
        """Verifies that all files in the pwindex are present

        Only the top level of ``base_path`` is listed, so every entry of the
        file column has to be a name directly inside that directory.

        Raises:
            FileNotFoundError: Error if a file not present
        """
        with open_fs(self.base_path) as base_fs:
            # verify that pathway files exist
            files = base_fs.listdir(".")
            missing_pathway_files = set(self.index[SOURCE_SPEC.FILE]) - set(files)
            if missing_pathway_files:
                # sorted for a reproducible message; str() guards against NaN entries
                file_str = "\n".join(sorted(map(str, missing_pathway_files)))
                raise FileNotFoundError(
                    f"{len(missing_pathway_files)} were missing:\n{file_str}"
                )

    @staticmethod
    def _as_list(values: str | Iterable[str]) -> list[str]:
        """Wrap a single string in a list; materialize any other iterable."""
        return [values] if isinstance(values, str) else list(values)

    def filter(
        self,
        sources: str | Iterable[str] | None = None,
        species: str | Iterable[str] | None = None,
    ):
        """
        Filter Pathway Index

        The index is filtered in place: ``self.index`` is replaced by the
        matching rows and nothing is returned.

        Args:
            sources (str | Iterable[str] | None, optional): One source, a list of valid sources, or None for all
            species (str | Iterable[str] | None, optional): One species, a list of valid species, or None for all
        """
        pw_index = self.index
        if sources is not None:
            # a bare string must be wrapped: isin() only accepts list-likes
            pw_index = pw_index[pw_index["source"].isin(self._as_list(sources))]

        if species is not None:
            pw_index = pw_index[pw_index["species"].isin(self._as_list(species))]

        self.index = pw_index

    def search(self, query):
        """
        Search Pathway Index

        The index is filtered in place: ``self.index`` is replaced by the
        matching rows.

        Parameters:
            query: str
                Regular expression used to filter to rows of interest based on
                a case-insensitive match to names.

        Returns:
            None
        """

        pw_index = self.index
        # find matches to query; na=False treats missing names as non-matching
        # instead of yielding a NaN mask that cannot be used for indexing
        fil = pw_index[SOURCE_SPEC.NAME].str.contains(
            query, regex=True, flags=re.IGNORECASE, na=False
        )
        self.index = pw_index[fil]
195
+
196
+
197
def adapt_pw_index(
    source: str | PWIndex,
    species: str | Iterable[str] | None,
    outdir: str | None = None,
) -> PWIndex:
    """Adapts a pw_index

    Helpful to filter for species before reconstructing.

    Args:
        source (str | PWIndex): uri for pw_index.tsv file or PWIndex object.
            A PWIndex is deep-copied, so the object passed in is not modified.
        species (str | Iterable[str] | None): species to keep; None keeps all.
        outdir (str | None, optional): Optional directory to write the filtered
            index to, as "pw_index.tsv". Defaults to None.

    Returns:
        PWIndex: Filtered pw index

    Raises:
        ValueError: if source is neither a str nor a PWIndex
    """
    if isinstance(source, str):
        pw_index = PWIndex(source)
    elif isinstance(source, PWIndex):
        # filter() works in place, so operate on a copy of the caller's index
        pw_index = copy.deepcopy(source)
    else:
        raise ValueError("'source' needs to be str or PWIndex")
    pw_index.filter(species=species)

    if outdir is not None:
        with open_fs(outdir, create=True) as out_fs:
            with out_fs.open("pw_index.tsv", "w") as f:
                # index=False: the row index would be written as an extra
                # unnamed column, and PWIndex rejects any column that is not
                # in EXPECTED_PW_INDEX_COLUMNS when the file is read back
                pw_index.index.to_csv(f, sep="\t", index=False)
    return pw_index
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.metadata import PackageNotFoundError
4
+ from importlib.metadata import version
5
+
6
try:
    # look the version up under the name this distribution is published as
    __version__ = version("napistu")
except PackageNotFoundError:
    # package is not installed
    pass
@@ -0,0 +1,146 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import logging
5
+ import os
6
+ from typing import Iterable
7
+
8
+ import pandas as pd
9
+ from napistu import indices
10
+ from napistu import sbml_dfs_core
11
+ from napistu import utils
12
+ from napistu.consensus import construct_sbml_dfs_dict
13
+ from napistu.ingestion import sbml
14
+ from napistu.ingestion.constants import BIGG_MODEL_FIELD_SPECIES
15
+ from napistu.ingestion.constants import BIGG_MODEL_FIELD_URL
16
+ from napistu.ingestion.constants import BIGG_MODEL_KEYS
17
+ from napistu.ingestion.constants import BIGG_MODEL_URLS
18
+ from napistu.ingestion.constants import BIGG_RECON3D_FIELD_ANNOTATION
19
+ from napistu.ingestion.constants import SPECIES_FULL_NAME_HUMAN
20
+ from napistu.ingestion.constants import SPECIES_FULL_NAME_MOUSE
21
+ from napistu.ingestion.constants import SPECIES_FULL_NAME_YEAST
22
+ from fs import open_fs
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def bigg_sbml_download(bg_pathway_root: str, overwrite: bool = False) -> None:
    """
    BiGG SBML Download

    Download the BiGG SBML models registered in BIGG_MODEL_URLS for human,
    mouse and yeast, and write a "pw_index.tsv" describing them next to the
    downloaded files.

    Parameters:
        bg_pathway_root (str): Path to the directory the models and the pathway
            index are written to. It is passed to utils.initialize_dir first.
        overwrite (bool): Overwrite an existing output directory.

    Returns:
        None

    """
    utils.initialize_dir(bg_pathway_root, overwrite)

    # one entry per model, keyed by the model id used as pathway_id
    bigg_models = {
        BIGG_MODEL_KEYS[species_name]: {
            BIGG_MODEL_FIELD_URL: BIGG_MODEL_URLS[species_name],
            BIGG_MODEL_FIELD_SPECIES: species_name,
        }
        for species_name in (
            SPECIES_FULL_NAME_HUMAN,
            SPECIES_FULL_NAME_MOUSE,
            SPECIES_FULL_NAME_YEAST,
        )
    }
    bigg_models_df = (
        pd.DataFrame(bigg_models).T.rename_axis("pathway_id").reset_index()
    )

    # add other attributes which will be used in the pw_index
    bigg_models_df["file"] = bigg_models_df["pathway_id"] + ".sbml"
    bigg_models_df["name"] = bigg_models_df["pathway_id"]
    bigg_models_df["date"] = datetime.date.today().strftime("%Y%m%d")
    bigg_models_df["source"] = "BiGG"

    pw_index = bigg_models_df[
        ["file", "source", BIGG_MODEL_FIELD_SPECIES, "pathway_id", "name", "date"]
    ]

    with open_fs(bg_pathway_root, create=True) as bg_fs:
        for file_name, url in zip(
            bigg_models_df["file"], bigg_models_df[BIGG_MODEL_FIELD_URL]
        ):
            with bg_fs.open(file_name, "wb") as f:
                utils.download_wget(url, f)  # type: ignore

        # save index to sbml dir; this has to happen while bg_fs is still open
        with bg_fs.open("pw_index.tsv", "wb") as f:
            pw_index.to_csv(f, sep="\t", index=False)

    return None
85
+
86
+
87
def annotate_recon(raw_model_path: str, annotated_model_path: str) -> None:
    """Annotate Recon3D

    Add compartment annotations to Recon3D so it can be merged with other
    pathways. The model at ``raw_model_path`` is loaded with sbml.SBML and
    handed to sbml.add_sbml_annotations together with the entries of
    BIGG_RECON3D_FIELD_ANNOTATION; ``annotated_model_path`` is forwarded as
    its ``save_path``. A deprecation warning is logged on every call.

    Args:
        raw_model_path (str): location of the unannotated SBML model
        annotated_model_path (str): passed to add_sbml_annotations as save_path

    Returns:
        None
    """
    deprecation_notice = (
        "add_sbml_annotations is deprecated and maybe removed in a future version of rcpr; "
        "we are now adding these annotation during ingestion by sbml.sbml_df_from_sbml() rather "
        "than directly appending them to the raw .sbml"
    )
    logger.warning(deprecation_notice)

    compartment_annotations = pd.DataFrame(BIGG_RECON3D_FIELD_ANNOTATION)
    raw_model = sbml.SBML(raw_model_path)
    sbml.add_sbml_annotations(
        raw_model,
        compartment_annotations,
        save_path=annotated_model_path,
    )
103
+
104
+
105
def construct_bigg_consensus(
    pw_index_inp: str | indices.PWIndex,
    species: str | Iterable[str] | None = None,
    outdir: str | None = None,
) -> sbml_dfs_core.SBML_dfs:
    """Constructs a BiGG SBML DFs Pathway Representation

    Attention: currently this only works for a single model. Integration of
    multiple models is not supported yet in BiGG.

    Args:
        pw_index_inp (str | indices.PWIndex): PWIndex or uri pointing to PWIndex
        species (str | Iterable[str] | None): one or more species to filter by. Default: no filtering
        outdir (str | None, optional): output directory used to cache results. Defaults to None.

    Returns:
        sbml_dfs_core.SBML_dfs: A consensus SBML

    Raises:
        ValueError: if pw_index_inp has an unsupported type or the (filtered)
            index yields no model
        NotImplementedError: if the (filtered) index yields more than one model
    """
    if isinstance(pw_index_inp, str):
        pw_index = indices.adapt_pw_index(pw_index_inp, species=species, outdir=outdir)
    elif isinstance(pw_index_inp, indices.PWIndex):
        if species is None:
            pw_index = pw_index_inp
        else:
            # honour the species filter for PWIndex inputs as well;
            # adapt_pw_index filters a deep copy, leaving the caller's index intact
            pw_index = indices.adapt_pw_index(pw_index_inp, species=species)
    else:
        raise ValueError("pw_index_inp needs to be a PWIndex or a str to a location.")

    if outdir is not None:
        construct_sbml_dfs_dict_fkt = utils.pickle_cache(
            os.path.join(outdir, "model_pool.pkl")
        )(construct_sbml_dfs_dict)
    else:
        construct_sbml_dfs_dict_fkt = construct_sbml_dfs_dict

    sbml_dfs_dict = construct_sbml_dfs_dict_fkt(pw_index)
    if len(sbml_dfs_dict) > 1:
        raise NotImplementedError("Merging of models not implemented yet for BiGG")
    if len(sbml_dfs_dict) == 0:
        # fail with a clear message instead of an IndexError further down
        raise ValueError(
            "No BiGG model was found in the pathway index; "
            "check the index and the species filter"
        )

    # In Bigg there should be only one model
    model = next(iter(sbml_dfs_dict.values()))
    # fix missing compartmentalization
    model = sbml_dfs_core.infer_uncompartmentalized_species_location(model)
    model = sbml_dfs_core.name_compartmentalized_species(model)
    model.validate()
    return model
@@ -0,0 +1,296 @@
1
+ # Ingestion constants
2
+ from __future__ import annotations
3
+
4
+ from types import SimpleNamespace
5
+
6
SPECIES_FULL_NAME_HUMAN = "Homo sapiens"
SPECIES_FULL_NAME_MOUSE = "Mus musculus"
SPECIES_FULL_NAME_YEAST = "Saccharomyces cerevisiae"
SPECIES_FULL_NAME_RAT = "Rattus norvegicus"
SPECIES_FULL_NAME_WORM = "Caenorhabditis elegans"


# BIGG
# download location of each supported model, keyed by species
BIGG_MODEL_URLS = {
    SPECIES_FULL_NAME_HUMAN: "http://bigg.ucsd.edu/static/models/Recon3D.xml",
    SPECIES_FULL_NAME_MOUSE: "http://bigg.ucsd.edu/static/models/iMM1415.xml",
    SPECIES_FULL_NAME_YEAST: "http://bigg.ucsd.edu/static/models/iMM904.xml",
}

BIGG_MODEL_FIELD_URL = "url"
BIGG_MODEL_FIELD_SPECIES = "species"

# model identifier of each supported model, keyed by species
BIGG_MODEL_KEYS = {
    SPECIES_FULL_NAME_HUMAN: "recon3D",
    SPECIES_FULL_NAME_MOUSE: "iMM1415",
    SPECIES_FULL_NAME_YEAST: "iMM904",
}
BIGG_RECON3D_FIELD_ID = "id"
BIGG_RECON3D_FIELD_TYPE = "type"
BIGG_RECON3D_FIELD_URI = "uri"

BIGG_RECON3D_ID_C = "c"
BIGG_RECON3D_ID_L = "l"
BIGG_RECON3D_ID_E = "e"
BIGG_RECON3D_ID_M = "m"
BIGG_RECON3D_ID_R = "r"
BIGG_RECON3D_ID_X = "x"
BIGG_RECON3D_ID_N = "n"
BIGG_RECON3D_ID_I = "i"
BIGG_RECON3D_ID_G = "g"

BIGG_RECON3D_TYPE_COMPARTMENT = "compartment"

# every annotation URI below is this prefix followed by a GO accession
_QUICKGO_TERM_URL = "https://www.ebi.ac.uk/QuickGO/term/"

# (compartment id, GO accession) pairs; a compartment id may map to several
# GO terms and the order here is the order of BIGG_RECON3D_FIELD_ANNOTATION
_BIGG_RECON3D_COMPARTMENT_GO_TERMS = (
    (BIGG_RECON3D_ID_C, "GO:0005829"),  # cytosol
    (BIGG_RECON3D_ID_C, "GO:0005737"),  # cytoplasm
    (BIGG_RECON3D_ID_C, "GO:0005886"),  # plasma membrane
    (BIGG_RECON3D_ID_L, "GO:0043202"),  # lysosome lumen
    (BIGG_RECON3D_ID_L, "GO:0005765"),  # lysosomal membrane
    (BIGG_RECON3D_ID_M, "GO:0005758"),  # mitochondrial intermembrane space
    (BIGG_RECON3D_ID_M, "GO:0005741"),  # mitochondrial outer membrane
    (BIGG_RECON3D_ID_R, "GO:0005789"),  # ER membrane
    (BIGG_RECON3D_ID_R, "GO:0005788"),  # ER lumen
    (BIGG_RECON3D_ID_E, "GO:0005576"),  # extracellular region
    (BIGG_RECON3D_ID_X, "GO:0005778"),  # peroxisomal membrane
    (BIGG_RECON3D_ID_X, "GO:0005782"),  # peroxisomal matrix
    (BIGG_RECON3D_ID_N, "GO:0005730"),  # nucleolus
    (BIGG_RECON3D_ID_N, "GO:0005635"),  # nuclear envelope
    (BIGG_RECON3D_ID_N, "GO:0005654"),  # nucleoplasm
    (BIGG_RECON3D_ID_G, "GO:0000139"),  # golgi membrane
    (BIGG_RECON3D_ID_G, "GO:0005796"),  # golgi lumen
    (BIGG_RECON3D_ID_I, "GO:0005759"),  # mitochondrial matrix
    (BIGG_RECON3D_ID_I, "GO:0005743"),  # mitochondrial inner membrane
)

# list of {id, type, uri} records, one per (compartment id, GO term) pair
BIGG_RECON3D_FIELD_ANNOTATION = [
    {
        BIGG_RECON3D_FIELD_ID: compartment_id,
        BIGG_RECON3D_FIELD_TYPE: BIGG_RECON3D_TYPE_COMPARTMENT,
        BIGG_RECON3D_FIELD_URI: _QUICKGO_TERM_URL + go_accession,
    }
    for compartment_id, go_accession in _BIGG_RECON3D_COMPARTMENT_GO_TERMS
]

# IDENTIFIERS ETL
IDENTIFIERS_ETL_YEAST_URL = "https://www.uniprot.org/docs/yeast.txt"
IDENTIFIERS_ETL_SBO_URL = (
    "https://raw.githubusercontent.com/EBI-BioModels/SBO/master/SBO_OBO.obo"
)
IDENTIFIERS_ETL_YEAST_FIELDS = (
    "common",
    "common_all",
    "OLN",
    "SwissProt_acc",
    "SwissProt_entry",
    "SGD",
    "size",
    "3d",
    "chromosome",
)
IDENTIFIERS_ETL_YEAST_HEADER_REGEX = "__________"

# OBO
OBO_GO_BASIC_URL = "http://purl.obolibrary.org/obo/go/go-basic.obo"
OBO_GO_BASIC_LOCAL_TMP = "/tmp/go-basic.obo"


# PSI MI
PSI_MI_INTACT_FTP_URL = (
    "https://ftp.ebi.ac.uk/pub/databases/intact/current/psi30/species"
)
PSI_MI_INTACT_DEFAULT_OUTPUT_DIR = "/tmp/intact_tmp"
PSI_MI_INTACT_XML_NAMESPACE = "{http://psi.hupo.org/mi/mif300}"

PSI_MI_INTACT_SPECIES_TO_BASENAME = {
    SPECIES_FULL_NAME_YEAST: "yeast",
    SPECIES_FULL_NAME_HUMAN: "human",
    SPECIES_FULL_NAME_MOUSE: "mouse",
    SPECIES_FULL_NAME_RAT: "rat",
    SPECIES_FULL_NAME_WORM: "caeel",
}


# REACTOME
REACTOME_SBGN_URL = "https://reactome.org/download/current/homo_sapiens.sbgn.tar.gz"
REACTOME_SMBL_URL = "https://reactome.org/download/current/all_species.3.1.sbml.tgz"
REACTOME_PATHWAYS_URL = "https://reactome.org/download/current/ReactomePathways.txt"
REACTOME_PATHWAY_INDEX_COLUMNS = ["file", "source", "species", "pathway_id", "name"]
REACTOME_PATHWAY_LIST_COLUMNS = ["pathway_id", "name", "species"]

# SBML
# NOTE(review): the SMBL_ prefix below looks like a transposition of SBML_;
# the names are kept as-is because other modules may import them
SMBL_ERROR_NUMBER = "error_number"
SMBL_ERROR_CATEGORY = "category"
SMBL_ERROR_SEVERITY = "severity"
SMBL_ERROR_DESCRIPTION = "description"
SMBL_ERROR_MESSAGE = "message"

SMBL_SUMMARY_PATHWAY_NAME = "Pathway Name"
SMBL_SUMMARY_PATHWAY_ID = "Pathway ID"
SMBL_SUMMARY_N_SPECIES = "# of Species"
SMBL_SUMMARY_N_REACTIONS = "# of Reactions"
SMBL_SUMMARY_COMPARTMENTS = "Compartments"

SMBL_REACTION_DICT_ID = "r_id"
SMBL_REACTION_DICT_NAME = "r_name"
SMBL_REACTION_DICT_IDENTIFIERS = "r_Identifiers"
SMBL_REACTION_DICT_SOURCE = "r_Source"
SMBL_REACTION_DICT_IS_REVERSIBLE = "r_isreversible"

SMBL_REACTION_SPEC_RSC_ID = "rsc_id"
SMBL_REACTION_SPEC_SC_ID = "sc_id"
SMBL_REACTION_SPEC_STOICHIOMETRY = "stoichiometry"
SMBL_REACTION_SPEC_SBO_TERM = "sbo_term"

SBML_COMPARTMENT_DICT_ID = "c_id"
SBML_COMPARTMENT_DICT_NAME = "c_name"
SBML_COMPARTMENT_DICT_IDENTIFIERS = "c_Identifiers"
SBML_COMPARTMENT_DICT_SOURCE = "c_Source"

SBML_SPECIES_DICT_ID = "s_id"
SBML_SPECIES_DICT_NAME = "s_name"
SBML_SPECIES_DICT_IDENTIFIERS = "s_Identifiers"

SBML_COMPARTMENTALIZED_SPECIES_DICT_NAME = "sc_name"
SBML_COMPARTMENTALIZED_SPECIES_DICT_SOURCE = "sc_Source"

SBML_REACTION_ATTR_GET_GENE_PRODUCT = "getGeneProduct"

SBML_ANNOTATION_METHOD_GET_SPECIES = "getSpecies"
SBML_ANNOTATION_METHOD_GET_COMPARTMENT = "getCompartment"
SBML_ANNOTATION_METHOD_GET_REACTION = "getReaction"


# STRING
# URL templates with {version} and {taxid} placeholders
STRING_URL_EXPRESSIONS = {
    "interactions": "https://stringdb-static.org/download/protein.links.full.v{version}/{taxid}.protein.links.full.v{version}.txt.gz",
    "aliases": "https://stringdb-static.org/download/protein.aliases.v{version}/{taxid}.protein.aliases.v{version}.txt.gz",
}
STRING_PROTEIN_ID_RAW = "#string_protein_id"
STRING_PROTEIN_ID = "string_protein_id"
STRING_SOURCE = "protein1"
STRING_TARGET = "protein2"

STRING_VERSION = 11.5

STRING_TAX_IDS = {
    SPECIES_FULL_NAME_WORM: 6239,
    SPECIES_FULL_NAME_HUMAN: 9606,
    SPECIES_FULL_NAME_MOUSE: 10090,
    SPECIES_FULL_NAME_RAT: 10116,
    SPECIES_FULL_NAME_YEAST: 4932,
}

STRING_UPSTREAM_COMPARTMENT = "upstream_compartment"
STRING_DOWNSTREAM_COMPARTMENT = "downstream_compartment"
STRING_UPSTREAM_NAME = "upstream_name"
STRING_DOWNSTREAM_NAME = "downstream_name"


# TRRUST
TTRUST_URL_RAW_DATA_HUMAN = (
    "https://www.grnpedia.org/trrust/data/trrust_rawdata.human.tsv"
)
TRRUST_SYMBOL = "symbol"
TRRUST_UNIPROT = "uniprot"
TRRUST_UNIPROT_ID = "uniprot_id"

TRRUST_COMPARTMENT_NUCLEOPLASM = "nucleoplasm"
TRRUST_COMPARTMENT_NUCLEOPLASM_GO_ID = "GO:0005654"

TRRUST_SIGNS = SimpleNamespace(ACTIVATION="Activation", REPRESSION="Repression")

# YEAST IDEA
# https://idea.research.calicolabs.com/data
YEAST_IDEA_KINETICS_URL = "https://storage.googleapis.com/calico-website-pin-public-bucket/datasets/idea_kinetics.zip"
YEAST_IDEA_SOURCE = "TF"
YEAST_IDEA_TARGET = "GeneName"
YEAST_IDEA_PUBMED_ID = "32181581"  # ids are characters by convention