napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
napistu/constants.py ADDED
@@ -0,0 +1,500 @@
1
+ """Module to contain all constants for CPR"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import libsbml
6
+
7
+ from types import SimpleNamespace
8
+ import pandas as pd
9
+
10
# Human Protein Atlas download
PROTEINATLAS_SUBCELL_LOC_URL = "https://www.proteinatlas.org/download/tsv/subcellular_location.tsv.zip"

# GTEx
GTEX_RNASEQ_EXPRESSION_URL = (
    "https://storage.googleapis.com/adult-gtex/bulk-gex/v8/rna-seq/"
    "GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_median_tpm.gct.gz"
)

# Gencode
GENCODE_URL = (
    "https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_26/"
    "gencode.v26.transcripts.fa.gz"
)

# file extensions (stored without a leading dot)
FILE_EXT_ZIP = "zip"
FILE_EXT_GZ = "gz"
22
+
23
+ # SBML_dfs
24
+
25
# Names of the SBML_dfs tables and of the variables they contain.
SBML_DFS = SimpleNamespace(
    # table names
    COMPARTMENTS="compartments",
    SPECIES="species",
    COMPARTMENTALIZED_SPECIES="compartmentalized_species",
    REACTIONS="reactions",
    REACTION_SPECIES="reaction_species",
    SPECIES_DATA="species_data",
    REACTIONS_DATA="reactions_data",
    # c_* variables
    C_ID="c_id",
    C_NAME="c_name",
    C_IDENTIFIERS="c_Identifiers",
    C_SOURCE="c_Source",
    # s_* variables
    S_ID="s_id",
    S_NAME="s_name",
    S_IDENTIFIERS="s_Identifiers",
    S_SOURCE="s_Source",
    # sc_* variables
    SC_ID="sc_id",
    SC_NAME="sc_name",
    SC_SOURCE="sc_Source",
    # r_* variables
    R_ID="r_id",
    R_NAME="r_name",
    R_IDENTIFIERS="r_Identifiers",
    R_SOURCE="r_Source",
    R_ISREVERSIBLE="r_isreversible",
    # reaction species variables
    RSC_ID="rsc_id",
    STOICHIOMETRY="stoichiometry",
    SBO_TERM="sbo_term",
)
53
+
54
# Per-table schema for an SBML_dfs object. Each table entry lists:
#   "pk"     - primary key variable
#   "label"  - name variable (absent for reaction_species)
#   "id"     - identifiers variable (only for compartments, species, reactions)
#   "source" - source variable (absent for reaction_species)
#   "fk"     - foreign key variables (only for tables referencing other tables)
#   "vars"   - the table's variables other than its primary key
SBML_DFS_SCHEMA = SimpleNamespace(
    SCHEMA={
        SBML_DFS.COMPARTMENTS: {
            "pk": SBML_DFS.C_ID,
            "label": SBML_DFS.C_NAME,
            "id": SBML_DFS.C_IDENTIFIERS,
            "source": SBML_DFS.C_SOURCE,
            "vars": [SBML_DFS.C_NAME, SBML_DFS.C_IDENTIFIERS, SBML_DFS.C_SOURCE],
        },
        SBML_DFS.SPECIES: {
            "pk": SBML_DFS.S_ID,
            "label": SBML_DFS.S_NAME,
            "id": SBML_DFS.S_IDENTIFIERS,
            "source": SBML_DFS.S_SOURCE,
            "vars": [SBML_DFS.S_NAME, SBML_DFS.S_IDENTIFIERS, SBML_DFS.S_SOURCE],
        },
        SBML_DFS.COMPARTMENTALIZED_SPECIES: {
            "pk": SBML_DFS.SC_ID,
            "label": SBML_DFS.SC_NAME,
            "fk": [SBML_DFS.S_ID, SBML_DFS.C_ID],
            "source": SBML_DFS.SC_SOURCE,
            "vars": [
                SBML_DFS.SC_NAME,
                SBML_DFS.S_ID,
                SBML_DFS.C_ID,
                SBML_DFS.SC_SOURCE,
            ],
        },
        SBML_DFS.REACTIONS: {
            "pk": SBML_DFS.R_ID,
            "label": SBML_DFS.R_NAME,
            "id": SBML_DFS.R_IDENTIFIERS,
            "source": SBML_DFS.R_SOURCE,
            "vars": [
                SBML_DFS.R_NAME,
                SBML_DFS.R_IDENTIFIERS,
                SBML_DFS.R_SOURCE,
                SBML_DFS.R_ISREVERSIBLE,
            ],
        },
        SBML_DFS.REACTION_SPECIES: {
            "pk": SBML_DFS.RSC_ID,
            "fk": [SBML_DFS.R_ID, SBML_DFS.SC_ID],
            "vars": [
                SBML_DFS.R_ID,
                SBML_DFS.SC_ID,
                SBML_DFS.STOICHIOMETRY,
                SBML_DFS.SBO_TERM,
            ],
        },
    },
    # the five tables described in SCHEMA
    REQUIRED_ENTITIES={
        SBML_DFS.COMPARTMENTS,
        SBML_DFS.SPECIES,
        SBML_DFS.COMPARTMENTALIZED_SPECIES,
        SBML_DFS.REACTIONS,
        SBML_DFS.REACTION_SPECIES,
    },
    # tables which are not described in SCHEMA
    OPTIONAL_ENTITIES={
        SBML_DFS.SPECIES_DATA,
        SBML_DFS.REACTIONS_DATA,
    },
)

ENTITIES_W_DATA = {SBML_DFS.SPECIES, SBML_DFS.REACTIONS}
119
+
120
# ordered column names for a reaction edgelist
REQUIRED_REACTION_FROMEDGELIST_COLUMNS = [
    "sc_id_up",
    "sc_id_down",
    "sbo_term",
    "r_name",
    "r_Identifiers",
    "r_isreversible",
]

# file names of the standard outputs
CPR_STANDARD_OUTPUTS = SimpleNamespace(
    SPECIES_IDENTIFIERS="species_identifiers.tsv",
    SPECIES="species.json",
    REACTIONS="reactions.json",
    REACTION_SPECIES="reaction_species.json",
    COMPARTMENTS="compartments.json",
    COMPARTMENTALIZED_SPECIES="compartmentalized_species.json",
)

# unordered set of variables expected in an interaction edgelist
INTERACTION_EDGELIST_EXPECTED_VARS = {
    "upstream_name",
    "downstream_name",
    "upstream_compartment",
    "downstream_compartment",
    "r_name",
    "sbo_term",
    "r_Identifiers",
    "r_isreversible",
}
148
+
149
# rank table for biological qualifiers (columns: bqb, bqb_rank)
BQB_PRIORITIES = pd.DataFrame(
    {
        "bqb": ["BQB_IS", "BQB_HAS_PART"],
        "bqb_rank": [1, 2],
    }
)

# rank table for ontologies (columns: ontology, ontology_rank)
ONTOLOGY_PRIORITIES = pd.DataFrame(
    {
        "ontology": ["reactome", "ensembl_gene", "chebi", "uniprot", "go"],
        "ontology_rank": [1, 2, 3, 4, 5],
    }
)
162
+
163
# SBML
# Biological qualifiers
# Biomodels qualifiers
BQB = SimpleNamespace(
    IS="BQB_IS",
    HAS_PART="BQB_HAS_PART",
    IS_PART_OF="BQB_IS_PART_OF",
    IS_VERSION_OF="BQB_IS_VERSION_OF",
    HAS_VERSION="BQB_HAS_VERSION",
    IS_HOMOLOG_TO="BQB_IS_HOMOLOG_TO",
    IS_DESCRIBED_BY="BQB_IS_DESCRIBED_BY",
    IS_ENCODED_BY="BQB_IS_ENCODED_BY",
    ENCODES="BQB_ENCODES",
    OCCURS_IN="BQB_OCCURS_IN",
    HAS_PROPERTY="BQB_HAS_PROPERTY",
    IS_PROPERTY_OF="BQB_IS_PROPERTY_OF",
    HAS_TAXON="BQB_HAS_TAXON",
    UNKNOWN="BQB_UNKNOWN",
)

# molecules are distinctly defined by these BQB terms
# (built from the BQB namespace so every entry is a valid qualifier name; the
# homolog entry was previously spelled "IS_HOMOLOG_TO", without the "BQB_"
# prefix, and so matched no qualifier)
BQB_DEFINING_ATTRS = [BQB.IS, BQB.IS_HOMOLOG_TO]

# a looser convention which will aggregate genes, transcripts, and proteins
# if they are linked with the appropriate bioqualifiers
BQB_DEFINING_ATTRS_LOOSE = [
    BQB.IS,
    BQB.IS_HOMOLOG_TO,
    BQB.IS_ENCODED_BY,
    BQB.ENCODES,
]
194
+
195
# identifiers
IDENTIFIERS = SimpleNamespace(
    ONTOLOGY="ontology",
    IDENTIFIER="identifier",
    BQB="bqb",
    URL="url",
)

# variables required in a species identifiers table: the species id and name
# plus every identifier field except the URL
SPECIES_IDENTIFIERS_REQUIRED_VARS = {
    SBML_DFS.S_ID,
    SBML_DFS.S_NAME,
    IDENTIFIERS.ONTOLOGY,
    IDENTIFIERS.IDENTIFIER,
    IDENTIFIERS.BQB,
}
207
+
208
# names of the biological qualifiers; each is looked up as an attribute of
# libsbml by get_biological_qualifier_codes()
BIOLOGICAL_QUALIFIERS = [
    "BQB_IS",
    "BQB_HAS_PART",
    "BQB_IS_PART_OF",
    "BQB_IS_VERSION_OF",
    "BQB_HAS_VERSION",
    "BQB_IS_HOMOLOG_TO",
    "BQB_IS_DESCRIBED_BY",
    "BQB_IS_ENCODED_BY",
    "BQB_ENCODES",
    "BQB_OCCURS_IN",
    "BQB_HAS_PROPERTY",
    "BQB_IS_PROPERTY_OF",
    "BQB_HAS_TAXON",
    "BQB_UNKNOWN",
]
224
+
225
+
226
def get_biological_qualifier_codes():
    """Map libsbml biological-qualifier codes to their names.

    Every name in ``BIOLOGICAL_QUALIFIERS`` is read as an attribute of the
    ``libsbml`` module; the attribute's value becomes a key and the name
    becomes the corresponding value.

    Returns
    -------
    dict
        ``{libsbml attribute value: qualifier name}``.

    Raises
    ------
    AttributeError
        If ``libsbml`` has no attribute for one of the qualifier names.
    """
    codes_to_names = {}
    for qualifier_name in BIOLOGICAL_QUALIFIERS:
        codes_to_names[getattr(libsbml, qualifier_name)] = qualifier_name

    return codes_to_names


# computed once at import time
BIOLOGICAL_QUALIFIER_CODES = get_biological_qualifier_codes()
233
+
234
# Systems biology ontology
SBOTERM_NAMES = SimpleNamespace(
    REACTANT="reactant",
    PRODUCT="product",
    CATALYST="catalyst",
    INHIBITOR="inhibitor",
    STIMULATOR="stimulator",
    MODIFIER="modifier",
    INTERACTOR="interactor",
)

# SBO term -> name
MINI_SBO_TO_NAME = {
    "SBO:0000010": SBOTERM_NAMES.REACTANT,
    "SBO:0000011": SBOTERM_NAMES.PRODUCT,
    "SBO:0000013": SBOTERM_NAMES.CATALYST,
    "SBO:0000020": SBOTERM_NAMES.INHIBITOR,
    "SBO:0000459": SBOTERM_NAMES.STIMULATOR,
    # parent category of inhibitor and stimulator (i.e., activator)
    "SBO:0000019": SBOTERM_NAMES.MODIFIER,
    # entity participating in a physical or functional interaction
    "SBO:0000336": SBOTERM_NAMES.INTERACTOR,
}

# name -> SBO term; the exact inverse of MINI_SBO_TO_NAME, in the same order
MINI_SBO_FROM_NAME = {name: term for term, name in MINI_SBO_TO_NAME.items()}

SBO_MODIFIER_NAMES = {
    SBOTERM_NAMES.INHIBITOR,
    SBOTERM_NAMES.STIMULATOR,
    SBOTERM_NAMES.MODIFIER,
}

MINI_SBO_NAME_TO_POLARITY = {
    SBOTERM_NAMES.REACTANT: "activation",
    SBOTERM_NAMES.PRODUCT: "activation",
    SBOTERM_NAMES.CATALYST: "activation",
    SBOTERM_NAMES.INHIBITOR: "inhibition",
    SBOTERM_NAMES.STIMULATOR: "activation",
    SBOTERM_NAMES.MODIFIER: "ambiguous",
    SBOTERM_NAMES.INTERACTOR: "ambiguous",
}

# How does changing a reaction's membership affect whether the reaction can
# occur? For example, removing any substrate means the reaction won't occur,
# but all catalysts would have to be removed for it to not occur.
SBO_NAME_TO_ROLE = {
    SBOTERM_NAMES.REACTANT: "DEFINING",
    SBOTERM_NAMES.PRODUCT: "DEFINING",
    SBOTERM_NAMES.INTERACTOR: "DEFINING",
    SBOTERM_NAMES.CATALYST: "REQUIRED",
    SBOTERM_NAMES.INHIBITOR: "OPTIONAL",
    SBOTERM_NAMES.STIMULATOR: "OPTIONAL",
    SBOTERM_NAMES.MODIFIER: "OPTIONAL",
}

# see also https://github.com/calico/netcontextr/blob/main/R/reactionTrimmingFunctions.R
VALID_SBO_ROLES = (
    # There is a direct correspondence between the set of defining entries and
    # the identity of a reaction, e.g., the stoichiometry of a metabolic
    # reaction or the members of a protein-protein interaction.
    "DEFINING",
    # 1+ entries are needed if entries were initially defined, i.e., reactions
    # which require a catalyst would no longer exist if the catalyst was
    # removed, but many reactions do not require a catalyst.
    "REQUIRED",
    # 0+ entries. Optional species can be added to or removed from a reaction
    # without changing its identity.
    "OPTIONAL",
)
306
+
307
# required variables for the edgelist formats used by mechanism_matching
CPR_EDGELIST = SimpleNamespace(
    # upstream / downstream pairs
    S_ID_UPSTREAM="s_id_upstream",
    S_ID_DOWNSTREAM="s_id_downstream",
    SC_ID_UPSTREAM="sc_id_upstream",
    SC_ID_DOWNSTREAM="sc_id_downstream",
    IDENTIFIER_UPSTREAM="identifier_upstream",
    IDENTIFIER_DOWNSTREAM="identifier_downstream",
    S_NAME_UPSTREAM="s_name_upstream",
    S_NAME_DOWNSTREAM="s_name_downstream",
    # origin / destination pair
    SC_ID_ORIGIN="sc_id_origin",
    SC_ID_DEST="sc_id_dest",
)

# edgelist keyed by identifiers
IDENTIFIER_EDGELIST_REQ_VARS = {
    CPR_EDGELIST.IDENTIFIER_UPSTREAM,
    CPR_EDGELIST.IDENTIFIER_DOWNSTREAM,
}

# edgelist keyed by s_id and sc_id
CPR_EDGELIST_REQ_VARS = {
    CPR_EDGELIST.S_ID_UPSTREAM,
    CPR_EDGELIST.S_ID_DOWNSTREAM,
    CPR_EDGELIST.SC_ID_UPSTREAM,
    CPR_EDGELIST.SC_ID_DOWNSTREAM,
}

# paths keyed by origin and destination sc_id
CPR_PATH_REQ_VARS = {
    CPR_EDGELIST.SC_ID_ORIGIN,
    CPR_EDGELIST.SC_ID_DEST,
}
334
+
335
# specifying weighting schemes schema

# name of the default weight transformation
DEFAULT_WT_TRANS = "identity"

# transformation name -> string naming its implementation
# NOTE(review): the values look like private function names defined elsewhere - confirm
DEFINED_WEIGHT_TRANSFORMATION = {
    DEFAULT_WT_TRANS: "_wt_transformation_identity",
    "string": "_wt_transformation_string",
    "string_inv": "_wt_transformation_string_inv",
}

# calibration points at four qualitative levels for each score variable
SCORE_CALIBRATION_POINTS_DICT = {
    "weights": {
        "strong": 3,
        "good": 7,
        "okay": 20,
        "weak": 40,
    },
    "string_wt": {
        "strong": 950,
        "good": 400,
        "okay": 230,
        "weak": 150,
    },
}

SOURCE_VARS_DICT = {"string_wt": 10}
351
+
352
# source
SOURCE_SPEC = SimpleNamespace(
    PATHWAY_ID="pathway_id",
    MODEL="model",
    SOURCE="source",
    SPECIES="species",
    NAME="name",
    ENTRY="entry",
    N_COLLAPSED_PATHWAYS="n_collapsed_pathways",
    # same value as ENTRY
    INDEX_NAME="entry",
    FILE="file",
    DATE="date",
)

# columns expected in a pathway index
EXPECTED_PW_INDEX_COLUMNS = {
    SOURCE_SPEC.FILE,
    SOURCE_SPEC.PATHWAY_ID,
    SOURCE_SPEC.SOURCE,
    SOURCE_SPEC.SPECIES,
    SOURCE_SPEC.NAME,
    SOURCE_SPEC.DATE,
}
374
+
375
# rules for specific ontologies

ONTOLOGIES = SimpleNamespace(
    CHEBI="chebi",
    ENSEMBL_GENE="ensembl_gene",
    ENSEMBL_TRANSCRIPT="ensembl_transcript",
    ENSEMBL_PROTEIN="ensembl_protein",
    GENE_NAME="gene_name",
    GO="go",
    MIRBASE="mirbase",
    NCBI_ENTREZ_GENE="ncbi_entrez_gene",
    PHAROS="pharos",
    REACTOME="reactome",
    SYMBOL="symbol",
    UNIPROT="uniprot",
)

CHARACTERISTIC_COMPLEX_ONTOLOGIES = [
    ONTOLOGIES.ENSEMBL_GENE,
    ONTOLOGIES.NCBI_ENTREZ_GENE,
    ONTOLOGIES.MIRBASE,
]

# alternative spellings of an ontology name
ONTOLOGY_ALIASES = SimpleNamespace(
    NCBI_ENTREZ_GENE={"ncbigene", "ncbi_gene"},
)

# single-letter Ensembl molecule type -> ontology
ENSEMBL_MOLECULE_TYPES_TO_ONTOLOGY = {
    "G": ONTOLOGIES.ENSEMBL_GENE,
    "T": ONTOLOGIES.ENSEMBL_TRANSCRIPT,
    "P": ONTOLOGIES.ENSEMBL_PROTEIN,
}

# ontology -> single-letter Ensembl molecule type (inverse of the map above)
ENSEMBL_MOLECULE_TYPES_FROM_ONTOLOGY = {
    ontology: letter for letter, ontology in ENSEMBL_MOLECULE_TYPES_TO_ONTOLOGY.items()
}

# Ensembl species code -> species name
ENSEMBL_SPECIES_FROM_CODE = {"MUS": "Mus musculus"}

# species name -> Ensembl species code (inverse of the map above)
ENSEMBL_SPECIES_TO_CODE = {
    species: code for code, species in ENSEMBL_SPECIES_FROM_CODE.items()
}

# Ensembl identifier prefix -> ontology
ENSEMBL_PREFIX_TO_ONTOLOGY = {
    "ENSG": ONTOLOGIES.ENSEMBL_GENE,
    "ENST": ONTOLOGIES.ENSEMBL_TRANSCRIPT,
    "ENSP": ONTOLOGIES.ENSEMBL_PROTEIN,
}
421
+
422
# compartment key -> compartment name
# (the same keys are used by COMPARTMENT_ALIASES and COMPARTMENTS_GO_TERMS)
COMPARTMENTS = {
    "NUCLEOPLASM": "nucleoplasm",
    "CYTOPLASM": "cytoplasm",
    "CELLULAR_COMPONENT": "cellular_component",
    "CYTOSOL": "cytosol",
    # mitochondria
    "MITOCHONDRIA": "mitochondria",
    "MITOMEMBRANE": "mitochondrial membrane",
    "INNERMITOCHONDRIA": "inner mitochondria",
    "MITOMATRIX": "mitochondrial matrix",
    # endoplasmic reticulum
    "ENDOPLASMICRETICULUM": "endoplasmic reticulum",
    "ERMEMBRANE": "endoplasmic reticulum membrane",
    "ERLUMEN": "endoplasmic reticulum lumen",
    # golgi
    "GOLGIAPPARATUS": "golgi apparatus",
    "GOLGIMEMBRANE": "golgi membrane",
    # nucleus
    "NUCLEUS": "nucleus",
    "NUCLEARLUMEN": "nuclear lumen",
    "NUCLEOLUS": "nucleolus",
    # other
    "LYSOSOME": "lysosome",
    "PEROXISOME": "peroxisome",
    "EXTRACELLULAR": "extracellular",
}
443
+
444
# compartment key -> list of accepted spellings of that compartment's name
COMPARTMENT_ALIASES = {
    "NUCLEOPLASM": ["nucleoplasm", "Nucleoplasm"],
    "CYTOPLASM": ["cytoplasm", "Cytoplasm"],
    "CELLULAR_COMPONENT": ["cellular_component", "Cellular_component"],
    "CYTOSOL": ["cytosol", "Cytosol"],
    "MITOCHONDRIA": ["mitochondria", "Mitochondria"],
    "MITOMEMBRANE": ["mitochondrial membrane", "Mitochondrial membrane"],
    "INNERMITOCHONDRIA": [
        "inner mitochondria",
        "Inner mitochondria",
        "inner mitochondrial compartment",
    ],
    "MITOMATRIX": [
        "mitochondrial matrix",
        "Mitochondrial matrix",
        "mitochondrial lumen",
        "Mitochondrial lumen",
    ],
    "ENDOPLASMICRETICULUM": [
        "endoplasmic reticulum",
        "Endoplasmic reticulum",
    ],
    "ERMEMBRANE": [
        "endoplasmic reticulum membrane",
        "Endoplasmic reticulum membrane",
    ],
    "ERLUMEN": [
        "endoplasmic reticulum lumen",
        "Endoplasmic reticulum lumen",
    ],
    "GOLGIAPPARATUS": ["golgi apparatus", "Golgi apparatus"],
    # note: capitalized spelling comes first for this entry only
    "GOLGIMEMBRANE": ["Golgi membrane", "golgi membrane"],
    "NUCLEUS": ["nucleus", "Nucleus"],
    "NUCLEARLUMEN": ["nuclear lumen", "Nuclear lumen"],
    "NUCLEOLUS": ["nucleolus", "Nucleolus"],
    "LYSOSOME": ["lysosome", "Lysosome"],
    "PEROXISOME": ["peroxisome", "Peroxisome", "peroxisome/glyoxysome"],
    "EXTRACELLULAR": [
        "extracellular",
        "Extracellular",
        "extracellular space",
        "Extracellular space",
    ],
}
479
+
480
# compartment key -> Gene Ontology accession
COMPARTMENTS_GO_TERMS = {
    "NUCLEOPLASM": "GO:0005654",
    "CELLULAR_COMPONENT": "GO:0005575",
    "CYTOPLASM": "GO:0005737",
    "CYTOSOL": "GO:0005829",
    # mitochondria
    "MITOCHONDRIA": "GO:0005739",
    "MITOMEMBRANE": "GO:0031966",
    "INNERMITOCHONDRIA": "GO:0005743",
    "MITOMATRIX": "GO:0005759",
    # endoplasmic reticulum
    "ENDOPLASMICRETICULUM": "GO:0005783",
    "ERMEMBRANE": "GO:0005789",
    "ERLUMEN": "GO:0005788",
    # golgi
    "GOLGIAPPARATUS": "GO:0005794",
    "GOLGIMEMBRANE": "GO:0000139",
    # nucleus
    "NUCLEUS": "GO:0005634",
    "NUCLEARLUMEN": "GO:0031981",
    "NUCLEOLUS": "GO:0005730",
    # other
    "LYSOSOME": "GO:0005764",
    "PEROXISOME": "GO:0005777",
    "EXTRACELLULAR": "GO:0005615",
}
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+ from importlib.metadata import PackageNotFoundError
4
+ from importlib.metadata import version
5
+
6
try:
    # Look up the installed distribution's version. The wheel is published as
    # "napistu"; the previous lookup used the legacy name "calicolabs-cpr",
    # which does not match this distribution.
    __version__ = version("napistu")
except PackageNotFoundError:
    # Package is not installed (e.g. imported straight from a source tree).
    # Define a placeholder so `__version__` always exists instead of being
    # silently left undefined.
    __version__ = "unknown"
@@ -0,0 +1,69 @@
1
+ # GCS constants
2
+ from __future__ import annotations
3
+
4
+ from types import SimpleNamespace
5
+
6
# names of the sub-assets which can be found inside an asset
GCS_SUBASSET_NAMES = SimpleNamespace(
    SBML_DFS="sbml_dfs",
    IDENTIFIERS="identifiers",
    REGULATORY_GRAPH="regulatory_graph",
    REGULATORY_DISTANCES="regulatory_distances",
)


# file name of each sub-asset (same attribute names as GCS_SUBASSET_NAMES)
GCS_FILETYPES = SimpleNamespace(
    SBML_DFS="sbml_dfs.pkl",
    IDENTIFIERS="identifiers.tsv",
    REGULATORY_GRAPH="regulatory_graph.pkl",
    REGULATORY_DISTANCES="regulatory_distances.json",
)


# Catalog of assets. Each ASSETS entry has:
#   "file"       - the asset's file name
#   "subassets"  - {sub-asset name: file name}, or None when there are none
#   "public_url" - URL of the asset
GCS_ASSETS = SimpleNamespace(
    PROJECT="calico-public-data",
    BUCKET="calico-cpr-public",
    ASSETS={
        "test_pathway": {
            "file": "test_pathway.tar.gz",
            "subassets": {
                GCS_SUBASSET_NAMES.SBML_DFS: GCS_FILETYPES.SBML_DFS,
                GCS_SUBASSET_NAMES.IDENTIFIERS: GCS_FILETYPES.IDENTIFIERS,
                GCS_SUBASSET_NAMES.REGULATORY_GRAPH: GCS_FILETYPES.REGULATORY_GRAPH,
                GCS_SUBASSET_NAMES.REGULATORY_DISTANCES: GCS_FILETYPES.REGULATORY_DISTANCES,
            },
            "public_url": "https://storage.googleapis.com/calico-cpr-public/test_pathway.tar.gz",
        },
        # no regulatory distances in this asset
        "human_consensus": {
            "file": "human_consensus.tar.gz",
            "subassets": {
                GCS_SUBASSET_NAMES.SBML_DFS: GCS_FILETYPES.SBML_DFS,
                GCS_SUBASSET_NAMES.IDENTIFIERS: GCS_FILETYPES.IDENTIFIERS,
                GCS_SUBASSET_NAMES.REGULATORY_GRAPH: GCS_FILETYPES.REGULATORY_GRAPH,
            },
            "public_url": "https://storage.googleapis.com/calico-cpr-public/human_consensus.tar.gz",
        },
        "human_consensus_w_distances": {
            "file": "human_consensus_w_distances.tar.gz",
            "subassets": {
                GCS_SUBASSET_NAMES.SBML_DFS: GCS_FILETYPES.SBML_DFS,
                GCS_SUBASSET_NAMES.IDENTIFIERS: GCS_FILETYPES.IDENTIFIERS,
                GCS_SUBASSET_NAMES.REGULATORY_GRAPH: GCS_FILETYPES.REGULATORY_GRAPH,
                GCS_SUBASSET_NAMES.REGULATORY_DISTANCES: GCS_FILETYPES.REGULATORY_DISTANCES,
            },
            "public_url": "https://storage.googleapis.com/calico-cpr-public/human_consensus_w_distances.tar.gz",
        },
        # single-file assets
        "reactome_members": {
            "file": "external_pathways/external_pathways_reactome_neo4j_members.csv",
            "subassets": None,
            "public_url": "https://storage.googleapis.com/calico-cpr-public/external_pathways/external_pathways_reactome_neo4j_members.csv",
        },
        "reactome_xrefs": {
            "file": "external_pathways/external_pathways_reactome_neo4j_crossref.csv",
            "subassets": None,
            "public_url": "https://storage.googleapis.com/calico-cpr-public/external_pathways/external_pathways_reactome_neo4j_crossref.csv",
        },
    },
)


# message template; fill with str.format(data_dir=...)
INIT_DATA_DIR_MSG = "The `data_dir` {data_dir} does not exist."