napistu 0.3.6__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. napistu/__main__.py +28 -13
  2. napistu/consensus.py +19 -25
  3. napistu/constants.py +102 -83
  4. napistu/indices.py +3 -1
  5. napistu/ingestion/napistu_edgelist.py +4 -4
  6. napistu/ingestion/sbml.py +298 -295
  7. napistu/ingestion/string.py +14 -18
  8. napistu/ingestion/trrust.py +22 -27
  9. napistu/matching/interactions.py +41 -39
  10. napistu/matching/species.py +1 -1
  11. napistu/modify/gaps.py +2 -1
  12. napistu/network/constants.py +61 -45
  13. napistu/network/data_handling.py +1 -1
  14. napistu/network/neighborhoods.py +3 -3
  15. napistu/network/net_create.py +440 -616
  16. napistu/network/net_create_utils.py +734 -0
  17. napistu/network/net_propagation.py +1 -1
  18. napistu/network/{napistu_graph_core.py → ng_core.py} +57 -15
  19. napistu/network/ng_utils.py +28 -21
  20. napistu/network/paths.py +4 -4
  21. napistu/network/precompute.py +35 -74
  22. napistu/ontologies/genodexito.py +5 -1
  23. napistu/ontologies/renaming.py +4 -0
  24. napistu/sbml_dfs_core.py +127 -64
  25. napistu/sbml_dfs_utils.py +50 -0
  26. napistu/utils.py +132 -46
  27. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/METADATA +2 -2
  28. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/RECORD +47 -44
  29. tests/conftest.py +171 -13
  30. tests/test_consensus.py +74 -5
  31. tests/test_gaps.py +26 -15
  32. tests/test_network_data_handling.py +5 -2
  33. tests/test_network_net_create.py +93 -202
  34. tests/test_network_net_create_utils.py +538 -0
  35. tests/test_network_ng_core.py +19 -0
  36. tests/test_network_ng_utils.py +1 -1
  37. tests/test_network_precompute.py +5 -4
  38. tests/test_ontologies_renaming.py +28 -24
  39. tests/test_rpy2_callr.py +0 -1
  40. tests/test_rpy2_init.py +0 -1
  41. tests/test_sbml_dfs_core.py +165 -15
  42. tests/test_sbml_dfs_utils.py +45 -0
  43. tests/test_utils.py +45 -2
  44. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/WHEEL +0 -0
  45. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/entry_points.txt +0 -0
  46. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/licenses/LICENSE +0 -0
  47. {napistu-0.3.6.dist-info → napistu-0.4.0.dist-info}/top_level.txt +0 -0
napistu/__main__.py CHANGED
@@ -12,7 +12,7 @@ import click_logging
 import napistu
 import igraph as ig
 import pandas as pd
-from napistu import consensus as cpr_consensus
+from napistu import consensus as napistu_consensus
 from napistu import indices
 from napistu import sbml_dfs_core
 from napistu import utils
@@ -65,7 +65,7 @@ def ingestion():
     "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
 )
 @click_logging.simple_verbosity_option(logger)
-def load_reactome(base_folder: str, overwrite=True):
+def ingest_reactome(base_folder: str, overwrite=True):
     logger.info("Start downloading Reactome to %s", base_folder)
     reactome.reactome_sbml_download(f"{base_folder}/sbml", overwrite=overwrite)

@@ -76,7 +76,7 @@ def load_reactome(base_folder: str, overwrite=True):
     "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
 )
 @click_logging.simple_verbosity_option(logger)
-def load_bigg(base_folder: str, overwrite: bool):
+def ingest_bigg(base_folder: str, overwrite: bool):
     logger.info("Start downloading Bigg to %s", base_folder)
     bigg.bigg_sbml_download(base_folder, overwrite)

@@ -84,7 +84,7 @@ def load_bigg(base_folder: str, overwrite: bool):
 @ingestion.command(name="trrust")
 @click.argument("target_uri", type=str)
 @click_logging.simple_verbosity_option(logger)
-def load_ttrust(target_uri: str):
+def ingest_ttrust(target_uri: str):
     logger.info("Start downloading TRRUST to %s", target_uri)
     trrust.download_trrust(target_uri)

@@ -98,7 +98,7 @@ def load_ttrust(target_uri: str):
     help="URL to download the zipped protein atlas subcellular localization tsv from.",
 )
 @click_logging.simple_verbosity_option(logger)
-def load_proteinatlas_subcell(target_uri: str, url: str):
+def ingest_proteinatlas_subcell(target_uri: str, url: str):
     hpa.download_hpa_data(target_uri, url)


@@ -111,7 +111,7 @@ def load_proteinatlas_subcell(target_uri: str, url: str):
     help="URL to download the gtex file from.",
 )
 @click_logging.simple_verbosity_option(logger)
-def load_gtex_rnaseq(target_uri: str, url: str):
+def ingest_gtex_rnaseq(target_uri: str, url: str):
     gtex.download_gtex_rnaseq(target_uri, url)


@@ -124,7 +124,7 @@ def load_gtex_rnaseq(target_uri: str, url: str):
     help="Species name (e.g., Homo sapiens).",
 )
 @click_logging.simple_verbosity_option(logger)
-def load_string_db(target_uri: str, species: str):
+def ingest_string_db(target_uri: str, species: str):
     string.download_string(target_uri, species)


@@ -137,7 +137,7 @@ def load_string_db(target_uri: str, species: str):
     help="Species name (e.g., Homo sapiens).",
 )
 @click_logging.simple_verbosity_option(logger)
-def load_string_aliases(target_uri: str, species: str):
+def ingest_string_aliases(target_uri: str, species: str):
     string.download_string_aliases(target_uri, species)


@@ -289,7 +289,7 @@ def create_consensus(
     )
     pw_index_df["species"] = "unknown"
     pw_index = indices.PWIndex(pw_index=pw_index_df, validate_paths=False)
-    consensus_model = cpr_consensus.construct_consensus_model(
+    consensus_model = napistu_consensus.construct_consensus_model(
         sbml_dfs_dict, pw_index, dogmatic
     )
     utils.save_pickle(output_model_uri, consensus_model)
@@ -621,7 +621,11 @@ def exporter():
     "--format", "-f", default="pickle", help="Output format: gml, edgelist, pickle"
 )
 @click.option(
-    "--graph_type", "-g", type=str, default="bipartite", help="bipartite or regulatory"
+    "--wiring_approach",
+    "-g",
+    type=str,
+    default="bipartite",
+    help="bipartite or regulatory",
 )
 @click.option(
     "--weighting_strategy",
@@ -645,7 +649,7 @@ def export_igraph(
     output_uri: str,
     graph_attrs_spec_uri: str | None,
     format: str,
-    graph_type: str,
+    wiring_approach: str,
     weighting_strategy: str,
     directed: bool,
     reverse: bool,
@@ -663,7 +667,7 @@
         reaction_graph_attrs=graph_attrs_spec,
         directed=directed,
         edge_reversed=reverse,
-        graph_type=graph_type,
+        wiring_approach=wiring_approach,
         weighting_strategy=weighting_strategy,
         verbose=True,
     )
@@ -753,7 +757,7 @@ def export_precomputed_distances(
         weights_vars=weights_vars_list,
    )

-    precompute.save_precomputed_distances(precomputed_distances, output_uri)
+    utils.save_parquet(precomputed_distances, output_uri)


 @exporter.command(name="export_smbl_dfs_tables")
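Note: precomputed distances are now written through the generic utils.save_parquet helper rather than a precompute-specific saver. A minimal sketch of the new call, assuming only the (DataFrame, URI) signature visible in this hunk; the DataFrame and its column names are illustrative:

    import pandas as pd
    from napistu import utils

    # any DataFrame stands in for the real precomputed-distances table here
    distances = pd.DataFrame(
        {"sc_id_origin": ["A"], "sc_id_dest": ["B"], "distance": [1.0]}
    )
    utils.save_parquet(distances, "precomputed_distances.parquet")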
@@ -855,6 +859,17 @@ def copy_uri(input_uri, output_uri, is_file=True):
     utils.copy_uri(input_uri, output_uri, is_file=is_file)


+@helpers.command(name="validate_sbml_dfs")
+@click.argument("input_uri", type=str)
+@click_logging.simple_verbosity_option(logger)
+def validate_sbml_dfs(input_uri):
+    """Validate a sbml_dfs object"""
+    sbml_dfs = utils.load_pickle(input_uri)
+    sbml_dfs.validate()
+
+    logger.info(f"Successfully validated: {input_uri}")
+
+
 @click.group()
 def stats():
     """Various functions to calculate network statistics
napistu/consensus.py CHANGED
@@ -15,10 +15,13 @@ from napistu import source
 from napistu import utils
 from napistu.ingestion import sbml

+from napistu.constants import SCHEMA_DEFS
 from napistu.constants import SBML_DFS
+from napistu.constants import SBML_DFS_SCHEMA
 from napistu.constants import IDENTIFIERS
 from napistu.constants import SOURCE_SPEC
 from napistu.constants import BQB_DEFINING_ATTRS
+from napistu.constants import VALID_BQB_TERMS

 logger = logging.getLogger(__name__)
 # set the level to show logger.info message
@@ -137,8 +140,7 @@ def unnest_SBML_df(
     """

     # check that all sbml_dfs have the same schema
-    _test_same_schema(sbml_dfs_dict)
-    table_schema = sbml_dfs_dict[list(sbml_dfs_dict.keys())[0]].schema[table]
+    table_schema = SBML_DFS_SCHEMA.SCHEMA[table]

     df_list = [
         getattr(sbml_dfs_dict[x], table).assign(model=x) for x in sbml_dfs_dict.keys()
@@ -192,7 +194,7 @@
     agg_tbl = unnest_SBML_df(sbml_dfs_dict, table=table)

     # since all sbml_dfs have the same schema pull out one schema for reference
-    table_schema = sbml_dfs_dict[list(sbml_dfs_dict.keys())[0]].schema[table]
+    table_schema = SBML_DFS_SCHEMA.SCHEMA[table]

     # update foreign keys using provided lookup tables
     if "fk" in table_schema.keys():
@@ -244,6 +246,8 @@ def reduce_to_consensus_ids(
         Series mapping the index of the aggregated entities to new consensus IDs.
     """
     # Step 1: Build consensus identifiers to create clusters of equivalent entities
+    table_name = table_schema[SCHEMA_DEFS.TABLE]
+    logger.debug(f"Building consensus identifiers for {table_name}")
     indexed_cluster, cluster_consensus_identifiers = build_consensus_identifiers(
         sbml_df, table_schema, defining_biological_qualifiers
     )
@@ -252,25 +256,28 @@
     agg_table_harmonized = sbml_df.join(indexed_cluster)

     # Step 3: Create lookup table for entity IDs
+    logger.debug(f"Creating lookup table for {table_name}")
     lookup_table = _create_entity_lookup_table(agg_table_harmonized, table_schema)

     # Step 4: Add nameness scores to help select representative names
     agg_table_harmonized = utils._add_nameness_score_wrapper(
-        agg_table_harmonized, "label", table_schema
+        agg_table_harmonized, SCHEMA_DEFS.LABEL, table_schema
     )

     # Step 5: Prepare the consensus table with one row per unique entity
+    logger.debug(f"Preparing consensus table for {table_name}")
     new_id_table = _prepare_consensus_table(
         agg_table_harmonized, table_schema, cluster_consensus_identifiers
     )

     # Step 6: Add source information if required
-    if "source" in table_schema.keys():
+    if SCHEMA_DEFS.SOURCE in table_schema.keys():
         new_id_table = _add_consensus_sources(
             new_id_table, agg_table_harmonized, lookup_table, table_schema, pw_index
         )

     # Step 7: Validate the resulting table
+    logger.debug(f"Validating consensus table for {table_name}")
     _validate_consensus_table(new_id_table, sbml_df)

     return new_id_table, lookup_table
@@ -667,7 +674,7 @@ def construct_meta_entities_members(
     defined_by_schema = sbml_dfs_dict[list(sbml_dfs_dict.keys())[0]].schema[defined_by]

     # Step 2: Prepare the member table and validate its structure
-    agg_tbl, defining_fk = _prepare_member_table(
+    agg_tbl, _ = _prepare_member_table(
         sbml_dfs_dict,
         defined_by,
         defined_lookup_tables,
@@ -681,9 +688,7 @@
     membership_lookup = _create_membership_lookup(agg_tbl, table_schema)

     # Step 4: Create consensus entities and lookup table
-    consensus_entities, lookup_table = _create_entity_consensus(
-        membership_lookup, table_schema
-    )
+    _, lookup_table = _create_entity_consensus(membership_lookup, table_schema)

     # Step 5: Log merger information
     report_consensus_merges(
@@ -1507,6 +1512,11 @@ def _filter_identifiers_by_qualifier(
     pd.DataFrame
         Filtered identifiers
     """
+
+    invalid_bqbs = set(meta_identifiers[IDENTIFIERS.BQB]) - set(VALID_BQB_TERMS)
+    if len(invalid_bqbs) > 0:
+        logger.warning(f"Invalid biological qualifiers: {invalid_bqbs}")
+
     valid_identifiers = meta_identifiers.copy()
     return valid_identifiers[
         meta_identifiers[IDENTIFIERS.BQB].isin(defining_biological_qualifiers)
@@ -2034,22 +2044,6 @@ def _merge_entity_data_report_mismatches(
     return None


-def _test_same_schema(sbml_dfs_dict: dict[str, sbml_dfs_core.SBML_dfs]) -> None:
-    """
-    Ensure that all sbml_dfs in the dict have the same schema
-    """
-
-    if len(sbml_dfs_dict) != 0:
-        # extract all schemas
-        schema_list = [sbml_dfs_dict[x].schema for x in sbml_dfs_dict.keys()]
-        # if multiple entries are present then are they the same?
-        if len(sbml_dfs_dict) > 1:
-            if not all([x == schema_list[0] for x in schema_list]):
-                raise ValueError("sbml_df schemas were not identical")
-
-    return None
-
-
 def _create_member_string(x: list[str]) -> str:
     x.sort()
     return "_".join(x)
napistu/constants.py CHANGED
@@ -55,28 +55,49 @@ SBML_DFS = SimpleNamespace(
     SBO_TERM="sbo_term",
 )

+SCHEMA_DEFS = SimpleNamespace(
+    TABLE="table",
+    PK="pk",
+    FK="fk",
+    LABEL="label",
+    ID="id",
+    SOURCE="source",
+    VARS="vars",
+)
+
 SBML_DFS_SCHEMA = SimpleNamespace(
     SCHEMA={
         SBML_DFS.COMPARTMENTS: {
-            "pk": SBML_DFS.C_ID,
-            "label": SBML_DFS.C_NAME,
-            "id": SBML_DFS.C_IDENTIFIERS,
-            "source": SBML_DFS.C_SOURCE,
-            "vars": [SBML_DFS.C_NAME, SBML_DFS.C_IDENTIFIERS, SBML_DFS.C_SOURCE],
+            SCHEMA_DEFS.TABLE: SBML_DFS.COMPARTMENTS,
+            SCHEMA_DEFS.PK: SBML_DFS.C_ID,
+            SCHEMA_DEFS.LABEL: SBML_DFS.C_NAME,
+            SCHEMA_DEFS.ID: SBML_DFS.C_IDENTIFIERS,
+            SCHEMA_DEFS.SOURCE: SBML_DFS.C_SOURCE,
+            SCHEMA_DEFS.VARS: [
+                SBML_DFS.C_NAME,
+                SBML_DFS.C_IDENTIFIERS,
+                SBML_DFS.C_SOURCE,
+            ],
         },
         SBML_DFS.SPECIES: {
-            "pk": SBML_DFS.S_ID,
-            "label": SBML_DFS.S_NAME,
-            "id": SBML_DFS.S_IDENTIFIERS,
-            "source": SBML_DFS.S_SOURCE,
-            "vars": [SBML_DFS.S_NAME, SBML_DFS.S_IDENTIFIERS, SBML_DFS.S_SOURCE],
+            SCHEMA_DEFS.TABLE: SBML_DFS.SPECIES,
+            SCHEMA_DEFS.PK: SBML_DFS.S_ID,
+            SCHEMA_DEFS.LABEL: SBML_DFS.S_NAME,
+            SCHEMA_DEFS.ID: SBML_DFS.S_IDENTIFIERS,
+            SCHEMA_DEFS.SOURCE: SBML_DFS.S_SOURCE,
+            SCHEMA_DEFS.VARS: [
+                SBML_DFS.S_NAME,
+                SBML_DFS.S_IDENTIFIERS,
+                SBML_DFS.S_SOURCE,
+            ],
         },
         SBML_DFS.COMPARTMENTALIZED_SPECIES: {
-            "pk": SBML_DFS.SC_ID,
-            "label": SBML_DFS.SC_NAME,
-            "fk": [SBML_DFS.S_ID, SBML_DFS.C_ID],
-            "source": SBML_DFS.SC_SOURCE,
-            "vars": [
+            SCHEMA_DEFS.TABLE: SBML_DFS.COMPARTMENTALIZED_SPECIES,
+            SCHEMA_DEFS.PK: SBML_DFS.SC_ID,
+            SCHEMA_DEFS.LABEL: SBML_DFS.SC_NAME,
+            SCHEMA_DEFS.FK: [SBML_DFS.S_ID, SBML_DFS.C_ID],
+            SCHEMA_DEFS.SOURCE: SBML_DFS.SC_SOURCE,
+            SCHEMA_DEFS.VARS: [
                 SBML_DFS.SC_NAME,
                 SBML_DFS.S_ID,
                 SBML_DFS.C_ID,
@@ -84,11 +105,12 @@ SBML_DFS_SCHEMA = SimpleNamespace(
             ],
         },
         SBML_DFS.REACTIONS: {
-            "pk": SBML_DFS.R_ID,
-            "label": SBML_DFS.R_NAME,
-            "id": SBML_DFS.R_IDENTIFIERS,
-            "source": SBML_DFS.R_SOURCE,
-            "vars": [
+            SCHEMA_DEFS.TABLE: SBML_DFS.REACTIONS,
+            SCHEMA_DEFS.PK: SBML_DFS.R_ID,
+            SCHEMA_DEFS.LABEL: SBML_DFS.R_NAME,
+            SCHEMA_DEFS.ID: SBML_DFS.R_IDENTIFIERS,
+            SCHEMA_DEFS.SOURCE: SBML_DFS.R_SOURCE,
+            SCHEMA_DEFS.VARS: [
                 SBML_DFS.R_NAME,
                 SBML_DFS.R_IDENTIFIERS,
                 SBML_DFS.R_SOURCE,
@@ -96,9 +118,10 @@ SBML_DFS_SCHEMA = SimpleNamespace(
             ],
         },
         SBML_DFS.REACTION_SPECIES: {
-            "pk": SBML_DFS.RSC_ID,
-            "fk": [SBML_DFS.R_ID, SBML_DFS.SC_ID],
-            "vars": [
+            SCHEMA_DEFS.TABLE: SBML_DFS.REACTION_SPECIES,
+            SCHEMA_DEFS.PK: SBML_DFS.RSC_ID,
+            SCHEMA_DEFS.FK: [SBML_DFS.R_ID, SBML_DFS.SC_ID],
+            SCHEMA_DEFS.VARS: [
                 SBML_DFS.R_ID,
                 SBML_DFS.SC_ID,
                 SBML_DFS.STOICHIOMETRY,
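With SCHEMA_DEFS in place, schema fields are addressed through named constants instead of bare strings. A short sketch of the lookup pattern this enables; the values shown in comments are inferred from the naming conventions visible in this diff:

    from napistu.constants import SBML_DFS, SBML_DFS_SCHEMA, SCHEMA_DEFS

    species_schema = SBML_DFS_SCHEMA.SCHEMA[SBML_DFS.SPECIES]
    species_schema[SCHEMA_DEFS.TABLE]  # "species"
    species_schema[SCHEMA_DEFS.PK]     # "s_id"
    species_schema[SCHEMA_DEFS.LABEL]  # "s_name"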
@@ -129,10 +152,10 @@ ENTITIES_TO_ENTITY_DATA = {
 REQUIRED_REACTION_FROMEDGELIST_COLUMNS = [
     "sc_id_up",
     "sc_id_down",
-    "sbo_term",
-    "r_name",
-    "r_Identifiers",
-    "r_isreversible",
+    SBML_DFS.SBO_TERM,
+    SBML_DFS.R_NAME,
+    SBML_DFS.R_IDENTIFIERS,
+    SBML_DFS.R_ISREVERSIBLE,
 ]

 NAPISTU_STANDARD_OUTPUTS = SimpleNamespace(
@@ -155,20 +178,6 @@ INTERACTION_EDGELIST_EXPECTED_VARS = {
     SBML_DFS.R_ISREVERSIBLE,
 }

-BQB_PRIORITIES = pd.DataFrame(
-    [{"bqb": "BQB_IS", "bqb_rank": 1}, {"bqb": "BQB_HAS_PART", "bqb_rank": 2}]
-)
-
-ONTOLOGY_PRIORITIES = pd.DataFrame(
-    [
-        {"ontology": "reactome", "ontology_rank": 1},
-        {"ontology": "ensembl_gene", "ontology_rank": 2},
-        {"ontology": "chebi", "ontology_rank": 3},
-        {"ontology": "uniprot", "ontology_rank": 4},
-        {"ontology": "go", "ontology_rank": 5},
-    ]
-)
-
 # SBML
 # Biological qualifiers
 # Biomodels qualifiers
@@ -189,16 +198,18 @@ BQB = SimpleNamespace(
     UNKNOWN="BQB_UNKNOWN",
 )

+VALID_BQB_TERMS = list(BQB.__dict__.values())
+
 # molecules are distinctly defined by these BQB terms
-BQB_DEFINING_ATTRS = ["BQB_IS", "IS_HOMOLOG_TO"]
+BQB_DEFINING_ATTRS = [BQB.IS, BQB.IS_HOMOLOG_TO]

 # a looser convention which will aggregate genes, transcripts, and proteins
 # if they are linked with the appropriate bioqualifiers
 BQB_DEFINING_ATTRS_LOOSE = [
-    "BQB_IS",
-    "IS_HOMOLOG_TO",
-    "BQB_IS_ENCODED_BY",
-    "BQB_ENCODES",
+    BQB.IS,
+    BQB.IS_HOMOLOG_TO,
+    BQB.IS_ENCODED_BY,
+    BQB.ENCODES,
 ]

 # identifiers
@@ -206,6 +217,13 @@ IDENTIFIERS = SimpleNamespace(
     ONTOLOGY="ontology", IDENTIFIER="identifier", BQB="bqb", URL="url"
 )

+BQB_PRIORITIES = pd.DataFrame(
+    [
+        {IDENTIFIERS.BQB: BQB.IS, "bqb_rank": 1},
+        {IDENTIFIERS.BQB: BQB.HAS_PART, "bqb_rank": 2},
+    ]
+)
+
 IDENTIFIERS_REQUIRED_VARS = {
     IDENTIFIERS.ONTOLOGY,
     IDENTIFIERS.IDENTIFIER,
@@ -217,26 +235,9 @@ SPECIES_IDENTIFIERS_REQUIRED_VARS = IDENTIFIERS_REQUIRED_VARS | {
     SBML_DFS.S_NAME,
 }

-BIOLOGICAL_QUALIFIERS = [
-    "BQB_IS",
-    "BQB_HAS_PART",
-    "BQB_IS_PART_OF",
-    "BQB_IS_VERSION_OF",
-    "BQB_HAS_VERSION",
-    "BQB_IS_HOMOLOG_TO",
-    "BQB_IS_DESCRIBED_BY",
-    "BQB_IS_ENCODED_BY",
-    "BQB_ENCODES",
-    "BQB_OCCURS_IN",
-    "BQB_HAS_PROPERTY",
-    "BQB_IS_PROPERTY_OF",
-    "BQB_HAS_TAXON",
-    "BQB_UNKNOWN",
-]
-

 def get_biological_qualifier_codes():
-    bio_qualifier_codes = {getattr(libsbml, bqb): bqb for bqb in BIOLOGICAL_QUALIFIERS}
+    bio_qualifier_codes = {getattr(libsbml, bqb): bqb for bqb in VALID_BQB_TERMS}

     return bio_qualifier_codes

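VALID_BQB_TERMS replaces the hand-maintained BIOLOGICAL_QUALIFIERS list by deriving the same strings from the BQB namespace. A minimal sketch of the pattern (a toy namespace stands in for napistu's full BQB definition):

    from types import SimpleNamespace

    BQB = SimpleNamespace(IS="BQB_IS", HAS_PART="BQB_HAS_PART", UNKNOWN="BQB_UNKNOWN")
    VALID_BQB_TERMS = list(BQB.__dict__.values())
    # ['BQB_IS', 'BQB_HAS_PART', 'BQB_UNKNOWN'] -- one entry per attribute, in declaration order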
@@ -250,6 +251,7 @@ SBOTERM_NAMES = SimpleNamespace(
     CATALYST="catalyst",
     INHIBITOR="inhibitor",
     STIMULATOR="stimulator",
+    MODIFIED="modified",
     MODIFIER="modifier",
     INTERACTOR="interactor",
 )
@@ -258,22 +260,27 @@ MINI_SBO_TO_NAME = {
     "SBO:0000010": SBOTERM_NAMES.REACTANT,
     "SBO:0000011": SBOTERM_NAMES.PRODUCT,
     "SBO:0000013": SBOTERM_NAMES.CATALYST,
-    "SBO:0000020": SBOTERM_NAMES.INHIBITOR,
-    "SBO:0000459": SBOTERM_NAMES.STIMULATOR,
     "SBO:0000019": SBOTERM_NAMES.MODIFIER,
+    "SBO:0000020": SBOTERM_NAMES.INHIBITOR,
     "SBO:0000336": SBOTERM_NAMES.INTERACTOR,
+    "SBO:0000459": SBOTERM_NAMES.STIMULATOR,
+    "SBO:0000644": SBOTERM_NAMES.MODIFIED,
 }

 MINI_SBO_FROM_NAME = {
-    SBOTERM_NAMES.REACTANT: "SBO:0000010",
-    SBOTERM_NAMES.PRODUCT: "SBO:0000011",
     SBOTERM_NAMES.CATALYST: "SBO:0000013",
     SBOTERM_NAMES.INHIBITOR: "SBO:0000020",
-    SBOTERM_NAMES.STIMULATOR: "SBO:0000459",
-    SBOTERM_NAMES.MODIFIER: "SBO:0000019",  # parent category of inhibitor and stimulator (i.e., activator)
     SBOTERM_NAMES.INTERACTOR: "SBO:0000336",  # entity participating in a physical or functional interaction
+    SBOTERM_NAMES.MODIFIED: "SBO:0000644",
+    SBOTERM_NAMES.MODIFIER: "SBO:0000019",  # parent category of inhibitor and stimulator (i.e., activator)
+    SBOTERM_NAMES.PRODUCT: "SBO:0000011",
+    SBOTERM_NAMES.REACTANT: "SBO:0000010",  # aka substrate
+    SBOTERM_NAMES.STIMULATOR: "SBO:0000459",  # aka activator
 }

+VALID_SBO_TERM_NAMES = list(SBOTERM_NAMES.__dict__.values())
+VALID_SBO_TERMS = list(MINI_SBO_FROM_NAME.values())
+
 SBO_MODIFIER_NAMES = {
     SBOTERM_NAMES.INHIBITOR,
     SBOTERM_NAMES.STIMULATOR,
@@ -281,13 +288,14 @@ SBO_MODIFIER_NAMES = {
 }

 MINI_SBO_NAME_TO_POLARITY = {
-    SBOTERM_NAMES.REACTANT: "activation",
-    SBOTERM_NAMES.PRODUCT: "activation",
     SBOTERM_NAMES.CATALYST: "activation",
     SBOTERM_NAMES.INHIBITOR: "inhibition",
-    SBOTERM_NAMES.STIMULATOR: "activation",
-    SBOTERM_NAMES.MODIFIER: "ambiguous",
     SBOTERM_NAMES.INTERACTOR: "ambiguous",
+    SBOTERM_NAMES.MODIFIED: "ambiguous",
+    SBOTERM_NAMES.MODIFIER: "ambiguous",
+    SBOTERM_NAMES.PRODUCT: "activation",
+    SBOTERM_NAMES.REACTANT: "ambiguous",
+    SBOTERM_NAMES.STIMULATOR: "activation",
 }

 # how does changing a reactions' membership
@@ -305,6 +313,7 @@ SBO_NAME_TO_ROLE = {
     SBOTERM_NAMES.CATALYST: SBO_ROLES_DEFS.REQUIRED,
     SBOTERM_NAMES.INHIBITOR: SBO_ROLES_DEFS.OPTIONAL,
     SBOTERM_NAMES.STIMULATOR: SBO_ROLES_DEFS.OPTIONAL,
+    SBOTERM_NAMES.MODIFIED: SBO_ROLES_DEFS.DEFINING,
     SBOTERM_NAMES.MODIFIER: SBO_ROLES_DEFS.OPTIONAL,
 }

@@ -322,7 +331,7 @@ VALID_SBO_ROLES = (

 # required variables for the edgelist formats used by the matching subpackage
 # also used in some network modules
-CPR_EDGELIST = SimpleNamespace(
+NAPISTU_EDGELIST = SimpleNamespace(
     S_ID_UPSTREAM="s_id_upstream",
     S_ID_DOWNSTREAM="s_id_downstream",
     SC_ID_UPSTREAM="sc_id_upstream",
@@ -336,18 +345,18 @@ CPR_EDGELIST = SimpleNamespace(
 )

 IDENTIFIER_EDGELIST_REQ_VARS = {
-    CPR_EDGELIST.IDENTIFIER_UPSTREAM,
-    CPR_EDGELIST.IDENTIFIER_DOWNSTREAM,
+    NAPISTU_EDGELIST.IDENTIFIER_UPSTREAM,
+    NAPISTU_EDGELIST.IDENTIFIER_DOWNSTREAM,
 }

-CPR_EDGELIST_REQ_VARS = {
-    CPR_EDGELIST.S_ID_UPSTREAM,
-    CPR_EDGELIST.S_ID_DOWNSTREAM,
-    CPR_EDGELIST.SC_ID_UPSTREAM,
-    CPR_EDGELIST.SC_ID_DOWNSTREAM,
+NAPISTU_EDGELIST_REQ_VARS = {
+    NAPISTU_EDGELIST.S_ID_UPSTREAM,
+    NAPISTU_EDGELIST.S_ID_DOWNSTREAM,
+    NAPISTU_EDGELIST.SC_ID_UPSTREAM,
+    NAPISTU_EDGELIST.SC_ID_DOWNSTREAM,
 }

-CPR_PATH_REQ_VARS = {CPR_EDGELIST.SC_ID_ORIGIN, CPR_EDGELIST.SC_ID_DEST}
+NAPISTU_PATH_REQ_VARS = {NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST}

 FEATURE_ID_VAR_DEFAULT = "feature_id"

@@ -409,6 +418,16 @@ ONTOLOGY_SPECIES_ALIASES = {
     ONTOLOGIES.UNIPROT: {"Uniprot"},
 }

+ONTOLOGY_PRIORITIES = pd.DataFrame(
+    [
+        {"ontology": ONTOLOGIES.REACTOME, "ontology_rank": 1},
+        {"ontology": ONTOLOGIES.ENSEMBL_GENE, "ontology_rank": 2},
+        {"ontology": ONTOLOGIES.CHEBI, "ontology_rank": 3},
+        {"ontology": ONTOLOGIES.UNIPROT, "ontology_rank": 4},
+        {"ontology": ONTOLOGIES.GO, "ontology_rank": 5},
+    ]
+)
+
 ENSEMBL_MOLECULE_TYPES_TO_ONTOLOGY = {
     "G": ONTOLOGIES.ENSEMBL_GENE,
     "T": ONTOLOGIES.ENSEMBL_TRANSCRIPT,
napistu/indices.py CHANGED
@@ -266,6 +266,7 @@ def adapt_pw_index(
     source: str | PWIndex,
     species: str | Iterable[str] | None,
     outdir: str | None = None,
+    update_index: bool = False,
 ) -> PWIndex:
     """Adapts a pw_index

@@ -288,8 +289,9 @@
         raise ValueError("'source' needs to be str or PWIndex")
     pw_index.filter(species=species)

-    if outdir is not None:
+    if outdir is not None and update_index:
         with open_fs(outdir, create=True) as fs:
             with fs.open("pw_index.tsv", "w") as f:
                 pw_index.index.to_csv(f, sep="\t")
+
     return pw_index
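adapt_pw_index now only rewrites pw_index.tsv when explicitly asked to via the new update_index flag (previously it wrote whenever outdir was set). A hedged usage sketch; the paths are illustrative:

    from napistu import indices

    pw_index = indices.adapt_pw_index(
        "data/pw_index.tsv",     # source index, path illustrative
        species="Homo sapiens",
        outdir="data/filtered",  # only written when update_index=True
        update_index=True,
    )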
napistu/ingestion/napistu_edgelist.py CHANGED
@@ -20,9 +20,9 @@ def remove_reciprocal_interactions(

     Args:
         edgelist (pd.DataFrame): edgelist (pd.DataFrame): edgelist where the first two
-        columns are assumed to be the edge vertices
+            columns are assumed to be the edge vertices
         extra_defining_vars (list): list (which can be empty) of variables which define
-        a unique interaction beyond the vertices
+            a unique interaction beyond the vertices

     Returns:
         indegenerate_edgelist (pd.DataFrame): edgelist with B-A edges removed and A-B retained
@@ -58,9 +58,9 @@ def count_fraction_of_reciprocal_interactions(

     Args:
         edgelist (pd.DataFrame): edgelist (pd.DataFrame): edgelist where the first two
-        columns are assumed to be the edge vertices
+            columns are assumed to be the edge vertices
         extra_defining_vars (list): list (which can be empty) of variables which define
-        a unique interaction beyond the vertices
+            a unique interaction beyond the vertices

     Returns:
         fraction (float): fraction of A-B edges which are also included as B-A edges
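For context on what these docstrings describe, a small pandas-only illustration of dropping reciprocal B-A duplicates (not napistu's implementation; the column names are invented for the example):

    import pandas as pd

    edges = pd.DataFrame({"up": ["A", "B", "C"], "down": ["B", "A", "D"]})
    # canonicalize each edge so A-B and B-A share a key, then keep the first occurrence
    key = edges.apply(lambda r: tuple(sorted((r["up"], r["down"]))), axis=1)
    indegenerate_edgelist = edges[~key.duplicated()]  # drops the reciprocal B-A row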