napistu 0.3.6.tar.gz → 0.3.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. {napistu-0.3.6 → napistu-0.3.7}/PKG-INFO +1 -1
  2. {napistu-0.3.6 → napistu-0.3.7}/setup.cfg +1 -1
  3. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/__main__.py +20 -9
  4. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/consensus.py +19 -25
  5. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/constants.py +90 -64
  6. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/indices.py +3 -1
  7. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/sbml.py +298 -295
  8. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/string.py +14 -18
  9. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/trrust.py +22 -27
  10. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/matching/species.py +1 -1
  11. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/genodexito.py +5 -1
  12. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/renaming.py +4 -0
  13. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/sbml_dfs_core.py +127 -64
  14. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/sbml_dfs_utils.py +4 -0
  15. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/utils.py +52 -41
  16. {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/PKG-INFO +1 -1
  17. {napistu-0.3.6 → napistu-0.3.7}/src/tests/conftest.py +70 -13
  18. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_consensus.py +74 -5
  19. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_gaps.py +26 -15
  20. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_net_create.py +1 -1
  21. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_precompute.py +1 -1
  22. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_ontologies_renaming.py +28 -24
  23. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_sbml_dfs_core.py +165 -15
  24. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_utils.py +19 -0
  25. {napistu-0.3.6 → napistu-0.3.7}/LICENSE +0 -0
  26. {napistu-0.3.6 → napistu-0.3.7}/README.md +0 -0
  27. {napistu-0.3.6 → napistu-0.3.7}/pyproject.toml +0 -0
  28. {napistu-0.3.6 → napistu-0.3.7}/setup.py +0 -0
  29. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/__init__.py +0 -0
  30. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/context/__init__.py +0 -0
  31. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/context/discretize.py +0 -0
  32. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/context/filtering.py +0 -0
  33. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/gcs/__init__.py +0 -0
  34. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/gcs/constants.py +0 -0
  35. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/gcs/downloads.py +0 -0
  36. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/gcs/utils.py +0 -0
  37. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/identifiers.py +0 -0
  38. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/__init__.py +0 -0
  39. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/bigg.py +0 -0
  40. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/constants.py +0 -0
  41. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/gtex.py +0 -0
  42. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/hpa.py +0 -0
  43. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/identifiers_etl.py +0 -0
  44. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/napistu_edgelist.py +0 -0
  45. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/obo.py +0 -0
  46. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/psi_mi.py +0 -0
  47. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/reactome.py +0 -0
  48. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ingestion/yeast.py +0 -0
  49. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/matching/__init__.py +0 -0
  50. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/matching/constants.py +0 -0
  51. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/matching/interactions.py +0 -0
  52. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/matching/mount.py +0 -0
  53. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/__init__.py +0 -0
  54. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/__main__.py +0 -0
  55. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/client.py +0 -0
  56. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/codebase.py +0 -0
  57. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/codebase_utils.py +0 -0
  58. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/component_base.py +0 -0
  59. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/config.py +0 -0
  60. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/constants.py +0 -0
  61. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/documentation.py +0 -0
  62. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/documentation_utils.py +0 -0
  63. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/execution.py +0 -0
  64. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/health.py +0 -0
  65. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/profiles.py +0 -0
  66. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/server.py +0 -0
  67. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/tutorials.py +0 -0
  68. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/tutorials_utils.py +0 -0
  69. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/mcp/utils.py +0 -0
  70. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/__init__.py +0 -0
  71. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/constants.py +0 -0
  72. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/curation.py +0 -0
  73. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/gaps.py +0 -0
  74. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/pathwayannot.py +0 -0
  75. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/modify/uncompartmentalize.py +0 -0
  76. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/__init__.py +0 -0
  77. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/constants.py +0 -0
  78. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/data_handling.py +0 -0
  79. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/ig_utils.py +0 -0
  80. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/napistu_graph_core.py +0 -0
  81. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/neighborhoods.py +0 -0
  82. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/net_create.py +0 -0
  83. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/net_propagation.py +0 -0
  84. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/ng_utils.py +0 -0
  85. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/paths.py +0 -0
  86. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/network/precompute.py +0 -0
  87. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/__init__.py +0 -0
  88. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/constants.py +0 -0
  89. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/dogma.py +0 -0
  90. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/ontologies/mygene.py +0 -0
  91. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/rpy2/__init__.py +0 -0
  92. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/rpy2/callr.py +0 -0
  93. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/rpy2/constants.py +0 -0
  94. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/rpy2/rids.py +0 -0
  95. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/scverse/__init__.py +0 -0
  96. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/scverse/constants.py +0 -0
  97. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/scverse/loading.py +0 -0
  98. {napistu-0.3.6 → napistu-0.3.7}/src/napistu/source.py +0 -0
  99. {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/SOURCES.txt +0 -0
  100. {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/dependency_links.txt +0 -0
  101. {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/entry_points.txt +0 -0
  102. {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/requires.txt +0 -0
  103. {napistu-0.3.6 → napistu-0.3.7}/src/napistu.egg-info/top_level.txt +0 -0
  104. {napistu-0.3.6 → napistu-0.3.7}/src/tests/__init__.py +0 -0
  105. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_constants.py +0 -0
  106. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_context_discretize.py +0 -0
  107. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_context_filtering.py +0 -0
  108. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_curation.py +0 -0
  109. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_data/__init__.py +0 -0
  110. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_gcs.py +0 -0
  111. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_identifiers.py +0 -0
  112. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_indices.py +0 -0
  113. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_ingestion_napistu_edgelist.py +0 -0
  114. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_ingestion_obo.py +0 -0
  115. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_matching_interactions.py +0 -0
  116. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_matching_mount.py +0 -0
  117. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_matching_species.py +0 -0
  118. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_mcp_config.py +0 -0
  119. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_mcp_documentation_utils.py +0 -0
  120. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_mcp_server.py +0 -0
  121. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_data_handling.py +0 -0
  122. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_ig_utils.py +0 -0
  123. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_neighborhoods.py +0 -0
  124. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_net_propagation.py +0 -0
  125. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_ng_utils.py +0 -0
  126. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_network_paths.py +0 -0
  127. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_ontologies_genodexito.py +0 -0
  128. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_ontologies_mygene.py +0 -0
  129. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_pathwayannot.py +0 -0
  130. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_rpy2_callr.py +0 -0
  131. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_rpy2_init.py +0 -0
  132. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_sbml.py +0 -0
  133. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_sbml_dfs_utils.py +0 -0
  134. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_sbo.py +0 -0
  135. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_scverse_loading.py +0 -0
  136. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_set_coverage.py +0 -0
  137. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_source.py +0 -0
  138. {napistu-0.3.6 → napistu-0.3.7}/src/tests/test_uncompartmentalize.py +0 -0
  139. {napistu-0.3.6 → napistu-0.3.7}/src/tests/utils.py +0 -0
{napistu-0.3.6 → napistu-0.3.7}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: napistu
-Version: 0.3.6
+Version: 0.3.7
 Summary: Connecting high-dimensional data to curated pathways
 Home-page: https://github.com/napistu/napistu-py
 Author: Sean Hackett
{napistu-0.3.6 → napistu-0.3.7}/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = napistu
-version = 0.3.6
+version = 0.3.7
 description = Connecting high-dimensional data to curated pathways
 long_description = file: README.md
 long_description_content_type = text/markdown
{napistu-0.3.6 → napistu-0.3.7}/src/napistu/__main__.py
@@ -12,7 +12,7 @@ import click_logging
 import napistu
 import igraph as ig
 import pandas as pd
-from napistu import consensus as cpr_consensus
+from napistu import consensus as napistu_consensus
 from napistu import indices
 from napistu import sbml_dfs_core
 from napistu import utils
@@ -65,7 +65,7 @@ def ingestion():
     "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
 )
 @click_logging.simple_verbosity_option(logger)
-def load_reactome(base_folder: str, overwrite=True):
+def ingest_reactome(base_folder: str, overwrite=True):
     logger.info("Start downloading Reactome to %s", base_folder)
     reactome.reactome_sbml_download(f"{base_folder}/sbml", overwrite=overwrite)
 
@@ -76,7 +76,7 @@ def load_reactome(base_folder: str, overwrite=True):
     "--overwrite", "-o", is_flag=True, default=False, help="Overwrite existing files?"
 )
 @click_logging.simple_verbosity_option(logger)
-def load_bigg(base_folder: str, overwrite: bool):
+def ingest_bigg(base_folder: str, overwrite: bool):
     logger.info("Start downloading Bigg to %s", base_folder)
     bigg.bigg_sbml_download(base_folder, overwrite)
 
@@ -84,7 +84,7 @@ def load_bigg(base_folder: str, overwrite: bool):
 @ingestion.command(name="trrust")
 @click.argument("target_uri", type=str)
 @click_logging.simple_verbosity_option(logger)
-def load_ttrust(target_uri: str):
+def ingest_ttrust(target_uri: str):
     logger.info("Start downloading TRRUST to %s", target_uri)
     trrust.download_trrust(target_uri)
 
@@ -98,7 +98,7 @@ def load_ttrust(target_uri: str):
     help="URL to download the zipped protein atlas subcellular localization tsv from.",
 )
 @click_logging.simple_verbosity_option(logger)
-def load_proteinatlas_subcell(target_uri: str, url: str):
+def ingest_proteinatlas_subcell(target_uri: str, url: str):
     hpa.download_hpa_data(target_uri, url)
 
 
@@ -111,7 +111,7 @@ def load_proteinatlas_subcell(target_uri: str, url: str):
     help="URL to download the gtex file from.",
 )
 @click_logging.simple_verbosity_option(logger)
-def load_gtex_rnaseq(target_uri: str, url: str):
+def ingest_gtex_rnaseq(target_uri: str, url: str):
     gtex.download_gtex_rnaseq(target_uri, url)
 
 
@@ -124,7 +124,7 @@ def load_gtex_rnaseq(target_uri: str, url: str):
     help="Species name (e.g., Homo sapiens).",
 )
 @click_logging.simple_verbosity_option(logger)
-def load_string_db(target_uri: str, species: str):
+def ingest_string_db(target_uri: str, species: str):
    string.download_string(target_uri, species)
 
 
@@ -137,7 +137,7 @@ def load_string_db(target_uri: str, species: str):
     help="Species name (e.g., Homo sapiens).",
 )
 @click_logging.simple_verbosity_option(logger)
-def load_string_aliases(target_uri: str, species: str):
+def ingest_string_aliases(target_uri: str, species: str):
     string.download_string_aliases(target_uri, species)
 
 
@@ -289,7 +289,7 @@ def create_consensus(
     )
     pw_index_df["species"] = "unknown"
     pw_index = indices.PWIndex(pw_index=pw_index_df, validate_paths=False)
-    consensus_model = cpr_consensus.construct_consensus_model(
+    consensus_model = napistu_consensus.construct_consensus_model(
         sbml_dfs_dict, pw_index, dogmatic
     )
     utils.save_pickle(output_model_uri, consensus_model)
@@ -855,6 +855,17 @@ def copy_uri(input_uri, output_uri, is_file=True):
     utils.copy_uri(input_uri, output_uri, is_file=is_file)
 
 
+@helpers.command(name="validate_sbml_dfs")
+@click.argument("input_uri", type=str)
+@click_logging.simple_verbosity_option(logger)
+def validate_sbml_dfs(input_uri):
+    """Validate a sbml_dfs object"""
+    sbml_dfs = utils.load_pickle(input_uri)
+    sbml_dfs.validate()
+
+    logger.info(f"Successfully validated: {input_uri}")
+
+
 @click.group()
 def stats():
     """Various functions to calculate network statistics
{napistu-0.3.6 → napistu-0.3.7}/src/napistu/consensus.py
@@ -15,10 +15,13 @@ from napistu import source
 from napistu import utils
 from napistu.ingestion import sbml
 
+from napistu.constants import SCHEMA_DEFS
 from napistu.constants import SBML_DFS
+from napistu.constants import SBML_DFS_SCHEMA
 from napistu.constants import IDENTIFIERS
 from napistu.constants import SOURCE_SPEC
 from napistu.constants import BQB_DEFINING_ATTRS
+from napistu.constants import VALID_BQB_TERMS
 
 logger = logging.getLogger(__name__)
 # set the level to show logger.info message
@@ -137,8 +140,7 @@ def unnest_SBML_df(
     """
 
     # check that all sbml_dfs have the same schema
-    _test_same_schema(sbml_dfs_dict)
-    table_schema = sbml_dfs_dict[list(sbml_dfs_dict.keys())[0]].schema[table]
+    table_schema = SBML_DFS_SCHEMA.SCHEMA[table]
 
     df_list = [
         getattr(sbml_dfs_dict[x], table).assign(model=x) for x in sbml_dfs_dict.keys()
@@ -192,7 +194,7 @@ def construct_meta_entities_identifiers(
     agg_tbl = unnest_SBML_df(sbml_dfs_dict, table=table)
 
     # since all sbml_dfs have the same schema pull out one schema for reference
-    table_schema = sbml_dfs_dict[list(sbml_dfs_dict.keys())[0]].schema[table]
+    table_schema = SBML_DFS_SCHEMA.SCHEMA[table]
 
     # update foreign keys using provided lookup tables
     if "fk" in table_schema.keys():
@@ -244,6 +246,8 @@ def reduce_to_consensus_ids(
         Series mapping the index of the aggregated entities to new consensus IDs.
     """
     # Step 1: Build consensus identifiers to create clusters of equivalent entities
+    table_name = table_schema[SCHEMA_DEFS.TABLE]
+    logger.debug(f"Building consensus identifiers for {table_name}")
     indexed_cluster, cluster_consensus_identifiers = build_consensus_identifiers(
         sbml_df, table_schema, defining_biological_qualifiers
     )
@@ -252,25 +256,28 @@ def reduce_to_consensus_ids(
     agg_table_harmonized = sbml_df.join(indexed_cluster)
 
     # Step 3: Create lookup table for entity IDs
+    logger.debug(f"Creating lookup table for {table_name}")
     lookup_table = _create_entity_lookup_table(agg_table_harmonized, table_schema)
 
     # Step 4: Add nameness scores to help select representative names
     agg_table_harmonized = utils._add_nameness_score_wrapper(
-        agg_table_harmonized, "label", table_schema
+        agg_table_harmonized, SCHEMA_DEFS.LABEL, table_schema
     )
 
     # Step 5: Prepare the consensus table with one row per unique entity
+    logger.debug(f"Preparing consensus table for {table_name}")
     new_id_table = _prepare_consensus_table(
         agg_table_harmonized, table_schema, cluster_consensus_identifiers
     )
 
     # Step 6: Add source information if required
-    if "source" in table_schema.keys():
+    if SCHEMA_DEFS.SOURCE in table_schema.keys():
         new_id_table = _add_consensus_sources(
             new_id_table, agg_table_harmonized, lookup_table, table_schema, pw_index
         )
 
     # Step 7: Validate the resulting table
+    logger.debug(f"Validating consensus table for {table_name}")
     _validate_consensus_table(new_id_table, sbml_df)
 
     return new_id_table, lookup_table
@@ -667,7 +674,7 @@ def construct_meta_entities_members(
     defined_by_schema = sbml_dfs_dict[list(sbml_dfs_dict.keys())[0]].schema[defined_by]
 
     # Step 2: Prepare the member table and validate its structure
-    agg_tbl, defining_fk = _prepare_member_table(
+    agg_tbl, _ = _prepare_member_table(
         sbml_dfs_dict,
         defined_by,
         defined_lookup_tables,
@@ -681,9 +688,7 @@ def construct_meta_entities_members(
     membership_lookup = _create_membership_lookup(agg_tbl, table_schema)
 
     # Step 4: Create consensus entities and lookup table
-    consensus_entities, lookup_table = _create_entity_consensus(
-        membership_lookup, table_schema
-    )
+    _, lookup_table = _create_entity_consensus(membership_lookup, table_schema)
 
     # Step 5: Log merger information
     report_consensus_merges(
@@ -1507,6 +1512,11 @@ def _filter_identifiers_by_qualifier(
     pd.DataFrame
         Filtered identifiers
     """
+
+    invalid_bqbs = set(meta_identifiers[IDENTIFIERS.BQB]) - set(VALID_BQB_TERMS)
+    if len(invalid_bqbs) > 0:
+        logger.warning(f"Invalid biological qualifiers: {invalid_bqbs}")
+
     valid_identifiers = meta_identifiers.copy()
     return valid_identifiers[
         meta_identifiers[IDENTIFIERS.BQB].isin(defining_biological_qualifiers)
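
Note: the block added to _filter_identifiers_by_qualifier only warns about unknown qualifiers; the rows returned are filtered exactly as before. A standalone sketch of the same check on a toy frame (the example data is made up; the package uses logger.warning rather than print):

import pandas as pd

from napistu.constants import BQB, IDENTIFIERS, VALID_BQB_TERMS

meta_identifiers = pd.DataFrame({IDENTIFIERS.BQB: [BQB.IS, "BQB_TYPO"]})
invalid_bqbs = set(meta_identifiers[IDENTIFIERS.BQB]) - set(VALID_BQB_TERMS)
if len(invalid_bqbs) > 0:
    print(f"Invalid biological qualifiers: {invalid_bqbs}")  # {'BQB_TYPO'}
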
@@ -2034,22 +2044,6 @@ def _merge_entity_data_report_mismatches(
     return None
 
 
-def _test_same_schema(sbml_dfs_dict: dict[str, sbml_dfs_core.SBML_dfs]) -> None:
-    """
-    Ensure that all sbml_dfs in the dict have the same schema
-    """
-
-    if len(sbml_dfs_dict) != 0:
-        # extract all schemas
-        schema_list = [sbml_dfs_dict[x].schema for x in sbml_dfs_dict.keys()]
-        # if multiple entries are present then are they the same?
-        if len(sbml_dfs_dict) > 1:
-            if not all([x == schema_list[0] for x in schema_list]):
-                raise ValueError("sbml_df schemas were not identical")
-
-    return None
-
-
 def _create_member_string(x: list[str]) -> str:
     x.sort()
     return "_".join(x)
{napistu-0.3.6 → napistu-0.3.7}/src/napistu/constants.py
@@ -55,28 +55,49 @@ SBML_DFS = SimpleNamespace(
     SBO_TERM="sbo_term",
 )
 
+SCHEMA_DEFS = SimpleNamespace(
+    TABLE="table",
+    PK="pk",
+    FK="fk",
+    LABEL="label",
+    ID="id",
+    SOURCE="source",
+    VARS="vars",
+)
+
 SBML_DFS_SCHEMA = SimpleNamespace(
     SCHEMA={
         SBML_DFS.COMPARTMENTS: {
-            "pk": SBML_DFS.C_ID,
-            "label": SBML_DFS.C_NAME,
-            "id": SBML_DFS.C_IDENTIFIERS,
-            "source": SBML_DFS.C_SOURCE,
-            "vars": [SBML_DFS.C_NAME, SBML_DFS.C_IDENTIFIERS, SBML_DFS.C_SOURCE],
+            SCHEMA_DEFS.TABLE: SBML_DFS.COMPARTMENTS,
+            SCHEMA_DEFS.PK: SBML_DFS.C_ID,
+            SCHEMA_DEFS.LABEL: SBML_DFS.C_NAME,
+            SCHEMA_DEFS.ID: SBML_DFS.C_IDENTIFIERS,
+            SCHEMA_DEFS.SOURCE: SBML_DFS.C_SOURCE,
+            SCHEMA_DEFS.VARS: [
+                SBML_DFS.C_NAME,
+                SBML_DFS.C_IDENTIFIERS,
+                SBML_DFS.C_SOURCE,
+            ],
         },
         SBML_DFS.SPECIES: {
-            "pk": SBML_DFS.S_ID,
-            "label": SBML_DFS.S_NAME,
-            "id": SBML_DFS.S_IDENTIFIERS,
-            "source": SBML_DFS.S_SOURCE,
-            "vars": [SBML_DFS.S_NAME, SBML_DFS.S_IDENTIFIERS, SBML_DFS.S_SOURCE],
+            SCHEMA_DEFS.TABLE: SBML_DFS.SPECIES,
+            SCHEMA_DEFS.PK: SBML_DFS.S_ID,
+            SCHEMA_DEFS.LABEL: SBML_DFS.S_NAME,
+            SCHEMA_DEFS.ID: SBML_DFS.S_IDENTIFIERS,
+            SCHEMA_DEFS.SOURCE: SBML_DFS.S_SOURCE,
+            SCHEMA_DEFS.VARS: [
+                SBML_DFS.S_NAME,
+                SBML_DFS.S_IDENTIFIERS,
+                SBML_DFS.S_SOURCE,
+            ],
         },
         SBML_DFS.COMPARTMENTALIZED_SPECIES: {
-            "pk": SBML_DFS.SC_ID,
-            "label": SBML_DFS.SC_NAME,
-            "fk": [SBML_DFS.S_ID, SBML_DFS.C_ID],
-            "source": SBML_DFS.SC_SOURCE,
-            "vars": [
+            SCHEMA_DEFS.TABLE: SBML_DFS.COMPARTMENTALIZED_SPECIES,
+            SCHEMA_DEFS.PK: SBML_DFS.SC_ID,
+            SCHEMA_DEFS.LABEL: SBML_DFS.SC_NAME,
+            SCHEMA_DEFS.FK: [SBML_DFS.S_ID, SBML_DFS.C_ID],
+            SCHEMA_DEFS.SOURCE: SBML_DFS.SC_SOURCE,
+            SCHEMA_DEFS.VARS: [
                 SBML_DFS.SC_NAME,
                 SBML_DFS.S_ID,
                 SBML_DFS.C_ID,
@@ -84,11 +105,12 @@ SBML_DFS_SCHEMA = SimpleNamespace(
             ],
         },
         SBML_DFS.REACTIONS: {
-            "pk": SBML_DFS.R_ID,
-            "label": SBML_DFS.R_NAME,
-            "id": SBML_DFS.R_IDENTIFIERS,
-            "source": SBML_DFS.R_SOURCE,
-            "vars": [
+            SCHEMA_DEFS.TABLE: SBML_DFS.REACTIONS,
+            SCHEMA_DEFS.PK: SBML_DFS.R_ID,
+            SCHEMA_DEFS.LABEL: SBML_DFS.R_NAME,
+            SCHEMA_DEFS.ID: SBML_DFS.R_IDENTIFIERS,
+            SCHEMA_DEFS.SOURCE: SBML_DFS.R_SOURCE,
+            SCHEMA_DEFS.VARS: [
                 SBML_DFS.R_NAME,
                 SBML_DFS.R_IDENTIFIERS,
                 SBML_DFS.R_SOURCE,
@@ -96,9 +118,10 @@ SBML_DFS_SCHEMA = SimpleNamespace(
             ],
         },
         SBML_DFS.REACTION_SPECIES: {
-            "pk": SBML_DFS.RSC_ID,
-            "fk": [SBML_DFS.R_ID, SBML_DFS.SC_ID],
-            "vars": [
+            SCHEMA_DEFS.TABLE: SBML_DFS.REACTION_SPECIES,
+            SCHEMA_DEFS.PK: SBML_DFS.RSC_ID,
+            SCHEMA_DEFS.FK: [SBML_DFS.R_ID, SBML_DFS.SC_ID],
+            SCHEMA_DEFS.VARS: [
                 SBML_DFS.R_ID,
                 SBML_DFS.SC_ID,
                 SBML_DFS.STOICHIOMETRY,
@@ -129,10 +152,10 @@ ENTITIES_TO_ENTITY_DATA = {
 REQUIRED_REACTION_FROMEDGELIST_COLUMNS = [
     "sc_id_up",
     "sc_id_down",
-    "sbo_term",
-    "r_name",
-    "r_Identifiers",
-    "r_isreversible",
+    SBML_DFS.SBO_TERM,
+    SBML_DFS.R_NAME,
+    SBML_DFS.R_IDENTIFIERS,
+    SBML_DFS.R_ISREVERSIBLE,
 ]
 
 NAPISTU_STANDARD_OUTPUTS = SimpleNamespace(
@@ -155,20 +178,6 @@ INTERACTION_EDGELIST_EXPECTED_VARS = {
     SBML_DFS.R_ISREVERSIBLE,
 }
 
-BQB_PRIORITIES = pd.DataFrame(
-    [{"bqb": "BQB_IS", "bqb_rank": 1}, {"bqb": "BQB_HAS_PART", "bqb_rank": 2}]
-)
-
-ONTOLOGY_PRIORITIES = pd.DataFrame(
-    [
-        {"ontology": "reactome", "ontology_rank": 1},
-        {"ontology": "ensembl_gene", "ontology_rank": 2},
-        {"ontology": "chebi", "ontology_rank": 3},
-        {"ontology": "uniprot", "ontology_rank": 4},
-        {"ontology": "go", "ontology_rank": 5},
-    ]
-)
-
 # SBML
 # Biological qualifiers
 # Biomodels qualifiers
@@ -189,16 +198,33 @@ BQB = SimpleNamespace(
     UNKNOWN="BQB_UNKNOWN",
 )
 
+VALID_BQB_TERMS = [
+    BQB.IS,
+    BQB.HAS_PART,
+    BQB.IS_PART_OF,
+    BQB.IS_VERSION_OF,
+    BQB.HAS_VERSION,
+    BQB.IS_HOMOLOG_TO,
+    BQB.IS_DESCRIBED_BY,
+    BQB.IS_ENCODED_BY,
+    BQB.ENCODES,
+    BQB.OCCURS_IN,
+    BQB.HAS_PROPERTY,
+    BQB.IS_PROPERTY_OF,
+    BQB.HAS_TAXON,
+    BQB.UNKNOWN,
+]
+
 # molecules are distinctly defined by these BQB terms
-BQB_DEFINING_ATTRS = ["BQB_IS", "IS_HOMOLOG_TO"]
+BQB_DEFINING_ATTRS = [BQB.IS, BQB.IS_HOMOLOG_TO]
 
 # a looser convention which will aggregate genes, transcripts, and proteins
 # if they are linked with the appropriate bioqualifiers
 BQB_DEFINING_ATTRS_LOOSE = [
-    "BQB_IS",
-    "IS_HOMOLOG_TO",
-    "BQB_IS_ENCODED_BY",
-    "BQB_ENCODES",
+    BQB.IS,
+    BQB.IS_HOMOLOG_TO,
+    BQB.IS_ENCODED_BY,
+    BQB.ENCODES,
 ]
 
 # identifiers
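
Note: moving BQB_DEFINING_ATTRS and BQB_DEFINING_ATTRS_LOOSE from string literals onto the BQB namespace also keeps them consistent with the new VALID_BQB_TERMS list (the 0.3.6 literals included "IS_HOMOLOG_TO" without the BQB_ prefix). A hypothetical sanity check, not part of the package's test suite:

from napistu.constants import BQB_DEFINING_ATTRS, BQB_DEFINING_ATTRS_LOOSE, VALID_BQB_TERMS

# both defining-attribute conventions should be subsets of the valid qualifier terms
assert set(BQB_DEFINING_ATTRS) <= set(VALID_BQB_TERMS)
assert set(BQB_DEFINING_ATTRS_LOOSE) <= set(VALID_BQB_TERMS)
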
@@ -206,6 +232,13 @@ IDENTIFIERS = SimpleNamespace(
     ONTOLOGY="ontology", IDENTIFIER="identifier", BQB="bqb", URL="url"
 )
 
+BQB_PRIORITIES = pd.DataFrame(
+    [
+        {IDENTIFIERS.BQB: BQB.IS, "bqb_rank": 1},
+        {IDENTIFIERS.BQB: BQB.HAS_PART, "bqb_rank": 2},
+    ]
+)
+
 IDENTIFIERS_REQUIRED_VARS = {
     IDENTIFIERS.ONTOLOGY,
     IDENTIFIERS.IDENTIFIER,
@@ -217,26 +250,9 @@ SPECIES_IDENTIFIERS_REQUIRED_VARS = IDENTIFIERS_REQUIRED_VARS | {
     SBML_DFS.S_NAME,
 }
 
-BIOLOGICAL_QUALIFIERS = [
-    "BQB_IS",
-    "BQB_HAS_PART",
-    "BQB_IS_PART_OF",
-    "BQB_IS_VERSION_OF",
-    "BQB_HAS_VERSION",
-    "BQB_IS_HOMOLOG_TO",
-    "BQB_IS_DESCRIBED_BY",
-    "BQB_IS_ENCODED_BY",
-    "BQB_ENCODES",
-    "BQB_OCCURS_IN",
-    "BQB_HAS_PROPERTY",
-    "BQB_IS_PROPERTY_OF",
-    "BQB_HAS_TAXON",
-    "BQB_UNKNOWN",
-]
-
 
 def get_biological_qualifier_codes():
-    bio_qualifier_codes = {getattr(libsbml, bqb): bqb for bqb in BIOLOGICAL_QUALIFIERS}
+    bio_qualifier_codes = {getattr(libsbml, bqb): bqb for bqb in VALID_BQB_TERMS}
 
     return bio_qualifier_codes
 
@@ -409,6 +425,16 @@ ONTOLOGY_SPECIES_ALIASES = {
     ONTOLOGIES.UNIPROT: {"Uniprot"},
 }
 
+ONTOLOGY_PRIORITIES = pd.DataFrame(
+    [
+        {"ontology": ONTOLOGIES.REACTOME, "ontology_rank": 1},
+        {"ontology": ONTOLOGIES.ENSEMBL_GENE, "ontology_rank": 2},
+        {"ontology": ONTOLOGIES.CHEBI, "ontology_rank": 3},
+        {"ontology": ONTOLOGIES.UNIPROT, "ontology_rank": 4},
+        {"ontology": ONTOLOGIES.GO, "ontology_rank": 5},
+    ]
+)
+
 ENSEMBL_MOLECULE_TYPES_TO_ONTOLOGY = {
     "G": ONTOLOGIES.ENSEMBL_GENE,
     "T": ONTOLOGIES.ENSEMBL_TRANSCRIPT,
{napistu-0.3.6 → napistu-0.3.7}/src/napistu/indices.py
@@ -266,6 +266,7 @@ def adapt_pw_index(
     source: str | PWIndex,
     species: str | Iterable[str] | None,
     outdir: str | None = None,
+    update_index: bool = False,
 ) -> PWIndex:
     """Adapts a pw_index
 
@@ -288,8 +289,9 @@ def adapt_pw_index(
         raise ValueError("'source' needs to be str or PWIndex")
     pw_index.filter(species=species)
 
-    if outdir is not None:
+    if outdir is not None and update_index:
        with open_fs(outdir, create=True) as fs:
            with fs.open("pw_index.tsv", "w") as f:
                pw_index.index.to_csv(f, sep="\t")
+
    return pw_index
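
Note: with the new update_index flag (default False), adapt_pw_index no longer rewrites pw_index.tsv just because outdir is provided; the write now requires both. A hedged usage sketch (the source path, species, and output directory are placeholders):

from napistu import indices

pw_index = indices.adapt_pw_index(
    "reactome/pw_index.tsv",  # placeholder path, or a PWIndex instance
    species="Homo sapiens",
    outdir="reactome",        # only written back when update_index=True
    update_index=True,
)
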