napistu 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. {napistu-0.4.0/src/napistu.egg-info → napistu-0.4.1}/PKG-INFO +6 -1
  2. {napistu-0.4.0 → napistu-0.4.1}/README.md +5 -0
  3. {napistu-0.4.0 → napistu-0.4.1}/setup.cfg +1 -1
  4. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/constants.py +2 -0
  5. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/gcs/constants.py +11 -11
  6. napistu-0.4.1/src/napistu/ontologies/id_tables.py +282 -0
  7. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/sbml_dfs_core.py +53 -63
  8. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/sbml_dfs_utils.py +82 -18
  9. {napistu-0.4.0 → napistu-0.4.1/src/napistu.egg-info}/PKG-INFO +6 -1
  10. {napistu-0.4.0 → napistu-0.4.1}/src/napistu.egg-info/SOURCES.txt +2 -0
  11. napistu-0.4.1/src/tests/test_ontologies_id_tables.py +198 -0
  12. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_sbml_dfs_core.py +30 -19
  13. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_sbml_dfs_utils.py +70 -0
  14. {napistu-0.4.0 → napistu-0.4.1}/LICENSE +0 -0
  15. {napistu-0.4.0 → napistu-0.4.1}/pyproject.toml +0 -0
  16. {napistu-0.4.0 → napistu-0.4.1}/setup.py +0 -0
  17. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/__init__.py +0 -0
  18. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/__main__.py +0 -0
  19. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/consensus.py +0 -0
  20. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/context/__init__.py +0 -0
  21. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/context/discretize.py +0 -0
  22. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/context/filtering.py +0 -0
  23. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/gcs/__init__.py +0 -0
  24. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/gcs/downloads.py +0 -0
  25. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/gcs/utils.py +0 -0
  26. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/identifiers.py +0 -0
  27. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/indices.py +0 -0
  28. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/__init__.py +0 -0
  29. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/bigg.py +0 -0
  30. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/constants.py +0 -0
  31. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/gtex.py +0 -0
  32. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/hpa.py +0 -0
  33. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/identifiers_etl.py +0 -0
  34. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/napistu_edgelist.py +0 -0
  35. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/obo.py +0 -0
  36. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/psi_mi.py +0 -0
  37. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/reactome.py +0 -0
  38. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/sbml.py +0 -0
  39. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/string.py +0 -0
  40. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/trrust.py +0 -0
  41. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ingestion/yeast.py +0 -0
  42. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/matching/__init__.py +0 -0
  43. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/matching/constants.py +0 -0
  44. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/matching/interactions.py +0 -0
  45. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/matching/mount.py +0 -0
  46. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/matching/species.py +0 -0
  47. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/__init__.py +0 -0
  48. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/__main__.py +0 -0
  49. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/client.py +0 -0
  50. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/codebase.py +0 -0
  51. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/codebase_utils.py +0 -0
  52. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/component_base.py +0 -0
  53. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/config.py +0 -0
  54. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/constants.py +0 -0
  55. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/documentation.py +0 -0
  56. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/documentation_utils.py +0 -0
  57. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/execution.py +0 -0
  58. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/health.py +0 -0
  59. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/profiles.py +0 -0
  60. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/server.py +0 -0
  61. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/tutorials.py +0 -0
  62. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/tutorials_utils.py +0 -0
  63. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/mcp/utils.py +0 -0
  64. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/modify/__init__.py +0 -0
  65. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/modify/constants.py +0 -0
  66. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/modify/curation.py +0 -0
  67. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/modify/gaps.py +0 -0
  68. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/modify/pathwayannot.py +0 -0
  69. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/modify/uncompartmentalize.py +0 -0
  70. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/__init__.py +0 -0
  71. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/constants.py +0 -0
  72. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/data_handling.py +0 -0
  73. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/ig_utils.py +0 -0
  74. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/neighborhoods.py +0 -0
  75. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/net_create.py +0 -0
  76. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/net_create_utils.py +0 -0
  77. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/net_propagation.py +0 -0
  78. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/ng_core.py +0 -0
  79. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/ng_utils.py +0 -0
  80. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/paths.py +0 -0
  81. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/network/precompute.py +0 -0
  82. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ontologies/__init__.py +0 -0
  83. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ontologies/constants.py +0 -0
  84. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ontologies/dogma.py +0 -0
  85. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ontologies/genodexito.py +0 -0
  86. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ontologies/mygene.py +0 -0
  87. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/ontologies/renaming.py +0 -0
  88. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/rpy2/__init__.py +0 -0
  89. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/rpy2/callr.py +0 -0
  90. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/rpy2/constants.py +0 -0
  91. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/rpy2/rids.py +0 -0
  92. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/scverse/__init__.py +0 -0
  93. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/scverse/constants.py +0 -0
  94. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/scverse/loading.py +0 -0
  95. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/source.py +0 -0
  96. {napistu-0.4.0 → napistu-0.4.1}/src/napistu/utils.py +0 -0
  97. {napistu-0.4.0 → napistu-0.4.1}/src/napistu.egg-info/dependency_links.txt +0 -0
  98. {napistu-0.4.0 → napistu-0.4.1}/src/napistu.egg-info/entry_points.txt +0 -0
  99. {napistu-0.4.0 → napistu-0.4.1}/src/napistu.egg-info/requires.txt +0 -0
  100. {napistu-0.4.0 → napistu-0.4.1}/src/napistu.egg-info/top_level.txt +0 -0
  101. {napistu-0.4.0 → napistu-0.4.1}/src/tests/__init__.py +0 -0
  102. {napistu-0.4.0 → napistu-0.4.1}/src/tests/conftest.py +0 -0
  103. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_consensus.py +0 -0
  104. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_constants.py +0 -0
  105. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_context_discretize.py +0 -0
  106. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_context_filtering.py +0 -0
  107. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_curation.py +0 -0
  108. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_data/__init__.py +0 -0
  109. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_gaps.py +0 -0
  110. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_gcs.py +0 -0
  111. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_identifiers.py +0 -0
  112. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_indices.py +0 -0
  113. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_ingestion_napistu_edgelist.py +0 -0
  114. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_ingestion_obo.py +0 -0
  115. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_matching_interactions.py +0 -0
  116. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_matching_mount.py +0 -0
  117. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_matching_species.py +0 -0
  118. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_mcp_config.py +0 -0
  119. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_mcp_documentation_utils.py +0 -0
  120. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_mcp_server.py +0 -0
  121. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_network_data_handling.py +0 -0
  122. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_network_ig_utils.py +0 -0
  123. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_network_neighborhoods.py +0 -0
  124. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_network_net_create.py +0 -0
  125. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_network_net_create_utils.py +0 -0
  126. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_network_net_propagation.py +0 -0
  127. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_network_ng_core.py +0 -0
  128. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_network_ng_utils.py +0 -0
  129. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_network_paths.py +0 -0
  130. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_network_precompute.py +0 -0
  131. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_ontologies_genodexito.py +0 -0
  132. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_ontologies_mygene.py +0 -0
  133. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_ontologies_renaming.py +0 -0
  134. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_pathwayannot.py +0 -0
  135. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_rpy2_callr.py +0 -0
  136. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_rpy2_init.py +0 -0
  137. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_sbml.py +0 -0
  138. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_sbo.py +0 -0
  139. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_scverse_loading.py +0 -0
  140. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_set_coverage.py +0 -0
  141. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_source.py +0 -0
  142. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_uncompartmentalize.py +0 -0
  143. {napistu-0.4.0 → napistu-0.4.1}/src/tests/test_utils.py +0 -0
  144. {napistu-0.4.0 → napistu-0.4.1}/src/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: napistu
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Connecting high-dimensional data to curated pathways
5
5
  Home-page: https://github.com/napistu/napistu-py
6
6
  Author: Sean Hackett
@@ -61,7 +61,12 @@ Dynamic: license-file
61
61
 
62
62
  # Napistu Python Library
63
63
 
64
+ [![PyPI version](https://badge.fury.io/py/napistu.svg)](https://badge.fury.io/py/napistu)
64
65
  [![Documentation Status](https://readthedocs.org/projects/napistu/badge/?version=latest)](https://napistu.readthedocs.io/en/latest/?badge=latest)
66
+ [![CI](https://github.com/napistu/napistu-py/actions/workflows/ci.yml/badge.svg)](https://github.com/napistu/napistu-py/actions/workflows/ci.yml)
67
+ [![Release](https://github.com/napistu/napistu-py/actions/workflows/release.yml/badge.svg)](https://github.com/napistu/napistu-py/actions/workflows/release.yml)
68
+ [![Deploy to Cloud Run](https://github.com/napistu/napistu-py/actions/workflows/deploy.yml/badge.svg)](https://github.com/napistu/napistu-py/actions/workflows/deploy.yml)
69
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
65
70
 
66
71
  This Python package hosts the majority of the algorithmic code for the [Napistu project](https://github.com/napistu/napistu).
67
72
 
@@ -1,6 +1,11 @@
1
1
  # Napistu Python Library
2
2
 
3
+ [![PyPI version](https://badge.fury.io/py/napistu.svg)](https://badge.fury.io/py/napistu)
3
4
  [![Documentation Status](https://readthedocs.org/projects/napistu/badge/?version=latest)](https://napistu.readthedocs.io/en/latest/?badge=latest)
5
+ [![CI](https://github.com/napistu/napistu-py/actions/workflows/ci.yml/badge.svg)](https://github.com/napistu/napistu-py/actions/workflows/ci.yml)
6
+ [![Release](https://github.com/napistu/napistu-py/actions/workflows/release.yml/badge.svg)](https://github.com/napistu/napistu-py/actions/workflows/release.yml)
7
+ [![Deploy to Cloud Run](https://github.com/napistu/napistu-py/actions/workflows/deploy.yml/badge.svg)](https://github.com/napistu/napistu-py/actions/workflows/deploy.yml)
8
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
4
9
 
5
10
  This Python package hosts the majority of the algorithmic code for the [Napistu project](https://github.com/napistu/napistu).
6
11
 
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = napistu
3
- version = 0.4.0
3
+ version = 0.4.1
4
4
  description = Connecting high-dimensional data to curated pathways
5
5
  long_description = file: README.md
6
6
  long_description_content_type = text/markdown
@@ -402,12 +402,14 @@ ONTOLOGIES = SimpleNamespace(
402
402
  ENSEMBL_PROTEIN_VERSION="ensembl_protein_version",
403
403
  GENE_NAME="gene_name",
404
404
  GO="go",
405
+ KEGG="kegg",
405
406
  MIRBASE="mirbase",
406
407
  NCBI_ENTREZ_GENE="ncbi_entrez_gene",
407
408
  PHAROS="pharos",
408
409
  REACTOME="reactome",
409
410
  SYMBOL="symbol",
410
411
  UNIPROT="uniprot",
412
+ WIKIPATHWAYS="wikipathways",
411
413
  )
412
414
 
413
415
  ONTOLOGIES_LIST = list(ONTOLOGIES.__dict__.values())
@@ -5,17 +5,17 @@ from types import SimpleNamespace
5
5
 
6
6
  GCS_SUBASSET_NAMES = SimpleNamespace(
7
7
  SBML_DFS="sbml_dfs",
8
- IDENTIFIERS="identifiers",
9
- REGULATORY_GRAPH="regulatory_graph",
8
+ NAPISTU_GRAPH="napistu_graph",
9
+ SPECIES_IDENTIFIERS="species_identifiers",
10
10
  REGULATORY_DISTANCES="regulatory_distances",
11
11
  )
12
12
 
13
13
 
14
14
  GCS_FILETYPES = SimpleNamespace(
15
15
  SBML_DFS="sbml_dfs.pkl",
16
- IDENTIFIERS="identifiers.tsv",
17
- REGULATORY_GRAPH="regulatory_graph.pkl",
18
- REGULATORY_DISTANCES="regulatory_distances.json",
16
+ NAPISTU_GRAPH="napistu_graph.pkl",
17
+ SPECIES_IDENTIFIERS="species_identifiers.tsv",
18
+ REGULATORY_DISTANCES="regulatory_distances.parquet",
19
19
  )
20
20
 
21
21
 
@@ -27,8 +27,8 @@ GCS_ASSETS = SimpleNamespace(
27
27
  "file": "test_pathway.tar.gz",
28
28
  "subassets": {
29
29
  GCS_SUBASSET_NAMES.SBML_DFS: GCS_FILETYPES.SBML_DFS,
30
- GCS_SUBASSET_NAMES.IDENTIFIERS: GCS_FILETYPES.IDENTIFIERS,
31
- GCS_SUBASSET_NAMES.REGULATORY_GRAPH: GCS_FILETYPES.REGULATORY_GRAPH,
30
+ GCS_SUBASSET_NAMES.NAPISTU_GRAPH: GCS_FILETYPES.NAPISTU_GRAPH,
31
+ GCS_SUBASSET_NAMES.SPECIES_IDENTIFIERS: GCS_FILETYPES.SPECIES_IDENTIFIERS,
32
32
  GCS_SUBASSET_NAMES.REGULATORY_DISTANCES: GCS_FILETYPES.REGULATORY_DISTANCES,
33
33
  },
34
34
  "public_url": "https://storage.googleapis.com/shackett-napistu-public/test_pathway.tar.gz",
@@ -37,8 +37,8 @@ GCS_ASSETS = SimpleNamespace(
37
37
  "file": "human_consensus.tar.gz",
38
38
  "subassets": {
39
39
  GCS_SUBASSET_NAMES.SBML_DFS: GCS_FILETYPES.SBML_DFS,
40
- GCS_SUBASSET_NAMES.IDENTIFIERS: GCS_FILETYPES.IDENTIFIERS,
41
- GCS_SUBASSET_NAMES.REGULATORY_GRAPH: GCS_FILETYPES.REGULATORY_GRAPH,
40
+ GCS_SUBASSET_NAMES.NAPISTU_GRAPH: GCS_FILETYPES.NAPISTU_GRAPH,
41
+ GCS_SUBASSET_NAMES.SPECIES_IDENTIFIERS: GCS_FILETYPES.SPECIES_IDENTIFIERS,
42
42
  },
43
43
  "public_url": "https://storage.googleapis.com/shackett-napistu-public/human_consensus.tar.gz",
44
44
  },
@@ -46,8 +46,8 @@ GCS_ASSETS = SimpleNamespace(
46
46
  "file": "human_consensus_w_distances.tar.gz",
47
47
  "subassets": {
48
48
  GCS_SUBASSET_NAMES.SBML_DFS: GCS_FILETYPES.SBML_DFS,
49
- GCS_SUBASSET_NAMES.IDENTIFIERS: GCS_FILETYPES.IDENTIFIERS,
50
- GCS_SUBASSET_NAMES.REGULATORY_GRAPH: GCS_FILETYPES.REGULATORY_GRAPH,
49
+ GCS_SUBASSET_NAMES.NAPISTU_GRAPH: GCS_FILETYPES.NAPISTU_GRAPH,
50
+ GCS_SUBASSET_NAMES.SPECIES_IDENTIFIERS: GCS_FILETYPES.SPECIES_IDENTIFIERS,
51
51
  GCS_SUBASSET_NAMES.REGULATORY_DISTANCES: GCS_FILETYPES.REGULATORY_DISTANCES,
52
52
  },
53
53
  "public_url": "https://storage.googleapis.com/calico-cpr-public/human_consensus_w_distances.tar.gz",
@@ -0,0 +1,282 @@
1
+ import logging
2
+ from typing import Optional, Union, Set
3
+
4
+ import pandas as pd
5
+
6
+ from napistu import sbml_dfs_utils
7
+ from napistu.constants import (
8
+ BQB,
9
+ BQB_DEFINING_ATTRS_LOOSE,
10
+ IDENTIFIERS,
11
+ SBML_DFS_SCHEMA,
12
+ SCHEMA_DEFS,
13
+ VALID_BQB_TERMS,
14
+ )
15
+ from napistu import utils
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ def filter_id_table(
21
+ id_table: pd.DataFrame,
22
+ identifiers: Optional[Union[str, list, set]] = None,
23
+ ontologies: Optional[Union[str, list, set]] = None,
24
+ bqbs: Optional[Union[str, list, set]] = BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART],
25
+ ) -> pd.DataFrame:
26
+ """
27
+ Filter an identifier table by identifiers, ontologies, and BQB terms for a given entity type.
28
+
29
+ Parameters
30
+ ----------
31
+ id_table : pd.DataFrame
32
+ DataFrame containing identifier mappings to be filtered.
33
+ identifiers : str, list, set, or None, optional
34
+ Identifiers to filter by. If None, no filtering is applied on identifiers.
35
+ ontologies : str, list, set, or None, optional
36
+ Ontologies to filter by. If None, no filtering is applied on ontologies.
37
+ bqbs : str, list, set, or None, optional
38
+ BQB terms to filter by. If None, no filtering is applied on BQB terms. Default is BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART].
39
+
40
+ Returns
41
+ -------
42
+ pd.DataFrame
43
+ Filtered DataFrame containing only rows matching the specified criteria.
44
+
45
+ Raises
46
+ ------
47
+ ValueError
48
+ If the id_table or filter values are invalid, or required columns are missing.
49
+ """
50
+
51
+ entity_type = sbml_dfs_utils.infer_entity_type(id_table)
52
+ _validate_id_table(id_table, entity_type)
53
+
54
+ # bqbs
55
+ if bqbs is not None:
56
+ bqbs = _sanitize_id_table_bqbs(bqbs, id_table)
57
+ id_table = id_table.query("bqb in @bqbs")
58
+
59
+ # ontologies
60
+ if ontologies is not None:
61
+ ontologies = _sanitize_id_table_ontologies(ontologies, id_table)
62
+ id_table = id_table.query("ontology in @ontologies")
63
+
64
+ # identifiers
65
+ if identifiers is not None:
66
+ identifiers = _sanitize_id_table_identifiers(identifiers, id_table)
67
+ id_table = id_table.query("identifier in @identifiers")
68
+
69
+ # return the filtered id_table
70
+ return id_table
71
+
72
+
73
+ def _validate_id_table(id_table: pd.DataFrame, entity_type: str) -> None:
74
+ """
75
+ Validate that the id_table contains the required columns and matches the schema for the given entity_type.
76
+
77
+ Parameters
78
+ ----------
79
+ id_table : pd.DataFrame
80
+ DataFrame containing identifier mappings for a given entity type.
81
+ entity_type : str
82
+ The type of entity (e.g., 'species', 'reactions') to validate against the schema.
83
+
84
+ Returns
85
+ -------
86
+ None
87
+
88
+ Raises
89
+ ------
90
+ ValueError
91
+ If entity_type is not present in the schema, or if required columns are missing in id_table.
92
+ """
93
+
94
+ schema = SBML_DFS_SCHEMA.SCHEMA
95
+
96
+ if entity_type not in schema.keys():
97
+ raise ValueError(
98
+ f"{entity_type} does not match a table in the SBML_dfs object. The tables "
99
+ f"which are present are {', '.join(schema.keys())}"
100
+ )
101
+
102
+ entity_table_attrs = schema[entity_type]
103
+
104
+ if SCHEMA_DEFS.ID not in entity_table_attrs.keys():
105
+ raise ValueError(f"{entity_type} does not have an 'id' attribute")
106
+
107
+ entity_pk = entity_table_attrs[SCHEMA_DEFS.PK]
108
+
109
+ utils.match_pd_vars(
110
+ id_table,
111
+ req_vars={
112
+ entity_pk,
113
+ IDENTIFIERS.ONTOLOGY,
114
+ IDENTIFIERS.IDENTIFIER,
115
+ IDENTIFIERS.URL,
116
+ IDENTIFIERS.BQB,
117
+ },
118
+ allow_series=False,
119
+ ).assert_present()
120
+
121
+ return None
122
+
123
+
124
+ def _sanitize_id_table_values(
125
+ values: Union[str, list, set],
126
+ id_table: pd.DataFrame,
127
+ column_name: str,
128
+ valid_values: Optional[Set[str]] = None,
129
+ value_type_name: str = None,
130
+ ) -> set:
131
+ """
132
+ Generic function to sanitize and validate values against an id_table column.
133
+
134
+ Parameters
135
+ ----------
136
+ values : str, list, or set
137
+ Values to sanitize and validate. Can be a single string, list of strings,
138
+ or set of strings.
139
+ id_table : pd.DataFrame
140
+ DataFrame containing the reference data to validate against.
141
+ column_name : str
142
+ Name of the column in id_table to check values against.
143
+ valid_values : set of str, optional
144
+ Optional set of globally valid values for additional validation
145
+ (e.g., VALID_BQB_TERMS). If provided, values must be a subset of this set.
146
+ value_type_name : str, optional
147
+ Human-readable name for the value type used in error messages.
148
+ If None, defaults to column_name.
149
+
150
+ Returns
151
+ -------
152
+ set
153
+ Set of sanitized and validated values.
154
+
155
+ Raises
156
+ ------
157
+ ValueError
158
+ If values is not a string, list, or set.
159
+ If any values are not in valid_values (when provided).
160
+ If none of the requested values are present in the id_table.
161
+
162
+ Warnings
163
+ --------
164
+ Logs a warning if some (but not all) requested values are missing from id_table.
165
+ """
166
+ if value_type_name is None:
167
+ value_type_name = column_name
168
+
169
+ # Convert to set
170
+ if isinstance(values, str):
171
+ values = {values}
172
+ elif isinstance(values, list):
173
+ values = set(values)
174
+ elif isinstance(values, set):
175
+ pass
176
+ else:
177
+ raise ValueError(
178
+ f"{value_type_name} must be a string, a set, or list, got {type(values).__name__}"
179
+ )
180
+
181
+ # Check against global valid values if provided
182
+ if valid_values is not None:
183
+ invalid_values = values.difference(valid_values)
184
+ if len(invalid_values) > 0:
185
+ raise ValueError(
186
+ f"The following {value_type_name} are not valid: {', '.join(invalid_values)}.\n"
187
+ f"Valid {value_type_name} are {', '.join(valid_values)}"
188
+ )
189
+
190
+ # Check against values present in the id_table
191
+ available_values = set(id_table[column_name].unique())
192
+ missing_values = values.difference(available_values)
193
+
194
+ if len(missing_values) == len(values):
195
+ raise ValueError(
196
+ f"None of the requested {value_type_name} are present in the id_table: {', '.join(missing_values)}.\n"
197
+ f"The included {value_type_name} are {', '.join(available_values)}"
198
+ )
199
+ elif len(missing_values) > 0:
200
+ logger.warning(
201
+ f"The following {value_type_name} are not present in the id_table: {', '.join(missing_values)}.\n"
202
+ f"The included {value_type_name} are {', '.join(available_values)}"
203
+ )
204
+
205
+ return values
206
+
207
+
208
+ def _sanitize_id_table_ontologies(
209
+ ontologies: Union[str, list, set], id_table: pd.DataFrame
210
+ ) -> set:
211
+ """
212
+ Sanitize and validate ontologies against the id_table.
213
+
214
+ Parameters
215
+ ----------
216
+ ontologies : str, list, or set
217
+ Ontology names to validate.
218
+ id_table : pd.DataFrame
219
+ DataFrame containing ontology reference data.
220
+
221
+ Returns
222
+ -------
223
+ set
224
+ Set of validated ontology names.
225
+ """
226
+ return _sanitize_id_table_values(
227
+ values=ontologies,
228
+ id_table=id_table,
229
+ column_name=IDENTIFIERS.ONTOLOGY,
230
+ value_type_name="ontologies",
231
+ )
232
+
233
+
234
+ def _sanitize_id_table_bqbs(bqbs: Union[str, list, set], id_table: pd.DataFrame) -> set:
235
+ """
236
+ Sanitize and validate BQBs against the id_table.
237
+
238
+ Parameters
239
+ ----------
240
+ bqbs : str, list, or set
241
+ BQB terms to validate.
242
+ id_table : pd.DataFrame
243
+ DataFrame containing BQB reference data.
244
+
245
+ Returns
246
+ -------
247
+ set
248
+ Set of validated BQB terms.
249
+ """
250
+ return _sanitize_id_table_values(
251
+ values=bqbs,
252
+ id_table=id_table,
253
+ column_name=IDENTIFIERS.BQB,
254
+ valid_values=VALID_BQB_TERMS,
255
+ value_type_name="bqbs",
256
+ )
257
+
258
+
259
+ def _sanitize_id_table_identifiers(
260
+ identifiers: Union[str, list, set], id_table: pd.DataFrame
261
+ ) -> set:
262
+ """
263
+ Sanitize and validate identifiers against the id_table.
264
+
265
+ Parameters
266
+ ----------
267
+ identifiers : str, list, or set
268
+ Identifier values to validate.
269
+ id_table : pd.DataFrame
270
+ DataFrame containing identifier reference data.
271
+
272
+ Returns
273
+ -------
274
+ set
275
+ Set of validated identifiers.
276
+ """
277
+ return _sanitize_id_table_values(
278
+ values=identifiers,
279
+ id_table=id_table,
280
+ column_name=IDENTIFIERS.IDENTIFIER,
281
+ value_type_name="identifiers",
282
+ )
@@ -19,17 +19,23 @@ from napistu import sbml_dfs_utils
19
19
  from napistu import source
20
20
  from napistu import utils
21
21
  from napistu.ingestion import sbml
22
- from napistu.constants import SBML_DFS
23
- from napistu.constants import SBML_DFS_SCHEMA
24
- from napistu.constants import IDENTIFIERS
25
- from napistu.constants import NAPISTU_STANDARD_OUTPUTS
26
- from napistu.constants import BQB_PRIORITIES
27
- from napistu.constants import ONTOLOGY_PRIORITIES
28
- from napistu.constants import MINI_SBO_FROM_NAME
29
- from napistu.constants import MINI_SBO_TO_NAME
30
- from napistu.constants import SBOTERM_NAMES
31
- from napistu.constants import ENTITIES_W_DATA
32
- from napistu.constants import ENTITIES_TO_ENTITY_DATA
22
+ from napistu.ontologies import id_tables
23
+ from napistu.constants import (
24
+ BQB,
25
+ BQB_DEFINING_ATTRS_LOOSE,
26
+ BQB_PRIORITIES,
27
+ ENTITIES_W_DATA,
28
+ ENTITIES_TO_ENTITY_DATA,
29
+ IDENTIFIERS,
30
+ MINI_SBO_FROM_NAME,
31
+ MINI_SBO_TO_NAME,
32
+ NAPISTU_STANDARD_OUTPUTS,
33
+ ONTOLOGY_PRIORITIES,
34
+ SBML_DFS,
35
+ SBML_DFS_SCHEMA,
36
+ SBOTERM_NAMES,
37
+ SCHEMA_DEFS,
38
+ )
33
39
 
34
40
  logger = logging.getLogger(__name__)
35
41
 
@@ -101,7 +107,7 @@ class SBML_dfs:
101
107
  Remove a reactions data table by label.
102
108
  remove_species_data(label)
103
109
  Remove a species data table by label.
104
- search_by_ids(ids, entity_type, identifiers_df, ontologies=None)
110
+ search_by_ids(id_table, identifiers=None, ontologies=None, bqbs=None)
105
111
  Find entities and identifiers matching a set of query IDs.
106
112
  search_by_name(name, entity_type, partial_match=True)
107
113
  Find entities by exact or partial name match.
@@ -455,12 +461,12 @@ class SBML_dfs:
455
461
  ValueError
456
462
  If id_type is invalid or identifiers are malformed
457
463
  """
458
- selected_table = self.get_table(id_type, {"id"})
464
+ selected_table = self.get_table(id_type, {SCHEMA_DEFS.ID})
459
465
  schema = SBML_DFS_SCHEMA.SCHEMA
460
466
 
461
467
  identifiers_dict = dict()
462
468
  for sysid in selected_table.index:
463
- id_entry = selected_table[schema[id_type]["id"]][sysid]
469
+ id_entry = selected_table[schema[id_type][SCHEMA_DEFS.ID]][sysid]
464
470
 
465
471
  if isinstance(id_entry, identifiers.Identifiers):
466
472
  identifiers_dict[sysid] = pd.DataFrame(id_entry.ids)
@@ -473,16 +479,16 @@ class SBML_dfs:
473
479
  )
474
480
  if not identifiers_dict:
475
481
  # Return empty DataFrame with expected columns if nothing found
476
- return pd.DataFrame(columns=[schema[id_type]["pk"], "entry"])
482
+ return pd.DataFrame(columns=[schema[id_type][SCHEMA_DEFS.PK], "entry"])
477
483
 
478
484
  identifiers_tbl = pd.concat(identifiers_dict)
479
485
 
480
- identifiers_tbl.index.names = [schema[id_type]["pk"], "entry"]
486
+ identifiers_tbl.index.names = [schema[id_type][SCHEMA_DEFS.PK], "entry"]
481
487
  identifiers_tbl = identifiers_tbl.reset_index()
482
488
 
483
489
  named_identifiers = identifiers_tbl.merge(
484
- selected_table.drop(schema[id_type]["id"], axis=1),
485
- left_on=schema[id_type]["pk"],
490
+ selected_table.drop(schema[id_type][SCHEMA_DEFS.ID], axis=1),
491
+ left_on=schema[id_type][SCHEMA_DEFS.PK],
486
492
  right_index=True,
487
493
  )
488
494
 
@@ -1163,24 +1169,25 @@ class SBML_dfs:
1163
1169
 
1164
1170
  def search_by_ids(
1165
1171
  self,
1166
- ids: list[str],
1167
- entity_type: str,
1168
- identifiers_df: pd.DataFrame,
1169
- ontologies: None | set[str] = None,
1172
+ id_table: pd.DataFrame,
1173
+ identifiers: Optional[Union[str, list, set]] = None,
1174
+ ontologies: Optional[Union[str, list, set]] = None,
1175
+ bqbs: Optional[Union[str, list, set]] = BQB_DEFINING_ATTRS_LOOSE
1176
+ + [BQB.HAS_PART],
1170
1177
  ) -> tuple[pd.DataFrame, pd.DataFrame]:
1171
1178
  """
1172
1179
  Find entities and identifiers matching a set of query IDs.
1173
1180
 
1174
1181
  Parameters
1175
1182
  ----------
1176
- ids : List[str]
1177
- List of identifiers to search for
1178
- entity_type : str
1179
- Type of entity to search (e.g., 'species', 'reactions')
1180
- identifiers_df : pd.DataFrame
1183
+ id_table : pd.DataFrame
1181
1184
  DataFrame containing identifier mappings
1182
- ontologies : Optional[Set[str]], optional
1183
- Set of ontologies to filter by, by default None
1185
+ identifiers : Optional[Union[str, list, set]], optional
1186
+ Identifiers to filter by, by default None
1187
+ ontologies : Optional[Union[str, list, set]], optional
1188
+ Ontologies to filter by, by default None
1189
+ bqbs : Optional[Union[str, list, set]], optional
1190
+ BQB terms to filter by, by default BQB_DEFINING_ATTRS_LOOSE + [BQB.HAS_PART]
1184
1191
 
1185
1192
  Returns
1186
1193
  -------
@@ -1196,42 +1203,25 @@ class SBML_dfs:
1196
1203
  If ontologies is not a set
1197
1204
  """
1198
1205
  # validate inputs
1199
- entity_table = self.get_table(entity_type, required_attributes={"id"})
1200
- entity_pk = self.schema[entity_type]["pk"]
1201
-
1202
- utils.match_pd_vars(
1203
- identifiers_df,
1204
- req_vars={
1205
- entity_pk,
1206
- IDENTIFIERS.ONTOLOGY,
1207
- IDENTIFIERS.IDENTIFIER,
1208
- IDENTIFIERS.URL,
1209
- IDENTIFIERS.BQB,
1210
- },
1211
- allow_series=False,
1212
- ).assert_present()
1213
-
1214
- if ontologies is not None:
1215
- if not isinstance(ontologies, set):
1216
- # for clarity this should not be reachable based on type hints
1217
- raise TypeError(
1218
- f"ontologies must be a set, but got {type(ontologies).__name__}"
1219
- )
1220
- ALL_VALID_ONTOLOGIES = identifiers_df["ontology"].unique()
1221
- invalid_ontologies = ontologies.difference(ALL_VALID_ONTOLOGIES)
1222
- if len(invalid_ontologies) > 0:
1223
- raise ValueError(
1224
- f"The following ontologies are not valid: {', '.join(invalid_ontologies)}.\n"
1225
- f"Valid ontologies are {', '.join(ALL_VALID_ONTOLOGIES)}"
1226
- )
1227
1206
 
1228
- # fitler to just to identifiers matchign the ontologies of interest
1229
- identifiers_df = identifiers_df.query("ontology in @ontologies")
1207
+ entity_type = sbml_dfs_utils.infer_entity_type(id_table)
1208
+ entity_table = self.get_table(entity_type, required_attributes={SCHEMA_DEFS.ID})
1209
+ entity_pk = self.schema[entity_type][SCHEMA_DEFS.PK]
1230
1210
 
1231
- matching_identifiers = identifiers_df.loc[
1232
- identifiers_df["identifier"].isin(ids)
1233
- ]
1234
- entity_subset = entity_table.loc[matching_identifiers[entity_pk].tolist()]
1211
+ matching_identifiers = id_tables.filter_id_table(
1212
+ id_table=id_table, identifiers=identifiers, ontologies=ontologies, bqbs=bqbs
1213
+ )
1214
+
1215
+ matching_keys = matching_identifiers[entity_pk].tolist()
1216
+ entity_subset = entity_table.loc[matching_keys]
1217
+
1218
+ if matching_identifiers.shape[0] != entity_subset.shape[0]:
1219
+ raise ValueError(
1220
+ f"Some identifiers did not match to an entity for {entity_type}. "
1221
+ "This suggests that the identifiers and sbml_dfs are not in sync. "
1222
+ "Please create new identifiers with sbml_dfs.get_characteristic_species_ids() "
1223
+ "or sbml_dfs.get_identifiers()."
1224
+ )
1235
1225
 
1236
1226
  return entity_subset, matching_identifiers
1237
1227
 
@@ -14,24 +14,29 @@ from napistu import utils
14
14
  from napistu import identifiers
15
15
  from napistu import indices
16
16
 
17
- from napistu.constants import BQB
18
- from napistu.constants import SBML_DFS
19
- from napistu.constants import SBML_DFS_SCHEMA
20
- from napistu.constants import IDENTIFIERS
21
- from napistu.constants import BQB_DEFINING_ATTRS
22
- from napistu.constants import BQB_DEFINING_ATTRS_LOOSE
23
- from napistu.constants import REQUIRED_REACTION_FROMEDGELIST_COLUMNS
24
- from napistu.constants import INTERACTION_EDGELIST_EXPECTED_VARS
25
- from napistu.constants import SBO_ROLES_DEFS
26
- from napistu.constants import MINI_SBO_FROM_NAME
27
- from napistu.constants import MINI_SBO_TO_NAME
28
- from napistu.constants import SBO_NAME_TO_ROLE
29
- from napistu.constants import ONTOLOGIES
30
- from napistu.constants import VALID_SBO_TERM_NAMES
31
- from napistu.constants import VALID_SBO_TERMS
32
- from napistu.ingestion.constants import VALID_COMPARTMENTS
33
- from napistu.ingestion.constants import COMPARTMENTS_GO_TERMS
34
- from napistu.ingestion.constants import GENERIC_COMPARTMENT
17
+ from napistu.constants import (
18
+ BQB,
19
+ BQB_DEFINING_ATTRS,
20
+ BQB_DEFINING_ATTRS_LOOSE,
21
+ SBML_DFS,
22
+ SBML_DFS_SCHEMA,
23
+ SCHEMA_DEFS,
24
+ IDENTIFIERS,
25
+ INTERACTION_EDGELIST_EXPECTED_VARS,
26
+ ONTOLOGIES,
27
+ MINI_SBO_FROM_NAME,
28
+ MINI_SBO_TO_NAME,
29
+ REQUIRED_REACTION_FROMEDGELIST_COLUMNS,
30
+ SBO_ROLES_DEFS,
31
+ SBO_NAME_TO_ROLE,
32
+ VALID_SBO_TERM_NAMES,
33
+ VALID_SBO_TERMS,
34
+ )
35
+ from napistu.ingestion.constants import (
36
+ COMPARTMENTS_GO_TERMS,
37
+ GENERIC_COMPARTMENT,
38
+ VALID_COMPARTMENTS,
39
+ )
35
40
 
36
41
  logger = logging.getLogger(__name__)
37
42
 
@@ -418,6 +423,65 @@ def id_formatter_inv(ids: list[str]) -> list[int]:
418
423
  return id_val
419
424
 
420
425
 
426
def infer_entity_type(df: pd.DataFrame) -> str:
    """
    Work out which SBML_DFS schema table a DataFrame corresponds to.

    The index name is consulted first: if it equals the primary key of a
    schema table, that table's name is returned. Otherwise, the DataFrame
    columns which are primary keys of any schema table are compared to each
    table's primary key plus its foreign keys, and the first table whose key
    set is exactly equal is returned.

    Parameters
    ----------
    df : pd.DataFrame
        The table to classify

    Returns
    -------
    str
        The name of the matching entity type in the schema

    Raises
    ------
    ValueError
        If neither the index name nor the key columns identify an entity type
    """
    schema = SBML_DFS_SCHEMA.SCHEMA

    # Every primary key defined anywhere in the schema (tables without one
    # are ignored).
    known_pks = [
        table_pk
        for table_pk in (
            table_def.get(SCHEMA_DEFS.PK) for table_def in schema.values()
        )
        if table_pk is not None
    ]

    # Strategy 1: the index is named after a table's primary key.
    index_name = df.index.name
    for entity_type, table_def in schema.items():
        table_pk = table_def.get(SCHEMA_DEFS.PK)
        if table_pk is not None and table_pk == index_name:
            return entity_type

    # Strategy 2: compare the key-like columns against each table's keys.
    # Only columns that are a primary key of some table take part, so
    # non-key columns never prevent a match.
    key_columns = set(df.columns) & set(known_pks)

    for entity_type, table_def in schema.items():
        required_keys = set(table_def.get(SCHEMA_DEFS.FK, []))
        table_pk = table_def.get(SCHEMA_DEFS.PK)
        if table_pk:
            required_keys.add(table_pk)

        # The match must be exact: no missing and no additional key columns.
        if required_keys == key_columns:
            return entity_type

    raise ValueError(
        f"No entity type matches DataFrame with columns: {sorted(key_columns)}"
    )
483
+
484
+
421
485
  def match_entitydata_index_to_entity(
422
486
  entity_data_dict: dict,
423
487
  an_entity_data_type: str,