napistu 0.4.2__tar.gz → 0.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. {napistu-0.4.2/src/napistu.egg-info → napistu-0.4.4}/PKG-INFO +1 -1
  2. {napistu-0.4.2 → napistu-0.4.4}/setup.cfg +1 -1
  3. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/consensus.py +3 -4
  4. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/constants.py +51 -0
  5. napistu-0.4.4/src/napistu/ingestion/reactom_fi.py +208 -0
  6. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/neighborhoods.py +28 -7
  7. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/ng_utils.py +26 -6
  8. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/precompute.py +56 -0
  9. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/sbml_dfs_utils.py +8 -2
  10. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/source.py +243 -40
  11. napistu-0.4.4/src/napistu/statistics/hypothesis_testing.py +66 -0
  12. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/utils.py +23 -1
  13. {napistu-0.4.2 → napistu-0.4.4/src/napistu.egg-info}/PKG-INFO +1 -1
  14. {napistu-0.4.2 → napistu-0.4.4}/src/napistu.egg-info/SOURCES.txt +3 -1
  15. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_precompute.py +30 -0
  16. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_sbml_dfs_utils.py +13 -0
  17. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_source.py +38 -6
  18. napistu-0.4.4/src/tests/test_statistics_hypothesis_testing.py +62 -0
  19. napistu-0.4.2/src/tests/test_set_coverage.py +0 -50
  20. {napistu-0.4.2 → napistu-0.4.4}/LICENSE +0 -0
  21. {napistu-0.4.2 → napistu-0.4.4}/README.md +0 -0
  22. {napistu-0.4.2 → napistu-0.4.4}/pyproject.toml +0 -0
  23. {napistu-0.4.2 → napistu-0.4.4}/setup.py +0 -0
  24. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/__init__.py +0 -0
  25. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/__main__.py +0 -0
  26. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/constants.py +0 -0
  27. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/context/__init__.py +0 -0
  28. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/context/discretize.py +0 -0
  29. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/context/filtering.py +0 -0
  30. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/gcs/__init__.py +0 -0
  31. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/gcs/constants.py +0 -0
  32. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/gcs/downloads.py +0 -0
  33. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/gcs/utils.py +0 -0
  34. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/identifiers.py +0 -0
  35. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/indices.py +0 -0
  36. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/__init__.py +0 -0
  37. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/bigg.py +0 -0
  38. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/gtex.py +0 -0
  39. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/hpa.py +0 -0
  40. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/identifiers_etl.py +0 -0
  41. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/napistu_edgelist.py +0 -0
  42. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/obo.py +0 -0
  43. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/psi_mi.py +0 -0
  44. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/reactome.py +0 -0
  45. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/sbml.py +0 -0
  46. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/string.py +0 -0
  47. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/trrust.py +0 -0
  48. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ingestion/yeast.py +0 -0
  49. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/matching/__init__.py +0 -0
  50. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/matching/constants.py +0 -0
  51. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/matching/interactions.py +0 -0
  52. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/matching/mount.py +0 -0
  53. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/matching/species.py +0 -0
  54. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/__init__.py +0 -0
  55. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/__main__.py +0 -0
  56. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/client.py +0 -0
  57. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/codebase.py +0 -0
  58. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/codebase_utils.py +0 -0
  59. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/component_base.py +0 -0
  60. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/config.py +0 -0
  61. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/constants.py +0 -0
  62. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/documentation.py +0 -0
  63. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/documentation_utils.py +0 -0
  64. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/execution.py +0 -0
  65. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/health.py +0 -0
  66. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/profiles.py +0 -0
  67. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/server.py +0 -0
  68. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/tutorials.py +0 -0
  69. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/tutorials_utils.py +0 -0
  70. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/mcp/utils.py +0 -0
  71. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/__init__.py +0 -0
  72. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/constants.py +0 -0
  73. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/curation.py +0 -0
  74. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/gaps.py +0 -0
  75. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/pathwayannot.py +0 -0
  76. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/modify/uncompartmentalize.py +0 -0
  77. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/__init__.py +0 -0
  78. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/constants.py +0 -0
  79. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/data_handling.py +0 -0
  80. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/ig_utils.py +0 -0
  81. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/net_create.py +0 -0
  82. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/net_create_utils.py +0 -0
  83. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/net_propagation.py +0 -0
  84. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/ng_core.py +0 -0
  85. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/network/paths.py +0 -0
  86. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/__init__.py +0 -0
  87. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/constants.py +0 -0
  88. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/dogma.py +0 -0
  89. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/genodexito.py +0 -0
  90. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/id_tables.py +0 -0
  91. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/mygene.py +0 -0
  92. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/ontologies/renaming.py +0 -0
  93. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/rpy2/__init__.py +0 -0
  94. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/rpy2/callr.py +0 -0
  95. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/rpy2/constants.py +0 -0
  96. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/rpy2/rids.py +0 -0
  97. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/sbml_dfs_core.py +0 -0
  98. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/scverse/__init__.py +0 -0
  99. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/scverse/constants.py +0 -0
  100. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/scverse/loading.py +0 -0
  101. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/statistics/__init__.py +0 -0
  102. {napistu-0.4.2 → napistu-0.4.4}/src/napistu/statistics/quantiles.py +0 -0
  103. {napistu-0.4.2 → napistu-0.4.4}/src/napistu.egg-info/dependency_links.txt +0 -0
  104. {napistu-0.4.2 → napistu-0.4.4}/src/napistu.egg-info/entry_points.txt +0 -0
  105. {napistu-0.4.2 → napistu-0.4.4}/src/napistu.egg-info/requires.txt +0 -0
  106. {napistu-0.4.2 → napistu-0.4.4}/src/napistu.egg-info/top_level.txt +0 -0
  107. {napistu-0.4.2 → napistu-0.4.4}/src/tests/__init__.py +0 -0
  108. {napistu-0.4.2 → napistu-0.4.4}/src/tests/conftest.py +0 -0
  109. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_consensus.py +0 -0
  110. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_constants.py +0 -0
  111. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_context_discretize.py +0 -0
  112. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_context_filtering.py +0 -0
  113. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_curation.py +0 -0
  114. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_data/__init__.py +0 -0
  115. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_gaps.py +0 -0
  116. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_gcs.py +0 -0
  117. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_identifiers.py +0 -0
  118. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_indices.py +0 -0
  119. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ingestion_napistu_edgelist.py +0 -0
  120. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ingestion_obo.py +0 -0
  121. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_matching_interactions.py +0 -0
  122. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_matching_mount.py +0 -0
  123. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_matching_species.py +0 -0
  124. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_mcp_config.py +0 -0
  125. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_mcp_documentation_utils.py +0 -0
  126. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_mcp_server.py +0 -0
  127. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_data_handling.py +0 -0
  128. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_ig_utils.py +0 -0
  129. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_neighborhoods.py +0 -0
  130. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_net_create.py +0 -0
  131. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_net_create_utils.py +0 -0
  132. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_net_propagation.py +0 -0
  133. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_ng_core.py +0 -0
  134. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_ng_utils.py +0 -0
  135. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_network_paths.py +0 -0
  136. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ontologies_genodexito.py +0 -0
  137. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ontologies_id_tables.py +0 -0
  138. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ontologies_mygene.py +0 -0
  139. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_ontologies_renaming.py +0 -0
  140. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_pathwayannot.py +0 -0
  141. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_rpy2_callr.py +0 -0
  142. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_rpy2_init.py +0 -0
  143. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_sbml.py +0 -0
  144. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_sbml_dfs_core.py +0 -0
  145. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_sbo.py +0 -0
  146. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_scverse_loading.py +0 -0
  147. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_statistics_quantiles.py +0 -0
  148. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_uncompartmentalize.py +0 -0
  149. {napistu-0.4.2 → napistu-0.4.4}/src/tests/test_utils.py +0 -0
  150. {napistu-0.4.2 → napistu-0.4.4}/src/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: napistu
3
- Version: 0.4.2
3
+ Version: 0.4.4
4
4
  Summary: Connecting high-dimensional data to curated pathways
5
5
  Home-page: https://github.com/napistu/napistu-py
6
6
  Author: Sean Hackett
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = napistu
3
- version = 0.4.2
3
+ version = 0.4.4
4
4
  description = Connecting high-dimensional data to curated pathways
5
5
  long_description = file: README.md
6
6
  long_description_content_type = text/markdown
@@ -426,7 +426,7 @@ def post_consensus_species_ontology_check(sbml_dfs: sbml_dfs_core.SBML_dfs) -> s
426
426
 
427
427
  # get the sources of species in the consensus model
428
428
  consensus_sbmldf_tbl_var_sc = (
429
- source.unnest_sources(sbml_dfs.species, SBML_DFS.S_SOURCE, verbose=False)
429
+ source.unnest_sources(sbml_dfs.species, verbose=False)
430
430
  .reset_index()
431
431
  .sort_values([SOURCE_SPEC.NAME])
432
432
  )
@@ -504,12 +504,11 @@ def post_consensus_source_check(
504
504
  ) -> pd.DataFrame:
505
505
  """Provide sources of tables in a consensus model; the output df will be used to determine whether models are merged."""
506
506
 
507
- table_source = sbml_dfs.schema[table_name][SOURCE_SPEC.SOURCE]
508
- table_pk = sbml_dfs.schema[table_name]["pk"]
507
+ table_pk = sbml_dfs.schema[table_name][SCHEMA_DEFS.PK]
509
508
 
510
509
  sbml_dfs_tbl = getattr(sbml_dfs, table_name)
511
510
  sbml_dfs_tbl_pathway_source = (
512
- source.unnest_sources(sbml_dfs_tbl, table_source, verbose=False)
511
+ source.unnest_sources(sbml_dfs_tbl, verbose=False)
513
512
  .reset_index()
514
513
  .sort_values(["name"])
515
514
  )
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
 
4
4
  from types import SimpleNamespace
5
5
 
6
+ from napistu.constants import SBOTERM_NAMES
6
7
 
7
8
  SPECIES_FULL_NAME_HUMAN = "Homo sapiens"
8
9
  SPECIES_FULL_NAME_MOUSE = "Mus musculus"
@@ -90,6 +91,56 @@ REACTOME_PATHWAYS_URL = "https://reactome.org/download/current/ReactomePathways.
90
91
  REACTOME_PATHWAY_INDEX_COLUMNS = ["file", "source", "species", "pathway_id", "name"]
91
92
  REACTOME_PATHWAY_LIST_COLUMNS = ["pathway_id", "name", "species"]
92
93
 
94
+ # REACTOME FI
95
+ REACTOME_FI_URL = "http://cpws.reactome.org/caBigR3WebApp2025/FIsInGene_04142025_with_annotations.txt.zip"
96
+
97
+ REACTOME_FI = SimpleNamespace(
98
+ GENE1="Gene1",
99
+ GENE2="Gene2",
100
+ ANNOTATION="Annotation",
101
+ DIRECTION="Direction",
102
+ SCORE="Score",
103
+ )
104
+
105
+ REACTOME_FI_DIRECTIONS = SimpleNamespace(
106
+ UNDIRECTED="-",
107
+ STIMULATED_BY="<-",
108
+ STIMULATES="->",
109
+ STIMULATES_AND_STIMULATED_BY="<->",
110
+ INHIBITED_BY="|-",
111
+ INHIBITS="-|",
112
+ INHIBITS_AND_INHIBITED_BY="|-|",
113
+ STIMULATES_AND_INHIBITED_BY="|->",
114
+ INHIBITS_AND_STIMULATED_BY="<-|",
115
+ )
116
+
117
+ VALID_REACTOME_FI_DIRECTIONS = REACTOME_FI_DIRECTIONS.__dict__.values()
118
+
119
+ REACTOME_FI_RULES_REVERSE = SimpleNamespace(
120
+ NAME_RULES={"catalyzed by": SBOTERM_NAMES.CATALYST},
121
+ DIRECTION_RULES={
122
+ REACTOME_FI_DIRECTIONS.STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
123
+ REACTOME_FI_DIRECTIONS.STIMULATES_AND_STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
124
+ REACTOME_FI_DIRECTIONS.INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
125
+ REACTOME_FI_DIRECTIONS.INHIBITS_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
126
+ REACTOME_FI_DIRECTIONS.STIMULATES_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
127
+ REACTOME_FI_DIRECTIONS.UNDIRECTED: SBOTERM_NAMES.INTERACTOR,
128
+ },
129
+ )
130
+
131
+ REACTOME_FI_RULES_FORWARD = SimpleNamespace(
132
+ NAME_RULES={"catalyze(;$)": SBOTERM_NAMES.CATALYST},
133
+ DIRECTION_RULES={
134
+ REACTOME_FI_DIRECTIONS.STIMULATES: SBOTERM_NAMES.STIMULATOR,
135
+ REACTOME_FI_DIRECTIONS.STIMULATES_AND_STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
136
+ REACTOME_FI_DIRECTIONS.STIMULATES_AND_INHIBITED_BY: SBOTERM_NAMES.STIMULATOR,
137
+ REACTOME_FI_DIRECTIONS.INHIBITS: SBOTERM_NAMES.INHIBITOR,
138
+ REACTOME_FI_DIRECTIONS.INHIBITS_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
139
+ REACTOME_FI_DIRECTIONS.INHIBITS_AND_STIMULATED_BY: SBOTERM_NAMES.INHIBITOR,
140
+ REACTOME_FI_DIRECTIONS.UNDIRECTED: SBOTERM_NAMES.INTERACTOR,
141
+ },
142
+ )
143
+
93
144
  # SBML
94
145
  SBML_DEFS = SimpleNamespace(
95
146
  ERROR_NUMBER="error_number",
@@ -0,0 +1,208 @@
1
+ import logging
2
+ import pandas as pd
3
+
4
+ from napistu.identifiers import Identifiers
5
+ from napistu import utils
6
+ from napistu.ingestion.constants import (
7
+ REACTOME_FI,
8
+ REACTOME_FI_RULES_FORWARD,
9
+ REACTOME_FI_RULES_REVERSE,
10
+ REACTOME_FI_URL,
11
+ VALID_REACTOME_FI_DIRECTIONS,
12
+ )
13
+
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ def download_reactome_fi(target_uri: str, url: str = REACTOME_FI_URL) -> None:
19
+ """
20
+ Download the Reactome Functional Interactions (FI) dataset as a TSV file.
21
+
22
+ Parameters
23
+ ----------
24
+ target_uri : str
25
+ The URI where the Reactome FI data should be saved. Must end with .tsv
26
+ url : str, optional
27
+ URL to download the zipped Reactome functional interactions TSV from.
28
+ Defaults to REACTOME_FI_URL.
29
+
30
+ Returns
31
+ -------
32
+ None
33
+
34
+ Raises
35
+ ------
36
+ ValueError
37
+ If target_uri does not end with .tsv
38
+ """
39
+
40
+ if not target_uri.endswith(".tsv"):
41
+ raise ValueError(f"Target URI must end with .tsv, got {target_uri}")
42
+
43
+ file_ext = url.split(".")[-1]
44
+ target_filename = url.split("/")[-1].split(f".{file_ext}")[0]
45
+ logger.info("Start downloading proteinatlas %s to %s", url, target_uri)
46
+ # target_filename is the name of the file in the zip file which will be renamed to target_uri
47
+ utils.download_wget(url, target_uri, target_filename=target_filename)
48
+
49
+ return None
50
+
51
+
52
+ def format_reactome_fi_edgelist(interactions: pd.DataFrame):
53
+ """
54
+ Format the Reactome FI interactions DataFrame as an edgelist for network analysis.
55
+
56
+ Parameters
57
+ ----------
58
+ interactions : pd.DataFrame
59
+ DataFrame containing Reactome FI interactions.
60
+
61
+ Returns
62
+ -------
63
+ Dictionary of:
64
+
65
+ interaction_edgelist : pd.DataFrame
66
+ Table containing molecular interactions with columns:
67
+ - upstream_name : str, matches "s_name" from species_df
68
+ - downstream_name : str, matches "s_name" from species_df
69
+ - upstream_compartment : str, matches "c_name" from compartments_df
70
+ - downstream_compartment : str, matches "c_name" from compartments_df
71
+ - r_name : str, name for the interaction
72
+ - sbo_term : str, SBO term defining interaction type
73
+ - r_Identifiers : identifiers.Identifiers, supporting identifiers
74
+ - r_isreversible : bool, whether reaction is reversible
75
+ species_df : pd.DataFrame
76
+ Table defining molecular species with columns:
77
+ - s_name : str, name of molecular species
78
+ - s_Identifiers : identifiers.Identifiers, species identifiers
79
+ compartments_df : pd.DataFrame
80
+ Table defining compartments with columns:
81
+ - c_name : str, name of compartment
82
+ - c_Identifiers : identifiers.Identifiers, compartment identifiers
83
+
84
+ Notes
85
+ -----
86
+ This function is not yet implemented and will raise NotImplementedError.
87
+ """
88
+
89
+ raise NotImplementedError("TO DO - This function is incomplete")
90
+
91
+ formatted_annotations = _parse_reactome_fi_annotations(interactions)
92
+
93
+ # this join will expand some rows to 2 since the bidirectional relationships are captured as separate edges in Napistu
94
+ annotated_interactions = interactions.merge(
95
+ formatted_annotations,
96
+ on=[REACTOME_FI.ANNOTATION, REACTOME_FI.DIRECTION],
97
+ how="left",
98
+ )
99
+
100
+ # flip reverse entries so all relationships are forward or undirected
101
+ formatted_interactions = (
102
+ pd.concat(
103
+ [
104
+ annotated_interactions.query("polarity == 'forward'"),
105
+ (
106
+ annotated_interactions.query("polarity == 'reverse'").rename(
107
+ columns={
108
+ REACTOME_FI.GENE1: REACTOME_FI.GENE2,
109
+ REACTOME_FI.GENE2: REACTOME_FI.GENE1,
110
+ }
111
+ )
112
+ ),
113
+ ]
114
+ )[[REACTOME_FI.GENE1, REACTOME_FI.GENE2, "sbo_term_name", "Score"]]
115
+ # looks like they were already unique edges
116
+ .sort_values("Score", ascending=False)
117
+ .groupby([REACTOME_FI.GENE1, REACTOME_FI.GENE2])
118
+ .first()
119
+ )
120
+
121
+ fi_edgelist = (
122
+ formatted_interactions.reset_index()
123
+ .rename(
124
+ columns={
125
+ REACTOME_FI.GENE1: "upstream_name",
126
+ REACTOME_FI.GENE2: "downstream_name",
127
+ }
128
+ )
129
+ .assign(r_Identifiers=Identifiers([]))
130
+ )
131
+
132
+ return fi_edgelist
133
+
134
+
135
+ def _parse_reactome_fi_annotations(interactions: pd.DataFrame) -> pd.DataFrame:
136
+ """
137
+ Parse and annotate Reactome FI interaction types and directions using regex-based rules.
138
+
139
+ Parameters
140
+ ----------
141
+ interactions : pd.DataFrame
142
+ DataFrame containing Reactome FI interactions, with annotation and direction columns.
143
+
144
+ Returns
145
+ -------
146
+ pd.DataFrame
147
+ DataFrame with annotation, direction, SBO term name, and polarity for each unique annotation/direction pair.
148
+
149
+ Raises
150
+ ------
151
+ ValueError
152
+ If an annotation/direction pair cannot be matched to a rule or if invalid directions are found.
153
+ """
154
+
155
+ distinct_annotations = (
156
+ interactions[[REACTOME_FI.ANNOTATION, REACTOME_FI.DIRECTION]]
157
+ .drop_duplicates()
158
+ .reset_index(drop=True)
159
+ )
160
+ invalid_directions = distinct_annotations.loc[
161
+ ~distinct_annotations[REACTOME_FI.DIRECTION].isin(VALID_REACTOME_FI_DIRECTIONS),
162
+ "Direction",
163
+ ]
164
+ if len(invalid_directions) > 0:
165
+ raise ValueError(f"Invalid directions: {invalid_directions}")
166
+
167
+ annotations = list()
168
+ for _, vals in distinct_annotations.iterrows():
169
+ annot, direction = vals
170
+
171
+ forward_match = utils.match_regex_dict(
172
+ annot, REACTOME_FI_RULES_FORWARD.NAME_RULES
173
+ )
174
+ if not forward_match:
175
+ if direction in REACTOME_FI_RULES_FORWARD.DIRECTION_RULES:
176
+ forward_match = REACTOME_FI_RULES_FORWARD.DIRECTION_RULES[direction]
177
+
178
+ reverse_match = utils.match_regex_dict(
179
+ annot, REACTOME_FI_RULES_REVERSE.NAME_RULES
180
+ )
181
+ if not reverse_match:
182
+ if direction in REACTOME_FI_RULES_REVERSE.DIRECTION_RULES:
183
+ reverse_match = REACTOME_FI_RULES_REVERSE.DIRECTION_RULES[direction]
184
+
185
+ if not (forward_match or reverse_match):
186
+ raise ValueError(f"No match found for {annot} with direction {direction}")
187
+
188
+ if forward_match:
189
+ annotations.append(
190
+ {
191
+ REACTOME_FI.ANNOTATION: annot,
192
+ REACTOME_FI.DIRECTION: direction,
193
+ "sbo_term_name": forward_match,
194
+ "polarity": "forward",
195
+ }
196
+ )
197
+
198
+ if reverse_match:
199
+ annotations.append(
200
+ {
201
+ REACTOME_FI.ANNOTATION: annot,
202
+ REACTOME_FI.DIRECTION: direction,
203
+ "sbo_term_name": reverse_match,
204
+ "polarity": "reverse",
205
+ }
206
+ )
207
+
208
+ return pd.DataFrame(annotations)
@@ -34,6 +34,7 @@ def find_and_prune_neighborhoods(
34
34
  napistu_graph: ig.Graph,
35
35
  compartmentalized_species: str | list[str],
36
36
  precomputed_distances: pd.DataFrame | None = None,
37
+ source_total_counts: pd.Series | None = None,
37
38
  network_type: str = NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM,
38
39
  order: int = 3,
39
40
  verbose: bool = True,
@@ -44,7 +45,7 @@ def find_and_prune_neighborhoods(
44
45
 
45
46
  Wrapper which combines find_neighborhoods() and prune_neighborhoods()
46
47
 
47
- Parameters
48
+ Parameters
48
49
  ----------
49
50
  sbml_dfs: sbml_dfs_core.SBML_dfs
50
51
  A mechanistic molecular model
@@ -54,6 +55,9 @@ def find_and_prune_neighborhoods(
54
55
  Compartmentalized species IDs for neighborhood centers
55
56
  precomputed_distances : pd.DataFrame or None
56
57
  If provided, an edgelist of origin->destination path weights and lengths
58
+ source_total_counts: pd.Series | None
59
+ Optional. A series of the total counts of each source, as produced by
60
+ source.get_source_total_counts()
57
61
  network_type: str
58
62
  If the network is directed should neighbors be located "downstream",
59
63
  or "upstream" of each compartmentalized species. The "hourglass" option
@@ -109,6 +113,7 @@ def find_and_prune_neighborhoods(
109
113
  order=order,
110
114
  verbose=verbose,
111
115
  precomputed_neighbors=precomputed_neighbors,
116
+ source_total_counts=source_total_counts,
112
117
  )
113
118
 
114
119
  pruned_neighborhoods = prune_neighborhoods(neighborhoods, top_n=top_n)
@@ -132,7 +137,7 @@ def load_neighborhoods(
132
137
 
133
138
  Load existing neighborhoods if they exist
134
139
  (and overwrite = False) and otherwise construct
135
- neighborhoods using the provided settings
140
+ neighborhoods using the provided settings
136
141
 
137
142
  Parameters
138
143
  ----------
@@ -509,12 +514,13 @@ def find_neighborhoods(
509
514
  order: int = 3,
510
515
  verbose: bool = True,
511
516
  precomputed_neighbors: pd.DataFrame | None = None,
517
+ source_total_counts: pd.Series | None = None,
512
518
  ) -> dict:
513
519
  """
514
520
  Find Neighborhood
515
521
 
516
522
  Create a network composed of all species and reactions within N steps of
517
- each of a set of compartmentalized species.
523
+ each of a set of compartmentalized species.
518
524
 
519
525
  Parameters
520
526
  ----------
@@ -535,11 +541,14 @@ def find_neighborhoods(
535
541
  precomputed_neighbors: pd.DataFrame or None
536
542
  If provided, a pre-filtered table of nodes nearby the compartmentalized species
537
543
  which will be used to skip on-the-fly neighborhood generation.
544
+ source_total_counts: pd.Series | None
545
+ Optional. A series of the total counts of each source, as produced by
546
+ source.get_source_total_counts()
538
547
 
539
548
  Returns:
540
549
  ----------
541
550
  A dict containing the neighborhood of each compartmentalized species.
542
- Each entry in the dict is a dict of the subgraph, vertices, and edges.
551
+ Each entry in the dict is a dict of the subgraph, vertices, and edges.
543
552
  """
544
553
 
545
554
  if not isinstance(network_type, str):
@@ -567,7 +576,12 @@ def find_neighborhoods(
567
576
  # format the vertices and edges in each compartmentalized species' network
568
577
  neighborhood_dict = {
569
578
  sc_id: create_neighborhood_dict_entry(
570
- sc_id, neighborhood_df, sbml_dfs, napistu_graph, verbose=verbose
579
+ sc_id,
580
+ neighborhood_df=neighborhood_df,
581
+ sbml_dfs=sbml_dfs,
582
+ napistu_graph=napistu_graph,
583
+ source_total_counts=source_total_counts,
584
+ verbose=verbose,
571
585
  )
572
586
  for sc_id in compartmentalized_species
573
587
  }
@@ -580,6 +594,7 @@ def create_neighborhood_dict_entry(
580
594
  neighborhood_df: pd.DataFrame,
581
595
  sbml_dfs: sbml_dfs_core.SBML_dfs,
582
596
  napistu_graph: ig.Graph,
597
+ source_total_counts: pd.Series | None = None,
583
598
  verbose: bool = False,
584
599
  ) -> dict[str, Any]:
585
600
  """
@@ -597,6 +612,9 @@ def create_neighborhood_dict_entry(
597
612
  A mechanistic molecular model
598
613
  napistu_graph: igraph.Graph
599
614
  A network connecting molecular species and reactions
615
+ source_total_counts: pd.Series | None
616
+ Optional. A series of the total counts of each source, as produced by
617
+ source.get_source_total_counts()
600
618
  verbose: bool
601
619
  Extra reporting?
602
620
 
@@ -645,7 +663,10 @@ def create_neighborhood_dict_entry(
645
663
 
646
664
  try:
647
665
  edge_sources = ng_utils.get_minimal_sources_edges(
648
- vertices.rename(columns={"name": "node"}), sbml_dfs
666
+ vertices.rename(columns={"name": "node"}),
667
+ sbml_dfs,
668
+ # optional, counts of sources across the whole model
669
+ source_total_counts,
649
670
  )
650
671
  except Exception:
651
672
  edge_sources = None
@@ -1441,7 +1462,7 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
1441
1462
  ----------
1442
1463
  one_neighborhood: dict
1443
1464
  The neighborhood around a single compartmentalized species - one of the values
1444
- in dict created by find_neighborhoods().
1465
+ in dict created by find_neighborhoods().
1445
1466
  top_n: int
1446
1467
  How many neighboring molecular species should be retained?
1447
1468
  If the neighborhood includes both upstream and downstream connections
@@ -66,7 +66,7 @@ def compartmentalize_species_pairs(
66
66
  Compartmentalize Shortest Paths
67
67
 
68
68
  For a set of origin and destination species pairs, consider each species in every
69
- compartment it operates in, seperately.
69
+ compartment it operates in, separately.
70
70
 
71
71
  Parameters
72
72
  ----------
@@ -112,22 +112,42 @@ def compartmentalize_species_pairs(
112
112
 
113
113
 
114
114
  def get_minimal_sources_edges(
115
- vertices: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
115
+ vertices: pd.DataFrame,
116
+ sbml_dfs: sbml_dfs_core.SBML_dfs,
117
+ source_total_counts: Optional[pd.Series] = None,
116
118
  ) -> pd.DataFrame | None:
117
- """Assign edges to a set of sources."""
119
+ """
120
+ Assign edges to a set of sources.
121
+
122
+ Parameters
123
+ ----------
124
+ vertices: pd.DataFrame
125
+ A table of vertices.
126
+ sbml_dfs: sbml_dfs_core.SBML_dfs
127
+ A pathway model
128
+ source_total_counts: pd.Series | None
129
+ A series of the total counts of each source.
130
+
131
+ Returns
132
+ -------
133
+ edge_sources: pd.DataFrame
134
+ A table of edges and the sources they are assigned to.
135
+ """
136
+
118
137
  nodes = vertices["node"].tolist()
119
138
  present_reactions = sbml_dfs.reactions[sbml_dfs.reactions.index.isin(nodes)]
120
139
 
121
140
  if len(present_reactions) == 0:
122
141
  return None
123
142
 
124
- table_schema = sbml_dfs.schema[SBML_DFS.REACTIONS]
125
- source_df = source.unnest_sources(present_reactions, table_schema["source"])
143
+ source_df = source.unnest_sources(present_reactions)
126
144
 
127
145
  if source_df is None:
128
146
  return None
129
147
  else:
130
- edge_sources = source.greedy_set_coverge_of_sources(source_df, table_schema)
148
+ edge_sources = source.source_set_coverage(
149
+ source_df, source_total_counts, sbml_dfs
150
+ )
131
151
  return edge_sources.reset_index()[
132
152
  [SBML_DFS.R_ID, SOURCE_SPEC.PATHWAY_ID, SOURCE_SPEC.NAME]
133
153
  ]
@@ -110,6 +110,62 @@ def precompute_distances(
110
110
  return filtered_precomputed_distances
111
111
 
112
112
 
113
+ def filter_precomputed_distances_top_n(precomputed_distances, top_n=50):
114
+ """
115
+ Filter precomputed distances to only include the top-n pairs for each distance measure.
116
+
117
+ Parameters
118
+ ----------
119
+ precomputed_distances : pd.DataFrame
120
+ Precomputed distances.
121
+ top_n : int, optional
122
+ Top-n pairs to include for each distance measure.
123
+
124
+ Returns
125
+ -------
126
+ pd.DataFrame
127
+ Filtered precomputed distances.
128
+ """
129
+
130
+ # take the union of top-n for each distance measure; and from origin -> dest and dest -> origin
131
+ distance_vars = set(precomputed_distances.columns) - {
132
+ NAPISTU_EDGELIST.SC_ID_ORIGIN,
133
+ NAPISTU_EDGELIST.SC_ID_DEST,
134
+ }
135
+
136
+ valid_pairs = list()
137
+ for distance_var in distance_vars:
138
+ top_n_pairs_by_origin = (
139
+ precomputed_distances.sort_values(by=distance_var, ascending=False)
140
+ .groupby(NAPISTU_EDGELIST.SC_ID_ORIGIN)
141
+ .head(top_n)
142
+ )
143
+ top_n_pairs_by_dest = (
144
+ precomputed_distances.sort_values(by=distance_var, ascending=False)
145
+ .groupby(NAPISTU_EDGELIST.SC_ID_DEST)
146
+ .head(top_n)
147
+ )
148
+
149
+ valid_pairs.append(
150
+ top_n_pairs_by_origin[
151
+ [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]
152
+ ]
153
+ )
154
+ valid_pairs.append(
155
+ top_n_pairs_by_dest[
156
+ [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]
157
+ ]
158
+ )
159
+
160
+ all_valid_pairs = pd.concat(valid_pairs).drop_duplicates()
161
+
162
+ return precomputed_distances.merge(
163
+ all_valid_pairs,
164
+ on=[NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST],
165
+ how="inner",
166
+ )
167
+
168
+
113
169
  def _calculate_distances_subset(
114
170
  napistu_graph: NapistuGraph,
115
171
  vs_to_partition: pd.DataFrame,
@@ -456,8 +456,14 @@ def infer_entity_type(df: pd.DataFrame) -> str:
456
456
  if entity_schema.get(SCHEMA_DEFS.PK) == df.index.name:
457
457
  return entity_type
458
458
 
459
- # Get DataFrame columns that are also primary keys
460
- df_columns = set(df.columns).intersection(primary_keys)
459
+ # Get DataFrame columns that are also primary keys, including index or MultiIndex names
460
+ index_names = []
461
+ if isinstance(df.index, pd.MultiIndex):
462
+ index_names = [name for name in df.index.names if name is not None]
463
+ elif df.index.name is not None:
464
+ index_names = [df.index.name]
465
+
466
+ df_columns = set(df.columns).union(index_names).intersection(primary_keys)
461
467
 
462
468
  # Check for exact match with primary key + foreign keys
463
469
  for entity_type, entity_schema in schema.items():