napistu 0.4.4__tar.gz → 0.4.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. {napistu-0.4.4/src/napistu.egg-info → napistu-0.4.5}/PKG-INFO +1 -1
  2. {napistu-0.4.4 → napistu-0.4.5}/setup.cfg +1 -1
  3. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/neighborhoods.py +13 -5
  4. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/ng_utils.py +4 -1
  5. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/paths.py +13 -1
  6. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/source.py +31 -6
  7. {napistu-0.4.4 → napistu-0.4.5/src/napistu.egg-info}/PKG-INFO +1 -1
  8. {napistu-0.4.4 → napistu-0.4.5}/src/tests/conftest.py +10 -0
  9. napistu-0.4.5/src/tests/test_network_neighborhoods.py +131 -0
  10. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_source.py +1 -1
  11. napistu-0.4.4/src/tests/test_network_neighborhoods.py +0 -19
  12. {napistu-0.4.4 → napistu-0.4.5}/LICENSE +0 -0
  13. {napistu-0.4.4 → napistu-0.4.5}/README.md +0 -0
  14. {napistu-0.4.4 → napistu-0.4.5}/pyproject.toml +0 -0
  15. {napistu-0.4.4 → napistu-0.4.5}/setup.py +0 -0
  16. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/__init__.py +0 -0
  17. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/__main__.py +0 -0
  18. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/consensus.py +0 -0
  19. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/constants.py +0 -0
  20. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/context/__init__.py +0 -0
  21. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/context/discretize.py +0 -0
  22. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/context/filtering.py +0 -0
  23. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/gcs/__init__.py +0 -0
  24. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/gcs/constants.py +0 -0
  25. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/gcs/downloads.py +0 -0
  26. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/gcs/utils.py +0 -0
  27. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/identifiers.py +0 -0
  28. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/indices.py +0 -0
  29. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/__init__.py +0 -0
  30. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/bigg.py +0 -0
  31. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/constants.py +0 -0
  32. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/gtex.py +0 -0
  33. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/hpa.py +0 -0
  34. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/identifiers_etl.py +0 -0
  35. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/napistu_edgelist.py +0 -0
  36. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/obo.py +0 -0
  37. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/psi_mi.py +0 -0
  38. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/reactom_fi.py +0 -0
  39. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/reactome.py +0 -0
  40. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/sbml.py +0 -0
  41. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/string.py +0 -0
  42. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/trrust.py +0 -0
  43. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ingestion/yeast.py +0 -0
  44. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/matching/__init__.py +0 -0
  45. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/matching/constants.py +0 -0
  46. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/matching/interactions.py +0 -0
  47. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/matching/mount.py +0 -0
  48. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/matching/species.py +0 -0
  49. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/__init__.py +0 -0
  50. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/__main__.py +0 -0
  51. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/client.py +0 -0
  52. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/codebase.py +0 -0
  53. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/codebase_utils.py +0 -0
  54. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/component_base.py +0 -0
  55. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/config.py +0 -0
  56. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/constants.py +0 -0
  57. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/documentation.py +0 -0
  58. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/documentation_utils.py +0 -0
  59. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/execution.py +0 -0
  60. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/health.py +0 -0
  61. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/profiles.py +0 -0
  62. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/server.py +0 -0
  63. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/tutorials.py +0 -0
  64. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/tutorials_utils.py +0 -0
  65. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/mcp/utils.py +0 -0
  66. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/modify/__init__.py +0 -0
  67. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/modify/constants.py +0 -0
  68. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/modify/curation.py +0 -0
  69. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/modify/gaps.py +0 -0
  70. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/modify/pathwayannot.py +0 -0
  71. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/modify/uncompartmentalize.py +0 -0
  72. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/__init__.py +0 -0
  73. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/constants.py +0 -0
  74. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/data_handling.py +0 -0
  75. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/ig_utils.py +0 -0
  76. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/net_create.py +0 -0
  77. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/net_create_utils.py +0 -0
  78. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/net_propagation.py +0 -0
  79. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/ng_core.py +0 -0
  80. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/network/precompute.py +0 -0
  81. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ontologies/__init__.py +0 -0
  82. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ontologies/constants.py +0 -0
  83. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ontologies/dogma.py +0 -0
  84. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ontologies/genodexito.py +0 -0
  85. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ontologies/id_tables.py +0 -0
  86. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ontologies/mygene.py +0 -0
  87. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/ontologies/renaming.py +0 -0
  88. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/rpy2/__init__.py +0 -0
  89. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/rpy2/callr.py +0 -0
  90. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/rpy2/constants.py +0 -0
  91. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/rpy2/rids.py +0 -0
  92. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/sbml_dfs_core.py +0 -0
  93. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/sbml_dfs_utils.py +0 -0
  94. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/scverse/__init__.py +0 -0
  95. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/scverse/constants.py +0 -0
  96. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/scverse/loading.py +0 -0
  97. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/statistics/__init__.py +0 -0
  98. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/statistics/hypothesis_testing.py +0 -0
  99. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/statistics/quantiles.py +0 -0
  100. {napistu-0.4.4 → napistu-0.4.5}/src/napistu/utils.py +0 -0
  101. {napistu-0.4.4 → napistu-0.4.5}/src/napistu.egg-info/SOURCES.txt +0 -0
  102. {napistu-0.4.4 → napistu-0.4.5}/src/napistu.egg-info/dependency_links.txt +0 -0
  103. {napistu-0.4.4 → napistu-0.4.5}/src/napistu.egg-info/entry_points.txt +0 -0
  104. {napistu-0.4.4 → napistu-0.4.5}/src/napistu.egg-info/requires.txt +0 -0
  105. {napistu-0.4.4 → napistu-0.4.5}/src/napistu.egg-info/top_level.txt +0 -0
  106. {napistu-0.4.4 → napistu-0.4.5}/src/tests/__init__.py +0 -0
  107. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_consensus.py +0 -0
  108. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_constants.py +0 -0
  109. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_context_discretize.py +0 -0
  110. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_context_filtering.py +0 -0
  111. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_curation.py +0 -0
  112. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_data/__init__.py +0 -0
  113. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_gaps.py +0 -0
  114. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_gcs.py +0 -0
  115. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_identifiers.py +0 -0
  116. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_indices.py +0 -0
  117. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_ingestion_napistu_edgelist.py +0 -0
  118. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_ingestion_obo.py +0 -0
  119. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_matching_interactions.py +0 -0
  120. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_matching_mount.py +0 -0
  121. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_matching_species.py +0 -0
  122. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_mcp_config.py +0 -0
  123. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_mcp_documentation_utils.py +0 -0
  124. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_mcp_server.py +0 -0
  125. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_network_data_handling.py +0 -0
  126. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_network_ig_utils.py +0 -0
  127. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_network_net_create.py +0 -0
  128. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_network_net_create_utils.py +0 -0
  129. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_network_net_propagation.py +0 -0
  130. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_network_ng_core.py +0 -0
  131. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_network_ng_utils.py +0 -0
  132. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_network_paths.py +0 -0
  133. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_network_precompute.py +0 -0
  134. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_ontologies_genodexito.py +0 -0
  135. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_ontologies_id_tables.py +0 -0
  136. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_ontologies_mygene.py +0 -0
  137. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_ontologies_renaming.py +0 -0
  138. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_pathwayannot.py +0 -0
  139. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_rpy2_callr.py +0 -0
  140. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_rpy2_init.py +0 -0
  141. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_sbml.py +0 -0
  142. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_sbml_dfs_core.py +0 -0
  143. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_sbml_dfs_utils.py +0 -0
  144. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_sbo.py +0 -0
  145. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_scverse_loading.py +0 -0
  146. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_statistics_hypothesis_testing.py +0 -0
  147. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_statistics_quantiles.py +0 -0
  148. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_uncompartmentalize.py +0 -0
  149. {napistu-0.4.4 → napistu-0.4.5}/src/tests/test_utils.py +0 -0
  150. {napistu-0.4.4 → napistu-0.4.5}/src/tests/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: napistu
3
- Version: 0.4.4
3
+ Version: 0.4.5
4
4
  Summary: Connecting high-dimensional data to curated pathways
5
5
  Home-page: https://github.com/napistu/napistu-py
6
6
  Author: Sean Hackett
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = napistu
3
- version = 0.4.4
3
+ version = 0.4.5
4
4
  description = Connecting high-dimensional data to curated pathways
5
5
  long_description = file: README.md
6
6
  long_description_content_type = text/markdown
@@ -512,9 +512,10 @@ def find_neighborhoods(
512
512
  compartmentalized_species: list[str],
513
513
  network_type: str = "downstream",
514
514
  order: int = 3,
515
- verbose: bool = True,
515
+ min_pw_size: int = 3,
516
516
  precomputed_neighbors: pd.DataFrame | None = None,
517
517
  source_total_counts: pd.Series | None = None,
518
+ verbose: bool = True,
518
519
  ) -> dict:
519
520
  """
520
521
  Find Neighborhood
@@ -536,14 +537,16 @@ def find_neighborhoods(
536
537
  locates both upstream and downstream species.
537
538
  order: int
538
539
  Max steps away from center node
539
- verbose: bool
540
- Extra reporting
541
540
  precomputed_neighbors: pd.DataFrame or None
542
541
  If provided, a pre-filtered table of nodes nearby the compartmentalized species
543
542
  which will be used to skip on-the-fly neighborhood generation.
543
+ min_pw_size: int
544
+ the minimum size of a pathway to be considered
544
545
  source_total_counts: pd.Series | None
545
546
  Optional, A series of the total counts of each source. As produced by
546
- source.get_source_total_counts()
547
+ source.get_source_total_counts()
548
+ verbose: bool
549
+ Extra reporting
547
550
 
548
551
  Returns:
549
552
  ----------
@@ -580,6 +583,7 @@ def find_neighborhoods(
580
583
  neighborhood_df=neighborhood_df,
581
584
  sbml_dfs=sbml_dfs,
582
585
  napistu_graph=napistu_graph,
586
+ min_pw_size=min_pw_size,
583
587
  source_total_counts=source_total_counts,
584
588
  verbose=verbose,
585
589
  )
@@ -594,6 +598,7 @@ def create_neighborhood_dict_entry(
594
598
  neighborhood_df: pd.DataFrame,
595
599
  sbml_dfs: sbml_dfs_core.SBML_dfs,
596
600
  napistu_graph: ig.Graph,
601
+ min_pw_size: int = 3,
597
602
  source_total_counts: pd.Series | None = None,
598
603
  verbose: bool = False,
599
604
  ) -> dict[str, Any]:
@@ -612,6 +617,8 @@ def create_neighborhood_dict_entry(
612
617
  A mechanistic molecular model
613
618
  napistu_graph: igraph.Graph
614
619
  A network connecting molecular species and reactions
620
+ min_pw_size: int
621
+ the minimum size of a pathway to be considered
615
622
  source_total_counts: pd.Series
616
623
  Optional, A series of the total counts of each source. As produced by
617
624
  source.get_source_total_counts()
@@ -665,8 +672,9 @@ def create_neighborhood_dict_entry(
665
672
  edge_sources = ng_utils.get_minimal_sources_edges(
666
673
  vertices.rename(columns={"name": "node"}),
667
674
  sbml_dfs,
675
+ min_pw_size=min_pw_size,
668
676
  # optional, counts of sources across the whole model
669
- source_total_counts,
677
+ source_total_counts=source_total_counts,
670
678
  )
671
679
  except Exception:
672
680
  edge_sources = None
@@ -114,6 +114,7 @@ def compartmentalize_species_pairs(
114
114
  def get_minimal_sources_edges(
115
115
  vertices: pd.DataFrame,
116
116
  sbml_dfs: sbml_dfs_core.SBML_dfs,
117
+ min_pw_size: int = 3,
117
118
  source_total_counts: Optional[pd.Series] = None,
118
119
  ) -> pd.DataFrame | None:
119
120
  """
@@ -125,6 +126,8 @@ def get_minimal_sources_edges(
125
126
  A table of vertices.
126
127
  sbml_dfs: sbml_dfs_core.SBML_dfs
127
128
  A pathway model
129
+ min_pw_size: int
130
+ the minimum size of a pathway to be considered
128
131
  source_total_counts: pd.Series
129
132
  A series of the total counts of each source.
130
133
 
@@ -146,7 +149,7 @@ def get_minimal_sources_edges(
146
149
  return None
147
150
  else:
148
151
  edge_sources = source.source_set_coverage(
149
- source_df, source_total_counts, sbml_dfs
152
+ source_df, source_total_counts, sbml_dfs, min_pw_size=min_pw_size
150
153
  )
151
154
  return edge_sources.reset_index()[
152
155
  [SBML_DFS.R_ID, SOURCE_SPEC.PATHWAY_ID, SOURCE_SPEC.NAME]
@@ -241,6 +241,8 @@ def find_all_shortest_reaction_paths(
241
241
  target_species_paths: pd.DataFrame,
242
242
  weight_var: str = NAPISTU_GRAPH_EDGES.WEIGHTS,
243
243
  precomputed_distances: pd.DataFrame | None = None,
244
+ min_pw_size: int = 3,
245
+ source_total_counts: pd.Series | None = None,
244
246
  ):
245
247
  """
246
248
  Shortest Reaction Paths
@@ -259,6 +261,11 @@ def find_all_shortest_reaction_paths(
259
261
  An edge attribute to use when forming a weighted shortest path
260
262
  precomputed_distances : pd.DataFrame | None
261
263
  A table containing precalculated path summaries between pairs of compartmentalized species
264
+ min_pw_size : int
265
+ the minimum size of a pathway to be considered
266
+ source_total_counts : pd.Series | None
267
+ A series of the total counts of each source. As produced by
268
+ source.get_source_total_counts()
262
269
 
263
270
  Returns:
264
271
  ----------
@@ -325,7 +332,12 @@ def find_all_shortest_reaction_paths(
325
332
  ).reset_index()
326
333
 
327
334
  # get a minimal set of pathway sources to organize reactions
328
- edge_sources = get_minimal_sources_edges(all_shortest_reaction_paths_df, sbml_dfs)
335
+ edge_sources = get_minimal_sources_edges(
336
+ all_shortest_reaction_paths_df,
337
+ sbml_dfs,
338
+ min_pw_size=min_pw_size,
339
+ source_total_counts=source_total_counts,
340
+ )
329
341
 
330
342
  # create a new small network of shortest paths
331
343
  unique_path_nodes = (
@@ -1,4 +1,5 @@
1
1
  from __future__ import annotations
2
+ import logging
2
3
 
3
4
  import numpy as np
4
5
  import pandas as pd
@@ -10,6 +11,8 @@ from napistu import sbml_dfs_utils
10
11
  from napistu.statistics import hypothesis_testing
11
12
  from napistu.constants import SBML_DFS_SCHEMA, SCHEMA_DEFS, SOURCE_SPEC
12
13
 
14
+ logger = logging.getLogger(__name__)
15
+
13
16
 
14
17
  class Source:
15
18
  """
@@ -244,7 +247,7 @@ def unnest_sources(source_table: pd.DataFrame, verbose: bool = False) -> pd.Data
244
247
 
245
248
  for i in range(source_table.shape[0]):
246
249
  if verbose:
247
- print(f"Processing {source_table_index.index.values[i]}")
250
+ logger.info(f"Processing {source_table_index.index.values[i]}")
248
251
 
249
252
  # check that the entries of sourcevar are Source objects
250
253
  source_value = source_table[source_var].iloc[i]
@@ -255,7 +258,7 @@ def unnest_sources(source_table: pd.DataFrame, verbose: bool = False) -> pd.Data
255
258
  )
256
259
 
257
260
  if source_value.source is None:
258
- print("Some sources were only missing - returning None")
261
+ logger.warning("Some sources were only missing - returning None")
259
262
  return None
260
263
 
261
264
  source_tbl = pd.DataFrame(source_value.source)
@@ -278,6 +281,7 @@ def source_set_coverage(
278
281
  select_sources_df: pd.DataFrame,
279
282
  source_total_counts: Optional[pd.Series] = None,
280
283
  sbml_dfs: Optional[sbml_dfs_core.SBML_dfs] = None,
284
+ min_pw_size: int = 3,
281
285
  ) -> pd.DataFrame:
282
286
  """
283
287
  Greedy Set Coverage of Sources
@@ -298,6 +302,8 @@ def source_set_coverage(
298
302
  sbml_dfs: sbml_dfs_core.SBML_dfs
299
303
  if `source_total_counts` is provided then `sbml_dfs` must be provided
300
304
  to calculate the total number of entities in the table.
305
+ min_pw_size: int
306
+ the minimum size of a pathway to be considered
301
307
 
302
308
  Returns
303
309
  -------
@@ -325,10 +331,16 @@ def source_set_coverage(
325
331
  # find the pathway with the most members
326
332
 
327
333
  if source_total_counts is None:
328
- top_pathway = _select_top_pathway_by_size(unaccounted_for_members)
334
+ top_pathway = _select_top_pathway_by_size(
335
+ unaccounted_for_members, min_pw_size=min_pw_size
336
+ )
329
337
  else:
330
338
  top_pathway = _select_top_pathway_by_enrichment(
331
- unaccounted_for_members, source_total_counts, n_total_entities, pk
339
+ unaccounted_for_members,
340
+ source_total_counts,
341
+ n_total_entities,
342
+ pk,
343
+ min_pw_size=min_pw_size,
332
344
  )
333
345
 
334
346
  if top_pathway is None:
@@ -368,6 +380,13 @@ def get_source_total_counts(
368
380
  """
369
381
 
370
382
  all_sources_table = unnest_sources(sbml_dfs.get_table(entity_type))
383
+
384
+ if all_sources_table is None:
385
+ logger.warning(
386
+ f"No sources found for {entity_type} in sbml_dfs. Returning an empty series."
387
+ )
388
+ return pd.Series([], name="total_counts")
389
+
371
390
  source_total_counts = all_sources_table.value_counts(SOURCE_SPEC.PATHWAY_ID).rename(
372
391
  "total_counts"
373
392
  )
@@ -515,9 +534,15 @@ def _safe_source_merge(member_Sources: Source | list) -> Source:
515
534
  raise TypeError("Expecting source.Source or pd.Series")
516
535
 
517
536
 
518
- def _select_top_pathway_by_size(unaccounted_for_members: pd.DataFrame) -> str:
537
+ def _select_top_pathway_by_size(
538
+ unaccounted_for_members: pd.DataFrame, min_pw_size: int = 3
539
+ ) -> str:
519
540
 
520
541
  pathway_members = unaccounted_for_members.value_counts(SOURCE_SPEC.PATHWAY_ID)
542
+ pathway_members = pathway_members.loc[pathway_members >= min_pw_size]
543
+ if pathway_members.shape[0] == 0:
544
+ return None
545
+
521
546
  top_pathway = pathway_members[pathway_members == max(pathway_members)].index[0]
522
547
 
523
548
  return top_pathway
@@ -528,7 +553,7 @@ def _select_top_pathway_by_enrichment(
528
553
  source_total_counts: pd.Series,
529
554
  n_total_entities: int,
530
555
  table_pk: str,
531
- min_pw_size: int = 5,
556
+ min_pw_size: int = 3,
532
557
  ) -> str:
533
558
 
534
559
  n_observed_entities = len(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: napistu
3
- Version: 0.4.4
3
+ Version: 0.4.5
4
4
  Summary: Connecting high-dimensional data to curated pathways
5
5
  Home-page: https://github.com/napistu/napistu-py
6
6
  Author: Sean Hackett
@@ -139,6 +139,16 @@ def napistu_graph_undirected(sbml_dfs):
139
139
  )
140
140
 
141
141
 
142
+ @fixture
143
+ def napistu_graph_metabolism(sbml_dfs_metabolism):
144
+ """
145
+ Pytest fixture to create a NapistuGraph from sbml_dfs_metabolism with directed=True and topology weighting.
146
+ """
147
+ return process_napistu_graph(
148
+ sbml_dfs_metabolism, directed=True, weighting_strategy="topology"
149
+ )
150
+
151
+
142
152
  @pytest.fixture
143
153
  def reaction_species_examples():
144
154
  """
@@ -0,0 +1,131 @@
1
+ import pandas as pd
2
+
3
+ from napistu.network import ng_utils
4
+ from napistu.network import neighborhoods
5
+ from napistu import source
6
+
7
+ from napistu.constants import SBML_DFS
8
+ from napistu.network.constants import NEIGHBORHOOD_NETWORK_TYPES
9
+
10
+
11
+ def test_neighborhood(sbml_dfs, napistu_graph):
12
+ species = sbml_dfs.species
13
+ source_species = species[species[SBML_DFS.S_NAME] == "NADH"].index.tolist()
14
+
15
+ query_sc_species = ng_utils.compartmentalize_species(sbml_dfs, source_species)
16
+ compartmentalized_species = query_sc_species[SBML_DFS.SC_ID].tolist()
17
+
18
+ neighborhood = neighborhoods.find_neighborhoods(
19
+ sbml_dfs,
20
+ napistu_graph,
21
+ compartmentalized_species=compartmentalized_species,
22
+ order=3,
23
+ )
24
+
25
+ assert neighborhood["species_73473"]["vertices"].shape[0] == 6
26
+
27
+
28
+ def test_find_and_prune_neighborhoods_with_source_counts(
29
+ sbml_dfs_metabolism, napistu_graph_metabolism
30
+ ):
31
+ """
32
+ Test find_and_prune_neighborhoods function with source_total_counts parameter.
33
+
34
+ This test verifies that the function works correctly when source_total_counts
35
+ is provided, which enables source-based edge assignment in neighborhoods.
36
+ """
37
+ # Create source_total_counts using the source module
38
+ source_total_counts = source.get_source_total_counts(
39
+ sbml_dfs_metabolism, SBML_DFS.REACTIONS
40
+ )
41
+
42
+ # Verify source_total_counts is created correctly
43
+ assert isinstance(source_total_counts, pd.Series)
44
+ assert len(source_total_counts) > 0
45
+ assert source_total_counts.name == "total_counts"
46
+ assert all(source_total_counts > 0)
47
+
48
+ # Get a test species to create neighborhood around
49
+ species = sbml_dfs_metabolism.species
50
+ source_species = species[species[SBML_DFS.S_NAME] == "NADH"].index.tolist()
51
+
52
+ query_sc_species = ng_utils.compartmentalize_species(
53
+ sbml_dfs_metabolism, source_species
54
+ )
55
+ compartmentalized_species = query_sc_species[SBML_DFS.SC_ID].tolist()
56
+
57
+ # Test find_and_prune_neighborhoods with source_total_counts
58
+ neighborhoods_result = neighborhoods.find_and_prune_neighborhoods(
59
+ sbml_dfs=sbml_dfs_metabolism,
60
+ napistu_graph=napistu_graph_metabolism,
61
+ compartmentalized_species=compartmentalized_species,
62
+ source_total_counts=source_total_counts,
63
+ network_type=NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
64
+ order=3,
65
+ verbose=False,
66
+ top_n=10,
67
+ )
68
+
69
+ # Verify the result structure
70
+ assert isinstance(neighborhoods_result, dict)
71
+ assert len(neighborhoods_result) > 0
72
+
73
+ # Check each neighborhood has the expected structure
74
+ for sc_id, neighborhood in neighborhoods_result.items():
75
+ assert isinstance(neighborhood, dict)
76
+ assert "graph" in neighborhood
77
+ assert "vertices" in neighborhood
78
+ assert "edges" in neighborhood
79
+ assert "edge_sources" in neighborhood
80
+
81
+ # Verify edge_sources is populated when source_total_counts is provided
82
+ # (this is the key difference when source_total_counts is passed)
83
+ if neighborhood["edges"].shape[0] > 0:
84
+ # If there are edges, edge_sources should be populated
85
+ assert neighborhood["edge_sources"] is not None
86
+ assert isinstance(neighborhood["edge_sources"], pd.DataFrame)
87
+
88
+ # Check edge_sources has expected columns
89
+ expected_columns = [SBML_DFS.R_ID, "pathway_id", "name"]
90
+ for col in expected_columns:
91
+ assert col in neighborhood["edge_sources"].columns
92
+
93
+ # Verify vertices structure
94
+ vertices = neighborhood["vertices"]
95
+ assert isinstance(vertices, pd.DataFrame)
96
+ assert vertices.shape[0] > 0
97
+
98
+ # Verify edges structure
99
+ edges = neighborhood["edges"]
100
+ assert isinstance(edges, pd.DataFrame)
101
+
102
+ # Verify graph structure
103
+ graph = neighborhood["graph"]
104
+ assert hasattr(graph, "vcount")
105
+ assert hasattr(graph, "ecount")
106
+
107
+ # Test without source_total_counts for comparison
108
+ neighborhoods_result_no_source = neighborhoods.find_and_prune_neighborhoods(
109
+ sbml_dfs=sbml_dfs_metabolism,
110
+ napistu_graph=napistu_graph_metabolism,
111
+ compartmentalized_species=compartmentalized_species,
112
+ source_total_counts=None, # No source counts
113
+ network_type=NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM,
114
+ order=3,
115
+ verbose=False,
116
+ top_n=10,
117
+ )
118
+
119
+ # Verify both results have the same basic structure
120
+ assert len(neighborhoods_result) == len(neighborhoods_result_no_source)
121
+
122
+ # The main difference should be in edge_sources handling
123
+ for sc_id in neighborhoods_result:
124
+ with_source = neighborhoods_result[sc_id]["edge_sources"]
125
+ without_source = neighborhoods_result_no_source[sc_id]["edge_sources"]
126
+
127
+ # Both should either be None or DataFrames, but the content may differ
128
+ assert (with_source is None) == (without_source is None)
129
+ if with_source is not None and without_source is not None:
130
+ assert isinstance(with_source, pd.DataFrame)
131
+ assert isinstance(without_source, pd.DataFrame)
@@ -96,4 +96,4 @@ def test_source_set_coverage_enrichment(sbml_dfs_metabolism):
96
96
  source_df, source_total_counts=source_total_counts, sbml_dfs=sbml_dfs_metabolism
97
97
  )
98
98
 
99
- assert set_coverage.shape == (30, 6)
99
+ assert set_coverage.shape == (34, 6)
@@ -1,19 +0,0 @@
1
- from napistu.network import ng_utils
2
- from napistu.network import neighborhoods
3
-
4
-
5
- def test_neighborhood(sbml_dfs, napistu_graph):
6
- species = sbml_dfs.species
7
- source_species = species[species["s_name"] == "NADH"].index.tolist()
8
-
9
- query_sc_species = ng_utils.compartmentalize_species(sbml_dfs, source_species)
10
- compartmentalized_species = query_sc_species["sc_id"].tolist()
11
-
12
- neighborhood = neighborhoods.find_neighborhoods(
13
- sbml_dfs,
14
- napistu_graph,
15
- compartmentalized_species=compartmentalized_species,
16
- order=3,
17
- )
18
-
19
- assert neighborhood["species_73473"]["vertices"].shape[0] == 6
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes