napistu 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -512,9 +512,10 @@ def find_neighborhoods(
512
512
  compartmentalized_species: list[str],
513
513
  network_type: str = "downstream",
514
514
  order: int = 3,
515
- verbose: bool = True,
515
+ min_pw_size: int = 3,
516
516
  precomputed_neighbors: pd.DataFrame | None = None,
517
517
  source_total_counts: pd.Series | None = None,
518
+ verbose: bool = True,
518
519
  ) -> dict:
519
520
  """
520
521
  Find Neighborhood
@@ -536,14 +537,16 @@ def find_neighborhoods(
536
537
  locates both upstream and downstream species.
537
538
  order: int
538
539
  Max steps away from center node
539
- verbose: bool
540
- Extra reporting
541
540
  precomputed_neighbors: pd.DataFrame or None
542
541
  If provided, a pre-filtered table of nodes near the compartmentalized species
543
542
  which will be used to skip on-the-fly neighborhood generation.
543
+ min_pw_size: int
544
+ the minimum size of a pathway to be considered
544
545
  source_total_counts: pd.Series | None
545
546
  Optional, A series of the total counts of each source. As produced by
546
- source.get_source_total_counts()
547
+ source.get_source_total_counts()
548
+ verbose: bool
549
+ Extra reporting
547
550
 
548
551
  Returns:
549
552
  ----------
@@ -580,6 +583,7 @@ def find_neighborhoods(
580
583
  neighborhood_df=neighborhood_df,
581
584
  sbml_dfs=sbml_dfs,
582
585
  napistu_graph=napistu_graph,
586
+ min_pw_size=min_pw_size,
583
587
  source_total_counts=source_total_counts,
584
588
  verbose=verbose,
585
589
  )
@@ -594,6 +598,7 @@ def create_neighborhood_dict_entry(
594
598
  neighborhood_df: pd.DataFrame,
595
599
  sbml_dfs: sbml_dfs_core.SBML_dfs,
596
600
  napistu_graph: ig.Graph,
601
+ min_pw_size: int = 3,
597
602
  source_total_counts: pd.Series | None = None,
598
603
  verbose: bool = False,
599
604
  ) -> dict[str, Any]:
@@ -612,6 +617,8 @@ def create_neighborhood_dict_entry(
612
617
  A mechanistic molecular model
613
618
  napistu_graph: igraph.Graph
614
619
  A network connecting molecular species and reactions
620
+ min_pw_size: int
621
+ the minimum size of a pathway to be considered
615
622
  source_total_counts: pd.Series
616
623
  Optional, A series of the total counts of each source. As produced by
617
624
  source.get_source_total_counts()
@@ -665,8 +672,9 @@ def create_neighborhood_dict_entry(
665
672
  edge_sources = ng_utils.get_minimal_sources_edges(
666
673
  vertices.rename(columns={"name": "node"}),
667
674
  sbml_dfs,
675
+ min_pw_size=min_pw_size,
668
676
  # optional, counts of sources across the whole model
669
- source_total_counts,
677
+ source_total_counts=source_total_counts,
670
678
  )
671
679
  except Exception:
672
680
  edge_sources = None
@@ -114,6 +114,7 @@ def compartmentalize_species_pairs(
114
114
  def get_minimal_sources_edges(
115
115
  vertices: pd.DataFrame,
116
116
  sbml_dfs: sbml_dfs_core.SBML_dfs,
117
+ min_pw_size: int = 3,
117
118
  source_total_counts: Optional[pd.Series] = None,
118
119
  ) -> pd.DataFrame | None:
119
120
  """
@@ -125,6 +126,8 @@ def get_minimal_sources_edges(
125
126
  A table of vertices.
126
127
  sbml_dfs: sbml_dfs_core.SBML_dfs
127
128
  A pathway model
129
+ min_pw_size: int
130
+ the minimum size of a pathway to be considered
128
131
  source_total_counts: pd.Series
129
132
  A series of the total counts of each source.
130
133
 
@@ -146,7 +149,7 @@ def get_minimal_sources_edges(
146
149
  return None
147
150
  else:
148
151
  edge_sources = source.source_set_coverage(
149
- source_df, source_total_counts, sbml_dfs
152
+ source_df, source_total_counts, sbml_dfs, min_pw_size=min_pw_size
150
153
  )
151
154
  return edge_sources.reset_index()[
152
155
  [SBML_DFS.R_ID, SOURCE_SPEC.PATHWAY_ID, SOURCE_SPEC.NAME]
napistu/network/paths.py CHANGED
@@ -241,6 +241,8 @@ def find_all_shortest_reaction_paths(
241
241
  target_species_paths: pd.DataFrame,
242
242
  weight_var: str = NAPISTU_GRAPH_EDGES.WEIGHTS,
243
243
  precomputed_distances: pd.DataFrame | None = None,
244
+ min_pw_size: int = 3,
245
+ source_total_counts: pd.Series | None = None,
244
246
  ):
245
247
  """
246
248
  Shortest Reaction Paths
@@ -259,6 +261,11 @@ def find_all_shortest_reaction_paths(
259
261
  An edge attribute to use when forming a weighted shortest path
260
262
  precomputed_distances : pd.DataFrame | None
261
263
  A table containing precalculated path summaries between pairs of compartmentalized species
264
+ min_pw_size : int
265
+ the minimum size of a pathway to be considered
266
+ source_total_counts : pd.Series | None
267
+ A series of the total counts of each source. As produced by
268
+ source.get_source_total_counts()
262
269
 
263
270
  Returns:
264
271
  ----------
@@ -325,7 +332,12 @@ def find_all_shortest_reaction_paths(
325
332
  ).reset_index()
326
333
 
327
334
  # find a minimal set of pathway sources to organize reactions
328
- edge_sources = get_minimal_sources_edges(all_shortest_reaction_paths_df, sbml_dfs)
335
+ edge_sources = get_minimal_sources_edges(
336
+ all_shortest_reaction_paths_df,
337
+ sbml_dfs,
338
+ min_pw_size=min_pw_size,
339
+ source_total_counts=source_total_counts,
340
+ )
329
341
 
330
342
  # create a new small network of shortest paths
331
343
  unique_path_nodes = (
napistu/source.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from __future__ import annotations
2
+ import logging
2
3
 
3
4
  import numpy as np
4
5
  import pandas as pd
@@ -10,6 +11,8 @@ from napistu import sbml_dfs_utils
10
11
  from napistu.statistics import hypothesis_testing
11
12
  from napistu.constants import SBML_DFS_SCHEMA, SCHEMA_DEFS, SOURCE_SPEC
12
13
 
14
+ logger = logging.getLogger(__name__)
15
+
13
16
 
14
17
  class Source:
15
18
  """
@@ -244,7 +247,7 @@ def unnest_sources(source_table: pd.DataFrame, verbose: bool = False) -> pd.Data
244
247
 
245
248
  for i in range(source_table.shape[0]):
246
249
  if verbose:
247
- print(f"Processing {source_table_index.index.values[i]}")
250
+ logger.info(f"Processing {source_table_index.index.values[i]}")
248
251
 
249
252
  # check that the entries of sourcevar are Source objects
250
253
  source_value = source_table[source_var].iloc[i]
@@ -255,7 +258,7 @@ def unnest_sources(source_table: pd.DataFrame, verbose: bool = False) -> pd.Data
255
258
  )
256
259
 
257
260
  if source_value.source is None:
258
- print("Some sources were only missing - returning None")
261
+ logger.warning("Some sources were only missing - returning None")
259
262
  return None
260
263
 
261
264
  source_tbl = pd.DataFrame(source_value.source)
@@ -278,6 +281,7 @@ def source_set_coverage(
278
281
  select_sources_df: pd.DataFrame,
279
282
  source_total_counts: Optional[pd.Series] = None,
280
283
  sbml_dfs: Optional[sbml_dfs_core.SBML_dfs] = None,
284
+ min_pw_size: int = 3,
281
285
  ) -> pd.DataFrame:
282
286
  """
283
287
  Greedy Set Coverage of Sources
@@ -298,6 +302,8 @@ def source_set_coverage(
298
302
  sbml_dfs: sbml_dfs_core.SBML_dfs
299
303
  if `source_total_counts` is provided then `sbml_dfs` must be provided
300
304
  to calculate the total number of entities in the table.
305
+ min_pw_size: int
306
+ the minimum size of a pathway to be considered
301
307
 
302
308
  Returns
303
309
  -------
@@ -325,10 +331,16 @@ def source_set_coverage(
325
331
  # find the pathway with the most members
326
332
 
327
333
  if source_total_counts is None:
328
- top_pathway = _select_top_pathway_by_size(unaccounted_for_members)
334
+ top_pathway = _select_top_pathway_by_size(
335
+ unaccounted_for_members, min_pw_size=min_pw_size
336
+ )
329
337
  else:
330
338
  top_pathway = _select_top_pathway_by_enrichment(
331
- unaccounted_for_members, source_total_counts, n_total_entities, pk
339
+ unaccounted_for_members,
340
+ source_total_counts,
341
+ n_total_entities,
342
+ pk,
343
+ min_pw_size=min_pw_size,
332
344
  )
333
345
 
334
346
  if top_pathway is None:
@@ -368,6 +380,13 @@ def get_source_total_counts(
368
380
  """
369
381
 
370
382
  all_sources_table = unnest_sources(sbml_dfs.get_table(entity_type))
383
+
384
+ if all_sources_table is None:
385
+ logger.warning(
386
+ f"No sources found for {entity_type} in sbml_dfs. Returning an empty series."
387
+ )
388
+ return pd.Series([], name="total_counts")
389
+
371
390
  source_total_counts = all_sources_table.value_counts(SOURCE_SPEC.PATHWAY_ID).rename(
372
391
  "total_counts"
373
392
  )
@@ -515,9 +534,15 @@ def _safe_source_merge(member_Sources: Source | list) -> Source:
515
534
  raise TypeError("Expecting source.Source or pd.Series")
516
535
 
517
536
 
518
- def _select_top_pathway_by_size(unaccounted_for_members: pd.DataFrame) -> str:
537
+ def _select_top_pathway_by_size(
538
+ unaccounted_for_members: pd.DataFrame, min_pw_size: int = 3
539
+ ) -> str:
519
540
 
520
541
  pathway_members = unaccounted_for_members.value_counts(SOURCE_SPEC.PATHWAY_ID)
542
+ pathway_members = pathway_members.loc[pathway_members >= min_pw_size]
543
+ if pathway_members.shape[0] == 0:
544
+ return None
545
+
521
546
  top_pathway = pathway_members[pathway_members == max(pathway_members)].index[0]
522
547
 
523
548
  return top_pathway
@@ -528,7 +553,7 @@ def _select_top_pathway_by_enrichment(
528
553
  source_total_counts: pd.Series,
529
554
  n_total_entities: int,
530
555
  table_pk: str,
531
- min_pw_size: int = 5,
556
+ min_pw_size: int = 3,
532
557
  ) -> str:
533
558
 
534
559
  n_observed_entities = len(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: napistu
3
- Version: 0.4.4
3
+ Version: 0.4.5
4
4
  Summary: Connecting high-dimensional data to curated pathways
5
5
  Home-page: https://github.com/napistu/napistu-py
6
6
  Author: Sean Hackett
@@ -6,7 +6,7 @@ napistu/identifiers.py,sha256=e2-nTVzr5AINa0y1ER9218bKXyF2kAeJ9At22S4Z00o,33914
6
6
  napistu/indices.py,sha256=Zjg3gE0JQ3T879lCPazYg-WXVE6hvcAr713ZKpJ32rk,9830
7
7
  napistu/sbml_dfs_core.py,sha256=s0OyoHs-AjOcbZu1d3KNkW_PI7Rxbhu5ZLpfQeO4iY8,72639
8
8
  napistu/sbml_dfs_utils.py,sha256=SOy1Ii2hDFOfQa7pFAJS9EfAmfBVD_sHvDJBVmCN_p8,46456
9
- napistu/source.py,sha256=iDDKpN-4k_W_tyxEjqe_z-yPJv7uoFRRBhkiBtOH5C8,20416
9
+ napistu/source.py,sha256=iUB0SqzHW5qe0IMfnWvUCfNpjYpbXDv0s2pHNgZ8BFc,21102
10
10
  napistu/utils.py,sha256=p2sJxTklmV30XS6hanJRjcdfgeaZpkULuMyQX3BPP0c,36404
11
11
  napistu/context/__init__.py,sha256=LQBEqipcHKK0E5UlDEg1ct-ymCs93IlUrUaH8BCevf0,242
12
12
  napistu/context/discretize.py,sha256=Qq7zg46F_I-PvQIT2_pEDQV7YEtUQCxKoRvT5Gu9QsE,15052
@@ -62,13 +62,13 @@ napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,2
62
62
  napistu/network/constants.py,sha256=nG_lUZYLgop8oxOGjDYqvxXJzVdOwKZ3aWnxlhtSaIo,6915
63
63
  napistu/network/data_handling.py,sha256=KncrAKjXI3169BgVE-SnY8FkpVF60JnUwfMHtbqvsTc,14725
64
64
  napistu/network/ig_utils.py,sha256=MuyEyOVtSHndil6QuuRCimBZrJ2jTaF5qQESgYlu02M,17042
65
- napistu/network/neighborhoods.py,sha256=kXoD5d3plcTEw-6XCbb5QjaCt0jsKwn17VdAvnGoFhY,57041
65
+ napistu/network/neighborhoods.py,sha256=hi8FT5sGd1vtkR5Uu10wr0Ik5Z3fz9e5fhvXqfi7QPQ,57340
66
66
  napistu/network/net_create.py,sha256=66kV_xoWnu4BVLaJZ1TAC7wBSsjPDqjoAXH-X9ShV3s,59091
67
67
  napistu/network/net_create_utils.py,sha256=zajwaz2xAij_9fEnD77SgBw_EnNAnJ8jBCmmK2rk_bA,24672
68
68
  napistu/network/net_propagation.py,sha256=Il5nDOWh3nLz8gRhDFHGp2LxcvJ9C1twiSZjDeiZMUo,23490
69
69
  napistu/network/ng_core.py,sha256=dGnTUKR4WtnvaYMyIHqqF55FY4mJSa7wjA2LZ4cVB6U,11720
70
- napistu/network/ng_utils.py,sha256=ahSm-8M2pV662V7MMVcGaoguBM55_y-F7LDmZSVp9ag,15951
71
- napistu/network/paths.py,sha256=r6LVKVvX7i3ctBA5r-xvHfpH5Zsd0VDHUCtin2iag20,17453
70
+ napistu/network/ng_utils.py,sha256=DkI_Ln2uFiNDjPEnUnf7kyy6XwyqvpeUkk8DRjTGZQQ,16078
71
+ napistu/network/paths.py,sha256=BcoYNkCplaM_QPqWWfiwD89bsvwlyvvacSiEzHacfmA,17863
72
72
  napistu/network/precompute.py,sha256=ARU2tktWnxFISaHAY8chpkg8pusZPv7TT5jSIB9eFF0,10081
73
73
  napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
74
74
  napistu/ontologies/constants.py,sha256=GyOFvezSxDK1VigATcruTKtNhjcYaid1ggulEf_HEtQ,4345
@@ -87,9 +87,9 @@ napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27
87
87
  napistu/statistics/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
88
88
  napistu/statistics/hypothesis_testing.py,sha256=k0mBFAMF0XHVcKwS26aPnEbq_FIUVwXU1gZ6cKfFbCk,2190
89
89
  napistu/statistics/quantiles.py,sha256=1-LnmVzC2CQWxCKUh0yi6YfKrbsZM1-kkD7nu2-aS5s,3042
90
- napistu-0.4.4.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
90
+ napistu-0.4.5.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
91
91
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
92
- tests/conftest.py,sha256=t-GHb0MvSsC-MyhkFpOy2K3t5fi7eaig_Rc2xEQC-t8,9678
92
+ tests/conftest.py,sha256=Rw0KtnVyykZhRjnlmNu4oV47lNIeYUJVHu4y47RnVq0,9990
93
93
  tests/test_consensus.py,sha256=Hzfrgp4SpkRDnEMVMD3f0UInSycndB8kKzC4wDDvRas,15076
94
94
  tests/test_constants.py,sha256=gJLDv7QMeeBiiupyMazj6mumk20KWvGMgm2myHMKKfc,531
95
95
  tests/test_context_discretize.py,sha256=5Mr9WqwHGYMO37M1TnMmSfC64UZ73mnoCiEM2IQHVDY,1667
@@ -109,7 +109,7 @@ tests/test_mcp_documentation_utils.py,sha256=OW0N2N_2IOktbYTcCWhhWz4bANi8IB60l1q
109
109
  tests/test_mcp_server.py,sha256=bP3PWVQsEfX6-lAgXKP32njdg__o65n2WuLvkxTTHkQ,11215
110
110
  tests/test_network_data_handling.py,sha256=4aS8z2AlKkVd-JhK4BQ8fjeiW8_bJ1hZ3cc71Jh7Glk,12716
111
111
  tests/test_network_ig_utils.py,sha256=XihmEpX890sr-LYmsb_t4aN0sKIDWCnXkTpDhpuTDmw,7199
112
- tests/test_network_neighborhoods.py,sha256=8BV17m5X1OUd5FwasTTYUOkNYUHDPUkxOKH_VZCsyBE,631
112
+ tests/test_network_neighborhoods.py,sha256=OvVfgGodbS3MpuSfj-__VKjBj99Ng4WWLmINlIIvbvo,5100
113
113
  tests/test_network_net_create.py,sha256=L0U91b4jVHDuC3DFo-_BUFVuv4GuSxZuLAo7r-7EJxY,12877
114
114
  tests/test_network_net_create_utils.py,sha256=0J6KIh2HBc4koFsvwMaul1QRtj5x92kR9HBdDZajnAw,18971
115
115
  tests/test_network_net_propagation.py,sha256=kZeDHD93iMrLVvxO4OyfRH5_vgsYeQyC40OI9Dsb0xY,14999
@@ -129,15 +129,15 @@ tests/test_sbml_dfs_core.py,sha256=nnLPpZTVtCznOBohk7CX67x6sMqktJWt-sZMWQKoaDs,2
129
129
  tests/test_sbml_dfs_utils.py,sha256=ZD9x2B81fsfYEjAV9wphHOR7ywjNcfvfw1LGNv4PxUA,11471
130
130
  tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
131
131
  tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
132
- tests/test_source.py,sha256=iV-Yyu8flhIGWF17SCL8msG2bjqwb9w2IZ694b0iZ-o,2985
132
+ tests/test_source.py,sha256=pe090MsiZ7Tl9P0rhuq17sqMmxUBCch2zoxTwLrNeJQ,2985
133
133
  tests/test_statistics_hypothesis_testing.py,sha256=qD-oS9zo5JlH-jdtiOrWAKI4nKFuZvvh6361_pFSpIs,2259
134
134
  tests/test_statistics_quantiles.py,sha256=yNDeqwgbP-1Rx3C_dLX_wnwT_Lr-iJWClmeKmElqmTE,4984
135
135
  tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRax8c,1289
136
136
  tests/test_utils.py,sha256=qPSpV-Q9b6vmdycgaDmQqtcvzKnAVnN9j5xJ9x-T6bg,23959
137
137
  tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
138
138
  tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
- napistu-0.4.4.dist-info/METADATA,sha256=E15A5Ve2RZTn4HtXGD2rDO1Q7AEaTfSdo3fgLuwravE,4078
140
- napistu-0.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
141
- napistu-0.4.4.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
142
- napistu-0.4.4.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
143
- napistu-0.4.4.dist-info/RECORD,,
139
+ napistu-0.4.5.dist-info/METADATA,sha256=JJzjckSlzdusT7COjo-FxaNRBGMWtfJc-kfUDvjyvW4,4078
140
+ napistu-0.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
141
+ napistu-0.4.5.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
142
+ napistu-0.4.5.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
143
+ napistu-0.4.5.dist-info/RECORD,,
tests/conftest.py CHANGED
@@ -139,6 +139,16 @@ def napistu_graph_undirected(sbml_dfs):
139
139
  )
140
140
 
141
141
 
142
+ @fixture
143
+ def napistu_graph_metabolism(sbml_dfs_metabolism):
144
+ """
145
+ Pytest fixture to create a NapistuGraph from the sbml_dfs_metabolism fixture with directed=True and topology weighting.
146
+ """
147
+ return process_napistu_graph(
148
+ sbml_dfs_metabolism, directed=True, weighting_strategy="topology"
149
+ )
150
+
151
+
142
152
  @pytest.fixture
143
153
  def reaction_species_examples():
144
154
  """
@@ -1,13 +1,19 @@
1
+ import pandas as pd
2
+
1
3
  from napistu.network import ng_utils
2
4
  from napistu.network import neighborhoods
5
+ from napistu import source
6
+
7
+ from napistu.constants import SBML_DFS
8
+ from napistu.network.constants import NEIGHBORHOOD_NETWORK_TYPES
3
9
 
4
10
 
5
11
  def test_neighborhood(sbml_dfs, napistu_graph):
6
12
  species = sbml_dfs.species
7
- source_species = species[species["s_name"] == "NADH"].index.tolist()
13
+ source_species = species[species[SBML_DFS.S_NAME] == "NADH"].index.tolist()
8
14
 
9
15
  query_sc_species = ng_utils.compartmentalize_species(sbml_dfs, source_species)
10
- compartmentalized_species = query_sc_species["sc_id"].tolist()
16
+ compartmentalized_species = query_sc_species[SBML_DFS.SC_ID].tolist()
11
17
 
12
18
  neighborhood = neighborhoods.find_neighborhoods(
13
19
  sbml_dfs,
@@ -17,3 +23,109 @@ def test_neighborhood(sbml_dfs, napistu_graph):
17
23
  )
18
24
 
19
25
  assert neighborhood["species_73473"]["vertices"].shape[0] == 6
26
+
27
+
28
+ def test_find_and_prune_neighborhoods_with_source_counts(
29
+ sbml_dfs_metabolism, napistu_graph_metabolism
30
+ ):
31
+ """
32
+ Test find_and_prune_neighborhoods function with source_total_counts parameter.
33
+
34
+ This test verifies that the function works correctly when source_total_counts
35
+ is provided, which enables source-based edge assignment in neighborhoods.
36
+ """
37
+ # Create source_total_counts using the source module
38
+ source_total_counts = source.get_source_total_counts(
39
+ sbml_dfs_metabolism, SBML_DFS.REACTIONS
40
+ )
41
+
42
+ # Verify source_total_counts is created correctly
43
+ assert isinstance(source_total_counts, pd.Series)
44
+ assert len(source_total_counts) > 0
45
+ assert source_total_counts.name == "total_counts"
46
+ assert all(source_total_counts > 0)
47
+
48
+ # Get a test species to create neighborhood around
49
+ species = sbml_dfs_metabolism.species
50
+ source_species = species[species[SBML_DFS.S_NAME] == "NADH"].index.tolist()
51
+
52
+ query_sc_species = ng_utils.compartmentalize_species(
53
+ sbml_dfs_metabolism, source_species
54
+ )
55
+ compartmentalized_species = query_sc_species[SBML_DFS.SC_ID].tolist()
56
+
57
+ # Test find_and_prune_neighborhoods with source_total_counts
58
+ neighborhoods_result = neighborhoods.find_and_prune_neighborhoods(
59
+ sbml_dfs=sbml_dfs_metabolism,
60
+ napistu_graph=napistu_graph_metabolism,
61
+ compartmentalized_species=compartmentalized_species,
62
+ source_total_counts=source_total_counts,
63
+ network_type=NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
64
+ order=3,
65
+ verbose=False,
66
+ top_n=10,
67
+ )
68
+
69
+ # Verify the result structure
70
+ assert isinstance(neighborhoods_result, dict)
71
+ assert len(neighborhoods_result) > 0
72
+
73
+ # Check each neighborhood has the expected structure
74
+ for sc_id, neighborhood in neighborhoods_result.items():
75
+ assert isinstance(neighborhood, dict)
76
+ assert "graph" in neighborhood
77
+ assert "vertices" in neighborhood
78
+ assert "edges" in neighborhood
79
+ assert "edge_sources" in neighborhood
80
+
81
+ # Verify edge_sources is populated when source_total_counts is provided
82
+ # (this is the key difference when source_total_counts is passed)
83
+ if neighborhood["edges"].shape[0] > 0:
84
+ # If there are edges, edge_sources should be populated
85
+ assert neighborhood["edge_sources"] is not None
86
+ assert isinstance(neighborhood["edge_sources"], pd.DataFrame)
87
+
88
+ # Check edge_sources has expected columns
89
+ expected_columns = [SBML_DFS.R_ID, "pathway_id", "name"]
90
+ for col in expected_columns:
91
+ assert col in neighborhood["edge_sources"].columns
92
+
93
+ # Verify vertices structure
94
+ vertices = neighborhood["vertices"]
95
+ assert isinstance(vertices, pd.DataFrame)
96
+ assert vertices.shape[0] > 0
97
+
98
+ # Verify edges structure
99
+ edges = neighborhood["edges"]
100
+ assert isinstance(edges, pd.DataFrame)
101
+
102
+ # Verify graph structure
103
+ graph = neighborhood["graph"]
104
+ assert hasattr(graph, "vcount")
105
+ assert hasattr(graph, "ecount")
106
+
107
+ # Test without source_total_counts for comparison
108
+ neighborhoods_result_no_source = neighborhoods.find_and_prune_neighborhoods(
109
+ sbml_dfs=sbml_dfs_metabolism,
110
+ napistu_graph=napistu_graph_metabolism,
111
+ compartmentalized_species=compartmentalized_species,
112
+ source_total_counts=None, # No source counts
113
+ network_type=NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM,
114
+ order=3,
115
+ verbose=False,
116
+ top_n=10,
117
+ )
118
+
119
+ # Verify both results have the same basic structure
120
+ assert len(neighborhoods_result) == len(neighborhoods_result_no_source)
121
+
122
+ # The main difference should be in edge_sources handling
123
+ for sc_id in neighborhoods_result:
124
+ with_source = neighborhoods_result[sc_id]["edge_sources"]
125
+ without_source = neighborhoods_result_no_source[sc_id]["edge_sources"]
126
+
127
+ # Both should either be None or DataFrames, but the content may differ
128
+ assert (with_source is None) == (without_source is None)
129
+ if with_source is not None and without_source is not None:
130
+ assert isinstance(with_source, pd.DataFrame)
131
+ assert isinstance(without_source, pd.DataFrame)
tests/test_source.py CHANGED
@@ -96,4 +96,4 @@ def test_source_set_coverage_enrichment(sbml_dfs_metabolism):
96
96
  source_df, source_total_counts=source_total_counts, sbml_dfs=sbml_dfs_metabolism
97
97
  )
98
98
 
99
- assert set_coverage.shape == (30, 6)
99
+ assert set_coverage.shape == (34, 6)