napistu 0.4.6__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,8 +10,14 @@ from napistu.constants import SBOTERM_NAMES
10
10
 
11
11
  NAPISTU_GRAPH = SimpleNamespace(VERTICES="vertices", EDGES="edges", METADATA="metadata")
12
12
 
13
- NAPISTU_GRAPH_DIRECTEDNESS = SimpleNamespace(
14
- DIRECTED="directed", UNDIRECTED="undirected"
13
+ GRAPH_DIRECTEDNESS = SimpleNamespace(DIRECTED="directed", UNDIRECTED="undirected")
14
+
15
+ GRAPH_RELATIONSHIPS = SimpleNamespace(
16
+ ANCESTORS="ancestors",
17
+ CHILDREN="children",
18
+ DESCENDANTS="descendants",
19
+ FOCAL="focal",
20
+ PARENTS="parents",
15
21
  )
16
22
 
17
23
  NAPISTU_GRAPH_VERTICES = SimpleNamespace(
@@ -230,4 +236,17 @@ NEIGHBORHOOD_DICT_KEYS = SimpleNamespace(
230
236
  VERTICES="vertices",
231
237
  EDGES="edges",
232
238
  REACTION_SOURCES="reaction_sources",
239
+ NEIGHBORHOOD_PATH_ENTITIES="neighborhood_path_entities",
240
+ )
241
+
242
+ DISTANCES = SimpleNamespace(
243
+ # core attributes of precomputed distances
244
+ SC_ID_ORIGIN="sc_id_origin",
245
+ SC_ID_DEST="sc_id_dest",
246
+ PATH_LENGTH="path_length",
247
+ PATH_UPSTREAM_WEIGHTS="path_upstream_weights",
248
+ PATH_WEIGHTS="path_weights",
249
+ # other attributes associated with paths/distances
250
+ FINAL_FROM="final_from",
251
+ FINAL_TO="final_to",
233
252
  )
@@ -22,11 +22,16 @@ from napistu.constants import (
22
22
  MINI_SBO_NAME_TO_POLARITY,
23
23
  MINI_SBO_TO_NAME,
24
24
  NAPISTU_EDGELIST,
25
+ ONTOLOGIES,
25
26
  SBML_DFS,
26
27
  )
27
28
 
28
29
  from napistu.network.constants import (
30
+ DISTANCES,
31
+ GRAPH_RELATIONSHIPS,
29
32
  GRAPH_WIRING_APPROACHES,
33
+ NAPISTU_GRAPH_EDGES,
34
+ NAPISTU_GRAPH_NODE_TYPES,
30
35
  NAPISTU_GRAPH_VERTICES,
31
36
  NEIGHBORHOOD_DICT_KEYS,
32
37
  NEIGHBORHOOD_NETWORK_TYPES,
@@ -102,6 +107,16 @@ def find_and_prune_neighborhoods(
102
107
  if not isinstance(compartmentalized_species, list):
103
108
  raise TypeError("compartmentalized_species must be a list")
104
109
 
110
+ invalid_cspecies = [
111
+ x
112
+ for x in compartmentalized_species
113
+ if x not in sbml_dfs.compartmentalized_species.index
114
+ ]
115
+ if len(invalid_cspecies) > 0:
116
+ raise ValueError(
117
+ f"compartmentalized_species contains invalid species: {invalid_cspecies}"
118
+ )
119
+
105
120
  if isinstance(precomputed_distances, pd.DataFrame):
106
121
  logger.info("Pre-computed neighbors based on precomputed_distances")
107
122
 
@@ -116,7 +131,7 @@ def find_and_prune_neighborhoods(
116
131
  else:
117
132
  precomputed_neighbors = None
118
133
 
119
- neighborhoods = find_neighborhoods(
134
+ neighborhood_dicts = find_neighborhoods(
120
135
  sbml_dfs=sbml_dfs,
121
136
  napistu_graph=napistu_graph,
122
137
  compartmentalized_species=compartmentalized_species,
@@ -128,7 +143,7 @@ def find_and_prune_neighborhoods(
128
143
  verbose=verbose,
129
144
  )
130
145
 
131
- pruned_neighborhoods = prune_neighborhoods(neighborhoods, top_n=top_n)
146
+ pruned_neighborhoods = prune_neighborhoods(neighborhood_dicts, top_n=top_n)
132
147
 
133
148
  return pruned_neighborhoods
134
149
 
@@ -176,7 +191,7 @@ def load_neighborhoods(
176
191
  -------
177
192
  all_neighborhoods_df: pd.DataFrame
178
193
  A table containing all species in each query s_ids neighborhood
179
- neighborhoods_dict: dict
194
+ neighborhood_dicts: dict
180
195
  Outputs from find_and_prune_neighborhoods for each s_id
181
196
 
182
197
  """
@@ -190,16 +205,16 @@ def load_neighborhoods(
190
205
  neighborhood_paths = [vertices_path, networks_path]
191
206
 
192
207
  if all([os.path.isfile(x) for x in neighborhood_paths]) and overwrite is False:
193
- print(f"loading existing neighborhoods for {neighborhood_prefix}")
208
+ logger.info(f"loading existing neighborhoods for {neighborhood_prefix}")
194
209
 
195
210
  all_neighborhoods_df = pd.read_csv(vertices_path, sep="\t")
196
211
  with open(networks_path, "rb") as in_file:
197
- neighborhoods_dict = pickle.load(in_file)
212
+ neighborhood_dicts = pickle.load(in_file)
198
213
 
199
214
  else:
200
- print(f"creating neighborhoods based on {neighborhood_prefix}")
215
+ logger.info(f"creating neighborhoods based on {neighborhood_prefix}")
201
216
 
202
- all_neighborhoods_df, neighborhoods_dict = create_neighborhoods(
217
+ all_neighborhoods_df, neighborhood_dicts = create_neighborhoods(
203
218
  s_ids=s_ids,
204
219
  sbml_dfs=sbml_dfs,
205
220
  napistu_graph=napistu_graph,
@@ -214,9 +229,9 @@ def load_neighborhoods(
214
229
 
215
230
  # pickle neighborhoods
216
231
  with open(networks_path, "wb") as fh:
217
- pickle.dump(neighborhoods_dict, fh)
232
+ pickle.dump(neighborhood_dicts, fh)
218
233
 
219
- return all_neighborhoods_df, neighborhoods_dict
234
+ return all_neighborhoods_df, neighborhood_dicts
220
235
 
221
236
 
222
237
  def create_neighborhoods(
@@ -254,7 +269,7 @@ def create_neighborhoods(
254
269
  -------
255
270
  all_neighborhoods_df: pd.DataFrame
256
271
  A table containing all species in each query s_ids neighborhood
257
- neighborhoods_dict: dict
272
+ neighborhood_dicts: dict
258
273
  Outputs from find_and_prune_neighborhoods for each s_id
259
274
  """
260
275
 
@@ -275,13 +290,13 @@ def create_neighborhoods(
275
290
  raise TypeError(f"top_n was a {type(top_n)} and must be an int")
276
291
 
277
292
  neighborhoods_list = list()
278
- neighborhoods_dict = dict()
293
+ neighborhood_dicts = dict()
279
294
  for s_id in s_ids:
280
295
  query_sc_species = ng_utils.compartmentalize_species(sbml_dfs, s_id)
281
296
 
282
297
  compartmentalized_species = query_sc_species[SBML_DFS.SC_ID].tolist()
283
298
 
284
- neighborhoods = find_and_prune_neighborhoods(
299
+ neighborhood_dicts = find_and_prune_neighborhoods(
285
300
  sbml_dfs,
286
301
  napistu_graph,
287
302
  compartmentalized_species=compartmentalized_species,
@@ -295,25 +310,25 @@ def create_neighborhoods(
295
310
 
296
311
  neighborhood_entities = pd.concat(
297
312
  [
298
- neighborhoods[sc_id][NEIGHBORHOOD_DICT_KEYS.VERTICES].assign(
313
+ neighborhood_dicts[sc_id][NEIGHBORHOOD_DICT_KEYS.VERTICES].assign(
299
314
  focal_sc_id=sc_id
300
315
  )
301
- for sc_id in neighborhoods.keys()
316
+ for sc_id in neighborhood_dicts.keys()
302
317
  ]
303
318
  ).assign(focal_s_id=s_id)
304
319
 
305
320
  neighborhood_species = neighborhood_entities.merge(
306
321
  sbml_dfs.compartmentalized_species[SBML_DFS.S_ID],
307
- left_on="name",
322
+ left_on=NAPISTU_GRAPH_VERTICES.NAME,
308
323
  right_index=True,
309
324
  )
310
325
 
311
326
  neighborhoods_list.append(neighborhood_species)
312
- neighborhoods_dict[s_id] = neighborhoods
327
+ neighborhood_dicts[s_id] = neighborhood_dicts
313
328
 
314
329
  all_neighborhoods_df = pd.concat(neighborhoods_list).reset_index(drop=True)
315
330
 
316
- return all_neighborhoods_df, neighborhoods_dict
331
+ return all_neighborhoods_df, neighborhood_dicts
317
332
 
318
333
 
319
334
  def create_neighborhood_prefix(network_type: str, order: int, top_n: int) -> str:
@@ -383,7 +398,7 @@ def load_neighborhoods_by_partition(
383
398
  if parition_sids_df.shape[0] == 0:
384
399
  raise ValueError(f"No s_ids associated with partition {selected_partition}")
385
400
 
386
- parition_sids = parition_sids_df["s_id"].tolist()
401
+ parition_sids = parition_sids_df[SBML_DFS.S_ID].tolist()
387
402
 
388
403
  # read pathway and network data
389
404
 
@@ -443,7 +458,7 @@ def read_paritioned_neighborhoods(
443
458
  -------
444
459
  all_neighborhoods_df: pd.DataFrame
445
460
  A table containing all species in each query s_ids neighborhood
446
- neighborhoods_dict: dict
461
+ neighborhood_dicts: dict
447
462
  Outputs from find_and_prune_neighborhoods for each s_id
448
463
 
449
464
  """
@@ -508,7 +523,7 @@ def read_paritioned_neighborhoods(
508
523
 
509
524
  # combine all partitions' dfs and dicts
510
525
  all_neighborhoods_df = pd.concat(neighborhood_paths_list).reset_index(drop=True)
511
- neighborhoods_dict = dict(ChainMap(*path_dict_list))
526
+ neighborhood_dicts = dict(ChainMap(*path_dict_list))
512
527
 
513
528
  # TO DO - remove s_id duplication (these are present in the vertices table in the partition outputs)
514
529
  if not all(all_neighborhoods_df["s_id_x"] == all_neighborhoods_df["s_id_y"]):
@@ -517,7 +532,7 @@ def read_paritioned_neighborhoods(
517
532
  {"s_id_x": "s_id"}, axis=1
518
533
  )
519
534
 
520
- return all_neighborhoods_df, neighborhoods_dict
535
+ return all_neighborhoods_df, neighborhood_dicts
521
536
 
522
537
 
523
538
  def find_neighborhoods(
@@ -558,7 +573,7 @@ def find_neighborhoods(
558
573
  the minimum size of a pathway to be considered
559
574
  source_total_counts: pd.Series | None
560
575
  Optional, A series of the total counts of each source. As produced by
561
- source.get_source_total_counts()\
576
+ source.get_source_total_counts()
562
577
  verbose: bool
563
578
  Extra reporting
564
579
 
@@ -579,6 +594,16 @@ def find_neighborhoods(
579
594
  if not isinstance(order, int):
580
595
  raise TypeError(f"order was a {type(order)} and must be an int")
581
596
 
597
+ invalid_cspecies = [
598
+ x
599
+ for x in compartmentalized_species
600
+ if x not in sbml_dfs.compartmentalized_species.index
601
+ ]
602
+ if len(invalid_cspecies) > 0:
603
+ raise ValueError(
604
+ f"compartmentalized_species contains invalid species: {invalid_cspecies}"
605
+ )
606
+
582
607
  # create a table which includes cspecies and reaction nearby each of the
583
608
  # focal compartmentalized_speecies
584
609
  neighborhood_df = _build_raw_neighborhood_df(
@@ -656,12 +681,12 @@ def create_neighborhood_dict_entry(
656
681
  focal node.
657
682
  """
658
683
 
659
- one_neighborhood_df = neighborhood_df[neighborhood_df["sc_id"] == sc_id]
684
+ one_neighborhood_df = neighborhood_df[neighborhood_df[SBML_DFS.SC_ID] == sc_id]
660
685
 
661
686
  if verbose:
662
687
  _create_neighborhood_dict_entry_logging(sc_id, one_neighborhood_df, sbml_dfs)
663
688
 
664
- if not one_neighborhood_df["name"].eq(sc_id).any():
689
+ if not one_neighborhood_df[NAPISTU_GRAPH_VERTICES.NAME].eq(sc_id).any():
665
690
  raise ValueError(
666
691
  f"The focal node sc_id = {sc_id} was not in 'one_neighborhood_df'.\
667
692
  By convention it should be part of its neighborhood"
@@ -677,19 +702,22 @@ def create_neighborhood_dict_entry(
677
702
 
678
703
  # add edge polarity: whether edges are activating, inhibiting or unknown
679
704
  if edges.shape[0] > 0:
680
- edges["link_polarity"] = (
681
- edges["sbo_term"].map(MINI_SBO_TO_NAME).map(MINI_SBO_NAME_TO_POLARITY)
705
+ edges[NET_POLARITY.LINK_POLARITY] = (
706
+ edges[SBML_DFS.SBO_TERM]
707
+ .map(MINI_SBO_TO_NAME)
708
+ .map(MINI_SBO_NAME_TO_POLARITY)
682
709
  )
683
710
 
684
711
  try:
685
712
  reaction_sources = ng_utils.get_minimal_sources_edges(
686
- vertices.rename(columns={"name": "node"}),
713
+ vertices.rename(columns={NAPISTU_GRAPH_VERTICES.NAME: "node"}),
687
714
  sbml_dfs,
688
715
  min_pw_size=min_pw_size,
689
716
  # optional, counts of sources across the whole model
690
717
  source_total_counts=source_total_counts,
691
718
  )
692
719
  except Exception:
720
+ logger.warning(f"Could not get reaction sources for {sc_id}; returning None")
693
721
  reaction_sources = None
694
722
 
695
723
  # to add weights to the network solve the shortest path problem
@@ -697,75 +725,38 @@ def create_neighborhood_dict_entry(
697
725
  # solve this problem separately whether a given neighbor is an
698
726
  # ancestor or descendant
699
727
 
700
- # focal node -> descendants
701
-
702
- one_descendants_df = one_neighborhood_df[
703
- one_neighborhood_df["relationship"] == "descendants"
704
- ]
705
- descendants_list = list(set(one_descendants_df["name"].tolist()).union({sc_id}))
706
-
707
- # hide warnings which are mostly just Dijkstra complaining about not finding neighbors
708
- with warnings.catch_warnings():
709
- # igraph throws warnings for each pair of unconnected species
710
- warnings.simplefilter("ignore")
711
-
712
- neighborhood_paths = neighborhood_graph.get_shortest_paths(
713
- # focal node
714
- v=sc_id,
715
- to=descendants_list,
716
- weights="weights",
717
- mode="out",
718
- output="epath",
719
- )
720
-
721
- downstream_path_attrs, downstream_entity_dict = _calculate_path_attrs(
722
- neighborhood_paths, edges, vertices=descendants_list, weight_var="weights"
723
- )
724
- downstream_path_attrs = downstream_path_attrs.assign(node_orientation="downstream")
725
-
726
- # ancestors -> focal_node
727
-
728
- one_ancestors_df = one_neighborhood_df[
729
- one_neighborhood_df["relationship"] == "ancestors"
730
- ]
731
- ancestors_list = list(set(one_ancestors_df["name"].tolist()).union({sc_id}))
732
-
733
- with warnings.catch_warnings():
734
- # igraph throws warnings for each pair of unconnected species
735
- warnings.simplefilter("ignore")
736
-
737
- neighborhood_paths = neighborhood_graph.get_shortest_paths(
738
- v=sc_id,
739
- to=ancestors_list,
740
- weights="upstream_weights",
741
- mode="in",
742
- output="epath",
743
- )
744
-
745
- upstream_path_attrs, upstream_entity_dict = _calculate_path_attrs(
746
- neighborhood_paths,
728
+ (
729
+ downstream_path_attrs,
730
+ downstream_entity_dict,
731
+ upstream_path_attrs,
732
+ upstream_entity_dict,
733
+ ) = _find_neighbors_paths(
734
+ neighborhood_graph,
735
+ one_neighborhood_df,
736
+ sc_id,
747
737
  edges,
748
- vertices=ancestors_list,
749
- weight_var="upstream_weights",
750
738
  )
751
- upstream_path_attrs = upstream_path_attrs.assign(node_orientation="upstream")
752
739
 
753
740
  # combine upstream and downstream shortest paths
754
741
  # in cases a node is upstream and downstream of the focal node
755
742
  # by taking the lowest path weight
756
743
  vertex_neighborhood_attrs = (
757
744
  pd.concat([downstream_path_attrs, upstream_path_attrs])
758
- .sort_values("path_weight")
745
+ .sort_values(DISTANCES.PATH_WEIGHTS)
759
746
  .groupby("neighbor")
760
747
  .first()
761
748
  )
762
749
  # label the focal node
763
- vertex_neighborhood_attrs.loc[sc_id, "node_orientation"] = "focal"
750
+ vertex_neighborhood_attrs.loc[sc_id, "node_orientation"] = GRAPH_RELATIONSHIPS.FOCAL
764
751
 
765
752
  # if the precomputed distances, graph and/or sbml_dfs are inconsistent
766
753
  # then the shortest paths search may just return empty lists
767
754
  # throw a clearer error message in this case.
768
- EXPECTED_VERTEX_ATTRS = {"final_from", "final_to", "net_polarity"}
755
+ EXPECTED_VERTEX_ATTRS = {
756
+ DISTANCES.FINAL_FROM,
757
+ DISTANCES.FINAL_TO,
758
+ NET_POLARITY.NET_POLARITY,
759
+ }
769
760
  missing_vertex_attrs = EXPECTED_VERTEX_ATTRS.difference(
770
761
  set(vertex_neighborhood_attrs.columns.tolist())
771
762
  )
@@ -780,22 +771,22 @@ def create_neighborhood_dict_entry(
780
771
  # add net_polarity to edges in addition to nodes
781
772
  edges = edges.merge(
782
773
  vertex_neighborhood_attrs.reset_index()[
783
- ["final_from", "final_to", "net_polarity"]
774
+ [DISTANCES.FINAL_FROM, DISTANCES.FINAL_TO, NET_POLARITY.NET_POLARITY]
784
775
  ].dropna(),
785
- left_on=["from", "to"],
786
- right_on=["final_from", "final_to"],
776
+ left_on=[NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO],
777
+ right_on=[DISTANCES.FINAL_FROM, DISTANCES.FINAL_TO],
787
778
  how="left",
788
779
  )
789
780
 
790
781
  vertices = vertices.merge(
791
- vertex_neighborhood_attrs, left_on="name", right_index=True
782
+ vertex_neighborhood_attrs, left_on=NAPISTU_GRAPH_VERTICES.NAME, right_index=True
792
783
  )
793
784
 
794
785
  # drop nodes with a path length / weight of zero
795
786
  # which are NOT the focal node
796
787
  # these were cases where no path to/from the focal node to the query node was found
797
788
  disconnected_neighbors = vertices.query(
798
- "(not node_orientation == 'focal') and path_weight == 0"
789
+ f"(not node_orientation == '{GRAPH_RELATIONSHIPS.FOCAL}') and {DISTANCES.PATH_WEIGHTS} == 0"
799
790
  )
800
791
  vertices = vertices[~vertices.index.isin(disconnected_neighbors.index.tolist())]
801
792
 
@@ -803,8 +794,8 @@ def create_neighborhood_dict_entry(
803
794
  vertices = add_vertices_uri_urls(vertices, sbml_dfs)
804
795
 
805
796
  neighborhood_path_entities = {
806
- "downstream": downstream_entity_dict,
807
- "upstream": upstream_entity_dict,
797
+ NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM: downstream_entity_dict,
798
+ NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM: upstream_entity_dict,
808
799
  }
809
800
 
810
801
  # update graph with additional vertex and edge attributes
@@ -812,16 +803,16 @@ def create_neighborhood_dict_entry(
812
803
  vertices=vertices.to_dict("records"),
813
804
  edges=edges.to_dict("records"),
814
805
  directed=napistu_graph.is_directed(),
815
- vertex_name_attr="name",
816
- edge_foreign_keys=("from", "to"),
806
+ vertex_name_attr=NAPISTU_GRAPH_VERTICES.NAME,
807
+ edge_foreign_keys=(NAPISTU_GRAPH_EDGES.FROM, NAPISTU_GRAPH_EDGES.TO),
817
808
  )
818
809
 
819
810
  outdict = {
820
- "graph": updated_napistu_graph,
821
- "vertices": vertices,
822
- "edges": edges,
823
- "reaction_sources": reaction_sources,
824
- "neighborhood_path_entities": neighborhood_path_entities,
811
+ NEIGHBORHOOD_DICT_KEYS.GRAPH: updated_napistu_graph,
812
+ NEIGHBORHOOD_DICT_KEYS.VERTICES: vertices,
813
+ NEIGHBORHOOD_DICT_KEYS.EDGES: edges,
814
+ NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES: reaction_sources,
815
+ NEIGHBORHOOD_DICT_KEYS.NEIGHBORHOOD_PATH_ENTITIES: neighborhood_path_entities,
825
816
  }
826
817
 
827
818
  return outdict
@@ -831,9 +822,11 @@ def _create_neighborhood_dict_entry_logging(
831
822
  sc_id: str, one_neighborhood_df: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
832
823
  ):
833
824
  df_summary = one_neighborhood_df.copy()
834
- df_summary["node_type"] = [
835
- "species" if x else "reactions"
836
- for x in df_summary["name"].isin(sbml_dfs.compartmentalized_species.index)
825
+ df_summary[NAPISTU_GRAPH_VERTICES.NODE_TYPE] = [
826
+ NAPISTU_GRAPH_NODE_TYPES.SPECIES if x else NAPISTU_GRAPH_NODE_TYPES.REACTION
827
+ for x in df_summary[NAPISTU_GRAPH_VERTICES.NAME].isin(
828
+ sbml_dfs.compartmentalized_species.index
829
+ )
837
830
  ]
838
831
  relationship_counts = df_summary.value_counts(
839
832
  ["relationship", "node_type"]
@@ -857,22 +850,45 @@ def add_vertices_uri_urls(
857
850
  vertices: pd.DataFrame, sbml_dfs: sbml_dfs_core.SBML_dfs
858
851
  ) -> pd.DataFrame:
859
852
  """
860
- Add Vertices URI URLs
853
+ Add URI URLs to neighborhood vertices DataFrame.
861
854
 
862
- Add a url variable to the neighborhood vertices pd.DataFrame
855
+ This function enriches a vertices DataFrame with URI URLs for both species and
856
+ reactions. For species, it adds standard reference identifiers and Pharos IDs
857
+ where available. For reactions, it adds reaction-specific URI URLs.
863
858
 
864
859
  Parameters
865
860
  ----------
866
861
  vertices: pd.DataFrame
867
- table of neighborhood vertices
862
+ DataFrame containing neighborhood vertices with the following required columns:
863
+ - NAPISTU_GRAPH_VERTICES.NAME: The name/identifier of each vertex
864
+ - NAPISTU_GRAPH_VERTICES.NODE_TYPE: The type of node, either
865
+ NAPISTU_GRAPH_NODE_TYPES.SPECIES or NAPISTU_GRAPH_NODE_TYPES.REACTION
868
866
  sbml_dfs: sbml_dfs_core.SBML_dfs
869
- consensus network model
867
+ Pathway model including species, compartmentalized species, reactions and ontologies
870
868
 
871
869
  Returns
872
870
  -------
873
- vertices: pd.DataFrame
874
- input table with a url field
871
+ pd.DataFrame
872
+ Input vertices DataFrame enriched with URI URL columns:
873
+ - For species: standard reference identifier URLs and Pharos IDs
874
+ - For reactions: reaction-specific URI URLs
875
+ - Empty strings for missing URLs
875
876
 
877
+ Raises
878
+ ------
879
+ ValueError
880
+ If vertices DataFrame is empty (no rows)
881
+ TypeError
882
+ If the output is not a pandas DataFrame
883
+ ValueError
884
+ If the output row count doesn't match the input row count
885
+
886
+ Notes
887
+ -----
888
+ - Species vertices are merged with compartmentalized_species to get s_id mappings
889
+ - Reaction vertices are processed directly using their names
890
+ - Missing URLs are filled with empty strings
891
+ - The function preserves the original row order and count
876
892
  """
877
893
 
878
894
  if vertices.shape[0] <= 0:
@@ -882,41 +898,53 @@ def add_vertices_uri_urls(
882
898
 
883
899
  # add s_ids
884
900
  neighborhood_species = vertices[
885
- vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == "species"
901
+ vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == NAPISTU_GRAPH_NODE_TYPES.SPECIES
886
902
  ].merge(
887
903
  sbml_dfs.compartmentalized_species[SBML_DFS.S_ID],
888
- left_on="name",
904
+ left_on=NAPISTU_GRAPH_VERTICES.NAME,
889
905
  right_index=True,
890
906
  how="left",
891
907
  )
892
908
 
893
909
  # add a standard reference identifier
894
910
  neighborhood_species_aug = neighborhood_species.merge(
895
- sbml_dfs.get_uri_urls("species", neighborhood_species[SBML_DFS.S_ID]),
911
+ sbml_dfs.get_uri_urls(
912
+ NAPISTU_GRAPH_NODE_TYPES.SPECIES, neighborhood_species[SBML_DFS.S_ID]
913
+ ),
896
914
  left_on=SBML_DFS.S_ID,
897
915
  right_index=True,
898
916
  how="left",
899
917
  # add pharos ids where available
900
918
  ).merge(
901
919
  sbml_dfs.get_uri_urls(
902
- "species", neighborhood_species[SBML_DFS.S_ID], required_ontology="pharos"
903
- ).rename("pharos"),
920
+ NAPISTU_GRAPH_NODE_TYPES.SPECIES,
921
+ neighborhood_species[SBML_DFS.S_ID],
922
+ required_ontology=ONTOLOGIES.PHAROS,
923
+ ).rename(ONTOLOGIES.PHAROS),
904
924
  left_on=SBML_DFS.S_ID,
905
925
  right_index=True,
906
926
  how="left",
907
927
  )
908
928
 
909
- if sum(vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == "reaction") > 0:
929
+ if (
930
+ sum(
931
+ vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
932
+ == NAPISTU_GRAPH_NODE_TYPES.REACTION
933
+ )
934
+ > 0
935
+ ):
910
936
  neighborhood_reactions = vertices[
911
- vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == "reaction"
937
+ vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
938
+ == NAPISTU_GRAPH_NODE_TYPES.REACTION
912
939
  ].merge(
913
940
  sbml_dfs.get_uri_urls(
914
- "reactions",
915
- vertices[vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == "reaction"][
916
- "name"
917
- ],
941
+ SBML_DFS.REACTIONS,
942
+ vertices[
943
+ vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
944
+ == NAPISTU_GRAPH_NODE_TYPES.REACTION
945
+ ][NAPISTU_GRAPH_VERTICES.NAME],
918
946
  ),
919
- left_on="name",
947
+ left_on=NAPISTU_GRAPH_VERTICES.NAME,
920
948
  right_index=True,
921
949
  how="left",
922
950
  )
@@ -965,7 +993,7 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
965
993
  if not isinstance(top_n, int):
966
994
  raise TypeError(f"top_n was a {type(top_n)} and must be an int")
967
995
 
968
- pruned_neighborhoods_dict = dict()
996
+ pruned_neighborhood_dicts = dict()
969
997
 
970
998
  for an_sc_id in neighborhoods.keys():
971
999
  one_neighborhood = neighborhoods[an_sc_id]
@@ -977,14 +1005,14 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
977
1005
  # reduce neighborhood to this set of high-weight vertices
978
1006
  all_neighbors = pd.DataFrame(
979
1007
  {
980
- NAPISTU_GRAPH_VERTICES.NODE_NAME: one_neighborhood[
1008
+ NAPISTU_GRAPH_VERTICES.NAME: one_neighborhood[
981
1009
  NEIGHBORHOOD_DICT_KEYS.GRAPH
982
- ].vs[NAPISTU_GRAPH_VERTICES.NODE_NAME]
1010
+ ].vs[NAPISTU_GRAPH_VERTICES.NAME]
983
1011
  }
984
1012
  )
985
1013
  pruned_vertices_indices = all_neighbors[
986
- all_neighbors[NAPISTU_GRAPH_VERTICES.NODE_NAME].isin(
987
- pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_NAME]
1014
+ all_neighbors[NAPISTU_GRAPH_VERTICES.NAME].isin(
1015
+ pruned_vertices[NAPISTU_GRAPH_VERTICES.NAME]
988
1016
  )
989
1017
  ].index.tolist()
990
1018
 
@@ -996,8 +1024,9 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
996
1024
  pruned_edges = pd.DataFrame([e.attributes() for e in pruned_neighborhood.es])
997
1025
 
998
1026
  pruned_reactions = pruned_vertices[
999
- pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == "reaction"
1000
- ][NAPISTU_GRAPH_VERTICES.NODE_NAME]
1027
+ pruned_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
1028
+ == NAPISTU_GRAPH_NODE_TYPES.REACTION
1029
+ ][NAPISTU_GRAPH_VERTICES.NAME]
1001
1030
 
1002
1031
  if pruned_reactions.shape[0] != 0:
1003
1032
  if one_neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES] is None:
@@ -1018,14 +1047,14 @@ def prune_neighborhoods(neighborhoods: dict, top_n: int = 100) -> dict:
1018
1047
  NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES
1019
1048
  ]
1020
1049
 
1021
- pruned_neighborhoods_dict[an_sc_id] = {
1050
+ pruned_neighborhood_dicts[an_sc_id] = {
1022
1051
  NEIGHBORHOOD_DICT_KEYS.GRAPH: pruned_neighborhood,
1023
1052
  NEIGHBORHOOD_DICT_KEYS.VERTICES: pruned_vertices,
1024
1053
  NEIGHBORHOOD_DICT_KEYS.EDGES: pruned_edges,
1025
1054
  NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES: pruned_reaction_sources,
1026
1055
  }
1027
1056
 
1028
- return pruned_neighborhoods_dict
1057
+ return pruned_neighborhood_dicts
1029
1058
 
1030
1059
 
1031
1060
  def plot_neighborhood(
@@ -1065,8 +1094,8 @@ def plot_neighborhood(
1065
1094
  "focal disease": "lime",
1066
1095
  "disease": "aquamarine",
1067
1096
  "focal": "lightcoral",
1068
- "species": "firebrick",
1069
- "reaction": "dodgerblue",
1097
+ NAPISTU_GRAPH_NODE_TYPES.SPECIES: "firebrick",
1098
+ NAPISTU_GRAPH_NODE_TYPES.REACTION: "dodgerblue",
1070
1099
  }
1071
1100
 
1072
1101
  edge_polarity_colors = {
@@ -1173,7 +1202,7 @@ def _precompute_neighbors(
1173
1202
 
1174
1203
  # filter by distance
1175
1204
  close_cspecies_subset_precomputed_distances = cspecies_subset_precomputed_distances[
1176
- cspecies_subset_precomputed_distances["path_length"] <= order
1205
+ cspecies_subset_precomputed_distances[DISTANCES.PATH_LENGTH] <= order
1177
1206
  ]
1178
1207
 
1179
1208
  # filter to retain top_n
@@ -1183,12 +1212,12 @@ def _precompute_neighbors(
1183
1212
  ]:
1184
1213
  top_descendants = (
1185
1214
  close_cspecies_subset_precomputed_distances[
1186
- close_cspecies_subset_precomputed_distances["sc_id_origin"].isin(
1187
- compartmentalized_species
1188
- )
1215
+ close_cspecies_subset_precomputed_distances[
1216
+ DISTANCES.SC_ID_ORIGIN
1217
+ ].isin(compartmentalized_species)
1189
1218
  ]
1190
1219
  # sort by path_weight so we can retain the lowest weight neighbors
1191
- .sort_values("path_weights")
1220
+ .sort_values(DISTANCES.PATH_WEIGHTS)
1192
1221
  .groupby(NAPISTU_EDGELIST.SC_ID_ORIGIN)
1193
1222
  .head(top_n)
1194
1223
  )
@@ -1216,7 +1245,7 @@ def _precompute_neighbors(
1216
1245
  # the logic is flipped if we are looking for ancestors where
1217
1246
  # we penalize based on the number of parents of a node when
1218
1247
  # we use it (i.e., the default upstream_weights).
1219
- .sort_values("path_upstream_weights")
1248
+ .sort_values(DISTANCES.PATH_UPSTREAM_WEIGHTS)
1220
1249
  .groupby(NAPISTU_EDGELIST.SC_ID_DEST)
1221
1250
  .head(top_n)
1222
1251
  )
@@ -1233,7 +1262,7 @@ def _precompute_neighbors(
1233
1262
  precomputed_neighbors=top_descendants,
1234
1263
  compartmentalized_species=compartmentalized_species,
1235
1264
  sbml_dfs=sbml_dfs,
1236
- relationship="descendants",
1265
+ relationship=GRAPH_RELATIONSHIPS.DESCENDANTS,
1237
1266
  )
1238
1267
 
1239
1268
  if downstream_reactions is not None:
@@ -1247,7 +1276,7 @@ def _precompute_neighbors(
1247
1276
  precomputed_neighbors=top_ancestors,
1248
1277
  compartmentalized_species=compartmentalized_species,
1249
1278
  sbml_dfs=sbml_dfs,
1250
- relationship="ancestors",
1279
+ relationship=GRAPH_RELATIONSHIPS.ANCESTORS,
1251
1280
  )
1252
1281
 
1253
1282
  if upstream_reactions is not None:
@@ -1313,7 +1342,7 @@ def _build_raw_neighborhood_df(
1313
1342
  descendants_df = _find_neighbors(
1314
1343
  napistu_graph=napistu_graph,
1315
1344
  compartmentalized_species=compartmentalized_species,
1316
- relationship="descendants",
1345
+ relationship=GRAPH_RELATIONSHIPS.DESCENDANTS,
1317
1346
  order=order,
1318
1347
  precomputed_neighbors=precomputed_neighbors,
1319
1348
  )
@@ -1326,7 +1355,7 @@ def _build_raw_neighborhood_df(
1326
1355
  ancestors_df = _find_neighbors(
1327
1356
  napistu_graph=napistu_graph,
1328
1357
  compartmentalized_species=compartmentalized_species,
1329
- relationship="ancestors",
1358
+ relationship=GRAPH_RELATIONSHIPS.ANCESTORS,
1330
1359
  order=order,
1331
1360
  precomputed_neighbors=precomputed_neighbors,
1332
1361
  )
@@ -1342,8 +1371,9 @@ def _build_raw_neighborhood_df(
1342
1371
  raise NotImplementedError("invalid network_type")
1343
1372
 
1344
1373
  # add name since this is an easy way to lookup igraph vertices
1345
- neighborhood_df["name"] = [
1346
- x["name"] for x in napistu_graph.vs[neighborhood_df["neighbor"]]
1374
+ neighborhood_df[NAPISTU_GRAPH_VERTICES.NAME] = [
1375
+ x[NAPISTU_GRAPH_VERTICES.NAME]
1376
+ for x in napistu_graph.vs[neighborhood_df["neighbor"]]
1347
1377
  ]
1348
1378
 
1349
1379
  return neighborhood_df
@@ -1369,15 +1399,21 @@ def _find_neighbors(
1369
1399
  if isinstance(precomputed_neighbors, pd.DataFrame):
1370
1400
  # add graph indices to neighbors
1371
1401
  nodes_to_names = (
1372
- pd.DataFrame({"name": napistu_graph.vs["name"]})
1402
+ pd.DataFrame(
1403
+ {
1404
+ NAPISTU_GRAPH_VERTICES.NAME: napistu_graph.vs[
1405
+ NAPISTU_GRAPH_VERTICES.NAME
1406
+ ]
1407
+ }
1408
+ )
1373
1409
  .reset_index()
1374
1410
  .rename({"index": "neighbor"}, axis=1)
1375
1411
  )
1376
1412
 
1377
- if relationship == "descendants":
1413
+ if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
1378
1414
  bait_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
1379
1415
  target_id = NAPISTU_EDGELIST.SC_ID_DEST
1380
- elif relationship == "ancestors":
1416
+ elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
1381
1417
  bait_id = NAPISTU_EDGELIST.SC_ID_DEST
1382
1418
  target_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
1383
1419
  else:
@@ -1389,15 +1425,17 @@ def _find_neighbors(
1389
1425
  precomputed_neighbors[
1390
1426
  precomputed_neighbors[bait_id].isin(compartmentalized_species)
1391
1427
  ]
1392
- .merge(nodes_to_names.rename({"name": target_id}, axis=1))
1428
+ .merge(
1429
+ nodes_to_names.rename({NAPISTU_GRAPH_VERTICES.NAME: target_id}, axis=1)
1430
+ )
1393
1431
  .rename({bait_id: SBML_DFS.SC_ID}, axis=1)
1394
1432
  .drop([target_id], axis=1)
1395
1433
  .assign(relationship=relationship)
1396
1434
  )
1397
1435
  else:
1398
- if relationship == "descendants":
1436
+ if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
1399
1437
  mode_type = "out"
1400
- elif relationship == "ancestors":
1438
+ elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
1401
1439
  mode_type = "in"
1402
1440
  else:
1403
1441
  raise ValueError(
@@ -1443,10 +1481,10 @@ def _find_reactions_by_relationship(
1443
1481
  if precomputed_neighbors.shape[0] == 0:
1444
1482
  return None
1445
1483
 
1446
- if relationship == "descendants":
1484
+ if relationship == GRAPH_RELATIONSHIPS.DESCENDANTS:
1447
1485
  bait_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
1448
1486
  target_id = NAPISTU_EDGELIST.SC_ID_DEST
1449
- elif relationship == "ancestors":
1487
+ elif relationship == GRAPH_RELATIONSHIPS.ANCESTORS:
1450
1488
  bait_id = NAPISTU_EDGELIST.SC_ID_DEST
1451
1489
  target_id = NAPISTU_EDGELIST.SC_ID_ORIGIN
1452
1490
  else:
@@ -1525,10 +1563,11 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
1525
1563
 
1526
1564
  """
1527
1565
 
1528
- neighborhood_vertices = one_neighborhood["vertices"]
1566
+ neighborhood_vertices = one_neighborhood[NEIGHBORHOOD_DICT_KEYS.VERTICES]
1529
1567
 
1530
1568
  indexed_neighborhood_species = neighborhood_vertices[
1531
- neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == "species"
1569
+ neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NODE_TYPE]
1570
+ == NAPISTU_GRAPH_NODE_TYPES.SPECIES
1532
1571
  ].set_index("node_orientation")
1533
1572
 
1534
1573
  pruned_oriented_neighbors = list()
@@ -1538,14 +1577,14 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
1538
1577
  # handle cases where only one entry exists so DF->series coercion occurs
1539
1578
  vertex_subset = vertex_subset.to_frame().T
1540
1579
 
1541
- sorted_vertex_set = vertex_subset.sort_values("path_weight")
1542
- weight_cutoff = sorted_vertex_set["path_weight"].iloc[
1580
+ sorted_vertex_set = vertex_subset.sort_values(DISTANCES.PATH_WEIGHTS)
1581
+ weight_cutoff = sorted_vertex_set[DISTANCES.PATH_WEIGHTS].iloc[
1543
1582
  min(top_n - 1, sorted_vertex_set.shape[0] - 1)
1544
1583
  ]
1545
1584
 
1546
1585
  top_neighbors = sorted_vertex_set[
1547
- sorted_vertex_set["path_weight"] <= weight_cutoff
1548
- ]["name"].tolist()
1586
+ sorted_vertex_set[DISTANCES.PATH_WEIGHTS] <= weight_cutoff
1587
+ ][NAPISTU_GRAPH_VERTICES.NAME].tolist()
1549
1588
 
1550
1589
  # include reactions and other species necessary to reach the top neighbors
1551
1590
  # by pulling in the past solutions to weighted shortest paths problems
@@ -1564,7 +1603,7 @@ def _prune_vertex_set(one_neighborhood: dict, top_n: int) -> pd.DataFrame:
1564
1603
  # combine all neighbors
1565
1604
  pruned_neighbors = set().union(*pruned_oriented_neighbors)
1566
1605
  pruned_vertices = neighborhood_vertices[
1567
- neighborhood_vertices["name"].isin(pruned_neighbors)
1606
+ neighborhood_vertices[NAPISTU_GRAPH_VERTICES.NAME].isin(pruned_neighbors)
1568
1607
  ].reset_index(drop=True)
1569
1608
 
1570
1609
  return pruned_vertices
@@ -1574,7 +1613,7 @@ def _calculate_path_attrs(
1574
1613
  neighborhood_paths: list[list],
1575
1614
  edges: pd.DataFrame,
1576
1615
  vertices: list,
1577
- weight_var: str = "weights",
1616
+ weight_var: str = NAPISTU_GRAPH_EDGES.WEIGHTS,
1578
1617
  ) -> tuple[pd.DataFrame, dict[Any, set]]:
1579
1618
  """
1580
1619
  Calculate Path Attributes
@@ -1624,15 +1663,15 @@ def _calculate_path_attrs(
1624
1663
  # if all_path_edges.ngroups > 0:
1625
1664
  path_attributes_df = pd.concat(
1626
1665
  [
1627
- all_path_edges[weight_var].agg("sum").rename("path_weight"),
1628
- all_path_edges.agg("size").rename("path_length"),
1629
- all_path_edges["link_polarity"]
1666
+ all_path_edges[weight_var].agg("sum").rename(DISTANCES.PATH_WEIGHTS),
1667
+ all_path_edges.agg("size").rename(DISTANCES.PATH_LENGTH),
1668
+ all_path_edges[NET_POLARITY.LINK_POLARITY]
1630
1669
  .agg(paths._terminal_net_polarity)
1631
- .rename("net_polarity"),
1670
+ .rename(NET_POLARITY.NET_POLARITY),
1632
1671
  # add the final edge since this can be used to add path attributes to edges
1633
1672
  # i.e., apply net_polarity to an edge
1634
- all_path_edges["from"].agg("last").rename("final_from"),
1635
- all_path_edges["to"].agg("last").rename("final_to"),
1673
+ all_path_edges["from"].agg("last").rename(DISTANCES.FINAL_FROM),
1674
+ all_path_edges["to"].agg("last").rename(DISTANCES.FINAL_TO),
1636
1675
  ],
1637
1676
  axis=1,
1638
1677
  ).reset_index()
@@ -1655,7 +1694,11 @@ def _calculate_path_attrs(
1655
1694
  if len(neighborhood_paths[i]) == 0
1656
1695
  ]
1657
1696
  edgeles_nodes_df = pd.DataFrame({"neighbor": edgeless_nodes}).assign(
1658
- path_length=0, path_weight=0, net_polarity=None
1697
+ **{
1698
+ DISTANCES.PATH_LENGTH: 0,
1699
+ DISTANCES.PATH_WEIGHTS: 0,
1700
+ NET_POLARITY.NET_POLARITY: None,
1701
+ }
1659
1702
  )
1660
1703
 
1661
1704
  # add edgeless entries as entries in the two outputs
@@ -1672,3 +1715,118 @@ def _calculate_path_attrs(
1672
1715
  )
1673
1716
 
1674
1717
  return path_attributes_df, neighborhood_path_entities
1718
+
1719
+
1720
+ def _find_neighbors_paths(
1721
+ neighborhood_graph: ig.Graph,
1722
+ one_neighborhood_df: pd.DataFrame,
1723
+ sc_id: str,
1724
+ edges: pd.DataFrame,
1725
+ ) -> tuple[pd.DataFrame, dict[Any, set], pd.DataFrame, dict[Any, set]]:
1726
+ """
1727
+ Find shortest paths between the focal node and its neighbors in both directions.
1728
+
1729
+ This function calculates shortest paths from the focal node to its descendants
1730
+ (downstream) and ancestors (upstream) using igraph's shortest path algorithms.
1731
+ It uses _calculate_path_attrs to compute path attributes including path weights,
1732
+ lengths, and polarity information.
1733
+
1734
+ Parameters
1735
+ ----------
1736
+ neighborhood_graph: ig.Graph
1737
+ The igraph Graph object representing the neighborhood network
1738
+ one_neighborhood_df: pd.DataFrame
1739
+ DataFrame containing neighborhood information with 'relationship' column
1740
+ indicating 'descendants' or 'ancestors' for each node
1741
+ sc_id: str
1742
+ The compartmentalized species ID of the focal node
1743
+ edges: pd.DataFrame
1744
+ DataFrame containing edge information with columns for 'from', 'to',
1745
+ weights, and link polarity
1746
+
1747
+ Returns
1748
+ -------
1749
+ downstream_path_attrs: pd.DataFrame
1750
+ DataFrame containing path attributes for downstream paths from focal node
1751
+ to descendants. Includes columns: neighbor, path_weights, path_length,
1752
+ net_polarity, final_from, final_to, node_orientation
1753
+ downstream_entity_dict: dict[Any, set]
1754
+ Dictionary mapping each descendant neighbor to the set of entities
1755
+ (nodes) connecting it to the focal node
1756
+ upstream_path_attrs: pd.DataFrame
1757
+ DataFrame containing path attributes for upstream paths from focal node
1758
+ to ancestors. Includes columns: neighbor, path_weights, path_length,
1759
+ net_polarity, final_from, final_to, node_orientation
1760
+ upstream_entity_dict: dict[Any, set]
1761
+ Dictionary mapping each ancestor neighbor to the set of entities
1762
+ (nodes) connecting it to the focal node
1763
+ """
1764
+
1765
+ one_descendants_df = one_neighborhood_df[
1766
+ one_neighborhood_df["relationship"] == GRAPH_RELATIONSHIPS.DESCENDANTS
1767
+ ]
1768
+ descendants_list = list(
1769
+ set(one_descendants_df[NAPISTU_GRAPH_VERTICES.NAME].tolist()).union({sc_id})
1770
+ )
1771
+
1772
+ # hide warnings which are mostly just Dijkstra complaining about not finding neighbors
1773
+ with warnings.catch_warnings():
1774
+ # igraph throws warnings for each pair of unconnected species
1775
+ warnings.simplefilter("ignore")
1776
+
1777
+ neighborhood_paths = neighborhood_graph.get_shortest_paths(
1778
+ # focal node
1779
+ v=sc_id,
1780
+ to=descendants_list,
1781
+ weights=NAPISTU_GRAPH_EDGES.WEIGHTS,
1782
+ mode="out",
1783
+ output="epath",
1784
+ )
1785
+
1786
+ downstream_path_attrs, downstream_entity_dict = _calculate_path_attrs(
1787
+ neighborhood_paths,
1788
+ edges,
1789
+ vertices=descendants_list,
1790
+ weight_var=NAPISTU_GRAPH_EDGES.WEIGHTS,
1791
+ )
1792
+ downstream_path_attrs = downstream_path_attrs.assign(
1793
+ node_orientation=NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM
1794
+ )
1795
+
1796
+ # ancestors -> focal_node
1797
+
1798
+ one_ancestors_df = one_neighborhood_df[
1799
+ one_neighborhood_df["relationship"] == GRAPH_RELATIONSHIPS.ANCESTORS
1800
+ ]
1801
+ ancestors_list = list(
1802
+ set(one_ancestors_df[NAPISTU_GRAPH_VERTICES.NAME].tolist()).union({sc_id})
1803
+ )
1804
+
1805
+ with warnings.catch_warnings():
1806
+ # igraph throws warnings for each pair of unconnected species
1807
+ warnings.simplefilter("ignore")
1808
+
1809
+ neighborhood_paths = neighborhood_graph.get_shortest_paths(
1810
+ v=sc_id,
1811
+ to=ancestors_list,
1812
+ weights=NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
1813
+ mode="in",
1814
+ output="epath",
1815
+ )
1816
+
1817
+ upstream_path_attrs, upstream_entity_dict = _calculate_path_attrs(
1818
+ neighborhood_paths,
1819
+ edges,
1820
+ vertices=ancestors_list,
1821
+ weight_var=NAPISTU_GRAPH_EDGES.UPSTREAM_WEIGHTS,
1822
+ )
1823
+ upstream_path_attrs = upstream_path_attrs.assign(
1824
+ node_orientation=NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM
1825
+ )
1826
+
1827
+ return (
1828
+ downstream_path_attrs,
1829
+ downstream_entity_dict,
1830
+ upstream_path_attrs,
1831
+ upstream_entity_dict,
1832
+ )
@@ -23,7 +23,7 @@ from napistu.constants import SBML_DFS
23
23
  from napistu.constants import SOURCE_SPEC
24
24
  from napistu.identifiers import _validate_assets_sbml_ids
25
25
  from napistu.network.constants import GRAPH_WIRING_APPROACHES
26
- from napistu.network.constants import NAPISTU_GRAPH_DIRECTEDNESS
26
+ from napistu.network.constants import GRAPH_DIRECTEDNESS
27
27
 
28
28
  logger = logging.getLogger(__name__)
29
29
 
@@ -406,9 +406,9 @@ def _create_network_save_string(
406
406
  model_prefix: str, outdir: str, directed: bool, wiring_approach: str
407
407
  ) -> str:
408
408
  if directed:
409
- directed_str = NAPISTU_GRAPH_DIRECTEDNESS.DIRECTED
409
+ directed_str = GRAPH_DIRECTEDNESS.DIRECTED
410
410
  else:
411
- directed_str = NAPISTU_GRAPH_DIRECTEDNESS.UNDIRECTED
411
+ directed_str = GRAPH_DIRECTEDNESS.UNDIRECTED
412
412
 
413
413
  export_pkl_path = os.path.join(
414
414
  outdir,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: napistu
3
- Version: 0.4.6
3
+ Version: 0.4.7
4
4
  Summary: Connecting high-dimensional data to curated pathways
5
5
  Home-page: https://github.com/napistu/napistu-py
6
6
  Author: Sean Hackett
@@ -59,15 +59,15 @@ napistu/modify/gaps.py,sha256=CV-bdSfanhrnCIFVWfNuQJbtjvj4hsEwheKYR-Z3tNA,26844
59
59
  napistu/modify/pathwayannot.py,sha256=xuBSMDFWbg_d6-Gzv0Td3Q5nnFTa-Qzic48g1b1AZtQ,48081
60
60
  napistu/modify/uncompartmentalize.py,sha256=y5LkXn5x6u80dB_McfAIh88BxZGIAVFLujkP7sPNRh0,9690
61
61
  napistu/network/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
62
- napistu/network/constants.py,sha256=tnLhxThPwuOmvp_eoF6kdcKcxtrYV4O5mn0J6MV8FSo,7203
62
+ napistu/network/constants.py,sha256=zQkBTeZ2_K_rId6IUvKKTv9chx_i6K5B8vPZ19-LkNQ,7782
63
63
  napistu/network/data_handling.py,sha256=KncrAKjXI3169BgVE-SnY8FkpVF60JnUwfMHtbqvsTc,14725
64
64
  napistu/network/ig_utils.py,sha256=MuyEyOVtSHndil6QuuRCimBZrJ2jTaF5qQESgYlu02M,17042
65
- napistu/network/neighborhoods.py,sha256=Cg-82QxBQUi2fYGs1Pv_e56CxPUAGq9hX1EiYAXlSJ0,58972
65
+ napistu/network/neighborhoods.py,sha256=RNqaz91MTxuU4jNfrGdEcnm6rCykgdtHDPu2abAE-Kg,65516
66
66
  napistu/network/net_create.py,sha256=66kV_xoWnu4BVLaJZ1TAC7wBSsjPDqjoAXH-X9ShV3s,59091
67
67
  napistu/network/net_create_utils.py,sha256=zajwaz2xAij_9fEnD77SgBw_EnNAnJ8jBCmmK2rk_bA,24672
68
68
  napistu/network/net_propagation.py,sha256=Il5nDOWh3nLz8gRhDFHGp2LxcvJ9C1twiSZjDeiZMUo,23490
69
69
  napistu/network/ng_core.py,sha256=dGnTUKR4WtnvaYMyIHqqF55FY4mJSa7wjA2LZ4cVB6U,11720
70
- napistu/network/ng_utils.py,sha256=3FrG-rln6NpGs538NP--IRcNVVoJghghJYBUGji94Sg,16094
70
+ napistu/network/ng_utils.py,sha256=LX9DzMnz0AQMhJGUh3r8bg4dyEgWs_tym1Olu1FwlbQ,16070
71
71
  napistu/network/paths.py,sha256=ZnIqwBIsgz4C4TLyg3c_pCO5zZ97gmCNepDmq2QNEQc,18020
72
72
  napistu/network/precompute.py,sha256=ARU2tktWnxFISaHAY8chpkg8pusZPv7TT5jSIB9eFF0,10081
73
73
  napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
@@ -87,7 +87,7 @@ napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27
87
87
  napistu/statistics/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
88
88
  napistu/statistics/hypothesis_testing.py,sha256=k0mBFAMF0XHVcKwS26aPnEbq_FIUVwXU1gZ6cKfFbCk,2190
89
89
  napistu/statistics/quantiles.py,sha256=1-LnmVzC2CQWxCKUh0yi6YfKrbsZM1-kkD7nu2-aS5s,3042
90
- napistu-0.4.6.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
90
+ napistu-0.4.7.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
91
91
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
92
92
  tests/conftest.py,sha256=Rw0KtnVyykZhRjnlmNu4oV47lNIeYUJVHu4y47RnVq0,9990
93
93
  tests/test_consensus.py,sha256=Hzfrgp4SpkRDnEMVMD3f0UInSycndB8kKzC4wDDvRas,15076
@@ -109,14 +109,14 @@ tests/test_mcp_documentation_utils.py,sha256=OW0N2N_2IOktbYTcCWhhWz4bANi8IB60l1q
109
109
  tests/test_mcp_server.py,sha256=bP3PWVQsEfX6-lAgXKP32njdg__o65n2WuLvkxTTHkQ,11215
110
110
  tests/test_network_data_handling.py,sha256=4aS8z2AlKkVd-JhK4BQ8fjeiW8_bJ1hZ3cc71Jh7Glk,12716
111
111
  tests/test_network_ig_utils.py,sha256=XihmEpX890sr-LYmsb_t4aN0sKIDWCnXkTpDhpuTDmw,7199
112
- tests/test_network_neighborhoods.py,sha256=gMOiVIL7q5Feae6ziZbMs08IPxrVsRlbfVb0UV0SUgg,5622
112
+ tests/test_network_neighborhoods.py,sha256=3k0d-Pk_rWtGwxTg-Jpjv3CsVSj4qMn71MEHEKcqHII,8746
113
113
  tests/test_network_net_create.py,sha256=L0U91b4jVHDuC3DFo-_BUFVuv4GuSxZuLAo7r-7EJxY,12877
114
114
  tests/test_network_net_create_utils.py,sha256=0J6KIh2HBc4koFsvwMaul1QRtj5x92kR9HBdDZajnAw,18971
115
115
  tests/test_network_net_propagation.py,sha256=kZeDHD93iMrLVvxO4OyfRH5_vgsYeQyC40OI9Dsb0xY,14999
116
116
  tests/test_network_ng_core.py,sha256=w-iNBTtenennJhaLFauk952pEsk7W0-Fa8lPvIRqHyY,628
117
117
  tests/test_network_ng_utils.py,sha256=QVVuRnvCRfTSIlGdwQTIF9lr0wOwoc5gGeXAUY_AdgE,713
118
118
  tests/test_network_paths.py,sha256=Bx1uqyIAPw_i27s94dyjCQcV_04O9yovlrQgr1lFjS4,2143
119
- tests/test_network_precompute.py,sha256=IPr1KhtxBD0fXx_2TvZqnevrD-Iig35otb8yloRFpRc,10014
119
+ tests/test_network_precompute.py,sha256=MhmT6LQF-JcMR9YR78W-cLHbkQnp8ro-MHJ5yvMVhJE,10409
120
120
  tests/test_ontologies_genodexito.py,sha256=6fINyUiubHZqu7qxye09DQfJXw28ZMAJc3clPb-cCoY,2298
121
121
  tests/test_ontologies_id_tables.py,sha256=CpwpbmQvTc1BaVd6jbDKHAVE2etwN0vx93nC8jpnMlE,7265
122
122
  tests/test_ontologies_mygene.py,sha256=VkdRcKIWmcG6V-2dpfvsBiOJN5dO-j0RqZNxtJRcyBU,1583
@@ -136,8 +136,8 @@ tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRa
136
136
  tests/test_utils.py,sha256=qPSpV-Q9b6vmdycgaDmQqtcvzKnAVnN9j5xJ9x-T6bg,23959
137
137
  tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
138
138
  tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
- napistu-0.4.6.dist-info/METADATA,sha256=pmQ510PR2BSlSrqleSjf7FGULyIiI9Cr12MtbcEOe2M,4078
140
- napistu-0.4.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
141
- napistu-0.4.6.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
142
- napistu-0.4.6.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
143
- napistu-0.4.6.dist-info/RECORD,,
139
+ napistu-0.4.7.dist-info/METADATA,sha256=PgIDsBflFe6QmORKY6hfoEI9_Qqrpwa7Oc9126D47jc,4078
140
+ napistu-0.4.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
141
+ napistu-0.4.7.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
142
+ napistu-0.4.7.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
143
+ napistu-0.4.7.dist-info/RECORD,,
@@ -1,11 +1,17 @@
1
1
  import pandas as pd
2
+ import pytest
2
3
 
3
4
  from napistu.network import ng_utils
4
5
  from napistu.network import neighborhoods
5
6
  from napistu import source
6
7
 
7
8
  from napistu.constants import SBML_DFS
8
- from napistu.network.constants import NEIGHBORHOOD_DICT_KEYS, NEIGHBORHOOD_NETWORK_TYPES
9
+ from napistu.network.constants import (
10
+ NAPISTU_GRAPH_VERTICES,
11
+ NAPISTU_GRAPH_NODE_TYPES,
12
+ NEIGHBORHOOD_DICT_KEYS,
13
+ NEIGHBORHOOD_NETWORK_TYPES,
14
+ )
9
15
 
10
16
 
11
17
  def test_neighborhood(sbml_dfs, napistu_graph):
@@ -87,6 +93,7 @@ def test_find_and_prune_neighborhoods_with_source_counts(
87
93
  assert isinstance(
88
94
  neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES], pd.DataFrame
89
95
  )
96
+ assert neighborhood[NEIGHBORHOOD_DICT_KEYS.REACTION_SOURCES].shape[0] > 0
90
97
 
91
98
  # Check reaction_sources has expected columns
92
99
  expected_columns = [SBML_DFS.R_ID, "pathway_id", "name"]
@@ -139,3 +146,85 @@ def test_find_and_prune_neighborhoods_with_source_counts(
139
146
  if with_source is not None and without_source is not None:
140
147
  assert isinstance(with_source, pd.DataFrame)
141
148
  assert isinstance(without_source, pd.DataFrame)
149
+
150
+ # Test error handling for invalid parameters
151
+ # Test invalid network_type
152
+ with pytest.raises(ValueError):
153
+ neighborhoods.find_and_prune_neighborhoods(
154
+ sbml_dfs=sbml_dfs_metabolism,
155
+ napistu_graph=napistu_graph_metabolism,
156
+ compartmentalized_species=compartmentalized_species,
157
+ source_total_counts=source_total_counts,
158
+ min_pw_size=1,
159
+ network_type="invalid_network_type",
160
+ order=3,
161
+ verbose=False,
162
+ top_n=10,
163
+ )
164
+
165
+ # Test invalid order (negative)
166
+ with pytest.raises(ValueError):
167
+ neighborhoods.find_and_prune_neighborhoods(
168
+ sbml_dfs=sbml_dfs_metabolism,
169
+ napistu_graph=napistu_graph_metabolism,
170
+ compartmentalized_species=compartmentalized_species,
171
+ source_total_counts=source_total_counts,
172
+ min_pw_size=1,
173
+ network_type=NEIGHBORHOOD_NETWORK_TYPES.HOURGLASS,
174
+ order=-1,
175
+ verbose=False,
176
+ top_n=10,
177
+ )
178
+
179
+
180
+ def test_add_vertices_uri_urls(sbml_dfs):
181
+ """
182
+ Test add_vertices_uri_urls function.
183
+
184
+ This test verifies that the function correctly adds URI URLs to vertices
185
+ DataFrame for both species and reactions.
186
+ """
187
+
188
+ # Get real species and reaction names from the sbml_dfs fixture
189
+ real_species = sbml_dfs.compartmentalized_species.index[0] # Get first species
190
+ real_reaction = sbml_dfs.reactions.index[0] # Get first reaction
191
+
192
+ # Create a test vertices DataFrame with real species and reactions
193
+ test_vertices = pd.DataFrame(
194
+ {
195
+ NAPISTU_GRAPH_VERTICES.NAME: [real_species, real_reaction],
196
+ NAPISTU_GRAPH_VERTICES.NODE_TYPE: [
197
+ NAPISTU_GRAPH_NODE_TYPES.SPECIES,
198
+ NAPISTU_GRAPH_NODE_TYPES.REACTION,
199
+ ],
200
+ }
201
+ )
202
+
203
+ # Test basic functionality
204
+ result = neighborhoods.add_vertices_uri_urls(test_vertices, sbml_dfs)
205
+
206
+ # Verify basic structure
207
+ assert isinstance(result, pd.DataFrame)
208
+ assert result.shape[0] == test_vertices.shape[0] # Same number of rows
209
+ assert result.shape[1] >= test_vertices.shape[1] # At least same number of columns
210
+
211
+ # Verify original columns are preserved
212
+ for col in test_vertices.columns:
213
+ assert col in result.columns
214
+ assert all(result[col] == test_vertices[col])
215
+
216
+ # Verify species vertices have s_id column
217
+ species_vertices = result[
218
+ result[NAPISTU_GRAPH_VERTICES.NODE_TYPE] == NAPISTU_GRAPH_NODE_TYPES.SPECIES
219
+ ]
220
+ assert SBML_DFS.S_ID in species_vertices.columns
221
+
222
+ # Test error handling
223
+ import pytest
224
+
225
+ # Test with empty DataFrame
226
+ empty_vertices = pd.DataFrame(
227
+ columns=[NAPISTU_GRAPH_VERTICES.NAME, NAPISTU_GRAPH_VERTICES.NODE_TYPE]
228
+ )
229
+ with pytest.raises(ValueError, match="vertices must have at least one row"):
230
+ neighborhoods.add_vertices_uri_urls(empty_vertices, sbml_dfs)
@@ -14,6 +14,12 @@ from napistu.network import net_create
14
14
  from napistu.network import paths
15
15
  from napistu.network import precompute
16
16
 
17
+ from napistu.network.constants import (
18
+ NAPISTU_GRAPH_VERTICES,
19
+ DISTANCES,
20
+ NEIGHBORHOOD_NETWORK_TYPES,
21
+ )
22
+
17
23
  test_path = os.path.abspath(os.path.join(__file__, os.pardir))
18
24
  sbml_path = os.path.join(test_path, "test_data", "reactome_glucose_metabolism.sbml")
19
25
  if not os.path.isfile(sbml_path):
@@ -138,6 +144,7 @@ def test_precomputed_distances_shortest_paths():
138
144
 
139
145
 
140
146
  def test_precomputed_distances_neighborhoods():
147
+
141
148
  compartmentalized_species = sbml_dfs.compartmentalized_species[
142
149
  sbml_dfs.compartmentalized_species["s_id"] == "S00000000"
143
150
  ].index.tolist()
@@ -169,8 +176,12 @@ def test_precomputed_distances_neighborhoods():
169
176
  pruned_vert_otf = pruned_neighborhoods_otf[key]["vertices"]
170
177
  pruned_vert_precomp = pruned_neighborhoods_precomputed[key]["vertices"]
171
178
 
172
- join_key = ["name", "node_name", "node_orientation"]
173
- join_key_w_vars = [*join_key, *["path_weight", "path_length"]]
179
+ join_key = [
180
+ NAPISTU_GRAPH_VERTICES.NAME,
181
+ NAPISTU_GRAPH_VERTICES.NODE_NAME,
182
+ "node_orientation",
183
+ ]
184
+ join_key_w_vars = [*join_key, *[DISTANCES.PATH_WEIGHTS, DISTANCES.PATH_LENGTH]]
174
185
  neighbor_comparison = (
175
186
  pruned_vert_precomp[join_key_w_vars]
176
187
  .assign(in_precompute=True)
@@ -197,23 +208,27 @@ def test_precomputed_distances_neighborhoods():
197
208
  # which should be the same if we are pre-selecting the correct neighbors
198
209
  # as part of _precompute_neighbors()
199
210
  downstream_disagreement_w_precompute = (
200
- comparison_df[comparison_df["node_orientation"] == "downstream"]
211
+ comparison_df[
212
+ comparison_df["node_orientation"] == NEIGHBORHOOD_NETWORK_TYPES.DOWNSTREAM
213
+ ]
201
214
  .merge(
202
215
  precomputed_distances,
203
- left_on=["focal_sc_id", "name"],
204
- right_on=["sc_id_origin", "sc_id_dest"],
216
+ left_on=["focal_sc_id", NAPISTU_GRAPH_VERTICES.NAME],
217
+ right_on=[DISTANCES.SC_ID_ORIGIN, DISTANCES.SC_ID_DEST],
205
218
  )
206
- .query("abs(path_weight_x - path_weights) > 1e-13")
219
+ .query("abs(path_weights_x - path_weights) > 1e-13")
207
220
  )
208
221
 
209
222
  upstream_disagreement_w_precompute = (
210
- comparison_df[comparison_df["node_orientation"] == "upstream"]
223
+ comparison_df[
224
+ comparison_df["node_orientation"] == NEIGHBORHOOD_NETWORK_TYPES.UPSTREAM
225
+ ]
211
226
  .merge(
212
227
  precomputed_distances,
213
- left_on=["focal_sc_id", "name"],
214
- right_on=["sc_id_dest", "sc_id_origin"],
228
+ left_on=["focal_sc_id", NAPISTU_GRAPH_VERTICES.NAME],
229
+ right_on=[DISTANCES.SC_ID_DEST, DISTANCES.SC_ID_ORIGIN],
215
230
  )
216
- .query("abs(path_weight_x - path_upstream_weights) > 1e-13")
231
+ .query("abs(path_weights_x - path_upstream_weights) > 1e-13")
217
232
  )
218
233
 
219
234
  assert downstream_disagreement_w_precompute.shape[0] == 0