napistu 0.1.0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. napistu/__init__.py +1 -1
  2. napistu/consensus.py +1010 -513
  3. napistu/constants.py +24 -0
  4. napistu/gcs/constants.py +2 -2
  5. napistu/gcs/downloads.py +57 -25
  6. napistu/gcs/utils.py +21 -0
  7. napistu/identifiers.py +105 -6
  8. napistu/ingestion/constants.py +0 -1
  9. napistu/ingestion/obo.py +24 -8
  10. napistu/ingestion/psi_mi.py +20 -5
  11. napistu/ingestion/reactome.py +8 -32
  12. napistu/mcp/__init__.py +69 -0
  13. napistu/mcp/__main__.py +180 -0
  14. napistu/mcp/codebase.py +182 -0
  15. napistu/mcp/codebase_utils.py +298 -0
  16. napistu/mcp/constants.py +72 -0
  17. napistu/mcp/documentation.py +166 -0
  18. napistu/mcp/documentation_utils.py +235 -0
  19. napistu/mcp/execution.py +382 -0
  20. napistu/mcp/profiles.py +73 -0
  21. napistu/mcp/server.py +86 -0
  22. napistu/mcp/tutorials.py +124 -0
  23. napistu/mcp/tutorials_utils.py +230 -0
  24. napistu/mcp/utils.py +47 -0
  25. napistu/mechanism_matching.py +782 -26
  26. napistu/modify/constants.py +41 -0
  27. napistu/modify/curation.py +4 -1
  28. napistu/modify/gaps.py +243 -156
  29. napistu/modify/pathwayannot.py +26 -8
  30. napistu/network/neighborhoods.py +16 -7
  31. napistu/network/net_create.py +209 -54
  32. napistu/network/net_propagation.py +118 -0
  33. napistu/network/net_utils.py +1 -32
  34. napistu/rpy2/netcontextr.py +10 -7
  35. napistu/rpy2/rids.py +7 -5
  36. napistu/sbml_dfs_core.py +46 -29
  37. napistu/sbml_dfs_utils.py +37 -1
  38. napistu/source.py +8 -2
  39. napistu/utils.py +67 -8
  40. napistu-0.2.4.dev2.dist-info/METADATA +84 -0
  41. napistu-0.2.4.dev2.dist-info/RECORD +95 -0
  42. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/WHEEL +1 -1
  43. tests/conftest.py +11 -5
  44. tests/test_consensus.py +4 -1
  45. tests/test_gaps.py +127 -0
  46. tests/test_gcs.py +3 -2
  47. tests/test_igraph.py +14 -0
  48. tests/test_mcp_documentation_utils.py +13 -0
  49. tests/test_mechanism_matching.py +658 -0
  50. tests/test_net_propagation.py +89 -0
  51. tests/test_net_utils.py +83 -0
  52. tests/test_sbml.py +2 -0
  53. tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
  54. tests/test_utils.py +81 -0
  55. napistu-0.1.0.dist-info/METADATA +0 -56
  56. napistu-0.1.0.dist-info/RECORD +0 -77
  57. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
  58. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
  59. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/top_level.txt +0 -0
napistu/rpy2/netcontextr.py CHANGED
@@ -91,10 +91,14 @@ def sbml_dfs_to_rcpr_string_graph(
     )

     # Perform validations
-    assert dat_gene["s_id"].is_unique
-    assert dat_gene["sc_id"].is_unique
-    assert dat_gene[COL_GENE].is_unique
-    assert dat_gene[COL_GENE].hasnans is False
+    if not dat_gene["s_id"].is_unique:
+        raise ValueError("dat_gene['s_id'] must be unique")
+    if not dat_gene["sc_id"].is_unique:
+        raise ValueError("dat_gene['sc_id'] must be unique")
+    if not dat_gene[COL_GENE].is_unique:
+        raise ValueError("dat_gene[COL_GENE] must be unique")
+    if dat_gene[COL_GENE].hasnans:
+        raise ValueError("dat_gene[COL_GENE] must not have NaNs")

     # Reshape into the correct format
     dat_reactions = dat_gene[["sc_id", COL_GENE]].merge(
@@ -102,9 +106,8 @@ def sbml_dfs_to_rcpr_string_graph(
     )[[COL_GENE, "r_id"]]
     # assert that this has the correct shape, ie 2x the shape of the number
     # of reactions
-    assert (
-        dat_reactions.shape[0] == 2 * sbml_dfs.reactions.shape[0]
-    ), "There should be exactly 2 reactants per reactions"
+    if dat_reactions.shape[0] != 2 * sbml_dfs.reactions.shape[0]:
+        raise ValueError("There should be exactly 2 reactants per reaction")

     # This is the fastest way I found to reshape this into the
     # Edgelist format
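A recurring theme in this release is replacing bare `assert` statements with explicit exceptions. The sketch below is illustrative (not from the package): `assert` is stripped when Python runs with `-O`, so the validation silently vanishes, whereas a raised `ValueError` always fires and carries a readable message.

```python
import pandas as pd

def validate_unique(df: pd.DataFrame, col: str) -> None:
    # Unlike `assert df[col].is_unique`, this check survives `python -O`
    # and reports which column failed.
    if not df[col].is_unique:
        raise ValueError(f"df[{col!r}] must be unique")

validate_unique(pd.DataFrame({"s_id": ["a", "b"]}), "s_id")   # passes silently
# validate_unique(pd.DataFrame({"s_id": ["a", "a"]}), "s_id")  # raises ValueError
```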
napistu/rpy2/rids.py CHANGED
@@ -3,7 +3,6 @@ from __future__ import annotations
 import logging

 import pandas as pd
-from napistu import consensus
 from napistu import constants
 from napistu import identifiers
 from napistu import sbml_dfs_core
@@ -67,7 +66,8 @@ def expand_identifiers(

     # pull out all identifiers as a pd.DataFrame
     all_entity_identifiers = sbml_dfs.get_identifiers(id_type)
-    assert isinstance(all_entity_identifiers, pd.DataFrame)
+    if not isinstance(all_entity_identifiers, pd.DataFrame):
+        raise TypeError("all_entity_identifiers must be a pandas DataFrame")

     if id_type == "species":
         all_entity_identifiers = _check_species_identifiers_entrez_gene_ontology(
@@ -226,8 +226,10 @@ def create_bioconductor_mapping_tables(

     """

-    assert isinstance(mappings, set)
-    assert isinstance(species, str)
+    if not isinstance(mappings, set):
+        raise TypeError(f"mappings must be a set, but got {type(mappings).__name__}")
+    if not isinstance(species, str):
+        raise TypeError(f"species must be a str, but got {type(species).__name__}")

     logger.info(
         f"Creating mapping tables from entrez genes to/from {', '.join(mappings)}"
@@ -620,7 +622,7 @@ def connect_dogmatic_mappings(species: str, r_paths: str | None = None) -> dict:
     ]

     s_name_series = (
-        consensus._add_nameness_score(possible_names, IDENTIFIERS.IDENTIFIER)
+        utils._add_nameness_score(possible_names, IDENTIFIERS.IDENTIFIER)
        .sort_values(["ontology_preference", "nameness_score"])
        .groupby("cluster")
        .first()
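`_add_nameness_score` moved from `consensus` to `utils`, and the surrounding sort/group/first idiom is how a single preferred name is chosen per cluster. A minimal pandas sketch with invented data and stand-in column names (the real frame comes from `connect_dogmatic_mappings`):

```python
import pandas as pd

# Stand-in for the scored `possible_names` table; rows are hypothetical.
possible_names = pd.DataFrame(
    {
        "cluster": [0, 0, 1],
        "identifier": ["TP53", "tumor protein p53", "MYC"],
        "ontology_preference": [1, 2, 1],
        "nameness_score": [0.1, 0.9, 0.2],
    }
)

# After sorting, .groupby(...).first() keeps the best-ranked row per cluster.
best_names = (
    possible_names.sort_values(["ontology_preference", "nameness_score"])
    .groupby("cluster")
    .first()
)
print(best_names["identifier"].tolist())  # ['TP53', 'MYC']
```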
napistu/sbml_dfs_core.py CHANGED
@@ -8,7 +8,6 @@ from typing import Mapping
 from typing import MutableMapping
 from typing import TYPE_CHECKING

-import numpy as np
 import pandas as pd
 from napistu import identifiers
 from napistu import sbml_dfs_utils
@@ -171,7 +170,10 @@ class SBML_dfs:
         )

         if required_attributes is not None:
-            assert isinstance(required_attributes, set)
+            if not isinstance(required_attributes, set):
+                raise TypeError(
+                    f"required_attributes must be a set, but got {type(required_attributes).__name__}"
+                )

             # determine whether required_attributes are appropriate
             VALID_REQUIRED_ATTRIBUTES = {"id", "source", "label"}
@@ -221,7 +223,11 @@ class SBML_dfs:
             ).assert_present()

         if ontologies is not None:
-            assert isinstance(ontologies, set)
+            if not isinstance(ontologies, set):
+                # for clarity this should not be reachable based on type hints
+                raise TypeError(
+                    f"ontologies must be a set, but got {type(ontologies).__name__}"
+                )
             ALL_VALID_ONTOLOGIES = identifiers_df["ontology"].unique()
             invalid_ontologies = ontologies.difference(ALL_VALID_ONTOLOGIES)
             if len(invalid_ontologies) > 0:
@@ -289,10 +295,7 @@ class SBML_dfs:
             self.compartmentalized_species.join(cspecies_n_connections)
             .join(cspecies_n_children)
             .join(cspecies_n_parents)
-            .fillna(0)
-            .astype(
-                {"sc_degree": "int32", "sc_children": "int32", "sc_parents": "int32"}
-            )
+            .fillna(int(0))  # Explicitly fill with int(0) to avoid downcasting warning
             .merge(species_features, left_on="s_id", right_index=True)
             .drop(columns=["sc_name", "s_id", "c_id"])
         )
@@ -307,13 +310,16 @@ class SBML_dfs:

             if isinstance(id_entry, identifiers.Identifiers):
                 identifiers_dict[sysid] = pd.DataFrame(id_entry.ids)
-            elif np.isnan(id_entry):
+            elif pd.isna(id_entry):
                 continue
             else:
                 raise ValueError(
                     f"id_entry was a {type(id_entry)} and must either be"
-                    " an identifiers.Identifiers object or NaN"
+                    " an identifiers.Identifiers object or a missing value (None, np.nan, pd.NA)"
                 )
+        if not identifiers_dict:
+            # Return empty DataFrame with expected columns if nothing found
+            return pd.DataFrame(columns=[schema[id_type]["pk"], "entry"])
         identifiers_tbl = pd.concat(identifiers_dict)

         identifiers_tbl.index.names = [schema[id_type]["pk"], "entry"]
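Two pandas behaviors motivate this hunk: `np.isnan` is a float-only ufunc that blows up on `None` or `pd.NA`, while `pd.isna` handles every missing-value flavor; and `pd.concat` raises on an empty collection, hence the added early return. A standalone demonstration:

```python
import numpy as np
import pandas as pd

try:
    np.isnan(None)  # np.isnan only accepts floats
except TypeError as err:
    print("np.isnan(None) ->", err)

# pd.isna treats None, np.nan, and pd.NA uniformly.
print([pd.isna(v) for v in (None, np.nan, pd.NA)])  # [True, True, True]

try:
    pd.concat({})  # empty mapping: nothing to concatenate
except ValueError as err:
    print("pd.concat({}) ->", err)
```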
@@ -362,7 +368,7 @@ class SBML_dfs:
         all_ids = pd.concat(
             [
                 sbml_dfs_utils._stub_ids(
-                    entity_table[schema[entity_type]["id"]][i].ids
+                    entity_table[schema[entity_type]["id"]].iloc[i].ids
                 ).assign(id=entity_table.index[i])
                 for i in range(0, entity_table.shape[0])
             ]
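Several hunks in this file swap `series[i]` for `series.iloc[i]`. Plain integer keys on a pandas Series are label-based lookups whenever the index is not a default RangeIndex, so `[]` access is fragile for positional reads; `.iloc` is always positional. A quick illustration:

```python
import pandas as pd

s = pd.Series(["first", "second"], index=[10, 20])

try:
    s[0]  # 0 is interpreted as a *label*, which does not exist here
except KeyError:
    print("s[0] -> KeyError")

print(s.iloc[0])  # 'first' -- unambiguous positional access
```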
@@ -816,7 +822,10 @@ class SBML_dfs:
         _validate_matching_data(reactions_data_table, self.reactions)

     def _validate_reaction_species(self):
-        assert all(self.reaction_species[SBML_DFS.STOICHIOMETRY].notnull())
+        if not all(self.reaction_species[SBML_DFS.STOICHIOMETRY].notnull()):
+            raise ValueError(
+                "All reaction_species[SBML_DFS.STOICHIOMETRY] must be not null"
+            )

         # test for null SBO terms
         n_null_sbo_terms = sum(self.reaction_species[SBML_DFS.SBO_TERM].isnull())
@@ -963,7 +972,7 @@ def reaction_summary(r_id: str, sbml_dfs: SBML_dfs) -> pd.DataFrame:
                 augmented_matching_reaction_species, sbml_dfs.reactions, SBML_DFS.S_NAME
             )
             + " ["
-            + augmented_matching_reaction_species[SBML_DFS.C_NAME][0]
+            + augmented_matching_reaction_species[SBML_DFS.C_NAME].iloc[0]
             + "]"
         )
     else:
@@ -1118,9 +1127,14 @@ def construct_formula_string(
     ]

     rxn_reversible = bool(
-        reactions_df.loc[reaction_species_df[SBML_DFS.R_ID][0], SBML_DFS.R_ISREVERSIBLE]
+        reactions_df.loc[
+            reaction_species_df[SBML_DFS.R_ID].iloc[0], SBML_DFS.R_ISREVERSIBLE
+        ]
     )  # convert from a np.bool_ to bool if needed
-    assert isinstance(rxn_reversible, bool)
+    if not isinstance(rxn_reversible, bool):
+        raise TypeError(
+            f"rxn_reversible must be a bool, but got {type(rxn_reversible).__name__}"
+        )

     if rxn_reversible:
         arrow_type = " <-> "
@@ -1265,6 +1279,7 @@ def filter_to_characteristic_species_ids(
         data=[True] * len(promiscuous_component_identifiers_index),
         index=promiscuous_component_identifiers_index,
         name="is_shared_component",
+        dtype=bool,
     )

     if len(promiscuous_component_identifiers) == 0:
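The added `dtype=bool` guards the empty case: when the `data` list has zero elements, pandas cannot infer a dtype and falls back to `object`, which can break later boolean operations and joins. A quick check, outside the package:

```python
import pandas as pd

inferred = pd.Series(data=[True] * 0, index=[], name="is_shared_component")
print(inferred.dtype)  # object -- nothing to infer from

explicit = pd.Series(
    data=[True] * 0, index=[], name="is_shared_component", dtype=bool
)
print(explicit.dtype)  # bool
```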
@@ -1476,7 +1491,10 @@ def infer_sbo_terms(sbml_dfs: SBML_dfs) -> SBML_dfs:
         ~sbml_dfs.reaction_species[SBML_DFS.SBO_TERM].isin(MINI_SBO_TO_NAME.keys())
     ]

-    assert all(sbml_dfs.reaction_species[SBML_DFS.SBO_TERM].notnull())
+    if not all(sbml_dfs.reaction_species[SBML_DFS.SBO_TERM].notnull()):
+        raise ValueError(
+            "All sbml_dfs.reaction_species[SBML_DFS.SBO_TERM] must be not null"
+        )
     if invalid_sbo_terms.shape[0] == 0:
         logger.info("All sbo_terms were valid; returning input sbml_dfs")
         return sbml_dfs
@@ -1485,22 +1503,25 @@ def infer_sbo_terms(sbml_dfs: SBML_dfs) -> SBML_dfs:

     # add missing/invalid terms based on stoichiometry
     invalid_sbo_terms.loc[
-        invalid_sbo_terms[sbml_dfs.STOICHIOMETRY] < 0, SBML_DFS.SBO_TERM
+        invalid_sbo_terms[SBML_DFS.STOICHIOMETRY] < 0, SBML_DFS.SBO_TERM
     ] = MINI_SBO_FROM_NAME[SBOTERM_NAMES.REACTANT]

     invalid_sbo_terms.loc[
-        invalid_sbo_terms[sbml_dfs.STOICHIOMETRY] > 0, SBML_DFS.SBO_TERM
+        invalid_sbo_terms[SBML_DFS.STOICHIOMETRY] > 0, SBML_DFS.SBO_TERM
     ] = MINI_SBO_FROM_NAME[SBOTERM_NAMES.PRODUCT]

     invalid_sbo_terms.loc[
-        invalid_sbo_terms[sbml_dfs.STOICHIOMETRY] == 0, SBML_DFS.SBO_TERM
+        invalid_sbo_terms[SBML_DFS.STOICHIOMETRY] == 0, SBML_DFS.SBO_TERM
     ] = MINI_SBO_FROM_NAME[SBOTERM_NAMES.STIMULATOR]

     updated_reaction_species = pd.concat(
         [valid_sbo_terms, invalid_sbo_terms]
     ).sort_index()

-    assert sbml_dfs.reaction_species.shape[0] == updated_reaction_species.shape[0]
+    if sbml_dfs.reaction_species.shape[0] != updated_reaction_species.shape[0]:
+        raise ValueError(
+            f"Trying to overwrite {sbml_dfs.reaction_species.shape[0]} reaction_species with {updated_reaction_species.shape[0]}"
+        )
     sbml_dfs.reaction_species = updated_reaction_species

     return sbml_dfs
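Besides the assert swap, this hunk fixes what looks like a latent bug: `sbml_dfs.STOICHIOMETRY` looked the column name up on the SBML_dfs instance, while it is defined on the `SBML_DFS` constants class. The masked `.loc` assignment pattern itself, rebuilt with toy column names and standard SBO accessions (assumed to match the values in `MINI_SBO_FROM_NAME`):

```python
import pandas as pd

# Toy stand-in for invalid_sbo_terms; column names are illustrative.
rs = pd.DataFrame({"stoichiometry": [-1.0, 1.0, 0.0], "sbo_term": [None] * 3})

# .loc[row_mask, column] = value rewrites only the matching rows.
rs.loc[rs["stoichiometry"] < 0, "sbo_term"] = "SBO:0000010"   # reactant
rs.loc[rs["stoichiometry"] > 0, "sbo_term"] = "SBO:0000011"   # product
rs.loc[rs["stoichiometry"] == 0, "sbo_term"] = "SBO:0000459"  # stimulator
print(rs)
```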
@@ -1583,15 +1604,11 @@ def export_sbml_dfs(
         raise TypeError(
             f"sbml_dfs was a {type(sbml_dfs)} and must" " be an sbml.SBML_dfs"
         )
-    # select valid BQB attributes based on dogmatic flag
-    defining_biological_qualifiers = sbml_dfs_utils._dogmatic_to_defining_bqbs(dogmatic)
-
-    # pre-summarize ontologies
-    species_identifiers = sbml_dfs.get_identifiers(SBML_DFS.SPECIES)
-    # drop some BQB_HAS_PART annotations
-    species_identifiers = filter_to_characteristic_species_ids(
-        species_identifiers,
-        defining_biological_qualifiers=defining_biological_qualifiers,
+
+    # filter to identifiers which make sense when mapping from ids -> species
+    species_identifiers = sbml_dfs_utils.get_characteristic_species_ids(
+        sbml_dfs,
+        dogmatic=dogmatic,
     )

     try:
@@ -2000,7 +2017,7 @@ def find_underspecified_reactions(
             ),
             how="left",
         )
-        .fillna(False)[SBML_DFS.R_ID]
+        .fillna(False)[SBML_DFS.R_ID]  # Fill boolean column with False
        .tolist()
    )

napistu/sbml_dfs_utils.py CHANGED
@@ -13,6 +13,7 @@ import pandas as pd
 from napistu import utils
 from napistu import indices

+from napistu import sbml_dfs_core
 from napistu.constants import SBML_DFS
 from napistu.constants import IDENTIFIERS
 from napistu.constants import BQB_DEFINING_ATTRS
@@ -151,7 +152,6 @@ def adapt_pw_index(


 def _dogmatic_to_defining_bqbs(dogmatic: bool = False) -> str:
-    assert isinstance(dogmatic, bool)
     if dogmatic:
         logger.info(
             "Running in dogmatic mode - differences genes, transcripts, and proteins will "
@@ -268,6 +268,42 @@ def check_entity_data_index_matching(sbml_dfs, table):
     return sbml_dfs


+def get_characteristic_species_ids(
+    sbml_dfs: sbml_dfs_core.SBML_dfs, dogmatic: bool = True
+) -> pd.DataFrame:
+    """
+    Get Characteristic Species IDs
+
+    List the systematic identifiers which are characteristic of molecular species, e.g., excluding subcomponents, and optionally, treating proteins, transcripts, and genes equivalently.
+
+    Parameters
+    ----------
+    sbml_dfs : sbml_dfs_core.SBML_dfs
+        The SBML_dfs object.
+    dogmatic : bool, default=True
+        Whether to use the dogmatic flag to determine which BQB attributes are valid.
+
+    Returns
+    -------
+    pd.DataFrame
+        A DataFrame containing the systematic identifiers which are characteristic of molecular species.
+    """
+
+    # select valid BQB attributes based on dogmatic flag
+    defining_biological_qualifiers = _dogmatic_to_defining_bqbs(dogmatic)
+
+    # pre-summarize ontologies
+    species_identifiers = sbml_dfs.get_identifiers(SBML_DFS.SPECIES)
+
+    # drop some BQB_HAS_PART annotations
+    species_identifiers = sbml_dfs_core.filter_to_characteristic_species_ids(
+        species_identifiers,
+        defining_biological_qualifiers=defining_biological_qualifiers,
+    )
+
+    return species_identifiers
+
+
 def _dogmatic_to_defining_bqbs(dogmatic: bool = False) -> str:
     assert isinstance(dogmatic, bool)
     if dogmatic:
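This new helper consolidates logic previously inlined in `export_sbml_dfs` (see the sbml_dfs_core.py hunk above). A usage sketch, assuming `sbml_dfs` is an SBML_dfs object already in memory:

```python
from napistu import sbml_dfs_utils

# `sbml_dfs` is assumed to exist; dogmatic toggles which BQB qualifiers
# count as defining (see _dogmatic_to_defining_bqbs).
species_identifiers = sbml_dfs_utils.get_characteristic_species_ids(
    sbml_dfs,
    dogmatic=False,
)
```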
napistu/source.py CHANGED
@@ -191,7 +191,10 @@ def unnest_sources(
         # check that the entries of sourcevar are Source objects
         source_value = source_table[source_var].iloc[i]

-        assert isinstance(source_value, Source)
+        if not isinstance(source_value, Source):
+            raise TypeError(
+                f"source_value must be a Source, but got {type(source_value).__name__}"
+            )

         if source_value.source is None:
             print("Some sources were only missing - returning None")
@@ -310,7 +313,10 @@ def _deduplicate_source_df(source_df: pd.DataFrame, table_schema: dict) -> pd.DataFrame:
     )

     category_index = membership_categories.index.unique()
-    assert isinstance(category_index, pd.core.indexes.base.Index)
+    if not isinstance(category_index, pd.core.indexes.base.Index):
+        raise TypeError(
+            f"category_index must be a pandas Index, but got {type(category_index).__name__}"
+        )

     merged_sources = pd.concat(
         [
napistu/utils.py CHANGED
@@ -14,6 +14,8 @@ from contextlib import closing
 from itertools import starmap
 from typing import Any
 from typing import Union
+from typing import Optional
+from typing import List
 from urllib.parse import urlparse

 import igraph as ig
@@ -729,6 +731,62 @@ def ensure_pd_df(pd_df_or_series: pd.DataFrame | pd.Series) -> pd.DataFrame:
     )


+def drop_extra_cols(
+    df_in: pd.DataFrame,
+    df_out: pd.DataFrame,
+    always_include: Optional[List[str]] = None,
+) -> pd.DataFrame:
+    """Remove columns in df_out that are not in df_in, except those specified in always_include.
+
+    Parameters
+    ----------
+    df_in : pd.DataFrame
+        Reference DataFrame whose columns determine what to keep
+    df_out : pd.DataFrame
+        DataFrame to filter columns from
+    always_include : Optional[List[str]], optional
+        List of column names to always include in output, even if not in df_in
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with columns filtered to match df_in plus any always_include columns.
+        Column order follows df_in, with always_include columns appended at the end.
+
+    Examples
+    --------
+    >>> df_in = pd.DataFrame({'a': [1], 'b': [2]})
+    >>> df_out = pd.DataFrame({'a': [3], 'c': [4], 'd': [5]})
+    >>> drop_extra_cols(df_in, df_out)
+    # Returns DataFrame with just column 'a'
+
+    >>> drop_extra_cols(df_in, df_out, always_include=['d'])
+    # Returns DataFrame with columns ['a', 'd']
+    """
+    # Handle None case for always_include
+    if always_include is None:
+        always_include = []
+
+    # Get columns to retain: intersection with df_in plus always_include
+    retained_cols = df_in.columns.intersection(df_out.columns).union(always_include)
+
+    # Filter to only columns that exist in df_out
+    retained_cols = retained_cols.intersection(df_out.columns)
+
+    # Order columns: first those matching df_in's order, then any remaining always_include
+    ordered_cols = []
+    # Add columns that are in df_in in their original order
+    for col in df_in.columns:
+        if col in retained_cols:
+            ordered_cols.append(col)
+    # Add any remaining always_include columns that weren't in df_in
+    for col in always_include:
+        if col in retained_cols and col not in ordered_cols:
+            ordered_cols.append(col)
+
+    return df_out.loc[:, ordered_cols]
+
+
 def format_identifiers_as_edgelist(
     df: pd.DataFrame, defining_vars: list[str]
 ) -> pd.DataFrame:
@@ -750,14 +808,14 @@ def format_identifiers_as_edgelist(
         values of the index and defining_vars
     """

-    assert isinstance(df, pd.DataFrame)
     # requires a named index by convention
     if None in df.index.names:
         raise ValueError(
             "df did not have a named index. A named index or multindex is expected"
         )

-    assert isinstance(defining_vars, list)
+    if not isinstance(defining_vars, list):
+        raise TypeError("defining_vars must be a list")

     logger.info(
         f"creating an edgelist linking index levels {', '.join(df.index.names)} and linking it "
@@ -783,14 +841,15 @@ def format_identifiers_as_edgelist(
     return df


-def find_weakly_connected_subgraphs(edgelist):
+def find_weakly_connected_subgraphs(edgelist: pd.DataFrame) -> pd.DataFrame:
     """Find all cliques of loosely connected components."""

-    assert isinstance(edgelist, pd.DataFrame)
-    assert edgelist.shape[1] == 2
-    assert edgelist.columns.tolist() == ["ind", "id"]
-    # at least some entries in ind should start with ind because this is how we'll pull them out
-    assert any(edgelist["ind"].str.startswith("ind"))
+    if edgelist.shape[1] != 2:
+        raise ValueError("edgelist must have exactly 2 columns")
+    if edgelist.columns.tolist() != ["ind", "id"]:
+        raise ValueError("edgelist columns must be ['ind', 'id']")
+    if not any(edgelist["ind"].str.startswith("ind")):
+        raise ValueError("At least some entries in 'ind' must start with 'ind'")

     id_graph = ig.Graph.TupleList(edgelist.itertuples(index=False))
napistu-0.2.4.dev2.dist-info/METADATA ADDED
@@ -0,0 +1,84 @@
+Metadata-Version: 2.4
+Name: napistu
+Version: 0.2.4.dev2
+Summary: Connecting high-dimensional data to curated pathways
+Home-page: https://github.com/napistu/napistu-py
+Author: Sean Hackett
+Author-email: seanmchackett@gmail.com
+Project-URL: Documentation, https://napistu.readthedocs.io
+Project-URL: Source, https://github.com/napistu/napistu-py
+Project-URL: Tracker, https://github.com/napistu/napistu-py/issues
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: Jinja2
+Requires-Dist: PyYAML<7.0.0,>=6.0.0
+Requires-Dist: click<9.0.0,>=8.0.0
+Requires-Dist: click-logging
+Requires-Dist: fs<3.0.0,>=2.4.0
+Requires-Dist: fs-gcsfs<2.0.0,>=1.5.0
+Requires-Dist: igraph
+Requires-Dist: matplotlib<4.0.0,>=3.5.0
+Requires-Dist: numpy<3.0.0,>=1.24.0
+Requires-Dist: pandas<3.0.0,>=1.5.0
+Requires-Dist: pydantic<3.0.0,>=2.0.0
+Requires-Dist: python-libsbml
+Requires-Dist: requests>=2.25.0
+Requires-Dist: scipy<2.0.0,>=1.10.0
+Requires-Dist: tqdm
+Requires-Dist: zeep<4.0.0,>=3.0.0
+Provides-Extra: dev
+Requires-Dist: black>=24.0.0; extra == "dev"
+Requires-Dist: ipykernel; extra == "dev"
+Requires-Dist: pre-commit<4.0.0,>=3.0.0; extra == "dev"
+Requires-Dist: pytest<8.0.0,>=7.0.0; extra == "dev"
+Requires-Dist: pytest-asyncio; extra == "dev"
+Requires-Dist: pytest-cov; extra == "dev"
+Requires-Dist: ruff; extra == "dev"
+Requires-Dist: testcontainers; extra == "dev"
+Provides-Extra: mcp
+Requires-Dist: fastmcp<3.0.0,>=2.0.0; extra == "mcp"
+Requires-Dist: mcp<2.0.0,>=1.0.0; extra == "mcp"
+Requires-Dist: httpx>=0.24.0; extra == "mcp"
+Requires-Dist: beautifulsoup4<5.0.0,>=4.11.0; extra == "mcp"
+Requires-Dist: markdown>=3.4.0; extra == "mcp"
+Requires-Dist: jupyter-client>=7.0.0; extra == "mcp"
+Requires-Dist: nbformat>=5.0.0; extra == "mcp"
+Provides-Extra: rpy2
+Requires-Dist: pyarrow<19.0.0,>=15.0.0; extra == "rpy2"
+Requires-Dist: rpy2<4.0.0,>=3.5.0; extra == "rpy2"
+Requires-Dist: rpy2-arrow<1.0.0,>=0.1.0; extra == "rpy2"
+Dynamic: license-file
+
+# Napistu Python Library
+
+[![Documentation Status](https://readthedocs.org/projects/napistu/badge/?version=latest)](https://napistu.readthedocs.io/en/latest/?badge=latest)
+
+This Python package hosts the majority of the algorithmic code for the [Napistu project](https://github.com/napistu/napistu).
+
+## Setup
+
+Napistu is available on [PyPI](https://pypi.org/project/napistu) so the recommended way to use it is just to pip install with:
+
+```bash
+pip install napistu
+```
+
+Alternatively, you can clone this repository and perform a local install. e.g., from this directory:
+
+```bash
+pip install .
+```
+
+## Documentation
+📘 **Full Documentation**: [https://napistu.readthedocs.io](https://napistu.readthedocs.io)
+**Wiki**: [napistu/wiki](https://github.com/napistu/napistu/wiki)
+
+### Quick Links
+- [Installation Guide](https://napistu.readthedocs.io/en/latest/installation.html)
+- [API Reference](https://napistu.readthedocs.io/en/latest/api.html)
+- [Usage Examples](https://napistu.readthedocs.io/en/latest/usage.html)
napistu-0.2.4.dev2.dist-info/RECORD ADDED
@@ -0,0 +1,95 @@
+napistu/__init__.py,sha256=HEXcB6w8PCTD-tm4rq7AoFi7ufZNDuoG3EoM0kz9aPY,269
+napistu/__main__.py,sha256=i1OyReHD58GjyGYShXmMuBfA0VoGBF9dirg2nA4JCa8,28334
+napistu/consensus.py,sha256=UbKKSLP1O46e3Rk8d_aqNlhRHeR3sZRztAgIm7-XK6Y,69960
+napistu/constants.py,sha256=2I-JxgvZ1_0VG2J9wHkxIM9-axRS5_zp5KhbRJZzbYk,15431
+napistu/identifiers.py,sha256=cgtg4tankKnao4DZfAmvDUSf1fZxnsYSgblVKcpBPFQ,32316
+napistu/indices.py,sha256=UeJjjsYs0sGvZIKz1y4ZQ6aUkABn-6TCUDZ2VCVT9JI,7534
+napistu/mechanism_matching.py,sha256=sgHV3KI5zYKVBBzebBLWYo_LOeVwuMX9t4WD7kWmepY,49923
+napistu/sbml_dfs_core.py,sha256=lHdfLc1cnwMuKJa-cpQrHlH9nafOfqIQXd89U6k1iGg,80262
+napistu/sbml_dfs_utils.py,sha256=LJo6WWTrmnE58ZLDuibeeHk88uCdfunWdja7XxdZpps,11525
+napistu/source.py,sha256=9uUJrkY4jHaKlzz5nNcQQ8wUAep2pfqhlHxHw1hmEkI,13648
+napistu/utils.py,sha256=Fo1cidsXIGKsHYMo7NdPpguUK4nOuhuS9raYmx28_94,30203
+napistu/gcs/__init__.py,sha256=1kqmRHlEyI7VpILzurZb1URwC_UIc1PTMEBHQnjXW6s,246
+napistu/gcs/constants.py,sha256=g6PaU99GY5XvaRHx4BGmWHUpcJ36-Zh_GzeNVOeHviM,2856
+napistu/gcs/downloads.py,sha256=SvGv9WYr_Vt3guzyz1QiAuBndeKPTBtWSFLj1-QbLf4,6348
+napistu/gcs/utils.py,sha256=eLSsvewWJdCguyj2k0ozUGP5BTemaE1PZg41Z3aY5kM,571
+napistu/ingestion/__init__.py,sha256=1kqmRHlEyI7VpILzurZb1URwC_UIc1PTMEBHQnjXW6s,246
+napistu/ingestion/bigg.py,sha256=XPJZv64mrIMCuKe1mjQfS5QPR9tmengGvndSjc3QFLA,5559
+napistu/ingestion/constants.py,sha256=KrLFyIGI5WsyOROwRo6QIv0GZXeeSAM4BNX7rSw01QE,9856
+napistu/ingestion/cpr_edgelist.py,sha256=eVT9M7gmdBuGHcAYlvkD_zzvTtyzXufKWjwDiT8OxF4,3572
+napistu/ingestion/identifiers_etl.py,sha256=6ppDUA6lEZurdmVbiFLOUzphYbr-hndMhtqsQnq_yAc,5009
+napistu/ingestion/obo.py,sha256=AQkIPWbjA464Lma0tx91JucWkIwLjC7Jgv5VHGRTDkE,9601
+napistu/ingestion/psi_mi.py,sha256=5eJjm7XWogL9oTyGqR52kntHClLwLsTePKqCvUGyi-w,10111
+napistu/ingestion/reactome.py,sha256=Hn9X-vDp4o_HK-OtaQvel3vJeZ8_TC1-4N2rruK9Oks,7099
+napistu/ingestion/sbml.py,sha256=gK6_jHgo6oaiG16WlrbBSvxq_0VzFR4a5fG9IQrp5bU,24153
+napistu/ingestion/string.py,sha256=tsaHrjppgFbl9NnRcB2DytpoontqrpfQL65zD9HPgEM,11668
+napistu/ingestion/trrust.py,sha256=ccjZc_eF3PdxxurnukiEo_e0-aKc_3z22NYbaJBtHdY,9774
+napistu/ingestion/yeast.py,sha256=bwFBNxRq-dLDaddgBL1hpfZj0eQ56nBXyR_9n0NZT9Y,5233
+napistu/mcp/__init__.py,sha256=gDkP4J4vAjgq96_760lXKDURPUpQxVIxaRO9XzYrqdA,1970
+napistu/mcp/__main__.py,sha256=LzCbgk07XXk0R3JSzcxI2gSX3hZ9a389qt8xujPJR7E,5335
+napistu/mcp/codebase.py,sha256=H32R5vRJqsLpClURIUpemTO8F_YnFW1RDdaeEbYWotk,5674
+napistu/mcp/codebase_utils.py,sha256=r1nbDmGzq-NaH9cT11jC53mEjszQpwQ0uZUJkMHvgVs,10567
+napistu/mcp/constants.py,sha256=s0anHxVre6i6JYFQimB_ppRLDdqiCxbMHNVEYvf6O0U,2852
+napistu/mcp/documentation.py,sha256=L99GF5ilhq7CpzYkRy2BeOzKVQnolK_S9wqf8ChZY2Y,5216
+napistu/mcp/documentation_utils.py,sha256=JH3BfVk2dpSLUvnC77iaeTIRknOBpNMBQ2qhquUhuJM,7099
+napistu/mcp/execution.py,sha256=cYhLzIvihtLjG4J195FZuCM8uVihtgW-R6J4zuPAY5s,12422
+napistu/mcp/profiles.py,sha256=Nbr1e-n7muJMcY0HxuEJQePUQWPM2koQ9roVLEZa7Pg,2027
+napistu/mcp/server.py,sha256=Igl4SL3KIuWyByBahHz1K3TQ2Zk1rLbszofHULoHPWs,2649
+napistu/mcp/tutorials.py,sha256=QMX32aWaqRqj4vIc_PVDdT_t55ZBpw4xOWnfQyewZMk,3395
+napistu/mcp/tutorials_utils.py,sha256=6_s6FP2i8Na6VaKVzLDnSnA5JQVgyd3ZHEIz8HSHtzU,6599
+napistu/mcp/utils.py,sha256=WB4c6s8aPZLgi_Wvhhq0DE8Cnz2QGff0V8hrF1feVRg,1296
+napistu/modify/__init__.py,sha256=1kqmRHlEyI7VpILzurZb1URwC_UIc1PTMEBHQnjXW6s,246
+napistu/modify/constants.py,sha256=H6K6twzPlxt0yp6QLAxIx0Tp8YzYhtKKXPdmXi5V_QQ,3689
+napistu/modify/curation.py,sha256=sQeSO53ZLdn14ww2GSKkoP0vJnDpAoSWb-YDjUf5hDQ,21743
+napistu/modify/gaps.py,sha256=YrILH3e9kXIxm6kTrPYHZyfrUH5j5eLC-CHZ-A6XGhA,26739
+napistu/modify/pathwayannot.py,sha256=xuBSMDFWbg_d6-Gzv0Td3Q5nnFTa-Qzic48g1b1AZtQ,48081
+napistu/modify/uncompartmentalize.py,sha256=U5X4Q7Z-YIkC8_711x3sU21vTVdv9rKfauwz4JNzl6c,9690
+napistu/network/__init__.py,sha256=1kqmRHlEyI7VpILzurZb1URwC_UIc1PTMEBHQnjXW6s,246
+napistu/network/constants.py,sha256=jz8vRjgns74piUcvmoIP_f-8s9w15SxWAEw2lf6XmDY,3661
+napistu/network/neighborhoods.py,sha256=TkZjFWqmrYVxQftl1-2oLKOo4PDIFjxYlPjUHHIQOBk,55931
+napistu/network/net_create.py,sha256=G5xQNEZgrBAspdFbkkSyvDBSzWW1v1qZozP0YknfhS8,67331
+napistu/network/net_propagation.py,sha256=3PVKZ3zNkX_UZ6nq1LhnxgrojPF7BnyDVZjPKmwkf6I,3990
+napistu/network/net_utils.py,sha256=G_ciC7WinXnrGBMeNkv0-jyDNcT9EXBfmS8mBxLbQJE,20230
+napistu/network/paths.py,sha256=a2J3JWIdMufdNs8Amh6I7s3TOVD2EzLV9khqbWHvGlA,16652
+napistu/network/precompute.py,sha256=83Vr2pxCmEtJJmE_Lq1BI-pEmESDNG0N7vByXjBf_oQ,7517
+napistu/rpy2/__init__.py,sha256=B9tZHiEp6bvysjqvBRQ1aGY493Ks9kouwb0pW7KsKqA,4100
+napistu/rpy2/callr.py,sha256=76ICWj7Jso-qrYLNfiV-DgPyrMTdRXz_EhyGOD9CbFM,4301
+napistu/rpy2/constants.py,sha256=JpJqsxImZis8fFFfePXYdbkhUZhXDZoHSHVf92w1h8U,2619
+napistu/rpy2/netcontextr.py,sha256=fZKd3NXXu5nfB-z0UW6wedxBkZA8dJ4uB1uOD9wI_eg,16523
+napistu/rpy2/rids.py,sha256=lKyOujjdosuN-Oc54uCQI1nKchTx4zXPwgFbG7QX7d8,24153
+napistu-0.2.4.dev2.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
+tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tests/conftest.py,sha256=Xj5PWhedsR7HELbJfBY5XXzv_BAHbFMc1K2n6Nrbrno,2595
+tests/test_consensus.py,sha256=3dJvvPsPG7bHbw_FY4Pm647N_Gt_Ud9157OKYfPCUd4,9502
+tests/test_constants.py,sha256=gJLDv7QMeeBiiupyMazj6mumk20KWvGMgm2myHMKKfc,531
+tests/test_curation.py,sha256=-Q2J0D7qs9PGjHZX-rM4NxRLLdwxoapytSo_98q9ItY,3864
+tests/test_edgelist.py,sha256=bdEtQJdd4MeQsNtng9afHYNVDsEy0U07sfVwguAdIBM,560
+tests/test_gaps.py,sha256=mGGeyx1vUnmEOF__bsqfCYq9Y8a1P-9mefqna4Qgc_k,4062
+tests/test_gcs.py,sha256=p_uQWuY2TcGj3zV3qFC-GXBqj4St8YENR_XRpQ6UH5g,570
+tests/test_identifiers.py,sha256=RyuPAMhYI8cDOl2r62idweLxgy7rAs9omeZQ62h56kY,5019
+tests/test_igraph.py,sha256=vSGP13Bwaoznk6LsnOXoARxyh7VoT4fHvvTYcHT2lkw,11353
+tests/test_indices.py,sha256=-TrKfX4qXsofg_TPQEhHaQc_CuQMEd4_0maJgGCgSfE,2468
+tests/test_mcp_documentation_utils.py,sha256=lh9cKQIyMKntFVRG7Ui6oJ-gkawDta1Rfj_GEVG5k4M,470
+tests/test_mechanism_matching.py,sha256=RYNlOG2voocPMv78xuvVIDZ8GpPyEEXaQXpkokn-EEI,28813
+tests/test_net_propagation.py,sha256=9pKkUdduWejH4iKNCJXKFzAkdNpCfrMbiUWySgI_LH4,3244
+tests/test_net_utils.py,sha256=QxZvZ5dPt-Of4zTOKe3_4Lv1VTZI3ClHiJWt2NJXZd0,4708
+tests/test_netcontextr.py,sha256=PKH0D-8EL0HNrCMtF-fAaYv5Lao4mwVPDZLQ5LHJXqc,3399
+tests/test_obo.py,sha256=47qNCElPzu2nA36Oq83Dqp1RGhITqztjl7UyZ5cMsj4,959
+tests/test_pathwayannot.py,sha256=bceosccNy9tgxQei_7j7ATBSSvBSxOngJvK-mAzR_K0,3312
+tests/test_precomputed_distances.py,sha256=ht7lVz0wGOOQl9UTI1o9ftm0Dk7q8E40UV2jxVmE-Tg,7203
+tests/test_rpy2.py,sha256=beihvGlWsQA9U7V3tfqBIOUL-S8m8Nj84Bg2Wt2sNH8,1491
+tests/test_sbml.py,sha256=oEngnwT19GVnPYeoqoTU_p2o3wVFoHXQB50IwO5Ru_M,1319
+tests/test_sbml_dfs_core.py,sha256=iF_xDrfWFIoxYUjFJY7tUSK4Yw-Vi6YWNockMqhGvUA,11953
+tests/test_sbml_dfs_utils.py,sha256=onFWdhrTix30XR1-CMrMXld37BYxEGi6TZrweugLDzI,505
+tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
+tests/test_set_coverage.py,sha256=gM6Zl3MhVRHUi0_z0ISqpeXckWT8XdpXb58ipCoWyHU,1606
+tests/test_source.py,sha256=hT0IlpexR5zP0OhWl5BBaho9d1aCYQlFZLwRIRRnw_Y,1969
+tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRax8c,1289
+tests/test_utils.py,sha256=B9frW_ugWtGWsM-q7Lw7pnfE9a_d6LZTow9BrWU9UDw,17939
+tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
+tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+napistu-0.2.4.dev2.dist-info/METADATA,sha256=oG0eycgcedtrx6Fv4W7f-VkCYViLY5iaQNwi1_I9ZT8,3202
+napistu-0.2.4.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+napistu-0.2.4.dev2.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
+napistu-0.2.4.dev2.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
+napistu-0.2.4.dev2.dist-info/RECORD,,
{napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
