napistu 0.1.0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. napistu/__init__.py +1 -1
  2. napistu/consensus.py +1010 -513
  3. napistu/constants.py +24 -0
  4. napistu/gcs/constants.py +2 -2
  5. napistu/gcs/downloads.py +57 -25
  6. napistu/gcs/utils.py +21 -0
  7. napistu/identifiers.py +105 -6
  8. napistu/ingestion/constants.py +0 -1
  9. napistu/ingestion/obo.py +24 -8
  10. napistu/ingestion/psi_mi.py +20 -5
  11. napistu/ingestion/reactome.py +8 -32
  12. napistu/mcp/__init__.py +69 -0
  13. napistu/mcp/__main__.py +180 -0
  14. napistu/mcp/codebase.py +182 -0
  15. napistu/mcp/codebase_utils.py +298 -0
  16. napistu/mcp/constants.py +72 -0
  17. napistu/mcp/documentation.py +166 -0
  18. napistu/mcp/documentation_utils.py +235 -0
  19. napistu/mcp/execution.py +382 -0
  20. napistu/mcp/profiles.py +73 -0
  21. napistu/mcp/server.py +86 -0
  22. napistu/mcp/tutorials.py +124 -0
  23. napistu/mcp/tutorials_utils.py +230 -0
  24. napistu/mcp/utils.py +47 -0
  25. napistu/mechanism_matching.py +782 -26
  26. napistu/modify/constants.py +41 -0
  27. napistu/modify/curation.py +4 -1
  28. napistu/modify/gaps.py +243 -156
  29. napistu/modify/pathwayannot.py +26 -8
  30. napistu/network/neighborhoods.py +16 -7
  31. napistu/network/net_create.py +209 -54
  32. napistu/network/net_propagation.py +118 -0
  33. napistu/network/net_utils.py +1 -32
  34. napistu/rpy2/netcontextr.py +10 -7
  35. napistu/rpy2/rids.py +7 -5
  36. napistu/sbml_dfs_core.py +46 -29
  37. napistu/sbml_dfs_utils.py +37 -1
  38. napistu/source.py +8 -2
  39. napistu/utils.py +67 -8
  40. napistu-0.2.4.dev2.dist-info/METADATA +84 -0
  41. napistu-0.2.4.dev2.dist-info/RECORD +95 -0
  42. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/WHEEL +1 -1
  43. tests/conftest.py +11 -5
  44. tests/test_consensus.py +4 -1
  45. tests/test_gaps.py +127 -0
  46. tests/test_gcs.py +3 -2
  47. tests/test_igraph.py +14 -0
  48. tests/test_mcp_documentation_utils.py +13 -0
  49. tests/test_mechanism_matching.py +658 -0
  50. tests/test_net_propagation.py +89 -0
  51. tests/test_net_utils.py +83 -0
  52. tests/test_sbml.py +2 -0
  53. tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
  54. tests/test_utils.py +81 -0
  55. napistu-0.1.0.dist-info/METADATA +0 -56
  56. napistu-0.1.0.dist-info/RECORD +0 -77
  57. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
  58. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
  59. {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/top_level.txt +0 -0
@@ -24,6 +24,8 @@ from napistu.constants import ONTOLOGIES
24
24
  from napistu.constants import ENSEMBL_PREFIX_TO_ONTOLOGY
25
25
  from napistu.modify.constants import COFACTOR_SCHEMA
26
26
  from napistu.modify.constants import COFACTOR_CHEBI_IDS
27
+ from napistu.modify.constants import NEO4_MEMBERS_SET
28
+ from napistu.modify.constants import REACTOME_CROSSREF_SET
27
29
 
28
30
  logger = logging.getLogger(__name__)
29
31
 
@@ -499,12 +501,14 @@ def add_entity_sets(
499
501
  reactome_members = _read_neo4j_members(neo4j_members)
500
502
 
501
503
  # create missing species and compartmentalized species
504
+ logger.info("Adding entity set species")
502
505
  (
503
506
  merged_membership,
504
507
  new_species_for_sbml_dfs,
505
508
  set_component_species_ids,
506
509
  ) = _add_entity_sets_species(sbml_dfs, reactome_members)
507
510
 
511
+ logger.info("Adding complex formation species")
508
512
  (
509
513
  new_compartmentalized_species_for_sbml_dfs,
510
514
  updated_compartmentalized_membership,
@@ -515,6 +519,7 @@ def add_entity_sets(
515
519
  set_component_species_ids,
516
520
  )
517
521
 
522
+ logger.info("Adding entity set reactions")
518
523
  (
519
524
  new_reactions_for_sbml_dfs,
520
525
  new_reaction_species_for_sbml_dfs,
@@ -569,7 +574,8 @@ def add_reactome_identifiers(
569
574
 
570
575
  """
571
576
 
572
- select_reactome_ids = _format_reactome_crossref_ids(crossref_path)
577
+ logger.info("Reading Reactome crossref ids")
578
+ select_reactome_ids = _read_reactome_crossref_ids(crossref_path)
573
579
 
574
580
  # read all current identifiers
575
581
  current_ids = sbml_dfs.get_identifiers(SBML_DFS.SPECIES)
@@ -582,6 +588,7 @@ def add_reactome_identifiers(
582
588
  )
583
589
 
584
590
  # combine existing s_ids with additional cross-ref annotations using uniprot ids
591
+ logger.info("Merging Reactome crossref ids with existing identifiers")
585
592
  merged_crossrefs = _merge_reactome_crossref_ids(
586
593
  current_molecular_ids, select_reactome_ids
587
594
  )
@@ -637,7 +644,8 @@ def add_reactome_identifiers(
637
644
  updated_identifiers.index.name = SBML_DFS.S_ID
638
645
  updated_identifiers.name = "new_Identifiers"
639
646
 
640
- # add new identifiers to species table
647
+ # add new identifiers to species table
648
+ logger.info("Adding new identifiers to species table")
641
649
  updated_species = sbml_dfs.species.merge(
642
650
  updated_identifiers,
643
651
  left_index=True,
@@ -1173,6 +1181,9 @@ def _read_neo4j_members(neo4j_members: str) -> pd.DataFrame:
1173
1181
  with bfs.open(path, "rb") as f:
1174
1182
  reactome_members = pd.read_csv(f).assign(url="")
1175
1183
 
1184
+ # check that the expected columns are present
1185
+ utils.match_pd_vars(reactome_members, NEO4_MEMBERS_SET).assert_present()
1186
+
1176
1187
  reactome_members[IDENTIFIERS.ONTOLOGY] = reactome_members[
1177
1188
  IDENTIFIERS.ONTOLOGY
1178
1189
  ].str.lower()
@@ -1295,9 +1306,13 @@ def _merge_reactome_crossref_ids(
1295
1306
  ].drop("_merge", axis=1)
1296
1307
 
1297
1308
  merged_crossrefs = pd.concat([uni_rct_with_crossrefs, uni_no_rct_with_crossrefs])
1298
- assert (
1299
- uni_rct_with_crossrefs.shape[0] + uni_no_rct_with_crossrefs.shape[0]
1300
- ) == merged_crossrefs.shape[0]
1309
+ if (
1310
+ not (uni_rct_with_crossrefs.shape[0] + uni_no_rct_with_crossrefs.shape[0])
1311
+ == merged_crossrefs.shape[0]
1312
+ ):
1313
+ raise ValueError(
1314
+ "The number of merged crossrefs does not match the sum of the number of uniprot + reactome crossrefs and uniprot-only crossrefs"
1315
+ )
1301
1316
 
1302
1317
  species_with_protein_and_no_gene = current_molecular_ids[
1303
1318
  current_molecular_ids[IDENTIFIERS.ONTOLOGY] == ONTOLOGIES.UNIPROT
@@ -1332,9 +1347,9 @@ def _merge_reactome_crossref_ids(
1332
1347
  return merged_crossrefs
1333
1348
 
1334
1349
 
1335
- def _format_reactome_crossref_ids(
1350
+ def _read_reactome_crossref_ids(
1336
1351
  crossref_path: str,
1337
- ) -> str:
1352
+ ) -> pd.DataFrame:
1338
1353
  """
1339
1354
  Format Reactome CrossRef IDs
1340
1355
 
@@ -1347,7 +1362,7 @@ def _format_reactome_crossref_ids(
1347
1362
 
1348
1363
  Returns
1349
1364
  -------
1350
- select_reactome_ids: str
1365
+ select_reactome_ids: pd.DataFrame
1351
1366
  Crossref identifiers
1352
1367
 
1353
1368
  """
@@ -1357,6 +1372,9 @@ def _format_reactome_crossref_ids(
1357
1372
  with bfs.open(path, "rb") as f:
1358
1373
  reactome_ids = pd.read_csv(f)
1359
1374
 
1375
+ # check that the expected columns are present
1376
+ utils.match_pd_vars(reactome_ids, REACTOME_CROSSREF_SET).assert_present()
1377
+
1360
1378
  # only use ensembl and pharos for now
1361
1379
 
1362
1380
  # rename pharos ontology
@@ -84,7 +84,8 @@ def find_and_prune_neighborhoods(
84
84
 
85
85
  if isinstance(compartmentalized_species, str):
86
86
  compartmentalized_species = [compartmentalized_species]
87
- assert isinstance(compartmentalized_species, list)
87
+ if not isinstance(compartmentalized_species, list):
88
+ raise TypeError("compartmentalized_species must be a list")
88
89
 
89
90
  if isinstance(precomputed_distances, pd.DataFrame):
90
91
  logger.info("Pre-computed neighbors based on precomputed_distances")
@@ -832,8 +833,8 @@ def add_vertices_uri_urls(
832
833
 
833
834
  """
834
835
 
835
- assert isinstance(vertices, pd.DataFrame)
836
- assert vertices.shape[0] > 0
836
+ if vertices.shape[0] <= 0:
837
+ raise ValueError("vertices must have at least one row")
837
838
 
838
839
  # add uri urls for each node
839
840
 
@@ -880,7 +881,8 @@ def add_vertices_uri_urls(
880
881
  [neighborhood_species_aug, neighborhood_reactions]
881
882
  ).fillna("")
882
883
 
883
- assert isinstance(updated_vertices, pd.DataFrame)
884
+ if not isinstance(updated_vertices, pd.DataFrame):
885
+ raise TypeError("updated_vertices must be a pandas DataFrame")
884
886
  if vertices.shape[0] != updated_vertices.shape[0]:
885
887
  raise ValueError("output vertices rows did not match input")
886
888
 
@@ -1532,7 +1534,8 @@ def _calculate_path_attrs(
1532
1534
  raise TypeError("neighborhood_paths should be a list of lists of edge indices")
1533
1535
  if not isinstance(vertices, list):
1534
1536
  raise TypeError("vertices should be a list of list of vertices")
1535
- assert len(vertices) > 0 # control for length zero vertices upstream
1537
+ if len(vertices) <= 0:
1538
+ raise ValueError("vertices must have length greater than zero")
1536
1539
  if len(neighborhood_paths) != len(vertices):
1537
1540
  raise ValueError("vertices and neighborhood_paths were not the same length")
1538
1541
 
@@ -1588,7 +1591,13 @@ def _calculate_path_attrs(
1588
1591
  path_attributes_df = pd.concat([path_attributes_df, edgeles_nodes_df])
1589
1592
  neighborhood_path_entities.update({x: {x} for x in edgeless_nodes})
1590
1593
 
1591
- assert path_attributes_df.shape[0] == len(neighborhood_paths)
1592
- assert len(neighborhood_path_entities) == len(neighborhood_paths)
1594
+ if path_attributes_df.shape[0] != len(neighborhood_paths):
1595
+ raise ValueError(
1596
+ "path_attributes_df row count must match number of neighborhood_paths"
1597
+ )
1598
+ if len(neighborhood_path_entities) != len(neighborhood_paths):
1599
+ raise ValueError(
1600
+ "neighborhood_path_entities length must match number of neighborhood_paths"
1601
+ )
1593
1602
 
1594
1603
  return path_attributes_df, neighborhood_path_entities