napistu 0.1.0__py3-none-any.whl → 0.2.4.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/__init__.py +1 -1
- napistu/consensus.py +1010 -513
- napistu/constants.py +24 -0
- napistu/gcs/constants.py +2 -2
- napistu/gcs/downloads.py +57 -25
- napistu/gcs/utils.py +21 -0
- napistu/identifiers.py +105 -6
- napistu/ingestion/constants.py +0 -1
- napistu/ingestion/obo.py +24 -8
- napistu/ingestion/psi_mi.py +20 -5
- napistu/ingestion/reactome.py +8 -32
- napistu/mcp/__init__.py +69 -0
- napistu/mcp/__main__.py +180 -0
- napistu/mcp/codebase.py +182 -0
- napistu/mcp/codebase_utils.py +298 -0
- napistu/mcp/constants.py +72 -0
- napistu/mcp/documentation.py +166 -0
- napistu/mcp/documentation_utils.py +235 -0
- napistu/mcp/execution.py +382 -0
- napistu/mcp/profiles.py +73 -0
- napistu/mcp/server.py +86 -0
- napistu/mcp/tutorials.py +124 -0
- napistu/mcp/tutorials_utils.py +230 -0
- napistu/mcp/utils.py +47 -0
- napistu/mechanism_matching.py +782 -26
- napistu/modify/constants.py +41 -0
- napistu/modify/curation.py +4 -1
- napistu/modify/gaps.py +243 -156
- napistu/modify/pathwayannot.py +26 -8
- napistu/network/neighborhoods.py +16 -7
- napistu/network/net_create.py +209 -54
- napistu/network/net_propagation.py +118 -0
- napistu/network/net_utils.py +1 -32
- napistu/rpy2/netcontextr.py +10 -7
- napistu/rpy2/rids.py +7 -5
- napistu/sbml_dfs_core.py +46 -29
- napistu/sbml_dfs_utils.py +37 -1
- napistu/source.py +8 -2
- napistu/utils.py +67 -8
- napistu-0.2.4.dev2.dist-info/METADATA +84 -0
- napistu-0.2.4.dev2.dist-info/RECORD +95 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/WHEEL +1 -1
- tests/conftest.py +11 -5
- tests/test_consensus.py +4 -1
- tests/test_gaps.py +127 -0
- tests/test_gcs.py +3 -2
- tests/test_igraph.py +14 -0
- tests/test_mcp_documentation_utils.py +13 -0
- tests/test_mechanism_matching.py +658 -0
- tests/test_net_propagation.py +89 -0
- tests/test_net_utils.py +83 -0
- tests/test_sbml.py +2 -0
- tests/{test_sbml_dfs_create.py → test_sbml_dfs_core.py} +68 -4
- tests/test_utils.py +81 -0
- napistu-0.1.0.dist-info/METADATA +0 -56
- napistu-0.1.0.dist-info/RECORD +0 -77
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/entry_points.txt +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.1.0.dist-info → napistu-0.2.4.dev2.dist-info}/top_level.txt +0 -0
napistu/modify/pathwayannot.py
CHANGED
@@ -24,6 +24,8 @@ from napistu.constants import ONTOLOGIES
|
|
24
24
|
from napistu.constants import ENSEMBL_PREFIX_TO_ONTOLOGY
|
25
25
|
from napistu.modify.constants import COFACTOR_SCHEMA
|
26
26
|
from napistu.modify.constants import COFACTOR_CHEBI_IDS
|
27
|
+
from napistu.modify.constants import NEO4_MEMBERS_SET
|
28
|
+
from napistu.modify.constants import REACTOME_CROSSREF_SET
|
27
29
|
|
28
30
|
logger = logging.getLogger(__name__)
|
29
31
|
|
@@ -499,12 +501,14 @@ def add_entity_sets(
|
|
499
501
|
reactome_members = _read_neo4j_members(neo4j_members)
|
500
502
|
|
501
503
|
# create missing species and compartmentalized species
|
504
|
+
logger.info("Adding entity set species")
|
502
505
|
(
|
503
506
|
merged_membership,
|
504
507
|
new_species_for_sbml_dfs,
|
505
508
|
set_component_species_ids,
|
506
509
|
) = _add_entity_sets_species(sbml_dfs, reactome_members)
|
507
510
|
|
511
|
+
logger.info("Adding complex formation species")
|
508
512
|
(
|
509
513
|
new_compartmentalized_species_for_sbml_dfs,
|
510
514
|
updated_compartmentalized_membership,
|
@@ -515,6 +519,7 @@ def add_entity_sets(
|
|
515
519
|
set_component_species_ids,
|
516
520
|
)
|
517
521
|
|
522
|
+
logger.info("Adding entity set reactions")
|
518
523
|
(
|
519
524
|
new_reactions_for_sbml_dfs,
|
520
525
|
new_reaction_species_for_sbml_dfs,
|
@@ -569,7 +574,8 @@ def add_reactome_identifiers(
|
|
569
574
|
|
570
575
|
"""
|
571
576
|
|
572
|
-
|
577
|
+
logger.info("Reading Reactome crossref ids")
|
578
|
+
select_reactome_ids = _read_reactome_crossref_ids(crossref_path)
|
573
579
|
|
574
580
|
# read all current identifiers
|
575
581
|
current_ids = sbml_dfs.get_identifiers(SBML_DFS.SPECIES)
|
@@ -582,6 +588,7 @@ def add_reactome_identifiers(
|
|
582
588
|
)
|
583
589
|
|
584
590
|
# combine existing s_ids with additional cross-ref annotations using uniprot ids
|
591
|
+
logger.info("Merging Reactome crossref ids with existing identifiers")
|
585
592
|
merged_crossrefs = _merge_reactome_crossref_ids(
|
586
593
|
current_molecular_ids, select_reactome_ids
|
587
594
|
)
|
@@ -637,7 +644,8 @@ def add_reactome_identifiers(
|
|
637
644
|
updated_identifiers.index.name = SBML_DFS.S_ID
|
638
645
|
updated_identifiers.name = "new_Identifiers"
|
639
646
|
|
640
|
-
# add new identifiers to species
|
647
|
+
# add new identifiers to species table
|
648
|
+
logger.info("Adding new identifiers to species table")
|
641
649
|
updated_species = sbml_dfs.species.merge(
|
642
650
|
updated_identifiers,
|
643
651
|
left_index=True,
|
@@ -1173,6 +1181,9 @@ def _read_neo4j_members(neo4j_members: str) -> pd.DataFrame:
|
|
1173
1181
|
with bfs.open(path, "rb") as f:
|
1174
1182
|
reactome_members = pd.read_csv(f).assign(url="")
|
1175
1183
|
|
1184
|
+
# check that the expected columns are present
|
1185
|
+
utils.match_pd_vars(reactome_members, NEO4_MEMBERS_SET).assert_present()
|
1186
|
+
|
1176
1187
|
reactome_members[IDENTIFIERS.ONTOLOGY] = reactome_members[
|
1177
1188
|
IDENTIFIERS.ONTOLOGY
|
1178
1189
|
].str.lower()
|
@@ -1295,9 +1306,13 @@ def _merge_reactome_crossref_ids(
|
|
1295
1306
|
].drop("_merge", axis=1)
|
1296
1307
|
|
1297
1308
|
merged_crossrefs = pd.concat([uni_rct_with_crossrefs, uni_no_rct_with_crossrefs])
|
1298
|
-
|
1299
|
-
uni_rct_with_crossrefs.shape[0] + uni_no_rct_with_crossrefs.shape[0]
|
1300
|
-
|
1309
|
+
if (
|
1310
|
+
not (uni_rct_with_crossrefs.shape[0] + uni_no_rct_with_crossrefs.shape[0])
|
1311
|
+
== merged_crossrefs.shape[0]
|
1312
|
+
):
|
1313
|
+
raise ValueError(
|
1314
|
+
"The number of merged crossrefs does not match the sum of the number of uniprot + reactome crossrefs and uniprot-only crossrefs"
|
1315
|
+
)
|
1301
1316
|
|
1302
1317
|
species_with_protein_and_no_gene = current_molecular_ids[
|
1303
1318
|
current_molecular_ids[IDENTIFIERS.ONTOLOGY] == ONTOLOGIES.UNIPROT
|
@@ -1332,9 +1347,9 @@ def _merge_reactome_crossref_ids(
|
|
1332
1347
|
return merged_crossrefs
|
1333
1348
|
|
1334
1349
|
|
1335
|
-
def _format_reactome_crossref_ids(
|
1350
|
+
def _read_reactome_crossref_ids(
|
1336
1351
|
crossref_path: str,
|
1337
|
-
) ->
|
1352
|
+
) -> pd.DataFrame:
|
1338
1353
|
"""
|
1339
1354
|
Format Reactome CrossRef IDs
|
1340
1355
|
|
@@ -1347,7 +1362,7 @@ def _format_reactome_crossref_ids(
|
|
1347
1362
|
|
1348
1363
|
Returns
|
1349
1364
|
-------
|
1350
|
-
select_reactome_ids:
|
1365
|
+
select_reactome_ids: pd.DataFrame
|
1351
1366
|
Crossref identifiers
|
1352
1367
|
|
1353
1368
|
"""
|
@@ -1357,6 +1372,9 @@ def _format_reactome_crossref_ids(
|
|
1357
1372
|
with bfs.open(path, "rb") as f:
|
1358
1373
|
reactome_ids = pd.read_csv(f)
|
1359
1374
|
|
1375
|
+
# check that the expected columns are present
|
1376
|
+
utils.match_pd_vars(reactome_ids, REACTOME_CROSSREF_SET).assert_present()
|
1377
|
+
|
1360
1378
|
# only use ensembl and pharos for now
|
1361
1379
|
|
1362
1380
|
# rename pharos ontology
|
napistu/network/neighborhoods.py
CHANGED
@@ -84,7 +84,8 @@ def find_and_prune_neighborhoods(
|
|
84
84
|
|
85
85
|
if isinstance(compartmentalized_species, str):
|
86
86
|
compartmentalized_species = [compartmentalized_species]
|
87
|
-
|
87
|
+
if not isinstance(compartmentalized_species, list):
|
88
|
+
raise TypeError("compartmentalized_species must be a list")
|
88
89
|
|
89
90
|
if isinstance(precomputed_distances, pd.DataFrame):
|
90
91
|
logger.info("Pre-computed neighbors based on precomputed_distances")
|
@@ -832,8 +833,8 @@ def add_vertices_uri_urls(
|
|
832
833
|
|
833
834
|
"""
|
834
835
|
|
835
|
-
|
836
|
-
|
836
|
+
if vertices.shape[0] <= 0:
|
837
|
+
raise ValueError("vertices must have at least one row")
|
837
838
|
|
838
839
|
# add uri urls for each node
|
839
840
|
|
@@ -880,7 +881,8 @@ def add_vertices_uri_urls(
|
|
880
881
|
[neighborhood_species_aug, neighborhood_reactions]
|
881
882
|
).fillna("")
|
882
883
|
|
883
|
-
|
884
|
+
if not isinstance(updated_vertices, pd.DataFrame):
|
885
|
+
raise TypeError("updated_vertices must be a pandas DataFrame")
|
884
886
|
if vertices.shape[0] != updated_vertices.shape[0]:
|
885
887
|
raise ValueError("output vertices rows did not match input")
|
886
888
|
|
@@ -1532,7 +1534,8 @@ def _calculate_path_attrs(
|
|
1532
1534
|
raise TypeError("neighborhood_paths should be a list of lists of edge indices")
|
1533
1535
|
if not isinstance(vertices, list):
|
1534
1536
|
raise TypeError("vertices should be a list of list of vertices")
|
1535
|
-
|
1537
|
+
if len(vertices) <= 0:
|
1538
|
+
raise ValueError("vertices must have length greater than zero")
|
1536
1539
|
if len(neighborhood_paths) != len(vertices):
|
1537
1540
|
raise ValueError("vertices and neighborhood_paths were not the same length")
|
1538
1541
|
|
@@ -1588,7 +1591,13 @@ def _calculate_path_attrs(
|
|
1588
1591
|
path_attributes_df = pd.concat([path_attributes_df, edgeles_nodes_df])
|
1589
1592
|
neighborhood_path_entities.update({x: {x} for x in edgeless_nodes})
|
1590
1593
|
|
1591
|
-
|
1592
|
-
|
1594
|
+
if path_attributes_df.shape[0] != len(neighborhood_paths):
|
1595
|
+
raise ValueError(
|
1596
|
+
"path_attributes_df row count must match number of neighborhood_paths"
|
1597
|
+
)
|
1598
|
+
if len(neighborhood_path_entities) != len(neighborhood_paths):
|
1599
|
+
raise ValueError(
|
1600
|
+
"neighborhood_path_entities length must match number of neighborhood_paths"
|
1601
|
+
)
|
1593
1602
|
|
1594
1603
|
return path_attributes_df, neighborhood_path_entities
|