napistu 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. napistu/__init__.py +12 -0
  2. napistu/__main__.py +867 -0
  3. napistu/consensus.py +1557 -0
  4. napistu/constants.py +500 -0
  5. napistu/gcs/__init__.py +10 -0
  6. napistu/gcs/constants.py +69 -0
  7. napistu/gcs/downloads.py +180 -0
  8. napistu/identifiers.py +805 -0
  9. napistu/indices.py +227 -0
  10. napistu/ingestion/__init__.py +10 -0
  11. napistu/ingestion/bigg.py +146 -0
  12. napistu/ingestion/constants.py +296 -0
  13. napistu/ingestion/cpr_edgelist.py +106 -0
  14. napistu/ingestion/identifiers_etl.py +148 -0
  15. napistu/ingestion/obo.py +268 -0
  16. napistu/ingestion/psi_mi.py +276 -0
  17. napistu/ingestion/reactome.py +218 -0
  18. napistu/ingestion/sbml.py +621 -0
  19. napistu/ingestion/string.py +356 -0
  20. napistu/ingestion/trrust.py +285 -0
  21. napistu/ingestion/yeast.py +147 -0
  22. napistu/mechanism_matching.py +597 -0
  23. napistu/modify/__init__.py +10 -0
  24. napistu/modify/constants.py +86 -0
  25. napistu/modify/curation.py +628 -0
  26. napistu/modify/gaps.py +635 -0
  27. napistu/modify/pathwayannot.py +1381 -0
  28. napistu/modify/uncompartmentalize.py +264 -0
  29. napistu/network/__init__.py +10 -0
  30. napistu/network/constants.py +117 -0
  31. napistu/network/neighborhoods.py +1594 -0
  32. napistu/network/net_create.py +1647 -0
  33. napistu/network/net_utils.py +652 -0
  34. napistu/network/paths.py +500 -0
  35. napistu/network/precompute.py +221 -0
  36. napistu/rpy2/__init__.py +127 -0
  37. napistu/rpy2/callr.py +168 -0
  38. napistu/rpy2/constants.py +101 -0
  39. napistu/rpy2/netcontextr.py +464 -0
  40. napistu/rpy2/rids.py +697 -0
  41. napistu/sbml_dfs_core.py +2216 -0
  42. napistu/sbml_dfs_utils.py +304 -0
  43. napistu/source.py +394 -0
  44. napistu/utils.py +943 -0
  45. napistu-0.1.0.dist-info/METADATA +56 -0
  46. napistu-0.1.0.dist-info/RECORD +77 -0
  47. napistu-0.1.0.dist-info/WHEEL +5 -0
  48. napistu-0.1.0.dist-info/entry_points.txt +2 -0
  49. napistu-0.1.0.dist-info/licenses/LICENSE +21 -0
  50. napistu-0.1.0.dist-info/top_level.txt +2 -0
  51. tests/__init__.py +0 -0
  52. tests/conftest.py +83 -0
  53. tests/test_consensus.py +255 -0
  54. tests/test_constants.py +20 -0
  55. tests/test_curation.py +134 -0
  56. tests/test_data/__init__.py +0 -0
  57. tests/test_edgelist.py +20 -0
  58. tests/test_gcs.py +23 -0
  59. tests/test_identifiers.py +151 -0
  60. tests/test_igraph.py +353 -0
  61. tests/test_indices.py +88 -0
  62. tests/test_mechanism_matching.py +126 -0
  63. tests/test_net_utils.py +66 -0
  64. tests/test_netcontextr.py +105 -0
  65. tests/test_obo.py +34 -0
  66. tests/test_pathwayannot.py +95 -0
  67. tests/test_precomputed_distances.py +222 -0
  68. tests/test_rpy2.py +61 -0
  69. tests/test_sbml.py +46 -0
  70. tests/test_sbml_dfs_create.py +307 -0
  71. tests/test_sbml_dfs_utils.py +22 -0
  72. tests/test_sbo.py +11 -0
  73. tests/test_set_coverage.py +50 -0
  74. tests/test_source.py +67 -0
  75. tests/test_uncompartmentalize.py +40 -0
  76. tests/test_utils.py +487 -0
  77. tests/utils.py +30 -0
@@ -0,0 +1,56 @@
1
+ Metadata-Version: 2.4
2
+ Name: napistu
3
+ Version: 0.1.0
4
+ Summary: Connecting high-dimensional data to curated pathways
5
+ Home-page: https://github.com/napistu/napistu-py
6
+ Author: Sean Hackett
7
+ Author-email: seanmchackett@gmail.com
8
+ Project-URL: Bug Tracker, https://github.com/napistu/napistu-py/issues
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3 :: Only
13
+ Requires-Python: >=3.11
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Requires-Dist: Jinja2
17
+ Requires-Dist: PyYAML==6.*
18
+ Requires-Dist: click==8.*
19
+ Requires-Dist: click-logging
20
+ Requires-Dist: fs==2.4.*
21
+ Requires-Dist: fs-gcsfs==1.5.*
22
+ Requires-Dist: igraph
23
+ Requires-Dist: matplotlib==3.*
24
+ Requires-Dist: numpy==1.26.*
25
+ Requires-Dist: pandas==1.5.*
26
+ Requires-Dist: pydantic==2.*
27
+ Requires-Dist: python-libsbml
28
+ Requires-Dist: requests>=2
29
+ Requires-Dist: scipy==1.14.*
30
+ Requires-Dist: tqdm
31
+ Requires-Dist: zeep==3.*
32
+ Provides-Extra: dev
33
+ Requires-Dist: black==25.*; extra == "dev"
34
+ Requires-Dist: ipykernel; extra == "dev"
35
+ Requires-Dist: pre-commit==3.3.*; extra == "dev"
36
+ Requires-Dist: pytest==7.*; extra == "dev"
37
+ Requires-Dist: pytest-cov; extra == "dev"
38
+ Requires-Dist: ruff; extra == "dev"
39
+ Requires-Dist: testcontainers; extra == "dev"
40
+ Provides-Extra: rpy2
41
+ Requires-Dist: pyarrow==18.0.0; extra == "rpy2"
42
+ Requires-Dist: rpy2==3.5.*; extra == "rpy2"
43
+ Requires-Dist: rpy2-arrow==0.1.1; extra == "rpy2"
44
+ Dynamic: license-file
45
+
46
+ # Napistu Python Library
47
+
48
+ This Python package hosts the majority of the algorithmic code for the [Napistu project](https://github.com/napistu/napistu).
49
+
50
+ ## Setup
51
+
52
+ Currently the only way to use this repository is to clone the repo and perform a local install, e.g., from this directory:
53
+
54
+ ```bash
55
+ pip install .
56
+ ```
@@ -0,0 +1,77 @@
1
+ napistu/__init__.py,sha256=rz6NdV9Fm6a6bBR17VQPHeJQD4DUZWC7zR9a7nNMOhw,269
2
+ napistu/__main__.py,sha256=i1OyReHD58GjyGYShXmMuBfA0VoGBF9dirg2nA4JCa8,28334
3
+ napistu/consensus.py,sha256=p8GjWFzq1cvBB-H-LVSatWL_9fxbdYq2wsF4-JZnc_M,54641
4
+ napistu/constants.py,sha256=hQ1OLH07xFTxMukJLCptzqqHk22vgrByej8lvMb2qbc,14702
5
+ napistu/identifiers.py,sha256=wque0qsMZK2AMsAhkF1ERSMrEF7h6b5SMp3iqVu8e1o,28796
6
+ napistu/indices.py,sha256=UeJjjsYs0sGvZIKz1y4ZQ6aUkABn-6TCUDZ2VCVT9JI,7534
7
+ napistu/mechanism_matching.py,sha256=CPhtM6GERmGlBK8zH1cEvSpsasa0mG7ojLKDOze3dyE,21704
8
+ napistu/sbml_dfs_core.py,sha256=iSng-3cpJVpVMb340YGM6s0pFBTA9SyYCdYvHdZRSMA,79387
9
+ napistu/sbml_dfs_utils.py,sha256=j6Bu3acqOFSEbyVzASXhlnV8hQvi4k-vdMYzVMPzz5A,10318
10
+ napistu/source.py,sha256=oBgw2OZLVBETQG8Mwoc5ZUe-6cg_Yt6Mxsto3fCdw1k,13386
11
+ napistu/utils.py,sha256=G4IfG_WBSxkn5RBdPPn9sAkzrz2BdKgXlFFmMsB9wsA,28038
12
+ napistu/gcs/__init__.py,sha256=1kqmRHlEyI7VpILzurZb1URwC_UIc1PTMEBHQnjXW6s,246
13
+ napistu/gcs/constants.py,sha256=rc-oQBh6pdu7cjqTCerHG_fDub-FQcEjzWh2ic715cs,2844
14
+ napistu/gcs/downloads.py,sha256=EiOxLW1MMexdPTSiakWknTB-BGY1y__s2n1z9Sd8VYM,5033
15
+ napistu/ingestion/__init__.py,sha256=1kqmRHlEyI7VpILzurZb1URwC_UIc1PTMEBHQnjXW6s,246
16
+ napistu/ingestion/bigg.py,sha256=XPJZv64mrIMCuKe1mjQfS5QPR9tmengGvndSjc3QFLA,5559
17
+ napistu/ingestion/constants.py,sha256=TYATiVNrLyuQ1AvLVt35F1xQ8pQ3U19o_N6ZSkdW3PA,9941
18
+ napistu/ingestion/cpr_edgelist.py,sha256=eVT9M7gmdBuGHcAYlvkD_zzvTtyzXufKWjwDiT8OxF4,3572
19
+ napistu/ingestion/identifiers_etl.py,sha256=6ppDUA6lEZurdmVbiFLOUzphYbr-hndMhtqsQnq_yAc,5009
20
+ napistu/ingestion/obo.py,sha256=pszLLfImZxDYjL3WQUCow2hQFURROGHqIq3qbgVtzAM,8836
21
+ napistu/ingestion/psi_mi.py,sha256=Icj73EK75ytFPBw-TH2B6yW1ZWAmckmn5mtPl9pIxuA,9389
22
+ napistu/ingestion/reactome.py,sha256=-Q3GsAsfVkTD7cDD1fLEEnWQbI6vs7nxsdYInk7ZvVE,7907
23
+ napistu/ingestion/sbml.py,sha256=gK6_jHgo6oaiG16WlrbBSvxq_0VzFR4a5fG9IQrp5bU,24153
24
+ napistu/ingestion/string.py,sha256=tsaHrjppgFbl9NnRcB2DytpoontqrpfQL65zD9HPgEM,11668
25
+ napistu/ingestion/trrust.py,sha256=ccjZc_eF3PdxxurnukiEo_e0-aKc_3z22NYbaJBtHdY,9774
26
+ napistu/ingestion/yeast.py,sha256=bwFBNxRq-dLDaddgBL1hpfZj0eQ56nBXyR_9n0NZT9Y,5233
27
+ napistu/modify/__init__.py,sha256=1kqmRHlEyI7VpILzurZb1URwC_UIc1PTMEBHQnjXW6s,246
28
+ napistu/modify/constants.py,sha256=KHigix_8A8kCLWYVGR9_6_n34UNDcq2guDLC1KLeNZ4,2648
29
+ napistu/modify/curation.py,sha256=UNeAfJ26XDFvSwkPL8WHCAP0FQYiVUrSvJn3UIt5jy8,21607
30
+ napistu/modify/gaps.py,sha256=XqwfvzgJywA7ws5hzDlj22xs5tRGc4xOdbQ2v51UJqc,23983
31
+ napistu/modify/pathwayannot.py,sha256=onbQy9YNYPbeOih8fSxymxUQJc1jXjRIQOABv3xkvng,47183
32
+ napistu/modify/uncompartmentalize.py,sha256=U5X4Q7Z-YIkC8_711x3sU21vTVdv9rKfauwz4JNzl6c,9690
33
+ napistu/network/__init__.py,sha256=1kqmRHlEyI7VpILzurZb1URwC_UIc1PTMEBHQnjXW6s,246
34
+ napistu/network/constants.py,sha256=jz8vRjgns74piUcvmoIP_f-8s9w15SxWAEw2lf6XmDY,3661
35
+ napistu/network/neighborhoods.py,sha256=TopPpcUD09bAfJuT_L4dkHwJhV1VJJlzXpyzldYi85A,55512
36
+ napistu/network/net_create.py,sha256=9Rb5I6uLlL50SNADucsT_90F9k7rzmp2EQfEgNlr37E,60343
37
+ napistu/network/net_utils.py,sha256=cMWLOHlz4XvPA8PlPiSFYNDjAEv4t1qlZxwabBaZrK8,21188
38
+ napistu/network/paths.py,sha256=a2J3JWIdMufdNs8Amh6I7s3TOVD2EzLV9khqbWHvGlA,16652
39
+ napistu/network/precompute.py,sha256=83Vr2pxCmEtJJmE_Lq1BI-pEmESDNG0N7vByXjBf_oQ,7517
40
+ napistu/rpy2/__init__.py,sha256=B9tZHiEp6bvysjqvBRQ1aGY493Ks9kouwb0pW7KsKqA,4100
41
+ napistu/rpy2/callr.py,sha256=76ICWj7Jso-qrYLNfiV-DgPyrMTdRXz_EhyGOD9CbFM,4301
42
+ napistu/rpy2/constants.py,sha256=JpJqsxImZis8fFFfePXYdbkhUZhXDZoHSHVf92w1h8U,2619
43
+ napistu/rpy2/netcontextr.py,sha256=gkpBgrASNeH_8IjFyY-Tj-S87HjNOkGdfMta0WRdEnU,16278
44
+ napistu/rpy2/rids.py,sha256=sGMTRuOQRDpHBHZwfTS7uKUW9TBI_yMpht6SFhup8vw,23937
45
+ napistu-0.1.0.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
46
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
+ tests/conftest.py,sha256=uDuqgZKR37csoUI4t4U-pzEoANMFAIyTA4k8XLRu_Bw,2572
48
+ tests/test_consensus.py,sha256=u_5Hyocz2peNbJMd5ydgoAwQ4il0lFm-PrzrckTCitI,9403
49
+ tests/test_constants.py,sha256=gJLDv7QMeeBiiupyMazj6mumk20KWvGMgm2myHMKKfc,531
50
+ tests/test_curation.py,sha256=-Q2J0D7qs9PGjHZX-rM4NxRLLdwxoapytSo_98q9ItY,3864
51
+ tests/test_edgelist.py,sha256=bdEtQJdd4MeQsNtng9afHYNVDsEy0U07sfVwguAdIBM,560
52
+ tests/test_gcs.py,sha256=sq-zIDfmLIpZ5oFKCmyuaw9tfSzAY5hSnpuN-xqiqpk,561
53
+ tests/test_identifiers.py,sha256=RyuPAMhYI8cDOl2r62idweLxgy7rAs9omeZQ62h56kY,5019
54
+ tests/test_igraph.py,sha256=HoYeFAAFXWtxdonnUTpV59-jCLicNa0_utPaaGKXMAw,10926
55
+ tests/test_indices.py,sha256=-TrKfX4qXsofg_TPQEhHaQc_CuQMEd4_0maJgGCgSfE,2468
56
+ tests/test_mechanism_matching.py,sha256=gD_n2saM7yYa56QU0RMAYMKMAk7oF8ESbM7GHbI6bFY,4156
57
+ tests/test_net_utils.py,sha256=4HqfFF6yycAz7oQYRz6MefzQVQ_ZjWpeUEA4lUDOMJc,1614
58
+ tests/test_netcontextr.py,sha256=PKH0D-8EL0HNrCMtF-fAaYv5Lao4mwVPDZLQ5LHJXqc,3399
59
+ tests/test_obo.py,sha256=47qNCElPzu2nA36Oq83Dqp1RGhITqztjl7UyZ5cMsj4,959
60
+ tests/test_pathwayannot.py,sha256=bceosccNy9tgxQei_7j7ATBSSvBSxOngJvK-mAzR_K0,3312
61
+ tests/test_precomputed_distances.py,sha256=ht7lVz0wGOOQl9UTI1o9ftm0Dk7q8E40UV2jxVmE-Tg,7203
62
+ tests/test_rpy2.py,sha256=beihvGlWsQA9U7V3tfqBIOUL-S8m8Nj84Bg2Wt2sNH8,1491
63
+ tests/test_sbml.py,sha256=w_VU06psAP0Ku3B0flbP4hKhBfx2ZWV3oOdUgWzrMP4,1276
64
+ tests/test_sbml_dfs_create.py,sha256=w29mUcnC6g9Yqp8Q3b-oRQc5GiDvzjS5_GOE_LjwGZo,9982
65
+ tests/test_sbml_dfs_utils.py,sha256=onFWdhrTix30XR1-CMrMXld37BYxEGi6TZrweugLDzI,505
66
+ tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
67
+ tests/test_set_coverage.py,sha256=gM6Zl3MhVRHUi0_z0ISqpeXckWT8XdpXb58ipCoWyHU,1606
68
+ tests/test_source.py,sha256=hT0IlpexR5zP0OhWl5BBaho9d1aCYQlFZLwRIRRnw_Y,1969
69
+ tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRax8c,1289
70
+ tests/test_utils.py,sha256=knOWMN9xgaNLDj_4T_ZI3f22p1ZqovRLVDBFaMhOnFs,14845
71
+ tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
72
+ tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
+ napistu-0.1.0.dist-info/METADATA,sha256=bFJYn_d8Q0WfF5fpnE5tRZAqU3jNHuxXV0xyI-bt0yk,1830
74
+ napistu-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
75
+ napistu-0.1.0.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
76
+ napistu-0.1.0.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
77
+ napistu-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (78.1.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cpr = napistu.__main__:cli
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Calico
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,2 @@
1
+ napistu
2
+ tests
tests/__init__.py ADDED
File without changes
tests/conftest.py ADDED
@@ -0,0 +1,83 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import sys
5
+
6
+ from napistu import consensus
7
+ from napistu import indices
8
+ from napistu import sbml_dfs_core
9
+ from napistu.ingestion import sbml
10
+ from pytest import fixture
11
+ from pytest import skip
12
+
13
+
14
@fixture
def sbml_path():
    """Absolute path to the bundled R-HSA-1237044 SBML test file."""
    test_dir = os.path.abspath(os.path.join(__file__, os.pardir))
    sbml_path = os.path.join(test_dir, "test_data", "R-HSA-1237044.sbml")

    # fail fast with a clear message if the fixture data is missing
    if not os.path.isfile(sbml_path):
        raise ValueError(f"{sbml_path} not found")
    return sbml_path
22
+
23
+
24
@fixture
def sbml_model(sbml_path):
    """Parsed SBML object built from the test pathway file."""
    return sbml.SBML(sbml_path)
28
+
29
+
30
@fixture
def sbml_dfs(sbml_model):
    """Relational (SBML_dfs) representation of the test model."""
    return sbml_dfs_core.SBML_dfs(sbml_model)
34
+
35
+
36
@fixture
def sbml_dfs_metabolism():
    """Consensus SBML_dfs merged from the metabolism pathway index."""
    test_dir = os.path.abspath(os.path.join(__file__, os.pardir))
    data_dir = os.path.join(test_dir, "test_data")

    pw_index = indices.PWIndex(os.path.join(data_dir, "pw_index_metabolism.tsv"))
    dfs_dict = consensus.construct_sbml_dfs_dict(pw_index)
    return consensus.construct_consensus_model(dfs_dict, pw_index)
46
+
47
+
48
@fixture
def sbml_dfs_glucose_metabolism():
    """SBML_dfs for the Reactome glucose-metabolism pathway."""
    test_dir = os.path.abspath(os.path.join(__file__, os.pardir))
    model_path = os.path.join(
        test_dir, "test_data", "reactome_glucose_metabolism.sbml"
    )

    model = sbml.SBML(model_path).model
    return sbml_dfs_core.SBML_dfs(model)
58
+
59
+
60
# Define custom markers for platforms
def pytest_configure(config):
    """Register the custom platform-skip markers with pytest."""
    for marker in (
        "skip_on_windows: mark test to skip on Windows",
        "skip_on_macos: mark test to skip on macOS",
        "unix_only: mark test to run only on Unix/Linux systems",
    ):
        config.addinivalue_line("markers", marker)
65
+
66
# Define platform conditions; exactly one of the three flags is True per run
_platform = sys.platform
is_windows = _platform == "win32"
is_macos = _platform == "darwin"
is_unix = not (is_windows or is_macos)
70
+
71
# Apply skipping based on platform
def pytest_runtest_setup(item):
    """Skip tests whose platform markers conflict with the current OS."""
    markers = {mark.name for mark in item.iter_markers()}

    # Skip tests marked to be skipped on Windows
    if is_windows and "skip_on_windows" in markers:
        skip("Test skipped on Windows")

    # Skip tests marked to be skipped on macOS
    if is_macos and "skip_on_macos" in markers:
        skip("Test skipped on macOS")

    # Skip tests that should run only on Unix
    if not is_unix and "unix_only" in markers:
        skip("Test runs only on Unix systems")
@@ -0,0 +1,255 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ import pandas as pd
6
+ import pytest
7
+ from napistu import consensus
8
+ from napistu import indices
9
+ from napistu import source
10
+ from napistu.ingestion import sbml
11
+ from napistu.modify import pathwayannot
12
+
13
+ test_path = os.path.abspath(os.path.join(__file__, os.pardir))
14
+ test_data = os.path.join(test_path, "test_data")
15
+
16
+
17
+ def test_reduce_to_consensus_ids():
18
+ sbml_path = os.path.join(test_data, "R-HSA-1237044.sbml")
19
+
20
+ # test aggregating by IDs, by moving from compartmentalized_species -> species
21
+
22
+ sbml_model = sbml.SBML(sbml_path).model
23
+ comp_species_df = sbml.setup_cspecies(sbml_model)
24
+ comp_species_df.index.names = ["s_id"]
25
+ consensus_species, species_lookup = consensus.reduce_to_consensus_ids(
26
+ comp_species_df, {"pk": "s_id", "id": "s_Identifiers"}
27
+ )
28
+
29
+ assert isinstance(consensus_species, pd.DataFrame)
30
+ assert consensus_species.shape == (18, 4)
31
+ assert isinstance(species_lookup, pd.Series)
32
+ assert species_lookup.size == 23
33
+
34
+
35
+ def test_consensus():
36
+ pw_index = indices.PWIndex(os.path.join(test_data, "pw_index.tsv"))
37
+ sbml_dfs_dict = consensus.construct_sbml_dfs_dict(pw_index)
38
+
39
+ consensus_model = consensus.construct_consensus_model(sbml_dfs_dict, pw_index)
40
+ assert consensus_model.species.shape == (38, 3)
41
+ assert consensus_model.reactions.shape == (30, 4)
42
+ assert consensus_model.reaction_species.shape == (137, 4)
43
+
44
+ consensus_model = pathwayannot.drop_cofactors(consensus_model)
45
+ assert consensus_model.species.shape == (38, 3)
46
+ assert consensus_model.reaction_species.shape == (52, 4)
47
+ # update reaction_species.shape after more cofactors identified
48
+
49
+ consensus_model.validate()
50
+
51
+
52
+ def test_source_tracking():
53
+ # create input data
54
+ table_schema = {"source": "source_var", "pk": "primary_key"}
55
+
56
+ # define existing sources and the new_id entity they belong to
57
+ # here, we are assuming that each model has a blank source object
58
+ # as if it came from a non-consensus model
59
+ agg_tbl = pd.DataFrame(
60
+ {
61
+ "new_id": [0, 0, 1, 1],
62
+ }
63
+ )
64
+ agg_tbl[table_schema["source"]] = source.Source(init=True)
65
+
66
+ # define new_ids and the models they came from
67
+ # these models will be matched to the pw_index to flush out metadata
68
+ lookup_table = pd.DataFrame(
69
+ {
70
+ "new_id": [0, 0, 1, 1],
71
+ "model": ["R-HSA-1237044", "R-HSA-425381", "R-HSA-1237044", "R-HSA-425381"],
72
+ }
73
+ )
74
+
75
+ # use an existing pw_index since pw_index currently checks for the existence of the source file
76
+ pw_index = indices.PWIndex(os.path.join(test_data, "pw_index.tsv"))
77
+
78
+ # test create source table
79
+ source_table = source.create_source_table(lookup_table, table_schema, pw_index)
80
+ assert source_table["source_var"][0].source.shape == (2, 8)
81
+
82
+ # test create_consensus_sources
83
+ consensus_sources = consensus.create_consensus_sources(
84
+ agg_tbl, lookup_table, table_schema, pw_index
85
+ )
86
+ assert consensus_sources[0].source.shape == (2, 8)
87
+
88
+ # lets add a model which does not have a reference in the pw_index
89
+ invalid_lookup_table = pd.DataFrame(
90
+ {
91
+ "new_id": [0, 0, 1, 1],
92
+ "model": ["R-HSA-1237044", "R-HSA-425381", "R-HSA-1237044", "typo"],
93
+ }
94
+ )
95
+
96
+ # expect a ValueError when the model is not found
97
+ with pytest.raises(ValueError) as _:
98
+ source.create_source_table(invalid_lookup_table, table_schema, pw_index)
99
+
100
+ # now we will aggregate the consensus model above with a new single model (which has some
101
+ # overlapping entries with the consensusd (id 1) and some new ids (id 2)
102
+
103
+ agg_tbl2 = pd.DataFrame(
104
+ {
105
+ "new_id": [0, 1, 1, 2],
106
+ }
107
+ )
108
+
109
+ agg_tbl2[table_schema["source"]] = consensus_sources.tolist() + [
110
+ source.Source(init=True) for i in range(0, 2)
111
+ ]
112
+
113
+ lookup_table2 = pd.DataFrame(
114
+ {
115
+ "new_id": [0, 1, 1, 2],
116
+ # the model for the first two entries should really correspond to the "consensus"
117
+ # but since this is not a file I will stub with one of the pw_index entries
118
+ "model": [
119
+ "R-HSA-1247673",
120
+ "R-HSA-1247673",
121
+ "R-HSA-1475029",
122
+ "R-HSA-1475029",
123
+ ],
124
+ }
125
+ )
126
+
127
+ source_table = source.create_source_table(lookup_table2, table_schema, pw_index)
128
+ assert source_table.shape == (3, 1)
129
+ assert [
130
+ source_table["source_var"][i].source.shape
131
+ for i in range(0, source_table.shape[0])
132
+ ] == [(1, 8), (2, 8), (1, 8)]
133
+
134
+ consensus_sources = consensus.create_consensus_sources(
135
+ agg_tbl2, lookup_table2, table_schema, pw_index
136
+ )
137
+ assert [
138
+ consensus_sources[i].source.shape for i in range(0, consensus_sources.shape[0])
139
+ ] == [(3, 8), (4, 8), (1, 8)]
140
+
141
+
142
def test_passing_entity_data():
    """Entity data tables attached to individual models survive consensus merging.

    Covers three scenarios: perfect merges of identical attributes, distinct
    tables contributed by different models, and conflicting attributes on
    reactions that are merged across duplicated models.
    """
    pw_index = indices.PWIndex(os.path.join(test_data, "pw_index.tsv"))
    sbml_dfs_dict = consensus.construct_sbml_dfs_dict(pw_index)

    for model in list(sbml_dfs_dict.keys())[0:3]:
        sbml_dfs_dict[model].add_species_data(
            "my_species_data",
            sbml_dfs_dict[model]
            .species.iloc[0:5]
            .assign(my_species_data_var="testing")["my_species_data_var"]
            .to_frame(),
        )
        sbml_dfs_dict[model].add_reactions_data(
            "my_reactions_data",
            sbml_dfs_dict[model]
            .reactions.iloc[0:5]
            .assign(my_reactions_data_var1="testing")
            .assign(my_reactions_data_var2="testing2")[
                ["my_reactions_data_var1", "my_reactions_data_var2"]
            ],
        )

    # create a consensus with perfect merges of overlapping id-table-variable values
    # i.e., when combined all merged entries have the same attributes
    consensus_model = consensus.construct_consensus_model(sbml_dfs_dict, pw_index)

    assert len(consensus_model.species_data) == 1
    assert consensus_model.species_data["my_species_data"].shape == (10, 1)
    assert len(consensus_model.reactions_data) == 1
    assert consensus_model.reactions_data["my_reactions_data"].shape == (14, 2)

    # add different tables from different models
    for model in list(sbml_dfs_dict.keys())[3:5]:
        sbml_dfs_dict[model].add_species_data(
            "my_other_species_data",
            sbml_dfs_dict[model]
            .species.iloc[0:5]
            .assign(my_species_data="testing")["my_species_data"]
            .to_frame(),
        )

    consensus_model = consensus.construct_consensus_model(sbml_dfs_dict, pw_index)
    assert len(consensus_model.species_data) == 2

    # create a case where reactions will be merged and the same reaction
    # in different models has a different value for its reactions_data
    minimal_pw_index = pw_index
    minimal_pw_index.index = minimal_pw_index.index.iloc[0:2]
    minimal_pw_index.index["file"].loc[1] = minimal_pw_index.index["file"][0]

    duplicated_sbml_dfs_dict = consensus.construct_sbml_dfs_dict(minimal_pw_index)
    # explicitly define the order we'll loop through models so that
    # the position of a model can be used to set mismatching attributes
    # for otherwise identical models
    model_order = list(duplicated_sbml_dfs_dict.keys())

    for model in duplicated_sbml_dfs_dict.keys():
        model_index = model_order.index(model)

        duplicated_sbml_dfs_dict[model].add_reactions_data(
            "my_mismatched_data",
            duplicated_sbml_dfs_dict[model]
            .reactions.iloc[0:5]
            .assign(my_reactions_data_var1=model)["my_reactions_data_var1"]
            .to_frame()
            .assign(numeric_var=[x + model_index for x in range(0, 5)])
            # BUGFIX: the original `x + model_index % 2 == 0` parsed as
            # `x + (model_index % 2) == 0`; parenthesize so the parity
            # alternates with the model's position as intended
            .assign(bool_var=[(x + model_index) % 2 == 0 for x in range(0, 5)]),
        )

    # assign reversibility is True for one model to
    # confirm that reversibility trumps irreversible
    # when merging reactions with identical stoichiometry but
    # different reversibility attributes
    duplicated_sbml_dfs_dict["R-HSA-1237044"].reactions = duplicated_sbml_dfs_dict[
        "R-HSA-1237044"
    ].reactions.assign(r_isreversible=True)

    consensus_model = consensus.construct_consensus_model(
        duplicated_sbml_dfs_dict, pw_index
    )
    assert consensus_model.reactions_data["my_mismatched_data"].shape == (5, 3)
    assert consensus_model.reactions["r_isreversible"].eq(True).all()
225
+
226
+
227
def test_consensus_ontology_check():
    """Pre- and post-consensus ontology checks agree on the shared ontologies."""
    pw_index = indices.PWIndex(os.path.join(test_data, "pw_index.tsv"))

    dfs_dict = consensus.construct_sbml_dfs_dict(pw_index)
    model = consensus.construct_consensus_model(dfs_dict, pw_index)

    pre_shared_onto_sp_list, _pre_onto_df = consensus.pre_consensus_ontology_check(
        dfs_dict, "species"
    )
    assert set(pre_shared_onto_sp_list) == {"chebi", "reactome", "uniprot"}

    post_shared_onto_sp_set = consensus.post_consensus_species_ontology_check(model)
    assert post_shared_onto_sp_set == {"chebi", "reactome", "uniprot"}
244
+
245
+
246
################################################
# __main__
################################################

if __name__ == "__main__":
    # run every test in this module directly, outside of pytest
    for _test in (
        test_reduce_to_consensus_ids,
        test_consensus,
        test_source_tracking,
        test_passing_entity_data,
        test_consensus_ontology_check,
    ):
        _test()
@@ -0,0 +1,20 @@
1
+ from __future__ import annotations
2
+
3
+ from napistu import constants
4
+
5
+
6
def test_sbo_constants():
    """Sanity-check the SBO term constants.

    Every SBO term in the MINI_SBO set must have a role, and every assigned
    role must be one of the declared valid roles.
    """
    # all SBO terms in "MINI_SBO" set have a role
    assert set(constants.SBO_NAME_TO_ROLE.keys()) == set(
        constants.MINI_SBO_FROM_NAME.keys()
    )
    # all roles are valid; the original asserted on a non-empty list
    # comprehension, which is always truthy, so invalid roles could
    # never make this test fail — all() performs the intended check
    assert all(
        x in constants.VALID_SBO_ROLES for x in constants.SBO_NAME_TO_ROLE.values()
    )
13
+
14
+
15
################################################
# __main__
################################################

if __name__ == "__main__":
    # run the module's single test directly, outside of pytest
    test_sbo_constants()
tests/test_curation.py ADDED
@@ -0,0 +1,134 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ import pandas as pd
6
+ from napistu import sbml_dfs_core
7
+ from napistu.ingestion import sbml
8
+ from napistu.modify import curation
9
+
10
# locate the bundled fixture SBML file relative to this test module
test_path = os.path.dirname(os.path.abspath(__file__))
sbml_path = os.path.join(test_path, "test_data", "R-HSA-1237044.sbml")

# fail fast at import time if the fixture data is missing
if not os.path.isfile(sbml_path):
    raise ValueError(f"{sbml_path} not found")
15
+
16
# setup mock curations: one table of additions per entity type, plus a
# "remove" table of entities to delete, keyed the way the curation module expects
curation_dict = {
    "species": pd.DataFrame(
        [
            {
                "species": "hello",
                "uri": "http://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:35828",
                "curator": "Sean",
            },
            {"species": "good day", "uri": None, "curator": "Sean"},
        ]
    ),
    "compartmentalized_species": pd.DataFrame(
        [
            {
                "compartmentalized_species": "hello [cytosol]",
                "s_name": "hello",
                "c_name": "cytosol",
                "curator": "Sean",
            }
        ]
    ),
    "reactions": pd.DataFrame(
        [
            {
                "reactions": "there",
                "stoichiometry": "hello [cytosol] -> CO2 [cytosol]",
                "uri": None,
                "evidence": "how is",
                "curator": "Sean",
            },
            {
                "reactions": "where",
                "stoichiometry": "CO2 [cytosol] -> hello [cytosol]",
                "uri": None,
                "evidence": "your family",
                "curator": "Sean",
            },
        ]
    ),
    "reaction_species": pd.DataFrame(
        [
            {
                "reaction_species": "NADH [cytosol]",
                "r_name": "CYB5Rs reduce MetHb to HbA",
                "stoichiometry": 0,
                "sbo_term_name": "stimulator",
                "evidence": "weeeee",
                "curator": "Sean",
            }
        ]
    ),
    "remove": pd.DataFrame(
        [
            {"remove": "reaction_1237042", "table": "reactions", "variable": "r_id"},
            {
                "remove": "CYB5Rs reduce MetHb to HbA",
                "table": "reactions",
                "variable": "r_name",
            },
            {"remove": "CO2", "table": "species", "variable": "s_name"},
        ]
    ),
}
84
+
85
+ def test_remove_entities():
86
+ sbml_model = sbml.SBML(sbml_path)
87
+ sbml_dfs = sbml_dfs_core.SBML_dfs(sbml_model)
88
+ sbml_dfs.validate()
89
+
90
+ invalid_entities_dict = curation._find_invalid_entities(
91
+ sbml_dfs, curation_dict["remove"]
92
+ )
93
+ invalid_pks = set(invalid_entities_dict.keys())
94
+
95
+ assert invalid_pks == {"sc_id", "rsc_id", "r_id", "s_id"}
96
+
97
+ n_species = sbml_dfs.species.shape[0]
98
+ n_reactions = sbml_dfs.reactions.shape[0]
99
+ n_compartmentalized_species = sbml_dfs.compartmentalized_species.shape[0]
100
+ n_reaction_species = sbml_dfs.reaction_species.shape[0]
101
+ # should be untouched
102
+ n_compartments = sbml_dfs.compartments.shape[0]
103
+
104
+ sbml_dfs = curation._remove_entities(sbml_dfs, invalid_entities_dict)
105
+
106
+ assert n_species - sbml_dfs.species.shape[0] == 1
107
+ assert n_reactions - sbml_dfs.reactions.shape[0] == 2
108
+ assert (
109
+ n_compartmentalized_species - sbml_dfs.compartmentalized_species.shape[0] == 2
110
+ )
111
+ assert n_reaction_species - sbml_dfs.reaction_species.shape[0] == 14
112
+ assert n_compartments - sbml_dfs.compartments.shape[0] == 0
113
+
114
+
115
+ def test_add_entities():
116
+ sbml_model = sbml.SBML(sbml_path)
117
+ sbml_dfs = sbml_dfs_core.SBML_dfs(sbml_model)
118
+ sbml_dfs.validate()
119
+
120
+ new_entities = curation.format_curations(curation_dict, sbml_dfs)
121
+
122
+ assert new_entities["species"].shape == (2, 3)
123
+ assert new_entities["reactions"].shape == (2, 4)
124
+ assert new_entities["compartmentalized_species"].shape == (1, 4)
125
+ assert new_entities["reaction_species"].shape == (5, 4)
126
+
127
+
128
################################################
# __main__
################################################

if __name__ == "__main__":
    # run every test in this module directly, outside of pytest
    for _test in (test_remove_entities, test_add_entities):
        _test()
File without changes