PyPI - napistu - Versions diffs - 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl - Mend

napistu 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

napistu/__main__.py +38 -27
napistu/consensus.py +22 -27
napistu/constants.py +91 -65
napistu/context/filtering.py +2 -1
napistu/identifiers.py +3 -6
napistu/indices.py +3 -1
napistu/ingestion/bigg.py +6 -6
napistu/ingestion/sbml.py +298 -295
napistu/ingestion/string.py +16 -19
napistu/ingestion/trrust.py +22 -27
napistu/ingestion/yeast.py +2 -1
napistu/matching/interactions.py +4 -4
napistu/matching/species.py +1 -1
napistu/modify/uncompartmentalize.py +1 -1
napistu/network/net_create.py +1 -1
napistu/network/paths.py +1 -1
napistu/ontologies/dogma.py +2 -1
napistu/ontologies/genodexito.py +5 -1
napistu/ontologies/renaming.py +4 -0
napistu/sbml_dfs_core.py +1343 -2167
napistu/sbml_dfs_utils.py +1086 -143
napistu/utils.py +52 -41
{napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/METADATA +2 -2
{napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/RECORD +40 -40
tests/conftest.py +113 -13
tests/test_consensus.py +161 -4
tests/test_context_filtering.py +2 -2
tests/test_gaps.py +26 -15
tests/test_network_net_create.py +1 -1
tests/test_network_precompute.py +1 -1
tests/test_ontologies_genodexito.py +3 -0
tests/test_ontologies_mygene.py +3 -0
tests/test_ontologies_renaming.py +28 -24
tests/test_sbml_dfs_core.py +260 -211
tests/test_sbml_dfs_utils.py +194 -36
tests/test_utils.py +19 -0
{napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/WHEEL +0 -0
{napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/entry_points.txt +0 -0
{napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/licenses/LICENSE +0 -0
{napistu-0.3.5.dist-info → napistu-0.3.7.dist-info}/top_level.txt +0 -0

napistu/ingestion/string.py CHANGED Viewed

@@ -5,17 +5,14 @@ import logging
 import pandas as pd
 from napistu import identifiers
 from napistu import sbml_dfs_core
+from napistu import sbml_dfs_utils
 from napistu import source
 from napistu import utils
+from napistu.ingestion import napistu_edgelist
 from napistu.constants import BQB
 from napistu.constants import MINI_SBO_FROM_NAME
-from napistu.ingestion import napistu_edgelist
-from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
-from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
-from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
-from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
-from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
-from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
+from napistu.constants import ONTOLOGIES
+from napistu.constants import SBML_DFS
 from napistu.ingestion.constants import STRING_DOWNSTREAM_COMPARTMENT
 from napistu.ingestion.constants import STRING_DOWNSTREAM_NAME
 from napistu.ingestion.constants import STRING_PROTEIN_ID
@@ -136,10 +133,10 @@ def convert_string_to_sbml_dfs(
     # define identifier mapping from aliases to use:
     alias_to_identifier = {
-        "Ensembl_gene": ("ensembl_gene", BQB.IS_ENCODED_BY),
-        "Ensembl_transcript": ("ensembl_transcript", BQB.IS_ENCODED_BY),
-        "Ensembl_translation": ("ensembl_protein", BQB.IS),
-        "Ensembl_UniProt_AC": ("uniprot", BQB.IS),
+        "Ensembl_gene": (ONTOLOGIES.ENSEMBL_GENE, BQB.IS_ENCODED_BY),
+        "Ensembl_transcript": (ONTOLOGIES.ENSEMBL_TRANSCRIPT, BQB.IS_ENCODED_BY),
+        "Ensembl_translation": (ONTOLOGIES.ENSEMBL_PROTEIN, BQB.IS),
+        "Ensembl_UniProt_AC": (ONTOLOGIES.UNIPROT, BQB.IS),
     }
     # filter aliases to only keep required ones
@@ -158,7 +155,7 @@ def convert_string_to_sbml_dfs(
     # Define compartments
     # Currently we are mapping everything to the `CELLULAR_COMPONENT`
     # which is a catch-all go: for unknown localisation
-    compartments_df = sbml_dfs_core._stub_compartments()
+    compartments_df = sbml_dfs_utils.stub_compartments()
     # define interactions
     interaction_edgelist = _build_interactor_edgelist(uq_string_edgelist)
@@ -275,17 +272,17 @@ def _build_species_df(
     species_df = (
         pd.Series(
             list(set(edgelist[source_col]).union(edgelist[target_col])),
-            name=SBML_SPECIES_DICT_NAME,
+            name=SBML_DFS.S_NAME,
         )
         .to_frame()
-        .set_index(SBML_SPECIES_DICT_NAME, drop=False)
+        .set_index(SBML_DFS.S_NAME, drop=False)
         .apply(
             _get_identifiers,
             alias_to_identifier=alias_to_identifier,
             dat_alias=aliases,
             axis=1,
         )
-        .rename(SBML_SPECIES_DICT_IDENTIFIERS)
+        .rename(SBML_DFS.S_IDENTIFIERS)
         .reset_index()
     )
     return species_df
@@ -311,8 +308,8 @@ def _build_interactor_edgelist(
         **{
             STRING_UPSTREAM_COMPARTMENT: compartment,
             STRING_DOWNSTREAM_COMPARTMENT: compartment,
-            SMBL_REACTION_SPEC_SBO_TERM: sbo_interactor,
-            SMBL_REACTION_DICT_IDENTIFIERS: lambda x: identifiers.Identifiers([]),
+            SBML_DFS.SBO_TERM: sbo_interactor,
+            SBML_DFS.R_IDENTIFIERS: lambda x: identifiers.Identifiers([]),
         }
     )
     if add_reverse_interactions:
@@ -335,10 +332,10 @@ def _build_interactor_edgelist(
         )
     interaction_edgelist = dat
-    interaction_edgelist[SMBL_REACTION_DICT_NAME] = _build_string_reaction_name(
+    interaction_edgelist[SBML_DFS.R_NAME] = _build_string_reaction_name(
         dat[STRING_UPSTREAM_NAME], dat[STRING_DOWNSTREAM_NAME]
     )
-    interaction_edgelist[SMBL_REACTION_DICT_IS_REVERSIBLE] = True
+    interaction_edgelist[SBML_DFS.R_ISREVERSIBLE] = True
     return interaction_edgelist

napistu/ingestion/trrust.py CHANGED Viewed

@@ -8,16 +8,11 @@ from napistu import identifiers
 from napistu import sbml_dfs_core
 from napistu import source
 from napistu import utils
+from napistu.constants import BQB
+from napistu.constants import IDENTIFIERS
 from napistu.constants import MINI_SBO_FROM_NAME
 from napistu.constants import SBOTERM_NAMES
-from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_IDENTIFIERS
-from napistu.ingestion.constants import SBML_COMPARTMENT_DICT_NAME
-from napistu.ingestion.constants import SBML_SPECIES_DICT_IDENTIFIERS
-from napistu.ingestion.constants import SBML_SPECIES_DICT_NAME
-from napistu.ingestion.constants import SMBL_REACTION_DICT_IDENTIFIERS
-from napistu.ingestion.constants import SMBL_REACTION_DICT_IS_REVERSIBLE
-from napistu.ingestion.constants import SMBL_REACTION_DICT_NAME
-from napistu.ingestion.constants import SMBL_REACTION_SPEC_SBO_TERM
+from napistu.constants import SBML_DFS
 from napistu.ingestion.constants import SPECIES_FULL_NAME_HUMAN
 from napistu.ingestion.constants import STRING_DOWNSTREAM_COMPARTMENT
 from napistu.ingestion.constants import STRING_DOWNSTREAM_NAME
@@ -81,16 +76,16 @@ def convert_trrust_to_sbml_dfs(
     species_df = (
         pd.DataFrame(
             {
-                SBML_SPECIES_DICT_NAME: list(
+                SBML_DFS.S_NAME: list(
                     {*edge_summaries_df["from"], *edge_summaries_df["to"]}
                 )
             }
         )
         .merge(
-            uniprot_2_symbol.rename({TRRUST_SYMBOL: SBML_SPECIES_DICT_NAME}, axis=1),
+            uniprot_2_symbol.rename({TRRUST_SYMBOL: SBML_DFS.S_NAME}, axis=1),
             how="left",
         )
-        .set_index(SBML_SPECIES_DICT_NAME)
+        .set_index(SBML_DFS.S_NAME)
     )
     # create Identifiers objects for all species with uniprot IDs
@@ -106,14 +101,14 @@ def convert_trrust_to_sbml_dfs(
         [
             identifiers.Identifiers(
                 [
-                    identifiers.format_uri(uri=x, biological_qualifier_type="BQB_IS")
-                    for x in species_w_ids.loc[[ind]]["url"].tolist()
+                    identifiers.format_uri(uri=x, biological_qualifier_type=BQB.IS)
+                    for x in species_w_ids.loc[[ind]][IDENTIFIERS.URL].tolist()
                 ]
             )
             for ind in species_w_ids.index.unique()
         ],
         index=species_w_ids.index.unique(),
-    ).rename(SBML_SPECIES_DICT_IDENTIFIERS)
+    ).rename(SBML_DFS.S_IDENTIFIERS)
     # just retain s_name and s_Identifiers
     # this just needs a source object which will be added later
@@ -124,21 +119,21 @@ def convert_trrust_to_sbml_dfs(
         .merge(
             species_w_ids_series,
             how="left",
-            left_on=SBML_SPECIES_DICT_NAME,
+            left_on=SBML_DFS.S_NAME,
             right_index=True,
         )
         .reset_index(drop=True)
     )
     # stub genes with missing IDs
-    species_df[SBML_SPECIES_DICT_IDENTIFIERS] = species_df[SBML_SPECIES_DICT_IDENTIFIERS].fillna(  # type: ignore
+    species_df[SBML_DFS.S_IDENTIFIERS] = species_df[SBML_DFS.S_IDENTIFIERS].fillna(  # type: ignore
         value=identifiers.Identifiers([])
     )
     # define distinct compartments
     compartments_df = pd.DataFrame(
         {
-            SBML_COMPARTMENT_DICT_NAME: TRRUST_COMPARTMENT_NUCLEOPLASM,
-            SBML_COMPARTMENT_DICT_IDENTIFIERS: identifiers.Identifiers(
+            SBML_DFS.C_NAME: TRRUST_COMPARTMENT_NUCLEOPLASM,
+            SBML_DFS.C_IDENTIFIERS: identifiers.Identifiers(
                 [
                     identifiers.format_uri(
                         uri=identifiers.create_uri_url(
@@ -159,7 +154,7 @@ def convert_trrust_to_sbml_dfs(
         upstream_compartment=TRRUST_COMPARTMENT_NUCLEOPLASM,
         downstream_compartment=TRRUST_COMPARTMENT_NUCLEOPLASM,
     )
-    gene_gene_identifier_edgelist[SMBL_REACTION_DICT_NAME] = [
+    gene_gene_identifier_edgelist[SBML_DFS.R_NAME] = [
         f"{x} {y} of {z}"
         for x, y, z in zip(
             gene_gene_identifier_edgelist[STRING_UPSTREAM_NAME],
@@ -171,15 +166,15 @@ def convert_trrust_to_sbml_dfs(
     # convert relationships to SBO terms
     interaction_edgelist = gene_gene_identifier_edgelist.replace(
         {"sign": MINI_SBO_FROM_NAME}
-    ).rename({"sign": SMBL_REACTION_SPEC_SBO_TERM}, axis=1)
+    ).rename({"sign": SBML_DFS.SBO_TERM}, axis=1)
     # format pubmed identifiers of interactions
-    interaction_edgelist[SMBL_REACTION_DICT_IDENTIFIERS] = [
+    interaction_edgelist[SBML_DFS.R_IDENTIFIERS] = [
         _format_pubmed_for_interactions(x) for x in interaction_edgelist["reference"]
     ]
     # directionality: by default, set r_isreversible to False for TRRUST data
-    interaction_edgelist[SMBL_REACTION_DICT_IS_REVERSIBLE] = False
+    interaction_edgelist[SBML_DFS.R_ISREVERSIBLE] = False
     # reduce to essential variables
     interaction_edgelist = interaction_edgelist[
@@ -188,10 +183,10 @@ def convert_trrust_to_sbml_dfs(
             STRING_DOWNSTREAM_NAME,
             STRING_UPSTREAM_COMPARTMENT,
             STRING_DOWNSTREAM_COMPARTMENT,
-            SMBL_REACTION_DICT_NAME,
-            SMBL_REACTION_SPEC_SBO_TERM,
-            SMBL_REACTION_DICT_IDENTIFIERS,
-            SMBL_REACTION_DICT_IS_REVERSIBLE,
+            SBML_DFS.R_NAME,
+            SBML_DFS.SBO_TERM,
+            SBML_DFS.R_IDENTIFIERS,
+            SBML_DFS.R_ISREVERSIBLE,
         ]
     ]
@@ -277,7 +272,7 @@ def _format_pubmed_for_interactions(pubmed_set):
         url = identifiers.create_uri_url(ontology="pubmed", identifier=p, strict=False)
         if url is not None:
             valid_url = identifiers.format_uri(
-                uri=url, biological_qualifier_type="BQB_IS_DESCRIBED_BY"
+                uri=url, biological_qualifier_type=BQB.IS_DESCRIBED_BY
             )
             ids.append(valid_url)

napistu/ingestion/yeast.py CHANGED Viewed

@@ -3,6 +3,7 @@ from __future__ import annotations
 import pandas as pd
 from napistu import identifiers
 from napistu import sbml_dfs_core
+from napistu import sbml_dfs_utils
 from napistu import source
 from napistu import utils
 from napistu.constants import BQB
@@ -105,7 +106,7 @@ def convert_idea_kinetics_to_sbml_dfs(
     # Constant fields (for this data source)
     # setup compartments (just treat this as uncompartmentalized for now)
-    compartments_df = sbml_dfs_core._stub_compartments()
+    compartments_df = sbml_dfs_utils.stub_compartments()
     # Per convention unaggregated models receive an empty source
     interaction_source = source.Source(init=True)

napistu/matching/interactions.py CHANGED Viewed

@@ -40,7 +40,7 @@ def edgelist_to_pathway_species(
         pd.Dataframe containing a "identifier_upstream" and "identifier_downstream" variables used to to match entries
     species_identifiers: pd.DataFrame
         A table of molecular species identifiers produced from sbml_dfs.get_identifiers("species") generally using
-        sbml_dfs_core.export_sbml_dfs()
+        sbml_dfs.export_sbml_dfs()
     ontologies: set
         A set of ontologies used to match features to pathway species
     feature_id_var: str, default=FEATURE_ID_VAR_DEFAULT
@@ -138,7 +138,7 @@ def edgelist_to_scids(
         A mechanistic model
     species_identifiers: pd.DataFrame
         A table of molecular species identifiers produced from
-        sbml_dfs.get_identifiers("species") generally using sbml_dfs_core.export_sbml_dfs()
+        sbml_dfs.get_identifiers("species") generally using sbml_dfs.export_sbml_dfs()
     ontologies: set
         A set of ontologies used to match features to pathway species
@@ -218,7 +218,7 @@ def filter_to_direct_mechanistic_interactions(
     species_identifiers: pd.DataFrame
         A table of molecular species identifiers
         produced from sbml_dfs.get_identifiers("species") generally
-        using sbml_dfs_core.export_sbml_dfs()
+        using sbml_dfs.export_sbml_dfs()
     ontologies: set
         A set of ontologies used to match features to pathway species
@@ -342,7 +342,7 @@ def filter_to_indirect_mechanistic_interactions(
         A mechanistic model
     species_identifiers: pandas.DataFrame
         A table of molecular species identifiers produced from
-        sbml_dfs.get_identifiers("species") generally using sbml_dfs_core.export_sbml_dfs()
+        sbml_dfs.get_identifiers("species") generally using sbml_dfs.export_sbml_dfs()
     napistu_graph: igraph.Graph
         A network representation of the sbml_dfs model
     ontologies: set

napistu/matching/species.py CHANGED Viewed

@@ -33,7 +33,7 @@ def features_to_pathway_species(
         pd.Dataframe containing a "feature_identifiers_var" variable used to match entries
     species_identifiers: pd.DataFrame
         A table of molecular species identifiers produced from sbml_dfs.get_identifiers("species")
-        generally using sbml_dfs_core.export_sbml_dfs()
+        generally using sbml_dfs.export_sbml_dfs()
     ontologies: set
         A set of ontologies used to match features to pathway species
     feature_identifiers_var: str

napistu/modify/uncompartmentalize.py CHANGED Viewed

@@ -48,7 +48,7 @@ def uncompartmentalize_sbml_dfs(
         )
     # 1. update the compartments table to the stubbed default level: GO CELLULAR_COMPONENT
-    stubbed_compartment = sbml_dfs_core._stub_compartments().assign(
+    stubbed_compartment = sbml_dfs_utils.stub_compartments().assign(
         c_Source=_create_stubbed_source()
     )

napistu/network/net_create.py CHANGED Viewed

@@ -1697,7 +1697,7 @@ def _create_topology_weights(
         base_score (float): offset which will be added to all weights.
         protein_multiplier (int): multiplier for non-metabolite species (lower weight paths will tend to be selected).
         metabolite_multiplier (int): multiplier for metabolites [defined a species with a ChEBI ID).
-        unknown_multiplier (int): multiplier for species without any identifier. See sbml_dfs_core.species_type_types.
+        unknown_multiplier (int): multiplier for species without any identifier. See sbml_dfs_utils.species_type_types.
         scale_multiplier_by_meandegree (bool): if True then multipliers will be rescaled by the average number of
             connections a node has (i.e., its degree) so that weights will be relatively similar regardless of network
             size and sparsity.

napistu/network/paths.py CHANGED Viewed

@@ -489,7 +489,7 @@ def _label_path_reactions(sbml_dfs: sbml_dfs_core.SBML_dfs, paths_df: pd.DataFra
         reaction_info = (
             pd.concat(
                 [
-                    sbml_dfs_core.reaction_summaries(sbml_dfs, r_ids=x)
+                    sbml_dfs.reaction_formulas(r_ids=x)
                     for x in set(reaction_paths["node"])
                 ]
             )

napistu/ontologies/dogma.py CHANGED Viewed

@@ -4,6 +4,7 @@ import logging
 import pandas as pd
 from napistu import sbml_dfs_core
+from napistu import sbml_dfs_utils
 from napistu import source
 from napistu import identifiers
 from napistu import utils
@@ -59,7 +60,7 @@ def create_dogmatic_sbml_dfs(
     )
     # stub required but invariant variables
-    compartments_df = sbml_dfs_core._stub_compartments()
+    compartments_df = sbml_dfs_utils.stub_compartments()
     interaction_source = source.Source(init=True)
     # interactions table. This is required to create the sbml_dfs but we'll drop the info later

napistu/ontologies/genodexito.py CHANGED Viewed

@@ -356,7 +356,7 @@ class Genodexito:
             )
             logger.debug(
                 f"{ids.shape[0] - expanded_ids.shape[0]} "
-                "ids are not included in expanded ids"
+                "ids are not included in expanded ids. These will be filled with empty Identifiers"
             )
         else:
             matched_expanded_ids = expanded_ids
@@ -364,6 +364,10 @@ class Genodexito:
         updated_ids = ids.drop(SBML_DFS.S_IDENTIFIERS, axis=1).join(
             pd.DataFrame(matched_expanded_ids)
         )
+        # fill missing attributes with empty Identifiers
+        updated_ids[SBML_DFS.S_IDENTIFIERS] = updated_ids[
+            SBML_DFS.S_IDENTIFIERS
+        ].fillna(identifiers.Identifiers([]))
         setattr(sbml_dfs, "species", updated_ids)

napistu/ontologies/renaming.py CHANGED Viewed

@@ -72,6 +72,10 @@ def rename_species_ontologies(
     updated_species = sbml_dfs.species.drop(SBML_DFS.S_IDENTIFIERS, axis=1).join(
         pd.DataFrame(species_identifiers)
     )
+    # fill missing attributes with empty Identifiers
+    updated_species[SBML_DFS.S_IDENTIFIERS] = updated_species[
+        SBML_DFS.S_IDENTIFIERS
+    ].fillna(identifiers.Identifiers([]))
     setattr(sbml_dfs, "species", updated_species)

napistu 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

napistu 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl