napistu 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/constants.py +4 -10
- napistu/network/ig_utils.py +35 -0
- napistu/network/precompute.py +11 -4
- napistu/sbml_dfs_core.py +748 -543
- napistu/source.py +1 -1
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/METADATA +2 -2
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/RECORD +15 -15
- tests/test_network_ig_utils.py +36 -0
- tests/test_network_precompute.py +4 -1
- tests/test_sbml_dfs_core.py +220 -1
- tests/test_sbml_dfs_utils.py +47 -6
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/WHEEL +0 -0
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/entry_points.txt +0 -0
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.3.3.dist-info → napistu-0.3.5.dist-info}/top_level.txt +0 -0
napistu/sbml_dfs_core.py
CHANGED
@@ -32,7 +32,6 @@ from napistu.constants import SBOTERM_NAMES
|
|
32
32
|
from napistu.constants import SBO_ROLES_DEFS
|
33
33
|
from napistu.constants import ENTITIES_W_DATA
|
34
34
|
from napistu.constants import ENTITIES_TO_ENTITY_DATA
|
35
|
-
from napistu.constants import CHARACTERISTIC_COMPLEX_ONTOLOGIES
|
36
35
|
from napistu.ingestion.constants import GENERIC_COMPARTMENT
|
37
36
|
from napistu.ingestion.constants import COMPARTMENT_ALIASES
|
38
37
|
from napistu.ingestion.constants import COMPARTMENTS_GO_TERMS
|
@@ -1471,12 +1470,6 @@ def filter_to_characteristic_species_ids(
|
|
1471
1470
|
# add components within modestly sized protein complexes
|
1472
1471
|
# look at HAS_PART IDs
|
1473
1472
|
bqb_has_parts_species = species_ids[species_ids[IDENTIFIERS.BQB] == BQB.HAS_PART]
|
1474
|
-
# filter to genes
|
1475
|
-
bqb_has_parts_species = bqb_has_parts_species[
|
1476
|
-
bqb_has_parts_species[IDENTIFIERS.ONTOLOGY].isin(
|
1477
|
-
CHARACTERISTIC_COMPLEX_ONTOLOGIES
|
1478
|
-
)
|
1479
|
-
]
|
1480
1473
|
|
1481
1474
|
# number of species in a complex
|
1482
1475
|
n_species_components = bqb_has_parts_species.value_counts(
|
@@ -1488,38 +1481,10 @@ def filter_to_characteristic_species_ids(
|
|
1488
1481
|
].index.get_level_values(SBML_DFS.S_ID)
|
1489
1482
|
)
|
1490
1483
|
|
1491
|
-
|
1492
|
-
|
1493
|
-
[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER]
|
1494
|
-
)
|
1495
|
-
promiscuous_component_identifiers_index = n_complexes_involvedin[
|
1496
|
-
n_complexes_involvedin > max_promiscuity
|
1497
|
-
].index
|
1498
|
-
promiscuous_component_identifiers = pd.Series(
|
1499
|
-
data=[True] * len(promiscuous_component_identifiers_index),
|
1500
|
-
index=promiscuous_component_identifiers_index,
|
1501
|
-
name="is_shared_component",
|
1502
|
-
dtype=bool,
|
1503
|
-
)
|
1504
|
-
|
1505
|
-
if len(promiscuous_component_identifiers) == 0:
|
1506
|
-
# no complexes to filter
|
1507
|
-
return species_ids
|
1508
|
-
|
1509
|
-
filtered_bqb_has_parts = bqb_has_parts_species.merge(
|
1510
|
-
promiscuous_component_identifiers,
|
1511
|
-
left_on=[IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER],
|
1512
|
-
right_index=True,
|
1513
|
-
how="left",
|
1484
|
+
filtered_bqb_has_parts = _filter_promiscuous_components(
|
1485
|
+
bqb_has_parts_species, max_promiscuity
|
1514
1486
|
)
|
1515
1487
|
|
1516
|
-
filtered_bqb_has_parts["is_shared_component"] = filtered_bqb_has_parts[
|
1517
|
-
"is_shared_component"
|
1518
|
-
].fillna(False)
|
1519
|
-
# drop identifiers shared as components across many species
|
1520
|
-
filtered_bqb_has_parts = filtered_bqb_has_parts[
|
1521
|
-
~filtered_bqb_has_parts["is_shared_component"]
|
1522
|
-
].drop(["is_shared_component"], axis=1)
|
1523
1488
|
# drop species parts if there are many components
|
1524
1489
|
filtered_bqb_has_parts = filtered_bqb_has_parts[
|
1525
1490
|
~filtered_bqb_has_parts[SBML_DFS.S_ID].isin(big_complex_sids)
|
@@ -1887,550 +1852,199 @@ def sbml_dfs_from_edgelist(
|
|
1887
1852
|
keep_reactions_data: bool | str = False,
|
1888
1853
|
) -> SBML_dfs:
|
1889
1854
|
"""
|
1890
|
-
Create SBML_dfs from
|
1891
|
-
|
1892
|
-
Combine a set of interactions into an sbml.SBML_dfs mechanistic model
|
1855
|
+
Create SBML_dfs from interaction edgelist.
|
1893
1856
|
|
1894
|
-
|
1895
|
-
|
1896
|
-
- upstream_name (str): matching "s_name" from "species_df"
|
1897
|
-
- downstream_name (str): matching "s_name" from "species_df"
|
1898
|
-
- upstream_compartment (str): compartment of "upstream_name"
|
1899
|
-
with names matching "c_name" from "compartments_df"
|
1900
|
-
- downstream_compartment (str): compartment of "downstream_name"
|
1901
|
-
with names matching "c_name" from "compartments_df"
|
1902
|
-
- r_name (str): a name for the interaction
|
1903
|
-
- sbo_term (str): sbo term defining the type of
|
1904
|
-
molecular interaction (see MINI_SBO_FROM_NAME)
|
1905
|
-
- r_Identifiers (identifiers.Identifiers): identifiers
|
1906
|
-
supporting the interaction (e.g., pubmed ids)
|
1907
|
-
- r_isreversible (bool): Is this reaction reversible?
|
1908
|
-
If True, the reaction is reversible
|
1909
|
-
By default, the interactions of TRRUST networks are irreversible, and reversible for STRING networks
|
1910
|
-
species_df (pd.DataFrame): A table defining unique molecular
|
1911
|
-
species participating in "interaction_edgelist":
|
1912
|
-
- s_name (str): name of molecular species
|
1913
|
-
- s_Identifiers (identifiers.Identifiers): identifiers
|
1914
|
-
defining the species
|
1915
|
-
compartments_df (pd.DataFrame): A table defining compartments
|
1916
|
-
where interactions are occurring "interaction_edgelist":
|
1917
|
-
- c_name (str): name of compartment
|
1918
|
-
- c_Identifiers (identifiers.Identifiers):
|
1919
|
-
identifiers defining the compartment (see
|
1920
|
-
bigg.annotate_recon() for a set of names > go categories)
|
1921
|
-
interaction_source (source.Source): A source object
|
1922
|
-
which will tie model entities to the interaction source
|
1923
|
-
upstream_stoichiometry (int): stoichiometry of
|
1924
|
-
upstream species in reaction
|
1925
|
-
downstream_stoichiometry (int): stoichiometry of
|
1926
|
-
downstream species in reaction
|
1927
|
-
downstream_sbo_name (str): sbo term defining the
|
1928
|
-
type of molecular interaction for the downstream reactand
|
1929
|
-
(see MINI_SBO_FROM_NAME)
|
1930
|
-
keep_species_data (bool | str): Should species data
|
1931
|
-
be kept in the model? If True, all species data will be kept
|
1932
|
-
and saved as "species_data" in the SBML_dfs. The label will be 'source'
|
1933
|
-
If False, no species data will be kept.
|
1934
|
-
If a string: label for the species data to be kept.
|
1935
|
-
keep_reactions_data (bool | str): Should reaction data be kept in the model?
|
1936
|
-
If True, all reaction data will be kept and saved
|
1937
|
-
as "reactions_data" in the SBML_dfs. The label will be 'source'.
|
1938
|
-
If False, no reaction data will be kept.
|
1939
|
-
If a string: label for the reaction data to be kept.
|
1940
|
-
|
1941
|
-
Returns:
|
1942
|
-
sbml.SBML_dfs
|
1857
|
+
Combines a set of molecular interactions into a mechanistic SBML_dfs model
|
1858
|
+
by processing interaction data, species information, and compartment definitions.
|
1943
1859
|
|
1860
|
+
Parameters
|
1861
|
+
----------
|
1862
|
+
interaction_edgelist : pd.DataFrame
|
1863
|
+
Table containing molecular interactions with columns:
|
1864
|
+
- upstream_name : str, matches "s_name" from species_df
|
1865
|
+
- downstream_name : str, matches "s_name" from species_df
|
1866
|
+
- upstream_compartment : str, matches "c_name" from compartments_df
|
1867
|
+
- downstream_compartment : str, matches "c_name" from compartments_df
|
1868
|
+
- r_name : str, name for the interaction
|
1869
|
+
- sbo_term : str, SBO term defining interaction type
|
1870
|
+
- r_Identifiers : identifiers.Identifiers, supporting identifiers
|
1871
|
+
- r_isreversible : bool, whether reaction is reversible
|
1872
|
+
species_df : pd.DataFrame
|
1873
|
+
Table defining molecular species with columns:
|
1874
|
+
- s_name : str, name of molecular species
|
1875
|
+
- s_Identifiers : identifiers.Identifiers, species identifiers
|
1876
|
+
compartments_df : pd.DataFrame
|
1877
|
+
Table defining compartments with columns:
|
1878
|
+
- c_name : str, name of compartment
|
1879
|
+
- c_Identifiers : identifiers.Identifiers, compartment identifiers
|
1880
|
+
interaction_source : source.Source
|
1881
|
+
Source object linking model entities to interaction source
|
1882
|
+
upstream_stoichiometry : int, default 0
|
1883
|
+
Stoichiometry of upstream species in reactions
|
1884
|
+
downstream_stoichiometry : int, default 1
|
1885
|
+
Stoichiometry of downstream species in reactions
|
1886
|
+
downstream_sbo_name : str, default SBOTERM_NAMES.PRODUCT
|
1887
|
+
SBO term for downstream reactant type
|
1888
|
+
keep_species_data : bool or str, default False
|
1889
|
+
Whether to preserve extra species columns. If True, saves as 'source' label.
|
1890
|
+
If string, uses as custom label. If False, discards extra data.
|
1891
|
+
keep_reactions_data : bool or str, default False
|
1892
|
+
Whether to preserve extra reaction columns. If True, saves as 'source' label.
|
1893
|
+
If string, uses as custom label. If False, discards extra data.
|
1894
|
+
|
1895
|
+
Returns
|
1896
|
+
-------
|
1897
|
+
SBML_dfs
|
1898
|
+
Validated SBML data structure containing compartments, species,
|
1899
|
+
compartmentalized species, reactions, and reaction species tables.
|
1944
1900
|
"""
|
1901
|
+
# 1. Validate inputs
|
1902
|
+
_edgelist_validate_inputs(interaction_edgelist, species_df, compartments_df)
|
1945
1903
|
|
1946
|
-
#
|
1947
|
-
|
1948
|
-
interaction_edgelist, species_df,
|
1904
|
+
# 2. Identify which extra columns to preserve
|
1905
|
+
extra_columns = _edgelist_identify_extra_columns(
|
1906
|
+
interaction_edgelist, species_df, keep_reactions_data, keep_species_data
|
1949
1907
|
)
|
1950
1908
|
|
1951
|
-
#
|
1952
|
-
|
1953
|
-
|
1954
|
-
|
1955
|
-
|
1956
|
-
"
|
1957
|
-
"upstream_compartment",
|
1958
|
-
"downstream_compartment",
|
1959
|
-
SBML_DFS.R_NAME,
|
1960
|
-
SBML_DFS.SBO_TERM,
|
1961
|
-
SBML_DFS.R_IDENTIFIERS,
|
1962
|
-
SBML_DFS.R_ISREVERSIBLE,
|
1963
|
-
}
|
1964
|
-
if keep_reactions_data is not False:
|
1965
|
-
extra_reactions_columns = [
|
1966
|
-
c
|
1967
|
-
for c in interaction_edgelist.columns
|
1968
|
-
if c not in interaction_edgelist_required_vars
|
1969
|
-
]
|
1970
|
-
else:
|
1971
|
-
extra_reactions_columns = []
|
1972
|
-
# Extra species columns
|
1973
|
-
if keep_species_data is not False:
|
1974
|
-
extra_species_columns = [
|
1975
|
-
c
|
1976
|
-
for c in species_df.columns
|
1977
|
-
if c not in {SBML_DFS.S_NAME, SBML_DFS.S_IDENTIFIERS}
|
1978
|
-
]
|
1979
|
-
else:
|
1980
|
-
extra_species_columns = []
|
1981
|
-
|
1982
|
-
# format compartments
|
1983
|
-
compartments_df[SBML_DFS.C_SOURCE] = interaction_source
|
1984
|
-
compartments_df[SBML_DFS.C_ID] = sbml_dfs_utils.id_formatter(
|
1985
|
-
range(compartments_df.shape[0]), SBML_DFS.C_ID
|
1909
|
+
# 3. Process compartments and species tables
|
1910
|
+
processed_compartments = _edgelist_process_compartments(
|
1911
|
+
compartments_df, interaction_source
|
1912
|
+
)
|
1913
|
+
processed_species, species_data = _edgelist_process_species(
|
1914
|
+
species_df, interaction_source, extra_columns["species"]
|
1986
1915
|
)
|
1987
|
-
compartments_df = compartments_df.set_index(SBML_DFS.C_ID)[
|
1988
|
-
[SBML_DFS.C_NAME, SBML_DFS.C_IDENTIFIERS, SBML_DFS.C_SOURCE]
|
1989
|
-
]
|
1990
1916
|
|
1991
|
-
#
|
1992
|
-
|
1993
|
-
|
1994
|
-
|
1917
|
+
# 4. Create compartmentalized species
|
1918
|
+
comp_species = _edgelist_create_compartmentalized_species(
|
1919
|
+
interaction_edgelist,
|
1920
|
+
processed_species,
|
1921
|
+
processed_compartments,
|
1922
|
+
interaction_source,
|
1995
1923
|
)
|
1996
1924
|
|
1997
|
-
|
1998
|
-
|
1999
|
-
|
2000
|
-
|
2001
|
-
|
2002
|
-
|
2003
|
-
|
2004
|
-
|
1925
|
+
# 5. Create reactions and reaction species
|
1926
|
+
reactions, reaction_species, reactions_data = (
|
1927
|
+
_edgelist_create_reactions_and_species(
|
1928
|
+
interaction_edgelist,
|
1929
|
+
comp_species,
|
1930
|
+
processed_species,
|
1931
|
+
processed_compartments,
|
1932
|
+
interaction_source,
|
1933
|
+
upstream_stoichiometry,
|
1934
|
+
downstream_stoichiometry,
|
1935
|
+
downstream_sbo_name,
|
1936
|
+
extra_columns["reactions"],
|
1937
|
+
)
|
1938
|
+
)
|
2005
1939
|
|
2006
|
-
#
|
1940
|
+
# 6. Assemble final SBML_dfs object
|
1941
|
+
sbml_model = _edgelist_assemble_sbml_model(
|
1942
|
+
processed_compartments,
|
1943
|
+
processed_species,
|
1944
|
+
comp_species,
|
1945
|
+
reactions,
|
1946
|
+
reaction_species,
|
1947
|
+
species_data,
|
1948
|
+
reactions_data,
|
1949
|
+
keep_species_data,
|
1950
|
+
keep_reactions_data,
|
1951
|
+
extra_columns,
|
1952
|
+
)
|
2007
1953
|
|
2008
|
-
|
2009
|
-
comp_species = pd.concat(
|
2010
|
-
[
|
2011
|
-
interaction_edgelist[["upstream_name", "upstream_compartment"]].rename(
|
2012
|
-
{
|
2013
|
-
"upstream_name": SBML_DFS.S_NAME,
|
2014
|
-
"upstream_compartment": SBML_DFS.C_NAME,
|
2015
|
-
},
|
2016
|
-
axis=1,
|
2017
|
-
),
|
2018
|
-
interaction_edgelist[["downstream_name", "downstream_compartment"]].rename(
|
2019
|
-
{
|
2020
|
-
"downstream_name": SBML_DFS.S_NAME,
|
2021
|
-
"downstream_compartment": SBML_DFS.C_NAME,
|
2022
|
-
},
|
2023
|
-
axis=1,
|
2024
|
-
),
|
2025
|
-
]
|
2026
|
-
).drop_duplicates()
|
1954
|
+
return sbml_model
|
2027
1955
|
|
2028
|
-
|
2029
|
-
comp_species_w_ids = comp_species.merge(
|
2030
|
-
species_df[SBML_DFS.S_NAME].reset_index(),
|
2031
|
-
how="left",
|
2032
|
-
left_on=SBML_DFS.S_NAME,
|
2033
|
-
right_on=SBML_DFS.S_NAME,
|
2034
|
-
).merge(
|
2035
|
-
compartments_df[SBML_DFS.C_NAME].reset_index(),
|
2036
|
-
how="left",
|
2037
|
-
left_on=SBML_DFS.C_NAME,
|
2038
|
-
right_on=SBML_DFS.C_NAME,
|
2039
|
-
)
|
1956
|
+
return sbml_model
|
2040
1957
|
|
2041
|
-
# check whether all species and compartments exist
|
2042
|
-
_sbml_dfs_from_edgelist_check_cspecies_merge(comp_species_w_ids, comp_species)
|
2043
1958
|
|
2044
|
-
|
2045
|
-
|
2046
|
-
f"{s} [{c}]"
|
2047
|
-
for s, c in zip(
|
2048
|
-
comp_species_w_ids[SBML_DFS.S_NAME], comp_species_w_ids[SBML_DFS.C_NAME]
|
2049
|
-
)
|
2050
|
-
]
|
2051
|
-
# add source object
|
2052
|
-
comp_species_w_ids[SBML_DFS.SC_SOURCE] = interaction_source
|
2053
|
-
# name index
|
2054
|
-
comp_species_w_ids[SBML_DFS.SC_ID] = sbml_dfs_utils.id_formatter(
|
2055
|
-
range(comp_species_w_ids.shape[0]), SBML_DFS.SC_ID
|
2056
|
-
)
|
2057
|
-
comp_species_w_ids = comp_species_w_ids.set_index(SBML_DFS.SC_ID)[
|
2058
|
-
[SBML_DFS.SC_NAME, SBML_DFS.S_ID, SBML_DFS.C_ID, SBML_DFS.SC_SOURCE]
|
2059
|
-
]
|
1959
|
+
def species_type_types(x):
|
1960
|
+
"""Assign a high-level molecule type to a molecular species"""
|
2060
1961
|
|
2061
|
-
|
1962
|
+
if isinstance(x, identifiers.Identifiers):
|
1963
|
+
if x.filter(["chebi"]):
|
1964
|
+
return "metabolite"
|
1965
|
+
elif x.filter(["molodex"]):
|
1966
|
+
return "drug"
|
1967
|
+
else:
|
1968
|
+
return "protein"
|
1969
|
+
else:
|
1970
|
+
return "unknown"
|
2062
1971
|
|
2063
|
-
# create a from cs_species -> to cs_species edgelist
|
2064
|
-
# interaction_edgelist
|
2065
|
-
comp_species_w_names = (
|
2066
|
-
comp_species_w_ids.reset_index()
|
2067
|
-
.merge(species_df[SBML_DFS.S_NAME].reset_index())
|
2068
|
-
.merge(compartments_df[SBML_DFS.C_NAME].reset_index())
|
2069
|
-
)
|
2070
1972
|
|
2071
|
-
|
2072
|
-
|
2073
|
-
|
2074
|
-
SBML_DFS.SC_ID: "sc_id_up",
|
2075
|
-
SBML_DFS.S_NAME: "upstream_name",
|
2076
|
-
SBML_DFS.C_NAME: "upstream_compartment",
|
2077
|
-
},
|
2078
|
-
axis=1,
|
2079
|
-
),
|
2080
|
-
how="left",
|
2081
|
-
).merge(
|
2082
|
-
comp_species_w_names[[SBML_DFS.SC_ID, SBML_DFS.S_NAME, SBML_DFS.C_NAME]].rename(
|
1973
|
+
def stub_ids(ids):
|
1974
|
+
if len(ids) == 0:
|
1975
|
+
return pd.DataFrame(
|
2083
1976
|
{
|
2084
|
-
|
2085
|
-
|
2086
|
-
|
2087
|
-
|
2088
|
-
|
2089
|
-
),
|
2090
|
-
how="left",
|
2091
|
-
)[
|
2092
|
-
REQUIRED_REACTION_FROMEDGELIST_COLUMNS + extra_reactions_columns
|
2093
|
-
]
|
2094
|
-
|
2095
|
-
# some extra checks
|
2096
|
-
if interaction_edgelist.shape[0] != interaction_edgelist_w_cspecies.shape[0]:
|
2097
|
-
raise ValueError(
|
2098
|
-
"Merging compartmentalized species to interaction_edgelist"
|
2099
|
-
" resulted in an increase in the tables from "
|
2100
|
-
f"{interaction_edgelist.shape[0]} to "
|
2101
|
-
f"{interaction_edgelist_w_cspecies.shape[0]} indicating"
|
2102
|
-
" a 1-many join which should have been 1-1"
|
1977
|
+
IDENTIFIERS.ONTOLOGY: [None],
|
1978
|
+
IDENTIFIERS.IDENTIFIER: [None],
|
1979
|
+
IDENTIFIERS.URL: [None],
|
1980
|
+
IDENTIFIERS.BQB: [None],
|
1981
|
+
}
|
2103
1982
|
)
|
1983
|
+
else:
|
1984
|
+
return pd.DataFrame(ids)
|
2104
1985
|
|
2105
|
-
# create one reaction per interaction
|
2106
|
-
interaction_edgelist_w_cspecies[SBML_DFS.R_SOURCE] = interaction_source
|
2107
|
-
interaction_edgelist_w_cspecies[SBML_DFS.R_ID] = sbml_dfs_utils.id_formatter(
|
2108
|
-
range(interaction_edgelist_w_cspecies.shape[0]), SBML_DFS.R_ID
|
2109
|
-
)
|
2110
1986
|
|
2111
|
-
|
2112
|
-
|
2113
|
-
|
2114
|
-
SBML_DFS.R_SOURCE,
|
2115
|
-
SBML_DFS.R_ISREVERSIBLE,
|
2116
|
-
]
|
2117
|
-
reactions_df = interaction_edgelist_w_cspecies.copy().set_index(SBML_DFS.R_ID)[
|
2118
|
-
reactions_df_columns + extra_reactions_columns
|
2119
|
-
]
|
2120
|
-
# Keep extra columns to save them as extra data
|
2121
|
-
reactions_data = reactions_df[extra_reactions_columns]
|
2122
|
-
reactions_df = reactions_df[reactions_df_columns]
|
1987
|
+
def add_sbo_role(reaction_species: pd.DataFrame) -> pd.DataFrame:
|
1988
|
+
"""
|
1989
|
+
Add an sbo_role column to the reaction_species table.
|
2123
1990
|
|
2124
|
-
|
2125
|
-
|
2126
|
-
[
|
2127
|
-
# upstream interactions are defined by sbo_term and should generally
|
2128
|
-
# be modifiers/stimulator/inhibitor/interactor
|
2129
|
-
interaction_edgelist_w_cspecies[["sc_id_up", "sbo_term", "r_id"]]
|
2130
|
-
.assign(stoichiometry=upstream_stoichiometry)
|
2131
|
-
.rename({"sc_id_up": "sc_id"}, axis=1),
|
2132
|
-
# downstream interactions indicate some modification of the state
|
2133
|
-
# of the species and hence are defined as product
|
2134
|
-
interaction_edgelist_w_cspecies[["sc_id_down", "r_id"]]
|
2135
|
-
.assign(
|
2136
|
-
stoichiometry=downstream_stoichiometry,
|
2137
|
-
sbo_term=MINI_SBO_FROM_NAME[downstream_sbo_name],
|
2138
|
-
)
|
2139
|
-
.rename({"sc_id_down": "sc_id"}, axis=1),
|
2140
|
-
]
|
2141
|
-
)
|
2142
|
-
reaction_species_df["rsc_id"] = sbml_dfs_utils.id_formatter(
|
2143
|
-
range(reaction_species_df.shape[0]), "rsc_id"
|
2144
|
-
)
|
2145
|
-
reaction_species_df = reaction_species_df.set_index("rsc_id")
|
1991
|
+
The sbo_role column is a string column that contains the SBO role of the reaction species.
|
1992
|
+
The values in the sbo_role column are taken from the sbo_term column.
|
2146
1993
|
|
2147
|
-
|
2148
|
-
|
2149
|
-
"compartments": compartments_df,
|
2150
|
-
"species": species_df,
|
2151
|
-
"compartmentalized_species": comp_species_w_ids,
|
2152
|
-
"reactions": reactions_df,
|
2153
|
-
"reaction_species": reaction_species_df,
|
2154
|
-
}
|
2155
|
-
if len(extra_reactions_columns) > 0:
|
2156
|
-
if isinstance(keep_reactions_data, str):
|
2157
|
-
reactions_data_label = keep_reactions_data
|
2158
|
-
else:
|
2159
|
-
reactions_data_label = "source"
|
2160
|
-
sbml_tbl_dict["reactions_data"] = {reactions_data_label: reactions_data}
|
1994
|
+
The sbo_role column is added to the reaction_species table by mapping the sbo_term column to the SBO_NAME_TO_ROLE dictionary.
|
1995
|
+
"""
|
2161
1996
|
|
2162
|
-
|
2163
|
-
if isinstance(keep_species_data, str):
|
2164
|
-
species_data_label = keep_species_data
|
2165
|
-
else:
|
2166
|
-
species_data_label = "source"
|
2167
|
-
sbml_tbl_dict["species_data"] = {species_data_label: species_data}
|
1997
|
+
validate_sbml_dfs_table(reaction_species, SBML_DFS.REACTION_SPECIES)
|
2168
1998
|
|
2169
|
-
|
2170
|
-
|
1999
|
+
reaction_species = (
|
2000
|
+
reaction_species.assign(sbo_role=reaction_species[SBML_DFS.SBO_TERM])
|
2001
|
+
.replace({SBO_ROLES_DEFS.SBO_ROLE: MINI_SBO_TO_NAME})
|
2002
|
+
.replace({SBO_ROLES_DEFS.SBO_ROLE: SBO_NAME_TO_ROLE})
|
2003
|
+
)
|
2171
2004
|
|
2172
|
-
|
2005
|
+
undefined_roles = set(reaction_species[SBO_ROLES_DEFS.SBO_ROLE].unique()) - set(
|
2006
|
+
SBO_NAME_TO_ROLE.values()
|
2007
|
+
)
|
2008
|
+
if len(undefined_roles) > 0:
|
2009
|
+
logger.warning(
|
2010
|
+
f"The following SBO roles are not defined: {undefined_roles}. They will be treated as {SBO_ROLES_DEFS.OPTIONAL} when determining reaction operability."
|
2011
|
+
)
|
2012
|
+
mask = reaction_species[SBO_ROLES_DEFS.SBO_ROLE].isin(undefined_roles)
|
2013
|
+
reaction_species.loc[mask, SBO_ROLES_DEFS.SBO_ROLE] = SBO_ROLES_DEFS.OPTIONAL
|
2173
2014
|
|
2015
|
+
return reaction_species
|
2174
2016
|
|
2175
|
-
def _sbml_dfs_from_edgelist_validate_inputs(
|
2176
|
-
interaction_edgelist: pd.DataFrame,
|
2177
|
-
species_df: pd.DataFrame,
|
2178
|
-
compartments_df: pd.DataFrame,
|
2179
|
-
) -> None:
|
2180
|
-
"""Check that the inputs for creating an SBML_dfs from an edgelist are appropriate."""
|
2181
2017
|
|
2182
|
-
|
2183
|
-
|
2184
|
-
|
2185
|
-
|
2186
|
-
|
2187
|
-
|
2188
|
-
if len(missing_required_fields) > 0:
|
2018
|
+
def find_underspecified_reactions(
|
2019
|
+
reaction_species_w_roles: pd.DataFrame,
|
2020
|
+
) -> pd.DataFrame:
|
2021
|
+
|
2022
|
+
# check that both sbo_role and "new" are present
|
2023
|
+
if SBO_ROLES_DEFS.SBO_ROLE not in reaction_species_w_roles.columns:
|
2189
2024
|
raise ValueError(
|
2190
|
-
|
2191
|
-
' in "compartments_df" but were not present in the input file.'
|
2025
|
+
"The sbo_role column is not present in the reaction_species_w_roles table. Please call add_sbo_role() first."
|
2192
2026
|
)
|
2193
|
-
|
2194
|
-
# check species
|
2195
|
-
species_df_expected_vars = {SBML_DFS.S_NAME, SBML_DFS.S_IDENTIFIERS}
|
2196
|
-
species_df_columns = set(species_df.columns.tolist())
|
2197
|
-
missing_required_fields = species_df_expected_vars.difference(species_df_columns)
|
2198
|
-
if len(missing_required_fields) > 0:
|
2027
|
+
if "new" not in reaction_species_w_roles.columns:
|
2199
2028
|
raise ValueError(
|
2200
|
-
|
2201
|
-
' variables in "species_df" but were not present '
|
2202
|
-
"in the input file."
|
2029
|
+
"The new column is not present in the reaction_species_w_roles table. This should indicate what cspecies would be preserved in the reaction should it be preserved."
|
2203
2030
|
)
|
2204
|
-
|
2205
|
-
|
2206
|
-
interaction_edgelist_columns = set(interaction_edgelist.columns.tolist())
|
2207
|
-
missing_required_fields = INTERACTION_EDGELIST_EXPECTED_VARS.difference(
|
2208
|
-
interaction_edgelist_columns
|
2209
|
-
)
|
2210
|
-
if len(missing_required_fields) > 0:
|
2031
|
+
# check that new is a boolean column
|
2032
|
+
if reaction_species_w_roles["new"].dtype != bool:
|
2211
2033
|
raise ValueError(
|
2212
|
-
|
2213
|
-
'variables in "interaction_edgelist" but were not '
|
2214
|
-
"present in the input file."
|
2034
|
+
"The new column is not a boolean column. Please ensure that the new column is a boolean column. This should indicate what cspecies would be preserved in the reaction should it be preserved."
|
2215
2035
|
)
|
2216
2036
|
|
2217
|
-
|
2037
|
+
reactions_with_lost_defining_members = set(
|
2038
|
+
reaction_species_w_roles.query("~new")
|
2039
|
+
.query("sbo_role == 'DEFINING'")[SBML_DFS.R_ID]
|
2040
|
+
.tolist()
|
2041
|
+
)
|
2218
2042
|
|
2219
|
-
|
2220
|
-
|
2221
|
-
|
2222
|
-
|
2223
|
-
|
2224
|
-
|
2225
|
-
# check for 1-many merge
|
2226
|
-
if merged_species.shape[0] != original_species.shape[0]:
|
2227
|
-
raise ValueError(
|
2228
|
-
"Merging compartmentalized species to species_df"
|
2229
|
-
" and compartments_df by names resulted in an "
|
2230
|
-
f"increase in the tables from {original_species.shape[0]}"
|
2231
|
-
f" to {merged_species.shape[0]} indicating that names were"
|
2232
|
-
" not unique"
|
2233
|
-
)
|
2234
|
-
|
2235
|
-
# check for missing species and compartments
|
2236
|
-
missing_compartments = merged_species[merged_species[SBML_DFS.C_ID].isna()][
|
2237
|
-
SBML_DFS.C_NAME
|
2238
|
-
].unique()
|
2239
|
-
if len(missing_compartments) >= 1:
|
2240
|
-
raise ValueError(
|
2241
|
-
f"{len(missing_compartments)} compartments were present in"
|
2242
|
-
' "interaction_edgelist" but not "compartments_df":'
|
2243
|
-
f" {', '.join(missing_compartments)}"
|
2244
|
-
)
|
2245
|
-
|
2246
|
-
missing_species = merged_species[merged_species[SBML_DFS.S_ID].isna()][
|
2247
|
-
SBML_DFS.S_NAME
|
2248
|
-
].unique()
|
2249
|
-
if len(missing_species) >= 1:
|
2250
|
-
raise ValueError(
|
2251
|
-
f"{len(missing_species)} species were present in "
|
2252
|
-
'"interaction_edgelist" but not "species_df":'
|
2253
|
-
f" {', '.join(missing_species)}"
|
2254
|
-
)
|
2255
|
-
|
2256
|
-
return None
|
2257
|
-
|
2258
|
-
|
2259
|
-
def _stub_compartments(
|
2260
|
-
stubbed_compartment: str = GENERIC_COMPARTMENT,
|
2261
|
-
) -> pd.DataFrame:
|
2262
|
-
"""Stub Compartments
|
2263
|
-
|
2264
|
-
Create a compartments table with only a single compartment
|
2265
|
-
|
2266
|
-
Args:
|
2267
|
-
stubbed_compartment (str): the name of a compartment which should match the
|
2268
|
-
keys in constants.COMPARTMENTS and constants.COMPARTMENTS_GO_TERMS
|
2269
|
-
|
2270
|
-
Returns:
|
2271
|
-
compartments_df (pd.DataFrame): compartments dataframe
|
2272
|
-
"""
|
2273
|
-
|
2274
|
-
if stubbed_compartment not in COMPARTMENT_ALIASES.keys():
|
2275
|
-
raise ValueError(
|
2276
|
-
f"{stubbed_compartment} is not defined in constants.COMPARTMENTS"
|
2277
|
-
)
|
2278
|
-
|
2279
|
-
if stubbed_compartment not in COMPARTMENTS_GO_TERMS.keys():
|
2280
|
-
raise ValueError(
|
2281
|
-
f"{stubbed_compartment} is not defined in constants.COMPARTMENTS_GO_TERMS"
|
2282
|
-
)
|
2283
|
-
|
2284
|
-
stubbed_compartment_id = COMPARTMENTS_GO_TERMS[stubbed_compartment]
|
2285
|
-
|
2286
|
-
formatted_uri = identifiers.format_uri(
|
2287
|
-
uri=identifiers.create_uri_url(
|
2288
|
-
ontology=ONTOLOGIES.GO,
|
2289
|
-
identifier=stubbed_compartment_id,
|
2290
|
-
),
|
2291
|
-
biological_qualifier_type=BQB.IS,
|
2292
|
-
)
|
2293
|
-
|
2294
|
-
compartments_df = pd.DataFrame(
|
2295
|
-
{
|
2296
|
-
SBML_DFS.C_NAME: [stubbed_compartment],
|
2297
|
-
SBML_DFS.C_IDENTIFIERS: [identifiers.Identifiers([formatted_uri])],
|
2298
|
-
}
|
2299
|
-
)
|
2300
|
-
compartments_df.index = sbml_dfs_utils.id_formatter([0], SBML_DFS.C_ID) # type: ignore
|
2301
|
-
compartments_df.index.name = SBML_DFS.C_ID
|
2302
|
-
|
2303
|
-
return compartments_df
|
2304
|
-
|
2305
|
-
|
2306
|
-
def _validate_matching_data(data_table: pd.DataFrame, ref_table: pd.DataFrame):
|
2307
|
-
"""Validates a table against a reference
|
2308
|
-
|
2309
|
-
This check if the table has the same index, no duplicates in the index
|
2310
|
-
and that all values in the index are in the reference table.
|
2311
|
-
|
2312
|
-
Args:
|
2313
|
-
data_table (pd.DataFrame): a table with data that should
|
2314
|
-
match the reference
|
2315
|
-
ref_table (pd.DataFrame): a reference table
|
2316
|
-
|
2317
|
-
Raises:
|
2318
|
-
ValueError: not same index name
|
2319
|
-
ValueError: index contains duplicates
|
2320
|
-
ValueError: index not subset of index of reactions table
|
2321
|
-
"""
|
2322
|
-
ref_index_name = ref_table.index.name
|
2323
|
-
if data_table.index.name != ref_index_name:
|
2324
|
-
raise ValueError(
|
2325
|
-
"the index name for reaction data table was not"
|
2326
|
-
f" {ref_index_name}: {data_table.index.name}"
|
2327
|
-
)
|
2328
|
-
ids = data_table.index
|
2329
|
-
if any(ids.duplicated()):
|
2330
|
-
raise ValueError(
|
2331
|
-
"the index for reaction data table " "contained duplicate values"
|
2332
|
-
)
|
2333
|
-
if not all(ids.isin(ref_table.index)):
|
2334
|
-
raise ValueError(
|
2335
|
-
"the index for reaction data table contained values"
|
2336
|
-
" not found in the reactions table"
|
2337
|
-
)
|
2338
|
-
if not isinstance(data_table, pd.DataFrame):
|
2339
|
-
raise TypeError(
|
2340
|
-
f"The data table was type {type(data_table).__name__}"
|
2341
|
-
" but must be a pd.DataFrame"
|
2342
|
-
)
|
2343
|
-
|
2344
|
-
|
2345
|
-
def species_type_types(x):
|
2346
|
-
"""Assign a high-level molecule type to a molecular species"""
|
2347
|
-
|
2348
|
-
if isinstance(x, identifiers.Identifiers):
|
2349
|
-
if x.filter(["chebi"]):
|
2350
|
-
return "metabolite"
|
2351
|
-
elif x.filter(["molodex"]):
|
2352
|
-
return "drug"
|
2353
|
-
else:
|
2354
|
-
return "protein"
|
2355
|
-
else:
|
2356
|
-
return "unknown"
|
2357
|
-
|
2358
|
-
|
2359
|
-
def stub_ids(ids):
|
2360
|
-
if len(ids) == 0:
|
2361
|
-
return pd.DataFrame(
|
2362
|
-
{
|
2363
|
-
IDENTIFIERS.ONTOLOGY: [None],
|
2364
|
-
IDENTIFIERS.IDENTIFIER: [None],
|
2365
|
-
IDENTIFIERS.URL: [None],
|
2366
|
-
IDENTIFIERS.BQB: [None],
|
2367
|
-
}
|
2368
|
-
)
|
2369
|
-
else:
|
2370
|
-
return pd.DataFrame(ids)
|
2371
|
-
|
2372
|
-
|
2373
|
-
def add_sbo_role(reaction_species: pd.DataFrame) -> pd.DataFrame:
|
2374
|
-
"""
|
2375
|
-
Add an sbo_role column to the reaction_species table.
|
2376
|
-
|
2377
|
-
The sbo_role column is a string column that contains the SBO role of the reaction species.
|
2378
|
-
The values in the sbo_role column are taken from the sbo_term column.
|
2379
|
-
|
2380
|
-
The sbo_role column is added to the reaction_species table by mapping the sbo_term column to the SBO_NAME_TO_ROLE dictionary.
|
2381
|
-
"""
|
2382
|
-
|
2383
|
-
validate_sbml_dfs_table(reaction_species, SBML_DFS.REACTION_SPECIES)
|
2384
|
-
|
2385
|
-
reaction_species = (
|
2386
|
-
reaction_species.assign(sbo_role=reaction_species[SBML_DFS.SBO_TERM])
|
2387
|
-
.replace({SBO_ROLES_DEFS.SBO_ROLE: MINI_SBO_TO_NAME})
|
2388
|
-
.replace({SBO_ROLES_DEFS.SBO_ROLE: SBO_NAME_TO_ROLE})
|
2389
|
-
)
|
2390
|
-
|
2391
|
-
undefined_roles = set(reaction_species[SBO_ROLES_DEFS.SBO_ROLE].unique()) - set(
|
2392
|
-
SBO_NAME_TO_ROLE.values()
|
2393
|
-
)
|
2394
|
-
if len(undefined_roles) > 0:
|
2395
|
-
logger.warning(
|
2396
|
-
f"The following SBO roles are not defined: {undefined_roles}. They will be treated as {SBO_ROLES_DEFS.OPTIONAL} when determining reaction operability."
|
2397
|
-
)
|
2398
|
-
mask = reaction_species[SBO_ROLES_DEFS.SBO_ROLE].isin(undefined_roles)
|
2399
|
-
reaction_species.loc[mask, SBO_ROLES_DEFS.SBO_ROLE] = SBO_ROLES_DEFS.OPTIONAL
|
2400
|
-
|
2401
|
-
return reaction_species
|
2402
|
-
|
2403
|
-
|
2404
|
-
def find_underspecified_reactions(
|
2405
|
-
reaction_species_w_roles: pd.DataFrame,
|
2406
|
-
) -> pd.DataFrame:
|
2407
|
-
|
2408
|
-
# check that both sbo_role and "new" are present
|
2409
|
-
if SBO_ROLES_DEFS.SBO_ROLE not in reaction_species_w_roles.columns:
|
2410
|
-
raise ValueError(
|
2411
|
-
"The sbo_role column is not present in the reaction_species_w_roles table. Please call add_sbo_role() first."
|
2412
|
-
)
|
2413
|
-
if "new" not in reaction_species_w_roles.columns:
|
2414
|
-
raise ValueError(
|
2415
|
-
"The new column is not present in the reaction_species_w_roles table. This should indicate what cspecies would be preserved in the reaction should it be preserved."
|
2416
|
-
)
|
2417
|
-
# check that new is a boolean column
|
2418
|
-
if reaction_species_w_roles["new"].dtype != bool:
|
2419
|
-
raise ValueError(
|
2420
|
-
"The new column is not a boolean column. Please ensure that the new column is a boolean column. This should indicate what cspecies would be preserved in the reaction should it be preserved."
|
2421
|
-
)
|
2422
|
-
|
2423
|
-
reactions_with_lost_defining_members = set(
|
2424
|
-
reaction_species_w_roles.query("~new")
|
2425
|
-
.query("sbo_role == 'DEFINING'")[SBML_DFS.R_ID]
|
2426
|
-
.tolist()
|
2427
|
-
)
|
2428
|
-
|
2429
|
-
N_reactions_with_lost_defining_members = len(reactions_with_lost_defining_members)
|
2430
|
-
if N_reactions_with_lost_defining_members > 0:
|
2431
|
-
logger.info(
|
2432
|
-
f"Removing {N_reactions_with_lost_defining_members} reactions which have lost at least one defining species"
|
2433
|
-
)
|
2043
|
+
N_reactions_with_lost_defining_members = len(reactions_with_lost_defining_members)
|
2044
|
+
if N_reactions_with_lost_defining_members > 0:
|
2045
|
+
logger.info(
|
2046
|
+
f"Removing {N_reactions_with_lost_defining_members} reactions which have lost at least one defining species"
|
2047
|
+
)
|
2434
2048
|
|
2435
2049
|
# find the cases where all "new" values for a given (r_id, sbo_term) are False
|
2436
2050
|
reactions_with_lost_requirements = set(
|
@@ -2595,3 +2209,594 @@ def _perform_sbml_dfs_table_validation(
|
|
2595
2209
|
# check for empty table
|
2596
2210
|
if table_data.shape[0] == 0:
|
2597
2211
|
raise ValueError(f"{table_name} contained no entries")
|
2212
|
+
|
2213
|
+
|
2214
|
+
def _filter_promiscuous_components(
    bqb_has_parts_species: pd.DataFrame, max_promiscuity: int
) -> pd.DataFrame:
    """
    Drop rows whose (ontology, identifier) pair occurs too many times.

    Parameters
    ----------
    bqb_has_parts_species : pd.DataFrame
        Table with ontology and identifier columns
    max_promiscuity : int
        Pairs appearing in more than this many rows are removed

    Returns
    -------
    pd.DataFrame
        The input table itself when no pair exceeds the threshold, otherwise
        the table without the rows of the over-represented pairs
    """

    component_key = [IDENTIFIERS.ONTOLOGY, IDENTIFIERS.IDENTIFIER]
    flag_col = "is_shared_component"

    # number of rows each (ontology, identifier) pair appears in
    occurrences = bqb_has_parts_species.value_counts(component_key)
    shared_index = occurrences.loc[occurrences > max_promiscuity].index

    if len(shared_index) == 0:
        return bqb_has_parts_species

    shared_flags = pd.Series(True, index=shared_index, name=flag_col, dtype=bool)

    flagged = bqb_has_parts_species.merge(
        shared_flags,
        left_on=component_key,
        right_index=True,
        how="left",
    )

    # rows without a match carry a missing flag; treat them as not shared
    is_shared = flagged[flag_col].astype("boolean").fillna(False)

    return flagged.loc[~is_shared].drop(columns=[flag_col])
|
2251
|
+
|
2252
|
+
|
2253
|
+
def _edgelist_validate_inputs(
    interaction_edgelist: pd.DataFrame,
    species_df: pd.DataFrame,
    compartments_df: pd.DataFrame,
) -> None:
    """
    Check that each input table carries its required columns.

    Tables are checked in the order compartments, species, interactions and
    the first table with missing columns triggers the error.

    Parameters
    ----------
    interaction_edgelist : pd.DataFrame
        Interaction data to validate
    species_df : pd.DataFrame
        Species data to validate
    compartments_df : pd.DataFrame
        Compartments data to validate

    Raises
    ------
    ValueError
        If a table is missing one or more required columns
    """

    # (label used in the error message, table, required columns)
    checks = (
        (
            "compartments_df",
            compartments_df,
            {SBML_DFS.C_NAME, SBML_DFS.C_IDENTIFIERS},
        ),
        (
            "species_df",
            species_df,
            {SBML_DFS.S_NAME, SBML_DFS.S_IDENTIFIERS},
        ),
        (
            "interaction_edgelist",
            interaction_edgelist,
            INTERACTION_EDGELIST_EXPECTED_VARS,
        ),
    )

    for table_label, table, required_vars in checks:
        present_vars = set(table.columns.tolist())
        absent_vars = required_vars.difference(present_vars)
        if absent_vars:
            raise ValueError(
                f"{', '.join(absent_vars)} are required variables in"
                f' "{table_label}" but were not present in the input file.'
            )

    return None
|
2307
|
+
|
2308
|
+
|
2309
|
+
def _edgelist_identify_extra_columns(
|
2310
|
+
interaction_edgelist, species_df, keep_reactions_data, keep_species_data
|
2311
|
+
):
|
2312
|
+
"""
|
2313
|
+
Identify extra columns in input data that should be preserved.
|
2314
|
+
|
2315
|
+
Parameters
|
2316
|
+
----------
|
2317
|
+
interaction_edgelist : pd.DataFrame
|
2318
|
+
Interaction data containing potential extra columns
|
2319
|
+
species_df : pd.DataFrame
|
2320
|
+
Species data containing potential extra columns
|
2321
|
+
keep_reactions_data : bool or str
|
2322
|
+
Whether to keep extra reaction columns
|
2323
|
+
keep_species_data : bool or str
|
2324
|
+
Whether to keep extra species columns
|
2325
|
+
|
2326
|
+
Returns
|
2327
|
+
-------
|
2328
|
+
dict
|
2329
|
+
Dictionary with 'reactions' and 'species' keys containing lists of extra column names
|
2330
|
+
"""
|
2331
|
+
extra_reactions_columns = []
|
2332
|
+
extra_species_columns = []
|
2333
|
+
|
2334
|
+
if keep_reactions_data is not False:
|
2335
|
+
extra_reactions_columns = [
|
2336
|
+
c
|
2337
|
+
for c in interaction_edgelist.columns
|
2338
|
+
if c not in INTERACTION_EDGELIST_EXPECTED_VARS
|
2339
|
+
]
|
2340
|
+
|
2341
|
+
if keep_species_data is not False:
|
2342
|
+
extra_species_columns = [
|
2343
|
+
c
|
2344
|
+
for c in species_df.columns
|
2345
|
+
if c not in {SBML_DFS.S_NAME, SBML_DFS.S_IDENTIFIERS}
|
2346
|
+
]
|
2347
|
+
|
2348
|
+
return {"reactions": extra_reactions_columns, "species": extra_species_columns}
|
2349
|
+
|
2350
|
+
|
2351
|
+
def _edgelist_process_compartments(compartments_df, interaction_source):
    """
    Attach a source and sequential IDs to the compartments table.

    Parameters
    ----------
    compartments_df : pd.DataFrame
        Raw compartments data
    interaction_source : source.Source
        Source object to assign to compartments

    Returns
    -------
    pd.DataFrame
        Name, identifiers and source columns, indexed by compartment ID
    """
    output_columns = [SBML_DFS.C_NAME, SBML_DFS.C_IDENTIFIERS, SBML_DFS.C_SOURCE]

    # work on a copy so the caller's table is left untouched
    formatted = compartments_df.copy()
    formatted[SBML_DFS.C_SOURCE] = interaction_source

    n_compartments = formatted.shape[0]
    formatted[SBML_DFS.C_ID] = sbml_dfs_utils.id_formatter(
        range(n_compartments), SBML_DFS.C_ID
    )

    indexed = formatted.set_index(SBML_DFS.C_ID)
    return indexed[output_columns]
|
2375
|
+
|
2376
|
+
|
2377
|
+
def _edgelist_process_species(species_df, interaction_source, extra_species_columns):
    """
    Attach a source and sequential IDs to species and split off extra columns.

    Parameters
    ----------
    species_df : pd.DataFrame
        Raw species data
    interaction_source : source.Source
        Source object to assign to species
    extra_species_columns : list
        Names of extra columns to preserve separately

    Returns
    -------
    tuple of pd.DataFrame
        Species table (name, identifiers, source) and the extra-columns table,
        both indexed by species ID
    """
    core_columns = [SBML_DFS.S_NAME, SBML_DFS.S_IDENTIFIERS, SBML_DFS.S_SOURCE]

    # work on a copy so the caller's table is left untouched
    annotated = species_df.copy()
    annotated[SBML_DFS.S_SOURCE] = interaction_source
    n_species = annotated.shape[0]
    annotated[SBML_DFS.S_ID] = sbml_dfs_utils.id_formatter(
        range(n_species), SBML_DFS.S_ID
    )

    indexed = annotated.set_index(SBML_DFS.S_ID)
    selected = indexed[core_columns + extra_species_columns]

    # core table and extra data share the same species ID index
    processed_species = selected[core_columns]
    species_data = selected[extra_species_columns]

    return processed_species, species_data
|
2411
|
+
|
2412
|
+
|
2413
|
+
def _edgelist_create_compartmentalized_species(
    interaction_edgelist, species_df, compartments_df, interaction_source
):
    """
    Derive the compartmentalized species implied by the interaction endpoints.

    Parameters
    ----------
    interaction_edgelist : pd.DataFrame
        Interaction data containing species-compartment combinations
    species_df : pd.DataFrame
        Processed species data with IDs
    compartments_df : pd.DataFrame
        Processed compartments data with IDs
    interaction_source : source.Source
        Source object to assign to compartmentalized species

    Returns
    -------
    pd.DataFrame
        Compartmentalized species with formatted names, species and
        compartment IDs and source, indexed by compartmentalized species ID
    """
    # (species name column, compartment name column) for each end of an interaction
    endpoint_columns = (
        ("upstream_name", "upstream_compartment"),
        ("downstream_name", "downstream_compartment"),
    )
    endpoint_tables = [
        interaction_edgelist[[name_col, compartment_col]].rename(
            columns={name_col: SBML_DFS.S_NAME, compartment_col: SBML_DFS.C_NAME}
        )
        for name_col, compartment_col in endpoint_columns
    ]
    comp_species = pd.concat(endpoint_tables).drop_duplicates()

    # look up species and compartment IDs by name
    species_ids = species_df[SBML_DFS.S_NAME].reset_index()
    compartment_ids = compartments_df[SBML_DFS.C_NAME].reset_index()
    comp_species_w_ids = comp_species.merge(
        species_ids, how="left", on=SBML_DFS.S_NAME
    ).merge(compartment_ids, how="left", on=SBML_DFS.C_NAME)

    # fail early if names were ambiguous or absent from the lookup tables
    _sbml_dfs_from_edgelist_check_cspecies_merge(comp_species_w_ids, comp_species)

    name_pairs = zip(
        comp_species_w_ids[SBML_DFS.S_NAME], comp_species_w_ids[SBML_DFS.C_NAME]
    )
    comp_species_w_ids[SBML_DFS.SC_NAME] = [f"{s} [{c}]" for s, c in name_pairs]
    comp_species_w_ids[SBML_DFS.SC_SOURCE] = interaction_source
    n_cspecies = comp_species_w_ids.shape[0]
    comp_species_w_ids[SBML_DFS.SC_ID] = sbml_dfs_utils.id_formatter(
        range(n_cspecies), SBML_DFS.SC_ID
    )

    output_columns = [
        SBML_DFS.SC_NAME,
        SBML_DFS.S_ID,
        SBML_DFS.C_ID,
        SBML_DFS.SC_SOURCE,
    ]
    return comp_species_w_ids.set_index(SBML_DFS.SC_ID)[output_columns]
|
2480
|
+
|
2481
|
+
|
2482
|
+
def _edgelist_create_reactions_and_species(
    interaction_edgelist,
    comp_species,
    species_df,
    compartments_df,
    interaction_source,
    upstream_stoichiometry,
    downstream_stoichiometry,
    downstream_sbo_name,
    extra_reactions_columns,
):
    """
    Build the reactions and reaction_species tables from the interactions.

    Each interaction row becomes one reaction with two reaction species: the
    upstream member (keeping the row's own sbo_term) and the downstream member
    (using the SBO term looked up from ``downstream_sbo_name``).

    Parameters
    ----------
    interaction_edgelist : pd.DataFrame
        Original interaction data
    comp_species : pd.DataFrame
        Compartmentalized species with IDs
    species_df : pd.DataFrame
        Processed species data with IDs
    compartments_df : pd.DataFrame
        Processed compartments data with IDs
    interaction_source : source.Source
        Source object for reactions
    upstream_stoichiometry : int
        Stoichiometry for upstream species
    downstream_stoichiometry : int
        Stoichiometry for downstream species
    downstream_sbo_name : str
        SBO term name for downstream species
    extra_reactions_columns : list
        Names of extra columns to preserve

    Returns
    -------
    tuple
        (reactions_df, reaction_species_df, reactions_data)

    Raises
    ------
    ValueError
        If attaching compartmentalized species changes the number of rows
    """
    # Name every compartmentalized species; these merges join on the columns
    # the two frames have in common.
    species_names = species_df[SBML_DFS.S_NAME].reset_index()
    compartment_names = compartments_df[SBML_DFS.C_NAME].reset_index()
    named_cspecies = (
        comp_species.reset_index().merge(species_names).merge(compartment_names)
    )
    sc_lookup = named_cspecies[[SBML_DFS.SC_ID, SBML_DFS.S_NAME, SBML_DFS.C_NAME]]

    # One lookup per interaction end, keyed by that end's name and compartment
    upstream_lookup = sc_lookup.rename(
        columns={
            SBML_DFS.SC_ID: "sc_id_up",
            SBML_DFS.S_NAME: "upstream_name",
            SBML_DFS.C_NAME: "upstream_compartment",
        }
    )
    downstream_lookup = sc_lookup.rename(
        columns={
            SBML_DFS.SC_ID: "sc_id_down",
            SBML_DFS.S_NAME: "downstream_name",
            SBML_DFS.C_NAME: "downstream_compartment",
        }
    )

    kept_columns = REQUIRED_REACTION_FROMEDGELIST_COLUMNS + extra_reactions_columns
    interaction_w_cspecies = interaction_edgelist.merge(
        upstream_lookup, how="left"
    ).merge(downstream_lookup, how="left")[kept_columns]

    # A changed row count means a lookup matched more than once
    n_input = interaction_edgelist.shape[0]
    n_merged = interaction_w_cspecies.shape[0]
    if n_input != n_merged:
        raise ValueError(
            f"Merging compartmentalized species resulted in row count change "
            f"from {n_input} to {n_merged}"
        )

    # Reaction IDs are needed by both the reactions and reaction species tables
    interaction_w_cspecies[SBML_DFS.R_ID] = sbml_dfs_utils.id_formatter(
        range(n_merged), SBML_DFS.R_ID
    )

    # Reactions table plus the separately returned extra columns
    core_columns = [
        SBML_DFS.R_NAME,
        SBML_DFS.R_IDENTIFIERS,
        SBML_DFS.R_SOURCE,
        SBML_DFS.R_ISREVERSIBLE,
    ]
    with_source = interaction_w_cspecies.copy()
    with_source[SBML_DFS.R_SOURCE] = interaction_source
    reactions_wide = with_source.set_index(SBML_DFS.R_ID)[
        core_columns + extra_reactions_columns
    ]
    reactions_data = reactions_wide[extra_reactions_columns]
    reactions_df = reactions_wide[core_columns]

    # Upstream members keep the sbo_term supplied with the interaction
    upstream_members = (
        interaction_w_cspecies[["sc_id_up", "sbo_term", SBML_DFS.R_ID]]
        .assign(stoichiometry=upstream_stoichiometry)
        .rename(columns={"sc_id_up": "sc_id"})
    )
    # Downstream members all share the requested SBO term
    downstream_members = (
        interaction_w_cspecies[["sc_id_down", SBML_DFS.R_ID]]
        .assign(
            stoichiometry=downstream_stoichiometry,
            sbo_term=MINI_SBO_FROM_NAME[downstream_sbo_name],
        )
        .rename(columns={"sc_id_down": "sc_id"})
    )

    reaction_species_df = pd.concat([upstream_members, downstream_members])
    reaction_species_df["rsc_id"] = sbml_dfs_utils.id_formatter(
        range(reaction_species_df.shape[0]), "rsc_id"
    )
    reaction_species_df = reaction_species_df.set_index("rsc_id")

    return reactions_df, reaction_species_df, reactions_data
|
2608
|
+
|
2609
|
+
|
2610
|
+
def _edgelist_assemble_sbml_model(
    compartments,
    species,
    comp_species,
    reactions,
    reaction_species,
    species_data,
    reactions_data,
    keep_species_data,
    keep_reactions_data,
    extra_columns,
):
    """
    Combine the processed tables into a validated SBML_dfs object.

    Parameters
    ----------
    compartments : pd.DataFrame
        Processed compartments data
    species : pd.DataFrame
        Processed species data
    comp_species : pd.DataFrame
        Compartmentalized species data
    reactions : pd.DataFrame
        Reactions data
    reaction_species : pd.DataFrame
        Reaction species relationships
    species_data : pd.DataFrame
        Extra species data to include
    reactions_data : pd.DataFrame
        Extra reactions data to include
    keep_species_data : bool or str
        Label for species extra data; "source" is used when this is not a string
    keep_reactions_data : bool or str
        Label for reactions extra data; "source" is used when this is not a string
    extra_columns : dict
        Dictionary containing lists of extra column names

    Returns
    -------
    SBML_dfs
        Validated SBML data structure
    """
    sbml_tbl_dict = {
        "compartments": compartments,
        "species": species,
        "compartmentalized_species": comp_species,
        "reactions": reactions,
        "reaction_species": reaction_species,
    }

    # (extra_columns key, output table key, keep flag, extra data table)
    optional_tables = (
        ("reactions", "reactions_data", keep_reactions_data, reactions_data),
        ("species", "species_data", keep_species_data, species_data),
    )
    for entity, table_key, keep_flag, extra_table in optional_tables:
        # only attach extra data when there are extra columns to carry
        if len(extra_columns[entity]) > 0:
            data_label = keep_flag if isinstance(keep_flag, str) else "source"
            sbml_tbl_dict[table_key] = {data_label: extra_table}

    sbml_model = SBML_dfs(sbml_tbl_dict)
    sbml_model.validate()

    return sbml_model
|
2678
|
+
|
2679
|
+
|
2680
|
+
def _sbml_dfs_from_edgelist_check_cspecies_merge(
|
2681
|
+
merged_species: pd.DataFrame, original_species: pd.DataFrame
|
2682
|
+
) -> None:
|
2683
|
+
"""Check for a mismatch between the provided species data and species implied by the edgelist."""
|
2684
|
+
|
2685
|
+
# check for 1-many merge
|
2686
|
+
if merged_species.shape[0] != original_species.shape[0]:
|
2687
|
+
raise ValueError(
|
2688
|
+
"Merging compartmentalized species to species_df"
|
2689
|
+
" and compartments_df by names resulted in an "
|
2690
|
+
f"increase in the tables from {original_species.shape[0]}"
|
2691
|
+
f" to {merged_species.shape[0]} indicating that names were"
|
2692
|
+
" not unique"
|
2693
|
+
)
|
2694
|
+
|
2695
|
+
# check for missing species and compartments
|
2696
|
+
missing_compartments = merged_species[merged_species[SBML_DFS.C_ID].isna()][
|
2697
|
+
SBML_DFS.C_NAME
|
2698
|
+
].unique()
|
2699
|
+
if len(missing_compartments) >= 1:
|
2700
|
+
raise ValueError(
|
2701
|
+
f"{len(missing_compartments)} compartments were present in"
|
2702
|
+
' "interaction_edgelist" but not "compartments_df":'
|
2703
|
+
f" {', '.join(missing_compartments)}"
|
2704
|
+
)
|
2705
|
+
|
2706
|
+
missing_species = merged_species[merged_species[SBML_DFS.S_ID].isna()][
|
2707
|
+
SBML_DFS.S_NAME
|
2708
|
+
].unique()
|
2709
|
+
if len(missing_species) >= 1:
|
2710
|
+
raise ValueError(
|
2711
|
+
f"{len(missing_species)} species were present in "
|
2712
|
+
'"interaction_edgelist" but not "species_df":'
|
2713
|
+
f" {', '.join(missing_species)}"
|
2714
|
+
)
|
2715
|
+
|
2716
|
+
return None
|
2717
|
+
|
2718
|
+
|
2719
|
+
def _stub_compartments(
    stubbed_compartment: str = GENERIC_COMPARTMENT,
) -> pd.DataFrame:
    """Stub Compartments

    Create a compartments table containing a single compartment annotated
    with its GO term.

    Args:
        stubbed_compartment (str): the name of a compartment; it must be a key
            of both COMPARTMENT_ALIASES and COMPARTMENTS_GO_TERMS

    Returns:
        compartments_df (pd.DataFrame): one-row compartments dataframe indexed
            by compartment ID

    Raises:
        ValueError: the compartment is not a key of COMPARTMENT_ALIASES
        ValueError: the compartment is not a key of COMPARTMENTS_GO_TERMS
    """

    if stubbed_compartment not in COMPARTMENT_ALIASES:
        raise ValueError(
            f"{stubbed_compartment} is not defined in constants.COMPARTMENTS"
        )

    if stubbed_compartment not in COMPARTMENTS_GO_TERMS:
        raise ValueError(
            f"{stubbed_compartment} is not defined in constants.COMPARTMENTS_GO_TERMS"
        )

    # describe the compartment by its GO term
    go_identifier = COMPARTMENTS_GO_TERMS[stubbed_compartment]
    go_url = identifiers.create_uri_url(
        ontology=ONTOLOGIES.GO,
        identifier=go_identifier,
    )
    formatted_uri = identifiers.format_uri(
        uri=go_url,
        biological_qualifier_type=BQB.IS,
    )
    compartment_identifiers = identifiers.Identifiers([formatted_uri])

    compartments_df = pd.DataFrame(
        {
            SBML_DFS.C_NAME: [stubbed_compartment],
            SBML_DFS.C_IDENTIFIERS: [compartment_identifiers],
        }
    )
    compartments_df.index = sbml_dfs_utils.id_formatter([0], SBML_DFS.C_ID)  # type: ignore
    compartments_df.index.name = SBML_DFS.C_ID

    return compartments_df
|
2764
|
+
|
2765
|
+
|
2766
|
+
def _validate_matching_data(data_table: pd.DataFrame, ref_table: pd.DataFrame):
|
2767
|
+
"""Validates a table against a reference
|
2768
|
+
|
2769
|
+
This check if the table has the same index, no duplicates in the index
|
2770
|
+
and that all values in the index are in the reference table.
|
2771
|
+
|
2772
|
+
Args:
|
2773
|
+
data_table (pd.DataFrame): a table with data that should
|
2774
|
+
match the reference
|
2775
|
+
ref_table (pd.DataFrame): a reference table
|
2776
|
+
|
2777
|
+
Raises:
|
2778
|
+
ValueError: not same index name
|
2779
|
+
ValueError: index contains duplicates
|
2780
|
+
ValueError: index not subset of index of reactions table
|
2781
|
+
"""
|
2782
|
+
ref_index_name = ref_table.index.name
|
2783
|
+
if data_table.index.name != ref_index_name:
|
2784
|
+
raise ValueError(
|
2785
|
+
"the index name for reaction data table was not"
|
2786
|
+
f" {ref_index_name}: {data_table.index.name}"
|
2787
|
+
)
|
2788
|
+
ids = data_table.index
|
2789
|
+
if any(ids.duplicated()):
|
2790
|
+
raise ValueError(
|
2791
|
+
"the index for reaction data table " "contained duplicate values"
|
2792
|
+
)
|
2793
|
+
if not all(ids.isin(ref_table.index)):
|
2794
|
+
raise ValueError(
|
2795
|
+
"the index for reaction data table contained values"
|
2796
|
+
" not found in the reactions table"
|
2797
|
+
)
|
2798
|
+
if not isinstance(data_table, pd.DataFrame):
|
2799
|
+
raise TypeError(
|
2800
|
+
f"The data table was type {type(data_table).__name__}"
|
2801
|
+
" but must be a pd.DataFrame"
|
2802
|
+
)
|