napistu 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- napistu/consensus.py +3 -4
- napistu/ingestion/constants.py +51 -0
- napistu/ingestion/reactom_fi.py +208 -0
- napistu/network/ng_utils.py +26 -6
- napistu/network/precompute.py +56 -0
- napistu/sbml_dfs_utils.py +8 -2
- napistu/source.py +243 -40
- napistu/statistics/hypothesis_testing.py +66 -0
- napistu/utils.py +23 -1
- {napistu-0.4.2.dist-info → napistu-0.4.3.dist-info}/METADATA +1 -1
- {napistu-0.4.2.dist-info → napistu-0.4.3.dist-info}/RECORD +19 -17
- tests/test_network_precompute.py +30 -0
- tests/test_sbml_dfs_utils.py +13 -0
- tests/test_source.py +38 -6
- tests/test_statistics_hypothesis_testing.py +62 -0
- tests/test_set_coverage.py +0 -50
- {napistu-0.4.2.dist-info → napistu-0.4.3.dist-info}/WHEEL +0 -0
- {napistu-0.4.2.dist-info → napistu-0.4.3.dist-info}/entry_points.txt +0 -0
- {napistu-0.4.2.dist-info → napistu-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {napistu-0.4.2.dist-info → napistu-0.4.3.dist-info}/top_level.txt +0 -0
napistu/consensus.py
CHANGED

@@ -426,7 +426,7 @@ def post_consensus_species_ontology_check(sbml_dfs: sbml_dfs_core.SBML_dfs) -> s
 
     # get the sources of species in the consensus model
     consensus_sbmldf_tbl_var_sc = (
-        source.unnest_sources(sbml_dfs.species,
+        source.unnest_sources(sbml_dfs.species, verbose=False)
         .reset_index()
         .sort_values([SOURCE_SPEC.NAME])
     )

@@ -504,12 +504,11 @@ def post_consensus_source_check(
 ) -> pd.DataFrame:
     """Provide sources of tables in a consensus model; the output df will be used to determine whether models are merged."""
 
-
-    table_pk = sbml_dfs.schema[table_name]["pk"]
+    table_pk = sbml_dfs.schema[table_name][SCHEMA_DEFS.PK]
 
     sbml_dfs_tbl = getattr(sbml_dfs, table_name)
     sbml_dfs_tbl_pathway_source = (
-        source.unnest_sources(sbml_dfs_tbl,
+        source.unnest_sources(sbml_dfs_tbl, verbose=False)
         .reset_index()
         .sort_values(["name"])
     )
napistu/ingestion/constants.py
CHANGED

@@ -3,6 +3,7 @@ from __future__ import annotations
 
 from types import SimpleNamespace
 
+from napistu.constants import SBOTERM_NAMES
 
 SPECIES_FULL_NAME_HUMAN = "Homo sapiens"
 SPECIES_FULL_NAME_MOUSE = "Mus musculus"

@@ -90,6 +91,56 @@ REACTOME_PATHWAYS_URL = "https://reactome.org/download/current/ReactomePathways.
 REACTOME_PATHWAY_INDEX_COLUMNS = ["file", "source", "species", "pathway_id", "name"]
 REACTOME_PATHWAY_LIST_COLUMNS = ["pathway_id", "name", "species"]
 
+# REACTOME FI
+REACTOME_FI_URL = "http://cpws.reactome.org/caBigR3WebApp2025/FIsInGene_04142025_with_annotations.txt.zip"
+
+REACTOME_FI = SimpleNamespace(
+    GENE1="Gene1",
+    GENE2="Gene2",
+    ANNOTATION="Annotation",
+    DIRECTION="Direction",
+    SCORE="Score",
+)
+
+REACTOME_FI_DIRECTIONS = SimpleNamespace(
+    UNDIRECTED="-",
+    STIMULATED_BY="<-",
+    STIMULATES="->",
+    STIMULATES_AND_STIMULATED_BY="<->",
+    INHIBITED_BY="|-",
+    INHIBITS="-|",
+    INHIBITS_AND_INHIBITED_BY="|-|",
+    STIMULATES_AND_INHIBITED_BY="|->",
+    INHIBITS_AND_STIMULATED_BY="<-|",
+)
+
+VALID_REACTOME_FI_DIRECTIONS = REACTOME_FI_DIRECTIONS.__dict__.values()
+
+REACTOME_FI_RULES_REVERSE = SimpleNamespace(
+    NAME_RULES={"catalyzed by": SBOTERM_NAMES.CATALYST},
+    DIRECTION_RULES={
+        REACTOME_FI_DIRECTIONS.STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
+        REACTOME_FI_DIRECTIONS.STIMULATES_AND_STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
+        REACTOME_FI_DIRECTIONS.INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
+        REACTOME_FI_DIRECTIONS.INHIBITS_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
+        REACTOME_FI_DIRECTIONS.STIMULATES_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
+        REACTOME_FI_DIRECTIONS.UNDIRECTED: SBOTERM_NAMES.INTERACTOR,
+    },
+)
+
+REACTOME_FI_RULES_FORWARD = SimpleNamespace(
+    NAME_RULES={"catalyze(;$)": SBOTERM_NAMES.CATALYST},
+    DIRECTION_RULES={
+        REACTOME_FI_DIRECTIONS.STIMULATES: SBOTERM_NAMES.STIMULATOR,
+        REACTOME_FI_DIRECTIONS.STIMULATES_AND_STIMULATED_BY: SBOTERM_NAMES.STIMULATOR,
+        REACTOME_FI_DIRECTIONS.STIMULATES_AND_INHIBITED_BY: SBOTERM_NAMES.STIMULATOR,
+        REACTOME_FI_DIRECTIONS.INHIBITS: SBOTERM_NAMES.INHIBITOR,
+        REACTOME_FI_DIRECTIONS.INHIBITS_AND_INHIBITED_BY: SBOTERM_NAMES.INHIBITOR,
+        REACTOME_FI_DIRECTIONS.INHIBITS_AND_STIMULATED_BY: SBOTERM_NAMES.INHIBITOR,
+        REACTOME_FI_DIRECTIONS.UNDIRECTED: SBOTERM_NAMES.INTERACTOR,
+    },
+)
+
 # SBML
 SBML_DEFS = SimpleNamespace(
     ERROR_NUMBER="error_number",
napistu/ingestion/reactom_fi.py
ADDED

@@ -0,0 +1,208 @@
+import logging
+import pandas as pd
+
+from napistu.identifiers import Identifiers
+from napistu import utils
+from napistu.ingestion.constants import (
+    REACTOME_FI,
+    REACTOME_FI_RULES_FORWARD,
+    REACTOME_FI_RULES_REVERSE,
+    REACTOME_FI_URL,
+    VALID_REACTOME_FI_DIRECTIONS,
+)
+
+
+logger = logging.getLogger(__name__)
+
+
+def download_reactome_fi(target_uri: str, url: str = REACTOME_FI_URL) -> None:
+    """
+    Download the Reactome Functional Interactions (FI) dataset as a TSV file.
+
+    Parameters
+    ----------
+    target_uri : str
+        The URI where the Reactome FI data should be saved. Should end with .tsv
+    url : str, optional
+        URL to download the zipped Reactome functional interactions TSV from.
+        Defaults to REACTOME_FI_URL.
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    ValueError
+        If target_uri does not end with .tsv
+    """
+
+    if not target_uri.endswith(".tsv"):
+        raise ValueError(f"Target URI must end with .tsv, got {target_uri}")
+
+    file_ext = url.split(".")[-1]
+    target_filename = url.split("/")[-1].split(f".{file_ext}")[0]
+    logger.info("Start downloading proteinatlas %s to %s", url, target_uri)
+    # target_filename is the name of the file in the zip file which will be renamed to target_uri
+    utils.download_wget(url, target_uri, target_filename=target_filename)
+
+    return None
+
+
+def format_reactome_fi_edgelist(interactions: pd.DataFrame):
+    """
+    Format the Reactome FI interactions DataFrame as an edgelist for network analysis.
+
+    Parameters
+    ----------
+    interactions : pd.DataFrame
+        DataFrame containing Reactome FI interactions.
+
+    Returns
+    -------
+    Dictonary of:
+
+    interaction_edgelist : pd.DataFrame
+        Table containing molecular interactions with columns:
+        - upstream_name : str, matches "s_name" from species_df
+        - downstream_name : str, matches "s_name" from species_df
+        - upstream_compartment : str, matches "c_name" from compartments_df
+        - downstream_compartment : str, matches "c_name" from compartments_df
+        - r_name : str, name for the interaction
+        - sbo_term : str, SBO term defining interaction type
+        - r_Identifiers : identifiers.Identifiers, supporting identifiers
+        - r_isreversible : bool, whether reaction is reversible
+    species_df : pd.DataFrame
+        Table defining molecular species with columns:
+        - s_name : str, name of molecular species
+        - s_Identifiers : identifiers.Identifiers, species identifiers
+    compartments_df : pd.DataFrame
+        Table defining compartments with columns:
+        - c_name : str, name of compartment
+        - c_Identifiers : identifiers.Identifiers, compartment identifiers
+
+    Notes
+    -----
+    This function is not yet implemented and will raise NotImplementedError.
+    """
+
+    raise NotImplementedError("TO DO - This function is incomplete")
+
+    formatted_annotations = _parse_reactome_fi_annotations(interactions)
+
+    # this join will expand some rows to 2 since the bidirectional relationships are captured as separate edges in Napistu
+    annotated_interactions = interactions.merge(
+        formatted_annotations,
+        on=[REACTOME_FI.ANNOTATION, REACTOME_FI.DIRECTION],
+        how="left",
+    )
+
+    # flip reverse entries so all relationships are forward or undirected
+    formatted_interactions = (
+        pd.concat(
+            [
+                annotated_interactions.query("polarity == 'forward'"),
+                (
+                    annotated_interactions.query("polarity == 'reverse'").rename(
+                        columns={
+                            REACTOME_FI.GENE1: REACTOME_FI.GENE2,
+                            REACTOME_FI.GENE2: REACTOME_FI.GENE1,
+                        }
+                    )
+                ),
+            ]
+        )[[REACTOME_FI.GENE1, REACTOME_FI.GENE2, "sbo_term_name", "Score"]]
+        # looks like they were already unique edges
+        .sort_values("Score", ascending=False)
+        .groupby([REACTOME_FI.GENE1, REACTOME_FI.GENE2])
+        .first()
+    )
+
+    fi_edgelist = (
+        formatted_interactions.reset_index()
+        .rename(
+            columns={
+                REACTOME_FI.GENE1: "upstream_name",
+                REACTOME_FI.GENE2: "downstream_name",
+            }
+        )
+        .assign(r_Identifiers=Identifiers([]))
+    )
+
+    return fi_edgelist
+
+
+def _parse_reactome_fi_annotations(interactions: pd.DataFrame) -> pd.DataFrame:
+    """
+    Parse and annotate Reactome FI interaction types and directions using regex-based rules.
+
+    Parameters
+    ----------
+    interactions : pd.DataFrame
+        DataFrame containing Reactome FI interactions, with annotation and direction columns.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with annotation, direction, SBO term name, and polarity for each unique annotation/direction pair.
+
+    Raises
+    ------
+    ValueError
+        If an annotation/direction pair cannot be matched to a rule or if invalid directions are found.
+    """
+
+    distinct_annotations = (
+        interactions[[REACTOME_FI.ANNOTATION, REACTOME_FI.DIRECTION]]
+        .drop_duplicates()
+        .reset_index(drop=True)
+    )
+    invalid_directions = distinct_annotations.loc[
+        ~distinct_annotations[REACTOME_FI.DIRECTION].isin(VALID_REACTOME_FI_DIRECTIONS),
+        "Direction",
+    ]
+    if len(invalid_directions) > 0:
+        raise ValueError(f"Invalid directions: {invalid_directions}")
+
+    annotations = list()
+    for _, vals in distinct_annotations.iterrows():
+        annot, direction = vals
+
+        forward_match = utils.match_regex_dict(
+            annot, REACTOME_FI_RULES_FORWARD.NAME_RULES
+        )
+        if not forward_match:
+            if direction in REACTOME_FI_RULES_FORWARD.DIRECTION_RULES:
+                forward_match = REACTOME_FI_RULES_FORWARD.DIRECTION_RULES[direction]
+
+        reverse_match = utils.match_regex_dict(
+            annot, REACTOME_FI_RULES_REVERSE.NAME_RULES
+        )
+        if not reverse_match:
+            if direction in REACTOME_FI_RULES_REVERSE.DIRECTION_RULES:
+                reverse_match = REACTOME_FI_RULES_REVERSE.DIRECTION_RULES[direction]
+
+        if not (forward_match or reverse_match):
+            raise ValueError(f"No match found for {annot} with direction {direction}")
+
+        if forward_match:
+            annotations.append(
+                {
+                    REACTOME_FI.ANNOTATION: annot,
+                    REACTOME_FI.DIRECTION: direction,
+                    "sbo_term_name": forward_match,
+                    "polarity": "forward",
+                }
+            )
+
+        if reverse_match:
+            annotations.append(
+                {
+                    REACTOME_FI.ANNOTATION: annot,
+                    REACTOME_FI.DIRECTION: direction,
+                    "sbo_term_name": reverse_match,
+                    "polarity": "reverse",
+                }
+            )
+
+    return pd.DataFrame(annotations)
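A minimal usage sketch for the new downloader (illustrative; the output path below is hypothetical and must end in .tsv, per the validation above):

    from napistu.ingestion import reactom_fi

    # Downloads the zipped FI export and renames the enclosed TSV to the target.
    # Note that format_reactome_fi_edgelist is still a stub and raises
    # NotImplementedError.
    reactom_fi.download_reactome_fi("/tmp/reactome_fi.tsv")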
napistu/network/ng_utils.py
CHANGED

@@ -66,7 +66,7 @@ def compartmentalize_species_pairs(
     Compartmentalize Shortest Paths
 
     For a set of origin and destination species pairs, consider each species in every
-
+    compartment it operates in, seperately.
 
     Parameters
     ----------

@@ -112,22 +112,42 @@ def compartmentalize_species_pairs(
 
 
 def get_minimal_sources_edges(
-    vertices: pd.DataFrame,
+    vertices: pd.DataFrame,
+    sbml_dfs: sbml_dfs_core.SBML_dfs,
+    source_total_counts: Optional[pd.Series] = None,
 ) -> pd.DataFrame | None:
-    """
+    """
+    Assign edges to a set of sources.
+
+    Parameters
+    ----------
+    vertices: pd.DataFrame
+        A table of vertices.
+    sbml_dfs: sbml_dfs_core.SBML_dfs
+        A pathway model
+    source_total_counts: pd.Series
+        A series of the total counts of each source.
+
+    Returns
+    -------
+    edge_sources: pd.DataFrame
+        A table of edges and the sources they are assigned to.
+    """
+
     nodes = vertices["node"].tolist()
     present_reactions = sbml_dfs.reactions[sbml_dfs.reactions.index.isin(nodes)]
 
     if len(present_reactions) == 0:
         return None
 
-
-    source_df = source.unnest_sources(present_reactions, table_schema["source"])
+    source_df = source.unnest_sources(present_reactions)
 
     if source_df is None:
         return None
    else:
-        edge_sources = source.
+        edge_sources = source.source_set_coverage(
+            source_df, source_total_counts, sbml_dfs
+        )
         return edge_sources.reset_index()[
             [SBML_DFS.R_ID, SOURCE_SPEC.PATHWAY_ID, SOURCE_SPEC.NAME]
         ]
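A usage sketch of the updated signature (illustrative), mirroring how tests/test_source.py calls it; `sbml_dfs` is assumed to be a previously constructed SBML_dfs model:

    from napistu.network import ng_utils
    from napistu.constants import SBML_DFS

    vertices = sbml_dfs.reactions.reset_index().rename(columns={SBML_DFS.R_ID: "node"})
    edge_sources = ng_utils.get_minimal_sources_edges(vertices, sbml_dfs)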
napistu/network/precompute.py
CHANGED

@@ -110,6 +110,62 @@ def precompute_distances(
     return filtered_precomputed_distances
 
 
+def filter_precomputed_distances_top_n(precomputed_distances, top_n=50):
+    """
+    Filter precomputed distances to only include the top-n pairs for each distance measure.
+
+    Parameters
+    ----------
+    precomputed_distances : pd.DataFrame
+        Precomputed distances.
+    top_n : int, optional
+        Top-n pairs to include for each distance measure.
+
+    Returns
+    -------
+    pd.DataFrame
+        Filtered precomputed distances.
+    """
+
+    # take the union of top-n for each distance measure; and from origin -> dest and dest -> origin
+    distance_vars = set(precomputed_distances.columns) - {
+        NAPISTU_EDGELIST.SC_ID_ORIGIN,
+        NAPISTU_EDGELIST.SC_ID_DEST,
+    }
+
+    valid_pairs = list()
+    for distance_var in distance_vars:
+        top_n_pairs_by_origin = (
+            precomputed_distances.sort_values(by=distance_var, ascending=False)
+            .groupby(NAPISTU_EDGELIST.SC_ID_ORIGIN)
+            .head(top_n)
+        )
+        top_n_pairs_by_dest = (
+            precomputed_distances.sort_values(by=distance_var, ascending=False)
+            .groupby(NAPISTU_EDGELIST.SC_ID_DEST)
+            .head(top_n)
+        )
+
+        valid_pairs.append(
+            top_n_pairs_by_origin[
+                [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]
+            ]
+        )
+        valid_pairs.append(
+            top_n_pairs_by_dest[
+                [NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST]
+            ]
+        )
+
+    all_valid_pairs = pd.concat(valid_pairs).drop_duplicates()
+
+    return precomputed_distances.merge(
+        all_valid_pairs,
+        on=[NAPISTU_EDGELIST.SC_ID_ORIGIN, NAPISTU_EDGELIST.SC_ID_DEST],
+        how="inner",
+    )
+
+
 def _calculate_distances_subset(
     napistu_graph: NapistuGraph,
     vs_to_partition: pd.DataFrame,
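A usage sketch for the new filter (illustrative), assuming `precomputed_distances` was produced by precompute_distances():

    from napistu.network import precompute

    # Keeps the union of top-5 pairs per distance measure, taken from both the
    # origin and destination sides.
    filtered = precompute.filter_precomputed_distances_top_n(
        precomputed_distances, top_n=5
    )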
napistu/sbml_dfs_utils.py
CHANGED

@@ -456,8 +456,14 @@ def infer_entity_type(df: pd.DataFrame) -> str:
     if entity_schema.get(SCHEMA_DEFS.PK) == df.index.name:
         return entity_type
 
-    # Get DataFrame columns that are also primary keys
-
+    # Get DataFrame columns that are also primary keys, including index or MultiIndex names
+    index_names = []
+    if isinstance(df.index, pd.MultiIndex):
+        index_names = [name for name in df.index.names if name is not None]
+    elif df.index.name is not None:
+        index_names = [df.index.name]
+
+    df_columns = set(df.columns).union(index_names).intersection(primary_keys)
 
     # Check for exact match with primary key + foreign keys
     for entity_type, entity_schema in schema.items():
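The change above lets entity types be inferred from MultiIndex names as well as columns. A sketch of the newly supported case, mirroring the test added in tests/test_sbml_dfs_utils.py:

    import pandas as pd

    from napistu import sbml_dfs_utils
    from napistu.constants import SBML_DFS

    df = pd.DataFrame({"some_col": [1, 2]})
    df.index = pd.MultiIndex.from_tuples(
        [("rxn1", "a"), ("rxn2", "b")], names=[SBML_DFS.R_ID, "foo"]
    )
    assert sbml_dfs_utils.infer_entity_type(df) == SBML_DFS.REACTIONS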
napistu/source.py
CHANGED

@@ -1,8 +1,14 @@
 from __future__ import annotations
 
+import numpy as np
 import pandas as pd
+from typing import Optional
+
 from napistu import indices
-from napistu
+from napistu import sbml_dfs_core
+from napistu import sbml_dfs_utils
+from napistu.statistics import hypothesis_testing
+from napistu.constants import SBML_DFS_SCHEMA, SCHEMA_DEFS, SOURCE_SPEC
 
 
 class Source:

@@ -41,11 +47,18 @@ class Source:
         Creates an empty source object. This is typically used when creating an SBML_dfs
         object from a single source.
         pw_index : indices.PWIndex
+            a pathway index object containing the pathway_id and other metadata
 
         Returns
         -------
         None.
 
+        Raises
+        ------
+        ValueError:
+            if pw_index is not a indices.PWIndex
+        ValueError:
+            if SOURCE_SPEC.MODEL is not present in source_df
         """
 
         if init is True:

@@ -101,8 +114,27 @@ def create_source_table(
     """
     Create Source Table
 
-    Create a table with one row per "new_id" and a Source object created from the
-
+    Create a table with one row per "new_id" and a Source object created from the unionof "old_id" Source objects
+
+    Parameters
+    ----------
+    lookup_table: pd.Series
+        a pd.Series containing the index of the table to create a source table for
+    table_schema: dict
+        a dictionary containing the schema of the table to create a source table for
+    pw_index: indices.PWIndex
+        a pathway index object containing the pathway_id and other metadata
+
+    Returns
+    -------
+    source_table: pd.DataFrame
+        a pd.DataFrame containing the index of the table to create a source table for
+        with one row per "new_id" and a Source object created from the union of "old_id" Source objects
+
+    Raises
+    ------
+    ValueError:
+        if SOURCE_SPEC.SOURCE is not present in table_schema
     """
 
     if SOURCE_SPEC.SOURCE not in table_schema.keys():

@@ -142,8 +174,27 @@ def merge_sources(source_list: list | pd.Series) -> Source:
 
     Merge a list of Source objects into a single Source object
 
+    Parameters
+    ----------
+    source_list: list | pd.Series
+        a list of Source objects or a pd.Series of Source objects
+
+    Returns
+    -------
+    source: Source
+        a Source object created from the union of the Source objects in source_list
+
+    Raises
+    ------
+    TypeError:
+        if source_list is not a list or pd.Series
     """
 
+    if not isinstance(source_list, (list, pd.Series)):
+        raise TypeError(
+            f"source_list must be a list or pd.Series, but was a {type(source_list).__name__}"
+        )
+
     # filter to non-empty sources
     # empty sources have only been initialized; a merge hasn't occured
     existing_sources = [s.source is not None for s in source_list]

@@ -160,28 +211,35 @@ def merge_sources(source_list: list | pd.Series) -> Source:
     return Source(pd.concat(existing_source_list))
 
 
-def unnest_sources(
-    source_table: pd.DataFrame, source_var: str, verbose: bool = False
-) -> pd.DataFrame:
+def unnest_sources(source_table: pd.DataFrame, verbose: bool = False) -> pd.DataFrame:
     """
     Unnest Sources
 
     Take a pd.DataFrame containing an array of Sources and
     return one-row per source.
 
-    Parameters
+    Parameters
+    ----------
     source_table: pd.DataFrame
         a table containing an array of Sources
-
-
+    verbose: bool
+        print progress
 
-    Returns
+    Returns
+    -------
     pd.Dataframe containing the index of source_table but expanded
     to include one row per source
 
     """
 
     sources = list()
+
+    table_type = sbml_dfs_utils.infer_entity_type(source_table)
+    source_table_schema = SBML_DFS_SCHEMA.SCHEMA[table_type]
+    if SCHEMA_DEFS.SOURCE not in source_table_schema.keys():
+        raise ValueError(f"{table_type} does not have a source attribute")
+
+    source_var = source_table_schema[SCHEMA_DEFS.SOURCE]
     source_table_index = source_table.index.to_frame().reset_index(drop=True)
 
     for i in range(source_table.shape[0]):

@@ -216,53 +274,73 @@ def unnest_sources(
     return pd.concat(sources)
 
 
-def
-
+def source_set_coverage(
+    select_sources_df: pd.DataFrame,
+    source_total_counts: Optional[pd.Series] = None,
+    sbml_dfs: Optional[sbml_dfs_core.SBML_dfs] = None,
 ) -> pd.DataFrame:
     """
     Greedy Set Coverage of Sources
 
-
-
+    Find the set of pathways covering `select_sources_df`. If `all_sources_df`
+    is provided pathways will be selected iteratively based on statistical
+    enrichment. If `all_sources_df` is not provided, the largest pathways
+    will be chosen iteratively.
 
-    Parameters
-
+    Parameters
+    ----------
+    select_sources_df: pd.DataFrame
         pd.Dataframe containing the index of source_table but expanded to
         include one row per source. As produced by source.unnest_sources()
-
-
+    source_total_counts: pd.Series
+        pd.Series containing the total counts of each source. As produced by
+        source.get_source_total_counts()
+    sbml_dfs: sbml_dfs_core.SBML_dfs
+        if `source_total_counts` is provided then `sbml_dfs` must be provided
+        to calculate the total number of entities in the table.
+
+    Returns
+    -------
     minimial_sources: [str]
         A list of pathway_ids of the minimal source set
 
     """
 
+    table_type = sbml_dfs_utils.infer_entity_type(select_sources_df)
+    pk = SBML_DFS_SCHEMA.SCHEMA[table_type][SCHEMA_DEFS.PK]
+
+    if source_total_counts is not None:
+        if sbml_dfs is None:
+            raise ValueError(
+                "If `source_total_counts` is provided, `sbml_dfs` must be provided to calculate the total number of entities in the table."
+            )
+        n_total_entities = sbml_dfs.get_table(table_type).shape[0]
+
     # rollup pathways with identical membership
-    deduplicated_sources = _deduplicate_source_df(
+    deduplicated_sources = _deduplicate_source_df(select_sources_df)
 
     unaccounted_for_members = deduplicated_sources
     retained_pathway_ids = []
-
     while unaccounted_for_members.shape[0] != 0:
         # find the pathway with the most members
-
-
+
+        if source_total_counts is None:
+            top_pathway = _select_top_pathway_by_size(unaccounted_for_members)
+        else:
+            top_pathway = _select_top_pathway_by_enrichment(
+                unaccounted_for_members, source_total_counts, n_total_entities, pk
+            )
+
+        if top_pathway is None:
+            break
+
         retained_pathway_ids.append(top_pathway)
 
         # remove all members associated with the top pathway
-
-        unaccounted_for_members
-            unaccounted_for_members[SOURCE_SPEC.PATHWAY_ID] == top_pathway
-        ]
-        .index.get_level_values(table_schema["pk"])
-        .tolist()
+        unaccounted_for_members = _update_unaccounted_for_members(
+            top_pathway, unaccounted_for_members
         )
 
-        unaccounted_for_members = unaccounted_for_members[
-            ~unaccounted_for_members.index.get_level_values(table_schema["pk"]).isin(
-                members_captured
-            )
-        ]
-
     minimial_sources = deduplicated_sources[
         deduplicated_sources[SOURCE_SPEC.PATHWAY_ID].isin(retained_pathway_ids)
     ].sort_index()

@@ -270,9 +348,39 @@ def greedy_set_coverge_of_sources(
     return minimial_sources
 
 
-def
+def get_source_total_counts(
+    sbml_dfs: sbml_dfs_core.SBML_dfs, entity_type: str
+) -> pd.Series:
+    """
+    Get the total counts of each source.
+
+    Parameters
+    ----------
+    sbml_dfs: sbml_dfs_core.SBML_dfs
+        sbml_dfs object containing the table to get the total counts of
+    entity_type: str
+        the type of entity to get the total counts of
+
+    Returns
+    -------
+    source_total_counts: pd.Series
+        pd.Series containing the total counts of each source.
+    """
+
+    all_sources_table = unnest_sources(sbml_dfs.get_table(entity_type))
+    source_total_counts = all_sources_table.value_counts(SOURCE_SPEC.PATHWAY_ID).rename(
+        "total_counts"
+    )
+
+    return source_total_counts
+
+
+def _deduplicate_source_df(source_df: pd.DataFrame) -> pd.DataFrame:
     """Combine entries in a source table when multiple models have the same members."""
 
+    table_type = sbml_dfs_utils.infer_entity_type(source_df)
+    source_table_schema = SBML_DFS_SCHEMA.SCHEMA[table_type]
+
     # drop entries which are missing required attributes and throw an error if none are left
     REQUIRED_NON_NA_ATTRIBUTES = [SOURCE_SPEC.PATHWAY_ID]
     indexed_sources = (

@@ -296,7 +404,11 @@ def _deduplicate_source_df(source_df: pd.DataFrame, table_schema: dict) -> pd.Da
             {
                 SOURCE_SPEC.PATHWAY_ID: p,
                 "membership_string": "_".join(
-                    set(
+                    set(
+                        indexed_sources.loc[[p]][
+                            source_table_schema[SCHEMA_DEFS.PK]
+                        ].tolist()
+                    )
                 ),
             }
             for p in pathways

@@ -320,16 +432,16 @@ def _deduplicate_source_df(source_df: pd.DataFrame, table_schema: dict) -> pd.Da
 
     merged_sources = pd.concat(
         [
-            _collapse_by_membership_string(s, membership_categories,
+            _collapse_by_membership_string(s, membership_categories, source_table_schema)  # type: ignore
             for s in category_index.tolist()
         ]
     )
     merged_sources[SOURCE_SPEC.INDEX_NAME] = merged_sources.groupby(
-
+        source_table_schema[SCHEMA_DEFS.PK]
     ).cumcount()
 
     return merged_sources.set_index(
-        [
+        [source_table_schema[SCHEMA_DEFS.PK], SOURCE_SPEC.INDEX_NAME]
     ).sort_index()
 
 

@@ -345,7 +457,10 @@ def _collapse_by_membership_string(
     return pd.DataFrame(
         [
             pd.concat(
-                [
+                [
+                    pd.Series({table_schema[SCHEMA_DEFS.PK]: ms}),
+                    collapsed_source_membership,
+                ]
             )
             for ms in membership_string.split("_")
         ]

@@ -398,3 +513,91 @@ def _safe_source_merge(member_Sources: Source | list) -> Source:
         return merge_sources(member_Sources.tolist())
     else:
         raise TypeError("Expecting source.Source or pd.Series")
+
+
+def _select_top_pathway_by_size(unaccounted_for_members: pd.DataFrame) -> str:
+
+    pathway_members = unaccounted_for_members.value_counts(SOURCE_SPEC.PATHWAY_ID)
+    top_pathway = pathway_members[pathway_members == max(pathway_members)].index[0]
+
+    return top_pathway
+
+
+def _select_top_pathway_by_enrichment(
+    unaccounted_for_members: pd.DataFrame,
+    source_total_counts: pd.Series,
+    n_total_entities: int,
+    table_pk: str,
+    min_pw_size: int = 5,
+) -> str:
+
+    n_observed_entities = len(
+        unaccounted_for_members.index.get_level_values(table_pk).unique()
+    )
+    pathway_members = unaccounted_for_members.value_counts(
+        SOURCE_SPEC.PATHWAY_ID
+    ).rename("observed_members")
+
+    pathway_members = pathway_members.loc[pathway_members >= min_pw_size]
+    if pathway_members.shape[0] == 0:
+        return None
+
+    wide_contingency_table = (
+        pathway_members.to_frame()
+        .join(source_total_counts)
+        .assign(
+            missing_members=lambda x: x["total_counts"] - x["observed_members"],
+            observed_nonmembers=lambda x: n_observed_entities - x["observed_members"],
+            nonobserved_nonmembers=lambda x: n_total_entities
+            - x["observed_nonmembers"]
+            - x["missing_members"]
+            - x["observed_members"],
+        )
+        .drop(columns=["total_counts"])
+    )
+
+    # calculate enrichments using a fast vectorized normal approximation
+    odds_ratios, _ = hypothesis_testing.fisher_exact_vectorized(
+        wide_contingency_table["observed_members"],
+        wide_contingency_table["missing_members"],
+        wide_contingency_table["observed_nonmembers"],
+        wide_contingency_table["nonobserved_nonmembers"],
+    )
+
+    return pathway_members.index[np.argmax(odds_ratios)]
+
+
+def _update_unaccounted_for_members(
+    top_pathway, unaccounted_for_members
+) -> pd.DataFrame:
+    """
+    Update the unaccounted for members dataframe by removing the members
+    associated with the top pathway.
+
+    Parameters
+    ----------
+    top_pathway: str
+        the pathway to remove from the unaccounted for members
+    unaccounted_for_members: pd.DataFrame
+        the dataframe of unaccounted for members
+
+    Returns
+    -------
+    unaccounted_for_members: pd.DataFrame
+        the dataframe of unaccounted for members with the top pathway removed
+    """
+
+    table_type = sbml_dfs_utils.infer_entity_type(unaccounted_for_members)
+    pk = SBML_DFS_SCHEMA.SCHEMA[table_type][SCHEMA_DEFS.PK]
+
+    members_captured = (
+        unaccounted_for_members[
+            unaccounted_for_members[SOURCE_SPEC.PATHWAY_ID] == top_pathway
+        ]
+        .index.get_level_values(pk)
+        .tolist()
+    )
+
+    return unaccounted_for_members[
+        ~unaccounted_for_members.index.get_level_values(pk).isin(members_captured)
+    ]
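A usage sketch of the two coverage modes (illustrative), mirroring tests/test_source.py; `sbml_dfs` is an assumed pathway model:

    from napistu import source

    source_df = source.unnest_sources(sbml_dfs.reactions)

    # Size-based greedy coverage (the original behavior):
    coverage = source.source_set_coverage(source_df)

    # Enrichment-based coverage against background source counts:
    totals = source.get_source_total_counts(sbml_dfs, "reactions")
    coverage = source.source_set_coverage(
        source_df, source_total_counts=totals, sbml_dfs=sbml_dfs
    )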
napistu/statistics/hypothesis_testing.py
ADDED

@@ -0,0 +1,66 @@
+from typing import Union
+
+import numpy as np
+from scipy.stats import norm
+
+
+def fisher_exact_vectorized(
+    observed_members: Union[list[int], np.ndarray],
+    missing_members: Union[list[int], np.ndarray],
+    observed_nonmembers: Union[list[int], np.ndarray],
+    nonobserved_nonmembers: Union[list[int], np.ndarray],
+) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Fast vectorized one-tailed Fisher exact test using normal approximation.
+
+    Parameters:
+    -----------
+    observed_members, missing_members, observed_nonmembers, nonobserved_nonmembers : array-like
+        The four cells of the 2x2 contingency tables (must be non-negative)
+
+    Returns:
+    --------
+    odds_ratios : numpy array
+        Odds ratios for each test
+    p_values : numpy array
+        One-tailed p-values (tests for enrichment)
+    """
+    # Convert to numpy arrays
+    a = np.array(observed_members, dtype=float)
+    b = np.array(missing_members, dtype=float)
+    c = np.array(observed_nonmembers, dtype=float)
+    d = np.array(nonobserved_nonmembers, dtype=float)
+
+    # Check for negative values and raise error
+    if np.any((a < 0) | (b < 0) | (c < 0) | (d < 0)):
+        raise ValueError("All contingency table values must be non-negative")
+
+    # Calculate odds ratios
+    odds_ratios = np.divide(
+        a * d, b * c, out=np.full_like(a, np.inf, dtype=float), where=(b * c) != 0
+    )
+
+    # Normal approximation to hypergeometric distribution
+    n = a + b + c + d
+
+    # Avoid division by zero in expected value calculation
+    expected_a = np.divide(
+        (a + b) * (a + c), n, out=np.zeros_like(n, dtype=float), where=n != 0
+    )
+
+    # Variance calculation with protection against division by zero
+    var_a = np.divide(
+        (a + b) * (c + d) * (a + c) * (b + d),
+        n * n * (n - 1),
+        out=np.ones_like(n, dtype=float),  # Default to 1 to avoid sqrt(0)
+        where=(n > 1),
+    )
+    var_a = np.maximum(var_a, 1e-10)  # Ensure positive variance
+
+    # Continuity correction and z-score
+    z = (a - expected_a - 0.5) / np.sqrt(var_a)
+
+    # One-tailed p-value (upper tail for enrichment)
+    p_values = norm.sf(z)  # 1 - norm.cdf(z)
+
+    return odds_ratios, p_values
napistu/utils.py
CHANGED

@@ -14,7 +14,7 @@ import zipfile
 from contextlib import closing
 from itertools import starmap
 from textwrap import fill
-from typing import Any,
+from typing import Any, Dict, Optional, List, Union
 from urllib.parse import urlparse
 from pathlib import Path
 from requests.adapters import HTTPAdapter

@@ -1131,6 +1131,28 @@ def safe_fill(x: str, fill_width: int = 15) -> str:
     return fill(x, fill_width)
 
 
+def match_regex_dict(s: str, regex_dict: Dict[str, any]) -> Optional[any]:
+    """
+    Apply each regex in regex_dict to the string s. If a regex matches, return its value.
+    If no regex matches, return None.
+
+    Parameters
+    ----------
+    s : str
+        The string to test.
+    regex_dict : dict
+        Dictionary where keys are regex patterns (str), and values are the values to return.
+
+    Returns
+    -------
+    The value associated with the first matching regex, or None if no match.
+    """
+    for pattern, value in regex_dict.items():
+        if re.search(pattern, s):
+            return value
+    return None
+
+
 def _add_nameness_score_wrapper(df, name_var, table_schema):
     """Call _add_nameness_score with default value."""
 
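A small sketch of the new helper (illustrative; the rule dict below is hypothetical):

    from napistu import utils

    # The first pattern that re.search()-matches wins; None when nothing matches.
    rules = {"catalyzed by": "CATALYST"}
    utils.match_regex_dict("reaction A catalyzed by B", rules)  # -> "CATALYST"
    utils.match_regex_dict("unrelated text", rules)  # -> None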
{napistu-0.4.2.dist-info → napistu-0.4.3.dist-info}/RECORD
CHANGED

@@ -1,13 +1,13 @@
 napistu/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
 napistu/__main__.py,sha256=xwlbh_0Ig3a-yG6BIJRiDPSN9R2HnX2pEBvlodlO6h4,29015
-napistu/consensus.py,sha256=
+napistu/consensus.py,sha256=SDw58vkDivzy5AiOQUnf5vUbFxmSrMGMMmptDMZhk0E,69807
 napistu/constants.py,sha256=8sp1l0cxu2rsnCrWBEEwhcBKvDtc4u0D0f_72zILLW0,13427
 napistu/identifiers.py,sha256=e2-nTVzr5AINa0y1ER9218bKXyF2kAeJ9At22S4Z00o,33914
 napistu/indices.py,sha256=Zjg3gE0JQ3T879lCPazYg-WXVE6hvcAr713ZKpJ32rk,9830
 napistu/sbml_dfs_core.py,sha256=s0OyoHs-AjOcbZu1d3KNkW_PI7Rxbhu5ZLpfQeO4iY8,72639
-napistu/sbml_dfs_utils.py,sha256=
-napistu/source.py,sha256=
-napistu/utils.py,sha256=
+napistu/sbml_dfs_utils.py,sha256=SOy1Ii2hDFOfQa7pFAJS9EfAmfBVD_sHvDJBVmCN_p8,46456
+napistu/source.py,sha256=iDDKpN-4k_W_tyxEjqe_z-yPJv7uoFRRBhkiBtOH5C8,20416
+napistu/utils.py,sha256=p2sJxTklmV30XS6hanJRjcdfgeaZpkULuMyQX3BPP0c,36404
 napistu/context/__init__.py,sha256=LQBEqipcHKK0E5UlDEg1ct-ymCs93IlUrUaH8BCevf0,242
 napistu/context/discretize.py,sha256=Qq7zg46F_I-PvQIT2_pEDQV7YEtUQCxKoRvT5Gu9QsE,15052
 napistu/context/filtering.py,sha256=l1oq-43ysSGqU9VmhTOO_pYT4DSMf20yxvktPC1MI0I,13696

@@ -17,13 +17,14 @@ napistu/gcs/downloads.py,sha256=SvGv9WYr_Vt3guzyz1QiAuBndeKPTBtWSFLj1-QbLf4,6348
 napistu/gcs/utils.py,sha256=eLSsvewWJdCguyj2k0ozUGP5BTemaE1PZg41Z3aY5kM,571
 napistu/ingestion/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
 napistu/ingestion/bigg.py,sha256=f65--8ARe248eYCUJpFMF284Wz53sLyFyBuwelxHmJA,4340
-napistu/ingestion/constants.py,sha256=
+napistu/ingestion/constants.py,sha256=jo3v8Z7Y_tNNhTmEcokVOh1HBJFAXc-Z38S4mG58qfo,10059
 napistu/ingestion/gtex.py,sha256=X0hSC1yrpf4xSJWFhpeNcnHwJzKDII2MvjfUqYA0JN8,3720
 napistu/ingestion/hpa.py,sha256=R27ExrryKQ4Crxv9ATXmBJCa-yd01TMOrDjkeBhIQac,5054
 napistu/ingestion/identifiers_etl.py,sha256=6ppDUA6lEZurdmVbiFLOUzphYbr-hndMhtqsQnq_yAc,5009
 napistu/ingestion/napistu_edgelist.py,sha256=4RLXsoIk_-Atu-Nqme_t1JpEpBET26VIY2Y_Hcd3sMw,3580
 napistu/ingestion/obo.py,sha256=AQkIPWbjA464Lma0tx91JucWkIwLjC7Jgv5VHGRTDkE,9601
 napistu/ingestion/psi_mi.py,sha256=5eJjm7XWogL9oTyGqR52kntHClLwLsTePKqCvUGyi-w,10111
+napistu/ingestion/reactom_fi.py,sha256=hKdOY2wNtcNk6WlnHnNalryiXv6mtcWUiBW9isXPB0Y,6991
 napistu/ingestion/reactome.py,sha256=Hn9X-vDp4o_HK-OtaQvel3vJeZ8_TC1-4N2rruK9Oks,7099
 napistu/ingestion/sbml.py,sha256=l8Z98yWuOIRGns8G4UNnoQz7v_xmukZb_IZ_5ye34Ko,25296
 napistu/ingestion/string.py,sha256=go1WGTkoLJejX7GQWf9bFeInFGAw4jNSpS2B_Zr5f_s,11364

@@ -66,9 +67,9 @@ napistu/network/net_create.py,sha256=66kV_xoWnu4BVLaJZ1TAC7wBSsjPDqjoAXH-X9ShV3s
 napistu/network/net_create_utils.py,sha256=zajwaz2xAij_9fEnD77SgBw_EnNAnJ8jBCmmK2rk_bA,24672
 napistu/network/net_propagation.py,sha256=Il5nDOWh3nLz8gRhDFHGp2LxcvJ9C1twiSZjDeiZMUo,23490
 napistu/network/ng_core.py,sha256=dGnTUKR4WtnvaYMyIHqqF55FY4mJSa7wjA2LZ4cVB6U,11720
-napistu/network/ng_utils.py,sha256=
+napistu/network/ng_utils.py,sha256=ahSm-8M2pV662V7MMVcGaoguBM55_y-F7LDmZSVp9ag,15951
 napistu/network/paths.py,sha256=r6LVKVvX7i3ctBA5r-xvHfpH5Zsd0VDHUCtin2iag20,17453
-napistu/network/precompute.py,sha256=
+napistu/network/precompute.py,sha256=ARU2tktWnxFISaHAY8chpkg8pusZPv7TT5jSIB9eFF0,10081
 napistu/ontologies/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
 napistu/ontologies/constants.py,sha256=GyOFvezSxDK1VigATcruTKtNhjcYaid1ggulEf_HEtQ,4345
 napistu/ontologies/dogma.py,sha256=VVj6NKBgNym4SdOSu8g22OohALj7cbObhIJmdY2Sfy0,8860

@@ -84,8 +85,9 @@ napistu/scverse/__init__.py,sha256=Lgxr3iMQAkTzXE9BNz93CndNP5djzerLvmHM-D0PU3I,3
 napistu/scverse/constants.py,sha256=0iAkhyJUIeFGHdLLU3fCaEU1O3Oix4qAsxr3CxGTjVs,653
 napistu/scverse/loading.py,sha256=jqiE71XB-wdV50GyZrauFNY0Lai4bX9Fm2Gv80VR8t8,27016
 napistu/statistics/__init__.py,sha256=dFXAhIqlTLJMwowS4BUDT08-Vy3Q0u1L0CMCErSZT1Y,239
+napistu/statistics/hypothesis_testing.py,sha256=k0mBFAMF0XHVcKwS26aPnEbq_FIUVwXU1gZ6cKfFbCk,2190
 napistu/statistics/quantiles.py,sha256=1-LnmVzC2CQWxCKUh0yi6YfKrbsZM1-kkD7nu2-aS5s,3042
-napistu-0.4.
+napistu-0.4.3.dist-info/licenses/LICENSE,sha256=kW8wVT__JWoHjl2BbbJDAZInWa9AxzJeR_uv6-i5x1g,1063
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/conftest.py,sha256=t-GHb0MvSsC-MyhkFpOy2K3t5fi7eaig_Rc2xEQC-t8,9678
 tests/test_consensus.py,sha256=Hzfrgp4SpkRDnEMVMD3f0UInSycndB8kKzC4wDDvRas,15076

@@ -114,7 +116,7 @@ tests/test_network_net_propagation.py,sha256=kZeDHD93iMrLVvxO4OyfRH5_vgsYeQyC40O
 tests/test_network_ng_core.py,sha256=w-iNBTtenennJhaLFauk952pEsk7W0-Fa8lPvIRqHyY,628
 tests/test_network_ng_utils.py,sha256=QVVuRnvCRfTSIlGdwQTIF9lr0wOwoc5gGeXAUY_AdgE,713
 tests/test_network_paths.py,sha256=TWZnxY5bF3m6gahcxcYJGrBIawh2-_vUcec1LyPmXV8,1686
-tests/test_network_precompute.py,sha256=
+tests/test_network_precompute.py,sha256=IPr1KhtxBD0fXx_2TvZqnevrD-Iig35otb8yloRFpRc,10014
 tests/test_ontologies_genodexito.py,sha256=6fINyUiubHZqu7qxye09DQfJXw28ZMAJc3clPb-cCoY,2298
 tests/test_ontologies_id_tables.py,sha256=CpwpbmQvTc1BaVd6jbDKHAVE2etwN0vx93nC8jpnMlE,7265
 tests/test_ontologies_mygene.py,sha256=VkdRcKIWmcG6V-2dpfvsBiOJN5dO-j0RqZNxtJRcyBU,1583

@@ -124,18 +126,18 @@ tests/test_rpy2_callr.py,sha256=V4a-QH5krgYOQRgqzksMzIkGAFjBqKOAqgprxrH6bE0,2904
 tests/test_rpy2_init.py,sha256=T3gnxC1O7XNvYM2P4018ikpPPAy-kwQLm7Erj0RfA-4,5895
 tests/test_sbml.py,sha256=f25zj1NogYrmLluvBDboLameTuCiQ309433Qn3iPvhg,1483
 tests/test_sbml_dfs_core.py,sha256=nnLPpZTVtCznOBohk7CX67x6sMqktJWt-sZMWQKoaDs,26521
-tests/test_sbml_dfs_utils.py,sha256=
+tests/test_sbml_dfs_utils.py,sha256=ZD9x2B81fsfYEjAV9wphHOR7ywjNcfvfw1LGNv4PxUA,11471
 tests/test_sbo.py,sha256=x_PENFaXYsrZIzOZu9cj_Wrej7i7SNGxgBYYvcigLs0,308
 tests/test_scverse_loading.py,sha256=bnU1lQSYYWhOAs0IIBoi4ZohqPokDQJ0n_rtkAfEyMU,29948
-tests/
-tests/
+tests/test_source.py,sha256=iV-Yyu8flhIGWF17SCL8msG2bjqwb9w2IZ694b0iZ-o,2985
+tests/test_statistics_hypothesis_testing.py,sha256=qD-oS9zo5JlH-jdtiOrWAKI4nKFuZvvh6361_pFSpIs,2259
 tests/test_statistics_quantiles.py,sha256=yNDeqwgbP-1Rx3C_dLX_wnwT_Lr-iJWClmeKmElqmTE,4984
 tests/test_uncompartmentalize.py,sha256=nAk5kfAVLU9a2VWe2x2HYVcKqj-EnwmwddERIPRax8c,1289
 tests/test_utils.py,sha256=qPSpV-Q9b6vmdycgaDmQqtcvzKnAVnN9j5xJ9x-T6bg,23959
 tests/utils.py,sha256=SoWQ_5roJteFGcMaOeEiQ5ucwq3Z2Fa3AAs9iXHTsJY,749
 tests/test_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-napistu-0.4.
-napistu-0.4.
-napistu-0.4.
-napistu-0.4.
-napistu-0.4.
+napistu-0.4.3.dist-info/METADATA,sha256=gV0a41vyQ52Ja15QyLSPGfeIJPj6oQRTC00HsxJjG88,4078
+napistu-0.4.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+napistu-0.4.3.dist-info/entry_points.txt,sha256=_QnaPOvJNA3IltxmZgWIiBoen-L1bPYX18YQfC7oJgQ,41
+napistu-0.4.3.dist-info/top_level.txt,sha256=Gpvk0a_PjrtqhYcQ9IDr3zR5LqpZ-uIHidQMIpjlvhY,14
+napistu-0.4.3.dist-info/RECORD,,
tests/test_network_precompute.py
CHANGED

@@ -276,3 +276,33 @@ def test_precomputed_distances_serialization():
     # Clean up the temporary file
     if os.path.exists(temp_path):
         os.remove(temp_path)
+
+
+def test_filter_precomputed_distances_top_n_subset():
+    # Use a small top_n for a quick test
+    top_n = 5
+    filtered = precompute.filter_precomputed_distances_top_n(
+        precomputed_distances, top_n=top_n
+    )
+    # Check that the filtered DataFrame is a subset of the original
+    merged = filtered.merge(
+        precomputed_distances,
+        on=[
+            precompute.NAPISTU_EDGELIST.SC_ID_ORIGIN,
+            precompute.NAPISTU_EDGELIST.SC_ID_DEST,
+        ],
+        how="left",
+        indicator=True,
+    )
+    assert (
+        merged["_merge"] == "both"
+    ).all(), "Filtered rows must be present in the original DataFrame"
+    # Check that columns are preserved
+    assert set(
+        [
+            precompute.NAPISTU_EDGELIST.SC_ID_ORIGIN,
+            precompute.NAPISTU_EDGELIST.SC_ID_DEST,
+        ]
+    ).issubset(filtered.columns)
+    # Optionally, check that the number of rows is less than or equal to the input
+    assert filtered.shape[0] <= precomputed_distances.shape[0]
tests/test_sbml_dfs_utils.py
CHANGED

@@ -334,3 +334,16 @@ def test_infer_entity_type_errors():
     )  # Two primary keys
     with pytest.raises(ValueError):
         sbml_dfs_utils.infer_entity_type(df)
+
+
+def test_infer_entity_type_multindex_reactions():
+    # DataFrame with MultiIndex (r_id, foo), should infer as reactions
+    import pandas as pd
+    from napistu.constants import SBML_DFS
+
+    df = pd.DataFrame({"some_col": [1, 2]})
+    df.index = pd.MultiIndex.from_tuples(
+        [("rxn1", "a"), ("rxn2", "b")], names=[SBML_DFS.R_ID, "foo"]
+    )
+    result = sbml_dfs_utils.infer_entity_type(df)
+    assert result == SBML_DFS.REACTIONS
tests/test_source.py
CHANGED

@@ -5,6 +5,8 @@ import os
 import pandas as pd
 from napistu import indices
 from napistu import source
+from napistu.network import ng_utils
+from napistu.constants import SBML_DFS
 
 test_path = os.path.abspath(os.path.join(__file__, os.pardir))
 test_data = os.path.join(test_path, "test_data")

@@ -58,10 +60,40 @@ def test_source_w_pwindex():
     assert source_obj.source.shape == (2, 8)
 
 
-
-
-
+def test_get_minimal_source_edges(sbml_dfs_metabolism):
+    vertices = sbml_dfs_metabolism.reactions.reset_index().rename(
+        columns={SBML_DFS.R_ID: "node"}
+    )
+
+    minimal_source_edges = ng_utils.get_minimal_sources_edges(
+        vertices, sbml_dfs_metabolism
+    )
+    # print(minimal_source_edges.shape)
+    assert minimal_source_edges.shape == (87, 3)
+
+
+def test_source_set_coverage(sbml_dfs_metabolism):
+
+    source_df = source.unnest_sources(sbml_dfs_metabolism.reactions)
+
+    # print(source_df.shape)
+    assert source_df.shape == (111, 7)
+
+    set_coverage = source.source_set_coverage(source_df)
+    # print(set_coverage.shape)
+    assert set_coverage.shape == (87, 6)
+
+
+def test_source_set_coverage_enrichment(sbml_dfs_metabolism):
+
+    source_total_counts = source.get_source_total_counts(
+        sbml_dfs_metabolism, "reactions"
+    )
+
+    source_df = source.unnest_sources(sbml_dfs_metabolism.reactions).head(40)
+
+    set_coverage = source.source_set_coverage(
+        source_df, source_total_counts=source_total_counts, sbml_dfs=sbml_dfs_metabolism
+    )
 
-
-    test_source()
-    test_source_w_pwindex()
+    assert set_coverage.shape == (30, 6)
tests/test_statistics_hypothesis_testing.py
ADDED

@@ -0,0 +1,62 @@
+import numpy as np
+from scipy.stats import fisher_exact
+
+from napistu.statistics import hypothesis_testing
+
+
+def test_fisher_exact_vectorized_basic_and_vectorized():
+
+    # Classic Fisher's test example: [[1, 9], [11, 3]]
+    # a=1, b=9, c=11, d=3
+    odds, p = hypothesis_testing.fisher_exact_vectorized([1], [9], [11], [3])
+    # Odds ratio: (1*3)/(9*11) = 3/99 = 0.0303...
+    assert np.allclose(odds, [3 / 99])
+    assert p.shape == (1,)
+    assert (p >= 0).all() and (p <= 1).all()
+
+    # Vectorized: two tables
+    odds, p = hypothesis_testing.fisher_exact_vectorized(
+        [1, 2], [9, 8], [11, 10], [3, 4]
+    )
+    assert odds.shape == (2,)
+    assert p.shape == (2,)
+    # Check that odds ratios are correct
+    expected_odds = np.array([(1 * 3) / (9 * 11), (2 * 4) / (8 * 10)])
+    assert np.allclose(odds, expected_odds)
+    # P-values should be between 0 and 1
+    assert (p >= 0).all() and (p <= 1).all()
+
+
+def test_fisher_exact_vectorized_vs_scipy():
+
+    # Define several 2x2 tables
+    tables = [
+        ([1], [9], [11], [3]),
+        ([5], [2], [8], [7]),
+        ([10], [10], [10], [10]),
+        ([0], [5], [5], [10]),
+        ([3], [7], [2], [8]),
+    ]
+    for a, b, c, d in tables:
+        odds_vec, p_vec = hypothesis_testing.fisher_exact_vectorized(a, b, c, d)
+        # Build the table for scipy
+        table = np.array([[a[0], b[0]], [c[0], d[0]]])
+        odds_scipy, p_scipy = fisher_exact(table, alternative="greater")
+        # Odds ratios should be nearly identical
+        assert np.allclose(odds_vec, [odds_scipy], rtol=1e-6, atol=1e-8)
+        # P-values should be close (normal approx vs exact)
+        assert np.allclose(
+            p_vec, [p_scipy], rtol=0.15, atol=1e-3
+        )  # allow some tolerance
+
+    # Also test vectorized input
+    a = [1, 5, 10, 0, 3]
+    b = [9, 2, 10, 5, 7]
+    c = [11, 8, 10, 5, 2]
+    d = [3, 7, 10, 10, 8]
+    odds_vec, p_vec = hypothesis_testing.fisher_exact_vectorized(a, b, c, d)
+    for i in range(len(a)):
+        table = np.array([[a[i], b[i]], [c[i], d[i]]])
+        odds_scipy, p_scipy = fisher_exact(table, alternative="greater")
+        assert np.allclose(odds_vec[i], odds_scipy, rtol=1e-6, atol=1e-8)
+        assert np.allclose(p_vec[i], p_scipy, rtol=0.15, atol=1e-3)
tests/test_set_coverage.py
DELETED

@@ -1,50 +0,0 @@
-from __future__ import annotations
-
-from napistu import source
-from napistu.network import ng_utils
-
-
-def test_get_minimal_source_edges(sbml_dfs_metabolism):
-    vertices = sbml_dfs_metabolism.reactions.reset_index().rename(
-        columns={"r_id": "node"}
-    )
-
-    minimal_source_edges = ng_utils.get_minimal_sources_edges(
-        vertices, sbml_dfs_metabolism
-    )
-    # print(minimal_source_edges.shape)
-    assert minimal_source_edges.shape == (87, 3)
-
-
-def test_greedy_set_coverge_of_sources(sbml_dfs_metabolism):
-    table_schema = sbml_dfs_metabolism.schema["reactions"]
-
-    source_df = source.unnest_sources(
-        sbml_dfs_metabolism.reactions, source_var="r_Source"
-    )
-    # print(source_df.shape)
-    assert source_df.shape == (111, 7)
-
-    set_coverage = source.greedy_set_coverge_of_sources(source_df, table_schema)
-    # print(set_coverage.shape)
-    assert set_coverage.shape == (87, 6)
-
-
-################################################
-# __main__
-################################################
-
-if __name__ == "__main__":
-    import os
-    from napistu import indices
-    from napistu import consensus
-
-    test_path = os.path.abspath(os.path.join(__file__, os.pardir))
-    test_data = os.path.join(test_path, "test_data")
-
-    pw_index = indices.PWIndex(os.path.join(test_data, "pw_index_metabolism.tsv"))
-    sbml_dfs_dict = consensus.construct_sbml_dfs_dict(pw_index)
-    sbml_dfs_metabolism = consensus.construct_consensus_model(sbml_dfs_dict, pw_index)
-
-    test_get_minimal_source_edges(sbml_dfs_metabolism)
-    test_greedy_set_coverge_of_sources(sbml_dfs_metabolism)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|