pathview-plus 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pathview/__init__.py +124 -0
- pathview/color_mapping.py +153 -0
- pathview/constants.py +27 -0
- pathview/databases.py +309 -0
- pathview/examples.py +342 -0
- pathview/highlighting.py +375 -0
- pathview/id_mapping.py +170 -0
- pathview/kegg_api.py +143 -0
- pathview/kgml_parser.py +189 -0
- pathview/mol_data.py +168 -0
- pathview/node_mapping.py +99 -0
- pathview/pathview.py +316 -0
- pathview/rendering.py +409 -0
- pathview/sbgn_parser.py +353 -0
- pathview/splines.py +304 -0
- pathview/svg_rendering.py +305 -0
- pathview/test_all_features.py +343 -0
- pathview/utils.py +80 -0
- pathview_plus-2.0.0.data/scripts/pathview-cli.py +252 -0
- pathview_plus-2.0.0.dist-info/METADATA +661 -0
- pathview_plus-2.0.0.dist-info/RECORD +23 -0
- pathview_plus-2.0.0.dist-info/WHEEL +5 -0
- pathview_plus-2.0.0.dist-info/top_level.txt +1 -0
pathview/__init__.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""
|
|
2
|
+
pathview — Python implementation of R pathview + SBGNview features.
|
|
3
|
+
|
|
4
|
+
Complete pathway visualization system supporting:
|
|
5
|
+
- KEGG pathways (KGML format)
|
|
6
|
+
- SBGN pathways (Reactome, MetaCyc, PANTHER, SMPDB)
|
|
7
|
+
- Multiple rendering modes (PNG overlay, SVG vector, PDF graph)
|
|
8
|
+
- Highlighting and post-processing
|
|
9
|
+
- Spline curve rendering
|
|
10
|
+
|
|
11
|
+
Public API
|
|
12
|
+
----------
|
|
13
|
+
# Core visualization
|
|
14
|
+
from pathview import pathview
|
|
15
|
+
|
|
16
|
+
# Data utilities
|
|
17
|
+
from pathview import sim_mol_data, mol_sum, node_color
|
|
18
|
+
|
|
19
|
+
# ID mapping
|
|
20
|
+
from pathview import id2eg, eg2id, cpd_id_map
|
|
21
|
+
|
|
22
|
+
# Parsing (KEGG)
|
|
23
|
+
from pathview import parse_kgml, node_info
|
|
24
|
+
|
|
25
|
+
# Parsing (SBGN)
|
|
26
|
+
from pathview import parse_sbgn, sbgn_to_df
|
|
27
|
+
|
|
28
|
+
# Database downloaders
|
|
29
|
+
from pathview import (
|
|
30
|
+
download_kegg, download_reactome, download_metacyc,
|
|
31
|
+
list_reactome_pathways, detect_database
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# Highlighting & post-processing
|
|
35
|
+
from pathview import (
|
|
36
|
+
PathwayResult, highlight_nodes, highlight_edges,
|
|
37
|
+
highlight_path, change_labels
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# Rendering modes
|
|
41
|
+
from pathview import keggview_native, keggview_graph, keggview_svg
|
|
42
|
+
|
|
43
|
+
# Spline curves
|
|
44
|
+
from pathview import (
|
|
45
|
+
cubic_bezier, quadratic_bezier, catmull_rom_spline,
|
|
46
|
+
route_edge_spline, bezier_to_svg_path
|
|
47
|
+
)
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
# Package metadata exposed as module-level dunder attributes.
__version__ = "2.0.0"
__authors__ = 'Richard Allen White III, Jose Luis Figueroa III'
__description__ = "KEGG + SBGN pathway visualization with Python"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
from .color_mapping import draw_color_key, make_colormap, node_color
|
|
56
|
+
from .databases import (DATABASE_INFO, detect_database, download_metacyc,
|
|
57
|
+
download_panther, download_reactome, download_smpdb,
|
|
58
|
+
list_reactome_pathways)
|
|
59
|
+
from .highlighting import (PathwayResult, change_labels, highlight_edges,
|
|
60
|
+
highlight_nodes, highlight_path)
|
|
61
|
+
from .id_mapping import cpd_id_map, eg2id, id2eg
|
|
62
|
+
from .kegg_api import SpeciesInfo, download_kegg, kegg_species_code
|
|
63
|
+
from .kgml_parser import (KGMLEdge, KGMLNode, KGMLPathway, KGMLReaction,
|
|
64
|
+
node_info, parse_kgml)
|
|
65
|
+
from .mol_data import mol_sum, sim_mol_data
|
|
66
|
+
from .node_mapping import node_map
|
|
67
|
+
from .rendering import kegg_legend, keggview_graph, keggview_native
|
|
68
|
+
from .sbgn_parser import (SBGN_ARC_CLASSES, SBGN_GLYPH_CLASSES, SBGNArc,
|
|
69
|
+
SBGNGlyph, SBGNPathway, parse_sbgn, sbgn_to_df)
|
|
70
|
+
from .splines import (bezier_to_svg_path, catmull_rom_spline, cubic_bezier,
|
|
71
|
+
quadratic_bezier, route_edge_spline, smooth_path_svg)
|
|
72
|
+
from .svg_rendering import keggview_svg, render_edge_svg, render_node_svg
|
|
73
|
+
from .utils import max_abs, random_pick, wordwrap
|
|
74
|
+
|
|
75
|
+
# Names exported by ``from pathview import *``, grouped by feature area.
# Every entry except "pathview" is bound by the relative imports above;
# "pathview" itself is bound by the import at the very end of this module.
__all__ = [
    # Core pipeline
    "pathview",

    # Data simulation & aggregation
    "sim_mol_data", "mol_sum",

    # ID mapping
    "id2eg", "eg2id", "cpd_id_map",

    # KEGG API
    "kegg_species_code", "download_kegg", "SpeciesInfo",

    # Database downloads (SBGN)
    "download_reactome", "download_metacyc", "download_panther", "download_smpdb",
    "list_reactome_pathways", "detect_database", "DATABASE_INFO",

    # Parsing (KGML)
    "parse_kgml", "node_info",
    "KGMLPathway", "KGMLNode", "KGMLEdge", "KGMLReaction",

    # Parsing (SBGN)
    "parse_sbgn", "sbgn_to_df",
    "SBGNPathway", "SBGNGlyph", "SBGNArc",
    "SBGN_GLYPH_CLASSES", "SBGN_ARC_CLASSES",

    # Node mapping
    "node_map",

    # Colors
    "node_color", "make_colormap", "draw_color_key",

    # Rendering (PNG, PDF, SVG)
    "keggview_native", "keggview_graph", "keggview_svg", "kegg_legend",
    "render_node_svg", "render_edge_svg",

    # Highlighting & post-processing
    "PathwayResult", "highlight_nodes", "highlight_edges",
    "highlight_path", "change_labels",

    # Spline curves
    "cubic_bezier", "quadratic_bezier", "catmull_rom_spline",
    "route_edge_spline", "bezier_to_svg_path", "smooth_path_svg",

    # Utilities
    "wordwrap", "max_abs", "random_pick",
]
|
|
122
|
+
|
|
123
|
+
# Import pathview last to avoid circular imports
|
|
124
|
+
from .pathview import pathview # noqa: E402
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""
|
|
2
|
+
color_mapping.py
|
|
3
|
+
Colour-scale utilities:
|
|
4
|
+
- make_colormap : build a three-point diverging LinearSegmentedColormap
|
|
5
|
+
- node_color : map numeric node values → hex colour strings
|
|
6
|
+
- draw_color_key : render a colour-bar legend onto a Matplotlib Axes
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Callable, Optional
|
|
12
|
+
|
|
13
|
+
import matplotlib.pyplot as plt
|
|
14
|
+
import numpy as np
|
|
15
|
+
import polars as pl
|
|
16
|
+
from matplotlib.colors import LinearSegmentedColormap, Normalize
|
|
17
|
+
|
|
18
|
+
from .constants import SumMethod
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Colormap construction
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
def make_colormap(
    low: str = "green",
    mid: str = "gray",
    high: str = "red",
    n: int = 256,
) -> LinearSegmentedColormap:
    """
    Create the diverging colour map used by this package (low → mid → high).

    Parameters
    ----------
    low, mid, high: Anchor colours, in order, given as Matplotlib colour
                    names or hex codes.
    n:              Number of colour levels; forwarded to
                    ``LinearSegmentedColormap.from_list`` as ``N``.

    Returns
    -------
    A ``LinearSegmentedColormap`` named "pv_cmap".
    """
    anchor_colours = [low, mid, high]
    colormap = LinearSegmentedColormap.from_list("pv_cmap", anchor_colours, N=n)
    return colormap
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Node colour mapping
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
def node_color(
    plot_data: pl.DataFrame,
    limit: float | tuple[float, float] = 1.0,
    bins: int = 10,
    both_dirs: bool = True,
    discrete: bool = False,
    low: str = "green",
    mid: str = "gray",
    high: str = "red",
    na_col: str = "transparent",
    trans_fun: Optional[Callable[[np.ndarray], np.ndarray]] = None,
) -> pl.DataFrame:
    """
    Convert numeric node values to hex colour strings.

    Parameters
    ----------
    plot_data:    DataFrame with an 'id' column and one or more value
                  columns. Each value column produces a paired '*_col' column.
    limit:        Scalar (symmetric ±limit) or (vmin, vmax) tuple.
    bins:         Number of colour levels in the colour map.
    both_dirs:    When True and *limit* is scalar, use ±limit range.
    discrete:     Reserved for future discrete-colour support (currently unused).
    low/mid/high: Colour endpoints.
    na_col:       Colour string for NaN values (default "transparent").
    trans_fun:    Optional transformation applied to values before colouring.

    Returns
    -------
    A DataFrame with 'id' and one '*_col' column per input value column.
    An input with zero rows yields empty columns instead of raising.
    """
    vmin, vmax = _resolve_limits(limit, both_dirs)
    cmap = make_colormap(low, mid, high, n=bins)
    # clip=True: values outside [vmin, vmax] take the end colours.
    norm = Normalize(vmin=vmin, vmax=vmax, clip=True)

    value_cols = [c for c in plot_data.columns if c != "id"]
    result: dict[str, list] = {"id": plot_data["id"].to_list()}

    # TODO: Quick fix, please verify for accuracy
    # String cells are parsed as hexadecimal integers (a leading '#' is
    # stripped); every other cell is passed through unchanged.
    # ``otypes`` is explicit because np.vectorize otherwise (a) raises
    # ValueError on size-0 input and (b) takes the output dtype from the first
    # element only, which can truncate or reject later values.
    to_float = np.vectorize(
        lambda x: int(x.lstrip("#"), 16) if isinstance(x, str) else x,
        otypes=[float],
    )
    for col in value_cols:
        vals = to_float(plot_data[col].to_numpy())
        if trans_fun is not None:
            vals = trans_fun(vals)
        result[f"{col}_col"] = [_value_to_hex(v, cmap, norm, na_col) for v in vals]

    return pl.DataFrame(result)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _resolve_limits(
    limit: float | tuple[float, float],
    both_dirs: bool,
) -> tuple[float, float]:
    """
    Convert a scalar or tuple limit into (vmin, vmax).

    Parameters
    ----------
    limit:     A real scalar (Python or NumPy) or an indexable (vmin, vmax) pair.
    both_dirs: For a scalar *limit*, True gives (-|limit|, |limit|) and
               False gives (0.0, limit). Ignored when *limit* is a pair.

    Returns
    -------
    (vmin, vmax), always as Python floats.
    """
    # NumPy scalar types are listed explicitly: np.float32 and NumPy integers
    # are not subclasses of Python float/int and would otherwise be indexed
    # like a pair.
    if isinstance(limit, (int, float, np.integer, np.floating)):
        bound = float(limit)
        if both_dirs:
            return -abs(bound), abs(bound)
        return 0.0, bound
    return float(limit[0]), float(limit[1])
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _value_to_hex(
    v: float,
    cmap: LinearSegmentedColormap,
    norm: Normalize,
    na_col: str,
) -> str:
    """
    Translate one value into an upper-case ``#RRGGBB`` string.

    NaN short-circuits to *na_col*. Any other value is passed through *norm*
    and then *cmap*, which must return four components; the first three are
    scaled by 255 and truncated to integers, the fourth is discarded.
    """
    if not np.isnan(v):
        red, green, blue, _alpha = cmap(norm(v))
        # int() truncates toward zero, so 0.5 maps to 127 rather than 128.
        channels = (int(red * 255), int(green * 255), int(blue * 255))
        return "#{:02X}{:02X}{:02X}".format(*channels)
    return na_col
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ---------------------------------------------------------------------------
|
|
117
|
+
# Colour-key legend
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
def draw_color_key(
    ax: plt.Axes,
    limit: float | tuple[float, float] = 1.0,
    bins: int = 10,
    both_dirs: bool = True,
    discrete: bool = False,
    low: str = "green",
    mid: str = "gray",
    high: str = "red",
    label_size: float = 8,
) -> None:
    """
    Draw a horizontal colour-bar legend as a Matplotlib colorbar.

    Parameters
    ----------
    ax:           Axes on which to anchor the colorbar; the colorbar is added
                  to the figure that owns these axes.
    limit:        Colour scale limits (scalar or tuple).
    bins:         Number of colour levels, so the key uses the same quantised
                  colour map that ``node_color`` builds for the same *bins*.
    both_dirs:    Whether to show negative values (scalar *limit* only).
    discrete:     Reserved for future discrete-colour support (currently unused).
    low/mid/high: Colour endpoints.
    label_size:   Font size for tick labels.
    """
    vmin, vmax = _resolve_limits(limit, both_dirs)
    # Pass n=bins: with the default 256 levels the key would show a smooth
    # gradient that does not match the `bins`-level node colours.
    cmap = make_colormap(low, mid, high, n=bins)
    norm = Normalize(vmin=vmin, vmax=vmax)

    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])  # no data array; the mappable only carries cmap + norm

    # Use the axes' own figure rather than plt.colorbar(), which attaches to
    # pyplot's *current* figure and may not be the one that owns `ax`.
    cbar = ax.figure.colorbar(
        sm, ax=ax, orientation="horizontal", fraction=0.046, pad=0.04
    )
    ticks = [vmin, (vmin + vmax) / 2.0, vmax]
    cbar.set_ticks(ticks)
    cbar.set_ticklabels([f"{t:.2g}" for t in ticks])
    cbar.ax.tick_params(labelsize=label_size)
|
pathview/constants.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
constants.py
|
|
3
|
+
Shared type aliases, literals, and package-wide constants.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Callable, Literal, Optional
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
# KEGG REST base URL
KEGG_BASE = "https://rest.kegg.jp"

# Supported aggregation methods for multi-probe summarisation
SumMethod = Literal["sum", "mean", "median", "max", "max_abs", "random"]

# KEGG node types recognised by the renderer
# NOTE(review): "enzyme" appears in VALID_NODE_TYPES below but not in this
# Literal, and "map"/"group" appear here but not there — confirm intentional.
NodeType = Literal["gene", "compound", "map", "ortholog", "group"]

# Non-data columns present on every node DataFrame
# (immutable frozenset of column names)
NODE_META_COLS = frozenset({
    "entry_id", "name", "type", "x", "y",
    "width", "height", "label", "shape",
    "reaction", "component", "size", "kegg_names",
})

# Valid biological node types (used for filtering)
# NOTE(review): this is a plain mutable set, unlike NODE_META_COLS; treat it
# as read-only — TODO consider frozenset once callers are checked.
VALID_NODE_TYPES = {"gene", "enzyme", "compound", "ortholog"}
|
pathview/databases.py
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""
|
|
2
|
+
databases.py
|
|
3
|
+
Download SBGN-ML files from multiple pathway databases:
|
|
4
|
+
- Reactome (human pathways)
|
|
5
|
+
- MetaCyc (metabolic pathways)
|
|
6
|
+
- PANTHER (protein pathways)
|
|
7
|
+
- SMPDB (small molecule pathways)
|
|
8
|
+
|
|
9
|
+
Public API
|
|
10
|
+
----------
|
|
11
|
+
download_reactome : Download Reactome pathway
|
|
12
|
+
download_metacyc : Download MetaCyc pathway
|
|
13
|
+
list_reactome_pathways : List available Reactome pathways for a species
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import warnings
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
import requests
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
# Reactome downloader
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
# Reactome ContentService publishes SBGN export of a pathway/reaction at
#   /exporter/event/{identifier}.sbgn
# (there is no /exporter/sbgn/ route).
_REACTOME_BASE = "https://reactome.org/ContentService/exporter/event"

def download_reactome(
    pathway_id: str,
    output_dir: Path = Path("."),
    species: str = "Homo sapiens",
) -> Optional[Path]:
    """
    Download a Reactome pathway in SBGN-ML format.

    Parameters
    ----------
    pathway_id: Reactome stable ID (e.g., "R-HSA-109582" for Hemostasis)
    output_dir: Directory to save the .sbgn file (created if missing)
    species:    Species name (default: "Homo sapiens"). Currently unused:
                the request is built from *pathway_id* alone.

    Returns
    -------
    Path to downloaded file, or None if download failed

    Example
    -------
    >>> path = download_reactome("R-HSA-109582", output_dir=Path("./pathways"))
    >>> print(f"Downloaded to {path}")

    Note
    ----
    Reactome pathway IDs follow the format: R-[species code]-[number]
    - R-HSA-* : Homo sapiens
    - R-MMU-* : Mus musculus
    - R-RNO-* : Rattus norvegicus

    Failures (network, HTTP status, file write) are reported with
    ``warnings.warn`` rather than raised.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Reactome API endpoint
    url = f"{_REACTOME_BASE}/{pathway_id}.sbgn"

    output_path = output_dir / f"{pathway_id}.sbgn"

    print(f"Info: Downloading Reactome pathway {pathway_id}...")
    try:
        resp = requests.get(url, timeout=60)
        resp.raise_for_status()
        output_path.write_text(resp.text, encoding="utf-8")
        print(f"Info: Downloaded → {output_path}")
        return output_path
    except Exception as exc:
        # Deliberate best-effort: callers receive None instead of an exception.
        warnings.warn(f"Failed to download {pathway_id}: {exc}")
        return None
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ---------------------------------------------------------------------------
|
|
82
|
+
# MetaCyc downloader
|
|
83
|
+
# ---------------------------------------------------------------------------
|
|
84
|
+
|
|
85
|
+
_METACYC_BASE = "https://biocyc.org/META/pathway"
|
|
86
|
+
|
|
87
|
+
def download_metacyc(
    pathway_id: str,
    output_dir: Path = Path("."),
) -> Optional[Path]:
    """
    Fetch a MetaCyc pathway as SBGN-ML and store it as ``<pathway_id>.sbgn``.

    Parameters
    ----------
    pathway_id: MetaCyc pathway ID (e.g., "PWY-7210" for pyrimidine deoxyribonucleotides biosynthesis)
    output_dir: Directory to save the .sbgn file (created if missing)

    Returns
    -------
    Path to the written file, or None when any step of the download fails
    (the failure is reported with ``warnings.warn``).

    Example
    -------
    >>> path = download_metacyc("PWY-7210", output_dir=Path("./pathways"))

    Note
    ----
    MetaCyc requires registration for API access. This function uses the
    public web interface and may not work for all pathways.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    sbgn_file = target_dir / f"{pathway_id}.sbgn"
    # Try BioCyc SBGN export (may require authentication)
    export_url = f"{_METACYC_BASE}?id={pathway_id}&export=sbgn"

    print(f"Info: Downloading MetaCyc pathway {pathway_id}...")
    try:
        response = requests.get(export_url, timeout=60)
        response.raise_for_status()
        sbgn_file.write_text(response.text, encoding="utf-8")
        print(f"Info: Downloaded → {sbgn_file}")
    except Exception as exc:
        failure = (
            f"Failed to download {pathway_id}: {exc}\n"
            "Note: MetaCyc may require authentication for some pathways."
        )
        warnings.warn(failure)
        return None
    return sbgn_file
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# ---------------------------------------------------------------------------
|
|
135
|
+
# PANTHER downloader
|
|
136
|
+
# ---------------------------------------------------------------------------
|
|
137
|
+
|
|
138
|
+
def download_panther(
    pathway_id: str,
    output_dir: Path = Path("."),
) -> Optional[Path]:
    """
    Placeholder for PANTHER SBGN-ML downloads.

    Parameters
    ----------
    pathway_id: PANTHER pathway ID (e.g., "P00001" for p53 pathway)
    output_dir: Directory to save the .sbgn file

    Returns
    -------
    Always None: no download is attempted and neither argument is used.

    Note
    ----
    PANTHER pathways use pre-generated SBGN-ML files. Until a downloader
    exists, this function only emits a warning asking the user to fetch the
    files manually.
    """
    notice = (
        "PANTHER SBGN downloads not yet implemented. "
        "Please download SBGN-ML files manually from PANTHER website."
    )
    warnings.warn(notice)
    return None
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# ---------------------------------------------------------------------------
|
|
167
|
+
# SMPDB downloader
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
|
|
170
|
+
_SMPDB_BASE = "https://smpdb.ca/pathways"
|
|
171
|
+
|
|
172
|
+
def download_smpdb(
    pathway_id: str,
    output_dir: Path = Path("."),
) -> Optional[Path]:
    """
    Placeholder for SMPDB (Small Molecule Pathway Database) downloads.

    Parameters
    ----------
    pathway_id: SMPDB pathway ID (e.g., "SMP0000001" for Glycolysis)
    output_dir: Directory to save the .sbgn file

    Returns
    -------
    Always None: no download is attempted and neither argument is used.

    Note
    ----
    SMPDB provides downloadable pathway files, but this function does not
    fetch them yet; it only emits a warning pointing at the SMPDB site.
    """
    notice = (
        "SMPDB SBGN downloads not yet fully implemented. "
        "Check https://smpdb.ca for pathway files."
    )
    warnings.warn(notice)
    return None
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
# ---------------------------------------------------------------------------
|
|
201
|
+
# Pathway listing
|
|
202
|
+
# ---------------------------------------------------------------------------
|
|
203
|
+
|
|
204
|
+
def list_reactome_pathways(species: str = "Homo sapiens") -> list[dict]:
    """
    List available top-level Reactome pathways for a species.

    Parameters
    ----------
    species: Species name (e.g., "Homo sapiens", "Mus musculus"). It is
             percent-encoded and used as the last path segment of the
             ContentService ``data/pathways/top`` request.

    Returns
    -------
    List of dicts with keys: id, name, species. An empty list is returned
    (and a warning emitted) if the request or response parsing fails.

    Example
    -------
    >>> pathways = list_reactome_pathways("Homo sapiens")
    >>> for pw in pathways[:5]:
    ...     print(f"{pw['id']}: {pw['name']}")
    """
    # Local import keeps this fix self-contained within the function.
    from urllib.parse import quote

    try:
        # Build the URL from *species*; it was previously hard-coded to
        # "Homo%20sapiens", so other species silently returned human pathways
        # labelled with the requested species name.
        url = (
            "https://reactome.org/ContentService/data/pathways/top/"
            f"{quote(species, safe='')}"
        )
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        data = resp.json()

        return [
            {
                "id": item.get("stId", ""),
                "name": item.get("displayName", ""),
                "species": species,
            }
            for item in data
        ]
    except Exception as exc:
        # Deliberate best-effort: callers receive [] instead of an exception.
        warnings.warn(f"Failed to list Reactome pathways: {exc}")
        return []
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# ---------------------------------------------------------------------------
|
|
243
|
+
# Database information
|
|
244
|
+
# ---------------------------------------------------------------------------
|
|
245
|
+
|
|
246
|
+
# Registry of the supported SBGN pathway databases. The keys are the same
# lowercase names that detect_database() returns; each entry carries display
# metadata plus the download function for that database.
DATABASE_INFO = {
    "reactome": dict(
        name="Reactome",
        description="Curated human pathway database",
        url="https://reactome.org",
        id_pattern="R-[SPECIES]-[NUMBER]",
        example="R-HSA-109582",
        downloader=download_reactome,
    ),
    "metacyc": dict(
        name="MetaCyc",
        description="Metabolic pathway database",
        url="https://metacyc.org",
        id_pattern="PWY-[NUMBER]",
        example="PWY-7210",
        downloader=download_metacyc,
    ),
    "panther": dict(
        name="PANTHER",
        description="Protein analysis through evolutionary relationships",
        url="http://www.pantherdb.org",
        id_pattern="P[NUMBER]",
        example="P00001",
        downloader=download_panther,
    ),
    "smpdb": dict(
        name="SMPDB",
        description="Small Molecule Pathway Database",
        url="https://smpdb.ca",
        id_pattern="SMP[NUMBER]",
        example="SMP0000001",
        downloader=download_smpdb,
    ),
}
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def detect_database(pathway_id: str) -> Optional[str]:
    """
    Guess the source database of a pathway ID from its prefix.

    The checks run in a fixed order and the first match wins:

    1. "R-" followed by a further "-" somewhere later → "reactome"
    2. "PWY-" prefix                                   → "metacyc"
    3. "P" followed only by digits                     → "panther"
    4. "SMP" prefix                                    → "smpdb"

    Parameters
    ----------
    pathway_id: Pathway identifier

    Returns
    -------
    Database name ("reactome", "metacyc", "panther", "smpdb") or None when
    no rule matches.

    Example
    -------
    >>> detect_database("R-HSA-109582")
    'reactome'
    >>> detect_database("PWY-7210")
    'metacyc'
    """
    has_prefix = pathway_id.startswith

    # A second dash must appear after the leading "R-".
    if has_prefix("R-") and pathway_id.find("-", 2) != -1:
        return "reactome"
    if has_prefix("PWY-"):
        return "metacyc"
    if has_prefix("P") and pathway_id[1:].isdigit():
        return "panther"
    if has_prefix("SMP"):
        return "smpdb"
    return None
|