pathview-plus 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pathview/__init__.py ADDED
@@ -0,0 +1,124 @@
1
+ """
2
+ pathview — Python implementation of R pathview + SBGNview features.
3
+
4
+ Complete pathway visualization system supporting:
5
+ - KEGG pathways (KGML format)
6
+ - SBGN pathways (Reactome, MetaCyc, PANTHER, SMPDB)
7
+ - Multiple rendering modes (PNG overlay, SVG vector, PDF graph)
8
+ - Highlighting and post-processing
9
+ - Spline curve rendering
10
+
11
+ Public API
12
+ ----------
13
+ # Core visualization
14
+ from pathview import pathview
15
+
16
+ # Data utilities
17
+ from pathview import sim_mol_data, mol_sum, node_color
18
+
19
+ # ID mapping
20
+ from pathview import id2eg, eg2id, cpd_id_map
21
+
22
+ # Parsing (KEGG)
23
+ from pathview import parse_kgml, node_info
24
+
25
+ # Parsing (SBGN)
26
+ from pathview import parse_sbgn, sbgn_to_df
27
+
28
+ # Database downloaders
29
+ from pathview import (
30
+ download_kegg, download_reactome, download_metacyc,
31
+ list_reactome_pathways, detect_database
32
+ )
33
+
34
+ # Highlighting & post-processing
35
+ from pathview import (
36
+ PathwayResult, highlight_nodes, highlight_edges,
37
+ highlight_path, change_labels
38
+ )
39
+
40
+ # Rendering modes
41
+ from pathview import keggview_native, keggview_graph, keggview_svg
42
+
43
+ # Spline curves
44
+ from pathview import (
45
+ cubic_bezier, quadratic_bezier, catmull_rom_spline,
46
+ route_edge_spline, bezier_to_svg_path
47
+ )
48
+ """
49
+
50
+ __version__ = "2.0.0"
51
+ __authors__ = 'Richard Allen White III, Jose Luis Figueroa III'
52
+ __description__ = "KEGG + SBGN pathway visualization with Python"
53
+
54
+
55
+ from .color_mapping import draw_color_key, make_colormap, node_color
56
+ from .databases import (DATABASE_INFO, detect_database, download_metacyc,
57
+ download_panther, download_reactome, download_smpdb,
58
+ list_reactome_pathways)
59
+ from .highlighting import (PathwayResult, change_labels, highlight_edges,
60
+ highlight_nodes, highlight_path)
61
+ from .id_mapping import cpd_id_map, eg2id, id2eg
62
+ from .kegg_api import SpeciesInfo, download_kegg, kegg_species_code
63
+ from .kgml_parser import (KGMLEdge, KGMLNode, KGMLPathway, KGMLReaction,
64
+ node_info, parse_kgml)
65
+ from .mol_data import mol_sum, sim_mol_data
66
+ from .node_mapping import node_map
67
+ from .rendering import kegg_legend, keggview_graph, keggview_native
68
+ from .sbgn_parser import (SBGN_ARC_CLASSES, SBGN_GLYPH_CLASSES, SBGNArc,
69
+ SBGNGlyph, SBGNPathway, parse_sbgn, sbgn_to_df)
70
+ from .splines import (bezier_to_svg_path, catmull_rom_spline, cubic_bezier,
71
+ quadratic_bezier, route_edge_spline, smooth_path_svg)
72
+ from .svg_rendering import keggview_svg, render_edge_svg, render_node_svg
73
+ from .utils import max_abs, random_pick, wordwrap
74
+
75
+ __all__ = [
76
+ # Core pipeline
77
+ "pathview",
78
+
79
+ # Data simulation & aggregation
80
+ "sim_mol_data", "mol_sum",
81
+
82
+ # ID mapping
83
+ "id2eg", "eg2id", "cpd_id_map",
84
+
85
+ # KEGG API
86
+ "kegg_species_code", "download_kegg", "SpeciesInfo",
87
+
88
+ # Database downloads (SBGN)
89
+ "download_reactome", "download_metacyc", "download_panther", "download_smpdb",
90
+ "list_reactome_pathways", "detect_database", "DATABASE_INFO",
91
+
92
+ # Parsing (KGML)
93
+ "parse_kgml", "node_info",
94
+ "KGMLPathway", "KGMLNode", "KGMLEdge", "KGMLReaction",
95
+
96
+ # Parsing (SBGN)
97
+ "parse_sbgn", "sbgn_to_df",
98
+ "SBGNPathway", "SBGNGlyph", "SBGNArc",
99
+ "SBGN_GLYPH_CLASSES", "SBGN_ARC_CLASSES",
100
+
101
+ # Node mapping
102
+ "node_map",
103
+
104
+ # Colors
105
+ "node_color", "make_colormap", "draw_color_key",
106
+
107
+ # Rendering (PNG, PDF, SVG)
108
+ "keggview_native", "keggview_graph", "keggview_svg", "kegg_legend",
109
+ "render_node_svg", "render_edge_svg",
110
+
111
+ # Highlighting & post-processing
112
+ "PathwayResult", "highlight_nodes", "highlight_edges",
113
+ "highlight_path", "change_labels",
114
+
115
+ # Spline curves
116
+ "cubic_bezier", "quadratic_bezier", "catmull_rom_spline",
117
+ "route_edge_spline", "bezier_to_svg_path", "smooth_path_svg",
118
+
119
+ # Utilities
120
+ "wordwrap", "max_abs", "random_pick",
121
+ ]
122
+
123
+ # Import pathview last to avoid circular imports
124
+ from .pathview import pathview # noqa: E402
@@ -0,0 +1,153 @@
1
+ """
2
+ color_mapping.py
3
+ Colour-scale utilities:
4
+ - make_colormap : build a three-point diverging LinearSegmentedColormap
5
+ - node_color : map numeric node values → hex colour strings
6
+ - draw_color_key : render a colour-bar legend onto a Matplotlib Axes
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Callable, Optional
12
+
13
+ import matplotlib.pyplot as plt
14
+ import numpy as np
15
+ import polars as pl
16
+ from matplotlib.colors import LinearSegmentedColormap, Normalize
17
+
18
+ from .constants import SumMethod
19
+
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Colormap construction
23
+ # ---------------------------------------------------------------------------
24
+
25
def make_colormap(
    low: str = "green",
    mid: str = "gray",
    high: str = "red",
    n: int = 256,
) -> LinearSegmentedColormap:
    """
    Create the diverging low → mid → high colour map named "pv_cmap".

    Parameters
    ----------
    low, mid, high: Colour specifications for the three anchor points, handed
                    to Matplotlib unchanged (names or hex codes).
    n:              Number of quantisation levels (forwarded as ``N``).

    Returns the resulting LinearSegmentedColormap.
    """
    anchors = [low, mid, high]
    return LinearSegmentedColormap.from_list("pv_cmap", anchors, N=n)
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Node colour mapping
44
+ # ---------------------------------------------------------------------------
45
+
46
def node_color(
    plot_data: pl.DataFrame,
    limit: float | tuple[float, float] = 1.0,
    bins: int = 10,
    both_dirs: bool = True,
    discrete: bool = False,
    low: str = "green",
    mid: str = "gray",
    high: str = "red",
    na_col: str = "transparent",
    trans_fun: Optional[Callable[[np.ndarray], np.ndarray]] = None,
) -> pl.DataFrame:
    """
    Convert numeric node values to hex colour strings.

    Parameters
    ----------
    plot_data:    DataFrame with an 'id' column and one or more value columns.
                  Every column other than 'id' produces a paired '*_col' column.
    limit:        Scalar (symmetric ±limit) or (vmin, vmax) tuple.
    bins:         Number of colour levels in the colour map.
    both_dirs:    When True and *limit* is scalar, use the ±limit range;
                  otherwise the range is 0..limit.
    discrete:     Reserved for future discrete-colour support (currently unused).
    low/mid/high: Colour endpoints.
    na_col:       Colour string returned for NaN values (default "transparent").
    trans_fun:    Optional transformation applied to the float values before
                  they are mapped to colours.

    Returns a DataFrame with 'id' and one '*_col' column per input value column.
    Values outside the limits are clipped to the nearest end of the scale.
    """
    vmin, vmax = _resolve_limits(limit, both_dirs)
    cmap = make_colormap(low, mid, high, n=bins)
    norm = Normalize(vmin=vmin, vmax=vmax, clip=True)

    value_cols = [c for c in plot_data.columns if c != "id"]
    result: dict[str, list] = {"id": plot_data["id"].to_list()}

    # TODO: Quick fix carried over from the original code, please verify for
    # accuracy: string cells are read as hexadecimal numbers ("#1F" -> 31)
    # before colouring.
    # otypes=[object] is required so that (a) empty columns do not raise
    # (np.vectorize cannot infer an output type from size-0 input) and (b) the
    # output dtype is not guessed from the first element only.
    to_number = np.vectorize(
        lambda x: int(x.lstrip("#"), 16) if isinstance(x, str) else x,
        otypes=[object],
    )
    for col in value_cols:
        vals = to_number(plot_data[col].to_numpy()).astype(float)
        if trans_fun is not None:
            vals = trans_fun(vals)
        result[f"{col}_col"] = [_value_to_hex(v, cmap, norm, na_col) for v in vals]

    return pl.DataFrame(result)
91
+
92
+
93
+ def _resolve_limits(
94
+ limit: float | tuple[float, float],
95
+ both_dirs: bool,
96
+ ) -> tuple[float, float]:
97
+ """Convert a scalar or tuple limit into (vmin, vmax)."""
98
+ if isinstance(limit, (int, float)):
99
+ return (-abs(limit), abs(limit)) if both_dirs else (0.0, float(limit))
100
+ return float(limit[0]), float(limit[1])
101
+
102
+
103
+ def _value_to_hex(
104
+ v: float,
105
+ cmap: LinearSegmentedColormap,
106
+ norm: Normalize,
107
+ na_col: str,
108
+ ) -> str:
109
+ """Map a single float to a hex colour string, returning *na_col* for NaN."""
110
+ if np.isnan(v):
111
+ return na_col
112
+ r, g, b, _ = cmap(norm(v))
113
+ return "#{:02X}{:02X}{:02X}".format(int(r * 255), int(g * 255), int(b * 255))
114
+
115
+
116
+ # ---------------------------------------------------------------------------
117
+ # Colour-key legend
118
+ # ---------------------------------------------------------------------------
119
+
120
def draw_color_key(
    ax: plt.Axes,
    limit: float | tuple[float, float] = 1.0,
    bins: int = 10,
    both_dirs: bool = True,
    discrete: bool = False,
    low: str = "green",
    mid: str = "gray",
    high: str = "red",
    label_size: float = 8,
) -> None:
    """
    Draw a horizontal colour-bar legend as a Matplotlib colorbar.

    Parameters
    ----------
    ax:           Axes on which to anchor the colorbar.
    limit:        Colour scale limits (scalar or tuple).
    bins:         Number of colour bins; the legend is quantised to the same
                  number of levels that node_color uses for the nodes.
    both_dirs:    Whether to show negative values (scalar *limit* only).
    discrete:     Reserved for future discrete-colour support (currently unused).
    low/mid/high: Colour endpoints.
    label_size:   Font size for tick labels.
    """
    vmin, vmax = _resolve_limits(limit, both_dirs)
    # Pass *bins* through so the legend shows the same colour levels as the
    # node colours instead of a 256-level gradient.
    cmap = make_colormap(low, mid, high, n=bins)
    norm = Normalize(vmin=vmin, vmax=vmax)

    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])

    # Attach the colorbar through the Axes' own figure rather than pyplot's
    # "current figure", so the result does not depend on global pyplot state.
    cbar = ax.figure.colorbar(
        sm, ax=ax, orientation="horizontal", fraction=0.046, pad=0.04
    )
    ticks = [vmin, (vmin + vmax) / 2.0, vmax]
    cbar.set_ticks(ticks)
    cbar.set_ticklabels([f"{t:.2g}" for t in ticks])
    cbar.ax.tick_params(labelsize=label_size)
pathview/constants.py ADDED
@@ -0,0 +1,27 @@
1
+ """
2
+ constants.py
3
+ Shared type aliases, literals, and package-wide constants.
4
+ """
5
+
6
+ from typing import Callable, Literal, Optional
7
+
8
+ import numpy as np
9
+
10
# KEGG REST base URL
KEGG_BASE = "https://rest.kegg.jp"

# Supported aggregation methods for multi-probe summarisation
SumMethod = Literal["sum", "mean", "median", "max", "max_abs", "random"]

# KEGG node types recognised by the renderer
NodeType = Literal["gene", "compound", "map", "ortholog", "group"]

# Non-data columns present on every node DataFrame
NODE_META_COLS = frozenset({
    "entry_id", "name", "type", "x", "y",
    "width", "height", "label", "shape",
    "reaction", "component", "size", "kegg_names",
})

# Valid biological node types (used for filtering).
# Immutable, like NODE_META_COLS, so this shared constant cannot be mutated by
# accident. NOTE(review): "enzyme" is accepted here but is not part of
# NodeType — confirm that this is intentional.
VALID_NODE_TYPES = frozenset({"gene", "enzyme", "compound", "ortholog"})
pathview/databases.py ADDED
@@ -0,0 +1,309 @@
1
+ """
2
+ databases.py
3
+ Download SBGN-ML files from multiple pathway databases:
4
+ - Reactome (human pathways)
5
+ - MetaCyc (metabolic pathways)
6
+ - PANTHER (protein pathways)
7
+ - SMPDB (small molecule pathways)
8
+
9
+ Public API
10
+ ----------
11
+ download_reactome : Download Reactome pathway
12
+ download_metacyc : Download MetaCyc pathway
13
+ list_reactome_pathways : List top-level Reactome pathways
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import warnings
19
+ from pathlib import Path
20
+ from typing import Optional
21
+
22
+ import requests
23
+
24
+
25
+ # ---------------------------------------------------------------------------
26
+ # Reactome downloader
27
+ # ---------------------------------------------------------------------------
28
+
29
# Reactome Content Service exporter: events (pathways/reactions) are exported
# as SBGN via ``/exporter/event/{identifier}.sbgn``.
_REACTOME_BASE = "https://reactome.org/ContentService/exporter/event"

def download_reactome(
    pathway_id: str,
    output_dir: Path = Path("."),
    species: str = "Homo sapiens",
) -> Optional[Path]:
    """
    Download a Reactome pathway in SBGN-ML format.

    Parameters
    ----------
    pathway_id: Reactome stable ID (e.g., "R-HSA-109582" for Hemostasis)
    output_dir: Directory to save the .sbgn file (created if missing)
    species:    Species name (default: "Homo sapiens"). Currently not used
                for the request; the stable ID alone selects the pathway.

    Returns
    -------
    Path to downloaded file, or None if download failed

    Example
    -------
    >>> path = download_reactome("R-HSA-109582", output_dir=Path("./pathways"))
    >>> print(f"Downloaded to {path}")

    Note
    ----
    Reactome pathway IDs follow the format: R-[species code]-[number]
    - R-HSA-* : Homo sapiens
    - R-MMU-* : Mus musculus
    - R-RNO-* : Rattus norvegicus
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    url = f"{_REACTOME_BASE}/{pathway_id}.sbgn"
    output_path = output_dir / f"{pathway_id}.sbgn"

    print(f"Info: Downloading Reactome pathway {pathway_id}...")
    try:
        resp = requests.get(url, timeout=60)
        resp.raise_for_status()
        # Write the raw bytes: the XML carries its own encoding declaration,
        # and going through resp.text would re-decode with whatever charset
        # requests guesses, which can corrupt non-ASCII labels.
        output_path.write_bytes(resp.content)
        print(f"Info: Downloaded → {output_path}")
        return output_path
    except Exception as exc:
        # Best-effort contract: any failure is reported as a warning and
        # signalled to the caller with None.
        warnings.warn(f"Failed to download {pathway_id}: {exc}")
        return None
79
+
80
+
81
+ # ---------------------------------------------------------------------------
82
+ # MetaCyc downloader
83
+ # ---------------------------------------------------------------------------
84
+
85
+ _METACYC_BASE = "https://biocyc.org/META/pathway"
86
+
87
def download_metacyc(
    pathway_id: str,
    output_dir: Path = Path("."),
) -> Optional[Path]:
    """
    Download a MetaCyc pathway in SBGN-ML format.

    Parameters
    ----------
    pathway_id: MetaCyc pathway ID (e.g., "PWY-7210" for pyrimidine deoxyribonucleotides biosynthesis)
    output_dir: Directory to save the .sbgn file (created if missing)

    Returns
    -------
    Path to downloaded file, or None if download failed

    Example
    -------
    >>> path = download_metacyc("PWY-7210", output_dir=Path("./pathways"))

    Note
    ----
    MetaCyc requires registration for API access. This function uses the
    public web interface and may not work for all pathways.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Try BioCyc SBGN export (may require authentication)
    url = f"{_METACYC_BASE}?id={pathway_id}&export=sbgn"
    output_path = output_dir / f"{pathway_id}.sbgn"

    print(f"Info: Downloading MetaCyc pathway {pathway_id}...")
    try:
        resp = requests.get(url, timeout=60)
        resp.raise_for_status()
        # Write the raw bytes so the payload is stored exactly as served;
        # resp.text would re-decode it with a guessed charset first.
        output_path.write_bytes(resp.content)
        print(f"Info: Downloaded → {output_path}")
        return output_path
    except Exception as exc:
        # Best-effort contract: report the failure and return None.
        warnings.warn(
            f"Failed to download {pathway_id}: {exc}\n"
            "Note: MetaCyc may require authentication for some pathways."
        )
        return None
132
+
133
+
134
+ # ---------------------------------------------------------------------------
135
+ # PANTHER downloader
136
+ # ---------------------------------------------------------------------------
137
+
138
def download_panther(
    pathway_id: str,
    output_dir: Path = Path("."),
) -> Optional[Path]:
    """
    Placeholder for downloading a PANTHER pathway in SBGN-ML format.

    Parameters
    ----------
    pathway_id: PANTHER pathway ID (e.g., "P00001")
    output_dir: Directory intended to receive the .sbgn file

    Returns
    -------
    Always None: no download is performed yet. A warning asks the user to
    fetch the SBGN-ML file manually.
    """
    message = (
        "PANTHER SBGN downloads not yet implemented. "
        "Please download SBGN-ML files manually from PANTHER website."
    )
    warnings.warn(message)
    return None
164
+
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # SMPDB downloader
168
+ # ---------------------------------------------------------------------------
169
+
170
+ _SMPDB_BASE = "https://smpdb.ca/pathways"
171
+
172
def download_smpdb(
    pathway_id: str,
    output_dir: Path = Path("."),
) -> Optional[Path]:
    """
    Placeholder for downloading an SMPDB (Small Molecule Pathway Database) pathway.

    Parameters
    ----------
    pathway_id: SMPDB pathway ID (e.g., "SMP0000001")
    output_dir: Directory intended to receive the .sbgn file

    Returns
    -------
    Always None: no download is performed yet. A warning points the user to
    the SMPDB website.
    """
    message = (
        "SMPDB SBGN downloads not yet fully implemented. "
        "Check https://smpdb.ca for pathway files."
    )
    warnings.warn(message)
    return None
198
+
199
+
200
+ # ---------------------------------------------------------------------------
201
+ # Pathway listing
202
+ # ---------------------------------------------------------------------------
203
+
204
def list_reactome_pathways(species: str = "Homo sapiens") -> list[dict]:
    """
    List the top-level Reactome pathways for a species.

    Parameters
    ----------
    species: Species name (e.g., "Homo sapiens", "Mus musculus")

    Returns
    -------
    List of dicts with keys: id, name, species. An empty list is returned
    (with a warning) if the request or the response parsing fails.

    Example
    -------
    >>> pathways = list_reactome_pathways("Homo sapiens")
    >>> for pw in pathways[:5]:
    ...     print(f"{pw['id']}: {pw['name']}")
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # Query the requested species (percent-encoded) instead of a hard-coded
    # "Homo%20sapiens", so the results match the species label below.
    url = (
        "https://reactome.org/ContentService/data/pathways/top/"
        f"{quote(species, safe='')}"
    )

    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        data = resp.json()

        return [
            {
                "id": item.get("stId", ""),
                "name": item.get("displayName", ""),
                "species": species,
            }
            for item in data
        ]
    except Exception as exc:
        # Best-effort contract: report the failure and return an empty list.
        warnings.warn(f"Failed to list Reactome pathways: {exc}")
        return []
240
+
241
+
242
+ # ---------------------------------------------------------------------------
243
+ # Database information
244
+ # ---------------------------------------------------------------------------
245
+
246
# Registry of supported SBGN pathway databases, keyed by the short database
# name. Each entry holds display metadata, an ID pattern with an example, and
# the download function for that database.
DATABASE_INFO = {
    "reactome": dict(
        name="Reactome",
        description="Curated human pathway database",
        url="https://reactome.org",
        id_pattern="R-[SPECIES]-[NUMBER]",
        example="R-HSA-109582",
        downloader=download_reactome,
    ),
    "metacyc": dict(
        name="MetaCyc",
        description="Metabolic pathway database",
        url="https://metacyc.org",
        id_pattern="PWY-[NUMBER]",
        example="PWY-7210",
        downloader=download_metacyc,
    ),
    "panther": dict(
        name="PANTHER",
        description="Protein analysis through evolutionary relationships",
        url="http://www.pantherdb.org",
        id_pattern="P[NUMBER]",
        example="P00001",
        downloader=download_panther,
    ),
    "smpdb": dict(
        name="SMPDB",
        description="Small Molecule Pathway Database",
        url="https://smpdb.ca",
        id_pattern="SMP[NUMBER]",
        example="SMP0000001",
        downloader=download_smpdb,
    ),
}
280
+
281
+
282
def detect_database(pathway_id: str) -> Optional[str]:
    """
    Guess the source database from the shape of a pathway identifier.

    Rules are tried in order and the first match wins:
    - "R-" followed by at least one further "-"  → "reactome"
    - "PWY-" prefix                              → "metacyc"
    - "P" followed only by digits                → "panther"
    - "SMP" prefix                               → "smpdb"

    Parameters
    ----------
    pathway_id: Pathway identifier

    Returns
    -------
    Database name ("reactome", "metacyc", "panther", "smpdb") or None when
    no rule matches.

    Example
    -------
    >>> detect_database("R-HSA-109582")
    'reactome'
    >>> detect_database("PWY-7210")
    'metacyc'
    """
    rules = (
        ("reactome", lambda pid: pid.startswith("R-") and pid.find("-", 2) >= 0),
        ("metacyc", lambda pid: pid.startswith("PWY-")),
        ("panther", lambda pid: pid.startswith("P") and pid[1:].isdigit()),
        ("smpdb", lambda pid: pid.startswith("SMP")),
    )
    for database, matches in rules:
        if matches(pathway_id):
            return database
    return None