pathview-plus 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pathview/rendering.py ADDED
@@ -0,0 +1,409 @@
1
+ """
2
+ rendering.py
3
+ Pathway diagram rendering:
4
+ - keggview_native : overlay data on a KEGG background PNG (pixel painting)
5
+ - keggview_graph : draw a NetworkX graph diagram styled with Seaborn
6
+ - kegg_legend : display a standalone KEGG diagram element legend
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import warnings
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+ import matplotlib.patches as mpatches
16
+ import matplotlib.pyplot as plt
17
+ import numpy as np
18
+ import polars as pl
19
+ import seaborn as sns
20
+ from PIL import Image
21
+
22
+ from .color_mapping import draw_color_key, make_colormap
23
+ from .utils import wordwrap
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # KEGG edge subtype reference table
28
+ # ---------------------------------------------------------------------------
29
+
30
# Reference rows describing how each KEGG edge subtype is drawn in the legend.
# Every row is a 5-tuple, in this field order:
#   name      - subtype name shown next to the sample edge
#   colour    - hex colour of the sample edge and of its label
#   label     - short glyph printed above the sample edge
#   style     - line style keyword: "solid", "dashed" or "dotted"
#   arrowhead - head keyword: "normal", "tee" or "none"
_EDGE_SUBTYPES = [
    ("activation",        "#00CC00", "-->", "solid",  "normal"),
    ("inhibition",        "#CC0000", "--|", "solid",  "tee"),
    ("expression",        "#00AA00", "-->", "dashed", "normal"),
    ("repression",        "#AA0000", "--|", "dashed", "tee"),
    ("indirect",          "#888888", "..>", "dotted", "normal"),
    ("binding",           "#0000CC", "---", "solid",  "none"),
    ("compound",          "#8800AA", "---", "solid",  "none"),
    ("phosphorylation",   "#FF6600", "+p",  "solid",  "normal"),
    ("dephosphorylation", "#FF6600", "-p",  "solid",  "normal"),
    ("ubiquitination",    "#FF00FF", "+u",  "solid",  "normal"),
    ("methylation",       "#00AAFF", "+m",  "solid",  "normal"),
    ("others/unknown",    "#888888", "?",   "solid",  "normal"),
]
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Shared helpers
49
+ # ---------------------------------------------------------------------------
50
+
51
def _hex_to_rgb255(hex_col: Optional[str]) -> Optional[np.ndarray]:
    """
    Convert a hex colour string to a uint8 [R, G, B] array.

    Accepted forms are ``RRGGBB`` and the shorthand ``RGB``, each with or
    without a leading ``#`` and surrounding whitespace.  Anything after the
    first six hex digits (e.g. an alpha pair in ``#RRGGBBAA``) is ignored.

    Returns None for ``None``, empty strings and the case-insensitive
    keywords "transparent" / "none", i.e. "do not paint".

    Raises
    ------
    ValueError
        If the string is neither a "no colour" marker nor a valid hex colour
        (named colours such as "red" are not supported).
    """
    # A null cell in the colour table arrives here as None.
    if hex_col is None:
        return None

    digits = hex_col.strip().lstrip("#")
    if not digits or digits.lower() in ("transparent", "none"):
        return None

    # Expand the 3-digit shorthand: "0fa" -> "00ffaa".
    if len(digits) == 3:
        digits = "".join(ch * 2 for ch in digits)

    # Fewer than six digits would otherwise parse a truncated channel
    # (e.g. "abcde" -> blue == 0x0e) instead of failing loudly.
    if len(digits) < 6:
        raise ValueError(f"Invalid hex colour: {hex_col!r}")

    try:
        channels = [int(digits[i:i + 2], 16) for i in (0, 2, 4)]
    except ValueError:
        raise ValueError(f"Invalid hex colour: {hex_col!r}") from None
    return np.array(channels, dtype=np.uint8)
60
+
61
+
62
def _color_cols(df: pl.DataFrame) -> list[str]:
    """List, in column order, the names of *df*'s columns carrying the '_col' suffix."""
    selected = []
    for column_name in df.columns:
        if column_name.endswith("_col"):
            selected.append(column_name)
    return selected
65
+
66
+
67
+ # ---------------------------------------------------------------------------
68
+ # Native view (overlay on KEGG PNG)
69
+ # ---------------------------------------------------------------------------
70
+
71
def _paint_gene_nodes(
    img: np.ndarray,
    plot_data: pl.DataFrame,
    col_data: pl.DataFrame,
) -> np.ndarray:
    """
    Paint gene-node rectangles onto an H×W×C (C >= 3) uint8 image array.

    The node width is divided evenly across multi-state colour columns so
    that each experiment gets a vertical strip of the node box, ordered left
    to right in the order of the ``*_col`` columns of ``col_data``.

    Parameters
    ----------
    img:       Image array; it is modified in place and also returned.
    plot_data: One row per node with ``entry_id``, ``x``, ``y``, ``width``
               and ``height``.  ``(x, y)`` is the box centre and ``y`` is
               used directly as the image row (top-left origin).
    col_data:  One row per node with ``id`` plus one ``*_col`` hex colour
               column per state.  Empty, null or "transparent" colours
               leave that strip untouched.

    Returns
    -------
    The same ``img`` array.
    """
    h, w_img = img.shape[:2]
    ccols = _color_cols(col_data)
    n_states = len(ccols)
    col_lookup = {row["id"]: row for row in col_data.iter_rows(named=True)}

    for row in plot_data.iter_rows(named=True):
        cx, cy = row["x"], row["y"]
        width, height = row["width"], row["height"]
        # Rows without geometry cannot be placed on the image.
        if any(value is None for value in (cx, cy, width, height)):
            continue

        hw, hh = width / 2, height / 2
        # Clamp both ends of each axis into the image.  A negative end index
        # would otherwise wrap around in the slices below and repaint a large
        # part of the picture for a node lying off the top/left edge.
        px_l = min(max(0, int(cx - hw)), w_img)
        px_r = max(px_l, min(w_img, int(cx + hw)))
        py_t = min(max(0, int(cy - hh)), h)
        py_b = max(py_t, min(h, int(cy + hh)))
        x_breaks = np.linspace(px_l, px_r, n_states + 1, dtype=int)

        node_cols = col_lookup.get(row["entry_id"]) or {}
        for k, ccol in enumerate(ccols):
            # `or ""` maps a null colour cell to "no colour".
            rgb = _hex_to_rgb255(node_cols.get(ccol) or "")
            if rgb is None:
                continue
            sl_l, sl_r = int(x_breaks[k]), int(x_breaks[k + 1])
            # Basic slicing yields a view, so the masked assignment below
            # writes straight into `img`.
            region = img[py_t:py_b, sl_l:sl_r, :3]
            # Keep pure-black pixels (borders / text) as they are.
            mask = region.sum(axis=2) > 0
            region[mask] = rgb

    return img
110
+
111
+
112
def _paint_cpd_nodes(
    img: np.ndarray,
    plot_data: pl.DataFrame,
    col_data: pl.DataFrame,
) -> np.ndarray:
    """
    Paint compound-node discs onto an H×W×C (C >= 3) uint8 image array.

    Multi-state colours are applied as vertical slices through the circle,
    ordered left to right in the order of the ``*_col`` columns of
    ``col_data``.  A black ring roughly two pixels wide is redrawn on the
    rim of every disc, whether or not any colour was painted.

    Parameters
    ----------
    img:       Image array; it is modified in place and also returned.
    plot_data: One row per node with ``entry_id``, ``x``, ``y`` and
               ``width``.  ``(x, y)`` is the disc centre and ``width`` is
               used as the radius in pixels.
    col_data:  One row per node with ``id`` plus one ``*_col`` hex colour
               column per state.  Empty, null or "transparent" colours
               leave that slice untouched.

    Returns
    -------
    The same ``img`` array.

    Notes
    -----
    ``y`` is used directly as the image row (top-left origin).  This matches
    the net behaviour of ``_paint_gene_nodes`` and the way ``keggview_graph``
    negates ``y`` for plotting; flipping it here would mirror the compounds
    vertically relative to the gene boxes.
    NOTE(review): assumes ``plot_data`` carries raw KGML image coordinates -
    confirm against the code that builds ``plot_data``.
    """
    h, w_img = img.shape[:2]
    ccols = _color_cols(col_data)
    n_states = len(ccols)
    col_lookup = {row["id"]: row for row in col_data.iter_rows(named=True)}

    for row in plot_data.iter_rows(named=True):
        cx, cy, r = row["x"], row["y"], row["width"]
        # Rows without geometry cannot be placed on the image.
        if cx is None or cy is None or r is None:
            continue

        # Only the bounding window of the disc can contain affected pixels,
        # so build the distance map there instead of over the whole image.
        x0 = max(0, int(np.floor(cx - r)))
        x1 = min(w_img, int(np.ceil(cx + r)) + 1)
        y0 = max(0, int(np.floor(cy - r)))
        y1 = min(h, int(np.ceil(cy + r)) + 1)
        if x0 >= x1 or y0 >= y1:
            continue  # disc lies entirely outside the image (or r <= 0)

        yy, xx = np.mgrid[y0:y1, x0:x1]
        dist_sq = (xx - cx) ** 2 + (yy - cy) ** 2
        inside = dist_sq < r ** 2
        border = (dist_sq >= (r - 2) ** 2) & inside
        x_breaks = np.linspace(cx - r, cx + r, n_states + 1)

        # Basic slicing yields a view, so writes below land in `img`.
        window = img[y0:y1, x0:x1, :3]

        node_cols = col_lookup.get(row["entry_id"]) or {}
        for k, ccol in enumerate(ccols):
            # `or ""` maps a null colour cell to "no colour".
            rgb = _hex_to_rgb255(node_cols.get(ccol) or "")
            if rgb is None:
                continue
            mask = inside & (xx >= x_breaks[k]) & (xx < x_breaks[k + 1])
            window[mask] = rgb

        window[border] = 0  # restore black border

    return img
147
+
148
+
149
def keggview_native(
    plot_data_gene: Optional[pl.DataFrame],
    cols_gene: Optional[pl.DataFrame],
    plot_data_cpd: Optional[pl.DataFrame],
    cols_cpd: Optional[pl.DataFrame],
    node_data: pl.DataFrame,
    pathway_name: str,
    kegg_dir: Path = Path("."),
    out_suffix: str = "pathview",
    limit: dict | None = None,
    bins: dict | None = None,
    both_dirs: dict | None = None,
    discrete: dict | None = None,
    low: dict | None = None,
    mid: dict | None = None,
    high: dict | None = None,
    new_signature: bool = True,
    plot_col_key: bool = True,
    dpi: int = 150,
) -> None:
    """
    Render expression data overlaid on the KEGG pathway PNG background.

    Reads ``<kegg_dir>/<pathway_name>.png``, paints gene and compound nodes
    with the supplied colour data, and writes
    ``<kegg_dir>/<pathway_name>.<out_suffix>.png``.

    Parameters
    ----------
    plot_data_gene, cols_gene: Gene node geometry and colours; gene nodes
        are painted only when both are given.
    plot_data_cpd, cols_cpd:   Compound node geometry and colours; compound
        nodes are painted only when both are given.
    node_data:     Not used by this function.
    pathway_name:  Base name of the input PNG and of the output file.
    kegg_dir:      Directory holding the input PNG; the output goes there too.
    out_suffix:    Inserted between the pathway name and ``.png``.
    limit, bins, both_dirs, discrete, low, mid, high:
        Per-type colour-key settings as dicts keyed by "gene" and "cpd".
        Only the "gene" entries are read here, for the colour key.
    new_signature: Add the "Rendered by pathview.py" text to the image.
    plot_col_key:  Draw the gene colour key (needs ``plot_data_gene``).
    dpi:           Resolution used for the figure size and for saving.

    Raises
    ------
    FileNotFoundError
        If the background PNG does not exist.
    """
    if limit is None:
        limit = {"gene": 1, "cpd": 1}
    if bins is None:
        bins = {"gene": 10, "cpd": 10}
    if both_dirs is None:
        both_dirs = {"gene": True, "cpd": True}
    if discrete is None:
        discrete = {"gene": False, "cpd": False}
    if low is None:
        low = {"gene": "green", "cpd": "blue"}
    if mid is None:
        mid = {"gene": "gray", "cpd": "gray"}
    if high is None:
        high = {"gene": "red", "cpd": "yellow"}

    png_path = Path(kegg_dir) / f"{pathway_name}.png"
    if not png_path.exists():
        raise FileNotFoundError(f"Background PNG not found: {png_path}")

    # The context manager releases the file handle as soon as the pixels
    # have been copied into the array.
    with Image.open(png_path) as background:
        img = np.array(background.convert("RGB"), dtype=np.uint8)

    if plot_data_gene is not None and cols_gene is not None:
        img = _paint_gene_nodes(img, plot_data_gene, cols_gene)
    if plot_data_cpd is not None and cols_cpd is not None:
        img = _paint_cpd_nodes(img, plot_data_cpd, cols_cpd)

    h, w = img.shape[:2]
    key_height = 0.6 if plot_col_key else 0.0
    fig, axes = plt.subplots(
        nrows=2 if plot_col_key else 1,
        figsize=(w / dpi, h / dpi + key_height),
        gridspec_kw={"height_ratios": [h, int(dpi * key_height)]} if plot_col_key else None,
    )
    # Close the figure even if drawing or saving fails, so repeated calls
    # do not accumulate open pyplot figures.
    try:
        ax_img = axes[0] if plot_col_key else axes

        ax_img.imshow(img, aspect="auto")
        ax_img.axis("off")

        if new_signature:
            # TODO: transform=ax_img.transAxes is intentionally left out;
            # this label looks better on top.  Without the transform the
            # position is given in data (pixel) coordinates.
            ax_img.text(
                0.02, 0.02, "Rendered by pathview.py",
                fontsize=6, color="black", fontweight="bold", va="bottom",
            )

        if plot_col_key and plot_data_gene is not None:
            draw_color_key(
                ax_img,
                limit=limit["gene"], bins=bins["gene"],
                both_dirs=both_dirs["gene"], discrete=discrete["gene"],
                low=low["gene"], mid=mid["gene"], high=high["gene"],
            )
        if plot_col_key:
            # The second row only reserves vertical space in the layout.
            axes[1].set_visible(False)

        out_path = Path(kegg_dir) / f"{pathway_name}.{out_suffix}.png"
        fig.savefig(out_path, dpi=dpi, bbox_inches="tight")
    finally:
        plt.close(fig)
    print(f"Info: Written → {out_path}")
228
+
229
+
230
+ # ---------------------------------------------------------------------------
231
+ # Graph view (NetworkX / Seaborn)
232
+ # ---------------------------------------------------------------------------
233
+
234
def keggview_graph(
    plot_data_gene: Optional[pl.DataFrame],
    cols_gene: Optional[pl.DataFrame],
    plot_data_cpd: Optional[pl.DataFrame],
    cols_cpd: Optional[pl.DataFrame],
    node_data: pl.DataFrame,
    pathway_name: str,
    out_suffix: str = "pathview",
    kegg_dir: Path = Path("."),
    cex: float = 0.7,
    limit: dict | None = None,
    bins: dict | None = None,
    both_dirs: dict | None = None,
    low: dict | None = None,
    mid: dict | None = None,
    high: dict | None = None,
    new_signature: bool = True,
    plot_col_key: bool = True,
) -> None:
    """
    Render pathway as a NetworkX directed graph with Seaborn styling.

    Nodes are positioned using the KGML (x, y) coordinates, with y negated
    so that the layout is not upside down on the plot.  Saves a PDF to
    ``<kegg_dir>/<pathway_name>.<out_suffix>.pdf``.

    Only nodes are added to the graph; this function receives no edge data,
    so no edges are drawn.

    Parameters
    ----------
    plot_data_gene, plot_data_cpd: Not used by this function.
    cols_gene, cols_cpd: Colour tables with an ``id`` column and ``*_col``
        columns.  Only the first ``*_col`` column of each table is used, and
        a gene colour wins over a compound colour for the same id.
    node_data:     One row per node with ``entry_id``, ``x``, ``y`` and
        optionally ``label``; all columns are stored as node attributes.
    pathway_name:  Figure title and base name of the output file.
    out_suffix:    Inserted between the pathway name and ``.pdf``.
    kegg_dir:      Output directory.
    cex:           Label size factor (font size is ``cex * 10``).
    limit, bins, both_dirs, low, mid, high:
        Per-type colour-key settings as dicts keyed by "gene" and "cpd".
        Only the "gene" entries are read here.
    new_signature: Add the "Rendered by pathview.py" text.
    plot_col_key:  Draw the gene colour key.

    Raises
    ------
    ImportError
        If networkx is not installed.
    """
    try:
        import networkx as nx
    except ImportError as exc:
        raise ImportError(
            "networkx is required for graph view: pip install networkx"
        ) from exc

    if limit is None:
        limit = {"gene": 1, "cpd": 1}
    if bins is None:
        bins = {"gene": 10, "cpd": 10}
    if both_dirs is None:
        both_dirs = {"gene": True, "cpd": True}
    if low is None:
        low = {"gene": "green", "cpd": "blue"}
    if mid is None:
        mid = {"gene": "gray", "cpd": "gray"}
    if high is None:
        high = {"gene": "red", "cpd": "yellow"}

    # Build colour lookup from both gene and compound colour DataFrames
    color_lookup: dict[str, str] = {}
    for col_df in (cols_gene, cols_cpd):
        if col_df is None:
            continue
        first_col = next((c for c in col_df.columns if c.endswith("_col")), None)
        if not first_col:
            continue
        for row in col_df.iter_rows(named=True):
            color_lookup.setdefault(row["id"], row[first_col])

    # Build the graph, the layout and the labels in one pass over node_data.
    G = nx.DiGraph()
    pos = {}
    node_labels = {}
    for row in node_data.iter_rows(named=True):
        node_id = row["entry_id"]
        G.add_node(node_id, **row)
        pos[node_id] = (
            row["x"] if row["x"] is not None else 0.0,
            -(row["y"] if row["y"] is not None else 0.0),
        )
        # `or ""` keeps a null label from reaching wordwrap.
        node_labels[node_id] = wordwrap(row.get("label") or "", width=12)

    # Nodes without a usable colour (missing, null or empty) fall back to
    # light grey.
    node_colors = [color_lookup.get(n) or "#CCCCCC" for n in G.nodes]
    # TODO: Temporary fix, update prior steps that use transparent instead of none
    node_colors = ["none" if c == "transparent" else c for c in node_colors]

    fig = None
    # Close the figure even if drawing or saving fails, so repeated calls
    # do not accumulate open pyplot figures.
    try:
        with sns.axes_style("white"):
            fig, ax = plt.subplots(figsize=(14, 10))
            ax.set_title(pathway_name, fontsize=12, fontweight="bold")

            nx.draw_networkx(
                G,
                pos=pos,
                ax=ax,
                labels=node_labels,
                node_color=node_colors,
                node_size=800,
                font_size=cex * 10,
                arrows=True,
                arrowsize=12,
                edge_color="#555555",
            )

            if plot_col_key:
                draw_color_key(
                    ax,
                    limit=limit["gene"], bins=bins["gene"],
                    both_dirs=both_dirs["gene"],
                    low=low["gene"], mid=mid["gene"], high=high["gene"],
                )

            if new_signature:
                ax.text(
                    0.01, 0.01, "Rendered by pathview.py",
                    transform=ax.transAxes, fontsize=7, va="bottom",
                )
            ax.axis("off")

        out_path = Path(kegg_dir) / f"{pathway_name}.{out_suffix}.pdf"
        fig.savefig(out_path, bbox_inches="tight")
    finally:
        if fig is not None:
            plt.close(fig)
    print(f"Info: Written → {out_path}")
336
+
337
+
338
+ # ---------------------------------------------------------------------------
339
+ # KEGG legend
340
+ # ---------------------------------------------------------------------------
341
+
342
def kegg_legend(
    legend_type: str = "both",
) -> None:
    """
    Display a standalone reference legend for KEGG pathway elements.

    The edge section shows one sample line per entry of ``_EDGE_SUBTYPES``,
    drawn with that entry's colour, line style and arrowhead.  The node
    section shows the shapes used for genes, compounds and pathway links.
    The figure is shown with ``plt.show()``; nothing is returned or saved.

    Parameters
    ----------
    legend_type: One of "both", "edge", or "node".  Any other value emits a
        warning and returns without drawing.
    """
    if legend_type not in ("both", "edge", "node"):
        warnings.warn(f"legend_type must be 'both', 'edge', or 'node'; got '{legend_type}'.")
        return

    n = len(_EDGE_SUBTYPES)
    line_styles = {"solid": "-", "dashed": "--", "dotted": ":"}
    # Map each arrowhead keyword to a matplotlib arrow style so that
    # inhibition-type edges end in a bar and undirected edges get no head
    # at all, instead of every non-"normal" edge showing a filled arrow.
    arrow_styles = {
        "normal": "->",
        "tee": "-[,widthB=0.5,lengthB=0.1",
        "none": "-",
    }

    with sns.axes_style("white"):
        fig, ax = plt.subplots(figsize=(9, 7))
        ax.set_xlim(-0.2, 4.5)
        ax.set_ylim(-0.5, n + 1.5)
        ax.axis("off")
        ax.set_title("KEGG Diagram Legend", fontweight="bold", fontsize=12)

        if legend_type in ("both", "edge"):
            ax.text(0.9, n + 1.0, "Edge Types", fontsize=10, fontweight="bold", ha="right")
            for i, (name, col, label, style, arrow) in enumerate(_EDGE_SUBTYPES):
                y = n - i - 0.5  # rows run top to bottom
                ax.text(0.85, y, name, ha="right", va="center", fontsize=8)
                # An empty annotation is used purely to draw the sample edge.
                ax.annotate(
                    "",
                    xy=(1.8, y), xytext=(1.0, y),
                    arrowprops=dict(
                        arrowstyle=arrow_styles.get(arrow, "->"),
                        color=col,
                        linestyle=line_styles.get(style, "-"),
                        lw=1.5,
                    ),
                )
                ax.text(1.4, y + 0.22, label, color=col, fontsize=7, ha="center")

        if legend_type in ("both", "node"):
            # Shift the node column right when the edge column is also shown.
            x_off = 2.5 if legend_type == "both" else 0.5
            ax.text(x_off + 1.2, n + 1.0, "Node Types", fontsize=10, fontweight="bold", ha="right")
            node_specs = [
                ("gene / protein / enzyme", "rectangle"),
                ("compound / metabolite", "ellipse"),
                ("pathway link", "text"),
            ]
            for i, (label, shape) in enumerate(node_specs):
                y = n - i * 3.5 - 0.5
                ax.text(x_off + 1.1, y, label, ha="right", va="center", fontsize=8)
                xc = x_off + 1.5
                # facecolor/edgecolor are given separately: a combined
                # `color=` would override the black outline.
                if shape == "ellipse":
                    ax.add_patch(mpatches.Ellipse(
                        (xc, y), 0.45, 0.28,
                        facecolor="#DDDDDD", edgecolor="black", lw=1,
                    ))
                elif shape == "rectangle":
                    ax.add_patch(mpatches.FancyBboxPatch(
                        (xc - 0.22, y - 0.14), 0.44, 0.28,
                        boxstyle="square",
                        facecolor="#DDDDDD", edgecolor="black", lw=1,
                    ))
                else:
                    ax.text(xc, y, "Pathway Name", ha="center", va="center",
                            fontsize=8, style="italic")

    plt.tight_layout()
    plt.show()