pathview-plus 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pathview/__init__.py +124 -0
- pathview/color_mapping.py +153 -0
- pathview/constants.py +27 -0
- pathview/databases.py +309 -0
- pathview/examples.py +342 -0
- pathview/highlighting.py +375 -0
- pathview/id_mapping.py +170 -0
- pathview/kegg_api.py +143 -0
- pathview/kgml_parser.py +189 -0
- pathview/mol_data.py +168 -0
- pathview/node_mapping.py +99 -0
- pathview/pathview.py +316 -0
- pathview/rendering.py +409 -0
- pathview/sbgn_parser.py +353 -0
- pathview/splines.py +304 -0
- pathview/svg_rendering.py +305 -0
- pathview/test_all_features.py +343 -0
- pathview/utils.py +80 -0
- pathview_plus-2.0.0.data/scripts/pathview-cli.py +252 -0
- pathview_plus-2.0.0.dist-info/METADATA +661 -0
- pathview_plus-2.0.0.dist-info/RECORD +23 -0
- pathview_plus-2.0.0.dist-info/WHEEL +5 -0
- pathview_plus-2.0.0.dist-info/top_level.txt +1 -0
pathview/highlighting.py
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
"""
|
|
2
|
+
highlighting.py
|
|
3
|
+
Layer-by-layer pathway graph modifications.
|
|
4
|
+
|
|
5
|
+
Implements a ggplot2-style composable interface for post-hoc pathway
|
|
6
|
+
customization. Users can highlight specific nodes or paths, change colors,
|
|
7
|
+
adjust labels, and more — all without re-running the full rendering pipeline.
|
|
8
|
+
|
|
9
|
+
Usage
|
|
10
|
+
-----
|
|
11
|
+
from pathview import pathview, highlight_nodes, highlight_edges
|
|
12
|
+
|
|
13
|
+
result = pathview("04110", gene_data=data, species="hsa")
|
|
14
|
+
|
|
15
|
+
# Compose modifications with +
|
|
16
|
+
modified = (result
|
|
17
|
+
+ highlight_nodes(["1956", "2099"], color="red", width=4)
|
|
18
|
+
+ highlight_edges([("1956", "2099")], color="blue", width=3))
|
|
19
|
+
|
|
20
|
+
# Save modified version
|
|
21
|
+
modified.save("highlighted.png")
|
|
22
|
+
|
|
23
|
+
Public API
|
|
24
|
+
----------
|
|
25
|
+
PathwayResult : Container for pathway rendering results
|
|
26
|
+
highlight_nodes : Highlight specific nodes
|
|
27
|
+
highlight_edges : Highlight specific edges
|
|
28
|
+
highlight_path : Highlight an entire path
|
|
29
|
+
change_labels : Update node labels
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
from dataclasses import dataclass, field
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
from typing import Callable, Optional
|
|
37
|
+
|
|
38
|
+
import numpy as np
|
|
39
|
+
import polars as pl
|
|
40
|
+
from PIL import Image
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
# PathwayResult container
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
@dataclass
class PathwayResult:
    """
    Container for pathway rendering results.

    Supports composable modifications via the + operator.
    Stores both the rendered image and the underlying data so modifications
    can be applied without full re-rendering.

    Attributes
    ----------
    pathway_id: Identifier of the rendered pathway.
    plot_data_gene: Gene node table. The modifiers in this module read its
        "kegg_names", "x", "y", "width" and "height" columns.
    plot_data_cpd: Compound node table, read the same way as the gene table.
    output_path: Path carried along with the result; no method of this class
        reads it.
    image_array: Rendered image as an array, or None when nothing was drawn.
    modifications: Modifier callables applied so far, in application order.
    """
    pathway_id: str
    plot_data_gene: Optional[pl.DataFrame] = None
    plot_data_cpd: Optional[pl.DataFrame] = None
    output_path: Optional[Path] = None
    image_array: Optional[np.ndarray] = None
    modifications: list[Callable] = field(default_factory=list)

    def __add__(self, modifier: Callable) -> PathwayResult:
        """
        Apply a modification function and return a new PathwayResult.

        This implements ggplot2-style layer composition:

            result = pathview(...) + highlight_nodes(...) + highlight_edges(...)

        The receiver is left untouched: the image is copied before the
        modifier draws on it, and the modification list is rebuilt.
        Returns NotImplemented for a non-callable operand so Python raises
        its regular "unsupported operand" TypeError.
        """
        if not callable(modifier):
            return NotImplemented

        new_result = PathwayResult(
            pathway_id=self.pathway_id,
            plot_data_gene=self.plot_data_gene,
            plot_data_cpd=self.plot_data_cpd,
            output_path=self.output_path,
            image_array=self.image_array.copy() if self.image_array is not None else None,
            modifications=self.modifications + [modifier],
        )

        # change_labels() records its work in an ad-hoc `_label_changes`
        # attribute that is not a dataclass field. Carry a copy forward so a
        # later `+` does not silently discard label changes made earlier.
        pending_labels = getattr(self, "_label_changes", None)
        if pending_labels is not None:
            new_result._label_changes = dict(pending_labels)

        # Apply the modification
        modifier(new_result)
        return new_result

    def save(self, path: str | Path, format: str = "png") -> None:
        """
        Save the modified pathway to a file.

        Parameters
        ----------
        path: Destination file.
        format: Image format name; "pdf" is written at 300 dpi, anything else
            is upper-cased and handed to PIL as the format.

        Raises ValueError when there is no image data.
        """
        if self.image_array is None:
            raise ValueError("No image data to save")

        img = Image.fromarray(self.image_array)
        if format.lower() == "pdf":
            img.save(path, "PDF", resolution=300.0)
        else:
            img.save(path, format.upper())
        print(f"Info: Saved modified pathway → {path}")

    def show(self) -> None:
        """Display the pathway using PIL. Raises ValueError without image data."""
        if self.image_array is None:
            raise ValueError("No image data to display")
        Image.fromarray(self.image_array).show()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# ---------------------------------------------------------------------------
|
|
103
|
+
# Node highlighting
|
|
104
|
+
# ---------------------------------------------------------------------------
|
|
105
|
+
|
|
106
|
+
def highlight_nodes(
    node_ids: list[str],
    color: str = "red",
    width: int = 4,
    opacity: float = 1.0,
) -> Callable[[PathwayResult], None]:
    """
    Build a modifier that outlines the requested nodes.

    Parameters
    ----------
    node_ids: IDs matched against the "kegg_names" column of the gene and
        compound tables.
    color: Border color string, converted with `_hex_to_rgb`.
    width: Border thickness in pixels.
    opacity: Border opacity, forwarded to `_draw_border`.

    Returns a modifier function that can be added to a PathwayResult.

    Example
    -------
    >>> result = pathview("04110", gene_data=data)
    >>> highlighted = result + highlight_nodes(["1956", "2099"], color="red", width=4)
    >>> highlighted.save("highlighted.png")
    """
    def modifier(result: PathwayResult) -> None:
        canvas = result.image_array
        if canvas is None:
            # Nothing rendered, so there is nothing to draw on.
            return

        # Collect the matching rows from each available node table,
        # genes first and compounds second.
        matched_tables = []
        for table in (result.plot_data_gene, result.plot_data_cpd):
            if table is None:
                continue
            hits = table.filter(pl.col("kegg_names").is_in(node_ids))
            if not hits.is_empty():
                matched_tables.append(hits)

        canvas_height = canvas.shape[0]
        border_rgb = _hex_to_rgb(color)

        # One border per matched row; x/y are the node centre and
        # width/height its full extent.
        for hits in matched_tables:
            for node in hits.iter_rows(named=True):
                centre_x, centre_y = node["x"], node["y"]
                half_w, half_h = node["width"] / 2, node["height"] / 2
                _draw_border(
                    canvas,
                    cx=centre_x, cy=centre_y,
                    half_width=half_w, half_height=half_h,
                    img_height=canvas_height,
                    rgb=border_rgb, thickness=width, opacity=opacity
                )

    return modifier
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# ---------------------------------------------------------------------------
|
|
170
|
+
# Edge highlighting
|
|
171
|
+
# ---------------------------------------------------------------------------
|
|
172
|
+
|
|
173
|
+
def highlight_edges(
    edge_pairs: list[tuple[str, str]],
    color: str = "blue",
    width: int = 3,
) -> Callable[[PathwayResult], None]:
    """
    Highlight specified edges (connections between nodes).

    Each pair is drawn as a straight line between the centres of the two
    nodes. Endpoints are looked up by "kegg_names" in both the gene and the
    compound tables, so gene-compound and compound-compound edges work too.
    Pairs with an endpoint that is not found are skipped.

    Parameters
    ----------
    edge_pairs: List of (source_id, target_id) tuples
    color: Edge color string, converted with `_hex_to_rgb`
    width: Edge width in pixels

    Returns a modifier function.

    Example
    -------
    >>> result + highlight_edges([("1956", "2099"), ("2099", "5594")])
    """
    def modifier(result: PathwayResult) -> None:
        if result.image_array is None:
            return

        # Index node centres by ID. Compounds are loaded first so that a gene
        # row wins if an ID ever appears in both tables; within one table the
        # last row for an ID wins.
        node_pos = {}
        for table in (result.plot_data_cpd, result.plot_data_gene):
            if table is None:
                continue
            for row in table.iter_rows(named=True):
                node_pos[row["kegg_names"]] = (row["x"], row["y"])

        img_height = result.image_array.shape[0]
        rgb = _hex_to_rgb(color)

        # Draw lines between pairs whose endpoints are both known
        for source_id, target_id in edge_pairs:
            start = node_pos.get(source_id)
            end = node_pos.get(target_id)
            if start is None or end is None:
                continue
            x1, y1 = start
            x2, y2 = end
            _draw_line(
                result.image_array,
                x1=x1, y1=y1, x2=x2, y2=y2,
                img_height=img_height,
                rgb=rgb, thickness=width
            )

    return modifier
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# ---------------------------------------------------------------------------
|
|
222
|
+
# Path highlighting
|
|
223
|
+
# ---------------------------------------------------------------------------
|
|
224
|
+
|
|
225
|
+
def highlight_path(
    path_node_ids: list[str],
    color: str = "orange",
    node_width: int = 3,
    edge_width: int = 2,
) -> Callable[[PathwayResult], None]:
    """
    Build a modifier that marks a whole path: its nodes and the links
    between consecutive nodes.

    Parameters
    ----------
    path_node_ids: Ordered list of node IDs forming a path
    color: Single color used for the node borders and the edges
    node_width: Border width passed to `highlight_nodes`
    edge_width: Line width passed to `highlight_edges`

    Returns a modifier function.

    Example
    -------
    >>> result + highlight_path(["1956", "2099", "5594", "207"], color="orange")
    """
    # Pair every node with its successor; zip stops at the shorter input,
    # so the last node gets no outgoing edge.
    consecutive_pairs = list(zip(path_node_ids, path_node_ids[1:]))

    def modifier(result: PathwayResult) -> None:
        # Nodes first, then the connecting edges.
        outline_nodes = highlight_nodes(path_node_ids, color=color, width=node_width)
        outline_nodes(result)
        connect_nodes = highlight_edges(consecutive_pairs, color=color, width=edge_width)
        connect_nodes(result)

    return modifier
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
# ---------------------------------------------------------------------------
|
|
259
|
+
# Label modification
|
|
260
|
+
# ---------------------------------------------------------------------------
|
|
261
|
+
|
|
262
|
+
def change_labels(
    label_map: dict[str, str],
    font_size: int = 11,
    color: str = "black",
) -> Callable[[PathwayResult], None]:
    """
    Build a modifier that records replacement labels for nodes.

    No text is drawn on the image: the mapping is merged into the result's
    `_label_changes` dict so a later re-render can use it. `font_size` and
    `color` are accepted but not used by the current implementation.

    Parameters
    ----------
    label_map: Dict mapping node_id → new_label
    font_size: Font size for new labels
    color: Text color

    Returns a modifier function.

    Example
    -------
    >>> result + change_labels({"1956": "EGFR*", "2099": "ESR1*"})
    """
    def modifier(result: PathwayResult) -> None:
        # Create the store on first use, then merge; later entries for the
        # same node ID replace earlier ones.
        try:
            recorded = result._label_changes
        except AttributeError:
            recorded = {}
            result._label_changes = recorded
        recorded.update(label_map)

    return modifier
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# ---------------------------------------------------------------------------
|
|
293
|
+
# Drawing primitives
|
|
294
|
+
# ---------------------------------------------------------------------------
|
|
295
|
+
|
|
296
|
+
def _hex_to_rgb(hex_color: str) -> tuple[int, int, int]:
|
|
297
|
+
"""Convert hex color to RGB tuple."""
|
|
298
|
+
hex_color = hex_color.lstrip("#")
|
|
299
|
+
return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _draw_border(
|
|
303
|
+
img: np.ndarray,
|
|
304
|
+
cx: float,
|
|
305
|
+
cy: float,
|
|
306
|
+
half_width: float,
|
|
307
|
+
half_height: float,
|
|
308
|
+
img_height: int,
|
|
309
|
+
rgb: tuple[int, int, int],
|
|
310
|
+
thickness: int,
|
|
311
|
+
opacity: float,
|
|
312
|
+
) -> None:
|
|
313
|
+
"""Draw a rectangle border on the image array."""
|
|
314
|
+
# Convert KGML coordinates to image coordinates
|
|
315
|
+
py = int(img_height - cy)
|
|
316
|
+
px = int(cx)
|
|
317
|
+
hw, hh = int(half_width), int(half_height)
|
|
318
|
+
|
|
319
|
+
# Draw rectangle border (4 sides)
|
|
320
|
+
for t in range(thickness):
|
|
321
|
+
# Top
|
|
322
|
+
img[max(0, py - hh - t):min(img.shape[0], py - hh - t + 1),
|
|
323
|
+
max(0, px - hw):min(img.shape[1], px + hw)] = rgb
|
|
324
|
+
# Bottom
|
|
325
|
+
img[max(0, py + hh + t):min(img.shape[0], py + hh + t + 1),
|
|
326
|
+
max(0, px - hw):min(img.shape[1], px + hw)] = rgb
|
|
327
|
+
# Left
|
|
328
|
+
img[max(0, py - hh):min(img.shape[0], py + hh),
|
|
329
|
+
max(0, px - hw - t):min(img.shape[1], px - hw - t + 1)] = rgb
|
|
330
|
+
# Right
|
|
331
|
+
img[max(0, py - hh):min(img.shape[0], py + hh),
|
|
332
|
+
max(0, px + hw + t):min(img.shape[1], px + hw + t + 1)] = rgb
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _draw_line(
|
|
336
|
+
img: np.ndarray,
|
|
337
|
+
x1: float,
|
|
338
|
+
y1: float,
|
|
339
|
+
x2: float,
|
|
340
|
+
y2: float,
|
|
341
|
+
img_height: int,
|
|
342
|
+
rgb: tuple[int, int, int],
|
|
343
|
+
thickness: int,
|
|
344
|
+
) -> None:
|
|
345
|
+
"""Draw a line on the image array using Bresenham's algorithm."""
|
|
346
|
+
# Convert coordinates
|
|
347
|
+
px1, py1 = int(x1), int(img_height - y1)
|
|
348
|
+
px2, py2 = int(x2), int(img_height - y2)
|
|
349
|
+
|
|
350
|
+
# Bresenham's line algorithm
|
|
351
|
+
dx = abs(px2 - px1)
|
|
352
|
+
dy = abs(py2 - py1)
|
|
353
|
+
sx = 1 if px1 < px2 else -1
|
|
354
|
+
sy = 1 if py1 < py2 else -1
|
|
355
|
+
err = dx - dy
|
|
356
|
+
|
|
357
|
+
while True:
|
|
358
|
+
# Draw thick point
|
|
359
|
+
for t in range(-thickness // 2, thickness // 2 + 1):
|
|
360
|
+
for u in range(-thickness // 2, thickness // 2 + 1):
|
|
361
|
+
py = py1 + t
|
|
362
|
+
px = px1 + u
|
|
363
|
+
if 0 <= py < img.shape[0] and 0 <= px < img.shape[1]:
|
|
364
|
+
img[py, px] = rgb
|
|
365
|
+
|
|
366
|
+
if px1 == px2 and py1 == py2:
|
|
367
|
+
break
|
|
368
|
+
|
|
369
|
+
e2 = 2 * err
|
|
370
|
+
if e2 > -dy:
|
|
371
|
+
err -= dy
|
|
372
|
+
px1 += sx
|
|
373
|
+
if e2 < dx:
|
|
374
|
+
err += dx
|
|
375
|
+
py1 += sy
|
pathview/id_mapping.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
id_mapping.py
|
|
3
|
+
Gene and compound identifier mapping:
|
|
4
|
+
- id2eg : arbitrary gene ID → Entrez Gene ID (MyGene.info)
|
|
5
|
+
- eg2id : Entrez Gene ID → any gene ID (MyGene.info)
|
|
6
|
+
- cpd_id_map : compound ID → KEGG compound (KEGG REST conv)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import warnings
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
import polars as pl
|
|
15
|
+
import requests
|
|
16
|
+
|
|
17
|
+
from .constants import KEGG_BASE
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Gene ID mapping (MyGene.info REST API)
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
_SCOPE_MAP = {
|
|
25
|
+
"symbol": "symbol",
|
|
26
|
+
"alias": "alias",
|
|
27
|
+
"uniprot": "uniprot",
|
|
28
|
+
"ensembl": "ensembl.gene",
|
|
29
|
+
"refseq": "refseq",
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
_MYGENE_URL = "https://mygene.info/v3/querymany"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _query_mygene(
|
|
36
|
+
ids: list[str],
|
|
37
|
+
scopes: str,
|
|
38
|
+
field: str,
|
|
39
|
+
species: str,
|
|
40
|
+
) -> dict[str, Optional[str]]:
|
|
41
|
+
"""
|
|
42
|
+
POST a batch query to MyGene.info and return a {query_id: field_value} dict.
|
|
43
|
+
Returns an empty-value dict on network failure.
|
|
44
|
+
"""
|
|
45
|
+
payload = {
|
|
46
|
+
"q": ",".join(ids),
|
|
47
|
+
"scopes": scopes,
|
|
48
|
+
"species": species,
|
|
49
|
+
"fields": field,
|
|
50
|
+
"returnall": "true",
|
|
51
|
+
}
|
|
52
|
+
try:
|
|
53
|
+
resp = requests.post(_MYGENE_URL, data=payload, timeout=30)
|
|
54
|
+
resp.raise_for_status()
|
|
55
|
+
hits = resp.json()
|
|
56
|
+
except Exception as exc:
|
|
57
|
+
warnings.warn(f"MyGene.info query failed: {exc}")
|
|
58
|
+
return {i: None for i in ids}
|
|
59
|
+
|
|
60
|
+
lookup: dict[str, Optional[str]] = {}
|
|
61
|
+
for hit in hits:
|
|
62
|
+
qid = hit.get("query", "")
|
|
63
|
+
val = hit.get(field)
|
|
64
|
+
if isinstance(val, list):
|
|
65
|
+
val = val[0] if val else None
|
|
66
|
+
if qid and qid not in lookup:
|
|
67
|
+
lookup[qid] = str(val) if val is not None else None
|
|
68
|
+
|
|
69
|
+
return lookup
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def id2eg(ids: list[str], category: str, org: str = "Hs") -> pl.DataFrame:
    """
    Translate gene identifiers of some other type into Entrez Gene IDs.

    Parameters
    ----------
    ids: Input gene identifiers.
    category: ID type of *ids* (e.g. "SYMBOL", "ENSEMBL", "UNIPROT"); it is
        also used, unchanged, as the name of the first output column.
    org: Species handed to the MyGene.info query.

    Returns a two-column DataFrame: [category, "ENTREZID"], one row per
    input ID in input order; IDs without a mapping get a null.

    Raises ValueError if *category* is already an Entrez type.
    """
    category_key = category.lower()
    if category_key in ("entrez", "eg", "entrezid"):
        raise ValueError("Input IDs are already Entrez Gene IDs.")

    # Unknown categories fall back to their lower-cased name as the scope.
    mygene_scope = _SCOPE_MAP.get(category_key, category_key)
    mapping = _query_mygene(ids, scopes=mygene_scope, field="entrezgene", species=org)

    entrez_column = [mapping.get(source_id) for source_id in ids]
    return pl.DataFrame({category: ids, "ENTREZID": entrez_column})
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def eg2id(
    eg_ids: list[str],
    category: str = "SYMBOL",
    org: str = "Hs",
) -> pl.DataFrame:
    """
    Translate Entrez Gene IDs into another identifier type.

    Parameters
    ----------
    eg_ids: Entrez Gene IDs to convert.
    category: Target ID type (e.g. "SYMBOL", "UNIPROT", "ENSEMBL"); it is
        also used, unchanged, as the name of the second output column.
    org: Species handed to the MyGene.info query.

    Returns a two-column DataFrame: ["ENTREZID", category], one row per
    input ID in input order; IDs without a mapping get a null.

    Raises ValueError if *category* is an Entrez type.
    """
    target_key = category.lower()
    if target_key in ("entrez", "eg", "entrezid"):
        raise ValueError("Output category cannot be Entrez Gene ID.")

    # Lower-cased category → MyGene.info field; unknown categories are used
    # as the field name directly.
    mygene_fields = dict(
        symbol="symbol",
        name="name",
        uniprot="uniprot",
        ensembl="ensembl.gene",
        alias="alias",
    )
    wanted_field = mygene_fields.get(target_key, target_key)

    resolved = _query_mygene(eg_ids, scopes="entrezgene", field=wanted_field, species=org)
    target_column = [resolved.get(entrez_id) for entrez_id in eg_ids]
    return pl.DataFrame({"ENTREZID": eg_ids, category: target_column})
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
# Compound ID mapping (KEGG REST conv endpoint)
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
_CPD_TYPE_MAP = {
|
|
132
|
+
"pubchem": "pubchem",
|
|
133
|
+
"chebi": "chebi",
|
|
134
|
+
"kegg": "cpd",
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def cpd_id_map(
    in_ids: list[str],
    in_type: str,
    out_type: str = "KEGG",
) -> pl.DataFrame:
    """
    Map compound IDs between identifier systems using KEGG REST.

    One `conv` request is sent per input ID. When KEGG returns several
    mappings for an ID, the first line of the reply is used.

    Parameters
    ----------
    in_ids: Input compound identifiers.
    in_type: Source ID type (e.g. "PUBCHEM", "CHEBI", "KEGG").
    out_type: Target ID type (default "KEGG").

    Returns a two-column DataFrame: [in_type, out_type], one row per input
    ID; IDs that could not be mapped get a null. A failed request is
    reported with a warning and treated as unmapped.
    """
    src = _CPD_TYPE_MAP.get(in_type.lower(), in_type.lower())
    dst = _CPD_TYPE_MAP.get(out_type.lower(), out_type.lower())

    out_ids: list[Optional[str]] = []
    for cid in in_ids:
        url = f"{KEGG_BASE}/conv/{dst}/{src}:{cid}"
        mapped: Optional[str] = None
        try:
            resp = requests.get(url, timeout=15)
            if resp.ok:
                # Reply lines look like "<src>:<id>\t<dst>:<id>". Parse only
                # the first line: splitting the whole body on tabs would glue
                # the next line's prefix onto the ID for multi-hit replies.
                lines = resp.text.strip().splitlines()
                parts = lines[0].split("\t") if lines else []
                if len(parts) > 1:
                    _prefix, sep, ident = parts[1].strip().partition(":")
                    mapped = ident if sep and ident else None
        except Exception as exc:
            # Best-effort per ID: report the failure and keep going.
            warnings.warn(f"KEGG conv lookup for {src}:{cid} failed: {exc}")
        out_ids.append(mapped)

    return pl.DataFrame({in_type: in_ids, out_type: out_ids})
|
pathview/kegg_api.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""
|
|
2
|
+
kegg_api.py
|
|
3
|
+
KEGG REST API interactions:
|
|
4
|
+
- SpeciesInfo : dataclass holding per-species KEGG metadata
|
|
5
|
+
- kegg_species_code: resolve a species name / abbreviation to SpeciesInfo
|
|
6
|
+
- download_kegg : fetch KGML (xml) and/or pathway image (png) files
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import warnings
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
import requests
|
|
17
|
+
|
|
18
|
+
from .constants import KEGG_BASE
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Species resolution
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class SpeciesInfo:
    """
    Immutable container for KEGG species metadata.

    Instances are produced by `kegg_species_code`. Field order is part of the
    constructor signature.
    """
    # KEGG organism code (e.g. "hsa"), or "ko" for the KEGG Orthology entry.
    kegg_code: str
    # True for entries resolved from the KEGG organism list, False for "ko".
    # NOTE(review): presumably "gene nodes use Entrez IDs" — confirm.
    entrez_gnodes: bool
    # The remaining fields are None for every organism resolved in this
    # module; only kegg_geneid is set, for the "ko" entry.
    # NOTE(review): they look like example IDs per ID system — confirm.
    kegg_geneid: Optional[str]
    ncbi_geneid: Optional[str]
    ncbi_proteinid: Optional[str]
    uniprot: Optional[str]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Pre-built entry returned by kegg_species_code() for the "ko" code, which
# is answered without requesting the KEGG organism list.
# NOTE(review): "K01488" looks like an example KO identifier — confirm.
_KO_SPECIES = SpeciesInfo(
    kegg_code="ko",
    entrez_gnodes=False,
    kegg_geneid="K01488",
    ncbi_geneid=None,
    ncbi_proteinid=None,
    uniprot=None,
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def kegg_species_code(species: str = "hsa") -> SpeciesInfo:
    """
    Resolve *species* (KEGG code, common name, or taxon) to a SpeciesInfo.

    Queries ``rest.kegg.jp/list/organism`` and matches any column of a row,
    case-insensitively and ignoring surrounding whitespace. The name column
    is additionally matched by its scientific name and by the common name
    given in parentheses, so "human" and "Homo sapiens" both resolve. The
    first matching row wins. "ko" is answered without a request.

    Parameters
    ----------
    species: KEGG organism code, T number, scientific or common name.

    Raises ValueError for unknown (or empty) species and RuntimeError when
    the organism list cannot be fetched.
    """
    unknown_message = (
        f"Unknown species '{species}'. "
        "Check https://rest.kegg.jp/list/organism for valid codes."
    )

    query = species.strip().lower()
    if query == "ko":
        return _KO_SPECIES
    if not query:
        # An empty query would otherwise "match" any row with an empty column.
        raise ValueError(unknown_message)

    url = f"{KEGG_BASE}/list/organism"
    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
    except Exception as exc:
        raise RuntimeError(f"Failed to fetch KEGG organism list: {exc}") from exc

    for line in resp.text.splitlines():
        parts = line.split("\t")
        if len(parts) < 3:
            continue

        candidates = {p.strip().lower() for p in parts}
        # NOTE(review): assumes the third column reads
        # "Scientific name (common name)", as in the KEGG organism list.
        scientific, sep, rest = parts[2].strip().partition(" (")
        candidates.add(scientific.strip().lower())
        if sep and rest.endswith(")"):
            candidates.add(rest[:-1].strip().lower())

        if query in candidates:
            return SpeciesInfo(
                kegg_code=parts[1],
                entrez_gnodes=True,
                kegg_geneid=None,
                ncbi_geneid=None,
                ncbi_proteinid=None,
                uniprot=None,
            )

    raise ValueError(unknown_message)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
# File download
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
def download_kegg(
    pathway_id: str,
    species: str = "hsa",
    kegg_dir: Path = Path("."),
    file_type: list[str] | None = None,
) -> dict[str, str]:
    """
    Download KEGG KGML and/or pathway image for *pathway_id*.

    Files are written to ``<kegg_dir>/<full_id>.xml`` and
    ``<kegg_dir>/<full_id>.png`` exactly as received from the server.

    Parameters
    ----------
    pathway_id: Numeric pathway ID, e.g. "04110" (species prefix is added
        automatically if absent).
    species: KEGG species code used to build the full pathway ID.
    kegg_dir: Directory where files are saved (created if missing).
    file_type: Subset of ["xml", "png"] to download (default: both). A single
        string such as "xml" is accepted as well.

    Returns a dict mapping the full pathway ID to "succeed" or "failed";
    it is "failed" as soon as any requested file could not be fetched. A
    failed download issues a warning and removes that file's target.

    Raises KeyError for a file type other than "xml" or "png".
    """
    if file_type is None:
        file_type = ["xml", "png"]
    elif isinstance(file_type, str):
        # Iterating a bare string would look up single characters.
        file_type = [file_type]

    kegg_dir = Path(kegg_dir)
    kegg_dir.mkdir(parents=True, exist_ok=True)

    full_id = pathway_id if pathway_id.startswith(species) else f"{species}{pathway_id}"

    _url_templates = {
        "xml": f"{KEGG_BASE}/get/{full_id}/kgml",
        "png": f"{KEGG_BASE}/get/{full_id}/image",
    }
    _targets = {
        "xml": kegg_dir / f"{full_id}.xml",
        "png": kegg_dir / f"{full_id}.png",
    }

    status = {full_id: "succeed"}

    for ftype in file_type:
        url = _url_templates[ftype]
        target = _targets[ftype]
        print(f"Info: Downloading {ftype} for {full_id} …")
        try:
            resp = requests.get(url, timeout=60)
            resp.raise_for_status()
            # Write the raw bytes for both types. Going through resp.text
            # would decode with a guessed charset and re-encode as UTF-8,
            # which can disagree with the encoding the XML itself declares.
            target.write_bytes(resp.content)
        except Exception as exc:
            warnings.warn(f"Download of {full_id} {ftype} failed: {exc}")
            status[full_id] = "failed"
            if target.exists():
                target.unlink()

    return status
|