pathview-plus 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pathview/__init__.py +124 -0
- pathview/color_mapping.py +153 -0
- pathview/constants.py +27 -0
- pathview/databases.py +309 -0
- pathview/examples.py +342 -0
- pathview/highlighting.py +375 -0
- pathview/id_mapping.py +170 -0
- pathview/kegg_api.py +143 -0
- pathview/kgml_parser.py +189 -0
- pathview/mol_data.py +168 -0
- pathview/node_mapping.py +99 -0
- pathview/pathview.py +316 -0
- pathview/rendering.py +409 -0
- pathview/sbgn_parser.py +353 -0
- pathview/splines.py +304 -0
- pathview/svg_rendering.py +305 -0
- pathview/test_all_features.py +343 -0
- pathview/utils.py +80 -0
- pathview_plus-2.0.0.data/scripts/pathview-cli.py +252 -0
- pathview_plus-2.0.0.dist-info/METADATA +661 -0
- pathview_plus-2.0.0.dist-info/RECORD +23 -0
- pathview_plus-2.0.0.dist-info/WHEEL +5 -0
- pathview_plus-2.0.0.dist-info/top_level.txt +1 -0
pathview/utils.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""
|
|
2
|
+
utils.py
|
|
3
|
+
General-purpose utility functions:
|
|
4
|
+
- String wrapping / fitting (wordwrap, _strfit)
|
|
5
|
+
- Numeric aggregators (max_abs, random_pick)
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import textwrap
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# ---------------------------------------------------------------------------
|
|
16
|
+
# String utilities
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
def wordwrap(text: str, width: int = 20, break_word: bool = False) -> str:
    """
    Wrap *text* to *width* columns and return the lines joined by newlines.

    When *break_word* is False (default) wrapping only occurs at whitespace;
    a word longer than *width* is kept intact on its own, over-long line.
    When True, the text is handed to ``_strfit``, which splits long words and
    inserts a backslash continuation marker at hard break points.
    """
    if break_word:
        return _strfit(text, width)

    # textwrap's defaults split over-long words and also break after hyphens.
    # Both are switched off so that whitespace is the only break point, as
    # documented above.
    wrapped = textwrap.wrap(
        text,
        width,
        break_long_words=False,
        break_on_hyphens=False,
    )
    return "\n".join(wrapped)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _strfit(s: str, width: int = 20) -> str:
|
|
33
|
+
"""
|
|
34
|
+
Hard-wrap *s* to *width* characters per line.
|
|
35
|
+
|
|
36
|
+
Prefers whitespace break points within ±2 characters of *width*.
|
|
37
|
+
Falls back to a forced break with a trailing '\\' when none is found.
|
|
38
|
+
"""
|
|
39
|
+
s = " ".join(s.split()) # collapse all whitespace to single spaces
|
|
40
|
+
chars = list(s)
|
|
41
|
+
lines: list[str] = []
|
|
42
|
+
|
|
43
|
+
while chars:
|
|
44
|
+
if len(chars) <= width + 3:
|
|
45
|
+
lines.append("".join(chars))
|
|
46
|
+
break
|
|
47
|
+
|
|
48
|
+
# Prefer a whitespace break closest to the target width
|
|
49
|
+
for delta in (0, 1, 2, -1, -2):
|
|
50
|
+
pos = width + delta
|
|
51
|
+
if 0 < pos < len(chars) and chars[pos] == " ":
|
|
52
|
+
lines.append("".join(chars[:pos]))
|
|
53
|
+
chars = chars[pos + 1:]
|
|
54
|
+
break
|
|
55
|
+
else:
|
|
56
|
+
# No nearby whitespace — force a mid-word break
|
|
57
|
+
lines.append("".join(chars[:width]) + "\\")
|
|
58
|
+
chars = chars[width:]
|
|
59
|
+
|
|
60
|
+
return "\n".join(lines)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
# Numeric aggregators
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
def max_abs(values: np.ndarray) -> float:
    """Return the element with the largest absolute value, ignoring NaNs.

    *values* may be an ndarray or any array-like of numbers (list, tuple,
    nested sequence); it is converted to a flat float array before use. The
    sign of the selected element is preserved, and when several elements
    share the largest magnitude the first one is returned. NaN is returned
    when no non-NaN element is present.
    """
    # Coerce first so that plain sequences support boolean-mask indexing.
    flat = np.asarray(values, dtype=float).ravel()
    present = flat[~np.isnan(flat)]
    if present.size == 0:
        return float("nan")
    return float(present[np.argmax(np.abs(present))])
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def random_pick(values: np.ndarray) -> float:
    """Return a randomly chosen element, ignoring NaNs. NaN if empty.

    *values* may be an ndarray or any array-like of numbers (list, tuple,
    nested sequence); it is converted to a flat float array before use. The
    draw uses ``np.random.choice``, i.e. NumPy's global random state.
    """
    # Coerce first so that plain sequences support boolean-mask indexing.
    flat = np.asarray(values, dtype=float).ravel()
    candidates = flat[~np.isnan(flat)]
    if candidates.size == 0:
        return float("nan")
    return float(np.random.choice(candidates))
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
#!python
|
|
2
|
+
"""
|
|
3
|
+
pathview.py – master CLI entry point
|
|
4
|
+
=======================================
|
|
5
|
+
Visualise gene / compound expression data on KEGG pathway diagrams.
|
|
6
|
+
|
|
7
|
+
Usage
|
|
8
|
+
-----
|
|
9
|
+
python pathview.py --pathway-id 04110 --gene-data gene_expr.tsv
|
|
10
|
+
python pathview.py --pathway-id 04110 --species mmu --gene-data gene_expr.tsv
|
|
11
|
+
python pathview.py --pathway-id 04110 --gene-data gd.tsv --cpd-data cpd.tsv \\
|
|
12
|
+
--gene-idtype SYMBOL --cpd-idtype KEGG
|
|
13
|
+
python pathview.py --legend
|
|
14
|
+
|
|
15
|
+
Module layout
|
|
16
|
+
-------------
|
|
17
|
+
pathview/ ← importable package
|
|
18
|
+
__init__.py ← public API re-exports
|
|
19
|
+
constants.py ← shared types and literals
|
|
20
|
+
utils.py ← string helpers, numeric aggregators
|
|
21
|
+
id_mapping.py ← gene / compound ID conversion
|
|
22
|
+
mol_data.py ← mol_sum, sim_mol_data
|
|
23
|
+
kegg_api.py ← species lookup, file download
|
|
24
|
+
kgml_parser.py ← KGML XML → dataclasses + DataFrame
|
|
25
|
+
color_mapping.py ← colormaps, node_color, draw_color_key
|
|
26
|
+
node_mapping.py ← node_map
|
|
27
|
+
rendering.py ← keggview_native, keggview_graph, kegg_legend
|
|
28
|
+
pathview.py ← core orchestrator function
|
|
29
|
+
|
|
30
|
+
pathview.py ← this file (CLI front-end)
|
|
31
|
+
|
|
32
|
+
Dependencies
|
|
33
|
+
------------
|
|
34
|
+
pip install polars requests matplotlib seaborn numpy Pillow networkx
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
from __future__ import annotations
|
|
38
|
+
|
|
39
|
+
import argparse
|
|
40
|
+
import sys
|
|
41
|
+
|
|
42
|
+
import polars as pl
|
|
43
|
+
|
|
44
|
+
from pathview.rendering import kegg_legend
|
|
45
|
+
from pathview.orchestrator import pathview
|
|
46
|
+
from pathview.mol_data import sim_mol_data
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
# Argument parser
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
def _build_parser() -> argparse.ArgumentParser:
|
|
54
|
+
p = argparse.ArgumentParser(
|
|
55
|
+
prog="pathview",
|
|
56
|
+
description="Overlay gene/compound data on KEGG pathway diagrams.",
|
|
57
|
+
formatter_class=argparse.RawDescriptionHelpFormatter,#argparse.ArgumentDefaultsHelpFormatter,
|
|
58
|
+
epilog=(
|
|
59
|
+
"Examples:\n"
|
|
60
|
+
" python pathview.py --pathway-id 04110 --gene-data expr.tsv\n"
|
|
61
|
+
" python pathview.py --pathway-id hsa04110 --species hsa "
|
|
62
|
+
"--gene-idtype SYMBOL --gene-data expr.tsv\n"
|
|
63
|
+
" python pathview.py --legend\n"
|
|
64
|
+
" python pathview.py --simulate --pathway-id 04110"
|
|
65
|
+
),
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# ---- Pathway -----------------------------------------------------------
|
|
69
|
+
p.add_argument(
|
|
70
|
+
"--pathway-id",
|
|
71
|
+
help="KEGG pathway number, e.g. '04110' or 'hsa04110'.",
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
# ---- Input data --------------------------------------------------------
|
|
75
|
+
data = p.add_argument_group("Input data")
|
|
76
|
+
data.add_argument(
|
|
77
|
+
"--gene-data", metavar="TSV",
|
|
78
|
+
help="TSV file: first column = gene IDs, remaining = expression values.",
|
|
79
|
+
)
|
|
80
|
+
data.add_argument(
|
|
81
|
+
"--cpd-data", metavar="TSV",
|
|
82
|
+
help="TSV file: first column = compound IDs, remaining = abundance.",
|
|
83
|
+
)
|
|
84
|
+
data.add_argument(
|
|
85
|
+
"--gene-idtype", default="ENTREZ",
|
|
86
|
+
metavar="TYPE",
|
|
87
|
+
help="Input gene ID type: ENTREZ, SYMBOL, UNIPROT, ENSEMBL, KEGG.",
|
|
88
|
+
)
|
|
89
|
+
data.add_argument(
|
|
90
|
+
"--cpd-idtype", default="KEGG",
|
|
91
|
+
metavar="TYPE",
|
|
92
|
+
help="Input compound ID type: KEGG, PUBCHEM, CHEBI.",
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
# ---- Species & paths ---------------------------------------------------
|
|
96
|
+
run = p.add_argument_group("Species and paths")
|
|
97
|
+
run.add_argument("--species", default="hsa", help="KEGG species code.")
|
|
98
|
+
run.add_argument("--kegg-dir", default=".", metavar="DIR",
|
|
99
|
+
help="Directory for downloaded KEGG files and output images.")
|
|
100
|
+
run.add_argument("--out-suffix", default="pathview",
|
|
101
|
+
help="Suffix appended to each output filename.")
|
|
102
|
+
|
|
103
|
+
# ---- Rendering ---------------------------------------------------------
|
|
104
|
+
rend = p.add_argument_group("Rendering")
|
|
105
|
+
rend.add_argument(
|
|
106
|
+
"--kegg-native", action=argparse.BooleanOptionalAction, default=True,
|
|
107
|
+
help="Use the KEGG PNG background (native) or a NetworkX graph layout.",
|
|
108
|
+
)
|
|
109
|
+
rend.add_argument(
|
|
110
|
+
"--output-format", default="png",
|
|
111
|
+
choices=["png", "pdf", "svg"],
|
|
112
|
+
help="Output format: png (pixel-based), pdf (vector graph), or svg (vector native).",
|
|
113
|
+
)
|
|
114
|
+
rend.add_argument(
|
|
115
|
+
"--map-symbol", action=argparse.BooleanOptionalAction, default=True,
|
|
116
|
+
help="Replace Entrez IDs with gene symbols in node labels.",
|
|
117
|
+
)
|
|
118
|
+
rend.add_argument(
|
|
119
|
+
"--node-sum", default="sum",
|
|
120
|
+
choices=["sum", "mean", "median", "max", "max_abs", "random"],
|
|
121
|
+
help="Aggregation method for multiple probes mapping to one node.",
|
|
122
|
+
)
|
|
123
|
+
rend.add_argument(
|
|
124
|
+
"--min-nnodes", type=int, default=3,
|
|
125
|
+
help="Skip pathways with fewer than this many mappable nodes.",
|
|
126
|
+
)
|
|
127
|
+
rend.add_argument(
|
|
128
|
+
"--no-signature", action="store_true",
|
|
129
|
+
help="Suppress the 'Rendered by pathview.py' watermark.",
|
|
130
|
+
)
|
|
131
|
+
rend.add_argument(
|
|
132
|
+
"--no-col-key", action="store_true",
|
|
133
|
+
help="Suppress the colour-scale legend bar.",
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
# ---- Colour scale ------------------------------------------------------
|
|
137
|
+
col = p.add_argument_group("Colour scale")
|
|
138
|
+
col.add_argument("--limit-gene", type=float, default=1.0,
|
|
139
|
+
help="Symmetric colour-scale limit for gene data (±value).")
|
|
140
|
+
col.add_argument("--limit-cpd", type=float, default=1.0,
|
|
141
|
+
help="Symmetric colour-scale limit for compound data.")
|
|
142
|
+
col.add_argument("--bins-gene", type=int, default=10,
|
|
143
|
+
help="Colour bins for gene data.")
|
|
144
|
+
col.add_argument("--bins-cpd", type=int, default=10,
|
|
145
|
+
help="Colour bins for compound data.")
|
|
146
|
+
col.add_argument("--low-gene", default="green", help="Low-end gene colour.")
|
|
147
|
+
col.add_argument("--mid-gene", default="gray", help="Mid-point gene colour.")
|
|
148
|
+
col.add_argument("--high-gene", default="red", help="High-end gene colour.")
|
|
149
|
+
col.add_argument("--low-cpd", default="blue", help="Low-end compound colour.")
|
|
150
|
+
col.add_argument("--mid-cpd", default="gray", help="Mid-point compound colour.")
|
|
151
|
+
col.add_argument("--high-cpd", default="yellow", help="High-end compound colour.")
|
|
152
|
+
|
|
153
|
+
# ---- Utilities ---------------------------------------------------------
|
|
154
|
+
util = p.add_argument_group("Utilities")
|
|
155
|
+
util.add_argument(
|
|
156
|
+
"--legend",
|
|
157
|
+
action="store_true",
|
|
158
|
+
help="Display the KEGG element legend and exit.",
|
|
159
|
+
)
|
|
160
|
+
util.add_argument(
|
|
161
|
+
"--simulate",
|
|
162
|
+
action="store_true",
|
|
163
|
+
help="Generate and use simulated gene data (requires --pathway-id).",
|
|
164
|
+
)
|
|
165
|
+
util.add_argument(
|
|
166
|
+
"--n-sim", type=int, default=200,
|
|
167
|
+
help="Number of molecules in simulated data (used with --simulate).",
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
return p
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ---------------------------------------------------------------------------
|
|
174
|
+
# Main
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
|
|
177
|
+
def _read_tsv(parser: argparse.ArgumentParser, path: str, flag: str):
    """Read the tab-separated file at *path*, reporting I/O failures via *parser*.

    A missing or unreadable file becomes a normal command-line error message
    instead of a traceback. Parse errors raised by polars are not caught.
    """
    try:
        return pl.read_csv(path, separator="\t")
    except OSError as exc:
        # parser.error() prints the usage line plus this message and exits.
        parser.error(f"cannot read {flag} file {path!r}: {exc}")


def main(argv: list[str] | None = None) -> None:
    """Run the pathview command-line interface.

    Parameters
    ----------
    argv
        Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Behaviour
    ---------
    * ``--legend`` calls ``kegg_legend()`` and returns immediately.
    * Otherwise ``--pathway-id`` is mandatory, together with at least one of
      ``--gene-data``, ``--cpd-data`` or ``--simulate``; violations are
      reported through ``parser.error`` (argparse exits with status 2).
    * With ``--simulate`` the gene table comes from ``sim_mol_data`` and any
      file options are not read; a warning is printed when that happens.
    * The process exits with status 1 when ``pathview`` returns a falsy
      result.
    """
    # Build the parser once and reuse it for every error report.
    parser = _build_parser()
    args = parser.parse_args(argv)

    # -- Legend only ---------------------------------------------------------
    if args.legend:
        kegg_legend()
        return

    # -- Require pathway-id for everything else ------------------------------
    if not args.pathway_id:
        parser.error("--pathway-id is required (unless using --legend).")

    # -- Load or simulate data -----------------------------------------------
    gene_data = cpd_data = None

    if args.simulate:
        # Simulation replaces the file inputs; say so rather than dropping
        # user-supplied paths without notice.
        ignored = [
            flag
            for flag, value in (
                ("--gene-data", args.gene_data),
                ("--cpd-data", args.cpd_data),
            )
            if value
        ]
        if ignored:
            print(
                f"Warning: {', '.join(ignored)} ignored because --simulate was given.",
                file=sys.stderr,
            )
        print(f"Info: Generating simulated gene data (n={args.n_sim}) …")
        gene_data = sim_mol_data(
            mol_type="gene",
            species=args.species,
            n_mol=args.n_sim,
        )
    else:
        if args.gene_data:
            gene_data = _read_tsv(parser, args.gene_data, "--gene-data")
            # The first column holds the IDs, hence width - 1 data columns.
            print(f"Info: Loaded gene data — {gene_data.height} rows, "
                  f"{gene_data.width - 1} experiment column(s).")
        if args.cpd_data:
            cpd_data = _read_tsv(parser, args.cpd_data, "--cpd-data")
            print(f"Info: Loaded compound data — {cpd_data.height} rows, "
                  f"{cpd_data.width - 1} experiment column(s).")

    if gene_data is None and cpd_data is None:
        parser.error(
            "Provide at least one of --gene-data, --cpd-data, or --simulate."
        )

    # -- Run pathview --------------------------------------------------------
    result = pathview(
        pathway_id=args.pathway_id,
        gene_data=gene_data,
        cpd_data=cpd_data,
        species=args.species,
        kegg_dir=args.kegg_dir,
        kegg_native=args.kegg_native,
        output_format=args.output_format,
        gene_idtype=args.gene_idtype,
        cpd_idtype=args.cpd_idtype,
        out_suffix=args.out_suffix,
        node_sum=args.node_sum,
        map_symbol=args.map_symbol,
        min_nnodes=args.min_nnodes,
        # The CLI exposes these two as negative switches.
        new_signature=not args.no_signature,
        plot_col_key=not args.no_col_key,
        limit={"gene": args.limit_gene, "cpd": args.limit_cpd},
        bins={"gene": args.bins_gene, "cpd": args.bins_cpd},
        both_dirs={"gene": True, "cpd": True},
        low={"gene": args.low_gene, "cpd": args.low_cpd},
        mid={"gene": args.mid_gene, "cpd": args.mid_cpd},
        high={"gene": args.high_gene, "cpd": args.high_cpd},
    )

    if not result:
        print("Warning: Pathway was skipped or no data could be mapped.", file=sys.stderr)
        sys.exit(1)

    gdf = result.get("plot_data_gene")
    cdf = result.get("plot_data_cpd")
    if gdf is not None:
        print(f"Info: Gene plot data — {gdf.height} nodes mapped.")
    if cdf is not None:
        print(f"Info: Compound plot data — {cdf.height} nodes mapped.")
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|