pathview-plus 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pathview/utils.py ADDED
@@ -0,0 +1,80 @@
1
+ """
2
+ utils.py
3
+ General-purpose utility functions:
4
+ - String wrapping / fitting (wordwrap, _strfit)
5
+ - Numeric aggregators (max_abs, random_pick)
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import textwrap
11
+
12
+ import numpy as np
13
+
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # String utilities
17
+ # ---------------------------------------------------------------------------
18
+
19
def wordwrap(text: str, width: int = 20, break_word: bool = False) -> str:
    """
    Wrap *text* to *width* columns and return the lines joined by newlines.

    When *break_word* is False (default) wrapping only occurs at whitespace:
    a word longer than *width* is kept intact on its own over-long line and
    hyphenated words are not split at the hyphen.
    When True, the text is handed to ``_strfit``, which splits long words and
    inserts a backslash continuation marker at hard break points.

    Raises ValueError (from ``textwrap``) when *break_word* is False and
    *width* is not positive.
    """
    if break_word:
        return _strfit(text, width)

    # textwrap.wrap() splits over-long words and breaks after hyphens by
    # default; both are switched off so that breaks happen at whitespace
    # only, as the contract above states.
    wrapped = textwrap.wrap(
        text,
        width,
        break_long_words=False,
        break_on_hyphens=False,
    )
    return "\n".join(wrapped)
30
+
31
+
32
def _strfit(s: str, width: int = 20) -> str:
    """
    Hard-wrap *s* to roughly *width* characters per line.

    All runs of whitespace are first collapsed to single spaces.  A remainder
    of at most ``width + 3`` characters is emitted as the final line without
    further splitting.  Otherwise a space at offset *width*, +1, +2, -1 or -2
    (tried in that order) is used as the break point and dropped; if none is
    found the line is cut after exactly *width* characters and a trailing
    '\\' marks the forced break.

    Raises ValueError if *width* is smaller than 1: a zero width would never
    consume any input in the forced-break branch and loop forever.
    """
    if width < 1:
        raise ValueError(f"invalid width {width!r} (must be > 0)")

    remaining = " ".join(s.split())  # collapse all whitespace to single spaces
    lines: list[str] = []

    while remaining:
        # Short tail: emit as-is (may exceed *width* by up to 3 characters).
        if len(remaining) <= width + 3:
            lines.append(remaining)
            break

        # Prefer a whitespace break closest to the target width.
        for delta in (0, 1, 2, -1, -2):
            pos = width + delta
            if 0 < pos < len(remaining) and remaining[pos] == " ":
                lines.append(remaining[:pos])
                remaining = remaining[pos + 1:]  # skip the space itself
                break
        else:
            # No nearby whitespace: force a mid-word break.
            lines.append(remaining[:width] + "\\")
            remaining = remaining[width:]

    return "\n".join(lines)
61
+
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # Numeric aggregators
65
+ # ---------------------------------------------------------------------------
66
+
67
def max_abs(values: np.ndarray) -> float:
    """
    Return the element with the largest absolute value, ignoring NaNs.

    The sign of the selected element is preserved (``[1, -3, 2]`` gives
    ``-3.0``).  *values* may be an ndarray or any array-like of real numbers
    (list, tuple, scalar); it is converted to a float array first, so plain
    Python sequences no longer fail on the boolean-mask indexing.
    Returns NaN when *values* is empty or contains only NaNs.
    """
    arr = np.asarray(values, dtype=float)
    clean = arr[~np.isnan(arr)]  # boolean mask also flattens to 1-D
    if clean.size == 0:
        return float("nan")
    winner = np.argmax(np.abs(clean))
    return float(clean[winner])
73
+
74
+
75
def random_pick(values: np.ndarray) -> float:
    """
    Return a randomly chosen element, ignoring NaNs.  NaN if empty.

    *values* may be an ndarray or any array-like of real numbers (list,
    tuple, scalar); it is converted to a float array first, so plain Python
    sequences no longer fail on the boolean-mask indexing.  The draw uses
    NumPy's global random state (``np.random.choice``), so it is reproducible
    only if the caller seeds that state.
    """
    arr = np.asarray(values, dtype=float)
    clean = arr[~np.isnan(arr)]  # boolean mask also flattens to 1-D
    if clean.size == 0:
        return float("nan")
    return float(np.random.choice(clean))
@@ -0,0 +1,252 @@
1
+ #!python
2
+ """
3
+ pathview.py – master CLI entry point
4
+ =======================================
5
+ Visualise gene / compound expression data on KEGG pathway diagrams.
6
+
7
+ Usage
8
+ -----
9
+ python pathview.py --pathway-id 04110 --gene-data gene_expr.tsv
10
+ python pathview.py --pathway-id 04110 --species mmu --gene-data gene_expr.tsv
11
+ python pathview.py --pathway-id 04110 --gene-data gd.tsv --cpd-data cpd.tsv \\
12
+ --gene-idtype SYMBOL --cpd-idtype KEGG
13
+ python pathview.py --legend
14
+
15
+ Module layout
16
+ -------------
17
+ pathview/ ← importable package
18
+ __init__.py ← public API re-exports
19
+ constants.py ← shared types and literals
20
+ utils.py ← string helpers, numeric aggregators
21
+ id_mapping.py ← gene / compound ID conversion
22
+ mol_data.py ← mol_sum, sim_mol_data
23
+ kegg_api.py ← species lookup, file download
24
+ kgml_parser.py ← KGML XML → dataclasses + DataFrame
25
+ color_mapping.py ← colormaps, node_color, draw_color_key
26
+ node_mapping.py ← node_map
27
+ rendering.py ← keggview_native, keggview_graph, kegg_legend
28
+ orchestrator.py ← core orchestrator function
29
+
30
+ pathview.py ← this file (CLI front-end)
31
+
32
+ Dependencies
33
+ ------------
34
+ pip install polars requests matplotlib seaborn numpy Pillow networkx
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import argparse
40
+ import sys
41
+
42
+ import polars as pl
43
+
44
+ from pathview.rendering import kegg_legend
45
+ from pathview.orchestrator import pathview
46
+ from pathview.mol_data import sim_mol_data
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # Argument parser
51
+ # ---------------------------------------------------------------------------
52
+
53
def _build_parser() -> argparse.ArgumentParser:
    """
    Assemble the argparse parser for the pathview command line.

    ``--pathway-id`` is registered directly on the parser; every other option
    lives in one of the groups "Input data", "Species and paths",
    "Rendering", "Colour scale" or "Utilities".  No option is marked as
    required here.
    """
    example_lines = (
        "Examples:",
        " python pathview.py --pathway-id 04110 --gene-data expr.tsv",
        " python pathview.py --pathway-id hsa04110 --species hsa "
        "--gene-idtype SYMBOL --gene-data expr.tsv",
        " python pathview.py --legend",
        " python pathview.py --simulate --pathway-id 04110",
    )
    parser = argparse.ArgumentParser(
        prog="pathview",
        description="Overlay gene/compound data on KEGG pathway diagrams.",
        # The raw formatter leaves the epilog's line breaks untouched.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="\n".join(example_lines),
    )

    # ---- Pathway -----------------------------------------------------------
    parser.add_argument(
        "--pathway-id",
        help="KEGG pathway number, e.g. '04110' or 'hsa04110'.",
    )

    # ---- Input data --------------------------------------------------------
    input_group = parser.add_argument_group("Input data")
    for flag, help_text in (
        ("--gene-data",
         "TSV file: first column = gene IDs, remaining = expression values."),
        ("--cpd-data",
         "TSV file: first column = compound IDs, remaining = abundance."),
    ):
        input_group.add_argument(flag, metavar="TSV", help=help_text)
    for flag, fallback, help_text in (
        ("--gene-idtype", "ENTREZ",
         "Input gene ID type: ENTREZ, SYMBOL, UNIPROT, ENSEMBL, KEGG."),
        ("--cpd-idtype", "KEGG",
         "Input compound ID type: KEGG, PUBCHEM, CHEBI."),
    ):
        input_group.add_argument(
            flag, default=fallback, metavar="TYPE", help=help_text,
        )

    # ---- Species & paths ---------------------------------------------------
    paths_group = parser.add_argument_group("Species and paths")
    paths_group.add_argument(
        "--species", default="hsa", help="KEGG species code.",
    )
    paths_group.add_argument(
        "--kegg-dir",
        default=".",
        metavar="DIR",
        help="Directory for downloaded KEGG files and output images.",
    )
    paths_group.add_argument(
        "--out-suffix",
        default="pathview",
        help="Suffix appended to each output filename.",
    )

    # ---- Rendering ---------------------------------------------------------
    on_off = argparse.BooleanOptionalAction  # provides --flag / --no-flag
    render_group = parser.add_argument_group("Rendering")
    render_group.add_argument(
        "--kegg-native",
        action=on_off,
        default=True,
        help="Use the KEGG PNG background (native) or a NetworkX graph layout.",
    )
    render_group.add_argument(
        "--output-format",
        default="png",
        choices=["png", "pdf", "svg"],
        help="Output format: png (pixel-based), pdf (vector graph), or svg (vector native).",
    )
    render_group.add_argument(
        "--map-symbol",
        action=on_off,
        default=True,
        help="Replace Entrez IDs with gene symbols in node labels.",
    )
    render_group.add_argument(
        "--node-sum",
        default="sum",
        choices=["sum", "mean", "median", "max", "max_abs", "random"],
        help="Aggregation method for multiple probes mapping to one node.",
    )
    render_group.add_argument(
        "--min-nnodes",
        type=int,
        default=3,
        help="Skip pathways with fewer than this many mappable nodes.",
    )
    for flag, help_text in (
        ("--no-signature", "Suppress the 'Rendered by pathview.py' watermark."),
        ("--no-col-key", "Suppress the colour-scale legend bar."),
    ):
        render_group.add_argument(flag, action="store_true", help=help_text)

    # ---- Colour scale ------------------------------------------------------
    colour_group = parser.add_argument_group("Colour scale")
    for flag, converter, fallback, help_text in (
        ("--limit-gene", float, 1.0,
         "Symmetric colour-scale limit for gene data (±value)."),
        ("--limit-cpd", float, 1.0,
         "Symmetric colour-scale limit for compound data."),
        ("--bins-gene", int, 10, "Colour bins for gene data."),
        ("--bins-cpd", int, 10, "Colour bins for compound data."),
    ):
        colour_group.add_argument(
            flag, type=converter, default=fallback, help=help_text,
        )
    for flag, fallback, help_text in (
        ("--low-gene", "green", "Low-end gene colour."),
        ("--mid-gene", "gray", "Mid-point gene colour."),
        ("--high-gene", "red", "High-end gene colour."),
        ("--low-cpd", "blue", "Low-end compound colour."),
        ("--mid-cpd", "gray", "Mid-point compound colour."),
        ("--high-cpd", "yellow", "High-end compound colour."),
    ):
        colour_group.add_argument(flag, default=fallback, help=help_text)

    # ---- Utilities ---------------------------------------------------------
    util_group = parser.add_argument_group("Utilities")
    for flag, help_text in (
        ("--legend", "Display the KEGG element legend and exit."),
        ("--simulate",
         "Generate and use simulated gene data (requires --pathway-id)."),
    ):
        util_group.add_argument(flag, action="store_true", help=help_text)
    util_group.add_argument(
        "--n-sim",
        type=int,
        default=200,
        help="Number of molecules in simulated data (used with --simulate).",
    )

    return parser
171
+
172
+
173
+ # ---------------------------------------------------------------------------
174
+ # Main
175
+ # ---------------------------------------------------------------------------
176
+
177
def main(argv: list[str] | None = None) -> None:
    """
    Command-line entry point: parse *argv*, load or simulate data, run pathview.

    *argv* is the argument list to parse; None makes argparse fall back to
    ``sys.argv[1:]``.

    Exit behaviour:
    - ``--legend`` calls ``kegg_legend()`` and returns immediately.
    - A missing ``--pathway-id`` or the absence of any data source is
      reported through ``parser.error`` (usage message, exit status 2).
    - A falsy result from ``pathview`` prints a warning to stderr and exits
      with status 1.
    """
    # Build the parser once and reuse it for error reporting.
    parser = _build_parser()
    args = parser.parse_args(argv)

    # -- Legend only ---------------------------------------------------------
    if args.legend:
        kegg_legend()
        return

    # -- Require pathway-id for everything else ------------------------------
    if not args.pathway_id:
        parser.error("--pathway-id is required (unless using --legend).")

    # -- Load or simulate data -----------------------------------------------
    gene_data = cpd_data = None

    if args.simulate:
        # Simulation replaces file input entirely; tell the user instead of
        # silently dropping the files they passed.
        ignored = [
            flag
            for flag, path in (
                ("--gene-data", args.gene_data),
                ("--cpd-data", args.cpd_data),
            )
            if path
        ]
        if ignored:
            print(
                f"Warning: {', '.join(ignored)} ignored because "
                "--simulate was given.",
                file=sys.stderr,
            )
        print(f"Info: Generating simulated gene data (n={args.n_sim}) …")
        gene_data = sim_mol_data(
            mol_type="gene",
            species=args.species,
            n_mol=args.n_sim,
        )
    else:
        if args.gene_data:
            gene_data = pl.read_csv(args.gene_data, separator="\t")
            # The first column holds IDs, hence width - 1 experiment columns.
            print(f"Info: Loaded gene data — {gene_data.height} rows, "
                  f"{gene_data.width - 1} experiment column(s).")
        if args.cpd_data:
            cpd_data = pl.read_csv(args.cpd_data, separator="\t")
            print(f"Info: Loaded compound data — {cpd_data.height} rows, "
                  f"{cpd_data.width - 1} experiment column(s).")

    if gene_data is None and cpd_data is None:
        parser.error(
            "Provide at least one of --gene-data, --cpd-data, or --simulate."
        )

    # -- Run pathview --------------------------------------------------------
    result = pathview(
        pathway_id=args.pathway_id,
        gene_data=gene_data,
        cpd_data=cpd_data,
        species=args.species,
        kegg_dir=args.kegg_dir,
        kegg_native=args.kegg_native,
        output_format=args.output_format,
        gene_idtype=args.gene_idtype,
        cpd_idtype=args.cpd_idtype,
        out_suffix=args.out_suffix,
        node_sum=args.node_sum,
        map_symbol=args.map_symbol,
        min_nnodes=args.min_nnodes,
        # The CLI exposes negative switches; pathview takes positive flags.
        new_signature=not args.no_signature,
        plot_col_key=not args.no_col_key,
        limit={"gene": args.limit_gene, "cpd": args.limit_cpd},
        bins={"gene": args.bins_gene, "cpd": args.bins_cpd},
        both_dirs={"gene": True, "cpd": True},
        low={"gene": args.low_gene, "cpd": args.low_cpd},
        mid={"gene": args.mid_gene, "cpd": args.mid_cpd},
        high={"gene": args.high_gene, "cpd": args.high_cpd},
    )

    if result:
        gdf = result.get("plot_data_gene")
        cdf = result.get("plot_data_cpd")
        if gdf is not None:
            print(f"Info: Gene plot data — {gdf.height} nodes mapped.")
        if cdf is not None:
            print(f"Info: Compound plot data — {cdf.height} nodes mapped.")
    else:
        print("Warning: Pathway was skipped or no data could be mapped.", file=sys.stderr)
        sys.exit(1)
249
+
250
+
251
+ if __name__ == "__main__":  # run the CLI only when executed as a script, not when imported
252
+ main()