graphpop-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. graphpop_cli/__init__.py +2 -0
  2. graphpop_cli/cli.py +161 -0
  3. graphpop_cli/commands/__init__.py +1 -0
  4. graphpop_cli/commands/aggregate.py +206 -0
  5. graphpop_cli/commands/batch.py +155 -0
  6. graphpop_cli/commands/compare.py +118 -0
  7. graphpop_cli/commands/config_cmd.py +117 -0
  8. graphpop_cli/commands/converge.py +156 -0
  9. graphpop_cli/commands/db.py +188 -0
  10. graphpop_cli/commands/divergence.py +37 -0
  11. graphpop_cli/commands/diversity.py +36 -0
  12. graphpop_cli/commands/dump.py +210 -0
  13. graphpop_cli/commands/export_bed.py +170 -0
  14. graphpop_cli/commands/export_windows.py +91 -0
  15. graphpop_cli/commands/extract.py +271 -0
  16. graphpop_cli/commands/filter_results.py +165 -0
  17. graphpop_cli/commands/garud_h.py +30 -0
  18. graphpop_cli/commands/genome_scan.py +41 -0
  19. graphpop_cli/commands/ihs.py +29 -0
  20. graphpop_cli/commands/import_data.py +266 -0
  21. graphpop_cli/commands/inventory.py +160 -0
  22. graphpop_cli/commands/joint_sfs.py +38 -0
  23. graphpop_cli/commands/ld.py +35 -0
  24. graphpop_cli/commands/lookup.py +207 -0
  25. graphpop_cli/commands/neighbors.py +175 -0
  26. graphpop_cli/commands/nsl.py +29 -0
  27. graphpop_cli/commands/plot.py +1066 -0
  28. graphpop_cli/commands/pop_summary.py +30 -0
  29. graphpop_cli/commands/query.py +15 -0
  30. graphpop_cli/commands/rank_genes.py +177 -0
  31. graphpop_cli/commands/report.py +264 -0
  32. graphpop_cli/commands/roh.py +30 -0
  33. graphpop_cli/commands/run_all.py +276 -0
  34. graphpop_cli/commands/server.py +98 -0
  35. graphpop_cli/commands/setup.py +299 -0
  36. graphpop_cli/commands/sfs.py +38 -0
  37. graphpop_cli/commands/validate.py +167 -0
  38. graphpop_cli/commands/xpehh.py +31 -0
  39. graphpop_cli/config.py +57 -0
  40. graphpop_cli/connection.py +52 -0
  41. graphpop_cli/formatters.py +81 -0
  42. graphpop_cli-0.1.0.dist-info/METADATA +73 -0
  43. graphpop_cli-0.1.0.dist-info/RECORD +46 -0
  44. graphpop_cli-0.1.0.dist-info/WHEEL +5 -0
  45. graphpop_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. graphpop_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1066 @@
1
+ """graphpop plot — generate standard population genomics figures from TSV results."""
2
+ from __future__ import annotations
3
+
4
+ import csv
5
+ import re
6
+ from pathlib import Path
7
+
8
+ import click
9
+
10
+ from ..cli import pass_ctx
11
+
12
+ try:
13
+ import matplotlib
14
+ matplotlib.use("Agg")
15
+ import matplotlib.pyplot as plt
16
+ import numpy as np
17
+ HAS_MPL = True
18
+ except ImportError:
19
+ HAS_MPL = False
20
+
21
+ try:
22
+ from scipy.cluster.hierarchy import linkage, dendrogram
23
+ from scipy.spatial.distance import squareform
24
+ HAS_SCIPY = True
25
+ except ImportError:
26
+ HAS_SCIPY = False
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Nature-style settings
31
+ # ---------------------------------------------------------------------------
32
# Eight hex colours; the plot commands in this module index into this list
# (usually modulo its length) to colour bars, points and violins.
WONG_PALETTE = [
    "#0072B2",
    "#E69F00",
    "#009E73",
    "#D55E00",
    "#56B4E9",
    "#CC79A7",
    "#F0E442",
    "#000000",
]
36
+
37
+
38
def _apply_style():
    """Install the Nature Methods-like figure defaults into matplotlib's rcParams."""
    typography = {
        "font.family": "sans-serif",
        "font.sans-serif": ["Arial", "Helvetica", "DejaVu Sans"],
        "font.size": 7,
        "axes.titlesize": 8,
        "axes.labelsize": 7,
        "xtick.labelsize": 6,
        "ytick.labelsize": 6,
        "legend.fontsize": 6,
    }
    strokes = {
        "axes.linewidth": 0.6,
        "xtick.major.width": 0.6,
        "ytick.major.width": 0.6,
        "xtick.direction": "out",
        "ytick.direction": "out",
        "lines.linewidth": 1.0,
    }
    frame = {
        "axes.spines.top": False,
        "axes.spines.right": False,
    }
    backgrounds = {
        "figure.facecolor": "white",
        "axes.facecolor": "white",
        "savefig.facecolor": "white",
    }
    export = {
        "pdf.fonttype": 42,
    }

    # The groups share no keys, so merging them is order-independent.
    plt.rcParams.update({**typography, **strokes, **frame, **backgrounds, **export})
62
+
63
+
64
def _check_matplotlib():
    """Exit with status 1 and an install hint if the plotting imports failed."""
    if HAS_MPL:
        return

    hint = (
        "Error: matplotlib is required for graphpop plot.\n"
        "Install with: pip install matplotlib numpy"
    )
    click.echo(hint, err=True)
    raise SystemExit(1)
72
+
73
+
74
def _read_tsv(path: str) -> list[dict]:
    """Read a tab-separated file into a list of row dicts.

    Lines whose first character is ``#`` are dropped before parsing, so the
    first remaining line is used as the header row.

    Args:
        path: Location of the TSV file.

    Returns:
        One dict per data row, keyed by the header's column names, with
        values as produced by ``csv.DictReader``. Empty if the file has no
        data rows.
    """
    # Explicit UTF-8 keeps parsing independent of the platform's locale encoding.
    with open(path, encoding="utf-8") as handle:
        # Filter lazily; the reader consumes the generator while the file is open.
        data_lines = (line for line in handle if not line.startswith("#"))
        return list(csv.DictReader(data_lines, delimiter="\t"))
81
+
82
+
83
def _read_tsv_dir(directory: str, pattern: str = "*.tsv") -> list[dict]:
    """Concatenate the rows of every file in *directory* matching *pattern*.

    Files are visited in sorted path order and each one is parsed with
    ``_read_tsv``; the returned list keeps that order.
    """
    collected: list[dict] = []
    tsv_paths = sorted(Path(directory).glob(pattern))
    for tsv_path in tsv_paths:
        collected += _read_tsv(str(tsv_path))
    return collected
89
+
90
+
91
def _save_fig(fig, output: str, dpi: int = 300):
    """Write *fig* to *output*, report the path, and close the figure.

    Args:
        fig: Matplotlib figure to save.
        output: Destination path handed to ``fig.savefig``.
        dpi: Resolution passed to ``fig.savefig``.

    Any exception from ``fig.savefig`` propagates to the caller; the figure
    is closed either way.
    """
    try:
        fig.savefig(output, dpi=dpi, bbox_inches="tight", facecolor="white")
        click.echo(f"Saved: {output}")
    finally:
        # Release the figure even when saving fails (bad path, unknown format).
        plt.close(fig)
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # Plot group
100
+ # ---------------------------------------------------------------------------
101
@click.group()
def plot():
    """Generate standard population genomics figures from TSV results.

    \b
    Plot types:
      diversity-bar   Per-population diversity ranking
      fst-heatmap     Pairwise Fst matrix with clustering
      manhattan       Genome-wide statistic scan
      pinpis          piN/piS ratios across populations
      sfs-plot        Site frequency spectrum
      roh-landscape   Per-population FROH distribution
      gene-zoom       Multi-track regional plot for a gene or region
      pop-tree        UPGMA / neighbor-joining tree from pairwise Fst
      chromosome      Multi-track chromosome view of statistics
    """
    # The group has no behaviour of its own; subcommands attach via @plot.command.
115
+
116
+
117
+ # ---------------------------------------------------------------------------
118
+ # diversity-bar
119
+ # ---------------------------------------------------------------------------
120
@plot.command("diversity-bar")
@click.argument("input_dir", type=click.Path(exists=True))
@click.option("-o", "--output", required=True, help="Output figure file (PNG/PDF)")
@click.option("--stat", default="pi", help="Statistic to plot (pi, theta_w, tajima_d, fis)")
@click.option("--title", help="Figure title")
@click.option("--width", type=float, default=7.2, help="Figure width in inches")
@click.option("--height", type=float, default=3.5, help="Figure height in inches")
def diversity_bar(input_dir, output, stat, title, width, height):
    """Plot per-population diversity as a horizontal bar chart.

    INPUT_DIR should contain per-population TSV files from graphpop diversity
    or graphpop run-all (e.g., results/diversity/).

    \b
    Examples:
      graphpop plot diversity-bar results/diversity/ -o fig_diversity.png
      graphpop plot diversity-bar results/diversity/ --stat tajima_d -o fig_tajima.png
    """
    _check_matplotlib()
    _apply_style()

    rows = _read_tsv_dir(input_dir)
    if not rows:
        click.echo("No data found.", err=True)
        return

    # Group the statistic by population. Rows that lack the column (or have an
    # empty cell) are skipped rather than counted as 0, which would drag the
    # per-population mean towards zero.
    pop_vals = {}
    for r in rows:
        raw = r.get(stat)
        if raw is None or not raw.strip():
            continue
        pop = r.get("population", r.get("file_pop", "unknown"))
        pop_vals.setdefault(pop, []).append(float(raw))

    if not pop_vals:
        click.echo(f"No values found for statistic '{stat}'.", err=True)
        return

    # One bar per population: mean across all of its rows, sorted ascending.
    pop_means = {p: np.mean(vals) for p, vals in pop_vals.items()}
    pops = sorted(pop_means, key=pop_means.get)
    means = [pop_means[p] for p in pops]
    colors = [WONG_PALETTE[i % len(WONG_PALETTE)] for i in range(len(pops))]

    fig, ax = plt.subplots(figsize=(width, height))
    y = range(len(pops))
    ax.barh(y, means, color=colors, height=0.7, edgecolor="none")
    ax.set_yticks(y)
    ax.set_yticklabels(pops)
    ax.set_xlabel(stat.replace("_", " ").title() if stat != "pi" else "Nucleotide diversity (π)")
    ax.set_title(title or f"Per-population {stat}", fontweight="bold")

    # Value labels sit just past the tip of each bar. The gap is scaled by the
    # largest magnitude so it stays positive for all-negative statistics
    # (e.g. Tajima's D), and negative bars get their label on the left.
    pad = max(abs(m) for m in means) * 0.01
    for i, v in enumerate(means):
        if v < 0:
            ax.text(v - pad, i, f"{v:.4f}", va="center", ha="right", fontsize=5)
        else:
            ax.text(v + pad, i, f"{v:.4f}", va="center", fontsize=5)

    fig.tight_layout()
    _save_fig(fig, output)
170
+
171
+
172
+ # ---------------------------------------------------------------------------
173
+ # fst-heatmap
174
+ # ---------------------------------------------------------------------------
175
@plot.command("fst-heatmap")
@click.argument("input_dir", type=click.Path(exists=True))
@click.option("-o", "--output", required=True, help="Output figure file")
@click.option("--stat", default="fst_wc", help="Fst statistic (fst_hudson, fst_wc)")
@click.option("--title", help="Figure title")
@click.option("--width", type=float, default=7.2)
@click.option("--height", type=float, default=6.0)
def fst_heatmap(input_dir, output, stat, title, width, height):
    """Plot pairwise Fst as a heatmap matrix.

    INPUT_DIR should contain pairwise TSV files from graphpop divergence
    or graphpop run-all (e.g., results/divergence/).

    \b
    Examples:
      graphpop plot fst-heatmap results/divergence/ -o fig_fst.png
      graphpop plot fst-heatmap results/divergence/ --stat fst_hudson -o fig_fst_hudson.pdf
    """
    _check_matplotlib()
    _apply_style()

    rows = _read_tsv_dir(input_dir)
    if not rows:
        click.echo("No data found.", err=True)
        return

    # Collect values per unordered population pair. Rows without both
    # population names or without the requested statistic are skipped.
    pair_vals = {}
    for r in rows:
        p1 = r.get("pop1", "")
        p2 = r.get("pop2", "")
        raw = r.get(stat)
        if not p1 or not p2 or raw is None or not raw.strip():
            continue
        # Canonical key: (A, B) and (B, A) rows are averaged together instead
        # of one silently overwriting the other in the symmetric matrix.
        key = (p1, p2) if p1 <= p2 else (p2, p1)
        pair_vals.setdefault(key, []).append(float(raw))

    if not pair_vals:
        click.echo("No pairwise data found. Check TSV format.", err=True)
        return

    # Index the populations and fill a symmetric matrix of per-pair means.
    all_pops = sorted(set(p for pair in pair_vals for p in pair))
    n = len(all_pops)
    pop_idx = {p: i for i, p in enumerate(all_pops)}

    matrix = np.zeros((n, n))
    for (p1, p2), vals in pair_vals.items():
        i, j = pop_idx[p1], pop_idx[p2]
        mean_val = np.mean(vals)
        matrix[i, j] = mean_val
        matrix[j, i] = mean_val

    fig, ax = plt.subplots(figsize=(width, height))
    im = ax.imshow(matrix, cmap="YlOrRd", aspect="equal")

    ax.set_xticks(range(n))
    ax.set_yticks(range(n))
    ax.set_xticklabels(all_pops, rotation=45, ha="right")
    ax.set_yticklabels(all_pops)

    # Annotate off-diagonal cells when the matrix is small enough to read.
    if n <= 15:
        # Cells above 60% of the maximum are dark, so their text is white.
        dark_cutoff = np.max(matrix) * 0.6
        for i in range(n):
            for j in range(n):
                if i != j:
                    val = matrix[i, j]
                    color = "white" if val > dark_cutoff else "black"
                    ax.text(j, i, f"{val:.3f}", ha="center", va="center",
                            fontsize=4, color=color)

    fig.colorbar(im, ax=ax, shrink=0.8, label=stat.replace("_", " "))
    ax.set_title(title or f"Pairwise {stat}", fontweight="bold")
    fig.tight_layout()
    _save_fig(fig, output)
250
+
251
+
252
+ # ---------------------------------------------------------------------------
253
+ # manhattan
254
+ # ---------------------------------------------------------------------------
255
@plot.command("manhattan")
@click.argument("input_file", type=click.Path(exists=True))
@click.option("-o", "--output", required=True, help="Output figure file")
@click.option("--stat", default="ihs", help="Statistic column name")
@click.option("--threshold", type=float, help="Significance threshold line")
@click.option("--abs-value/--raw-value", default=True, help="Plot absolute values")
@click.option("--title", help="Figure title")
@click.option("--width", type=float, default=7.2)
@click.option("--height", type=float, default=3.0)
def manhattan(input_file, output, stat, threshold, abs_value, title, width, height):
    """Plot a Manhattan plot of per-variant or per-window statistics.

    INPUT_FILE should be a TSV with columns: pos (or start) and the statistic.
    For multi-chromosome input, include a chr column.

    \b
    Examples:
      graphpop plot manhattan ihs_results.tsv --stat ihs --threshold 2.5 -o fig_ihs.png
      graphpop plot manhattan windows.tsv --stat fst --threshold 0.5 -o fig_fst_scan.png
      graphpop plot manhattan xpehh.tsv --stat xpehh --raw-value -o fig_xpehh.pdf
    """
    _check_matplotlib()
    _apply_style()

    rows = _read_tsv(input_file)
    if not rows:
        click.echo("No data found.", err=True)
        return

    # Extract positions, values and chromosome names. The statistic is read
    # from the <stat> column, falling back to <stat>_unstd. Rows whose value
    # or position is missing, non-numeric or NaN are skipped instead of
    # crashing the command or being plotted as 0.
    positions = []
    values = []
    chroms = []
    skipped = 0
    for r in rows:
        raw_val = r.get(stat, r.get(f"{stat}_unstd"))
        raw_pos = r.get("pos", r.get("start", 0))
        try:
            val = float(raw_val)
            pos = int(raw_pos)
        except (TypeError, ValueError):
            skipped += 1
            continue
        if val != val:  # NaN is the only float that differs from itself
            skipped += 1
            continue
        if abs_value:
            val = abs(val)
        positions.append(pos)
        values.append(val)
        chroms.append(r.get("chr", r.get("chromosome", "")))

    if skipped:
        click.echo(
            f"Warning: skipped {skipped} row(s) without a numeric '{stat}' value or position.",
            err=True,
        )
    if not values:
        click.echo(f"No numeric values found for statistic '{stat}'.", err=True)
        return

    fig, ax = plt.subplots(figsize=(width, height))

    # Shorter names sort first, so "chr2" precedes "chr10".
    unique_chrs = sorted(set(chroms), key=lambda c: (len(c), c))
    if len(unique_chrs) > 1:
        # Alternate two colours between neighbouring chromosomes.
        chr_colors = {c: WONG_PALETTE[i % 2] for i, c in enumerate(unique_chrs)}

        # Single pass over the points to get each chromosome's extent.
        chr_min = {}
        chr_max = {}
        for p, c in zip(positions, chroms):
            if c in chr_max:
                if p > chr_max[c]:
                    chr_max[c] = p
                if p < chr_min[c]:
                    chr_min[c] = p
            else:
                chr_min[c] = chr_max[c] = p

        # Lay chromosomes end to end with a 5% gap after each one.
        chr_offsets = {}
        offset = 0
        for c in unique_chrs:
            chr_offsets[c] = offset
            offset += chr_max[c] + chr_max[c] * 0.05

        adj_pos = [p + chr_offsets[c] for p, c in zip(positions, chroms)]
        colors = [chr_colors[c] for c in chroms]
        ax.scatter(adj_pos, values, c=colors, s=1, alpha=0.5, rasterized=True)

        # Chromosome labels, centred on each chromosome's plotted span.
        label_y = -max(values) * 0.05
        for c in unique_chrs:
            mid = (chr_min[c] + chr_max[c]) / 2 + chr_offsets[c]
            label = c.replace("chr", "").replace("Chr", "")
            ax.text(mid, label_y, label, ha="center", fontsize=4)
        ax.set_xlabel("Chromosome")
    else:
        ax.scatter(positions, values, c=WONG_PALETTE[0], s=1, alpha=0.5, rasterized=True)
        ax.set_xlabel(f"Position on {unique_chrs[0] if unique_chrs else 'chromosome'} (bp)")

    if threshold is not None:
        ax.axhline(threshold, color="#D55E00", linestyle="--", linewidth=0.8, alpha=0.7)

    ylabel = f"|{stat}|" if abs_value else stat
    ax.set_ylabel(ylabel)
    ax.set_title(title or f"Manhattan plot: {stat}", fontweight="bold")
    fig.tight_layout()
    _save_fig(fig, output)
337
+
338
+
339
+ # ---------------------------------------------------------------------------
340
+ # pinpis
341
+ # ---------------------------------------------------------------------------
342
@plot.command("pinpis")
@click.argument("input_file", type=click.Path(exists=True))
@click.option("-o", "--output", required=True, help="Output figure file")
@click.option("--title", help="Figure title")
@click.option("--width", type=float, default=7.2)
@click.option("--height", type=float, default=4.0)
def pinpis(input_file, output, title, width, height):
    """Plot piN/piS ratios across populations.

    INPUT_FILE should be a TSV with columns: population, piN_piS (or piN and piS
    columns to compute the ratio).

    \b
    Generate input:
      # For each population, compute piN and piS:
      graphpop diversity chr1 1 43270923 POP --consequence missense_variant -o piN.tsv
      graphpop diversity chr1 1 43270923 POP --consequence synonymous_variant -o piS.tsv
      # Combine into a single TSV with columns: population, piN_piS

    \b
    Examples:
      graphpop plot pinpis pinpis_ratios.tsv -o fig_pinpis.png
    """
    _check_matplotlib()
    _apply_style()

    rows = _read_tsv(input_file)
    if not rows:
        click.echo("No data found.", err=True)
        return

    # Accept either a precomputed ratio column (piN_piS / pinpis / ratio) or
    # separate piN and piS columns under several spellings.
    pops = []
    ratios = []
    for r in rows:
        pop = r.get("population", r.get("pop", ""))
        ratio = r.get("piN_piS", r.get("pinpis", r.get("ratio", None)))
        if ratio is None:
            piN = float(r.get("piN", r.get("pi_N", r.get("pi_missense", 0))))
            piS = float(r.get("piS", r.get("pi_S", r.get("pi_synonymous", 1))))
            # A non-positive piS would divide by zero; report the ratio as 0.
            ratio = piN / piS if piS > 0 else 0
        else:
            ratio = float(ratio)
        pops.append(pop)
        ratios.append(ratio)

    # Sort populations by ascending ratio.
    sorted_pairs = sorted(zip(pops, ratios), key=lambda x: x[1])
    pops = [p for p, _ in sorted_pairs]
    ratios = [r for _, r in sorted_pairs]

    fig, ax = plt.subplots(figsize=(width, height))
    # Ratios above 1 get the contrasting colour.
    colors = [WONG_PALETTE[0] if r <= 1.0 else WONG_PALETTE[3] for r in ratios]
    ax.barh(range(len(pops)), ratios, color=colors, height=0.7, edgecolor="none")

    ax.axvline(1.0, color="black", linestyle="--", linewidth=0.8, alpha=0.5,
               label="Neutral expectation (πN/πS = 1)")
    ax.set_yticks(range(len(pops)))
    ax.set_yticklabels(pops)
    ax.set_xlabel("πN/πS ratio")
    ax.set_title(title or "Cost of domestication: πN/πS across populations", fontweight="bold")

    # Numeric label just past the end of each bar.
    pad = max(ratios) * 0.01
    for i, v in enumerate(ratios):
        ax.text(v + pad, i, f"{v:.3f}", va="center", fontsize=5)

    ax.legend(fontsize=5, loc="lower right")
    fig.tight_layout()
    _save_fig(fig, output)
409
+
410
+
411
+ # ---------------------------------------------------------------------------
412
+ # sfs-plot
413
+ # ---------------------------------------------------------------------------
414
@plot.command("sfs-plot")
@click.argument("input_file", type=click.Path(exists=True))
@click.option("-o", "--output", required=True, help="Output figure file")
@click.option("--title", help="Figure title")
@click.option("--log-scale/--linear", default=False, help="Use log scale for y-axis")
@click.option("--width", type=float, default=5.0)
@click.option("--height", type=float, default=3.5)
def sfs_plot(input_file, output, title, log_scale, width, height):
    """Plot a site frequency spectrum.

    INPUT_FILE should be a TSV from graphpop sfs with an 'sfs' column
    containing comma-separated counts.

    \b
    Examples:
      graphpop sfs chr22 1 51304566 EUR -o sfs.tsv
      graphpop plot sfs-plot sfs.tsv -o fig_sfs.png
      graphpop plot sfs-plot sfs.tsv --log-scale -o fig_sfs_log.png
    """
    _check_matplotlib()
    _apply_style()

    rows = _read_tsv(input_file)
    if not rows:
        click.echo("No data found.", err=True)
        return

    # Only the first data row is plotted; its 'sfs' cell holds the counts.
    sfs_str = rows[0].get("sfs") or ""
    counts = [int(x) for x in sfs_str.split(",") if x.strip()]
    if not counts:
        # Without this guard an empty figure would be written silently.
        click.echo("No 'sfs' column with comma-separated counts found.", err=True)
        return

    fig, ax = plt.subplots(figsize=(width, height))
    x = range(len(counts))
    ax.bar(x, counts, color=WONG_PALETTE[0], edgecolor="none", width=0.8)

    if log_scale:
        ax.set_yscale("log")
        ax.set_ylabel("Count (log scale)")
    else:
        ax.set_ylabel("Count")

    ax.set_xlabel("Allele count")
    ax.set_title(title or "Site frequency spectrum", fontweight="bold")

    # Ticks at the first bin, the quartile bins and the last bin. The set
    # removes the duplicates that arise for very short spectra.
    n_bins = len(counts)
    if n_bins > 2:
        ax.set_xticks(sorted({0, n_bins // 4, n_bins // 2,
                              3 * n_bins // 4, n_bins - 1}))

    fig.tight_layout()
    _save_fig(fig, output)
464
+
465
+
466
+ # ---------------------------------------------------------------------------
467
+ # roh-landscape
468
+ # ---------------------------------------------------------------------------
469
@plot.command("roh-landscape")
@click.argument("input_dir", type=click.Path(exists=True))
@click.option("-o", "--output", required=True, help="Output figure file")
@click.option("--title", help="Figure title")
@click.option("--width", type=float, default=7.2)
@click.option("--height", type=float, default=4.0)
def roh_landscape(input_dir, output, title, width, height):
    """Plot per-population FROH distribution as violin/box plots.

    INPUT_DIR should contain per-population ROH TSV files from graphpop roh
    or graphpop run-all (e.g., results/roh/).

    \b
    Examples:
      graphpop plot roh-landscape results/roh/ -o fig_roh.png
    """
    _check_matplotlib()
    _apply_style()

    records = _read_tsv_dir(input_dir)
    if not records:
        click.echo("No data found.", err=True)
        return

    # Bucket the 'froh' column by population label (missing cells count as 0).
    froh_by_pop = {}
    for record in records:
        label = record.get("population", record.get("file_pop", "unknown"))
        bucket = froh_by_pop.setdefault(label, [])
        bucket.append(float(record.get("froh", 0)))

    # Populations run left to right by increasing median FROH.
    ordered = sorted(froh_by_pop, key=lambda name: np.median(froh_by_pop[name]))
    samples = [froh_by_pop[name] for name in ordered]
    slots = range(len(ordered))

    fig, ax = plt.subplots(figsize=(width, height))
    violins = ax.violinplot(samples, positions=slots, showmedians=True,
                            showextrema=False)

    # One palette colour per violin, cycling when populations outnumber colours.
    palette_size = len(WONG_PALETTE)
    for idx, violin in enumerate(violins["bodies"]):
        violin.set_facecolor(WONG_PALETTE[idx % palette_size])
        violin.set_alpha(0.7)
        violin.set_edgecolor("none")
    median_lines = violins["cmedians"]
    median_lines.set_color("black")
    median_lines.set_linewidth(1.0)

    # Overlay each population's mean as a black diamond.
    averages = [np.mean(sample) for sample in samples]
    ax.scatter(slots, averages, color="black", s=15, zorder=3, marker="D")

    ax.set_xticks(slots)
    ax.set_xticklabels(ordered, rotation=45, ha="right")
    ax.set_ylabel("FROH (fraction of genome in ROH)")
    ax.set_title(title or "Inbreeding landscape: FROH by population", fontweight="bold")

    fig.tight_layout()
    _save_fig(fig, output)
525
+
526
+
527
+ # ---------------------------------------------------------------------------
528
+ # gene-zoom
529
+ # ---------------------------------------------------------------------------
530
@plot.command("gene-zoom")
@click.argument("target")
@click.option("--pop", "population", required=True, help="Population name")
@click.option("--pop2", help="Second population (for Fst track)")
@click.option("-o", "--output", required=True, help="Output figure file (PNG/PDF)")
@click.option("--title", help="Figure title")
@click.option("--width", type=float, default=7.2, help="Figure width in inches")
@click.option("--height", type=float, default=6.0, help="Figure height in inches")
@pass_ctx
def gene_zoom(ctx, target, population, pop2, output, title, width, height):
    """Multi-track regional plot for a gene or genomic region.

    TARGET is either a gene name (e.g., KCNE1) or a region in chr:start-end
    format (e.g., chr6:9000000-9600000). The command resolves gene names to
    coordinates via the Gene node in the graph.

    \b
    Tracks (top to bottom):
      1. Fst (from GenomicWindow or per-variant)
      2. |iHS| (from Variant properties)
      3. Gene model (from HAS_CONSEQUENCE edges)

    \b
    Examples:
      graphpop plot gene-zoom KCNE1 --pop EUR -o fig_kcne1.png
      graphpop plot gene-zoom chr6:9000000-9600000 --pop GJ-tmp -o fig_hd1.png
      graphpop plot gene-zoom GW5 --pop GJ-tmp --pop2 GJ-trop -o fig_gw5.pdf
    """
    _check_matplotlib()
    _apply_style()

    # NOTE(review): --pop2 is accepted but none of the queries below use it;
    # the Fst track is filtered by --pop only. Confirm the intended schema.

    # --- Resolve target to chr, start, end ---
    region_match = re.match(r'^(chr\w+|Chr\w+):(\d+)-(\d+)$', target)
    if region_match:
        chrom = region_match.group(1)
        reg_start = int(region_match.group(2))
        reg_end = int(region_match.group(3))
        region_label = f"{chrom}:{reg_start}-{reg_end}"
    else:
        # Anything that is not chr:start-end is looked up as a gene.
        recs = ctx.run(
            "MATCH (g:Gene) "
            "WHERE g.symbol = $target OR g.geneId = $target "
            "RETURN g.chr AS chr, g.start AS start, g.end AS end, "
            "g.symbol AS symbol LIMIT 1",
            {"target": target},
        )
        if not recs:
            click.echo(f"Gene '{target}' not found in the graph.", err=True)
            raise SystemExit(1)
        g = recs[0]
        chrom = g["chr"]
        # Pad 20% of the gene length on each side, but at least 10 kb.
        gene_len = (g["end"] or 0) - (g["start"] or 0)
        pad = max(gene_len * 0.2, 10000)
        reg_start = max(0, int((g["start"] or 0) - pad))
        reg_end = int((g["end"] or 0) + pad)
        region_label = f"{g['symbol']} ({chrom}:{reg_start}-{reg_end})"

    click.echo(f"Region: {region_label}", err=True)

    # --- Query Fst from GenomicWindow ---
    fst_pos = []
    fst_vals = []
    fst_query = (
        "MATCH (w:GenomicWindow) "
        "WHERE w.chr = $chrom AND w.population = $population "
        "AND w.start >= $reg_start AND w.end <= $reg_end "
        "RETURN w.start AS start, w.end AS end, "
        "w.fst AS fst "
        "ORDER BY w.start"
    )
    region_params = {
        "chrom": chrom, "population": population,
        "reg_start": reg_start, "reg_end": reg_end,
    }
    try:
        fst_recs = ctx.run(fst_query, region_params)
        for r in fst_recs:
            # Each window is plotted at its midpoint.
            mid = ((r["start"] or 0) + (r["end"] or 0)) / 2
            val = r.get("fst")
            if val is not None:
                fst_pos.append(mid)
                fst_vals.append(float(val))
    except SystemExit:
        # A failed query leaves the track empty instead of aborting the plot.
        click.echo("Warning: no GenomicWindow Fst data.", err=True)

    # --- Query |iHS| from Variant nodes ---
    # Property names cannot be query parameters, so the name is interpolated.
    # Backtick-quote it (doubling embedded backticks) so that populations such
    # as "GJ-tmp" are not parsed as `v.ihs_GJ - tmp` and the user-supplied
    # value cannot break out of the identifier.
    ihs_prop = "`" + f"ihs_{population}".replace("`", "``") + "`"
    ihs_pos = []
    ihs_vals = []
    ihs_query = (
        f"MATCH (v:Variant) "
        f"WHERE v.chr = $chrom AND v.pos >= $reg_start AND v.pos <= $reg_end "
        f"AND v.{ihs_prop} IS NOT NULL "
        f"RETURN v.pos AS pos, v.{ihs_prop} AS ihs "
        f"ORDER BY v.pos"
    )
    try:
        ihs_recs = ctx.run(ihs_query, region_params)
        for r in ihs_recs:
            ihs_pos.append(r["pos"])
            ihs_vals.append(abs(float(r["ihs"])))
    except SystemExit:
        click.echo("Warning: no iHS data.", err=True)

    # --- Query gene models ---
    gene_query = (
        "MATCH (v:Variant)-[hc:HAS_CONSEQUENCE]->(g:Gene) "
        "WHERE v.chr = $chrom AND v.pos >= $reg_start AND v.pos <= $reg_end "
        "RETURN DISTINCT g.symbol AS gene, g.start AS start, g.end AS end "
        "ORDER BY g.start"
    )
    gene_recs = ctx.run(gene_query, region_params)

    # --- Build figure: three stacked tracks sharing the x axis ---
    fig, axes = plt.subplots(3, 1, figsize=(width, height), sharex=True,
                             gridspec_kw={"height_ratios": [2, 2, 1]})

    # Track 1: Fst
    ax_fst = axes[0]
    if fst_pos:
        ax_fst.fill_between(fst_pos, fst_vals, alpha=0.3, color=WONG_PALETTE[0])
        ax_fst.plot(fst_pos, fst_vals, color=WONG_PALETTE[0], linewidth=0.8)
    ax_fst.set_ylabel("Fst")
    ax_fst.set_title(title or f"Gene zoom: {region_label}", fontweight="bold")

    # Track 2: |iHS|
    ax_ihs = axes[1]
    if ihs_pos:
        ax_ihs.scatter(ihs_pos, ihs_vals, s=2, color=WONG_PALETTE[3],
                       alpha=0.6, rasterized=True)
        # Reference line at |iHS| = 2.0
        ax_ihs.axhline(2.0, color="grey", linestyle="--", linewidth=0.6, alpha=0.5)
    ax_ihs.set_ylabel("|iHS|")

    # Track 3: Gene models, each gene drawn on its own row.
    ax_gene = axes[2]
    if gene_recs:
        y_pos = 0.5
        for i, g in enumerate(gene_recs):
            # Genes without coordinates span the whole plotted region.
            g_start = g.get("start") or reg_start
            g_end = g.get("end") or reg_end
            color = WONG_PALETTE[i % len(WONG_PALETTE)]
            ax_gene.barh(y_pos, g_end - g_start, left=g_start, height=0.3,
                         color=color, edgecolor="black", linewidth=0.3)
            mid = (g_start + g_end) / 2
            ax_gene.text(mid, y_pos + 0.25, g.get("gene", ""),
                         ha="center", va="bottom", fontsize=5, style="italic")
            y_pos += 0.5
    ax_gene.set_ylabel("Genes")
    ax_gene.set_yticks([])
    ax_gene.set_xlabel(f"Position on {chrom} (bp)")
    ax_gene.set_xlim(reg_start, reg_end)

    # Mark variants within 10% of the strongest |iHS| on every track.
    if ihs_vals:
        top = max(ihs_vals)
        # 999 is a sentinel that no value reaches when all |iHS| are 0.
        peak_thresh = top * 0.9 if top > 0 else 999
        for pos, val in zip(ihs_pos, ihs_vals):
            if val >= peak_thresh:
                for ax in axes:
                    ax.axvline(pos, color=WONG_PALETTE[3], linestyle=":",
                               linewidth=0.5, alpha=0.4)

    fig.tight_layout()
    _save_fig(fig, output)
696
+
697
+
698
+ # ---------------------------------------------------------------------------
699
+ # pop-tree
700
+ # ---------------------------------------------------------------------------
701
@plot.command("pop-tree")
@click.argument("input_dir", type=click.Path(exists=True))
@click.option("-o", "--output", required=True, help="Output figure file (PNG/PDF)")
@click.option("--method", default="upgma", type=click.Choice(["upgma", "nj"]),
              help="Tree method: upgma (default) or nj")
@click.option("--stat", default="fst_wc", help="Fst statistic column (fst_wc or fst_hudson)")
@click.option("--title", help="Figure title")
@click.option("--width", type=float, default=7.2, help="Figure width in inches")
@click.option("--height", type=float, default=5.0, help="Figure height in inches")
def pop_tree(input_dir, output, method, stat, title, width, height):
    """Build a UPGMA or neighbor-joining tree from pairwise Fst data.

    INPUT_DIR should contain pairwise divergence TSV files (same format as
    fst-heatmap input, from graphpop divergence or graphpop run-all).

    Uses scipy.cluster.hierarchy for UPGMA clustering. Neighbor-joining is
    approximated via the 'weighted' linkage method.

    \b
    Examples:
      graphpop plot pop-tree results/divergence/ -o fig_tree.png
      graphpop plot pop-tree results/divergence/ --method nj --stat fst_hudson -o fig_nj.png
    """
    _check_matplotlib()
    _apply_style()

    if not HAS_SCIPY:
        click.echo(
            "Error: scipy is required for pop-tree.\n"
            "Install with: pip install scipy",
            err=True,
        )
        raise SystemExit(1)

    rows = _read_tsv_dir(input_dir)
    if not rows:
        click.echo("No data found.", err=True)
        return

    # Collect values per unordered population pair. Rows without both
    # population names or without the requested statistic are skipped, so a
    # mistyped --stat reaches the error below instead of giving an all-zero tree.
    pair_vals = {}
    for r in rows:
        p1 = r.get("pop1", "")
        p2 = r.get("pop2", "")
        raw = r.get(stat)
        if not p1 or not p2 or raw is None or not raw.strip():
            continue
        # Canonical key so (A, B) and (B, A) rows are averaged together.
        key = (p1, p2) if p1 <= p2 else (p2, p1)
        pair_vals.setdefault(key, []).append(float(raw))

    if not pair_vals:
        click.echo(f"No pairwise data found. Check TSV format and --stat={stat}.", err=True)
        return

    # Build distance matrix
    all_pops = sorted(set(p for pair in pair_vals for p in pair))
    n = len(all_pops)
    if n < 2:
        # linkage() cannot cluster fewer than two observations.
        click.echo("At least two populations are required to build a tree.", err=True)
        return
    pop_idx = {p: i for i, p in enumerate(all_pops)}

    # Pairs absent from the input keep distance 0 in the matrix, i.e. they are
    # treated as identical populations; warn so the tree is not over-trusted.
    n_observed = sum(1 for a, b in pair_vals if a != b)
    n_expected = n * (n - 1) // 2
    if n_observed < n_expected:
        click.echo(
            f"Warning: {n_expected - n_observed} population pair(s) have no "
            f"{stat} value and are treated as distance 0.",
            err=True,
        )

    matrix = np.zeros((n, n))
    for (p1, p2), vals in pair_vals.items():
        i, j = pop_idx[p1], pop_idx[p2]
        mean_val = max(0, np.mean(vals))  # Clamp negative Fst to 0
        matrix[i, j] = mean_val
        matrix[j, i] = mean_val

    # Convert to condensed distance form for scipy
    dist_condensed = squareform(matrix, checks=False)

    # UPGMA is average linkage; "nj" is only approximated by weighted linkage.
    if method == "upgma":
        linkage_method = "average"
    else:
        linkage_method = "weighted"

    Z = linkage(dist_condensed, method=linkage_method)

    # Plot dendrogram with leaves on the left, all branches in one colour.
    fig, ax = plt.subplots(figsize=(width, height))
    dendrogram(
        Z,
        labels=all_pops,
        ax=ax,
        leaf_rotation=0,
        orientation="left",
        leaf_font_size=7,
        color_threshold=0,
        above_threshold_color=WONG_PALETTE[0],
    )

    ax.set_xlabel(f"Genetic distance ({stat.replace('_', ' ')})")
    tree_label = "UPGMA" if method == "upgma" else "Neighbor-joining"
    ax.set_title(title or f"Population tree ({tree_label}, {stat})", fontweight="bold")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    fig.tight_layout()
    _save_fig(fig, output)
799
+
800
+
801
+ # ---------------------------------------------------------------------------
802
+ # chromosome — multi-track chromosome view
803
+ # ---------------------------------------------------------------------------
804
@plot.command("chromosome")
@click.option("--chr", "chrom", required=True, help="Chromosome name (e.g., chr22)")
@click.option("--pop", "population", required=True, help="Population name")
@click.option("--stats", default="fst,ihs",
              help="Comma-separated statistics to plot (e.g., fst,ihs,pi,tajima_d,xpehh)")
@click.option("-o", "--output", required=True, help="Output figure file (PNG/PDF)")
@click.option("--title", help="Figure title")
@click.option("--width", type=float, default=7.2, help="Figure width in inches")
@click.option("--height", type=float, default=2.0,
              help="Height per track in inches (total = n_tracks * height)")
@pass_ctx
def chromosome(ctx, chrom, population, stats, output, title, width, height):
    """Multi-track chromosome view of population statistics.

    Draws stacked tracks for each requested statistic along the chromosome.
    Window-level stats (fst, pi, theta_w, tajima_d) are queried from
    GenomicWindow nodes.
    Variant-level stats (ihs, xpehh) are queried from Variant nodes.

    \b
    Examples:
      graphpop plot chromosome --chr chr22 --pop EUR --stats fst,ihs,pi -o fig_chr22.png
      graphpop plot chromosome --chr Chr01 --pop GJ-tmp --stats fst,pi -o fig_chr01.png
    """
    _check_matplotlib()
    _apply_style()

    stat_list = [s.strip() for s in stats.split(",") if s.strip()]
    if not stat_list:
        click.echo("No statistics specified.", err=True)
        raise SystemExit(1)

    # Whitelists of property names that may be spliced into the Cypher text.
    window_stats = {"fst", "pi", "theta_w", "tajima_d"}
    variant_stats = {"ihs", "xpehh"}

    n_tracks = len(stat_list)
    # squeeze=False keeps `axes` two-dimensional even for a single track, so
    # flatten() always yields one axis per requested statistic.
    fig, axes = plt.subplots(n_tracks, 1, figsize=(width, height * n_tracks),
                             sharex=True, squeeze=False)
    axes = axes.flatten()

    def _placeholder(target_ax, message, text_color):
        """Write a centred note on a track that has nothing to draw."""
        target_ax.text(0.5, 0.5, message, transform=target_ax.transAxes,
                       ha="center", va="center", fontsize=7, color=text_color)

    for idx, (ax, stat) in enumerate(zip(axes, stat_list)):
        color = WONG_PALETTE[idx % len(WONG_PALETTE)]
        # The whitelist check is case-insensitive, so the query must use the
        # same normalised name; otherwise "--stats FST" would pass validation
        # and then look up a property called "FST".
        stat_key = stat.lower()

        if stat_key in window_stats:
            query = (
                "MATCH (w:GenomicWindow) "
                "WHERE w.chr = $chrom AND w.population = $population "
                f"AND w.{stat_key} IS NOT NULL "
                f"RETURN w.start AS start, w.end AS end, w.{stat_key} AS value "
                "ORDER BY w.start"
            )
            recs = ctx.run(query, {"chrom": chrom, "population": population})
            if recs:
                # Plot each window at its midpoint.
                positions = [((r["start"] or 0) + (r["end"] or 0)) / 2 for r in recs]
                values = [float(r["value"]) for r in recs]
                ax.fill_between(positions, values, alpha=0.3, color=color)
                ax.plot(positions, values, color=color, linewidth=0.6)
            else:
                _placeholder(ax, "No data", "grey")

        elif stat_key in variant_stats:
            # Property keys cannot be passed as query parameters, so the
            # per-population key is embedded in the query text.  Backtick
            # quoting keeps names such as "GJ-tmp" from being parsed as an
            # expression and neutralises any backtick in the user input.
            raw_prop = f"{stat_key}_{population}"
            prop = "`" + raw_prop.replace("`", "``") + "`"
            query = (
                "MATCH (v:Variant) "
                f"WHERE v.chr = $chrom AND v.{prop} IS NOT NULL "
                f"RETURN v.pos AS pos, v.{prop} AS value "
                "ORDER BY v.pos"
            )
            recs = ctx.run(query, {"chrom": chrom})
            if recs:
                positions = [r["pos"] for r in recs]
                # Variant-level scores are shown as magnitudes (see y-label).
                values = [abs(float(r["value"])) for r in recs]
                ax.scatter(positions, values, s=1, alpha=0.4, color=color,
                           rasterized=True)
            else:
                _placeholder(ax, "No data", "grey")

        else:
            _placeholder(ax, f"Unknown stat: {stat}", "red")

        label = f"|{stat}|" if stat_key in variant_stats else stat
        ax.set_ylabel(label, fontsize=7)

        # Alternating background
        if idx % 2 == 1:
            ax.set_facecolor("#f8f8f8")

    axes[-1].set_xlabel(f"Position on {chrom} (bp)")
    axes[0].set_title(
        title or f"Chromosome view: {chrom} ({population})", fontweight="bold"
    )

    fig.tight_layout()
    _save_fig(fig, output)
903
+
904
+
905
+ # ---------------------------------------------------------------------------
906
+ # pca-scatter
907
+ # ---------------------------------------------------------------------------
908
@plot.command("pca-scatter")
@click.argument("input_file", type=click.Path(exists=True))
@click.option("-o", "--output", required=True, help="Output figure file (PNG/PDF)")
@click.option("--color-by", "color_by", default="population",
              help="Column name to color points by (default: population)")
@click.option("--pc", "pc_axes", default="1,2",
              help="Which PCs to plot, comma-separated (default: 1,2)")
@click.option("--title", help="Figure title")
@click.option("--width", type=float, default=5.0, help="Figure width in inches")
@click.option("--height", type=float, default=5.0, help="Figure height in inches")
def pca_scatter(input_file, output, color_by, pc_axes, title, width, height):
    """PCA scatter plot from a TSV with PC columns.

    INPUT_FILE should be a TSV with columns like pc1, pc2 (or PC1, PC2) and
    a grouping column (default: population) for coloring.

    \b
    Examples:
      graphpop plot pca-scatter pca_results.tsv -o fig_pca.png
      graphpop plot pca-scatter pca.tsv --color-by superpopulation --pc 1,3 -o pca13.png
    """
    _check_matplotlib()
    _apply_style()

    rows = _read_tsv(input_file)
    if not rows:
        click.echo("No data found.", err=True)
        return

    # Parse PC axes.  Both a non-integer token and a wrong number of tokens
    # surface as ValueError (from int() and from tuple unpacking).
    try:
        pc_a, pc_b = (int(part.strip()) for part in pc_axes.split(","))
    except ValueError:
        click.echo("Invalid --pc format. Use e.g. '1,2'.", err=True)
        raise SystemExit(1)

    # Find PC column names (case-insensitive)
    sample_keys = list(rows[0].keys())
    pc_col_a = _find_pc_col(sample_keys, pc_a)
    pc_col_b = _find_pc_col(sample_keys, pc_b)
    if not pc_col_a or not pc_col_b:
        click.echo(
            f"Could not find PC{pc_a} and PC{pc_b} columns in: {sample_keys}",
            err=True,
        )
        raise SystemExit(1)

    # Group coordinates by the colour column.  Rows whose PC cells are
    # missing or non-numeric (e.g. "NA") are skipped instead of aborting the
    # whole plot with a traceback.
    groups = {}
    skipped = 0
    for r in rows:
        try:
            x = float(r[pc_col_a])
            y = float(r[pc_col_b])
        except (KeyError, TypeError, ValueError):
            skipped += 1
            continue
        group = r.get(color_by, "unknown")
        if group is None:
            # A None label cannot be sorted together with string labels.
            group = "unknown"
        xs, ys = groups.setdefault(group, ([], []))
        xs.append(x)
        ys.append(y)

    if skipped:
        click.echo(
            f"Warning: skipped {skipped} row(s) with missing or non-numeric PC values.",
            err=True,
        )
    if not groups:
        click.echo("No data found.", err=True)
        return

    fig, ax = plt.subplots(figsize=(width, height))
    # Sorted group order keeps the colour assignment stable between runs.
    for i, group_name in enumerate(sorted(groups)):
        xs, ys = groups[group_name]
        color = WONG_PALETTE[i % len(WONG_PALETTE)]
        ax.scatter(xs, ys, s=8, alpha=0.7, color=color, label=group_name,
                   edgecolors="none")

    ax.set_xlabel(f"PC{pc_a}")
    ax.set_ylabel(f"PC{pc_b}")
    ax.set_title(title or f"PCA: PC{pc_a} vs PC{pc_b}", fontweight="bold")

    # Legend outside if many groups
    if len(groups) > 8:
        ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", fontsize=5,
                  markerscale=1.5, frameon=False)
    else:
        ax.legend(fontsize=6, markerscale=1.5, frameon=False)

    fig.tight_layout()
    _save_fig(fig, output)
984
+
985
+
986
+ def _find_pc_col(keys: list[str], pc_num: int) -> str | None:
987
+ """Find the column name for a given PC number (case-insensitive)."""
988
+ candidates = [f"pc{pc_num}", f"PC{pc_num}", f"Pc{pc_num}",
989
+ f"pc_{pc_num}", f"PC_{pc_num}"]
990
+ for c in candidates:
991
+ if c in keys:
992
+ return c
993
+ # Fallback: partial match
994
+ for k in keys:
995
+ if k.lower().replace("_", "") == f"pc{pc_num}":
996
+ return k
997
+ return None
998
+
999
+
1000
+ # ---------------------------------------------------------------------------
1001
+ # heatmap — general-purpose heatmap from a matrix TSV
1002
+ # ---------------------------------------------------------------------------
1003
@plot.command("heatmap")
@click.argument("input_file", type=click.Path(exists=True))
@click.option("-o", "--output", required=True, help="Output figure file (PNG/PDF)")
@click.option("--cmap", default="viridis", help="Matplotlib colormap (default: viridis)")
@click.option("--annotate", is_flag=True, help="Add numeric values to cells")
@click.option("--title", help="Figure title")
@click.option("--width", type=float, default=7.2, help="Figure width in inches")
@click.option("--height", type=float, default=6.0, help="Figure height in inches")
def heatmap(input_file, output, cmap, annotate, title, width, height):
    """General-purpose heatmap from a matrix TSV.

    INPUT_FILE should be a TSV where the first column contains row labels
    and the header row contains column labels. All other cells are numeric.

    \b
    Examples:
      graphpop plot heatmap matrix.tsv -o fig_heatmap.png --cmap YlOrRd --annotate
      graphpop plot heatmap fst_matrix.tsv -o fig_fst.png --title "Pairwise Fst"
    """
    _check_matplotlib()
    _apply_style()

    rows = _read_tsv(input_file)
    if not rows:
        click.echo("No data found.", err=True)
        return

    # First column = row labels, rest = numeric matrix
    col_keys = list(rows[0].keys())
    if len(col_keys) < 2:
        # Only a label column (or nothing at all): there is no matrix to draw.
        click.echo("No data found.", err=True)
        return
    label_col, value_cols = col_keys[0], col_keys[1:]

    row_labels = [r[label_col] for r in rows]
    # Start from NaN so that any cell that is missing or not a number simply
    # stays NaN.
    matrix = np.full((len(rows), len(value_cols)), np.nan)
    for i, r in enumerate(rows):
        for j, c in enumerate(value_cols):
            try:
                matrix[i, j] = float(r[c])
            except (KeyError, TypeError, ValueError):
                pass

    fig, ax = plt.subplots(figsize=(width, height))
    im = ax.imshow(matrix, cmap=cmap, aspect="auto")

    ax.set_xticks(range(len(value_cols)))
    ax.set_yticks(range(len(row_labels)))
    ax.set_xticklabels(value_cols, rotation=45, ha="right")
    ax.set_yticklabels(row_labels)

    if annotate:
        for (i, j), val in np.ndenumerate(matrix):
            if np.isnan(val):
                continue
            # Pick the text colour from the brightness of the colour the
            # cell is actually drawn in.  A fixed "large value => white"
            # rule is wrong for colormaps whose high end is light (the
            # default viridis ends in yellow) and for data that is negative
            # or spans a narrow range.
            red, green, blue = im.cmap(im.norm(val))[:3]
            luminance = 0.299 * red + 0.587 * green + 0.114 * blue
            text_color = "white" if luminance < 0.5 else "black"
            ax.text(j, i, f"{val:.3g}", ha="center", va="center",
                    fontsize=5, color=text_color)

    fig.colorbar(im, ax=ax, shrink=0.8)
    ax.set_title(title or "Heatmap", fontweight="bold")
    fig.tight_layout()
    _save_fig(fig, output)