graphpop-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. graphpop_cli/__init__.py +2 -0
  2. graphpop_cli/cli.py +161 -0
  3. graphpop_cli/commands/__init__.py +1 -0
  4. graphpop_cli/commands/aggregate.py +206 -0
  5. graphpop_cli/commands/batch.py +155 -0
  6. graphpop_cli/commands/compare.py +118 -0
  7. graphpop_cli/commands/config_cmd.py +117 -0
  8. graphpop_cli/commands/converge.py +156 -0
  9. graphpop_cli/commands/db.py +188 -0
  10. graphpop_cli/commands/divergence.py +37 -0
  11. graphpop_cli/commands/diversity.py +36 -0
  12. graphpop_cli/commands/dump.py +210 -0
  13. graphpop_cli/commands/export_bed.py +170 -0
  14. graphpop_cli/commands/export_windows.py +91 -0
  15. graphpop_cli/commands/extract.py +271 -0
  16. graphpop_cli/commands/filter_results.py +165 -0
  17. graphpop_cli/commands/garud_h.py +30 -0
  18. graphpop_cli/commands/genome_scan.py +41 -0
  19. graphpop_cli/commands/ihs.py +29 -0
  20. graphpop_cli/commands/import_data.py +266 -0
  21. graphpop_cli/commands/inventory.py +160 -0
  22. graphpop_cli/commands/joint_sfs.py +38 -0
  23. graphpop_cli/commands/ld.py +35 -0
  24. graphpop_cli/commands/lookup.py +207 -0
  25. graphpop_cli/commands/neighbors.py +175 -0
  26. graphpop_cli/commands/nsl.py +29 -0
  27. graphpop_cli/commands/plot.py +1066 -0
  28. graphpop_cli/commands/pop_summary.py +30 -0
  29. graphpop_cli/commands/query.py +15 -0
  30. graphpop_cli/commands/rank_genes.py +177 -0
  31. graphpop_cli/commands/report.py +264 -0
  32. graphpop_cli/commands/roh.py +30 -0
  33. graphpop_cli/commands/run_all.py +276 -0
  34. graphpop_cli/commands/server.py +98 -0
  35. graphpop_cli/commands/setup.py +299 -0
  36. graphpop_cli/commands/sfs.py +38 -0
  37. graphpop_cli/commands/validate.py +167 -0
  38. graphpop_cli/commands/xpehh.py +31 -0
  39. graphpop_cli/config.py +57 -0
  40. graphpop_cli/connection.py +52 -0
  41. graphpop_cli/formatters.py +81 -0
  42. graphpop_cli-0.1.0.dist-info/METADATA +73 -0
  43. graphpop_cli-0.1.0.dist-info/RECORD +46 -0
  44. graphpop_cli-0.1.0.dist-info/WHEEL +5 -0
  45. graphpop_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. graphpop_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,30 @@
1
+ """graphpop pop-summary — whole-chromosome population summary statistics."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..config import build_options_map, build_cypher
5
+ from ..formatters import format_output
6
+
7
+
8
def _cypher_string_literal(value):
    """Return *value* rendered as a single-quoted Cypher string literal.

    Backslashes and single quotes are escaped so that a value such as
    ``O'Brien`` cannot terminate the literal early and corrupt (or inject
    into) the generated statement.  Values without those characters are
    rendered exactly as the plain ``'value'`` form.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# NOTE: the function docstring below doubles as the ``--help`` text shown by
# click, so maintainer-facing notes are kept in comments instead.
@click.command("pop-summary")
@click.argument("chr")
@click.argument("population")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@click.option("--consequence", help="Filter by VEP consequence type")
@click.option("--pathway", help="Filter by pathway name")
@click.option("--gene", help="Filter by gene name")
@pass_ctx
def pop_summary(ctx, chr, population, output_path, fmt,
                consequence, pathway, gene):
    """Compute population summary statistics for a chromosome."""
    # Optional filters are collected into an options map; an empty map is
    # passed on as None.
    opts = build_options_map(consequence=consequence, pathway=pathway, gene=gene)

    # The positional procedure arguments are handed to build_cypher as
    # ready-made Cypher literals, so they are escaped here.
    cypher = build_cypher(
        "graphpop.pop_summary",
        [_cypher_string_literal(chr), _cypher_string_literal(population)],
        options=opts if opts else None,
        yield_cols=["pi", "theta_w", "tajima_d", "fay_wu_h", "mean_he", "mean_ho",
                    "mean_fis", "n_variants", "n_segregating", "n_polarized"],
    )
    records = ctx.run(cypher)
    format_output(records, output_path, fmt, "pop-summary",
                  {"chr": chr, "pop": population})
@@ -0,0 +1,15 @@
1
+ """graphpop query — run arbitrary Cypher and format as TSV."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..formatters import format_output
5
+
6
+
7
# NOTE: the function docstring below doubles as the ``--help`` text shown by
# click, so it is left untouched; maintainer notes live in comments.
@click.command()
@click.argument("cypher")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def query(ctx, cypher, output_path, fmt):
    """Run an arbitrary Cypher statement and format the results."""
    # The statement is executed verbatim; it is also echoed into the
    # metadata handed to the formatter.
    metadata = {"cypher": cypher}
    rows = ctx.run(cypher)
    format_output(rows, output_path, fmt, "query", metadata)
@@ -0,0 +1,177 @@
1
+ """graphpop rank-genes — rank genes by composite selection evidence."""
2
+ from __future__ import annotations
3
+
4
+ import click
5
+
6
+ from ..cli import pass_ctx
7
+ from ..formatters import format_output
8
+
9
+
10
def _quote_property(name):
    """Return *name* as a backtick-quoted Cypher identifier.

    Population names may contain characters that are not valid in a bare
    identifier (e.g. the hyphen in ``GJ-tmp``); unquoted, ``v.ihs_GJ-tmp``
    would be parsed as a subtraction.  Embedded backticks are doubled.
    """
    return "`" + str(name).replace("`", "``") + "`"


# NOTE: the function docstring below doubles as the ``--help`` text shown by
# click, so maintainer-facing notes are kept in comments instead.
@click.command("rank-genes")
@click.option("--pop", "population", required=True, help="Population name")
@click.option("--pop2", help="Second population (for xpehh)")
@click.option("--chr", "chromosome", help="Restrict to chromosome")
@click.option("--top", type=int, default=100, help="Number of top genes (default: 100)")
@click.option("--sort-by", "sort_by", default="composite",
              type=click.Choice(["composite", "max_abs_ihs", "max_abs_xpehh",
                                 "max_h12", "mean_fst", "n_high_impact"]),
              help="Sort criterion (default: composite)")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def rank_genes(ctx, population, pop2, chromosome, top, sort_by,
               output_path, fmt):
    """Rank genes by composite selection evidence.

    For each gene computes:
      - max_abs_ihs:    max |iHS| across variants in the gene
      - max_abs_xpehh:  max |XP-EHH| (if --pop2 provided)
      - max_h12:        max H12 from overlapping GenomicWindow
      - mean_fst:       mean Fst from GenomicWindow overlapping the gene
      - n_high_impact:  count of HIGH-impact variants

    Composite score = sum of per-stat percentile ranks (higher = stronger signal).

    \b
    Examples:
      graphpop rank-genes --pop EUR --top 50 -o top_genes.tsv
      graphpop rank-genes --pop GJ-tmp --pop2 GJ-trop --chr Chr01 --sort-by max_abs_ihs
      graphpop rank-genes --pop EUR --pop2 AFR --sort-by mean_fst --format json
    """
    # Dynamic property names cannot be parameterized, so they are spliced
    # into the query text -- backtick-quoted so that population names with
    # hyphens or other special characters remain a single identifier.
    ihs_prop = _quote_property(f"ihs_{population}")
    xpehh_prop = _quote_property(f"xpehh_{population}_{pop2}") if pop2 else None

    # XP-EHH fragments are only emitted when a second population was given.
    xpehh_agg = f"MAX(abs(v.{xpehh_prop})) AS max_abs_xpehh," if xpehh_prop else ""
    xpehh_ret = "max_abs_xpehh," if xpehh_prop else ""

    # Build parameterized chromosome filter strings and params dict.
    chr_filter_v = "AND v.chr = $chromosome" if chromosome else ""
    chr_filter_g = "AND g.chr = $chromosome" if chromosome else ""

    params: dict = {}
    if chromosome:
        params["chromosome"] = chromosome

    # --- Query: per-gene iHS, high-impact count ---
    cypher_variant = f"""
    MATCH (v:Variant)-[hc:HAS_CONSEQUENCE]->(g:Gene)
    WHERE v.{ihs_prop} IS NOT NULL {chr_filter_v}
    WITH g,
         MAX(abs(v.{ihs_prop})) AS max_abs_ihs,
         {xpehh_agg}
         SUM(CASE WHEN hc.impact = 'HIGH' THEN 1 ELSE 0 END) AS n_high_impact,
         COUNT(DISTINCT v) AS n_variants
    RETURN g.symbol AS gene,
           g.geneId AS gene_id,
           g.chr AS chr,
           g.start AS gene_start,
           g.end AS gene_end,
           max_abs_ihs,
           {xpehh_ret}
           n_high_impact,
           n_variants
    """
    variant_records = ctx.run(cypher_variant, params)

    if not variant_records:
        # Fallback: try without iHS requirement
        click.echo("No iHS data found; querying genes by annotation only.", err=True)
        cypher_fallback = f"""
        MATCH (v:Variant)-[hc:HAS_CONSEQUENCE]->(g:Gene)
        WHERE TRUE {chr_filter_v}
        WITH g,
             SUM(CASE WHEN hc.impact = 'HIGH' THEN 1 ELSE 0 END) AS n_high_impact,
             COUNT(DISTINCT v) AS n_variants
        RETURN g.symbol AS gene,
               g.geneId AS gene_id,
               g.chr AS chr,
               g.start AS gene_start,
               g.end AS gene_end,
               0.0 AS max_abs_ihs,
               n_high_impact,
               n_variants
        """
        variant_records = ctx.run(cypher_fallback, params)

    if not variant_records:
        click.echo("No genes found.", err=True)
        return

    # Build gene lookup, keyed by symbol (falling back to the gene id when
    # the symbol is missing).  Missing or NULL statistics default to 0.
    gene_data = {}
    for rec in variant_records:
        gene = rec.get("gene") or rec.get("gene_id")
        if not gene:
            continue
        gene_data[gene] = {
            "gene": gene,
            "gene_id": rec.get("gene_id", ""),
            "chr": rec.get("chr", ""),
            "gene_start": rec.get("gene_start", 0),
            "gene_end": rec.get("gene_end", 0),
            "max_abs_ihs": rec.get("max_abs_ihs", 0) or 0,
            "max_abs_xpehh": rec.get("max_abs_xpehh", 0) or 0,
            "n_high_impact": rec.get("n_high_impact", 0) or 0,
            "n_variants": rec.get("n_variants", 0) or 0,
            "max_h12": 0.0,
            "mean_fst": 0.0,
        }

    # --- Query: window-based stats (H12, Fst) overlapping genes ---
    click.echo("Querying window-based statistics...", err=True)
    window_params: dict = {
        "gene_names": list(gene_data.keys()),
        "population": population,
    }
    if chromosome:
        window_params["chromosome"] = chromosome

    cypher_windows = f"""
    MATCH (g:Gene)
    WHERE g.symbol IN $gene_names {chr_filter_g}
    MATCH (w:GenomicWindow)
    WHERE w.chr = g.chr AND w.population = $population
      AND w.start <= g.end AND w.end >= g.start
    WITH g, MAX(w.h12) AS max_h12, AVG(w.fst) AS mean_fst
    RETURN g.symbol AS gene, max_h12, mean_fst
    """
    try:
        window_records = ctx.run(cypher_windows, window_params)
        for rec in window_records:
            gene = rec.get("gene")
            if gene in gene_data:
                gene_data[gene]["max_h12"] = rec.get("max_h12", 0) or 0
                gene_data[gene]["mean_fst"] = rec.get("mean_fst", 0) or 0
    except SystemExit:
        # Window statistics are best-effort: a SystemExit raised while
        # running the query is downgraded to a warning and the window
        # columns keep their 0.0 defaults.
        click.echo("Warning: could not query GenomicWindow stats.", err=True)

    # --- Compute composite score (sum of ranks) ---
    genes = list(gene_data.values())
    stat_cols = ["max_abs_ihs", "max_abs_xpehh", "max_h12", "mean_fst", "n_high_impact"]

    for g in genes:
        g["composite"] = 0.0

    for col in stat_cols:
        # Dense rank over the distinct values, scaled to [0, 1]; when every
        # gene shares one value the divisor is clamped to 1 and all ranks
        # are 0.
        distinct_vals = sorted(set(g[col] for g in genes))
        rank_map = {v: i for i, v in enumerate(distinct_vals)}
        scale = max(len(distinct_vals) - 1, 1)
        for g in genes:
            g["composite"] += rank_map[g[col]] / scale

    # Sort
    if sort_by == "composite":
        genes.sort(key=lambda g: g["composite"], reverse=True)
    else:
        genes.sort(key=lambda g: g.get(sort_by, 0), reverse=True)

    # Top N
    genes = genes[:top]

    click.echo(f"Ranked {len(genes)} genes by {sort_by}.", err=True)
    format_output(genes, output_path, fmt, "rank-genes",
                  {"pop": population, "pop2": pop2, "chr": chromosome,
                   "sort_by": sort_by, "top": top})
@@ -0,0 +1,264 @@
1
+ """graphpop report -- generate an automated HTML analysis summary report."""
2
+ from __future__ import annotations
3
+
4
+ import click
5
+
6
+ from ..cli import pass_ctx
7
+
8
+
9
# NOTE: the function docstring below doubles as the ``--help`` text shown by
# click, so maintainer-facing notes are kept in comments instead.
@click.command()
@click.option("-o", "--output", "output_path", required=True,
              help="Output HTML file path")
@click.option("--database", help="Override database name for the report title")
@pass_ctx
def report(ctx, output_path, database):
    """Generate a self-contained HTML analysis report.

    Queries the graph database for dataset overview, per-population diversity,
    pairwise Fst, top selection signals, and annotation summary.

    \b
    Examples:
      graphpop report -o report.html
      graphpop report --database rice3k -o rice3k_report.html
    """
    # Function-scope import: only ``escape`` is bound, so it cannot clash
    # with other local names.
    from html import escape

    db_name = database or ctx.database or "GraphPop"
    click.echo(f"Generating report for database: {db_name} ...", err=True)

    # ---- 1. Dataset overview ------------------------------------------------
    overview_rows = ctx.run(
        "MATCH (v:Variant) "
        "WITH count(v) AS n_variants "
        "OPTIONAL MATCH (s:Sample) "
        "WITH n_variants, count(DISTINCT s) AS n_samples "
        "OPTIONAL MATCH (g:Gene) "
        "WITH n_variants, n_samples, count(DISTINCT g) AS n_genes "
        "RETURN n_variants, n_samples, n_genes"
    )
    overview = overview_rows[0] if overview_rows else {
        "n_variants": 0, "n_samples": 0, "n_genes": 0,
    }

    # Population ids are read from the pop_ids property of a single variant.
    pop_list_rows = ctx.run(
        "MATCH (v:Variant) WHERE v.pop_ids IS NOT NULL "
        "RETURN v.pop_ids AS pids LIMIT 1"
    )
    populations = pop_list_rows[0]["pids"] if pop_list_rows else []

    chr_rows = ctx.run(
        "MATCH (v:Variant) "
        "RETURN DISTINCT v.chr AS chr ORDER BY chr"
    )
    chromosomes = [r["chr"] for r in chr_rows]

    # ---- 2. Per-population diversity ----------------------------------------
    diversity_rows = ctx.run(
        "MATCH (w:GenomicWindow) "
        "WHERE w.pi IS NOT NULL "
        "RETURN w.population AS population, "
        "       avg(w.pi) AS mean_pi, "
        "       avg(w.theta_w) AS mean_theta_w, "
        "       avg(w.tajima_d) AS mean_tajima_d "
        "ORDER BY mean_pi DESC"
    )

    # ---- 3. Pairwise Fst (top 10 pairs) ------------------------------------
    fst_rows = ctx.run(
        "MATCH (w:GenomicWindow) "
        "WHERE w.fst IS NOT NULL AND w.pop_pair IS NOT NULL "
        "RETURN w.pop_pair AS pop_pair, avg(w.fst) AS mean_fst "
        "ORDER BY mean_fst DESC LIMIT 10"
    )

    # ---- 4. Top selection signals -------------------------------------------
    # Every variant property whose key starts with 'ihs_' / 'xpehh_' is
    # treated as a statistic; only |value| > 2.0 is kept.
    ihs_rows = ctx.run(
        "MATCH (v:Variant) "
        "WHERE any(k IN keys(v) WHERE k STARTS WITH 'ihs_') "
        "WITH v, [k IN keys(v) WHERE k STARTS WITH 'ihs_'] AS ks "
        "UNWIND ks AS k "
        "WITH v.variantId AS variant, v.chr AS chr, v.pos AS pos, "
        "     k AS stat, v[k] AS value "
        "WHERE abs(value) > 2.0 "
        "RETURN variant, chr, pos, stat, value "
        "ORDER BY abs(value) DESC LIMIT 20"
    )

    xpehh_rows = ctx.run(
        "MATCH (v:Variant) "
        "WHERE any(k IN keys(v) WHERE k STARTS WITH 'xpehh_') "
        "WITH v, [k IN keys(v) WHERE k STARTS WITH 'xpehh_'] AS ks "
        "UNWIND ks AS k "
        "WITH v.variantId AS variant, v.chr AS chr, v.pos AS pos, "
        "     k AS stat, v[k] AS value "
        "WHERE abs(value) > 2.0 "
        "RETURN variant, chr, pos, stat, value "
        "ORDER BY abs(value) DESC LIMIT 20"
    )

    # Merge both signal lists and keep the 20 strongest overall.
    selection_rows = ihs_rows + xpehh_rows
    selection_rows.sort(key=lambda r: abs(r.get("value", 0)), reverse=True)
    selection_rows = selection_rows[:20]

    # ---- 5. Annotation summary ----------------------------------------------
    annot_rows = ctx.run(
        "OPTIONAL MATCH (g:Gene) "
        "WITH count(DISTINCT g) AS n_genes "
        "OPTIONAL MATCH (pw:Pathway) "
        "WITH n_genes, count(DISTINCT pw) AS n_pathways "
        "OPTIONAL MATCH (go:GOTerm) "
        "RETURN n_genes, n_pathways, count(DISTINCT go) AS n_go_terms"
    )
    annot = annot_rows[0] if annot_rows else {
        "n_genes": 0, "n_pathways": 0, "n_go_terms": 0,
    }

    # ---- Build HTML ---------------------------------------------------------
    def _fmt(v):
        """Render one cell value as HTML-escaped text."""
        if v is None:
            text = "NA"
        elif isinstance(v, float):
            text = f"{v:.6g}"
        elif isinstance(v, list):
            text = ", ".join(str(x) for x in v)
        else:
            text = str(v)
        # Values originate from the database, so they must not be able to
        # introduce markup into the report.
        return escape(text)

    def _table(headers, rows_data):
        """Build an HTML table from headers and list-of-lists."""
        lines = [
            '<table>',
            '<tr>' + ''.join(f'<th>{escape(str(h))}</th>' for h in headers) + '</tr>',
        ]
        for row in rows_data:
            lines.append('<tr>' + ''.join(f'<td>{_fmt(v)}</td>' for v in row) + '</tr>')
        lines.append('</table>')
        return '\n'.join(lines)

    # Overview table
    overview_html = _table(
        ["Metric", "Value"],
        [
            ["Variants", overview.get("n_variants", 0)],
            ["Samples", overview.get("n_samples", 0)],
            ["Genes", overview.get("n_genes", 0)],
            ["Populations", len(populations)],
            ["Population IDs", ", ".join(str(p) for p in populations)],
            ["Chromosomes", ", ".join(str(c) for c in chromosomes)],
        ],
    )

    # Diversity table
    if diversity_rows:
        div_html = _table(
            ["Population", "Mean pi", "Mean theta_W", "Mean Tajima's D"],
            [[r["population"], r["mean_pi"], r["mean_theta_w"], r["mean_tajima_d"]]
             for r in diversity_rows],
        )
    else:
        div_html = "<p>No GenomicWindow diversity data found.</p>"

    # Fst table
    if fst_rows:
        fst_html = _table(
            ["Population Pair", "Mean Fst"],
            [[r["pop_pair"], r["mean_fst"]] for r in fst_rows],
        )
    else:
        fst_html = "<p>No pairwise Fst data found in GenomicWindow nodes.</p>"

    # Selection signals table
    if selection_rows:
        sel_html = _table(
            ["Variant", "Chr", "Pos", "Statistic", "Value"],
            [[r["variant"], r["chr"], r["pos"], r["stat"], r["value"]]
             for r in selection_rows],
        )
    else:
        sel_html = "<p>No iHS or XP-EHH signals above threshold found.</p>"

    # Annotation summary table
    annot_html = _table(
        ["Annotation Type", "Count"],
        [
            ["Genes", annot.get("n_genes", 0)],
            ["Pathways", annot.get("n_pathways", 0)],
            ["GO Terms", annot.get("n_go_terms", 0)],
        ],
    )

    # The database name may come straight from the command line.
    title_name = escape(str(db_name))

    page = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>GraphPop Report: {title_name}</title>
<style>
  body {{
    font-family: Arial, Helvetica, sans-serif;
    max-width: 960px;
    margin: 40px auto;
    padding: 0 20px;
    color: #333;
    line-height: 1.5;
  }}
  h1 {{
    color: #0072B2;
    border-bottom: 2px solid #0072B2;
    padding-bottom: 8px;
  }}
  h2 {{
    color: #555;
    margin-top: 32px;
  }}
  table {{
    border-collapse: collapse;
    width: 100%;
    margin: 12px 0 24px 0;
    font-size: 13px;
  }}
  th, td {{
    border: 1px solid #ddd;
    padding: 6px 10px;
    text-align: left;
  }}
  th {{
    background-color: #0072B2;
    color: white;
    font-weight: 600;
  }}
  tr:nth-child(even) {{
    background-color: #f9f9f9;
  }}
  tr:hover {{
    background-color: #e9f3fb;
  }}
  footer {{
    margin-top: 48px;
    padding-top: 12px;
    border-top: 1px solid #ddd;
    font-size: 11px;
    color: #999;
  }}
</style>
</head>
<body>
<h1>GraphPop Analysis Report: {title_name}</h1>

<h2>Dataset Overview</h2>
{overview_html}

<h2>Population Diversity</h2>
{div_html}

<h2>Pairwise Fst (Top 10)</h2>
{fst_html}

<h2>Top Selection Signals</h2>
{sel_html}

<h2>Annotation Summary</h2>
{annot_html}

<footer>Generated by GraphPop CLI v0.1.0</footer>
</body>
</html>
"""

    # The page declares charset utf-8, so write it as UTF-8 regardless of
    # the platform's default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(page)
    click.echo(f"Report saved to: {output_path}")
@@ -0,0 +1,30 @@
1
+ """graphpop roh — runs of homozygosity."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..config import build_options_map, build_cypher
5
+ from ..formatters import format_output
6
+
7
+
8
def _cypher_string_literal(value):
    """Return *value* rendered as a single-quoted Cypher string literal.

    Backslashes and single quotes are escaped so that a value containing
    them cannot terminate the literal early and corrupt (or inject into)
    the generated statement.  Values without those characters are rendered
    exactly as the plain ``'value'`` form.
    """
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


# NOTE: the function docstring below doubles as the ``--help`` text shown by
# click, so maintainer-facing notes are kept in comments instead.
@click.command()
@click.argument("chr")
@click.argument("population")
@click.option("--method", type=click.Choice(["hmm", "window"]), default="hmm",
              help="ROH detection method (default: hmm)")
@click.option("--min-length", type=int, help="Minimum ROH length in bp")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def roh(ctx, chr, population, method, min_length, output_path, fmt):
    """Detect runs of homozygosity (ROH) per sample."""
    # Method and minimum length are collected into an options map; an empty
    # map is passed on as None.
    opts = build_options_map(method=method, min_length=min_length)

    # The positional procedure arguments are handed to build_cypher as
    # ready-made Cypher literals, so they are escaped here.
    cypher = build_cypher(
        "graphpop.roh",
        [_cypher_string_literal(chr), _cypher_string_literal(population)],
        options=opts if opts else None,
        yield_cols=["sampleId", "n_roh", "total_length", "froh",
                    "mean_length", "max_length"],
    )
    records = ctx.run(cypher)
    format_output(records, output_path, fmt, "roh",
                  {"chr": chr, "pop": population, "method": method,
                   "min_length": min_length})