graphpop-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. graphpop_cli/__init__.py +2 -0
  2. graphpop_cli/cli.py +161 -0
  3. graphpop_cli/commands/__init__.py +1 -0
  4. graphpop_cli/commands/aggregate.py +206 -0
  5. graphpop_cli/commands/batch.py +155 -0
  6. graphpop_cli/commands/compare.py +118 -0
  7. graphpop_cli/commands/config_cmd.py +117 -0
  8. graphpop_cli/commands/converge.py +156 -0
  9. graphpop_cli/commands/db.py +188 -0
  10. graphpop_cli/commands/divergence.py +37 -0
  11. graphpop_cli/commands/diversity.py +36 -0
  12. graphpop_cli/commands/dump.py +210 -0
  13. graphpop_cli/commands/export_bed.py +170 -0
  14. graphpop_cli/commands/export_windows.py +91 -0
  15. graphpop_cli/commands/extract.py +271 -0
  16. graphpop_cli/commands/filter_results.py +165 -0
  17. graphpop_cli/commands/garud_h.py +30 -0
  18. graphpop_cli/commands/genome_scan.py +41 -0
  19. graphpop_cli/commands/ihs.py +29 -0
  20. graphpop_cli/commands/import_data.py +266 -0
  21. graphpop_cli/commands/inventory.py +160 -0
  22. graphpop_cli/commands/joint_sfs.py +38 -0
  23. graphpop_cli/commands/ld.py +35 -0
  24. graphpop_cli/commands/lookup.py +207 -0
  25. graphpop_cli/commands/neighbors.py +175 -0
  26. graphpop_cli/commands/nsl.py +29 -0
  27. graphpop_cli/commands/plot.py +1066 -0
  28. graphpop_cli/commands/pop_summary.py +30 -0
  29. graphpop_cli/commands/query.py +15 -0
  30. graphpop_cli/commands/rank_genes.py +177 -0
  31. graphpop_cli/commands/report.py +264 -0
  32. graphpop_cli/commands/roh.py +30 -0
  33. graphpop_cli/commands/run_all.py +276 -0
  34. graphpop_cli/commands/server.py +98 -0
  35. graphpop_cli/commands/setup.py +299 -0
  36. graphpop_cli/commands/sfs.py +38 -0
  37. graphpop_cli/commands/validate.py +167 -0
  38. graphpop_cli/commands/xpehh.py +31 -0
  39. graphpop_cli/config.py +57 -0
  40. graphpop_cli/connection.py +52 -0
  41. graphpop_cli/formatters.py +81 -0
  42. graphpop_cli-0.1.0.dist-info/METADATA +73 -0
  43. graphpop_cli-0.1.0.dist-info/RECORD +46 -0
  44. graphpop_cli-0.1.0.dist-info/WHEEL +5 -0
  45. graphpop_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. graphpop_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,271 @@
1
+ """graphpop extract — extract variants, samples, and genotypes from the graph."""
2
+ from __future__ import annotations
3
+
4
+ import click
5
+
6
+ from ..cli import pass_ctx
7
+ from ..formatters import format_output
8
+
9
+
10
# Parent command group; the concrete work lives in the subcommands that
# register themselves on it via ``@extract.command(...)``.
@click.group()
def extract():
    """Extract data from the graph: variants, samples, or genotypes.

    \b
    Subcommands:
      variants    Query Variant nodes with flexible filters
      samples     Query Sample nodes for a population
      genotypes   Extract sample x variant dosage matrix for a region

    \b
    Examples:
      graphpop extract variants --chr chr22 --pop EUR --consequence missense_variant -o variants.tsv
      graphpop extract samples --pop EUR -o samples.tsv
      graphpop extract genotypes --chr chr22 --start 16000000 --end 17000000 --pop EUR -o geno.tsv
    """
27
+
28
+
29
@extract.command("variants")
@click.option("--chr", "chromosome", help="Chromosome filter")
@click.option("--start", type=int, help="Region start position")
@click.option("--end", type=int, help="Region end position")
@click.option("--pop", "population", help="Population name (for AF lookup)")
@click.option("--min-af", type=float, help="Minimum allele frequency")
@click.option("--max-af", type=float, help="Maximum allele frequency")
@click.option("--consequence", help="VEP consequence type (e.g., missense_variant)")
@click.option("--pathway", help="Pathway name (substring match)")
@click.option("--gene", help="Gene symbol or ID")
@click.option("--fields", default="variantId,pos,ref,alt,af",
              help="Comma-separated fields to return (default: variantId,pos,ref,alt,af)")
@click.option("--limit", type=int, default=10000, help="Maximum rows (default: 10000)")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def extract_variants(ctx, chromosome, start, end, population, min_af, max_af,
                     consequence, pathway, gene, fields, limit, output_path, fmt):
    """Query Variant nodes with optional filters.

    Builds Cypher dynamically based on provided filters. Use --fields to
    select which variant properties to return.

    \b
    Examples:
      graphpop extract variants --chr chr22 --pop EUR --consequence missense_variant -o variants.tsv
      graphpop extract variants --chr chr22 --start 16000000 --end 17000000 --fields pos,ref,alt,af,fst,ihs -o region.tsv
      graphpop extract variants --gene KCNE1 --pop EUR -o kcne1_variants.tsv
    """
    # Maintainer notes
    # ----------------
    # * All filter *values* travel as query parameters. Only identifiers
    #   (property names / column aliases) are spliced into the query text,
    #   and those are always backtick-quoted.
    # * Writes a status line to stderr and hands the records to
    #   ``format_output``; returns nothing.

    def quote(name):
        # Cypher escapes a backtick inside a quoted identifier by doubling it.
        return "`" + name.replace("`", "``") + "`"

    # Ignore blank entries such as a trailing comma in --fields.
    field_list = [f.strip() for f in fields.split(",") if f.strip()]
    if not field_list:
        raise click.UsageError("--fields must name at least one field.")

    if not population and (min_af is not None or max_af is not None):
        # AF values are stored per population, so there is nothing to
        # compare against without --pop. Say so instead of silently
        # returning unfiltered rows.
        click.echo("Warning: --min-af/--max-af are ignored without --pop.", err=True)

    params = {"limit": limit}
    where_parts = []

    # Annotation joins. A gene or pathway filter needs the Gene node; in that
    # case the consequence (if any) is read from the relationship, exactly as
    # the gene+consequence combination always did. Building the pattern in
    # one place keeps every combination anchored on the variant.
    # NOTE(review): the consequence-only form reads ``consequence`` from the
    # target node while the joined form reads it from the relationship —
    # confirm which one the graph schema actually populates.
    joins_gene = bool(gene or pathway)
    if joins_gene:
        rel = "[hc_rel:HAS_CONSEQUENCE]" if consequence else "[:HAS_CONSEQUENCE]"
        match_clause = f"MATCH (v:Variant)-{rel}->(g:Gene)"
        if pathway:
            match_clause += "-[:IN_PATHWAY]->(pw:Pathway)"
        consequence_expr = "hc_rel.consequence"
    elif consequence:
        match_clause = "MATCH (v:Variant)-[:HAS_CONSEQUENCE]->(hc)"
        consequence_expr = "hc.consequence"
    else:
        match_clause = "MATCH (v:Variant)"
        consequence_expr = None

    if consequence:
        where_parts.append(f"{consequence_expr} = $consequence")
        params["consequence"] = consequence
    if gene:
        where_parts.append("(g.symbol = $gene OR g.geneId = $gene)")
        params["gene"] = gene
    if pathway:
        where_parts.append("pw.name CONTAINS $pathway")
        params["pathway"] = pathway

    # Positional filters.
    if chromosome:
        where_parts.append("v.chr = $chromosome")
        params["chromosome"] = chromosome
    if start is not None:
        where_parts.append("v.pos >= $start")
        params["start"] = start
    if end is not None:
        where_parts.append("v.pos <= $end")
        params["end"] = end

    # AF filtering: locate the population's slot in the parallel
    # pop_ids / af arrays and compare that entry.
    if population and (min_af is not None or max_af is not None):
        params["population"] = population
        if min_af is not None:
            where_parts.append(
                "ANY(i IN range(0, size(v.pop_ids)-1) "
                "WHERE v.pop_ids[i] = $population AND v.af[i] >= $min_af)"
            )
            params["min_af"] = min_af
        if max_af is not None:
            where_parts.append(
                "ANY(i IN range(0, size(v.pop_ids)-1) "
                "WHERE v.pop_ids[i] = $population AND v.af[i] <= $max_af)"
            )
            params["max_af"] = max_af

    # Build RETURN clause from requested fields.
    return_cols = []
    for name in field_list:
        if name == "af" and population:
            params["population"] = population
            # The alias embeds the population name, which may contain
            # characters such as '-' that are not legal in a bare identifier.
            return_cols.append(
                "[i IN range(0, size(v.pop_ids)-1) "
                "WHERE v.pop_ids[i] = $population | v.af[i]][0] AS "
                + quote("af_" + population)
            )
        elif name in ("gene", "gene_symbol") and joins_gene:
            return_cols.append("g.symbol AS gene")
        elif name == "consequence" and consequence:
            return_cols.append(f"{consequence_expr} AS consequence")
        else:
            return_cols.append(f"v.{quote(name)} AS {quote(name)}")

    # After RETURN DISTINCT only projected columns are in scope for ORDER BY,
    # so sort by position only when ``pos`` is one of the output columns.
    order_clause = "ORDER BY `pos` " if "pos" in field_list else ""
    where_str = " AND ".join(where_parts) if where_parts else "true"
    cypher = (
        f"{match_clause} "
        f"WHERE {where_str} "
        f"RETURN DISTINCT {', '.join(return_cols)} "
        f"{order_clause}LIMIT $limit"
    )

    records = ctx.run(cypher, params)
    if not records:
        click.echo("No variants found with given filters.", err=True)
        return

    click.echo(f"Found {len(records)} variants.", err=True)
    format_output(records, output_path, fmt, "extract variants",
                  {"chr": chromosome, "start": start, "end": end,
                   "pop": population, "consequence": consequence,
                   "pathway": pathway, "gene": gene})
151
+
152
+
153
@extract.command("samples")
@click.option("--pop", "population", required=True, help="Population name")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def extract_samples(ctx, population, output_path, fmt):
    """Query Sample nodes for a population.

    Returns sampleId, population, and packed_index for each sample. If
    population-level summary stats (e.g., FROH) are available, they are
    included.

    \b
    Examples:
      graphpop extract samples --pop EUR -o samples.tsv
      graphpop extract samples --pop GJ-tmp --format json
    """
    # One row per Sample of the requested population. The Population node is
    # matched optionally, so its columns come back null when it is absent.
    query_params = {"population": population}
    cypher = """
    MATCH (s:Sample)
    WHERE s.population = $population
    OPTIONAL MATCH (p:Population {name: $population})
    RETURN s.sampleId AS sampleId,
    s.population AS population,
    s.packed_index AS packed_index,
    s.froh AS froh,
    p.n_samples AS pop_n_samples,
    p.mean_froh AS pop_mean_froh
    ORDER BY s.packed_index
    """
    records = ctx.run(cypher, query_params)

    if records:
        click.echo(f"Found {len(records)} samples for {population}.", err=True)
        format_output(records, output_path, fmt, "extract samples",
                      {"population": population})
    else:
        click.echo(f"No samples found for population '{population}'.", err=True)
190
+
191
+
192
@extract.command("genotypes")
@click.option("--chr", "chromosome", required=True, help="Chromosome")
@click.option("--start", type=int, required=True, help="Region start position")
@click.option("--end", type=int, required=True, help="Region end position")
@click.option("--pop", "population", required=True, help="Population name")
@click.option("--format-gt", "gt_format", default="dosage",
              type=click.Choice(["dosage", "gt", "raw"]),
              help="Output format: dosage (0/1/2), gt (0/0, 0/1, 1/1), or raw (hex gt_packed)")
@click.option("--limit", type=int, default=1000, help="Maximum variants (default: 1000)")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def extract_genotypes(ctx, chromosome, start, end, population, gt_format,
                      limit, output_path, fmt):
    """Extract genotype data for a region and population.

    Queries CARRIES edges between Sample and Variant nodes to build a
    sample x variant matrix. For large regions, consider using --limit
    to cap the number of variants.

    Note: gt_packed decoding requires bit operations. With --format-gt raw,
    the raw gt_packed byte array is returned as a hex string per variant.
    With dosage or gt mode, individual CARRIES edges are queried instead.

    \b
    Examples:
      graphpop extract genotypes --chr chr22 --start 16000000 --end 17000000 --pop EUR -o geno.tsv
      graphpop extract genotypes --chr chr22 --start 16000000 --end 17000000 --pop EUR --format-gt raw -o geno_raw.tsv
    """
    # Maintainer notes
    # ----------------
    # * raw mode returns one row per variant, capped at ``limit``.
    # * dosage/gt mode returns one row per (sample, variant) CARRIES edge,
    #   capped at ``limit * 100`` rows. That cap is a row budget, not a
    #   variant count, so reaching it is reported on stderr.
    # * NOTE(review): in raw mode ``v.gt_packed`` is returned as stored under
    #   the alias ``gt_packed_hex``; no hex conversion happens in this
    #   function — confirm that ``format_output`` (or the driver) does it.
    params = {
        "chromosome": chromosome,
        "start": start,
        "end": end,
        "population": population,
        "limit": limit,
    }
    run_info = {"chr": chromosome, "start": start, "end": end,
                "pop": population, "format": gt_format}

    if gt_format == "raw":
        # Per-variant summary with the packed genotype array and the
        # population's allele frequency.
        cypher = (
            "MATCH (v:Variant) "
            "WHERE v.chr = $chromosome AND v.pos >= $start AND v.pos <= $end "
            "RETURN v.variantId AS variantId, v.pos AS pos, v.ref AS ref, v.alt AS alt, "
            "v.gt_packed AS gt_packed_hex, "
            "[i IN range(0, size(v.pop_ids)-1) "
            "WHERE v.pop_ids[i] = $population | v.af[i]][0] AS af "
            "ORDER BY v.pos LIMIT $limit"
        )
        records = ctx.run(cypher, params)
        if not records:
            click.echo("No variants found in region.", err=True)
            return
        click.echo(f"Found {len(records)} variants (raw gt_packed mode).", err=True)
        format_output(records, output_path, fmt, "extract genotypes", run_info)
        return

    # dosage / gt: one row per CARRIES edge.
    if gt_format == "dosage":
        genotype_expr = "c.gt"
    else:
        genotype_expr = "CASE c.gt WHEN 1 THEN '0/1' WHEN 2 THEN '1/1' ELSE '0/0' END"

    carries_limit = limit * 100
    params["carries_limit"] = carries_limit
    cypher = (
        "MATCH (s:Sample)-[c:CARRIES]->(v:Variant) "
        "WHERE s.population = $population "
        "AND v.chr = $chromosome AND v.pos >= $start AND v.pos <= $end "
        "RETURN s.sampleId AS sampleId, v.variantId AS variantId, "
        f"v.pos AS pos, {genotype_expr} AS genotype "
        "ORDER BY v.pos, s.sampleId "
        "LIMIT $carries_limit"
    )
    records = ctx.run(cypher, params)
    if not records:
        click.echo("No genotype data found. CARRIES edges may not exist "
                   "for this region/population.", err=True)
        return

    click.echo(f"Found {len(records)} genotype entries.", err=True)
    if len(records) >= carries_limit:
        # The row cap was reached: later variants are missing and the last
        # variant listed may lack some of its samples.
        click.echo(f"Warning: output reached the {carries_limit}-row cap "
                   "(--limit x 100) and may be truncated; raise --limit or "
                   "narrow the region.", err=True)
    format_output(records, output_path, fmt, "extract genotypes", run_info)
@@ -0,0 +1,165 @@
1
+ """graphpop filter — query persisted results with annotation filters."""
2
+ from __future__ import annotations
3
+
4
+ import click
5
+
6
+ from ..cli import pass_ctx
7
+ from ..formatters import format_output
8
+
9
+
10
def _cypher_identifier(name):
    """Backtick-quote *name* so it is safe to use as a Cypher property key."""
    # A backtick inside a quoted identifier is escaped by doubling it.
    return "`" + name.replace("`", "``") + "`"


@click.command("filter")
@click.argument("statistic", type=click.Choice([
    "ihs", "xpehh", "nsl", "fst", "pi", "tajima_d", "h12",
]))
@click.argument("chr")
@click.argument("population")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@click.option("--consequence", help="Filter by VEP consequence type (e.g., missense_variant)")
@click.option("--pathway", help="Filter by pathway name")
@click.option("--gene", help="Filter by gene name or ID")
@click.option("--min-score", type=float, help="Minimum absolute score")
@click.option("--max-score", type=float, help="Maximum absolute score")
@click.option("--pop2", help="Second population (for xpehh)")
@click.option("--limit", type=int, default=10000, help="Maximum rows (default: 10000)")
@pass_ctx
def filter_results(ctx, statistic, chr, population, output_path, fmt,
                   consequence, pathway, gene, min_score, max_score, pop2, limit):
    """Query persisted statistics with annotation-based filters.

    This command retrieves already-computed statistics (iHS, XP-EHH, nSL, etc.)
    from graph nodes and filters them by functional annotation. It is the
    recommended way to perform conditioned analysis for haplotype-based
    statistics, which must be computed genome-wide first and then filtered.

    \b
    Workflow:
      1. Compute statistics:    graphpop ihs chr1 EUR --persist
      2. Filter by annotation:  graphpop filter ihs chr1 EUR --consequence missense_variant

    \b
    Examples:
      graphpop filter ihs chr1 EUR --consequence missense_variant -o ihs_missense.tsv
      graphpop filter xpehh chr1 EUR --pop2 AFR --pathway "Cardiac repolarization"
      graphpop filter nsl chr1 GJ-tmp --gene GW5 --min-score 2.0
      graphpop filter h12 chr1 GJ-tmp --consequence missense_variant
    """
    # Maintainer notes
    # ----------------
    # * ihs / xpehh / nsl are read from properties on Variant nodes whose
    #   names embed the population(s); fst / pi / tajima_d / h12 are read
    #   from GenomicWindow nodes.
    # * Annotation options (--consequence/--pathway/--gene) only apply to the
    #   per-variant statistics; the window queries do not join annotations.
    # * Score thresholds compare abs(value) for per-variant statistics and
    #   the raw value for window statistics.
    params: dict = {"chr": chr, "population": population, "limit": limit}
    if min_score is not None:
        params["min_score"] = min_score
    if max_score is not None:
        params["max_score"] = max_score

    if statistic in ("ihs", "xpehh", "nsl"):
        # Resolve the property names holding this statistic.
        if statistic != "xpehh":
            prop = f"{statistic}_{population}"
            prop_unstd = f"{statistic}_unstd_{population}"
        elif pop2:
            prop = f"xpehh_{population}_{pop2}"
            prop_unstd = f"xpehh_unstd_{population}_{pop2}"
        else:
            click.echo("Warning: --pop2 not specified; will search for any XP-EHH involving this population.", err=True)
            # No single property can be named. Both must still be defined,
            # otherwise the column-building code below fails on an unbound
            # local.
            prop = None
            prop_unstd = None

        variant_filters = ["v.chr = $chr"]
        if prop:
            # Population names may contain characters such as '-' that are
            # not valid in a bare identifier, so always quote the key.
            stat_expr = f"v.{_cypher_identifier(prop)}"
            variant_filters.append(f"{stat_expr} IS NOT NULL")
            if min_score is not None:
                variant_filters.append(f"abs({stat_expr}) >= $min_score")
            if max_score is not None:
                variant_filters.append(f"abs({stat_expr}) <= $max_score")
        else:
            # Keep variants carrying at least one xpehh_<population>_* value.
            params["xpehh_prefix"] = f"xpehh_{population}_"
            variant_filters.append(
                "any(k IN keys(v) WHERE k STARTS WITH $xpehh_prefix)"
            )

        clauses = ["MATCH (v:Variant) WHERE " + " AND ".join(variant_filters)]

        # Each annotation filter gets its own MATCH anchored on the variant,
        # so combining filters never chains one pattern off the end of
        # another, and one relationship may satisfy more than one filter.
        if consequence:
            clauses.append(
                "MATCH (v)-[:HAS_CONSEQUENCE]->(hc) "
                "WHERE hc.consequence = $consequence"
            )
            params["consequence"] = consequence
        if pathway:
            clauses.append(
                "MATCH (v)-[:HAS_CONSEQUENCE]->(:Gene)-[:IN_PATHWAY]->(pw:Pathway) "
                "WHERE pw.name CONTAINS $pathway"
            )
            params["pathway"] = pathway
        if gene:
            clauses.append(
                "MATCH (v)-[:HAS_CONSEQUENCE]->(g:Gene) "
                "WHERE (g.geneId = $gene OR g.symbol = $gene)"
            )
            params["gene"] = gene

        return_cols = [
            "v.variantId AS variant_id",
            "v.pos AS pos",
        ]
        if prop:
            return_cols.append(f"v.{_cypher_identifier(prop)} AS {statistic}")
        if prop_unstd:
            return_cols.append(
                f"v.{_cypher_identifier(prop_unstd)} AS {statistic}_unstd"
            )
        if consequence:
            return_cols.append("hc.consequence AS consequence")
            return_cols.append("hc.impact AS impact")
        if gene:
            return_cols.append("g.symbol AS gene")

        cypher = (
            f"{' '.join(clauses)} "
            f"RETURN DISTINCT {', '.join(return_cols)} "
            "ORDER BY v.pos LIMIT $limit"
        )

    else:
        # Window-level statistics (fst, pi, tajima_d, h12) live on
        # GenomicWindow nodes under a property named after the statistic.
        # ``statistic`` comes from a fixed click.Choice, so it is safe to
        # splice in.
        window_filters = [
            "w.chr = $chr",
            "w.population = $population",
        ]
        if min_score is not None:
            window_filters.append(f"w.{statistic} >= $min_score")
        if max_score is not None:
            window_filters.append(f"w.{statistic} <= $max_score")
        where_str = " AND ".join(window_filters)

        if statistic == "h12":
            # Garud's H: include the companion statistics, strongest first.
            cypher = (
                "MATCH (w:GenomicWindow) "
                f"WHERE {where_str} "
                "RETURN w.windowId AS window_id, w.chr AS chr, "
                "w.start AS start, w.end AS end, "
                "w.h12 AS h12, w.h2_h1 AS h2_h1, w.hap_diversity AS hap_div "
                "ORDER BY w.h12 DESC LIMIT $limit"
            )
        else:
            cypher = (
                "MATCH (w:GenomicWindow) "
                f"WHERE {where_str} "
                "RETURN w.windowId AS window_id, w.start AS start, w.end AS end, "
                f"w.{statistic} AS {statistic}, w.n_variants AS n_variants "
                "ORDER BY w.start LIMIT $limit"
            )

    records = ctx.run(cypher, params)

    if not records:
        click.echo(f"No results found for {statistic} on {chr}/{population} "
                   f"with given filters.", err=True)
        return

    click.echo(f"Found {len(records)} records.", err=True)
    format_output(records, output_path, fmt, "filter",
                  {"statistic": statistic, "chr": chr, "population": population,
                   "consequence": consequence, "pathway": pathway, "gene": gene})
@@ -0,0 +1,30 @@
1
+ """graphpop garud-h — Garud's H statistics for haplotype homozygosity."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..config import build_options_map, build_cypher
5
+ from ..formatters import format_output
6
+
7
+
8
def _cypher_string_literal(value):
    """Return *value* as a single-quoted Cypher string literal."""
    # Escape the escape character first, then the quote, so a value that
    # contains ' or \ cannot terminate the literal early.
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


@click.command("garud-h")
@click.argument("chr")
@click.argument("population")
@click.argument("window_size", type=int)
@click.argument("step_size", type=int)
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@click.option("--min-af", type=float, help="Minimum allele frequency")
@pass_ctx
def garud_h(ctx, chr, population, window_size, step_size, output_path, fmt, min_af):
    """Compute Garud's H1, H12, H2/H1 in sliding windows."""
    # Calls the ``graphpop.garud_h`` procedure with
    # (chr, population, window_size, step_size[, options]) and writes the
    # yielded columns via ``format_output``.
    opts = build_options_map(min_af=min_af)
    # Positional arguments are handed to build_cypher already rendered as
    # Cypher literals, so the string ones must be escaped here.
    positional = [
        _cypher_string_literal(chr),
        _cypher_string_literal(population),
        str(window_size),
        str(step_size),
    ]
    cypher = build_cypher(
        "graphpop.garud_h",
        positional,
        options=opts if opts else None,
        yield_cols=["chr", "start", "end", "population", "h1", "h12", "h2_h1",
                    "hap_diversity", "n_haplotypes", "n_variants"],
    )
    records = ctx.run(cypher)
    format_output(records, output_path, fmt, "garud-h",
                  {"chr": chr, "pop": population, "window": window_size,
                   "step": step_size})
@@ -0,0 +1,41 @@
1
+ """graphpop genome-scan — sliding-window genome scan."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..config import build_options_map, build_cypher
5
+ from ..formatters import format_output
6
+
7
+
8
def _cypher_string_literal(value):
    """Return *value* as a single-quoted Cypher string literal."""
    # Escape the escape character first, then the quote, so a value that
    # contains ' or \ cannot terminate the literal early.
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


@click.command("genome-scan")
@click.argument("chr")
@click.argument("population")
@click.argument("window_size", type=int)
@click.argument("step_size", type=int)
@click.option("--pop2", help="Second population for Fst/Dxy/PBS")
@click.option("--persist", is_flag=True, default=False,
              help="Persist window results to graph (default behavior)")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@click.option("--consequence", help="Filter by VEP consequence type")
@click.option("--pathway", help="Filter by pathway name")
@click.option("--gene", help="Filter by gene name")
@click.option("--min-af", type=float, help="Minimum allele frequency")
@pass_ctx
def genome_scan(ctx, chr, population, window_size, step_size, pop2, persist,
                output_path, fmt, consequence, pathway, gene, min_af):
    """Run a sliding-window genome scan (pi, theta, Tajima's D, Fst, etc.)."""
    # Calls the ``graphpop.genome_scan`` procedure with
    # (chr, population, window_size, step_size[, pop2][, options]) and writes
    # the yielded columns via ``format_output``.
    # NOTE(review): ``persist`` is accepted but not forwarded to the
    # procedure; the option's help text says persisting is the default —
    # confirm the flag is meant to be a no-op.
    opts = build_options_map(consequence=consequence, pathway=pathway, gene=gene,
                             min_af=min_af)
    # Positional arguments are handed to build_cypher already rendered as
    # Cypher literals, so the string ones must be escaped here.
    positional = [
        _cypher_string_literal(chr),
        _cypher_string_literal(population),
        str(window_size),
        str(step_size),
    ]
    if pop2:
        positional.append(_cypher_string_literal(pop2))
    cypher = build_cypher(
        "graphpop.genome_scan", positional,
        options=opts if opts else None,
        yield_cols=["window_id", "chr", "start", "end", "population",
                    "n_variants", "n_segregating", "pi", "theta_w", "tajima_d",
                    "fst", "fst_wc", "dxy", "pbs", "fay_wu_h"],
    )
    records = ctx.run(cypher)
    format_output(records, output_path, fmt, "genome-scan",
                  {"chr": chr, "pop": population, "window": window_size,
                   "step": step_size, "pop2": pop2})
@@ -0,0 +1,29 @@
1
+ """graphpop ihs — integrated haplotype score."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..config import build_options_map, build_cypher
5
+ from ..formatters import format_output
6
+
7
+
8
def _cypher_string_literal(value):
    """Return *value* as a single-quoted Cypher string literal."""
    # Escape the escape character first, then the quote, so a value that
    # contains ' or \ cannot terminate the literal early.
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


@click.command()
@click.argument("chr")
@click.argument("population")
@click.option("--min-af", type=float, help="Minimum allele frequency filter")
@click.option("--persist", is_flag=True, default=False,
              help="Write iHS scores to Variant nodes")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def ihs(ctx, chr, population, min_af, persist, output_path, fmt):
    """Compute integrated haplotype score (iHS) across a chromosome."""
    # Calls the ``graphpop.ihs`` procedure with (chr, population[, options])
    # and writes the yielded columns via ``format_output``. ``min_af`` and
    # ``persist`` are forwarded through the options map.
    opts = build_options_map(min_af=min_af, persist=persist)
    # Positional arguments are handed to build_cypher already rendered as
    # Cypher literals, so the string ones must be escaped here.
    positional = [
        _cypher_string_literal(chr),
        _cypher_string_literal(population),
    ]
    cypher = build_cypher(
        "graphpop.ihs",
        positional,
        options=opts if opts else None,
        yield_cols=["variantId", "pos", "af", "ihs_unstd", "ihs"],
    )
    records = ctx.run(cypher)
    format_output(records, output_path, fmt, "ihs",
                  {"chr": chr, "pop": population, "min_af": min_af,
                   "persist": persist})