graphpop-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. graphpop_cli/__init__.py +2 -0
  2. graphpop_cli/cli.py +161 -0
  3. graphpop_cli/commands/__init__.py +1 -0
  4. graphpop_cli/commands/aggregate.py +206 -0
  5. graphpop_cli/commands/batch.py +155 -0
  6. graphpop_cli/commands/compare.py +118 -0
  7. graphpop_cli/commands/config_cmd.py +117 -0
  8. graphpop_cli/commands/converge.py +156 -0
  9. graphpop_cli/commands/db.py +188 -0
  10. graphpop_cli/commands/divergence.py +37 -0
  11. graphpop_cli/commands/diversity.py +36 -0
  12. graphpop_cli/commands/dump.py +210 -0
  13. graphpop_cli/commands/export_bed.py +170 -0
  14. graphpop_cli/commands/export_windows.py +91 -0
  15. graphpop_cli/commands/extract.py +271 -0
  16. graphpop_cli/commands/filter_results.py +165 -0
  17. graphpop_cli/commands/garud_h.py +30 -0
  18. graphpop_cli/commands/genome_scan.py +41 -0
  19. graphpop_cli/commands/ihs.py +29 -0
  20. graphpop_cli/commands/import_data.py +266 -0
  21. graphpop_cli/commands/inventory.py +160 -0
  22. graphpop_cli/commands/joint_sfs.py +38 -0
  23. graphpop_cli/commands/ld.py +35 -0
  24. graphpop_cli/commands/lookup.py +207 -0
  25. graphpop_cli/commands/neighbors.py +175 -0
  26. graphpop_cli/commands/nsl.py +29 -0
  27. graphpop_cli/commands/plot.py +1066 -0
  28. graphpop_cli/commands/pop_summary.py +30 -0
  29. graphpop_cli/commands/query.py +15 -0
  30. graphpop_cli/commands/rank_genes.py +177 -0
  31. graphpop_cli/commands/report.py +264 -0
  32. graphpop_cli/commands/roh.py +30 -0
  33. graphpop_cli/commands/run_all.py +276 -0
  34. graphpop_cli/commands/server.py +98 -0
  35. graphpop_cli/commands/setup.py +299 -0
  36. graphpop_cli/commands/sfs.py +38 -0
  37. graphpop_cli/commands/validate.py +167 -0
  38. graphpop_cli/commands/xpehh.py +31 -0
  39. graphpop_cli/config.py +57 -0
  40. graphpop_cli/connection.py +52 -0
  41. graphpop_cli/formatters.py +81 -0
  42. graphpop_cli-0.1.0.dist-info/METADATA +73 -0
  43. graphpop_cli-0.1.0.dist-info/RECORD +46 -0
  44. graphpop_cli-0.1.0.dist-info/WHEEL +5 -0
  45. graphpop_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. graphpop_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,266 @@
1
+ """graphpop import — import VCF data into a Neo4j graph database."""
2
+ from __future__ import annotations
3
+
4
+ import subprocess
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import click
9
+ import yaml
10
+
11
+
12
+ def _get_neo4j_home() -> Path:
13
+ """Get Neo4j home from config."""
14
+ config_path = Path.home() / ".graphpop" / "config.yaml"
15
+ if config_path.exists():
16
+ with open(config_path) as f:
17
+ cfg = yaml.safe_load(f) or {}
18
+ if "neo4j_home" in cfg:
19
+ return Path(cfg["neo4j_home"])
20
+ return Path.home() / "neo4j"
21
+
22
+
23
@click.command("import")
@click.option("--vcf", required=True, type=click.Path(exists=True),
              help="Input VCF file (bgzipped recommended)")
@click.option("--panel", required=True, type=click.Path(exists=True),
              help="Population panel file (TSV: sample_id, population)")
@click.option("--database", required=True,
              help="Name for the Neo4j database")
@click.option("--vep", type=click.Path(exists=True),
              help="VEP/SnpEff annotation file")
@click.option("--pathways", type=click.Path(exists=True),
              help="Reactome/Plant Reactome pathway file")
@click.option("--go-terms", type=click.Path(exists=True),
              help="GO term annotation file (UniProt GOA format)")
@click.option("--ancestral", type=click.Path(exists=True),
              help="Ancestral allele FASTA (Ensembl EPO)")
@click.option("--csv-dir", type=click.Path(),
              help="Directory for intermediate CSV files (default: temp)")
@click.option("--neo4j-home", type=click.Path(),
              help="Neo4j installation directory")
@click.option("--threads", type=int, default=4,
              help="Import threads (default: 4)")
@click.option("--skip-csv", is_flag=True,
              help="Skip CSV generation (reuse existing CSVs)")
@click.option("--skip-import", is_flag=True,
              help="Skip neo4j-admin import (CSVs only)")
@click.option("--skip-annotations", is_flag=True,
              help="Skip annotation loading")
def import_data(vcf, panel, database, vep, pathways, go_terms, ancestral,
                csv_dir, neo4j_home, threads, skip_csv, skip_import,
                skip_annotations):
    """Import VCF data into a Neo4j graph database.

    This command orchestrates the full import pipeline:

    \b
    1. Parse VCF + panel → generate CSV files (Variant, Sample, Population, etc.)
    2. Run neo4j-admin database import to bulk-load CSVs
    3. Load functional annotations (VEP, pathways, GO terms, ancestral alleles)

    The database name is user-specified and stored in the GraphPop config.

    \b
    Examples:
      graphpop import --vcf data.vcf.gz --panel panel.txt --database myproject
      graphpop import --vcf rice.vcf.gz --panel rice_panel.txt \\
          --database rice3k --vep rice_vep.vcf --pathways plant_reactome.tsv
    """
    # Local import: only needed to resolve the default CSV directory.
    import tempfile

    neo4j_path = Path(neo4j_home) if neo4j_home else _get_neo4j_home()
    if csv_dir:
        csv_path = Path(csv_dir)
    else:
        # Use the platform temp directory instead of a hard-coded "/tmp" so
        # the default also works on Windows and honours TMPDIR.
        csv_path = Path(tempfile.gettempdir()) / f"graphpop_csv_{database}"
    csv_path.mkdir(parents=True, exist_ok=True)

    click.echo("GraphPop Import Pipeline")
    click.echo(f" VCF: {vcf}")
    click.echo(f" Panel: {panel}")
    click.echo(f" Database: {database}")
    click.echo(f" Neo4j: {neo4j_path}")
    click.echo(f" CSV dir: {csv_path}")
    click.echo()

    # Step 1: Generate CSVs
    if skip_csv:
        click.echo("Step 1/3: Skipping CSV generation (--skip-csv)")
    else:
        click.echo("Step 1/3: Generating CSV files from VCF...")
        _run_csv_generation(vcf, panel, csv_path, threads)

    # Step 2: neo4j-admin import
    if skip_import:
        click.echo("\nStep 2/3: Skipping bulk import (--skip-import)")
    else:
        click.echo("\nStep 2/3: Running neo4j-admin bulk import...")
        _run_bulk_import(neo4j_path, csv_path, database)

    # Step 3: Load annotations
    if skip_annotations:
        click.echo("\nStep 3/3: Skipping annotations (--skip-annotations)")
    else:
        click.echo("\nStep 3/3: Loading annotations...")
        _load_annotations(neo4j_path, database, vep, pathways, go_terms, ancestral)

    # Record the database as the active one, even when steps were skipped.
    _update_config(database)

    click.echo(f"""
Import complete!

Database: {database}
GraphPop config updated to use database '{database}'.

Next steps:
graphpop start # Start Neo4j (if not running)
graphpop db info # Verify node/edge counts
graphpop diversity chr1 1 50000000 YOUR_POP # Run first analysis
graphpop run-all --database {database} -d results/ # Full-genome analysis
""")
118
+
119
+
120
def _run_csv_generation(vcf: str, panel: str, csv_dir: Path, threads: int):
    """Generate the bulk-import CSV files from a VCF and a population panel.

    Tries the installed ``graphpop_import`` Python package first. If it is
    not importable, falls back to running one of the known generator scripts
    (paths are relative to the current working directory) as a subprocess.

    Args:
        vcf: Path to the input VCF file.
        panel: Path to the population panel file.
        csv_dir: Directory the CSV files are written to.
        threads: Thread count; only forwarded to the subprocess fallback.

    Raises:
        SystemExit: With status 1 when no generator is available or every
            available generator script failed.
    """
    try:
        # ``import importlib`` alone does not guarantee that the ``util``
        # submodule is loaded; import it explicitly so find_spec() is
        # always reachable.
        import importlib.util

        if importlib.util.find_spec("graphpop_import") is not None:
            click.echo(" Using graphpop-import Python package...")
            from graphpop_import.vcf_parser import VCFParser
            from graphpop_import.csv_emitter import CSVEmitter
            parser = VCFParser(vcf, panel)
            emitter = CSVEmitter(str(csv_dir))
            parser.parse(emitter)
            click.echo(f" CSVs written to {csv_dir}")
            return
    except ImportError:
        # Package (or one of its dependencies) is unavailable: use the
        # script fallback below.
        pass

    # Fallback: run as subprocess
    click.echo(" Running graphpop-import as subprocess...")
    scripts = [
        Path("graphpop-import/src/graphpop_import/vcf_parser.py"),
        Path("scripts/rice_csv_parallel.py"),
    ]
    attempted = False
    for script in scripts:
        if not script.exists():
            continue
        attempted = True
        result = subprocess.run(
            [sys.executable, str(script),
             "--vcf", vcf, "--panel", panel, "--output", str(csv_dir),
             "--threads", str(threads)],
            capture_output=True, text=True,
        )
        if result.returncode == 0:
            click.echo(f" CSVs written to {csv_dir}")
            return
        click.echo(f" Warning: {result.stderr[:200]}", err=True)

    if attempted:
        # A generator was found but failed; "not found" would be misleading.
        click.echo(
            " Error: CSV generation failed (see warnings above).\n"
            " Fix the problem, or generate CSVs manually and use --skip-csv",
            err=True,
        )
    else:
        click.echo(
            " Error: graphpop-import not found.\n"
            " Install with: pip install -e graphpop-import/\n"
            " Or generate CSVs manually and use --skip-csv",
            err=True,
        )
    raise SystemExit(1)
165
+
166
+
167
def _run_bulk_import(neo4j_home: Path, csv_dir: Path, database: str):
    """Bulk-load the generated CSV files with ``neo4j-admin database import``.

    Args:
        neo4j_home: Neo4j installation directory (must contain
            ``bin/neo4j-admin``).
        csv_dir: Directory holding the header and data CSV files.
        database: Name of the target database.

    Raises:
        SystemExit: Status 1 when ``neo4j-admin`` is missing or the import
            fails; status 0 when the user declines to overwrite an existing
            database.
    """
    admin_bin = neo4j_home / "bin" / "neo4j-admin"
    if not admin_bin.exists():
        click.echo(f" Error: neo4j-admin not found at {admin_bin}", err=True)
        raise SystemExit(1)

    # Ask before clobbering a database directory that is already on disk.
    existing_db = neo4j_home / "data" / "databases" / database
    if existing_db.exists() and not click.confirm(
            f" Database '{database}' already exists. Overwrite?"):
        click.echo(" Import cancelled.")
        raise SystemExit(0)

    # (label/type, header file, data file pattern) for every input group.
    # The command is run without a shell, so the "*" patterns reach
    # neo4j-admin unexpanded.
    # NOTE(review): neo4j-admin documents regular-expression (not shell-glob)
    # matching for multi-file inputs — confirm these patterns match the
    # emitted file names.
    node_inputs = (
        ("Variant", "variant_header.csv", "variants_*.csv"),
        ("Sample", "sample_header.csv", "samples.csv"),
        ("Population", "population_header.csv", "populations.csv"),
        ("Chromosome", "chromosome_header.csv", "chromosomes.csv"),
    )
    relationship_inputs = (
        ("NEXT", "next_header.csv", "next_*.csv"),
        ("ON_CHROMOSOME", "on_chromosome_header.csv", "on_chromosome_*.csv"),
        ("IN_POPULATION", "in_population_header.csv", "in_population.csv"),
    )

    cmd = [str(admin_bin), "database", "import", "full"]
    cmd.extend(
        f"--nodes={label}={csv_dir}/{header},{csv_dir}/{data}"
        for label, header, data in node_inputs
    )
    cmd.extend(
        f"--relationships={rel_type}={csv_dir}/{header},{csv_dir}/{data}"
        for rel_type, header, data in relationship_inputs
    )
    cmd.append("--overwrite-destination=true")
    cmd.append(database)

    click.echo(f" Running: neo4j-admin database import {database}")
    outcome = subprocess.run(cmd, capture_output=True, text=True)
    if outcome.returncode != 0:
        click.echo(f" Import failed: {outcome.stderr[:500]}", err=True)
        click.echo(" You may need to stop Neo4j first: graphpop stop", err=True)
        raise SystemExit(1)
    click.echo(" Bulk import complete.")
202
+
203
+
204
def _load_annotations(neo4j_home: Path, database: str,
                      vep: str | None, pathways: str | None,
                      go_terms: str | None, ancestral: str | None):
    """Load functional annotations by running an annotation-loader script.

    For every annotation file that was supplied, the first existing loader
    script (paths are relative to the current working directory) is run as a
    subprocess with ``--database`` plus the flag for that annotation type.
    If a script fails, the next candidate is tried.

    Args:
        neo4j_home: Neo4j installation directory. Currently unused; kept for
            interface compatibility.
        database: Target database name, forwarded as ``--database``.
        vep: VEP/SnpEff annotation file, or None.
        pathways: Pathway annotation file, or None.
        go_terms: GO term annotation file, or None.
        ancestral: Ancestral allele FASTA, or None.
    """
    requested = [
        (description, flag, path)
        for description, flag, path in (
            ("VEP annotations", "--vep", vep),
            ("pathway annotations", "--pathways", pathways),
            ("GO term annotations", "--go", go_terms),
            ("ancestral alleles", "--ancestral", ancestral),
        )
        if path
    ]
    if not requested:
        click.echo(" No annotations specified, skipping.")
        return

    # Every annotation type is handled by the same loader script.
    script_name = "load_annotations"
    scripts_dir = Path("scripts")
    candidates = [
        scripts_dir / f"{script_name}.py",
        scripts_dir / "load_rice_annotations.py",
        Path(f"graphpop-import/src/graphpop_import/{script_name}.py"),
    ]
    available = [script for script in candidates if script.exists()]

    for description, flag, path in requested:
        click.echo(f" Loading {description} from {path}...")

        if not available:
            click.echo(f" Annotation script not found for: {script_name}")
            click.echo(" You can load annotations manually after import.")
            continue

        for script in available:
            result = subprocess.run(
                [sys.executable, str(script), "--database", database, flag, path],
                capture_output=True, text=True,
            )
            if result.returncode == 0:
                click.echo(f" Loaded: {script_name}")
                break
            click.echo(f" Warning: {result.stderr[:200]}", err=True)
        else:
            # Scripts were found but all of them failed: say so, instead of
            # claiming that no script exists.
            click.echo(f" Annotation loading failed for: {description}", err=True)
            click.echo(" You can load annotations manually after import.")
253
+
254
+
255
def _update_config(database: str):
    """Record *database* as the active database in the GraphPop config.

    Loads ``~/.graphpop/config.yaml`` if it exists, sets its ``database``
    key, and writes the file back (creating the directory when needed).
    Other keys in an existing mapping are preserved.

    Args:
        database: Name of the database to store under the ``database`` key.
    """
    config_path = Path.home() / ".graphpop" / "config.yaml"
    cfg = {}
    if config_path.exists():
        with open(config_path, encoding="utf-8") as f:
            loaded = yaml.safe_load(f) or {}
        if isinstance(loaded, dict):
            cfg = loaded
        else:
            # A non-mapping document cannot hold the "database" key; warn and
            # start from an empty config instead of crashing with TypeError
            # at the very end of an import.
            click.echo(
                f" Warning: ignoring malformed config at {config_path} "
                "(expected a mapping)",
                err=True,
            )
    cfg["database"] = database
    config_path.parent.mkdir(parents=True, exist_ok=True)
    with open(config_path, "w", encoding="utf-8") as f:
        yaml.dump(cfg, f, default_flow_style=False)
    click.echo(f" Config updated: database = {database}")
@@ -0,0 +1,160 @@
1
+ """graphpop inventory — comprehensive database inventory."""
2
+ from __future__ import annotations
3
+
4
+ import click
5
+
6
+ from ..cli import pass_ctx
7
+ from ..formatters import format_output
8
+
9
+
10
@click.command("inventory")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def inventory(ctx, output_path, fmt):
    """Show comprehensive database inventory.

    Reports node/relationship counts, populations, chromosomes, loaded
    annotations, and persisted statistics. No arguments needed.

    \b
    Examples:
      graphpop inventory
      graphpop inventory --format json -o db_inventory.json
    """
    # Each row is {"section", "item", "value"}; values are always strings.
    sections = []

    def add(section, item, value):
        sections.append({"section": section, "item": item, "value": value})

    def count(cypher):
        """Run a query returning a single ``count`` column; 0 if no rows."""
        recs = ctx.run(cypher)
        return recs[0]["count"] if recs else 0

    # --- 1. Node label counts ---
    click.echo("Querying node counts...", err=True)
    # "Chromosome" is included because this package's bulk import creates
    # Chromosome nodes.
    labels = ["Variant", "Sample", "Population", "Gene", "Pathway",
              "GOTerm", "GenomicWindow", "Chromosome"]
    node_counts = {}
    for label in labels:
        node_counts[label] = count(f"MATCH (n:{label}) RETURN count(n) AS count")
        add("nodes", label, str(node_counts[label]))

    # --- 2. Relationship type counts ---
    click.echo("Querying relationship counts...", err=True)
    # ON_CHROMOSOME and IN_POPULATION are the relationship types passed to
    # neo4j-admin by this package's bulk import, so report them as well.
    rel_types = ["CARRIES", "HAS_CONSEQUENCE", "IN_PATHWAY", "HAS_GO_TERM",
                 "NEXT", "LD", "BELONGS_TO", "ON_CHROMOSOME", "IN_POPULATION"]
    rel_counts = {}
    for rel in rel_types:
        rel_counts[rel] = count(f"MATCH ()-[r:{rel}]->() RETURN count(r) AS count")
        add("relationships", rel, str(rel_counts[rel]))

    # --- 3. Populations and sample counts ---
    click.echo("Querying populations...", err=True)
    # Accept either membership relationship type; DISTINCT guards against
    # counting a sample twice when both are present.
    recs = ctx.run(
        "MATCH (p:Population) "
        "OPTIONAL MATCH (s:Sample)-[:BELONGS_TO|IN_POPULATION]->(p) "
        "RETURN p.popId AS population, count(DISTINCT s) AS sample_count "
        "ORDER BY p.popId"
    )
    for rec in recs:
        add("populations", rec["population"], str(rec["sample_count"]))

    # --- 4. Chromosomes and variant counts ---
    click.echo("Querying chromosomes...", err=True)
    recs = ctx.run(
        "MATCH (v:Variant) "
        "RETURN v.chr AS chr, count(v) AS variant_count "
        "ORDER BY v.chr"
    )
    for rec in recs:
        add("chromosomes", rec["chr"], str(rec["variant_count"]))

    # --- 5. Annotation coverage ---
    click.echo("Querying annotations...", err=True)
    # These edge counts were already fetched in step 2; reuse them instead
    # of scanning the graph a second time.
    for rel in ("HAS_CONSEQUENCE", "IN_PATHWAY", "HAS_GO_TERM"):
        add("annotations", f"{rel} edges", str(rel_counts[rel]))

    # Ancestral allele coverage
    aa_count = count(
        "MATCH (v:Variant) WHERE v.ancestral_allele IS NOT NULL "
        "RETURN count(v) AS count"
    )
    add("annotations", "variants_with_ancestral_allele", str(aa_count))

    # --- 6. Persisted statistics ---
    click.echo("Querying persisted statistics...", err=True)
    # Inspect the property keys of a single Variant node once, then filter
    # by prefix locally (previously one query per prefix).
    recs = ctx.run("MATCH (v:Variant) WITH v LIMIT 1 RETURN keys(v) AS props")
    variant_keys = recs[0]["props"] if recs and recs[0]["props"] else []
    for stat_prefix in ["ihs_", "xpehh_", "nsl_"]:
        props = [k for k in variant_keys if k.startswith(stat_prefix)]
        if props:
            for p in props:
                add("persisted_stats", p, "on Variant nodes")
        else:
            add("persisted_stats", f"{stat_prefix}*", "none found")

    # GenomicWindow statistics (count already fetched in step 1)
    add("persisted_stats", "GenomicWindow count", str(node_counts["GenomicWindow"]))

    # Fst properties on GenomicWindow (again sampled from a single node)
    recs = ctx.run(
        "MATCH (w:GenomicWindow) "
        "WITH w LIMIT 1 "
        "UNWIND keys(w) AS k "
        "WITH k WHERE k STARTS WITH 'fst_' "
        "RETURN COLLECT(DISTINCT k) AS props"
    )
    fst_props = recs[0]["props"] if recs and recs[0]["props"] else []
    for p in fst_props:
        add("persisted_stats", p, "on GenomicWindow nodes")

    # --- Print summary ---
    # Plain TSV to the terminal gets the grouped, human-readable layout;
    # everything else goes through the shared formatter.
    if fmt == "tsv" and not output_path:
        _print_inventory(sections)
    else:
        format_output(sections, output_path, fmt, "inventory", {})
149
+
150
+
151
def _print_inventory(sections: list[dict]):
    """Write the inventory rows as a grouped, human-readable listing.

    A ``=== SECTION ===`` heading is emitted whenever the ``section`` value
    differs from that of the previous row, followed by one line per row with
    the item left-aligned in a 40-character column. A trailing blank line
    ends the listing.

    Args:
        sections: Rows with ``section``, ``item`` and ``value`` keys, already
            ordered so that rows of the same section are adjacent.
    """
    last_heading = None
    for entry in sections:
        heading = entry["section"]
        if heading != last_heading:
            last_heading = heading
            title = heading.upper().replace('_', ' ')
            click.echo(f"\n=== {title} ===")
        click.echo(f" {entry['item']:40s} {entry['value']}")
    click.echo()
@@ -0,0 +1,38 @@
1
+ """graphpop joint-sfs — joint site frequency spectrum between two populations."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..config import build_options_map, build_cypher
5
+ from ..formatters import format_output
6
+
7
+
8
@click.command("joint-sfs")
@click.argument("chr")
@click.argument("start", type=int)
@click.argument("end", type=int)
@click.argument("pop1")
@click.argument("pop2")
@click.option("--unfolded", is_flag=True, default=False,
              help="Compute unfolded joint SFS (requires ancestral allele)")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@click.option("--consequence", help="Filter by VEP consequence type")
@click.option("--pathway", help="Filter by pathway name")
@click.option("--gene", help="Filter by gene name")
@click.option("--min-af", type=float, help="Minimum allele frequency")
@pass_ctx
def joint_sfs(ctx, chr, start, end, pop1, pop2, unfolded, output_path, fmt,
              consequence, pathway, gene, min_af):
    """Compute the joint site frequency spectrum between two populations."""
    opts = build_options_map(consequence=consequence, pathway=pathway, gene=gene,
                             min_af=min_af)
    # Pass the free-text arguments as query parameters rather than splicing
    # them into the Cypher text as quoted literals: a quote character in a
    # chromosome or population name would otherwise break (or alter) the query.
    cypher = build_cypher(
        "graphpop.joint_sfs",
        ["$chr", str(start), str(end), "$pop1", "$pop2",
         "true" if unfolded else "false"],
        options=opts if opts else None,
        yield_cols=["joint_sfs", "n_variants", "max_ac1", "max_ac2", "dim1", "dim2"],
    )
    records = ctx.run(cypher, {"chr": chr, "pop1": pop1, "pop2": pop2})
    format_output(records, output_path, fmt, "joint-sfs",
                  {"chr": chr, "start": start, "end": end,
                   "pop1": pop1, "pop2": pop2, "unfolded": unfolded})
@@ -0,0 +1,35 @@
1
+ """graphpop ld — linkage disequilibrium (r2, D')."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..config import build_options_map, build_cypher
5
+ from ..formatters import format_output
6
+
7
+
8
@click.command()
@click.argument("chr")
@click.argument("start", type=int)
@click.argument("end", type=int)
@click.argument("population")
@click.argument("max_dist", type=int)
@click.argument("threshold", type=float)
@click.option("--persist", is_flag=True, default=False,
              help="Write LD edges to the graph")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@click.option("--min-af", type=float, help="Minimum allele frequency")
@pass_ctx
def ld(ctx, chr, start, end, population, max_dist, threshold, persist,
       output_path, fmt, min_af):
    """Compute pairwise linkage disequilibrium (r2 and D')."""
    opts = build_options_map(min_af=min_af, write_edges=persist)
    # Pass the free-text arguments as query parameters rather than splicing
    # them into the Cypher text as quoted literals: a quote character in a
    # chromosome or population name would otherwise break (or alter) the query.
    cypher = build_cypher(
        "graphpop.ld",
        ["$chr", str(start), str(end), "$population",
         str(max_dist), str(threshold)],
        options=opts if opts else None,
        yield_cols=["variant1", "variant2", "r2", "dprime", "distance"],
    )
    records = ctx.run(cypher, {"chr": chr, "population": population})
    format_output(records, output_path, fmt, "ld",
                  {"chr": chr, "start": start, "end": end, "pop": population,
                   "max_dist": max_dist, "threshold": threshold, "persist": persist})