graphpop-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. graphpop_cli/__init__.py +2 -0
  2. graphpop_cli/cli.py +161 -0
  3. graphpop_cli/commands/__init__.py +1 -0
  4. graphpop_cli/commands/aggregate.py +206 -0
  5. graphpop_cli/commands/batch.py +155 -0
  6. graphpop_cli/commands/compare.py +118 -0
  7. graphpop_cli/commands/config_cmd.py +117 -0
  8. graphpop_cli/commands/converge.py +156 -0
  9. graphpop_cli/commands/db.py +188 -0
  10. graphpop_cli/commands/divergence.py +37 -0
  11. graphpop_cli/commands/diversity.py +36 -0
  12. graphpop_cli/commands/dump.py +210 -0
  13. graphpop_cli/commands/export_bed.py +170 -0
  14. graphpop_cli/commands/export_windows.py +91 -0
  15. graphpop_cli/commands/extract.py +271 -0
  16. graphpop_cli/commands/filter_results.py +165 -0
  17. graphpop_cli/commands/garud_h.py +30 -0
  18. graphpop_cli/commands/genome_scan.py +41 -0
  19. graphpop_cli/commands/ihs.py +29 -0
  20. graphpop_cli/commands/import_data.py +266 -0
  21. graphpop_cli/commands/inventory.py +160 -0
  22. graphpop_cli/commands/joint_sfs.py +38 -0
  23. graphpop_cli/commands/ld.py +35 -0
  24. graphpop_cli/commands/lookup.py +207 -0
  25. graphpop_cli/commands/neighbors.py +175 -0
  26. graphpop_cli/commands/nsl.py +29 -0
  27. graphpop_cli/commands/plot.py +1066 -0
  28. graphpop_cli/commands/pop_summary.py +30 -0
  29. graphpop_cli/commands/query.py +15 -0
  30. graphpop_cli/commands/rank_genes.py +177 -0
  31. graphpop_cli/commands/report.py +264 -0
  32. graphpop_cli/commands/roh.py +30 -0
  33. graphpop_cli/commands/run_all.py +276 -0
  34. graphpop_cli/commands/server.py +98 -0
  35. graphpop_cli/commands/setup.py +299 -0
  36. graphpop_cli/commands/sfs.py +38 -0
  37. graphpop_cli/commands/validate.py +167 -0
  38. graphpop_cli/commands/xpehh.py +31 -0
  39. graphpop_cli/config.py +57 -0
  40. graphpop_cli/connection.py +52 -0
  41. graphpop_cli/formatters.py +81 -0
  42. graphpop_cli-0.1.0.dist-info/METADATA +73 -0
  43. graphpop_cli-0.1.0.dist-info/RECORD +46 -0
  44. graphpop_cli-0.1.0.dist-info/WHEEL +5 -0
  45. graphpop_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. graphpop_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,117 @@
1
+ """graphpop config — manage GraphPop configuration."""
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+
6
+ import click
7
+ import yaml
8
+
9
+
10
# Per-user location of the GraphPop configuration file.
CONFIG_PATH = Path.home().joinpath(".graphpop", "config.yaml")

# Baseline settings; `config init` offers most of these as prompt defaults.
DEFAULTS = dict(
    uri="bolt://localhost:7687",
    user="neo4j",
    password="",
    database="neo4j",
    neo4j_home=str(Path.home().joinpath("neo4j")),
)
19
+
20
+
21
# Command group; the subcommands below attach themselves via @config.command().
@click.group()
def config():
    """Manage GraphPop configuration."""
25
+
26
+
27
@config.command()
def init():
    """Create a new GraphPop config file interactively."""
    import os

    # Never clobber an existing config without explicit consent.
    if CONFIG_PATH.exists() and not click.confirm(
        f"{CONFIG_PATH} already exists. Overwrite?"
    ):
        return

    # Dict literals evaluate in order, so the prompts appear in this order.
    cfg = {
        "uri": click.prompt("Neo4j URI", default=DEFAULTS["uri"]),
        "user": click.prompt("Neo4j user", default=DEFAULTS["user"]),
        "password": click.prompt("Neo4j password", hide_input=True),
        "database": click.prompt("Default database", default=DEFAULTS["database"]),
        "neo4j_home": click.prompt("Neo4j home directory",
                                   default=DEFAULTS["neo4j_home"]),
    }

    CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Create the file owner-only from the start so the password is never
    # readable by others between the write and a later chmod.
    fd = os.open(CONFIG_PATH, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        # The mode passed to os.open only applies to newly created files;
        # tighten a pre-existing file before the secret is written into it.
        CONFIG_PATH.chmod(0o600)
        yaml.dump(cfg, f, default_flow_style=False)

    click.echo(f"\nConfig written to {CONFIG_PATH}")
    click.echo("Permissions set to owner-only (600).")
49
+
50
+
51
@config.command()
def show():
    """Display the current configuration."""
    import os

    if not CONFIG_PATH.exists():
        click.echo(f"No config file found at {CONFIG_PATH}")
        click.echo("Run 'graphpop config init' to create one.")
        return

    with open(CONFIG_PATH) as handle:
        settings = yaml.safe_load(handle) or {}

    click.echo(f"Config: {CONFIG_PATH}\n")
    for name, stored in settings.items():
        # The password itself is never printed, only whether one is set.
        if name != "password":
            shown = stored
        elif stored:
            shown = "****"
        else:
            shown = "(not set)"
        click.echo(f" {name}: {shown}")

    # Report environment variables that are set (and non-empty).
    env_lines = []
    for var in ("GRAPHPOP_URI", "GRAPHPOP_USER",
                "GRAPHPOP_PASSWORD", "GRAPHPOP_DATABASE"):
        current = os.environ.get(var)
        if not current:
            continue
        rendered = "****" if var == "GRAPHPOP_PASSWORD" else current
        env_lines.append(f" {var}={rendered}")

    if env_lines:
        click.echo("\nEnvironment overrides:")
        for line in env_lines:
            click.echo(line)
85
+
86
+
87
@config.command()
@click.argument("key")
@click.argument("value")
def set(key, value):
    """Set a configuration value.

    \b
    Examples:
      graphpop config set database rice3k
      graphpop config set pagecache 20g
      graphpop config set neo4j_home /opt/neo4j
    """
    import os

    cfg = {}
    if CONFIG_PATH.exists():
        with open(CONFIG_PATH) as f:
            cfg = yaml.safe_load(f) or {}

    if key == "password":
        # Never echo secrets; mirror the masking used by `config show`.
        old = "****" if cfg.get(key) else "(not set)"
        shown = "****"
    else:
        old = cfg.get(key, "(not set)")
        shown = value
    cfg[key] = value

    CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
    # The file may hold the password, so keep it owner-only whether it is
    # being created here or already existed with looser permissions.
    fd = os.open(CONFIG_PATH, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        CONFIG_PATH.chmod(0o600)
        yaml.dump(cfg, f, default_flow_style=False)

    click.echo(f"{key}: {old} → {shown}")
112
+
113
+
114
@config.command()
def path():
    """Print the config file path."""
    # Render the Path as text before handing it to click.
    location = str(CONFIG_PATH)
    click.echo(location)
@@ -0,0 +1,156 @@
1
+ """graphpop converge — find regions where multiple selection statistics converge."""
2
+ from __future__ import annotations
3
+
4
+ import click
5
+
6
+ from ..cli import pass_ctx
7
+ from ..formatters import format_output
8
+
9
+
10
# Where each statistic is stored: per-variant scores are read from Variant
# nodes, windowed summaries from GenomicWindow nodes.
VARIANT_STATS = {
    "ihs",
    "xpehh",
    "nsl",
}
WINDOW_STATS = {
    "h12",
    "fst",
    "pi",
    "tajima_d",
}
13
+
14
+
15
def _cypher_ident(name: str) -> str:
    """Backtick-quote *name* so Cypher reads it as a single property key."""
    return "`" + name.replace("`", "``") + "`"


@click.command("converge")
@click.option("--stats", required=True,
              help="Comma-separated statistic names (ihs, xpehh, nsl, h12, fst, pi, tajima_d)")
@click.option("--thresholds", required=True,
              help="Comma-separated threshold values (matched positionally to stats)")
@click.option("--chr", "chromosome", help="Chromosome (optional, all if not specified)")
@click.option("--pop", "population", required=True, help="Population name")
@click.option("--pop2", help="Second population (for xpehh, fst)")
@click.option("--window", type=int, default=0,
              help="Aggregate into windows of this size (default: per-variant)")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@click.option("--limit", type=int, default=10000, help="Maximum rows (default: 10000)")
@pass_ctx
def converge(ctx, stats, thresholds, chromosome, population, pop2,
             window, output_path, fmt, limit):
    """Find genomic regions where multiple statistics exceed thresholds.

    Identifies convergent selection signals by requiring multiple persisted
    statistics to simultaneously exceed user-defined thresholds.

    \b
    For variant-based stats (ihs, xpehh, nsl): queries Variant nodes.
    For window-based stats (h12, fst, pi, tajima_d): queries GenomicWindow nodes.
    If mixed, runs two queries and merges results by position.

    \b
    Examples:
      graphpop converge --stats ihs,xpehh --thresholds 2.0,2.0 --pop EUR --pop2 AFR
      graphpop converge --stats ihs,nsl --thresholds 2.0,2.0 --chr chr22 --pop EUR -o conv.tsv
      graphpop converge --stats h12,fst --thresholds 0.3,0.5 --pop GJ-tmp --window 100000
      graphpop converge --stats ihs,xpehh,h12,fst --thresholds 2.0,2.0,0.3,0.5 --pop EUR --pop2 AFR
    """
    # NOTE: `window` is accepted for CLI compatibility but is not used below.
    stat_list = [s.strip() for s in stats.split(",")]
    try:
        thresh_list = [float(t.strip()) for t in thresholds.split(",")]
    except ValueError:
        # Report bad input the same way as the length mismatch below instead
        # of surfacing a traceback.
        click.echo("Error: --thresholds must be comma-separated numbers.", err=True)
        raise SystemExit(1)

    if len(stat_list) != len(thresh_list):
        click.echo("Error: --stats and --thresholds must have the same number of items.", err=True)
        raise SystemExit(1)

    # Keyed by statistic; a repeated statistic keeps its last threshold.
    stat_thresh = dict(zip(stat_list, thresh_list))

    # Iterate the dict (not stat_list) so a repeated statistic is queried once;
    # duplicate RETURN aliases would make the query invalid.
    requested_variant = [s for s in stat_thresh if s in VARIANT_STATS]
    requested_window = [s for s in stat_thresh if s in WINDOW_STATS]
    unknown = [s for s in stat_thresh
               if s not in VARIANT_STATS and s not in WINDOW_STATS]
    if unknown:
        click.echo(f"Warning: unknown statistics ignored: {unknown}", err=True)

    results = []

    # --- Query variant-based stats ---
    if requested_variant:
        where_parts = []
        params: dict = {"limit": limit}
        if chromosome:
            where_parts.append("v.chr = $chromosome")
            params["chromosome"] = chromosome

        return_cols = [
            "v.variantId AS variant_id",
            "v.chr AS chr",
            "v.pos AS pos",
        ]

        stat_conditions = 0
        for stat in requested_variant:
            prop = _prop_name(stat, population, pop2)
            if prop is None:
                click.echo(f"Warning: skipping {stat} (need --pop2 for xpehh)", err=True)
                continue
            # Population names end up inside the property key; quoting keeps
            # names such as "GJ-tmp" from being parsed as an expression.
            ref = f"v.{_cypher_ident(prop)}"
            param = f"thr_{stat}"
            params[param] = stat_thresh[stat]
            where_parts.append(f"abs({ref}) >= ${param}")
            return_cols.append(f"{ref} AS {stat}")
            stat_conditions += 1

        # If every variant statistic was skipped there is no threshold left to
        # apply; running the query would return unfiltered variants (or fail
        # on an empty WHERE), so skip it.
        if stat_conditions:
            # Join to gene annotation
            cypher = (
                f"MATCH (v:Variant) "
                f"WHERE {' AND '.join(where_parts)} "
                f"OPTIONAL MATCH (v)-[:HAS_CONSEQUENCE]->(g:Gene) "
                f"RETURN DISTINCT {', '.join(return_cols)}, "
                f"g.symbol AS gene "
                f"ORDER BY v.pos LIMIT $limit"
            )
            results.extend(ctx.run(cypher, params))

    # --- Query window-based stats ---
    if requested_window:
        where_parts = ["w.population = $population"]
        params = {"population": population, "limit": limit}
        if chromosome:
            where_parts.append("w.chr = $chromosome")
            params["chromosome"] = chromosome

        return_cols = [
            "w.windowId AS window_id",
            "w.chr AS chr",
            "w.start AS start",
            "w.end AS end",
        ]

        for stat in requested_window:
            prop = stat
            if stat == "fst" and pop2:
                prop = f"fst_{population}_{pop2}"
            ref = f"w.{_cypher_ident(prop)}"
            param = f"thr_{stat}"
            if stat == "tajima_d":
                # Tajima's D: extreme negative = selection
                params[param] = -stat_thresh[stat]
                where_parts.append(f"{ref} <= ${param}")
            else:
                params[param] = stat_thresh[stat]
                where_parts.append(f"{ref} >= ${param}")
            return_cols.append(f"{ref} AS {stat}")

        cypher = (
            f"MATCH (w:GenomicWindow) "
            f"WHERE {' AND '.join(where_parts)} "
            f"RETURN {', '.join(return_cols)} "
            f"ORDER BY w.start LIMIT $limit"
        )
        results.extend(ctx.run(cypher, params))

    if not results:
        click.echo("No convergent signals found with the given thresholds.", err=True)
        return

    click.echo(f"Found {len(results)} convergent records.", err=True)
    format_output(results, output_path, fmt, "converge",
                  {"stats": stats, "thresholds": thresholds,
                   "chr": chromosome, "pop": population, "pop2": pop2})
146
+
147
+
148
+ def _prop_name(stat: str, population: str, pop2: str | None) -> str | None:
149
+ """Build the Neo4j property name for a given statistic."""
150
+ if stat in ("ihs", "nsl"):
151
+ return f"{stat}_{population}"
152
+ elif stat == "xpehh":
153
+ if not pop2:
154
+ return None
155
+ return f"xpehh_{population}_{pop2}"
156
+ return stat
@@ -0,0 +1,188 @@
1
+ """graphpop db — database management (list, create, switch, drop, info)."""
2
+ from __future__ import annotations
3
+
4
+ from pathlib import Path
5
+
6
+ import click
7
+ import yaml
8
+
9
+ from ..cli import pass_ctx
10
+
11
+
12
# Command group; the subcommands below attach themselves via @db.command().
@click.group()
def db():
    """Manage Neo4j databases for GraphPop."""
16
+
17
+
18
@db.command()
@pass_ctx
def list(ctx):
    """List all databases with sizes and status."""
    try:
        rows = ctx.run(
            "SHOW DATABASES YIELD name, currentStatus, sizeOnDisk ORDER BY name"
        )
    except Exception:
        # Fallback for Neo4j Community (SHOW DATABASES may not return sizeOnDisk)
        try:
            rows = ctx.run("SHOW DATABASES YIELD name, currentStatus ORDER BY name")
        except Exception as e:
            click.echo(f"Error: {e}", err=True)
            raise SystemExit(1)

    if not rows:
        click.echo("No databases found.")
        return

    # The active database is whatever the config file names, else "neo4j".
    settings_file = Path.home() / ".graphpop" / "config.yaml"
    active_db = "neo4j"
    if settings_file.exists():
        with open(settings_file) as handle:
            settings = yaml.safe_load(handle) or {}
        active_db = settings.get("database", "neo4j")

    click.echo(f"{'Database':<25} {'Status':<12} {'Size':<15} {'Active'}")
    click.echo("-" * 60)
    for row in rows:
        name = row.get("name", "?")
        status = row.get("currentStatus", "?")
        size = row.get("sizeOnDisk", "")
        # Only positive numeric sizes are humanised; anything else prints as-is.
        if isinstance(size, (int, float)) and size > 0:
            size = _format_size(size)
        marker = " *" if name == active_db else ""
        click.echo(f"{name:<25} {status:<12} {str(size):<15}{marker}")
55
+
56
+
57
@db.command()
@click.argument("name")
@pass_ctx
def create(ctx, name):
    """Create a new database."""
    click.echo(f"Creating database '{name}'...")
    # A backtick inside the name would end the quoted identifier early;
    # doubling it keeps the whole name as one identifier.
    quoted = "`" + name.replace("`", "``") + "`"
    try:
        # Must run against system database
        from neo4j import GraphDatabase
        driver = GraphDatabase.driver(ctx.cfg["uri"],
                                      auth=(ctx.cfg["user"], ctx.cfg["password"]))
        try:
            with driver.session(database="system") as session:
                # consume() forces the command to complete so any server-side
                # failure is raised here rather than lost.
                session.run(f"CREATE DATABASE {quoted} IF NOT EXISTS").consume()
        finally:
            # Release the connection pool even when the command fails.
            driver.close()
        click.echo(f"Database '{name}' created.")
        click.echo(f"Switch to it with: graphpop db switch {name}")
    except Exception as e:
        if "Unsupported" in str(e) or "Enterprise" in str(e):
            click.echo(
                "Error: CREATE DATABASE requires Neo4j Enterprise Edition.\n"
                "With Community Edition, use 'neo4j' as the default database\n"
                "or create databases via neo4j-admin.",
                err=True,
            )
        else:
            click.echo(f"Error: {e}", err=True)
        raise SystemExit(1)
84
+
85
+
86
@db.command()
@click.argument("name")
def switch(name):
    """Set the active database in GraphPop config."""
    settings_file = Path.home() / ".graphpop" / "config.yaml"

    # Start from the existing settings so other keys are preserved.
    settings = {}
    if settings_file.exists():
        with open(settings_file) as src:
            settings = yaml.safe_load(src) or {}

    previous = settings.get("database", "neo4j")
    settings["database"] = name

    settings_file.parent.mkdir(exist_ok=True)
    with open(settings_file, "w") as dst:
        yaml.dump(settings, dst, default_flow_style=False)

    click.echo(f"Active database: {previous} → {name}")
    click.echo(f"All graphpop commands will now use database '{name}'.")
102
+
103
+
104
@db.command()
@click.argument("name")
@click.option("--force", is_flag=True, help="Skip confirmation prompt")
@pass_ctx
def drop(ctx, name, force):
    """Drop a database (requires confirmation)."""
    if name in ("neo4j", "system"):
        click.echo(f"Error: Cannot drop the '{name}' system database.", err=True)
        raise SystemExit(1)

    if not force:
        # abort=True makes click raise Abort when the user declines.
        click.confirm(f"Drop database '{name}'? This cannot be undone", abort=True)

    # A backtick inside the name would end the quoted identifier early;
    # doubling it keeps the whole name as one identifier.
    quoted = "`" + name.replace("`", "``") + "`"
    try:
        from neo4j import GraphDatabase
        driver = GraphDatabase.driver(ctx.cfg["uri"],
                                      auth=(ctx.cfg["user"], ctx.cfg["password"]))
        try:
            with driver.session(database="system") as session:
                # consume() forces the command to complete so any server-side
                # failure is raised here rather than lost.
                session.run(f"DROP DATABASE {quoted} IF EXISTS").consume()
        finally:
            # Release the connection pool even when the command fails.
            driver.close()
        click.echo(f"Database '{name}' dropped.")
    except Exception as e:
        if "Unsupported" in str(e) or "Enterprise" in str(e):
            click.echo(
                "Error: DROP DATABASE requires Neo4j Enterprise Edition.\n"
                "With Community Edition, use neo4j-admin to manage databases.",
                err=True,
            )
        else:
            click.echo(f"Error: {e}", err=True)
        raise SystemExit(1)
135
+
136
+
137
@db.command()
@pass_ctx
def info(ctx):
    """Show detailed information about the current database."""
    click.echo(f"Database: {ctx.database}\n")

    label_counts_query = (
        "CALL db.labels() YIELD label "
        "CALL { WITH label MATCH (n) WHERE label IN labels(n) "
        "RETURN count(n) AS cnt } RETURN label, cnt ORDER BY cnt DESC"
    )
    rel_counts_query = (
        "CALL db.relationshipTypes() YIELD relationshipType AS type "
        "CALL { WITH type MATCH ()-[r]->() WHERE type(r) = type "
        "RETURN count(r) AS cnt } RETURN type, cnt ORDER BY cnt DESC"
    )
    procedures_query = (
        "SHOW PROCEDURES YIELD name WHERE name STARTS WITH 'graphpop' "
        "RETURN name ORDER BY name"
    )

    # One try covers all three queries: the first failure is reported and the
    # remaining sections are not attempted.
    try:
        # Node counts
        node_rows = ctx.run(label_counts_query)
        if node_rows:
            click.echo("Node counts:")
            for row in node_rows:
                click.echo(f" {row['label']:<20} {row['cnt']:>12,}")

        # Relationship counts
        rel_rows = ctx.run(rel_counts_query)
        if rel_rows:
            click.echo("\nRelationship counts:")
            for row in rel_rows:
                click.echo(f" {row['type']:<25} {row['cnt']:>12,}")

        # Check GraphPop procedures
        proc_rows = ctx.run(procedures_query)
        if not proc_rows:
            click.echo("\nGraphPop procedures: NONE INSTALLED")
        else:
            click.echo(f"\nGraphPop procedures ({len(proc_rows)}):")
            for row in proc_rows:
                click.echo(f" {row['name']}")

    except Exception as e:
        click.echo(f"Error querying database: {e}", err=True)
180
+
181
+
182
+ def _format_size(size_bytes: int | float) -> str:
183
+ """Format bytes as human-readable size."""
184
+ for unit in ("B", "KB", "MB", "GB", "TB"):
185
+ if abs(size_bytes) < 1024.0:
186
+ return f"{size_bytes:.1f} {unit}"
187
+ size_bytes /= 1024.0
188
+ return f"{size_bytes:.1f} PB"
@@ -0,0 +1,37 @@
1
+ """graphpop divergence — Fst, Dxy, Da, PBS."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..config import build_options_map, build_cypher
5
+ from ..formatters import format_output
6
+
7
+
8
def _cypher_str(value: str) -> str:
    """Render *value* as a single-quoted Cypher string literal."""
    # Backslashes first, so the escapes added for quotes are not re-escaped.
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


@click.command()
@click.argument("chr")
@click.argument("start", type=int)
@click.argument("end", type=int)
@click.argument("pop1")
@click.argument("pop2")
@click.option("--pop3", help="Third population for PBS computation")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@click.option("--consequence", help="Filter by VEP consequence type")
@click.option("--pathway", help="Filter by pathway name")
@click.option("--gene", help="Filter by gene name")
@click.option("--min-af", type=float, help="Minimum allele frequency")
@pass_ctx
def divergence(ctx, chr, start, end, pop1, pop2, pop3, output_path, fmt,
               consequence, pathway, gene, min_af):
    """Compute Hudson Fst, W&C Fst, Dxy, Da, and optionally PBS."""
    opts = build_options_map(consequence=consequence, pathway=pathway, gene=gene,
                             min_af=min_af)
    # String arguments are embedded as literals in the procedure call, so a
    # quote or backslash in a name must be escaped to keep the query intact.
    positional = [
        _cypher_str(chr),
        str(start),
        str(end),
        _cypher_str(pop1),
        _cypher_str(pop2),
    ]
    if pop3:
        # The third population is only passed when PBS is requested.
        positional.append(_cypher_str(pop3))
    cypher = build_cypher(
        "graphpop.divergence", positional,
        options=opts if opts else None,
        yield_cols=["fst_hudson", "fst_wc", "dxy", "da", "pbs", "n_variants"],
    )
    records = ctx.run(cypher)
    format_output(records, output_path, fmt, "divergence",
                  {"chr": chr, "pop1": pop1, "pop2": pop2, "pop3": pop3})
@@ -0,0 +1,36 @@
1
+ """graphpop diversity — nucleotide diversity, theta, Tajima's D, Fay & Wu's H."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..config import build_options_map, build_cypher
5
+ from ..formatters import format_output
6
+
7
+
8
def _cypher_str(value: str) -> str:
    """Render *value* as a single-quoted Cypher string literal."""
    # Backslashes first, so the escapes added for quotes are not re-escaped.
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


@click.command()
@click.argument("chr")
@click.argument("start", type=int)
@click.argument("end", type=int)
@click.argument("population")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@click.option("--consequence", help="Filter by VEP consequence type")
@click.option("--pathway", help="Filter by pathway name")
@click.option("--gene", help="Filter by gene name")
@click.option("--min-af", type=float, help="Minimum allele frequency")
@click.option("--max-af", type=float, help="Maximum allele frequency")
@pass_ctx
def diversity(ctx, chr, start, end, population, output_path, fmt,
              consequence, pathway, gene, min_af, max_af):
    """Compute nucleotide diversity, theta_W, Tajima's D, Fay & Wu's H."""
    opts = build_options_map(consequence=consequence, pathway=pathway, gene=gene,
                             min_af=min_af, max_af=max_af)
    # String arguments are embedded as literals in the procedure call, so a
    # quote or backslash in a name must be escaped to keep the query intact.
    cypher = build_cypher(
        "graphpop.diversity",
        [_cypher_str(chr), str(start), str(end), _cypher_str(population)],
        options=opts if opts else None,
        yield_cols=["pi", "theta_w", "tajima_d", "fay_wu_h", "fay_wu_h_norm",
                    "het_exp", "het_obs", "fis", "n_variants", "n_segregating",
                    "n_polarized"],
    )
    records = ctx.run(cypher)
    format_output(records, output_path, fmt, "diversity",
                  {"chr": chr, "start": start, "end": end, "pop": population})