graphpop-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphpop_cli/__init__.py +2 -0
- graphpop_cli/cli.py +161 -0
- graphpop_cli/commands/__init__.py +1 -0
- graphpop_cli/commands/aggregate.py +206 -0
- graphpop_cli/commands/batch.py +155 -0
- graphpop_cli/commands/compare.py +118 -0
- graphpop_cli/commands/config_cmd.py +117 -0
- graphpop_cli/commands/converge.py +156 -0
- graphpop_cli/commands/db.py +188 -0
- graphpop_cli/commands/divergence.py +37 -0
- graphpop_cli/commands/diversity.py +36 -0
- graphpop_cli/commands/dump.py +210 -0
- graphpop_cli/commands/export_bed.py +170 -0
- graphpop_cli/commands/export_windows.py +91 -0
- graphpop_cli/commands/extract.py +271 -0
- graphpop_cli/commands/filter_results.py +165 -0
- graphpop_cli/commands/garud_h.py +30 -0
- graphpop_cli/commands/genome_scan.py +41 -0
- graphpop_cli/commands/ihs.py +29 -0
- graphpop_cli/commands/import_data.py +266 -0
- graphpop_cli/commands/inventory.py +160 -0
- graphpop_cli/commands/joint_sfs.py +38 -0
- graphpop_cli/commands/ld.py +35 -0
- graphpop_cli/commands/lookup.py +207 -0
- graphpop_cli/commands/neighbors.py +175 -0
- graphpop_cli/commands/nsl.py +29 -0
- graphpop_cli/commands/plot.py +1066 -0
- graphpop_cli/commands/pop_summary.py +30 -0
- graphpop_cli/commands/query.py +15 -0
- graphpop_cli/commands/rank_genes.py +177 -0
- graphpop_cli/commands/report.py +264 -0
- graphpop_cli/commands/roh.py +30 -0
- graphpop_cli/commands/run_all.py +276 -0
- graphpop_cli/commands/server.py +98 -0
- graphpop_cli/commands/setup.py +299 -0
- graphpop_cli/commands/sfs.py +38 -0
- graphpop_cli/commands/validate.py +167 -0
- graphpop_cli/commands/xpehh.py +31 -0
- graphpop_cli/config.py +57 -0
- graphpop_cli/connection.py +52 -0
- graphpop_cli/formatters.py +81 -0
- graphpop_cli-0.1.0.dist-info/METADATA +73 -0
- graphpop_cli-0.1.0.dist-info/RECORD +46 -0
- graphpop_cli-0.1.0.dist-info/WHEEL +5 -0
- graphpop_cli-0.1.0.dist-info/entry_points.txt +2 -0
- graphpop_cli-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""graphpop sfs — site frequency spectrum."""
|
|
2
|
+
import click
|
|
3
|
+
from ..cli import pass_ctx
|
|
4
|
+
from ..config import build_options_map, build_cypher
|
|
5
|
+
from ..formatters import format_output
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@click.command()
@click.argument("chr")
@click.argument("start", type=int)
@click.argument("end", type=int)
@click.argument("population")
@click.option("--unfolded", is_flag=True, default=False,
              help="Compute unfolded SFS (requires ancestral allele)")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@click.option("--consequence", help="Filter by VEP consequence type")
@click.option("--pathway", help="Filter by pathway name")
@click.option("--gene", help="Filter by gene name")
@click.option("--min-af", type=float, help="Minimum allele frequency")
@click.option("--max-af", type=float, help="Maximum allele frequency")
@pass_ctx
def sfs(ctx, chr, start, end, population, unfolded, output_path, fmt,
        consequence, pathway, gene, min_af, max_af):
    """Compute the site frequency spectrum."""
    # The docstring above doubles as the click help text, so it is kept short.
    # Flow: build the optional filter map, render a
    # `CALL graphpop.sfs(...) YIELD ...` statement, execute it through
    # `ctx.run`, and hand the returned records to `format_output`.

    def quote(text):
        # chr/population come straight from the command line and are spliced
        # into the Cypher text, so backslashes and single quotes must be
        # escaped; otherwise a name such as O'Neil breaks (or alters) the query.
        return "'" + text.replace("\\", "\\\\").replace("'", "\\'") + "'"

    opts = build_options_map(consequence=consequence, pathway=pathway, gene=gene,
                             min_af=min_af, max_af=max_af)
    cypher = build_cypher(
        "graphpop.sfs",
        # start/end were already converted to int by click, so str() is safe.
        [quote(chr), str(start), str(end), quote(population),
         "true" if unfolded else "false"],
        options=opts if opts else None,
        yield_cols=["sfs", "n_variants", "max_ac", "n_polarized"],
    )
    records = ctx.run(cypher)
    format_output(records, output_path, fmt, "sfs",
                  {"chr": chr, "start": start, "end": end, "pop": population,
                   "unfolded": unfolded})
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""graphpop validate — check graph database integrity and completeness."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from ..cli import pass_ctx
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@click.command()
@click.option("--fix", is_flag=True, help="Attempt to fix issues (create missing indexes)")
@pass_ctx
def validate(ctx, fix):
    """Validate the graph database structure and completeness.

    Checks that required node labels, indexes, and GraphPop procedures
    are present. Reports any issues found.

    \b
    Checks performed:
    - Required node labels (Variant, Sample, Population, Chromosome)
    - Optional node labels (Gene, Pathway, GOTerm, GenomicWindow)
    - Required indexes (Variant.chr+pos, Population.populationId)
    - GraphPop procedures installed
    - Variant node properties (pop_ids, ac, an, af, gt_packed)
    - Sample-Population relationships
    """
    # NOTE(review): the help text above advertises a Sample-Population
    # relationship check, but no such query is issued below -- confirm whether
    # the check should be implemented or the bullet dropped.

    # Human-readable descriptions of every failed check, echoed in the summary.
    issues = []

    click.echo(f"Validating database: {ctx.database}\n")

    # ---- Node labels -------------------------------------------------------
    # This is the first query, so a failure here is treated as "database
    # unreachable" and aborts the command with exit status 1.
    click.echo("Node labels:")
    try:
        records = ctx.run(
            "CALL db.labels() YIELD label "
            "CALL { WITH label MATCH (n) WHERE label IN labels(n) "
            "RETURN count(n) AS cnt } RETURN label, cnt ORDER BY cnt DESC"
        )
        label_counts = {r["label"]: r["cnt"] for r in records}
    except Exception as e:
        click.echo(f" Error: Cannot query database: {e}", err=True)
        raise SystemExit(1)

    required_labels = ["Variant", "Sample", "Population", "Chromosome"]
    optional_labels = ["Gene", "Pathway", "GOTerm", "GenomicWindow"]

    for label in required_labels:
        count = label_counts.get(label, 0)
        if count > 0:
            click.echo(f" [OK] {label}: {count:,}")
        else:
            issues.append(f"Required label '{label}' missing or empty")
            click.echo(f" [FAIL] {label}: MISSING")

    for label in optional_labels:
        count = label_counts.get(label, 0)
        if count > 0:
            click.echo(f" [OK] {label}: {count:,}")
        else:
            click.echo(f" [--] {label}: not present (optional)")

    # ---- Indexes -----------------------------------------------------------
    click.echo("\nIndexes:")
    try:
        records = ctx.run(
            "SHOW INDEXES YIELD name, labelsOrTypes, properties, state "
            "RETURN name, labelsOrTypes, properties, state"
        )
        # Token LOOKUP indexes report NULL for labelsOrTypes/properties.
        # tuple(None) would raise TypeError, which the broad handler below
        # used to swallow -- making every required index look missing.
        indexes = {
            (tuple(r["labelsOrTypes"] or ()), tuple(r["properties"] or ())): r["state"]
            for r in records
        }
    except Exception as e:
        # Best effort: keep validating, but tell the user why every index is
        # about to be reported as missing instead of failing silently.
        click.echo(f" [WARN] Could not list indexes: {e}", err=True)
        indexes = {}

    required_indexes = [
        (("Variant",), ("chr", "pos")),
        (("Population",), ("populationId",)),
        (("Sample",), ("sampleId",)),
    ]

    for labels, props in required_indexes:
        target = f"{labels[0]}({', '.join(props)})"
        state = indexes.get((labels, props))
        if state == "ONLINE":
            click.echo(f" [OK] {target}")
        elif state:
            # Index exists but is not ONLINE; warn without counting an issue.
            click.echo(f" [WARN] {target}: {state}")
        else:
            msg = f"Index on {target} missing"
            issues.append(msg)
            click.echo(f" [FAIL] {msg}")
            if fix:
                idx_name = f"idx_{labels[0].lower()}_{'_'.join(props)}"
                prop_refs = ", ".join(f"n.{p}" for p in props)
                cypher = (
                    f"CREATE INDEX {idx_name} IF NOT EXISTS "
                    f"FOR (n:{labels[0]}) ON ({prop_refs})"
                )
                try:
                    ctx.run(cypher)
                    click.echo(f" Fixed: created index {idx_name}")
                except Exception as e:
                    click.echo(f" Fix failed: {e}", err=True)

    # ---- Procedures --------------------------------------------------------
    click.echo("\nGraphPop procedures:")
    try:
        records = ctx.run(
            "SHOW PROCEDURES YIELD name WHERE name STARTS WITH 'graphpop' "
            "RETURN name ORDER BY name"
        )
        proc_names = {r["name"] for r in records}
    except Exception as e:
        # Same best-effort policy as for indexes, but no longer silent.
        click.echo(f" [WARN] Could not list procedures: {e}", err=True)
        proc_names = set()

    expected_procs = [
        "graphpop.diversity", "graphpop.divergence", "graphpop.sfs",
        "graphpop.joint_sfs", "graphpop.genome_scan", "graphpop.pop_summary",
        "graphpop.ld", "graphpop.ihs", "graphpop.xpehh",
        "graphpop.nsl", "graphpop.roh", "graphpop.garud_h",
    ]

    for proc in expected_procs:
        if proc in proc_names:
            click.echo(f" [OK] {proc}")
        else:
            issues.append(f"Procedure '{proc}' not installed")
            click.echo(f" [FAIL] {proc}: NOT INSTALLED")

    # ---- Variant properties ------------------------------------------------
    # Only one Variant node is sampled (LIMIT 1); its keys stand in for all.
    click.echo("\nVariant node properties:")
    try:
        rec = ctx.run(
            "MATCH (v:Variant) WITH v LIMIT 1 "
            "RETURN keys(v) AS props"
        )
        if rec:
            variant_props = set(rec[0]["props"])
            required_props = ["chr", "pos", "ref", "alt", "pop_ids", "ac", "an", "af"]
            optional_props = ["gt_packed", "phase_packed", "ancestral_allele", "is_polarized"]

            for p in required_props:
                if p in variant_props:
                    click.echo(f" [OK] {p}")
                else:
                    issues.append(f"Variant property '{p}' missing")
                    click.echo(f" [FAIL] {p}: MISSING")

            for p in optional_props:
                if p in variant_props:
                    click.echo(f" [OK] {p}")
                else:
                    click.echo(f" [--] {p}: not present (optional)")
    except Exception as e:
        click.echo(f" Error checking properties: {e}", err=True)

    # ---- Summary -----------------------------------------------------------
    click.echo(f"\n{'='*40}")
    if issues:
        click.echo(f"VALIDATION: {len(issues)} issue(s) found")
        for issue in issues:
            click.echo(f" - {issue}")
        if not fix:
            click.echo("\nRun with --fix to attempt automatic fixes.")
    else:
        click.echo("VALIDATION: All checks passed")
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""graphpop xpehh — cross-population extended haplotype homozygosity."""
|
|
2
|
+
import click
|
|
3
|
+
from ..cli import pass_ctx
|
|
4
|
+
from ..config import build_options_map, build_cypher
|
|
5
|
+
from ..formatters import format_output
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@click.command()
@click.argument("chr")
@click.argument("pop1")
@click.argument("pop2")
@click.option("--min-af", type=float, help="Minimum allele frequency filter")
@click.option("--persist", is_flag=True, default=False,
              help="Write XP-EHH scores to Variant nodes")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def xpehh(ctx, chr, pop1, pop2, min_af, persist, output_path, fmt):
    """Compute cross-population extended haplotype homozygosity (XP-EHH)."""
    # The docstring above doubles as the click help text, so it is kept short.
    # Flow: build the options map, render a
    # `CALL graphpop.xpehh(...) YIELD ...` statement, execute it through
    # `ctx.run`, and hand the returned records to `format_output`.

    def quote(text):
        # chr/pop1/pop2 are raw command-line strings spliced into the Cypher
        # text; escape backslashes and single quotes so a name containing a
        # quote cannot break (or alter) the generated query.
        return "'" + text.replace("\\", "\\\\").replace("'", "\\'") + "'"

    # `persist` is always a bool (never None), so it is always forwarded.
    opts = build_options_map(min_af=min_af, persist=persist)
    cypher = build_cypher(
        "graphpop.xpehh",
        [quote(chr), quote(pop1), quote(pop2)],
        options=opts if opts else None,
        yield_cols=["variantId", "pos", "af_pop1", "af_pop2",
                    "xpehh_unstd", "xpehh"],
    )
    records = ctx.run(cypher)
    format_output(records, output_path, fmt, "xpehh",
                  {"chr": chr, "pop1": pop1, "pop2": pop2,
                   "min_af": min_af, "persist": persist})
|
graphpop_cli/config.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Shared options and Cypher builders for GraphPop CLI."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def build_options_map(consequence: str | None = None,
                      pathway: str | None = None,
                      gene: str | None = None,
                      min_af: float | None = None,
                      max_af: float | None = None,
                      variant_type: str | None = None,
                      **extra) -> dict:
    """Collect the filters that were actually supplied into one options dict.

    String filters are included only when non-empty; the allele-frequency
    bounds are included whenever they are not ``None`` (so ``0.0`` is kept).
    Any additional keyword arguments are merged in last, skipping ``None``
    values, and may override the named filters.

    Returns:
        A dict holding only the supplied options, in declaration order.
    """
    # (key, value, include?) in the order the keys must appear in the result.
    candidates = (
        ("consequence", consequence, bool(consequence)),
        ("pathway", pathway, bool(pathway)),
        ("gene", gene, bool(gene)),
        ("min_af", min_af, min_af is not None),
        ("max_af", max_af, max_af is not None),
        ("variant_type", variant_type, bool(variant_type)),
    )
    selected = {key: value for key, value, keep in candidates if keep}
    for key, value in extra.items():
        if value is not None:
            selected[key] = value
    return selected
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def build_cypher(procedure: str, positional: list[str],
                 options: dict | None = None,
                 yield_cols: list[str] | None = None) -> str:
    """Build a ``CALL ... YIELD`` Cypher statement.

    Args:
        procedure: Procedure name, inserted verbatim after ``CALL``.
        positional: Already-rendered Cypher argument expressions, inserted
            verbatim (callers are responsible for quoting string values).
        options: Optional map rendered as a trailing ``{key: literal, ...}``
            argument; values go through ``_cypher_literal``, keys are
            inserted verbatim. Omitted when empty or ``None``.
        yield_cols: Optional column names for the ``YIELD`` clause.

    Returns:
        The assembled Cypher statement.
    """
    call_args = list(positional)
    if options:
        opts_str = ", ".join(
            f"{k}: {_cypher_literal(v)}" for k, v in options.items()
        )
        # Appending to the argument list (rather than concatenating ", {...}")
        # keeps the statement valid when there are no positional arguments:
        # "CALL p({...})" instead of the malformed "CALL p(, {...})".
        call_args.append(f"{{{opts_str}}}")
    cypher = f"CALL {procedure}({', '.join(call_args)})"
    if yield_cols:
        cypher += " YIELD " + ", ".join(yield_cols)
    return cypher
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _cypher_literal(v) -> str:
|
|
47
|
+
"""Convert Python value to Cypher literal."""
|
|
48
|
+
if isinstance(v, str):
|
|
49
|
+
return f"'{v}'"
|
|
50
|
+
if isinstance(v, bool):
|
|
51
|
+
return "true" if v else "false"
|
|
52
|
+
if isinstance(v, (int, float)):
|
|
53
|
+
return str(v)
|
|
54
|
+
if isinstance(v, list):
|
|
55
|
+
inner = ", ".join(_cypher_literal(x) for x in v)
|
|
56
|
+
return f"[{inner}]"
|
|
57
|
+
return f"'{v}'"
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Neo4j connection management for GraphPop CLI."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import yaml
|
|
8
|
+
from neo4j import GraphDatabase
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
_DEFAULT_CONFIG_PATH = Path.home() / ".graphpop" / "config.yaml"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def load_config(config_path: str | Path | None = None) -> dict:
    """Load connection config from file, env vars, or defaults.

    Precedence, lowest to highest: built-in defaults, the YAML config file
    (when it exists), then the ``GRAPHPOP_URI`` / ``GRAPHPOP_USER`` /
    ``GRAPHPOP_PASSWORD`` / ``GRAPHPOP_DATABASE`` environment variables.

    Args:
        config_path: Config file location; falls back to the module default
            path when omitted. Plain strings are accepted as well as ``Path``.

    Returns:
        Dict with at least the keys ``uri``, ``user``, ``password`` and
        ``database`` (plus any extra keys present in the config file).

    Raises:
        ValueError: If the config file does not contain a YAML mapping.
    """
    cfg = {
        "uri": "bolt://localhost:7687",
        "user": "neo4j",
        "password": "neo4j",
        "database": "neo4j",
    }

    # Config file. Path() makes a str argument work with .exists().
    path = Path(config_path) if config_path else _DEFAULT_CONFIG_PATH
    if path.exists():
        with open(path, encoding="utf-8") as f:
            file_cfg = yaml.safe_load(f) or {}
        # A file holding a list or scalar previously died with an opaque
        # AttributeError on .items(); fail with a message naming the file.
        if not isinstance(file_cfg, dict):
            raise ValueError(
                f"Config file {path} must contain a YAML mapping, "
                f"got {type(file_cfg).__name__}"
            )
        # Null entries in the file do not override the defaults.
        cfg.update({k: v for k, v in file_cfg.items() if v is not None})

    # Env vars override; unset or empty variables are ignored.
    if v := os.environ.get("GRAPHPOP_URI"):
        cfg["uri"] = v
    if v := os.environ.get("GRAPHPOP_USER"):
        cfg["user"] = v
    if v := os.environ.get("GRAPHPOP_PASSWORD"):
        cfg["password"] = v
    if v := os.environ.get("GRAPHPOP_DATABASE"):
        cfg["database"] = v

    return cfg
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_driver(cfg: dict):
    """Build a Neo4j driver from the ``uri``, ``user`` and ``password`` entries of *cfg*."""
    uri = cfg["uri"]
    credentials = (cfg["user"], cfg["password"])
    return GraphDatabase.driver(uri, auth=credentials)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def run_procedure(driver, database: str, cypher: str, **params) -> list[dict]:
    """Execute *cypher* in a session on *database* and collect the rows.

    Extra keyword arguments are forwarded to ``session.run``. Each result
    record is converted with ``record.data()`` while the session is open.
    """
    rows = []
    with driver.session(database=database) as session:
        for record in session.run(cypher, **params):
            rows.append(record.data())
    return rows
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Output formatters for GraphPop CLI."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import csv
|
|
5
|
+
import io
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
from typing import TextIO
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def write_header_comment(out: TextIO, command: str, args: dict):
    """Emit ``#``-prefixed header lines naming the command and its arguments.

    Arguments whose value is ``None`` or ``False`` are left out; every other
    value (including ``0`` and empty strings) gets its own ``# key: value`` line.
    """
    lines = [f"# graphpop {command}"]
    lines.extend(
        f"# {name}: {value}"
        for name, value in args.items()
        if not (value is None or value is False)
    )
    out.write("\n".join(lines) + "\n")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def write_tsv(records: list[dict], out: TextIO, header: bool = True):
    """Write *records* to *out* as tab-separated text.

    Columns come from the keys of the first record. Floats are rendered with
    six significant digits, lists as comma-joined items and ``None`` as
    ``NA``. Nothing at all is written when *records* is empty.
    """
    if not records:
        return

    def render(value):
        # One cell -> text, mirroring the per-type rules in the docstring.
        if isinstance(value, float):
            return f"{value:.6g}"
        if isinstance(value, list):
            return ",".join(map(str, value))
        if value is None:
            return "NA"
        return str(value)

    columns = list(records[0])
    if header:
        out.write("\t".join(columns) + "\n")
    for row in records:
        cells = [render(row[name]) for name in columns]
        out.write("\t".join(cells) + "\n")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def write_csv(records: list[dict], out: TextIO):
    """Write *records* to *out* as CSV with a header row.

    Field names come from the first record. Floats are rendered with six
    significant digits; every other value is passed to the csv writer as-is.
    Nothing at all is written when *records* is empty.
    """
    if not records:
        return
    writer = csv.DictWriter(out, fieldnames=records[0].keys())
    writer.writeheader()
    for row in records:
        formatted = {}
        for name, value in row.items():
            if isinstance(value, float):
                value = f"{value:.6g}"
            formatted[name] = value
        writer.writerow(formatted)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def write_json(records: list[dict], out: TextIO):
    """Serialize *records* to *out* as indented JSON followed by a newline.

    Values the json module cannot encode natively are converted with ``str``.
    """
    payload = json.dumps(records, indent=2, default=str)
    out.write(payload)
    out.write("\n")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_output(output_path: str | None) -> TextIO:
    """Get output file handle (stdout if None).

    Args:
        output_path: Destination file path; ``None`` or an empty string
            selects ``sys.stdout``.

    Returns:
        A writable text stream. When a path is given the caller owns the
        returned handle and must close it.
    """
    if output_path:
        # Explicit UTF-8 keeps output independent of the platform locale.
        # newline="" disables newline translation, as the csv module requires
        # for files it writes to (otherwise Windows output gets "\r\r\n").
        return open(output_path, "w", encoding="utf-8", newline="")
    return sys.stdout
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def format_output(records: list[dict], output_path: str | None,
                  fmt: str = "tsv", command: str = "", args: dict | None = None):
    """Format and write records to output.

    Args:
        records: Rows to write.
        output_path: Destination file; ``None``/empty writes to stdout.
        fmt: One of ``"tsv"``, ``"csv"`` or ``"json"``; any other value
            writes no records.
        command: Command name used in the header comment.
        args: Command arguments echoed in the header comment. The header is
            written only for file output in a non-JSON format.
    """
    out = get_output(output_path)
    try:
        # JSON has no comment syntax: "#" header lines in front of the
        # document would make the file unparseable, so skip them for json.
        if args and output_path and fmt != "json":
            write_header_comment(out, command, args)
        if fmt == "tsv":
            write_tsv(records, out)
        elif fmt == "csv":
            write_csv(records, out)
        elif fmt == "json":
            write_json(records, out)
    finally:
        # Close only handles this function opened; never close stdout.
        if output_path and out is not sys.stdout:
            out.close()
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: graphpop-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Graph database-native population genomics CLI with O(V*K) complexity
|
|
5
|
+
Author: Jianfeng Mao
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/jfmao/GraphPop
|
|
8
|
+
Project-URL: Repository, https://github.com/jfmao/GraphPop
|
|
9
|
+
Project-URL: Issues, https://github.com/jfmao/GraphPop/issues
|
|
10
|
+
Keywords: population-genomics,graph-database,neo4j,bioinformatics,genetics
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Requires-Dist: click>=8.0
|
|
21
|
+
Requires-Dist: neo4j>=5.0
|
|
22
|
+
Requires-Dist: pyyaml>=6.0
|
|
23
|
+
Provides-Extra: plot
|
|
24
|
+
Requires-Dist: matplotlib>=3.5; extra == "plot"
|
|
25
|
+
Requires-Dist: numpy>=1.22; extra == "plot"
|
|
26
|
+
|
|
27
|
+
# GraphPop CLI
|
|
28
|
+
|
|
29
|
+
Command-line interface for **GraphPop** — a graph database-native population genomics engine that reduces summary statistic complexity from O(V×N) to O(V×K), independent of sample count.
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install graphpop-cli
|
|
35
|
+
graphpop setup --password mypass # Downloads Neo4j + procedures plugin
|
|
36
|
+
graphpop start # Start the database
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Prerequisites:** Python 3.10+, Java 21+ (for Neo4j runtime).
|
|
40
|
+
|
|
41
|
+
## Features
|
|
42
|
+
|
|
43
|
+
- **60 commands** across 11 functional domains
|
|
44
|
+
- **12 population genetics procedures**: diversity, Fst, SFS, iHS, XP-EHH, nSL, ROH, Garud's H, LD, genome scan, pop summary, joint SFS
|
|
45
|
+
- **Annotation conditioning**: `--consequence`, `--pathway`, `--gene` flags on any procedure
|
|
46
|
+
- **Persistent analytical records**: `--persist` writes results to graph nodes
|
|
47
|
+
- **Publication-ready plots**: 11 visualization types following Nature Methods guidelines
|
|
48
|
+
|
|
49
|
+
## Usage
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# Population diversity
|
|
53
|
+
graphpop diversity chr1 1 50000000 EUR -o diversity.tsv
|
|
54
|
+
|
|
55
|
+
# Annotation-conditioned analysis
|
|
56
|
+
graphpop diversity chr1 1 43270923 GJ-tmp --consequence missense_variant
|
|
57
|
+
|
|
58
|
+
# Selection scan
|
|
59
|
+
graphpop ihs chr22 EUR --persist -o ihs.tsv
|
|
60
|
+
|
|
61
|
+
# Multi-statistic convergence
|
|
62
|
+
graphpop converge --stats ihs,xpehh,h12 --thresholds 2,2,0.3 --pop EUR
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Documentation
|
|
66
|
+
|
|
67
|
+
- [Full documentation](https://github.com/jfmao/GraphPop)
|
|
68
|
+
- [Rice 3K vignette](https://github.com/jfmao/GraphPop/blob/main/graphpop-cli/vignettes/rice-3k-analysis.md)
|
|
69
|
+
- [Human 1000G vignette](https://github.com/jfmao/GraphPop/blob/main/graphpop-cli/vignettes/human-1000g-analysis.md)
|
|
70
|
+
|
|
71
|
+
## License
|
|
72
|
+
|
|
73
|
+
MIT
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
graphpop_cli/__init__.py,sha256=TGu9i13Y7A-Fm-12ozUMTprGa0O-QrYqr7neUWkPEwg,106
|
|
2
|
+
graphpop_cli/cli.py,sha256=pbsPGkcTk-tep6dNq7ZwdCr6Maf_i2sLKoOCzx78NhE,5190
|
|
3
|
+
graphpop_cli/config.py,sha256=oHnmdIZ_lstTPDMfBCdxdfPcE1BWlg5lSkgoJFzPhbE,1874
|
|
4
|
+
graphpop_cli/connection.py,sha256=6ZE3og3BsZDckG_E0PAx6-ow5ngpFPtE_1AxOk400cA,1538
|
|
5
|
+
graphpop_cli/formatters.py,sha256=dXH0YSJY51fFCS8qDSwSIxk1ZCBrS77tlHORT1ciSiw,2400
|
|
6
|
+
graphpop_cli/commands/__init__.py,sha256=ug95sIrk2eYKO-K29eRdpmMezey2qL8bcvnbCZWo6vg,36
|
|
7
|
+
graphpop_cli/commands/aggregate.py,sha256=VhtJuL-QMo33CQu8St0S18rhDisABF7c479dXRD1di4,8174
|
|
8
|
+
graphpop_cli/commands/batch.py,sha256=oietajzJ9SzvhEDyuWVcJsOL0txHURMs698qM94VPkA,6021
|
|
9
|
+
graphpop_cli/commands/compare.py,sha256=IKaKfRti13CPaHVYRvWDwIfRoZkHe7aZq_Jwzu52jwg,4492
|
|
10
|
+
graphpop_cli/commands/config_cmd.py,sha256=hUZKlPDhfoncRGAsaCEkaznq-5MQCHGppTlR_yhS3ng,3365
|
|
11
|
+
graphpop_cli/commands/converge.py,sha256=K2Vk1saJiUPsZ4Sd9j8ol9tJPMVlGewV8N1RdqBQGsQ,6202
|
|
12
|
+
graphpop_cli/commands/db.py,sha256=Ybg5Z6lRofw-Dpq66GuOx1xS0UTXqcdNIGUps9GbHS0,6424
|
|
13
|
+
graphpop_cli/commands/divergence.py,sha256=9BDu2AFwhl4VrNjiyx15FPIcQ9XPULfh3owuR-hFPj4,1652
|
|
14
|
+
graphpop_cli/commands/diversity.py,sha256=r14-qjWxaiPyLwqBt_hDIBoGzPdgyPtVuIMT_6NA2Vo,1734
|
|
15
|
+
graphpop_cli/commands/dump.py,sha256=MlS8f4q2WRyl_ZZWPLOdLufraJJUHjllD1bnqo0C2wo,7830
|
|
16
|
+
graphpop_cli/commands/export_bed.py,sha256=43O51yk8isNDOBCswS1pKae3ULWB7ZwwBbfwklUpgLg,6381
|
|
17
|
+
graphpop_cli/commands/export_windows.py,sha256=ti6H1diRnSYDSpNQS9A8cUI5LSFQwM2So6FYLcfRk2E,3651
|
|
18
|
+
graphpop_cli/commands/extract.py,sha256=fyCt5oAll63KlkOZ2xzFPtyUXoyM5_QZOhL9X2iFOSc,11680
|
|
19
|
+
graphpop_cli/commands/filter_results.py,sha256=ySmWHL9IINplehz409w9kwVv4DKwldk37cDLF3Asblo,6574
|
|
20
|
+
graphpop_cli/commands/garud_h.py,sha256=qWvhdFpHNhE_9dQZyLDopwfqEDPN8S2qNPFY0Peua7c,1338
|
|
21
|
+
graphpop_cli/commands/genome_scan.py,sha256=q76OOWxCi3fJw0H0RMYhX1AO6eU5s-1PjIeiPV1xF0c,2025
|
|
22
|
+
graphpop_cli/commands/ihs.py,sha256=8uBoHF2eIRsBImOrw_1DUCcezezYdd4H8G2JD-6i_Uk,1225
|
|
23
|
+
graphpop_cli/commands/import_data.py,sha256=akQq4_HmuykPK_9l7UP4rDhcquI9SYyOsf_kgnW1d7M,10767
|
|
24
|
+
graphpop_cli/commands/inventory.py,sha256=b6brcO_sWd-qwyvtf7mkHn7OGR7UxA_TlXS8rkC3XAI,6040
|
|
25
|
+
graphpop_cli/commands/joint_sfs.py,sha256=aYMrzJt2n7IpnRaN83YCPEjroQfsrrSxhAENXCGVnkE,1804
|
|
26
|
+
graphpop_cli/commands/ld.py,sha256=dyxH1_suR0zNwOmTAYrGrrBQQapf6KvD8uR9Rs34m7A,1519
|
|
27
|
+
graphpop_cli/commands/lookup.py,sha256=42mcLHTKD8TCPNPGMuY33O-QOBqWXG6dkWchJXEpeyI,7032
|
|
28
|
+
graphpop_cli/commands/neighbors.py,sha256=M67nLlEoypceWLW2WLcMMnWsx0oHrSL8UJOLrAjxPIo,6757
|
|
29
|
+
graphpop_cli/commands/nsl.py,sha256=I2vQVUfpNMeRL5_uXEep8sha9B1he4f9XTFyTiEhvS4,1227
|
|
30
|
+
graphpop_cli/commands/plot.py,sha256=E4RgSpQODRT14x3AbVsr2YIb3NFAWTRNlC0zwWOta50,39453
|
|
31
|
+
graphpop_cli/commands/pop_summary.py,sha256=SX7Wdxq-G7j3DMfvBKKWt6nqAzc0_SvmSJgL1QRM3f0,1361
|
|
32
|
+
graphpop_cli/commands/query.py,sha256=5jrQmKiQjryLf-w6YbH6c7Ss4FVxBAmPZUanndjdwl4,589
|
|
33
|
+
graphpop_cli/commands/rank_genes.py,sha256=3fQXeG_8cW7aZg4_NlbEg-vgFwFbyUjo0In6W2L8WE0,6817
|
|
34
|
+
graphpop_cli/commands/report.py,sha256=I1bue4HwpFsNl_Rm83UpV2SQQ7lFm3e335r2PEAiMo0,7938
|
|
35
|
+
graphpop_cli/commands/roh.py,sha256=-gSsZaecLxpRs_kepnVIGNa12aVDw3qP_hd1qg8v4M0,1287
|
|
36
|
+
graphpop_cli/commands/run_all.py,sha256=O0AqnjD59deldPfcCnoy0-RR1H57T2f1j9gBrdOSlWc,11789
|
|
37
|
+
graphpop_cli/commands/server.py,sha256=vPZwSy7R5nIbXuxdtnr64-P5jP3UfJcpgGaStzmsVBM,3561
|
|
38
|
+
graphpop_cli/commands/setup.py,sha256=Ei5jGrnaR0wZSKzy6cKhgK_EE9hphwbWZk4I-t8x5pw,10895
|
|
39
|
+
graphpop_cli/commands/sfs.py,sha256=0pIIRYAENJsYbH248e9Ql5Lw0s9Dn-n2Rp_lXRUypc8,1739
|
|
40
|
+
graphpop_cli/commands/validate.py,sha256=hrv4gjk6ZKfJwQcDZYGBpdUUNesfHt80BgwEod0n22U,5898
|
|
41
|
+
graphpop_cli/commands/xpehh.py,sha256=kENF8xmt-RpGI8eru_WlTOwDFlf50znZBoshUtJJorw,1338
|
|
42
|
+
graphpop_cli-0.1.0.dist-info/METADATA,sha256=6kK9rZLyhJH8CHiJU65wfkgHM6ljAnq669arvOOU-B4,2644
|
|
43
|
+
graphpop_cli-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
44
|
+
graphpop_cli-0.1.0.dist-info/entry_points.txt,sha256=fAtumzONXTTCKMKvafi_AJtLeX8U9aeBpBvtIXV9jjU,51
|
|
45
|
+
graphpop_cli-0.1.0.dist-info/top_level.txt,sha256=noQV0fnJye3OHhLQNeph2SIlrIm8QzIz4lOFAHsaBmc,13
|
|
46
|
+
graphpop_cli-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
graphpop_cli
|