graphpop-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphpop_cli/__init__.py +2 -0
- graphpop_cli/cli.py +161 -0
- graphpop_cli/commands/__init__.py +1 -0
- graphpop_cli/commands/aggregate.py +206 -0
- graphpop_cli/commands/batch.py +155 -0
- graphpop_cli/commands/compare.py +118 -0
- graphpop_cli/commands/config_cmd.py +117 -0
- graphpop_cli/commands/converge.py +156 -0
- graphpop_cli/commands/db.py +188 -0
- graphpop_cli/commands/divergence.py +37 -0
- graphpop_cli/commands/diversity.py +36 -0
- graphpop_cli/commands/dump.py +210 -0
- graphpop_cli/commands/export_bed.py +170 -0
- graphpop_cli/commands/export_windows.py +91 -0
- graphpop_cli/commands/extract.py +271 -0
- graphpop_cli/commands/filter_results.py +165 -0
- graphpop_cli/commands/garud_h.py +30 -0
- graphpop_cli/commands/genome_scan.py +41 -0
- graphpop_cli/commands/ihs.py +29 -0
- graphpop_cli/commands/import_data.py +266 -0
- graphpop_cli/commands/inventory.py +160 -0
- graphpop_cli/commands/joint_sfs.py +38 -0
- graphpop_cli/commands/ld.py +35 -0
- graphpop_cli/commands/lookup.py +207 -0
- graphpop_cli/commands/neighbors.py +175 -0
- graphpop_cli/commands/nsl.py +29 -0
- graphpop_cli/commands/plot.py +1066 -0
- graphpop_cli/commands/pop_summary.py +30 -0
- graphpop_cli/commands/query.py +15 -0
- graphpop_cli/commands/rank_genes.py +177 -0
- graphpop_cli/commands/report.py +264 -0
- graphpop_cli/commands/roh.py +30 -0
- graphpop_cli/commands/run_all.py +276 -0
- graphpop_cli/commands/server.py +98 -0
- graphpop_cli/commands/setup.py +299 -0
- graphpop_cli/commands/sfs.py +38 -0
- graphpop_cli/commands/validate.py +167 -0
- graphpop_cli/commands/xpehh.py +31 -0
- graphpop_cli/config.py +57 -0
- graphpop_cli/connection.py +52 -0
- graphpop_cli/formatters.py +81 -0
- graphpop_cli-0.1.0.dist-info/METADATA +73 -0
- graphpop_cli-0.1.0.dist-info/RECORD +46 -0
- graphpop_cli-0.1.0.dist-info/WHEEL +5 -0
- graphpop_cli-0.1.0.dist-info/entry_points.txt +2 -0
- graphpop_cli-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""graphpop run-all — orchestrate full-genome analysis across populations and chromosomes."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
|
|
10
|
+
from ..cli import pass_ctx
|
|
11
|
+
from ..config import build_cypher
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Procedure names run by ``run_all`` in each phase. Phase 1 iterates over
# (population, chromosome); Phase 2 iterates over (population pair, chromosome).
PHASE1_PROCEDURES = [
    "diversity",
    "sfs",
    "pop_summary",
    "roh",
    "ihs",
    "nsl",
    "garud_h",
]
PHASE2_PROCEDURES = [
    "xpehh",
    "divergence",
]

# Column names passed as ``yield_cols`` to ``build_cypher`` for each procedure.
YIELD_COLS = {
    "diversity": [
        "pi", "theta_w", "tajima_d", "fay_wu_h", "fay_wu_h_norm",
        "het_exp", "het_obs", "fis", "n_variants", "n_segregating",
    ],
    "sfs": ["sfs", "n_variants", "max_ac"],
    "pop_summary": ["pi", "theta_w", "tajima_d", "n_variants", "n_segregating"],
    "roh": [
        "sampleId", "n_roh", "total_length", "froh", "mean_length", "max_length",
    ],
    "ihs": ["variantId", "pos", "af", "ihs_unstd", "ihs"],
    "nsl": ["variantId", "pos", "af", "nsl_unstd", "nsl"],
    "garud_h": [
        "chr", "start", "end", "population", "h1", "h12", "h2_h1",
        "hap_diversity", "n_haplotypes", "n_variants",
    ],
    "xpehh": ["variantId", "pos", "af_pop1", "af_pop2", "xpehh_unstd", "xpehh"],
    "divergence": ["fst_hudson", "fst_wc", "dxy", "da", "pbs", "n_variants"],
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_chromosome_lengths(ctx) -> dict[str, int]:
    """Return a ``{chromosomeId: length}`` mapping read from Chromosome nodes.

    Args:
        ctx: Object exposing ``run(cypher)`` that returns an iterable of
            rows indexable by column alias.

    Returns:
        Dict keyed by the ``chr`` column with the ``length`` column as value,
        in the order the rows were returned.
    """
    query = (
        "MATCH (c:Chromosome) "
        "RETURN c.chromosomeId AS chr, c.length AS length "
        "ORDER BY c.chromosomeId"
    )
    lengths = {}
    for row in ctx.run(query):
        lengths[row["chr"]] = row["length"]
    return lengths
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_populations(ctx) -> list[str]:
    """Return population IDs from the graph, as ordered by the query.

    The query keeps only Population nodes with ``n_samples > 1`` and orders
    them by ``n_samples`` descending.

    Args:
        ctx: Object exposing ``run(cypher)`` that returns an iterable of
            rows indexable by column alias.

    Returns:
        List of values from the ``pop`` column, in row order.
    """
    query = (
        "MATCH (p:Population) WHERE p.n_samples > 1 "
        "RETURN p.populationId AS pop ORDER BY p.n_samples DESC"
    )
    names = []
    for row in ctx.run(query):
        names.append(row["pop"])
    return names
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def get_xpehh_pairs(populations: list[str], max_pairs: int = 20) -> list[tuple[str, str]]:
    """Generate representative XP-EHH population pairs.

    Pairs are the unordered 2-combinations of ``populations`` taken in list
    order (first population against every later one, then the second, ...),
    truncated to at most ``max_pairs`` entries.

    Args:
        populations: Population IDs, in priority order.
        max_pairs: Upper bound on the number of pairs returned. Zero or a
            negative value yields an empty list.

    Returns:
        List of ``(pop1, pop2)`` tuples.
    """
    from itertools import combinations, islice

    # Clamp so a non-positive limit returns no pairs; the previous loop only
    # checked the limit after appending, so it always emitted at least one.
    limit = max(max_pairs, 0)
    return list(islice(combinations(populations, 2), limit))
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Chromosome end coordinate used when the graph reports no length for a
# chromosome (missing from the lookup, or stored as null).
_FALLBACK_CHR_LENGTH = 300_000_000


def _split_csv(value: str) -> list[str]:
    """Split a comma-separated CLI value, trimming whitespace and dropping blanks."""
    return [item.strip() for item in value.split(",") if item.strip()]


def _task_done(results: dict, key: str) -> bool:
    """Return True when *key* has a stored entry that is not an error record."""
    entry = results.get(key)
    return isinstance(entry, dict) and "error" not in entry


def _phase1_cypher(proc: str, chrom: str, pop: str, chr_lens: dict):
    """Build the Cypher call for one per-population procedure, or None if unknown."""
    if proc in ("diversity", "sfs", "pop_summary"):
        # ``or`` also covers a chromosome whose stored length is null.
        length = chr_lens.get(chrom) or _FALLBACK_CHR_LENGTH
        return build_cypher(
            f"graphpop.{proc}",
            [f"'{chrom}'", "1", str(length), f"'{pop}'"],
            yield_cols=YIELD_COLS.get(proc),
        )
    if proc in ("ihs", "nsl"):
        return build_cypher(
            f"graphpop.{proc}",
            [f"'{chrom}'", f"'{pop}'"],
            options={"min_af": 0.05},
            yield_cols=YIELD_COLS[proc],
        )
    if proc == "roh":
        return build_cypher(
            f"graphpop.{proc}",
            [f"'{chrom}'", f"'{pop}'"],
            yield_cols=YIELD_COLS["roh"],
        )
    if proc == "garud_h":
        # NOTE(review): "100000" / "50000" look like window size and step —
        # confirm against the graphpop.garud_h procedure signature.
        return build_cypher(
            f"graphpop.{proc}",
            [f"'{chrom}'", f"'{pop}'", "100000", "50000"],
            yield_cols=YIELD_COLS["garud_h"],
        )
    return None


def _phase2_cypher(proc: str, chrom: str, pop1: str, pop2: str, chr_lens: dict):
    """Build the Cypher call for one pairwise procedure, or None if unknown."""
    if proc == "xpehh":
        return build_cypher(
            "graphpop.xpehh",
            [f"'{chrom}'", f"'{pop1}'", f"'{pop2}'"],
            options={"min_af": 0.05},
            yield_cols=YIELD_COLS["xpehh"],
        )
    if proc == "divergence":
        length = chr_lens.get(chrom) or _FALLBACK_CHR_LENGTH
        return build_cypher(
            "graphpop.divergence",
            [f"'{chrom}'", "1", str(length), f"'{pop1}'", f"'{pop2}'"],
            yield_cols=YIELD_COLS["divergence"],
        )
    return None


def _record_success(results: dict, key: str, meta: dict, records, tsv_path: Path):
    """Store the result summary under *key* and write the rows to *tsv_path*."""
    results[key] = {
        **meta,
        "n_records": len(records),
        "summary": records[0] if len(records) == 1 else f"{len(records)} rows",
    }
    tsv_path.parent.mkdir(exist_ok=True)
    if records:
        _write_tsv(tsv_path, records)


@click.command("run-all")
@click.option("--phase", type=click.Choice(["1", "2", "all"]), default="all",
              help="Phase 1 (per-pop), Phase 2 (pairwise), or all")
@click.option("--output-dir", "-d", type=click.Path(), default="graphpop_results",
              help="Output directory for result files")
@click.option("--resume/--no-resume", default=True,
              help="Skip already-completed tasks (default: resume)")
@click.option("--json-output", type=click.Path(),
              help="Accumulated JSON results file (default: <output-dir>/results.json)")
@click.option("--persist/--no-persist", default=True,
              help="Write results to graph nodes (default: yes)")
@click.option("--populations", "pop_list",
              help="Comma-separated population list (default: auto-detect)")
@click.option("--chromosomes", "chr_list",
              help="Comma-separated chromosome list (default: auto-detect)")
@click.option("--xpehh-pairs", type=int, default=20,
              help="Max number of XP-EHH population pairs")
@click.option("--workers", type=int, default=1,
              help="Parallel workers (experimental)")
@pass_ctx
def run_all(ctx, phase, output_dir, resume, json_output, persist,
            pop_list, chr_list, xpehh_pairs, workers):
    """Run full-genome analysis across all populations and chromosomes.

    Phase 1: Per-population statistics (diversity, SFS, iHS, nSL, ROH, Garud's H)
    for each population × chromosome combination.

    Phase 2: Pairwise statistics (XP-EHH, divergence) for representative
    population pairs × chromosomes.

    Results are saved as TSV files in the output directory and optionally
    persisted to graph nodes.
    """
    # NOTE: ``persist`` and ``workers`` are accepted for CLI compatibility but
    # are not referenced anywhere in this function.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    json_path = Path(json_output) if json_output else out_dir / "results.json"
    # A custom --json-output may live outside the output directory.
    json_path.parent.mkdir(parents=True, exist_ok=True)

    # Load or initialize results
    results = {}
    if resume and json_path.exists():
        with open(json_path) as f:
            results = json.load(f)
        click.echo(f"Resuming from {json_path} ({len(results)} entries)")

    # Auto-detect populations and chromosomes; chromosome lengths are always
    # queried because the windowed procedures need an end coordinate.
    click.echo("Querying graph for populations and chromosomes...")
    chr_lens = get_chromosome_lengths(ctx)
    chromosomes = _split_csv(chr_list) if chr_list else sorted(chr_lens)
    populations = _split_csv(pop_list) if pop_list else get_populations(ctx)

    click.echo(f"Populations: {len(populations)} ({', '.join(populations[:5])}...)")
    click.echo(f"Chromosomes: {len(chromosomes)} ({', '.join(chromosomes[:3])}...)")

    # Phase 1: Per-population
    if phase in ("1", "all"):
        click.echo(f"\n=== Phase 1: Per-population ({len(populations)} pops × {len(chromosomes)} chrs) ===")
        total = len(populations) * len(chromosomes) * len(PHASE1_PROCEDURES)
        done = 0
        executed = 0  # tasks actually run in this invocation (excludes resumed)
        t0 = time.time()

        for pop in populations:
            for chrom in chromosomes:
                for proc in PHASE1_PROCEDURES:
                    key = f"{pop}_{chrom}_{proc}"
                    # Only successful entries count as completed, so tasks
                    # that failed in an earlier run are retried on resume.
                    if resume and _task_done(results, key):
                        done += 1
                        continue

                    try:
                        cypher = _phase1_cypher(proc, chrom, pop, chr_lens)
                        if cypher is None:
                            continue
                        records = ctx.run(cypher)
                        _record_success(
                            results, key,
                            {"population": pop, "chr": chrom, "procedure": proc},
                            records,
                            out_dir / proc / f"{pop}_{chrom}.tsv",
                        )
                    except Exception as e:
                        # Best effort: record the failure and keep going.
                        results[key] = {"error": str(e)}
                        click.echo(f" ERROR {key}: {e}", err=True)

                    done += 1
                    executed += 1
                    elapsed = time.time() - t0
                    # Rate is based on tasks run now; resumed tasks cost no
                    # time and would otherwise make the ETA too optimistic.
                    rate = executed / elapsed if elapsed > 0 else 0
                    eta = (total - done) / rate if rate > 0 else 0
                    if done % 10 == 0:
                        click.echo(
                            f" [{done}/{total}] {key} "
                            f"({elapsed:.0f}s elapsed, ~{eta:.0f}s remaining)"
                        )

                # Save checkpoint after each chromosome
                _save_json(json_path, results)

    # Phase 2: Pairwise
    if phase in ("2", "all"):
        pairs = get_xpehh_pairs(populations, xpehh_pairs)
        click.echo(f"\n=== Phase 2: Pairwise ({len(pairs)} pairs × {len(chromosomes)} chrs) ===")

        for pop1, pop2 in pairs:
            for chrom in chromosomes:
                for proc in PHASE2_PROCEDURES:
                    key = f"{pop1}_vs_{pop2}_{chrom}_{proc}"
                    if resume and _task_done(results, key):
                        continue

                    try:
                        cypher = _phase2_cypher(proc, chrom, pop1, pop2, chr_lens)
                        if cypher is None:
                            continue
                        records = ctx.run(cypher)
                        _record_success(
                            results, key,
                            {"pop1": pop1, "pop2": pop2, "chr": chrom,
                             "procedure": proc},
                            records,
                            out_dir / proc / f"{pop1}_vs_{pop2}_{chrom}.tsv",
                        )
                    except Exception as e:
                        results[key] = {"error": str(e)}
                        click.echo(f" ERROR {key}: {e}", err=True)

                # Checkpoint after each chromosome, as in Phase 1.
                _save_json(json_path, results)

    # Final save
    _save_json(json_path, results)
    n_ok = sum(1 for v in results.values() if "error" not in v)
    n_err = sum(1 for v in results.values() if "error" in v)
    click.echo(f"\nDone. {n_ok} succeeded, {n_err} failed.")
    click.echo(f"Results: {json_path}")
    click.echo(f"TSV files: {out_dir}/")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _write_tsv(path: Path, records: list[dict]):
|
|
250
|
+
"""Write records to a TSV file."""
|
|
251
|
+
if not records:
|
|
252
|
+
return
|
|
253
|
+
keys = list(records[0].keys())
|
|
254
|
+
with open(path, "w") as f:
|
|
255
|
+
f.write("\t".join(keys) + "\n")
|
|
256
|
+
for rec in records:
|
|
257
|
+
vals = []
|
|
258
|
+
for k in keys:
|
|
259
|
+
v = rec[k]
|
|
260
|
+
if isinstance(v, float):
|
|
261
|
+
vals.append(f"{v:.6g}")
|
|
262
|
+
elif isinstance(v, list):
|
|
263
|
+
vals.append(",".join(str(x) for x in v))
|
|
264
|
+
elif v is None:
|
|
265
|
+
vals.append("NA")
|
|
266
|
+
else:
|
|
267
|
+
vals.append(str(v))
|
|
268
|
+
f.write("\t".join(vals) + "\n")
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _save_json(path: Path, data: dict):
|
|
272
|
+
"""Save results as JSON with atomic write."""
|
|
273
|
+
tmp = path.with_suffix(".tmp")
|
|
274
|
+
with open(tmp, "w") as f:
|
|
275
|
+
json.dump(data, f, indent=2, default=str)
|
|
276
|
+
tmp.rename(path)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""graphpop start/stop/status — Neo4j server lifecycle management."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import subprocess
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _get_neo4j_home() -> Path:
|
|
12
|
+
"""Get Neo4j home from config or default."""
|
|
13
|
+
config_path = Path.home() / ".graphpop" / "config.yaml"
|
|
14
|
+
if config_path.exists():
|
|
15
|
+
with open(config_path) as f:
|
|
16
|
+
cfg = yaml.safe_load(f) or {}
|
|
17
|
+
if "neo4j_home" in cfg:
|
|
18
|
+
return Path(cfg["neo4j_home"])
|
|
19
|
+
# Fallbacks
|
|
20
|
+
for candidate in [Path.home() / "neo4j", Path("/var/lib/neo4j")]:
|
|
21
|
+
if (candidate / "bin" / "neo4j").exists():
|
|
22
|
+
return candidate
|
|
23
|
+
return Path.home() / "neo4j"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _run_neo4j_cmd(command: str, neo4j_home: Path | None = None) -> tuple[int, str]:
|
|
27
|
+
"""Run a neo4j command and return (returncode, output)."""
|
|
28
|
+
home = neo4j_home or _get_neo4j_home()
|
|
29
|
+
neo4j_bin = home / "bin" / "neo4j"
|
|
30
|
+
if not neo4j_bin.exists():
|
|
31
|
+
return 1, f"Neo4j not found at {home}. Run 'graphpop setup' first."
|
|
32
|
+
result = subprocess.run(
|
|
33
|
+
[str(neo4j_bin), command],
|
|
34
|
+
capture_output=True, text=True,
|
|
35
|
+
)
|
|
36
|
+
output = (result.stdout + result.stderr).strip()
|
|
37
|
+
return result.returncode, output
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@click.command()
@click.option("--neo4j-home", type=click.Path(), help="Neo4j installation directory")
def start(neo4j_home):
    """Start the Neo4j database server.

    Exits with the launcher's non-zero status if Neo4j could not be started.
    """
    home = Path(neo4j_home) if neo4j_home else None
    click.echo("Starting Neo4j...")
    rc, output = _run_neo4j_cmd("start", home)
    if rc != 0:
        # Report the failure on stderr and propagate it, so shell scripts
        # and CI steps can detect that the server did not start.
        click.echo(output, err=True)
        raise SystemExit(rc)
    click.echo(output)
    click.echo("\nNeo4j started. Use 'graphpop status' to verify.")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@click.command()
@click.option("--neo4j-home", type=click.Path(), help="Neo4j installation directory")
def stop(neo4j_home):
    """Stop the Neo4j database server.

    Exits with the launcher's non-zero status if the stop command failed.
    """
    home = Path(neo4j_home) if neo4j_home else None
    click.echo("Stopping Neo4j...")
    rc, output = _run_neo4j_cmd("stop", home)
    if rc != 0:
        # Report the failure on stderr and propagate it to the caller.
        click.echo(output, err=True)
        raise SystemExit(rc)
    click.echo(output)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@click.command()
@click.option("--neo4j-home", type=click.Path(), help="Neo4j installation directory")
def status(neo4j_home):
    """Check whether Neo4j is running and show database info."""
    home = _get_neo4j_home() if not neo4j_home else Path(neo4j_home)

    # Server state as reported by `neo4j status`; the return code is not used.
    _, report = _run_neo4j_cmd("status", home)
    click.echo(report)

    # Version line, printed only when the launcher exists and exits cleanly.
    launcher = home / "bin" / "neo4j"
    if launcher.exists():
        version_proc = subprocess.run(
            [str(launcher), "version"],
            capture_output=True,
            text=True,
        )
        if version_proc.returncode == 0:
            click.echo(f"Version: {version_proc.stdout.strip()}")

    # Selected values from the GraphPop config file, if there is one.
    config_path = Path.home() / ".graphpop" / "config.yaml"
    if config_path.exists():
        with open(config_path) as fh:
            cfg = yaml.safe_load(fh) or {}
        click.echo(f"\nGraphPop config ({config_path}):")
        for label, field in ((" URI: ", "uri"),
                             (" Database: ", "database"),
                             (" Neo4j: ", "neo4j_home")):
            click.echo(f"{label}{cfg.get(field, 'not set')}")

    # Plugin presence: any graphpop*.jar in the plugins directory.
    plugins_dir = home / "plugins"
    jar_files = []
    if plugins_dir.exists():
        jar_files = list(plugins_dir.glob("graphpop*.jar"))
    if not jar_files:
        click.echo("\nGraphPop plugin: NOT INSTALLED")
        click.echo(" Build with: cd graphpop-procedures && mvn package")
        click.echo(" Deploy with: graphpop setup --deploy-plugin target/graphpop-procedures-*.jar")
    else:
        click.echo(f"\nGraphPop plugin: {jar_files[0].name}")
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
"""graphpop setup — download, configure, and initialize Neo4j for GraphPop."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import platform
|
|
5
|
+
import shutil
|
|
6
|
+
import subprocess
|
|
7
|
+
import tarfile
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
import yaml
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Installation directory used when --neo4j-home is not supplied.
DEFAULT_NEO4J_HOME = Path.home() / "neo4j"

# Neo4j Community Edition release fetched by ``_download_neo4j``.
NEO4J_VERSION = "5.26.0"
NEO4J_DOWNLOAD_URL = (
    "https://dist.neo4j.org/neo4j-community-" + NEO4J_VERSION + "-unix.tar.gz"
)

# GraphPop procedures plugin — auto-downloaded from GitHub Releases
GRAPHPOP_PROCEDURES_VERSION = "0.1.0"
GRAPHPOP_JAR_NAME = "graphpop-procedures-" + GRAPHPOP_PROCEDURES_VERSION + ".jar"
GRAPHPOP_JAR_URL = "/".join((
    "https://github.com/jfmao/GraphPop/releases/download",
    "v" + GRAPHPOP_PROCEDURES_VERSION,
    GRAPHPOP_JAR_NAME,
))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@click.command()
@click.option("--neo4j-home", type=click.Path(), default=str(DEFAULT_NEO4J_HOME),
              help=f"Neo4j installation directory (default: {DEFAULT_NEO4J_HOME})")
@click.option("--pagecache", default="16g",
              help="Neo4j page cache size (default: 16g)")
@click.option("--heap", default="4g",
              help="Neo4j JVM heap size (default: 4g)")
@click.option("--password", prompt=True, hide_input=True,
              confirmation_prompt=True,
              help="Neo4j password for the 'neo4j' user")
@click.option("--skip-download", is_flag=True,
              help="Skip downloading Neo4j (use existing installation)")
@click.option("--deploy-plugin", type=click.Path(exists=True), default=None,
              help="Path to a local graphpop-procedures.jar (skips auto-download)")
@click.option("--skip-plugin", is_flag=True,
              help="Skip deploying the GraphPop procedures plugin")
def setup(neo4j_home, pagecache, heap, password, skip_download, deploy_plugin,
          skip_plugin):
    """Set up Neo4j for GraphPop.

    Downloads Neo4j Community Edition, automatically downloads and deploys
    the pre-compiled GraphPop procedures plugin, configures memory settings,
    sets the initial password, and creates the GraphPop config file.

    No Maven build is required — the plugin is downloaded as a pre-compiled
    JAR from GitHub Releases. A Java 21+ runtime must be installed, because
    Neo4j itself needs it to run.

    \b
    Examples:
      graphpop setup --password mypass
      graphpop setup --neo4j-home /opt/neo4j --pagecache 20g --heap 8g
      graphpop setup --deploy-plugin path/to/local/graphpop-procedures.jar
      graphpop setup --skip-plugin --password mypass
    """
    neo4j_path = Path(neo4j_home)

    # Step 0: Check Java runtime (exits if no `java` executable is found)
    _check_java()

    # Step 1: Download Neo4j
    if not skip_download:
        if neo4j_path.exists() and (neo4j_path / "bin" / "neo4j").exists():
            click.echo(f"Neo4j already installed at {neo4j_path}")
            if not click.confirm("Re-install?"):
                skip_download = True

    if not skip_download:
        _download_neo4j(neo4j_path)

    # Verify installation
    neo4j_bin = neo4j_path / "bin" / "neo4j"
    if not neo4j_bin.exists():
        click.echo(f"Error: Neo4j not found at {neo4j_path}", err=True)
        click.echo("Use --neo4j-home to specify the installation directory.", err=True)
        raise SystemExit(1)

    # Step 2: Configure Neo4j
    click.echo("\nConfiguring Neo4j...")
    _configure_neo4j(neo4j_path, pagecache, heap)

    # Step 3: Set initial password
    click.echo("Setting Neo4j password...")
    _set_password(neo4j_path, password)

    # Step 4: Deploy GraphPop plugin
    # Priority: user-provided JAR > conda-bundled JAR > GitHub download
    plugin_dest = neo4j_path / "plugins" / "graphpop-procedures.jar"
    if deploy_plugin:
        # Use user-provided local JAR
        click.echo(f"Deploying GraphPop plugin from {deploy_plugin}...")
        # copy2 does not create missing parent directories.
        plugin_dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(deploy_plugin, plugin_dest)
        click.echo(f" Deployed to {plugin_dest}")
    elif not skip_plugin:
        # Check for conda-bundled JAR first
        conda_jar = _find_conda_jar()
        if conda_jar:
            click.echo("Deploying conda-bundled GraphPop plugin...")
            plugin_dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(conda_jar, plugin_dest)
            click.echo(f" Deployed to {plugin_dest}")
        else:
            # Auto-download pre-compiled JAR from GitHub Releases
            click.echo(f"Downloading GraphPop procedures plugin v{GRAPHPOP_PROCEDURES_VERSION}...")
            _download_plugin(plugin_dest)
            click.echo(f" Deployed to {plugin_dest}")

    # Step 5: Create GraphPop config
    config_dir = Path.home() / ".graphpop"
    config_dir.mkdir(exist_ok=True)
    config_path = config_dir / "config.yaml"

    config = {
        "uri": "bolt://localhost:7687",
        "user": "neo4j",
        "password": password,
        "database": "neo4j",
        "neo4j_home": str(neo4j_path),
    }
    # The file stores the password in plaintext, so restrict it to the owner
    # before any content is written (chmod also tightens a pre-existing file).
    config_path.touch(mode=0o600, exist_ok=True)
    config_path.chmod(0o600)
    with open(config_path, "w") as f:
        yaml.dump(config, f, default_flow_style=False)
    click.echo(f"\nGraphPop config written to {config_path}")

    # Step 6: Summary
    if deploy_plugin or not skip_plugin:
        plugin_status = "deployed"
    else:
        plugin_status = "not deployed (use --deploy-plugin or remove --skip-plugin)"
    click.echo(f"""
Setup complete!

  Neo4j home: {neo4j_path}
  Page cache: {pagecache}
  Heap: {heap}
  Config: {config_path}
  Plugin: {plugin_status}

Next steps:
  graphpop start # Start Neo4j
  graphpop import --vcf data.vcf.gz \\
    --panel panel.txt --database mydb # Import data
  graphpop diversity chr1 1 50000000 POP # Run analysis
""")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _download_neo4j(dest: Path):
    """Download and extract Neo4j Community Edition.

    The tarball is cached in the system temporary directory and reused on
    later runs. Any existing ``dest`` directory is removed before the
    archive is unpacked next to it and renamed into place.

    Args:
        dest: Final Neo4j installation directory.
    """
    import tempfile
    import urllib.request

    # Respect TMPDIR and friends rather than assuming /tmp exists.
    tarball = Path(tempfile.gettempdir()) / f"neo4j-community-{NEO4J_VERSION}-unix.tar.gz"
    if tarball.exists():
        # NOTE(review): the cached archive is trusted as-is; no checksum is
        # verified here.
        click.echo(f"Using cached download: {tarball}")
    else:
        click.echo(f"Downloading Neo4j {NEO4J_VERSION}...")
        click.echo(f" URL: {NEO4J_DOWNLOAD_URL}")
        # Download under a temporary name so an interrupted transfer can
        # never be mistaken for a complete cached tarball on the next run.
        partial = tarball.with_name(tarball.name + ".part")
        try:
            urllib.request.urlretrieve(NEO4J_DOWNLOAD_URL, partial)
        except BaseException:
            partial.unlink(missing_ok=True)
            raise
        partial.replace(tarball)
        click.echo(f" Downloaded to {tarball}")

    click.echo(f"Extracting to {dest}...")
    dest.parent.mkdir(parents=True, exist_ok=True)
    if dest.exists():
        shutil.rmtree(dest)

    with tarfile.open(tarball) as tf:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters reject members that would escape the target
            # directory; available on Python versions that ship them.
            tf.extractall(dest.parent, filter="data")
        else:
            tf.extractall(dest.parent)

    # The tarball extracts to neo4j-community-X.Y.Z/
    extracted = dest.parent / f"neo4j-community-{NEO4J_VERSION}"
    if extracted.exists() and extracted != dest:
        extracted.rename(dest)
    click.echo(f" Installed to {dest}")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _configure_neo4j(neo4j_home: Path, pagecache: str, heap: str):
    """Apply GraphPop's settings to ``<neo4j_home>/conf/neo4j.conf``.

    Existing uncommented ``key=value`` lines whose key is managed here are
    rewritten in place; managed keys that were absent are appended. All
    other lines are kept unchanged. Each applied setting is echoed.

    Args:
        neo4j_home: Neo4j installation directory.
        pagecache: Value for ``server.memory.pagecache.size``.
        heap: Value used for both the initial and maximum heap size.
    """
    conf_path = neo4j_home / "conf" / "neo4j.conf"

    # Start from the current file content, or from nothing if it is missing.
    existing = conf_path.read_text().splitlines() if conf_path.exists() else []

    settings = {
        "server.memory.pagecache.size": pagecache,
        "server.memory.heap.initial_size": heap,
        "server.memory.heap.max_size": heap,
        "server.directories.import": "import",
        "db.tx_log.rotation.retention_policy": "2 days 2G",
        "dbms.security.procedures.unrestricted": "graphpop.*",
    }

    seen = set()
    merged = []
    for raw in existing:
        name = None
        if "=" in raw and not raw.startswith("#"):
            name = raw.split("=")[0].strip()
        if name and name in settings:
            merged.append(f"{name}={settings[name]}")
            seen.add(name)
        else:
            merged.append(raw)

    # Managed keys that the file did not already define go at the end.
    merged.extend(
        f"{name}={value}" for name, value in settings.items() if name not in seen
    )

    conf_path.write_text("\n".join(merged) + "\n")
    for name, value in settings.items():
        click.echo(f" {name}={value}")
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _check_java():
    """Verify that a Java runtime is available and warn if it is older than 21.

    Runs ``java -version``, echoes the first line of its output, and prints a
    warning to stderr when the parsed major version is below 21.

    Raises:
        SystemExit: With status 1 when no ``java`` executable can be found.
    """
    import re

    try:
        result = subprocess.run(
            ["java", "-version"], capture_output=True, text=True,
        )
    except FileNotFoundError:
        click.echo(
            "Error: Java not found. Neo4j requires Java 21+ to run.\n"
            "Install via:\n"
            " conda install -c conda-forge openjdk=21\n"
            " Or: sudo apt install openjdk-21-jre-headless",
            err=True,
        )
        raise SystemExit(1)

    output = result.stderr + result.stdout  # java -version prints to stderr
    lines = output.splitlines()
    # Guard against an empty banner, which used to raise IndexError.
    banner = lines[0].strip() if lines else "(no version output)"
    click.echo(f" Java found: {banner}")

    # Check version >= 21. The version is the first quoted number in the
    # output; legacy runtimes report e.g. "1.8.0_292", where the real major
    # version is the second component.
    m = re.search(r'"(\d+)(?:\.(\d+))?', output)
    if not m:
        return
    major = int(m.group(1))
    if major == 1 and m.group(2):
        major = int(m.group(2))
    if major < 21:
        click.echo(
            " Warning: Java 21+ is required by Neo4j. "
            f"Found version {major}.\n"
            " Install via: conda install -c conda-forge openjdk=21\n"
            " Or: sudo apt install openjdk-21-jre-headless",
            err=True,
        )
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _find_conda_jar() -> Path | None:
|
|
245
|
+
"""Look for a GraphPop JAR bundled by conda in the environment prefix."""
|
|
246
|
+
import sys
|
|
247
|
+
conda_prefix = Path(sys.prefix)
|
|
248
|
+
candidates = [
|
|
249
|
+
conda_prefix / "share" / "graphpop" / "plugins" / "graphpop-procedures.jar",
|
|
250
|
+
conda_prefix / "lib" / "graphpop" / "graphpop-procedures.jar",
|
|
251
|
+
]
|
|
252
|
+
for p in candidates:
|
|
253
|
+
if p.exists():
|
|
254
|
+
return p
|
|
255
|
+
return None
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _download_plugin(dest: Path):
    """Download the pre-compiled GraphPop procedures JAR from GitHub Releases.

    The JAR is cached in the system temporary directory and copied to
    ``dest``, whose parent directory is created if needed.

    Args:
        dest: Target path of the plugin JAR.

    Raises:
        SystemExit: With status 1 when the download fails.
    """
    import tempfile
    import urllib.request

    # Respect TMPDIR and friends rather than assuming /tmp exists.
    cache = Path(tempfile.gettempdir()) / GRAPHPOP_JAR_NAME
    if cache.exists():
        click.echo(f" Using cached plugin: {cache}")
    else:
        click.echo(f" URL: {GRAPHPOP_JAR_URL}")
        # Download under a temporary name so a failed or interrupted transfer
        # is never picked up later as a valid cached plugin.
        partial = cache.with_name(cache.name + ".part")
        try:
            urllib.request.urlretrieve(GRAPHPOP_JAR_URL, partial)
            partial.replace(cache)
        except Exception as e:
            partial.unlink(missing_ok=True)
            click.echo(f" Error downloading plugin: {e}", err=True)
            click.echo(
                " You can build locally instead:\n"
                " cd graphpop-procedures && ./mvnw package -DskipTests\n"
                " graphpop setup --deploy-plugin target/graphpop-procedures-0.1.0-SNAPSHOT.jar",
                err=True,
            )
            raise SystemExit(1)
    dest.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(cache, dest)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _set_password(neo4j_home: Path, password: str):
    """Set the initial Neo4j password through ``neo4j-admin``.

    Runs ``<neo4j_home>/bin/neo4j-admin dbms set-initial-password <password>``
    and echoes the outcome. Failures are reported as messages only; nothing
    is raised.

    Args:
        neo4j_home: Neo4j installation directory.
        password: Password passed to ``neo4j-admin`` as a command argument.
    """
    admin_bin = neo4j_home / "bin" / "neo4j-admin"
    argv = [str(admin_bin), "dbms", "set-initial-password", password]
    try:
        outcome = subprocess.run(argv, capture_output=True, text=True)
    except FileNotFoundError:
        click.echo(" Warning: neo4j-admin not found, skipping password setup")
        return

    if outcome.returncode == 0:
        click.echo(" Password set successfully")
        return

    # May already be set
    mentions_already = any(
        "already" in stream.lower() for stream in (outcome.stderr, outcome.stdout)
    )
    if mentions_already:
        click.echo(" Password already set (use Neo4j browser to change)")
    else:
        click.echo(f" Warning: {outcome.stderr.strip()}")
|