graphpop-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. graphpop_cli/__init__.py +2 -0
  2. graphpop_cli/cli.py +161 -0
  3. graphpop_cli/commands/__init__.py +1 -0
  4. graphpop_cli/commands/aggregate.py +206 -0
  5. graphpop_cli/commands/batch.py +155 -0
  6. graphpop_cli/commands/compare.py +118 -0
  7. graphpop_cli/commands/config_cmd.py +117 -0
  8. graphpop_cli/commands/converge.py +156 -0
  9. graphpop_cli/commands/db.py +188 -0
  10. graphpop_cli/commands/divergence.py +37 -0
  11. graphpop_cli/commands/diversity.py +36 -0
  12. graphpop_cli/commands/dump.py +210 -0
  13. graphpop_cli/commands/export_bed.py +170 -0
  14. graphpop_cli/commands/export_windows.py +91 -0
  15. graphpop_cli/commands/extract.py +271 -0
  16. graphpop_cli/commands/filter_results.py +165 -0
  17. graphpop_cli/commands/garud_h.py +30 -0
  18. graphpop_cli/commands/genome_scan.py +41 -0
  19. graphpop_cli/commands/ihs.py +29 -0
  20. graphpop_cli/commands/import_data.py +266 -0
  21. graphpop_cli/commands/inventory.py +160 -0
  22. graphpop_cli/commands/joint_sfs.py +38 -0
  23. graphpop_cli/commands/ld.py +35 -0
  24. graphpop_cli/commands/lookup.py +207 -0
  25. graphpop_cli/commands/neighbors.py +175 -0
  26. graphpop_cli/commands/nsl.py +29 -0
  27. graphpop_cli/commands/plot.py +1066 -0
  28. graphpop_cli/commands/pop_summary.py +30 -0
  29. graphpop_cli/commands/query.py +15 -0
  30. graphpop_cli/commands/rank_genes.py +177 -0
  31. graphpop_cli/commands/report.py +264 -0
  32. graphpop_cli/commands/roh.py +30 -0
  33. graphpop_cli/commands/run_all.py +276 -0
  34. graphpop_cli/commands/server.py +98 -0
  35. graphpop_cli/commands/setup.py +299 -0
  36. graphpop_cli/commands/sfs.py +38 -0
  37. graphpop_cli/commands/validate.py +167 -0
  38. graphpop_cli/commands/xpehh.py +31 -0
  39. graphpop_cli/config.py +57 -0
  40. graphpop_cli/connection.py +52 -0
  41. graphpop_cli/formatters.py +81 -0
  42. graphpop_cli-0.1.0.dist-info/METADATA +73 -0
  43. graphpop_cli-0.1.0.dist-info/RECORD +46 -0
  44. graphpop_cli-0.1.0.dist-info/WHEEL +5 -0
  45. graphpop_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. graphpop_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,276 @@
1
+ """graphpop run-all — orchestrate full-genome analysis across populations and chromosomes."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import time
6
+ from pathlib import Path
7
+
8
+ import click
9
+
10
+ from ..cli import pass_ctx
11
+ from ..config import build_cypher
12
+
13
+
14
# Procedures executed once per (population, chromosome) combination.
PHASE1_PROCEDURES = [
    "diversity",
    "sfs",
    "pop_summary",
    "roh",
    "ihs",
    "nsl",
    "garud_h",
]

# Procedures executed once per (population pair, chromosome) combination.
PHASE2_PROCEDURES = ["xpehh", "divergence"]

# Columns requested from each procedure, keyed by procedure name.
YIELD_COLS = dict(
    diversity=[
        "pi", "theta_w", "tajima_d", "fay_wu_h", "fay_wu_h_norm",
        "het_exp", "het_obs", "fis", "n_variants", "n_segregating",
    ],
    sfs=["sfs", "n_variants", "max_ac"],
    pop_summary=["pi", "theta_w", "tajima_d", "n_variants", "n_segregating"],
    roh=["sampleId", "n_roh", "total_length", "froh", "mean_length", "max_length"],
    ihs=["variantId", "pos", "af", "ihs_unstd", "ihs"],
    nsl=["variantId", "pos", "af", "nsl_unstd", "nsl"],
    garud_h=[
        "chr", "start", "end", "population", "h1", "h12", "h2_h1",
        "hap_diversity", "n_haplotypes", "n_variants",
    ],
    xpehh=["variantId", "pos", "af_pop1", "af_pop2", "xpehh_unstd", "xpehh"],
    divergence=["fst_hudson", "fst_wc", "dxy", "da", "pbs", "n_variants"],
)
33
+
34
+
35
def get_chromosome_lengths(ctx) -> dict[str, int]:
    """Return a ``{chromosomeId: length}`` mapping read from ``Chromosome`` nodes.

    ``ctx`` only needs a ``run(cypher)`` method returning an iterable of
    mapping-like rows with ``chr`` and ``length`` entries.
    """
    query = (
        "MATCH (c:Chromosome) "
        "RETURN c.chromosomeId AS chr, c.length AS length "
        "ORDER BY c.chromosomeId"
    )
    lengths = {}
    for row in ctx.run(query):
        lengths[row["chr"]] = row["length"]
    return lengths
40
+
41
+
42
def get_populations(ctx) -> list[str]:
    """Return the IDs of populations with more than one sample.

    The query orders the rows by descending sample count; the order returned
    by ``ctx.run`` is preserved.
    """
    query = (
        "MATCH (p:Population) WHERE p.n_samples > 1 "
        "RETURN p.populationId AS pop "
        "ORDER BY p.n_samples DESC"
    )
    population_ids = []
    for row in ctx.run(query):
        population_ids.append(row["pop"])
    return population_ids
47
+
48
+
49
def get_xpehh_pairs(populations: list[str], max_pairs: int = 20) -> list[tuple[str, str]]:
    """Generate representative XP-EHH population pairs.

    Pairs are the unordered combinations of ``populations`` in input order
    (``(p0, p1), (p0, p2), ..., (p1, p2), ...``), truncated to the first
    ``max_pairs`` entries.

    Args:
        populations: Population IDs; earlier entries are paired first.
        max_pairs: Upper bound on the number of pairs returned. Zero or a
            negative value yields an empty list.

    Returns:
        At most ``max_pairs`` ``(pop1, pop2)`` tuples.
    """
    from itertools import combinations, islice

    # islice rejects negative stops, so clamp; this also makes a cap of zero
    # really mean "no pairs" instead of returning the first pair anyway.
    limit = max(max_pairs, 0)
    return list(islice(combinations(populations, 2), limit))
58
+
59
+
60
def _cypher_str(value) -> str:
    """Render *value* as a single-quoted Cypher string literal.

    Backslashes and single quotes are backslash-escaped so that a population
    or chromosome name can never terminate the literal early.
    """
    text = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{text}'"


def _split_csv(text: str) -> list[str]:
    """Split a comma-separated option value, trimming blanks and empty items."""
    return [item.strip() for item in text.split(",") if item.strip()]


def _task_succeeded(results: dict, key: str) -> bool:
    """Return True when *key* has a stored result that is not an error entry."""
    return key in results and "error" not in results[key]


def _phase1_cypher(proc: str, chrom: str, pop: str, chr_lens: dict):
    """Build the Cypher call for one per-population task, or None if unknown."""
    chrom_q, pop_q = _cypher_str(chrom), _cypher_str(pop)
    if proc in ("diversity", "sfs", "pop_summary"):
        # Fall back to a generous upper bound when the graph has no (or a
        # null) length for this chromosome.
        length = chr_lens.get(chrom) or 300_000_000
        return build_cypher(
            f"graphpop.{proc}",
            [chrom_q, "1", str(length), pop_q],
            yield_cols=YIELD_COLS.get(proc),
        )
    if proc in ("ihs", "nsl"):
        return build_cypher(
            f"graphpop.{proc}",
            [chrom_q, pop_q],
            options={"min_af": 0.05},
            yield_cols=["variantId", "pos", "af", f"{proc}_unstd", proc],
        )
    if proc == "roh":
        return build_cypher(
            f"graphpop.{proc}",
            [chrom_q, pop_q],
            yield_cols=YIELD_COLS["roh"],
        )
    if proc == "garud_h":
        # Fixed window/step arguments, as in the original call.
        return build_cypher(
            f"graphpop.{proc}",
            [chrom_q, pop_q, "100000", "50000"],
            yield_cols=YIELD_COLS["garud_h"],
        )
    return None


def _phase2_cypher(proc: str, chrom: str, pop1: str, pop2: str, chr_lens: dict):
    """Build the Cypher call for one pairwise task, or None if unknown."""
    chrom_q = _cypher_str(chrom)
    pop1_q, pop2_q = _cypher_str(pop1), _cypher_str(pop2)
    if proc == "xpehh":
        return build_cypher(
            "graphpop.xpehh",
            [chrom_q, pop1_q, pop2_q],
            options={"min_af": 0.05},
            yield_cols=YIELD_COLS["xpehh"],
        )
    if proc == "divergence":
        length = chr_lens.get(chrom) or 300_000_000
        return build_cypher(
            "graphpop.divergence",
            [chrom_q, "1", str(length), pop1_q, pop2_q],
            yield_cols=YIELD_COLS["divergence"],
        )
    return None


def _record_task(results: dict, key: str, meta: dict, records, tsv_path: Path):
    """Store the summary entry for a finished task and write its TSV file."""
    results[key] = {
        **meta,
        "n_records": len(records),
        "summary": records[0] if len(records) == 1 else f"{len(records)} rows",
    }
    tsv_path.parent.mkdir(exist_ok=True)
    if records:
        _write_tsv(tsv_path, records)


@click.command("run-all")
@click.option("--phase", type=click.Choice(["1", "2", "all"]), default="all",
              help="Phase 1 (per-pop), Phase 2 (pairwise), or all")
@click.option("--output-dir", "-d", type=click.Path(), default="graphpop_results",
              help="Output directory for result files")
@click.option("--resume/--no-resume", default=True,
              help="Skip already-completed tasks (default: resume)")
@click.option("--json-output", type=click.Path(),
              help="Accumulated JSON results file (default: <output-dir>/results.json)")
@click.option("--persist/--no-persist", default=True,
              help="Write results to graph nodes (default: yes)")
@click.option("--populations", "pop_list",
              help="Comma-separated population list (default: auto-detect)")
@click.option("--chromosomes", "chr_list",
              help="Comma-separated chromosome list (default: auto-detect)")
@click.option("--xpehh-pairs", type=int, default=20,
              help="Max number of XP-EHH population pairs")
@click.option("--workers", type=int, default=1,
              help="Parallel workers (experimental)")
@pass_ctx
def run_all(ctx, phase, output_dir, resume, json_output, persist,
            pop_list, chr_list, xpehh_pairs, workers):
    """Run full-genome analysis across all populations and chromosomes.

    Phase 1: Per-population statistics (diversity, SFS, iHS, nSL, ROH, Garud's H)
    for each population × chromosome combination.

    Phase 2: Pairwise statistics (XP-EHH, divergence) for representative
    population pairs × chromosomes.

    Results are saved as TSV files in the output directory and optionally
    persisted to graph nodes.
    """
    # NOTE(review): ``persist`` and ``workers`` are accepted but never read in
    # this function; tasks always run sequentially. Confirm whether they are
    # meant to be forwarded to the procedures.
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    json_path = Path(json_output) if json_output else out_dir / "results.json"

    # Load or initialize results
    results = {}
    if resume and json_path.exists():
        with open(json_path) as f:
            results = json.load(f)
        click.echo(f"Resuming from {json_path} ({len(results)} entries)")

    # Auto-detect populations and chromosomes unless given explicitly.
    click.echo("Querying graph for populations and chromosomes...")
    chr_lens = get_chromosome_lengths(ctx)
    chromosomes = _split_csv(chr_list) if chr_list else sorted(chr_lens.keys())
    populations = _split_csv(pop_list) if pop_list else get_populations(ctx)

    click.echo(f"Populations: {len(populations)} ({', '.join(populations[:5])}...)")
    click.echo(f"Chromosomes: {len(chromosomes)} ({', '.join(chromosomes[:3])}...)")

    # Phase 1: Per-population
    if phase in ("1", "all"):
        click.echo(f"\n=== Phase 1: Per-population ({len(populations)} pops × {len(chromosomes)} chrs) ===")
        total = len(populations) * len(chromosomes) * len(PHASE1_PROCEDURES)
        done = 0
        t0 = time.time()

        for pop in populations:
            for chrom in chromosomes:
                for proc in PHASE1_PROCEDURES:
                    key = f"{pop}_{chrom}_{proc}"
                    # Only successful tasks count as completed: an entry that
                    # recorded an error is retried on resume.
                    if resume and _task_succeeded(results, key):
                        done += 1
                        continue

                    # Best-effort per task: one failing procedure must not
                    # abort the whole genome-wide run.
                    try:
                        cypher = _phase1_cypher(proc, chrom, pop, chr_lens)
                        if cypher is None:
                            continue
                        records = ctx.run(cypher)
                        _record_task(
                            results, key,
                            {"population": pop, "chr": chrom, "procedure": proc},
                            records,
                            out_dir / proc / f"{pop}_{chrom}.tsv",
                        )
                    except Exception as e:
                        results[key] = {"error": str(e)}
                        click.echo(f" ERROR {key}: {e}", err=True)

                    done += 1
                    elapsed = time.time() - t0
                    rate = done / elapsed if elapsed > 0 else 0
                    eta = (total - done) / rate if rate > 0 else 0
                    if done % 10 == 0:
                        click.echo(
                            f" [{done}/{total}] {key} "
                            f"({elapsed:.0f}s elapsed, ~{eta:.0f}s remaining)"
                        )

                # Save checkpoint after each chromosome
                _save_json(json_path, results)

    # Phase 2: Pairwise
    if phase in ("2", "all"):
        pairs = get_xpehh_pairs(populations, xpehh_pairs)
        click.echo(f"\n=== Phase 2: Pairwise ({len(pairs)} pairs × {len(chromosomes)} chrs) ===")

        for pop1, pop2 in pairs:
            for chrom in chromosomes:
                for proc in PHASE2_PROCEDURES:
                    key = f"{pop1}_vs_{pop2}_{chrom}_{proc}"
                    if resume and _task_succeeded(results, key):
                        continue

                    try:
                        cypher = _phase2_cypher(proc, chrom, pop1, pop2, chr_lens)
                        if cypher is None:
                            continue
                        records = ctx.run(cypher)
                        _record_task(
                            results, key,
                            {"pop1": pop1, "pop2": pop2, "chr": chrom,
                             "procedure": proc},
                            records,
                            out_dir / proc / f"{pop1}_vs_{pop2}_{chrom}.tsv",
                        )
                    except Exception as e:
                        results[key] = {"error": str(e)}
                        click.echo(f" ERROR {key}: {e}", err=True)

                # Checkpoint after each chromosome, as in phase 1.
                _save_json(json_path, results)

    # Final save
    _save_json(json_path, results)
    n_ok = sum(1 for v in results.values() if "error" not in v)
    n_err = sum(1 for v in results.values() if "error" in v)
    click.echo(f"\nDone. {n_ok} succeeded, {n_err} failed.")
    click.echo(f"Results: {json_path}")
    click.echo(f"TSV files: {out_dir}/")
247
+
248
+
249
+ def _write_tsv(path: Path, records: list[dict]):
250
+ """Write records to a TSV file."""
251
+ if not records:
252
+ return
253
+ keys = list(records[0].keys())
254
+ with open(path, "w") as f:
255
+ f.write("\t".join(keys) + "\n")
256
+ for rec in records:
257
+ vals = []
258
+ for k in keys:
259
+ v = rec[k]
260
+ if isinstance(v, float):
261
+ vals.append(f"{v:.6g}")
262
+ elif isinstance(v, list):
263
+ vals.append(",".join(str(x) for x in v))
264
+ elif v is None:
265
+ vals.append("NA")
266
+ else:
267
+ vals.append(str(v))
268
+ f.write("\t".join(vals) + "\n")
269
+
270
+
271
+ def _save_json(path: Path, data: dict):
272
+ """Save results as JSON with atomic write."""
273
+ tmp = path.with_suffix(".tmp")
274
+ with open(tmp, "w") as f:
275
+ json.dump(data, f, indent=2, default=str)
276
+ tmp.rename(path)
@@ -0,0 +1,98 @@
1
+ """graphpop start/stop/status — Neo4j server lifecycle management."""
2
+ from __future__ import annotations
3
+
4
+ import subprocess
5
+ from pathlib import Path
6
+
7
+ import click
8
+ import yaml
9
+
10
+
11
def _get_neo4j_home() -> Path:
    """Resolve the Neo4j installation directory.

    Order of precedence: the ``neo4j_home`` entry of
    ``~/.graphpop/config.yaml``, then the first of ``~/neo4j`` and
    ``/var/lib/neo4j`` that contains ``bin/neo4j``, and finally ``~/neo4j``
    even if nothing is installed there.
    """
    cfg_file = Path.home() / ".graphpop" / "config.yaml"
    if cfg_file.exists():
        with open(cfg_file) as handle:
            loaded = yaml.safe_load(handle)
        # An empty YAML file loads as None; treat it as "no settings".
        settings = loaded or {}
        if "neo4j_home" in settings:
            return Path(settings["neo4j_home"])

    default_home = Path.home() / "neo4j"
    for root in (default_home, Path("/var/lib/neo4j")):
        launcher = root / "bin" / "neo4j"
        if launcher.exists():
            return root
    return default_home
24
+
25
+
26
+ def _run_neo4j_cmd(command: str, neo4j_home: Path | None = None) -> tuple[int, str]:
27
+ """Run a neo4j command and return (returncode, output)."""
28
+ home = neo4j_home or _get_neo4j_home()
29
+ neo4j_bin = home / "bin" / "neo4j"
30
+ if not neo4j_bin.exists():
31
+ return 1, f"Neo4j not found at {home}. Run 'graphpop setup' first."
32
+ result = subprocess.run(
33
+ [str(neo4j_bin), command],
34
+ capture_output=True, text=True,
35
+ )
36
+ output = (result.stdout + result.stderr).strip()
37
+ return result.returncode, output
38
+
39
+
40
@click.command()
@click.option("--neo4j-home", type=click.Path(), help="Neo4j installation directory")
def start(neo4j_home):
    """Start the Neo4j database server."""
    home = Path(neo4j_home) if neo4j_home else None
    click.echo("Starting Neo4j...")
    rc, output = _run_neo4j_cmd("start", home)
    if rc != 0:
        # Surface the failure to callers and scripts: report on stderr and
        # exit with the launcher's status instead of a misleading 0.
        click.echo(output, err=True)
        raise SystemExit(rc)
    click.echo(output)
    click.echo("\nNeo4j started. Use 'graphpop status' to verify.")
50
+
51
+
52
@click.command()
@click.option("--neo4j-home", type=click.Path(), help="Neo4j installation directory")
def stop(neo4j_home):
    """Stop the Neo4j database server."""
    home = Path(neo4j_home) if neo4j_home else None
    click.echo("Stopping Neo4j...")
    rc, output = _run_neo4j_cmd("stop", home)
    if rc != 0:
        # The return code used to be discarded, so a failed stop still exited
        # with 0; propagate it and report on stderr.
        click.echo(output, err=True)
        raise SystemExit(rc)
    click.echo(output)
60
+
61
+
62
@click.command()
@click.option("--neo4j-home", type=click.Path(), help="Neo4j installation directory")
def status(neo4j_home):
    """Check whether Neo4j is running and show database info."""
    install_dir = _get_neo4j_home() if not neo4j_home else Path(neo4j_home)

    # 1. Process state, as reported by the Neo4j launcher itself.
    _, report = _run_neo4j_cmd("status", install_dir)
    click.echo(report)

    # 2. Version, only when the launcher exists and the call succeeds.
    launcher = install_dir / "bin" / "neo4j"
    if launcher.exists():
        version_proc = subprocess.run(
            [str(launcher), "version"],
            capture_output=True,
            text=True,
        )
        if version_proc.returncode == 0:
            click.echo(f"Version: {version_proc.stdout.strip()}")

    # 3. Connection settings stored in the GraphPop config file, if any.
    cfg_file = Path.home() / ".graphpop" / "config.yaml"
    if cfg_file.exists():
        with open(cfg_file) as handle:
            cfg = yaml.safe_load(handle) or {}
        click.echo(f"\nGraphPop config ({cfg_file}):")
        for label, field in (("URI", "uri"), ("Database", "database"), ("Neo4j", "neo4j_home")):
            click.echo(f" {label}: {cfg.get(field, 'not set')}")

    # 4. Whether a GraphPop procedures JAR sits in the plugins directory.
    plugin_dir = install_dir / "plugins"
    jars = list(plugin_dir.glob("graphpop*.jar")) if plugin_dir.exists() else []
    if not jars:
        click.echo("\nGraphPop plugin: NOT INSTALLED")
        click.echo(" Build with: cd graphpop-procedures && mvn package")
        click.echo(" Deploy with: graphpop setup --deploy-plugin target/graphpop-procedures-*.jar")
    else:
        click.echo(f"\nGraphPop plugin: {jars[0].name}")
@@ -0,0 +1,299 @@
1
+ """graphpop setup — download, configure, and initialize Neo4j for GraphPop."""
2
+ from __future__ import annotations
3
+
4
+ import platform
5
+ import shutil
6
+ import subprocess
7
+ import tarfile
8
+ from pathlib import Path
9
+
10
+ import click
11
+ import yaml
12
+
13
+
14
+
15
# Where Neo4j is installed unless --neo4j-home says otherwise.
DEFAULT_NEO4J_HOME = Path.home().joinpath("neo4j")

# Neo4j Community Edition release fetched by `graphpop setup`.
NEO4J_VERSION = "5.26.0"
NEO4J_DOWNLOAD_URL = (
    "https://dist.neo4j.org/neo4j-community-" + NEO4J_VERSION + "-unix.tar.gz"
)

# GraphPop procedures plugin: pre-built JAR published on GitHub Releases.
GRAPHPOP_PROCEDURES_VERSION = "0.1.0"
GRAPHPOP_JAR_NAME = "graphpop-procedures-" + GRAPHPOP_PROCEDURES_VERSION + ".jar"
GRAPHPOP_JAR_URL = "/".join([
    "https://github.com/jfmao/GraphPop/releases/download",
    "v" + GRAPHPOP_PROCEDURES_VERSION,
    GRAPHPOP_JAR_NAME,
])
28
+
29
+
30
@click.command()
@click.option("--neo4j-home", type=click.Path(), default=str(DEFAULT_NEO4J_HOME),
              help=f"Neo4j installation directory (default: {DEFAULT_NEO4J_HOME})")
@click.option("--pagecache", default="16g",
              help="Neo4j page cache size (default: 16g)")
@click.option("--heap", default="4g",
              help="Neo4j JVM heap size (default: 4g)")
@click.option("--password", prompt=True, hide_input=True,
              confirmation_prompt=True,
              help="Neo4j password for the 'neo4j' user")
@click.option("--skip-download", is_flag=True,
              help="Skip downloading Neo4j (use existing installation)")
@click.option("--deploy-plugin", type=click.Path(exists=True), default=None,
              help="Path to a local graphpop-procedures.jar (skips auto-download)")
@click.option("--skip-plugin", is_flag=True,
              help="Skip deploying the GraphPop procedures plugin")
def setup(neo4j_home, pagecache, heap, password, skip_download, deploy_plugin,
          skip_plugin):
    """Set up Neo4j for GraphPop.

    Downloads Neo4j Community Edition, automatically downloads and deploys
    the pre-compiled GraphPop procedures plugin, configures memory settings,
    sets the initial password, and creates the GraphPop config file.

    No Maven or JDK build step is required — the plugin is downloaded as
    a pre-compiled JAR from GitHub Releases. A Java 21+ runtime must still
    be installed to run Neo4j.

    \b
    Examples:
      graphpop setup --password mypass
      graphpop setup --neo4j-home /opt/neo4j --pagecache 20g --heap 8g
      graphpop setup --deploy-plugin path/to/local/graphpop-procedures.jar
      graphpop setup --skip-plugin --password mypass
    """
    neo4j_path = Path(neo4j_home)

    # Step 0: Check Java runtime (exits when no `java` executable is found).
    _check_java()

    # Step 1: Download Neo4j, asking first before replacing an existing install.
    if not skip_download:
        if neo4j_path.exists() and (neo4j_path / "bin" / "neo4j").exists():
            click.echo(f"Neo4j already installed at {neo4j_path}")
            if not click.confirm("Re-install?"):
                skip_download = True

    if not skip_download:
        _download_neo4j(neo4j_path)

    # Verify installation
    neo4j_bin = neo4j_path / "bin" / "neo4j"
    if not neo4j_bin.exists():
        click.echo(f"Error: Neo4j not found at {neo4j_path}", err=True)
        click.echo("Use --neo4j-home to specify the installation directory.", err=True)
        raise SystemExit(1)

    # Step 2: Configure Neo4j
    click.echo("\nConfiguring Neo4j...")
    _configure_neo4j(neo4j_path, pagecache, heap)

    # Step 3: Set initial password
    click.echo("Setting Neo4j password...")
    _set_password(neo4j_path, password)

    # Step 4: Deploy GraphPop plugin
    # Priority: user-provided JAR > conda-bundled JAR > GitHub download
    plugin_dest = neo4j_path / "plugins" / "graphpop-procedures.jar"
    if deploy_plugin or not skip_plugin:
        # copy2 does not create parent directories; make sure the plugins
        # directory exists for every deployment path, not only the download.
        plugin_dest.parent.mkdir(parents=True, exist_ok=True)
    if deploy_plugin:
        # Use user-provided local JAR
        click.echo(f"Deploying GraphPop plugin from {deploy_plugin}...")
        shutil.copy2(deploy_plugin, plugin_dest)
        click.echo(f" Deployed to {plugin_dest}")
    elif not skip_plugin:
        # Check for conda-bundled JAR first
        conda_jar = _find_conda_jar()
        if conda_jar:
            click.echo("Deploying conda-bundled GraphPop plugin...")
            shutil.copy2(conda_jar, plugin_dest)
            click.echo(f" Deployed to {plugin_dest}")
        else:
            # Auto-download pre-compiled JAR from GitHub Releases
            click.echo(f"Downloading GraphPop procedures plugin v{GRAPHPOP_PROCEDURES_VERSION}...")
            _download_plugin(plugin_dest)
            click.echo(f" Deployed to {plugin_dest}")

    # Step 5: Create GraphPop config
    config_dir = Path.home() / ".graphpop"
    config_dir.mkdir(exist_ok=True)
    config_path = config_dir / "config.yaml"

    config = {
        "uri": "bolt://localhost:7687",
        "user": "neo4j",
        "password": password,
        "database": "neo4j",
        "neo4j_home": str(neo4j_path),
    }
    # The file holds the database password in clear text: create it
    # owner-only, and tighten a pre-existing file before rewriting it.
    config_path.touch(mode=0o600, exist_ok=True)
    config_path.chmod(0o600)
    with open(config_path, "w") as f:
        yaml.dump(config, f, default_flow_style=False)
    click.echo(f"\nGraphPop config written to {config_path}")

    # Step 6: Summary
    click.echo(f"""
Setup complete!

Neo4j home: {neo4j_path}
Page cache: {pagecache}
Heap: {heap}
Config: {config_path}
Plugin: {'deployed' if (deploy_plugin or not skip_plugin) else 'not deployed (use --deploy-plugin or remove --skip-plugin)'}

Next steps:
graphpop start # Start Neo4j
graphpop import --vcf data.vcf.gz \\
--panel panel.txt --database mydb # Import data
graphpop diversity chr1 1 50000000 POP # Run analysis
""")
147
+
148
+
149
def _download_neo4j(dest: Path):
    """Download and extract Neo4j Community Edition.

    The tarball is cached at ``/tmp/neo4j-community-<version>-unix.tar.gz``
    and reused when present. Any existing directory at ``dest`` is removed
    and replaced by the extracted distribution.

    Args:
        dest: Final installation directory.

    Raises:
        OSError, urllib.error.URLError: On download or file-system failure.
        tarfile.TarError: If the tarball cannot be read or extracted safely.
    """
    import urllib.request

    tarball = Path(f"/tmp/neo4j-community-{NEO4J_VERSION}-unix.tar.gz")
    if tarball.exists():
        click.echo(f"Using cached download: {tarball}")
    else:
        click.echo(f"Downloading Neo4j {NEO4J_VERSION}...")
        click.echo(f" URL: {NEO4J_DOWNLOAD_URL}")
        # Download under a temporary name and move it into place only once
        # complete, so an interrupted transfer is never mistaken for a cache.
        partial = tarball.with_name(tarball.name + ".part")
        try:
            urllib.request.urlretrieve(NEO4J_DOWNLOAD_URL, partial)
            partial.replace(tarball)
        finally:
            partial.unlink(missing_ok=True)
        click.echo(f" Downloaded to {tarball}")

    click.echo(f"Extracting to {dest}...")
    # Open the archive first: an unreadable tarball then fails before the
    # existing installation is deleted.
    with tarfile.open(tarball) as tf:
        if dest.exists():
            shutil.rmtree(dest)
        dest.parent.mkdir(parents=True, exist_ok=True)
        if hasattr(tarfile, "data_filter"):
            # Reject absolute paths and members escaping the target directory
            # on interpreters that support extraction filters.
            tf.extractall(dest.parent, filter="data")
        else:
            tf.extractall(dest.parent)

    # The tarball extracts to neo4j-community-X.Y.Z/
    extracted = dest.parent / f"neo4j-community-{NEO4J_VERSION}"
    if extracted.exists() and extracted != dest:
        extracted.rename(dest)
    click.echo(f" Installed to {dest}")
174
+
175
+
176
def _configure_neo4j(neo4j_home: Path, pagecache: str, heap: str):
    """Apply GraphPop's settings to ``<neo4j_home>/conf/neo4j.conf``.

    Active (non-comment) lines that assign one of the managed keys are
    rewritten in place; managed keys not found in the file are appended. All
    other lines are kept as they are. Each applied setting is echoed.
    """
    conf_path = neo4j_home / "conf" / "neo4j.conf"

    desired = {
        "server.memory.pagecache.size": pagecache,
        "server.memory.heap.initial_size": heap,
        "server.memory.heap.max_size": heap,
        "server.directories.import": "import",
        "db.tx_log.rotation.retention_policy": "2 days 2G",
        "dbms.security.procedures.unrestricted": "graphpop.*",
    }

    existing = conf_path.read_text().splitlines() if conf_path.exists() else []

    seen = set()
    rewritten = []
    for raw in existing:
        name = None
        if "=" in raw and not raw.startswith("#"):
            name = raw.split("=")[0].strip()
        if name and name in desired:
            rewritten.append(f"{name}={desired[name]}")
            seen.add(name)
            continue
        rewritten.append(raw)

    # Managed keys that were absent from the file go at the end.
    rewritten.extend(
        f"{name}={value}" for name, value in desired.items() if name not in seen
    )

    conf_path.write_text("\n".join(rewritten) + "\n")
    for name, value in desired.items():
        click.echo(f" {name}={value}")
212
+
213
+
214
def _check_java():
    """Verify that Java 21+ is available for Neo4j runtime.

    Runs ``java -version`` and echoes the first line of its output. A version
    older than 21 only produces a warning on stderr; a missing ``java``
    executable is fatal.

    Raises:
        SystemExit: With status 1 when ``java`` is not on the PATH.
    """
    import re

    try:
        result = subprocess.run(
            ["java", "-version"], capture_output=True, text=True,
        )
    except FileNotFoundError:
        click.echo(
            "Error: Java not found. Neo4j requires Java 21+ to run.\n"
            "Install via:\n"
            " conda install -c conda-forge openjdk=21\n"
            " Or: sudo apt install openjdk-21-jre-headless",
            err=True,
        )
        raise SystemExit(1)

    output = result.stderr + result.stdout  # java -version prints to stderr
    lines = output.splitlines()
    # Guard against a wrapper script that prints nothing at all.
    banner = lines[0].strip() if lines else "(no version output)"
    click.echo(f" Java found: {banner}")

    # Check version >= 21
    m = re.search(r'"(\d+)(?:\.(\d+))?', output)
    if not m:
        return
    major = int(m.group(1))
    if major == 1 and m.group(2):
        # Legacy scheme: Java 8 reports itself as "1.8.0_xxx".
        major = int(m.group(2))
    if major < 21:
        click.echo(
            " Warning: Java 21+ is required by Neo4j. "
            f"Found version {major}.\n"
            " Install via: conda install -c conda-forge openjdk=21\n"
            " Or: sudo apt install openjdk-21-jre-headless",
            err=True,
        )
242
+
243
+
244
+ def _find_conda_jar() -> Path | None:
245
+ """Look for a GraphPop JAR bundled by conda in the environment prefix."""
246
+ import sys
247
+ conda_prefix = Path(sys.prefix)
248
+ candidates = [
249
+ conda_prefix / "share" / "graphpop" / "plugins" / "graphpop-procedures.jar",
250
+ conda_prefix / "lib" / "graphpop" / "graphpop-procedures.jar",
251
+ ]
252
+ for p in candidates:
253
+ if p.exists():
254
+ return p
255
+ return None
256
+
257
+
258
def _download_plugin(dest: Path):
    """Download the pre-compiled GraphPop procedures JAR from GitHub Releases.

    The JAR is cached at ``/tmp/<GRAPHPOP_JAR_NAME>`` and reused when present,
    then copied to ``dest`` (creating its parent directory if needed).

    Args:
        dest: Target path of the plugin inside the Neo4j installation.

    Raises:
        SystemExit: With status 1 when the download fails.
    """
    import urllib.request

    cache = Path(f"/tmp/{GRAPHPOP_JAR_NAME}")
    if cache.exists():
        click.echo(f" Using cached plugin: {cache}")
    else:
        click.echo(f" URL: {GRAPHPOP_JAR_URL}")
        # Fetch under a temporary name and publish it only when complete, so
        # a truncated JAR from an interrupted run is never reused as a cache.
        partial = cache.with_name(cache.name + ".part")
        try:
            urllib.request.urlretrieve(GRAPHPOP_JAR_URL, partial)
            partial.replace(cache)
        except Exception as e:
            click.echo(f" Error downloading plugin: {e}", err=True)
            click.echo(
                " You can build locally instead:\n"
                " cd graphpop-procedures && ./mvnw package -DskipTests\n"
                " graphpop setup --deploy-plugin target/graphpop-procedures-0.1.0-SNAPSHOT.jar",
                err=True,
            )
            raise SystemExit(1)
        finally:
            partial.unlink(missing_ok=True)
    dest.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(cache, dest)
280
+
281
+
282
def _set_password(neo4j_home: Path, password: str):
    """Set the initial Neo4j password via ``neo4j-admin``.

    Runs ``bin/neo4j-admin dbms set-initial-password <password>`` and reports
    the outcome; failures are printed as messages and never raised.
    """
    admin_tool = neo4j_home / "bin" / "neo4j-admin"
    argv = [str(admin_tool), "dbms", "set-initial-password", password]
    try:
        outcome = subprocess.run(argv, capture_output=True, text=True)
    except FileNotFoundError:
        click.echo(" Warning: neo4j-admin not found, skipping password setup")
        return

    if outcome.returncode == 0:
        click.echo(" Password set successfully")
        return

    # A non-zero exit whose output mentions "already" is treated as "the
    # password was set previously" rather than as an error.
    streams = (outcome.stderr, outcome.stdout)
    if any("already" in text.lower() for text in streams):
        click.echo(" Password already set (use Neo4j browser to change)")
    else:
        click.echo(f" Warning: {outcome.stderr.strip()}")