graphpop-cli 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/PKG-INFO +1 -1
  2. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/pyproject.toml +1 -1
  3. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/__init__.py +1 -1
  4. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/cli.py +2 -1
  5. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/compare.py +3 -18
  6. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/converge.py +6 -0
  7. graphpop_cli-0.1.1/src/graphpop_cli/commands/doctor.py +172 -0
  8. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/export_bed.py +6 -0
  9. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/export_windows.py +0 -2
  10. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/filter_results.py +6 -0
  11. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/plot.py +6 -0
  12. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/rank_genes.py +6 -0
  13. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/setup.py +185 -22
  14. graphpop_cli-0.1.1/src/graphpop_cli/validators.py +22 -0
  15. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli.egg-info/PKG-INFO +1 -1
  16. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli.egg-info/SOURCES.txt +2 -0
  17. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/README.md +0 -0
  18. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/setup.cfg +0 -0
  19. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/__init__.py +0 -0
  20. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/aggregate.py +0 -0
  21. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/batch.py +0 -0
  22. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/config_cmd.py +0 -0
  23. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/db.py +0 -0
  24. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/divergence.py +0 -0
  25. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/diversity.py +0 -0
  26. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/dump.py +0 -0
  27. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/extract.py +0 -0
  28. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/garud_h.py +0 -0
  29. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/genome_scan.py +0 -0
  30. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/ihs.py +0 -0
  31. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/import_data.py +0 -0
  32. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/inventory.py +0 -0
  33. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/joint_sfs.py +0 -0
  34. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/ld.py +0 -0
  35. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/lookup.py +0 -0
  36. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/neighbors.py +0 -0
  37. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/nsl.py +0 -0
  38. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/pop_summary.py +0 -0
  39. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/query.py +0 -0
  40. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/report.py +0 -0
  41. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/roh.py +0 -0
  42. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/run_all.py +0 -0
  43. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/server.py +0 -0
  44. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/sfs.py +0 -0
  45. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/validate.py +0 -0
  46. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/commands/xpehh.py +0 -0
  47. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/config.py +0 -0
  48. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/connection.py +0 -0
  49. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli/formatters.py +0 -0
  50. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli.egg-info/dependency_links.txt +0 -0
  51. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli.egg-info/entry_points.txt +0 -0
  52. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli.egg-info/requires.txt +0 -0
  53. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/src/graphpop_cli.egg-info/top_level.txt +0 -0
  54. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/tests/test_commands.py +0 -0
  55. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/tests/test_config.py +0 -0
  56. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/tests/test_connection.py +0 -0
  57. {graphpop_cli-0.1.0 → graphpop_cli-0.1.1}/tests/test_formatters.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphpop-cli
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Graph database-native population genomics CLI with O(V*K) complexity
5
5
  Author: Jianfeng Mao
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "graphpop-cli"
7
- version = "0.1.0"
7
+ version = "0.1.1"
8
8
  description = "Graph database-native population genomics CLI with O(V*K) complexity"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -1,2 +1,2 @@
1
1
  """GraphPop CLI — command-line interface for graph-native population genomics."""
2
- __version__ = "0.1.0"
2
+ __version__ = "0.1.1"
@@ -95,7 +95,7 @@ from .commands import ( # noqa: E402
95
95
  genome_scan, pop_summary,
96
96
  ld, ihs, xpehh, nsl, roh, garud_h,
97
97
  query, run_all, aggregate, export_windows,
98
- setup, server, db, import_data, dump,
98
+ setup, server, doctor, db, import_data, dump,
99
99
  config_cmd, validate, filter_results, plot,
100
100
  lookup, converge, inventory, rank_genes,
101
101
  extract, export_bed, batch, compare,
@@ -128,6 +128,7 @@ main.add_command(setup.setup)
128
128
  main.add_command(server.start)
129
129
  main.add_command(server.stop)
130
130
  main.add_command(server.status)
131
+ main.add_command(doctor.doctor)
131
132
 
132
133
  # Database management
133
134
  main.add_command(db.db)
@@ -1,26 +1,11 @@
1
1
  """graphpop compare — compare statistics between two populations."""
2
2
  from __future__ import annotations
3
3
 
4
- import re
5
-
6
4
  import click
7
5
 
8
6
  from ..cli import pass_ctx
9
7
  from ..formatters import format_output
10
-
11
- # Allowed stat names — used to whitelist dynamic property access
12
- _VALID_STATS = {"pi", "theta_w", "tajima_d", "fst", "ihs"}
13
- _IDENT_RE = re.compile(r'^[A-Za-z0-9_-]+$')
14
-
15
-
16
- def _validate_identifier(value: str, label: str) -> str:
17
- """Validate that a value is safe for use as a Cypher property name."""
18
- if not _IDENT_RE.match(value):
19
- raise click.BadParameter(
20
- f"Invalid {label}: {value!r}. Only alphanumeric, hyphen, "
21
- "and underscore characters are allowed."
22
- )
23
- return value
8
+ from ..validators import validate_identifier
24
9
 
25
10
 
26
11
  @click.command("compare")
@@ -54,8 +39,8 @@ def compare(ctx, pop1, pop2, chr, stat, window_size, output_path, fmt, limit):
54
39
  graphpop compare EUR EAS chr22 --stat ihs -o ihs_diff.tsv
55
40
  """
56
41
  # Validate identifiers used in dynamic property names
57
- pop1 = _validate_identifier(pop1, "population")
58
- pop2 = _validate_identifier(pop2, "population")
42
+ pop1 = validate_identifier(pop1, "population")
43
+ pop2 = validate_identifier(pop2, "population")
59
44
 
60
45
  if stat == "ihs":
61
46
  records = _compare_variant_stat(ctx, pop1, pop2, chr, stat, limit)
@@ -5,6 +5,7 @@ import click
5
5
 
6
6
  from ..cli import pass_ctx
7
7
  from ..formatters import format_output
8
+ from ..validators import validate_identifier
8
9
 
9
10
 
10
11
  # Statistics stored on Variant nodes vs GenomicWindow nodes
@@ -45,6 +46,11 @@ def converge(ctx, stats, thresholds, chromosome, population, pop2,
45
46
  graphpop converge --stats h12,fst --thresholds 0.3,0.5 --pop GJ-tmp --window 100000
46
47
  graphpop converge --stats ihs,xpehh,h12,fst --thresholds 2.0,2.0,0.3,0.5 --pop EUR --pop2 AFR
47
48
  """
49
+ # Validate identifiers used in dynamic property names
50
+ validate_identifier(population, "population")
51
+ if pop2:
52
+ validate_identifier(pop2, "population")
53
+
48
54
  stat_list = [s.strip() for s in stats.split(",")]
49
55
  thresh_list = [float(t.strip()) for t in thresholds.split(",")]
50
56
 
@@ -0,0 +1,172 @@
1
+ """graphpop doctor — installation health check."""
2
+ from __future__ import annotations
3
+
4
+ import re
5
+ import socket
6
+ import subprocess
7
+ from pathlib import Path
8
+
9
+ import click
10
+ import yaml
11
+
12
+ from .setup import DEFAULT_BOLT_PORT, DEFAULT_HTTP_PORT
13
+
14
+
15
+ def _check(label: str, ok: bool, detail: str = "") -> bool:
16
+ """Print a check result and return whether it passed."""
17
+ mark = click.style("OK", fg="green") if ok else click.style("FAIL", fg="red")
18
+ msg = f" [{mark}] {label}"
19
+ if detail:
20
+ msg += f" — {detail}"
21
+ click.echo(msg)
22
+ return ok
23
+
24
+
25
+ @click.command()
26
+ def doctor():
27
+ """Run a full health check on the GraphPop installation.
28
+
29
+ Verifies Java, Neo4j home directory, running process, port reachability,
30
+ plugin deployment, config file, and password connectivity.
31
+ """
32
+ click.echo("GraphPop Doctor\n")
33
+ all_ok = True
34
+
35
+ # 1. Java
36
+ click.echo("Checking Java...")
37
+ java_ok, java_detail = _check_java_health()
38
+ all_ok &= _check("Java 21+", java_ok, java_detail)
39
+
40
+ # 2. Config file
41
+ click.echo("\nChecking configuration...")
42
+ config_path = Path.home() / ".graphpop" / "config.yaml"
43
+ cfg = {}
44
+ if config_path.exists():
45
+ with open(config_path) as f:
46
+ cfg = yaml.safe_load(f) or {}
47
+ all_ok &= _check("Config file", True, str(config_path))
48
+ else:
49
+ all_ok &= _check("Config file", False,
50
+ f"{config_path} not found — run 'graphpop setup'")
51
+
52
+ # 3. Neo4j home
53
+ click.echo("\nChecking Neo4j installation...")
54
+ neo4j_home = Path(cfg.get("neo4j_home", Path.home() / "neo4j"))
55
+ neo4j_bin = neo4j_home / "bin" / "neo4j"
56
+ if neo4j_bin.exists():
57
+ all_ok &= _check("Neo4j home", True, str(neo4j_home))
58
+ else:
59
+ all_ok &= _check("Neo4j home", False,
60
+ f"neo4j binary not found at {neo4j_home}")
61
+
62
+ # 4. Neo4j version
63
+ if neo4j_bin.exists():
64
+ result = subprocess.run(
65
+ [str(neo4j_bin), "version"], capture_output=True, text=True,
66
+ )
67
+ version_str = result.stdout.strip() if result.returncode == 0 else "unknown"
68
+ all_ok &= _check("Neo4j version", result.returncode == 0, version_str)
69
+
70
+ # 5. Plugin deployment
71
+ click.echo("\nChecking plugin...")
72
+ plugins_dir = neo4j_home / "plugins"
73
+ jar_files = list(plugins_dir.glob("graphpop*.jar")) if plugins_dir.exists() else []
74
+ if jar_files:
75
+ all_ok &= _check("GraphPop plugin", True, jar_files[0].name)
76
+ else:
77
+ all_ok &= _check("GraphPop plugin", False,
78
+ "not found in plugins/ — run 'graphpop setup'")
79
+
80
+ # 6. Neo4j process
81
+ click.echo("\nChecking Neo4j process...")
82
+ if neo4j_bin.exists():
83
+ result = subprocess.run(
84
+ [str(neo4j_bin), "status"], capture_output=True, text=True,
85
+ )
86
+ output = (result.stdout + result.stderr).strip()
87
+ running = result.returncode == 0 and "running" in output.lower()
88
+ all_ok &= _check("Neo4j running", running,
89
+ output.splitlines()[0] if output else "no output")
90
+ else:
91
+ all_ok &= _check("Neo4j running", False, "neo4j binary not found")
92
+
93
+ # 7. Port reachability
94
+ click.echo("\nChecking ports...")
95
+ uri = cfg.get("uri", f"bolt://localhost:{DEFAULT_BOLT_PORT}")
96
+ # Parse port from URI
97
+ port_match = re.search(r":(\d+)$", uri)
98
+ bolt_port = int(port_match.group(1)) if port_match else DEFAULT_BOLT_PORT
99
+
100
+ bolt_ok = _is_port_open("127.0.0.1", bolt_port)
101
+ all_ok &= _check(f"Bolt port {bolt_port}", bolt_ok,
102
+ "reachable" if bolt_ok else "not reachable")
103
+
104
+ # Check HTTP port from config
105
+ http_port = cfg.get("http_port", DEFAULT_HTTP_PORT)
106
+ http_ok = _is_port_open("127.0.0.1", http_port)
107
+ all_ok &= _check(f"HTTP port {http_port}", http_ok,
108
+ "reachable" if http_ok else "not reachable")
109
+
110
+ # 8. Bolt connectivity (if neo4j driver available)
111
+ click.echo("\nChecking database connectivity...")
112
+ password = cfg.get("password")
113
+ if password and bolt_ok:
114
+ conn_ok, conn_detail = _check_bolt_connectivity(uri, password)
115
+ all_ok &= _check("Bolt connection", conn_ok, conn_detail)
116
+ elif not bolt_ok:
117
+ all_ok &= _check("Bolt connection", False,
118
+ "skipped — port not reachable")
119
+ else:
120
+ all_ok &= _check("Bolt connection", False,
121
+ "skipped — no password in config")
122
+
123
+ # Summary
124
+ click.echo("")
125
+ if all_ok:
126
+ click.echo(click.style("All checks passed.", fg="green"))
127
+ else:
128
+ click.echo(click.style(
129
+ "Some checks failed. Review the output above.", fg="yellow"))
130
+ raise SystemExit(0 if all_ok else 1)
131
+
132
+
133
+ def _check_java_health() -> tuple[bool, str]:
134
+ """Return (ok, detail) for Java version check."""
135
+ try:
136
+ result = subprocess.run(
137
+ ["java", "-version"], capture_output=True, text=True,
138
+ )
139
+ output = result.stderr + result.stdout
140
+ first_line = output.splitlines()[0].strip() if output else "unknown"
141
+ m = re.search(r'"(\d+)', output)
142
+ if m and int(m.group(1)) >= 21:
143
+ return True, first_line
144
+ elif m:
145
+ return False, f"{first_line} (need 21+, found {m.group(1)})"
146
+ return False, f"{first_line} (could not parse version)"
147
+ except FileNotFoundError:
148
+ return False, "java not found — install via: conda install -c conda-forge openjdk=21"
149
+
150
+
151
+ def _is_port_open(host: str, port: int, timeout: float = 2.0) -> bool:
152
+ """Check if a TCP port is accepting connections."""
153
+ try:
154
+ with socket.create_connection((host, port), timeout=timeout):
155
+ return True
156
+ except (OSError, ConnectionRefusedError):
157
+ return False
158
+
159
+
160
+ def _check_bolt_connectivity(uri: str, password: str) -> tuple[bool, str]:
161
+ """Try a Bolt connection and return (ok, detail)."""
162
+ try:
163
+ from neo4j import GraphDatabase
164
+ driver = GraphDatabase.driver(uri, auth=("neo4j", password))
165
+ driver.verify_connectivity()
166
+ info = driver.get_server_info()
167
+ driver.close()
168
+ return True, f"connected to {info.agent}"
169
+ except ImportError:
170
+ return False, "neo4j-driver not installed — pip install neo4j"
171
+ except Exception as e:
172
+ return False, str(e)
@@ -4,6 +4,7 @@ from __future__ import annotations
4
4
  import click
5
5
 
6
6
  from ..cli import pass_ctx
7
+ from ..validators import validate_identifier
7
8
 
8
9
 
9
10
  # Statistics stored on GenomicWindow nodes vs Variant nodes
@@ -41,6 +42,11 @@ def export_bed(ctx, stat, threshold, population, pop2, chromosome,
41
42
  graphpop export-bed --stat xpehh --threshold 3.0 --pop EUR --pop2 AFR -o xpehh.bed
42
43
  graphpop export-bed --stat tajima_d --threshold -2.0 --pop GJ-tmp -o tajimad.bed
43
44
  """
45
+ # Validate identifiers used in dynamic property names
46
+ validate_identifier(population, "population")
47
+ if pop2:
48
+ validate_identifier(pop2, "population")
49
+
44
50
  if stat == "xpehh" and not pop2:
45
51
  click.echo("Error: --pop2 is required for xpehh.", err=True)
46
52
  raise SystemExit(1)
@@ -1,8 +1,6 @@
1
1
  """graphpop export-windows — batch export GenomicWindow nodes to TSV."""
2
2
  from __future__ import annotations
3
3
 
4
- from pathlib import Path
5
-
6
4
  import click
7
5
 
8
6
  from ..cli import pass_ctx
@@ -5,6 +5,7 @@ import click
5
5
 
6
6
  from ..cli import pass_ctx
7
7
  from ..formatters import format_output
8
+ from ..validators import validate_identifier
8
9
 
9
10
 
10
11
  @click.command("filter")
@@ -44,6 +45,11 @@ def filter_results(ctx, statistic, chr, population, output_path, fmt,
44
45
  graphpop filter nsl chr1 GJ-tmp --gene GW5 --min-score 2.0
45
46
  graphpop filter h12 chr1 GJ-tmp --consequence missense_variant
46
47
  """
48
+ # Validate identifiers used in dynamic property names
49
+ validate_identifier(population, "population")
50
+ if pop2:
51
+ validate_identifier(pop2, "population")
52
+
47
53
  # Build the property name for this statistic
48
54
  if statistic == "xpehh" and pop2:
49
55
  prop = f"xpehh_{population}_{pop2}"
@@ -8,6 +8,7 @@ from pathlib import Path
8
8
  import click
9
9
 
10
10
  from ..cli import pass_ctx
11
+ from ..validators import validate_identifier
11
12
 
12
13
  try:
13
14
  import matplotlib
@@ -827,10 +828,15 @@ def chromosome(ctx, chrom, population, stats, output, title, width, height):
827
828
  _check_matplotlib()
828
829
  _apply_style()
829
830
 
831
+ # Validate identifiers used in dynamic property names
832
+ validate_identifier(population, "population")
833
+
830
834
  stat_list = [s.strip() for s in stats.split(",") if s.strip()]
831
835
  if not stat_list:
832
836
  click.echo("No statistics specified.", err=True)
833
837
  raise SystemExit(1)
838
+ for s in stat_list:
839
+ validate_identifier(s, "statistic")
834
840
 
835
841
  window_stats = {"fst", "pi", "theta_w", "tajima_d"}
836
842
  variant_stats = {"ihs", "xpehh"}
@@ -5,6 +5,7 @@ import click
5
5
 
6
6
  from ..cli import pass_ctx
7
7
  from ..formatters import format_output
8
+ from ..validators import validate_identifier
8
9
 
9
10
 
10
11
  @click.command("rank-genes")
@@ -38,6 +39,11 @@ def rank_genes(ctx, population, pop2, chromosome, top, sort_by,
38
39
  graphpop rank-genes --pop GJ-tmp --pop2 GJ-trop --chr Chr01 --sort-by max_abs_ihs
39
40
  graphpop rank-genes --pop EUR --pop2 AFR --sort-by mean_fst --format json
40
41
  """
42
+ # Validate identifiers used in dynamic property names
43
+ population = validate_identifier(population, "population")
44
+ if pop2:
45
+ pop2 = validate_identifier(pop2, "population")
46
+
41
47
  # Dynamic property names cannot be parameterized — kept as f-strings.
42
48
  ihs_prop = f"ihs_{population}"
43
49
  xpehh_prop = f"xpehh_{population}_{pop2}" if pop2 else None
@@ -1,8 +1,9 @@
1
1
  """graphpop setup — download, configure, and initialize Neo4j for GraphPop."""
2
2
  from __future__ import annotations
3
3
 
4
- import platform
4
+ import re
5
5
  import shutil
6
+ import socket
6
7
  import subprocess
7
8
  import tarfile
8
9
  from pathlib import Path
@@ -13,11 +14,14 @@ import yaml
13
14
 
14
15
 
15
16
  DEFAULT_NEO4J_HOME = Path.home() / "neo4j"
16
- NEO4J_VERSION = "5.26.0"
17
+ NEO4J_VERSION = "2025.12.1"
17
18
  NEO4J_DOWNLOAD_URL = (
18
19
  f"https://dist.neo4j.org/neo4j-community-{NEO4J_VERSION}-unix.tar.gz"
19
20
  )
20
21
 
22
+ DEFAULT_BOLT_PORT = 7687
23
+ DEFAULT_HTTP_PORT = 7474
24
+
21
25
  # GraphPop procedures plugin — auto-downloaded from GitHub Releases
22
26
  GRAPHPOP_PROCEDURES_VERSION = "0.1.0"
23
27
  GRAPHPOP_JAR_NAME = f"graphpop-procedures-{GRAPHPOP_PROCEDURES_VERSION}.jar"
@@ -27,6 +31,26 @@ GRAPHPOP_JAR_URL = (
27
31
  )
28
32
 
29
33
 
34
+ def _port_in_use(port: int) -> int | None:
35
+ """Return the PID using *port*, or None if the port is free."""
36
+ try:
37
+ result = subprocess.run(
38
+ ["lsof", "-ti", f":{port}"],
39
+ capture_output=True, text=True,
40
+ )
41
+ if result.returncode == 0 and result.stdout.strip():
42
+ return int(result.stdout.strip().splitlines()[0])
43
+ except (FileNotFoundError, ValueError):
44
+ pass
45
+ # Fallback: try to bind
46
+ try:
47
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
48
+ s.bind(("127.0.0.1", port))
49
+ return None
50
+ except OSError:
51
+ return 0 # port in use but PID unknown
52
+
53
+
30
54
  @click.command()
31
55
  @click.option("--neo4j-home", type=click.Path(), default=str(DEFAULT_NEO4J_HOME),
32
56
  help=f"Neo4j installation directory (default: {DEFAULT_NEO4J_HOME})")
@@ -43,8 +67,18 @@ GRAPHPOP_JAR_URL = (
43
67
  help="Path to a local graphpop-procedures.jar (skips auto-download)")
44
68
  @click.option("--skip-plugin", is_flag=True,
45
69
  help="Skip deploying the GraphPop procedures plugin")
70
+ @click.option("--bolt-port", type=int, default=DEFAULT_BOLT_PORT,
71
+ help=f"Bolt protocol port (default: {DEFAULT_BOLT_PORT})")
72
+ @click.option("--http-port", type=int, default=DEFAULT_HTTP_PORT,
73
+ help=f"HTTP port for Neo4j Browser (default: {DEFAULT_HTTP_PORT})")
74
+ @click.option("--neo4j-tarball", type=click.Path(exists=True), default=None,
75
+ help="Path to a local Neo4j tarball (skips download, for offline install)")
76
+ @click.option("--adopt", is_flag=True,
77
+ help="Adopt a running Neo4j instance (deploys plugin and restarts)")
78
+ @click.option("--yes", is_flag=True,
79
+ help="Skip interactive confirmations (use with --adopt)")
46
80
  def setup(neo4j_home, pagecache, heap, password, skip_download, deploy_plugin,
47
- skip_plugin):
81
+ skip_plugin, bolt_port, http_port, neo4j_tarball, adopt, yes):
48
82
  """Set up Neo4j for GraphPop.
49
83
 
50
84
  Downloads Neo4j Community Edition, automatically downloads and deploys
@@ -60,20 +94,88 @@ def setup(neo4j_home, pagecache, heap, password, skip_download, deploy_plugin,
60
94
  graphpop setup --neo4j-home /opt/neo4j --pagecache 20g --heap 8g
61
95
  graphpop setup --deploy-plugin path/to/local/graphpop-procedures.jar
62
96
  graphpop setup --skip-plugin --password mypass
97
+ graphpop setup --bolt-port 7688 --http-port 7475 --password mypass
98
+ graphpop setup --neo4j-tarball /path/to/neo4j.tar.gz --password mypass
99
+ graphpop setup --adopt --neo4j-home /path/to/neo4j --password mypass
63
100
  """
64
101
  neo4j_path = Path(neo4j_home)
65
102
 
66
103
  # Step 0: Check Java runtime
67
104
  _check_java()
68
105
 
106
+ # Step 0b: Check port conflicts
107
+ if not adopt:
108
+ pid = _port_in_use(bolt_port)
109
+ if pid is not None:
110
+ pid_str = f" (PID {pid})" if pid else ""
111
+ click.echo(
112
+ f"Error: Bolt port {bolt_port} is already in use{pid_str}.\n"
113
+ f"\n"
114
+ f"Options:\n"
115
+ f" 1. Stop the existing process"
116
+ + (f": kill {pid}" if pid else "") + "\n"
117
+ f" 2. Install on different ports:\n"
118
+ f" graphpop setup --bolt-port {bolt_port + 1} "
119
+ f"--http-port {http_port + 1} ...\n"
120
+ f" 3. Adopt the running instance:\n"
121
+ f" graphpop setup --adopt --neo4j-home <neo4j-home> ...",
122
+ err=True,
123
+ )
124
+ raise SystemExit(1)
125
+
126
+ # --adopt path: deploy plugin to a running instance and restart
127
+ if adopt:
128
+ click.echo(f"Adopting existing Neo4j at {neo4j_path}...")
129
+ neo4j_bin = neo4j_path / "bin" / "neo4j"
130
+ if not neo4j_bin.exists():
131
+ click.echo(f"Error: Neo4j not found at {neo4j_path}", err=True)
132
+ raise SystemExit(1)
133
+
134
+ if not yes:
135
+ click.confirm(
136
+ "This will stop and restart the Neo4j instance. Continue?",
137
+ abort=True,
138
+ )
139
+
140
+ # Stop Neo4j
141
+ click.echo("Stopping Neo4j...")
142
+ subprocess.run([str(neo4j_bin), "stop"], capture_output=True, text=True)
143
+
144
+ # Deploy plugin
145
+ _deploy_plugin_to(neo4j_path, deploy_plugin, skip_plugin)
146
+
147
+ # Configure ports if non-default
148
+ if bolt_port != DEFAULT_BOLT_PORT or http_port != DEFAULT_HTTP_PORT:
149
+ click.echo("Configuring custom ports...")
150
+ _configure_neo4j(neo4j_path, pagecache, heap,
151
+ bolt_port=bolt_port, http_port=http_port)
152
+ else:
153
+ click.echo("\nConfiguring Neo4j...")
154
+ _configure_neo4j(neo4j_path, pagecache, heap,
155
+ bolt_port=bolt_port, http_port=http_port)
156
+
157
+ # Restart Neo4j
158
+ click.echo("Restarting Neo4j...")
159
+ subprocess.run([str(neo4j_bin), "start"], capture_output=True, text=True)
160
+
161
+ # Write config and summary
162
+ config_path = _write_config(neo4j_path, password, bolt_port)
163
+ _print_summary(neo4j_path, pagecache, heap, bolt_port, http_port,
164
+ config_path, deploy_plugin, skip_plugin)
165
+ return
166
+
69
167
  # Step 1: Download Neo4j
70
- if not skip_download:
168
+ if neo4j_tarball:
169
+ # Offline install: extract user-provided tarball
170
+ _extract_tarball(Path(neo4j_tarball), neo4j_path)
171
+ skip_download = True
172
+ elif not skip_download:
71
173
  if neo4j_path.exists() and (neo4j_path / "bin" / "neo4j").exists():
72
174
  click.echo(f"Neo4j already installed at {neo4j_path}")
73
175
  if not click.confirm("Re-install?"):
74
176
  skip_download = True
75
177
 
76
- if not skip_download:
178
+ if not skip_download and not neo4j_tarball:
77
179
  _download_neo4j(neo4j_path)
78
180
 
79
181
  # Verify installation
@@ -85,40 +187,56 @@ def setup(neo4j_home, pagecache, heap, password, skip_download, deploy_plugin,
85
187
 
86
188
  # Step 2: Configure Neo4j
87
189
  click.echo("\nConfiguring Neo4j...")
88
- _configure_neo4j(neo4j_path, pagecache, heap)
190
+ _configure_neo4j(neo4j_path, pagecache, heap,
191
+ bolt_port=bolt_port, http_port=http_port)
89
192
 
90
193
  # Step 3: Set initial password
91
194
  click.echo("Setting Neo4j password...")
92
195
  _set_password(neo4j_path, password)
93
196
 
94
197
  # Step 4: Deploy GraphPop plugin
95
- # Priority: user-provided JAR > conda-bundled JAR > GitHub download
198
+ _deploy_plugin_to(neo4j_path, deploy_plugin, skip_plugin)
199
+
200
+ # Step 5: Create GraphPop config
201
+ config_path = _write_config(neo4j_path, password, bolt_port)
202
+
203
+ # Step 6: Summary
204
+ _print_summary(neo4j_path, pagecache, heap, bolt_port, http_port,
205
+ config_path, deploy_plugin, skip_plugin)
206
+
207
+
208
+ def _deploy_plugin_to(neo4j_path: Path, deploy_plugin: str | None,
209
+ skip_plugin: bool):
210
+ """Deploy the GraphPop procedures JAR to the Neo4j plugins directory."""
96
211
  plugin_dest = neo4j_path / "plugins" / "graphpop-procedures.jar"
97
212
  if deploy_plugin:
98
- # Use user-provided local JAR
99
213
  click.echo(f"Deploying GraphPop plugin from {deploy_plugin}...")
100
214
  shutil.copy2(deploy_plugin, plugin_dest)
101
215
  click.echo(f" Deployed to {plugin_dest}")
102
216
  elif not skip_plugin:
103
- # Check for conda-bundled JAR first
104
217
  conda_jar = _find_conda_jar()
105
218
  if conda_jar:
106
- click.echo(f"Deploying conda-bundled GraphPop plugin...")
219
+ click.echo("Deploying conda-bundled GraphPop plugin...")
107
220
  shutil.copy2(conda_jar, plugin_dest)
108
221
  click.echo(f" Deployed to {plugin_dest}")
109
222
  else:
110
- # Auto-download pre-compiled JAR from GitHub Releases
111
- click.echo(f"Downloading GraphPop procedures plugin v{GRAPHPOP_PROCEDURES_VERSION}...")
223
+ click.echo(
224
+ f"Downloading GraphPop procedures plugin "
225
+ f"v{GRAPHPOP_PROCEDURES_VERSION}..."
226
+ )
112
227
  _download_plugin(plugin_dest)
113
228
  click.echo(f" Deployed to {plugin_dest}")
114
229
 
115
- # Step 5: Create GraphPop config
230
+
231
+ def _write_config(neo4j_path: Path, password: str,
232
+ bolt_port: int) -> Path:
233
+ """Write ~/.graphpop/config.yaml and return its path."""
116
234
  config_dir = Path.home() / ".graphpop"
117
235
  config_dir.mkdir(exist_ok=True)
118
236
  config_path = config_dir / "config.yaml"
119
237
 
120
238
  config = {
121
- "uri": "bolt://localhost:7687",
239
+ "uri": f"bolt://localhost:{bolt_port}",
122
240
  "user": "neo4j",
123
241
  "password": password,
124
242
  "database": "neo4j",
@@ -127,14 +245,24 @@ def setup(neo4j_home, pagecache, heap, password, skip_download, deploy_plugin,
127
245
  with open(config_path, "w") as f:
128
246
  yaml.dump(config, f, default_flow_style=False)
129
247
  click.echo(f"\nGraphPop config written to {config_path}")
248
+ return config_path
249
+
250
+
251
+ def _print_summary(neo4j_path: Path, pagecache: str, heap: str,
252
+ bolt_port: int, http_port: int,
253
+ config_path: Path, deploy_plugin: str | None,
254
+ skip_plugin: bool):
255
+ """Print the post-setup summary."""
256
+ port_note = ""
257
+ if bolt_port != DEFAULT_BOLT_PORT or http_port != DEFAULT_HTTP_PORT:
258
+ port_note = f"\n Bolt port: {bolt_port}\n HTTP port: {http_port}"
130
259
 
131
- # Step 6: Summary
132
260
  click.echo(f"""
133
261
  Setup complete!
134
262
 
135
263
  Neo4j home: {neo4j_path}
136
264
  Page cache: {pagecache}
137
- Heap: {heap}
265
+ Heap: {heap}{port_note}
138
266
  Config: {config_path}
139
267
  Plugin: {'deployed' if (deploy_plugin or not skip_plugin) else 'not deployed (use --deploy-plugin or remove --skip-plugin)'}
140
268
 
@@ -143,6 +271,7 @@ Next steps:
143
271
  graphpop import --vcf data.vcf.gz \\
144
272
  --panel panel.txt --database mydb # Import data
145
273
  graphpop diversity chr1 1 50000000 POP # Run analysis
274
+ graphpop doctor # Verify installation health
146
275
  """)
147
276
 
148
277
 
@@ -173,8 +302,36 @@ def _download_neo4j(dest: Path):
173
302
  click.echo(f" Installed to {dest}")
174
303
 
175
304
 
176
- def _configure_neo4j(neo4j_home: Path, pagecache: str, heap: str):
177
- """Configure Neo4j memory and settings."""
305
+ def _extract_tarball(tarball_path: Path, dest: Path):
306
+ """Extract a user-provided Neo4j tarball for offline install."""
307
+ # Validate filename pattern
308
+ name = tarball_path.name
309
+ if not re.match(r"neo4j-community-[\d.]+(-unix)?\.tar\.gz", name):
310
+ click.echo(
311
+ f"Warning: tarball filename '{name}' does not match expected pattern "
312
+ f"'neo4j-community-5.x.y-unix.tar.gz'.",
313
+ err=True,
314
+ )
315
+
316
+ click.echo(f"Extracting {tarball_path} to {dest}...")
317
+ if dest.exists():
318
+ shutil.rmtree(dest)
319
+
320
+ with tarfile.open(tarball_path) as tf:
321
+ tf.extractall(dest.parent)
322
+
323
+ # Find the extracted directory (neo4j-community-X.Y.Z)
324
+ for child in dest.parent.iterdir():
325
+ if child.is_dir() and child.name.startswith("neo4j-community-") and child != dest:
326
+ child.rename(dest)
327
+ break
328
+ click.echo(f" Installed to {dest}")
329
+
330
+
331
+ def _configure_neo4j(neo4j_home: Path, pagecache: str, heap: str, *,
332
+ bolt_port: int = DEFAULT_BOLT_PORT,
333
+ http_port: int = DEFAULT_HTTP_PORT):
334
+ """Configure Neo4j memory, port, and security settings."""
178
335
  conf_path = neo4j_home / "conf" / "neo4j.conf"
179
336
 
180
337
  # Read existing config
@@ -190,6 +347,12 @@ def _configure_neo4j(neo4j_home: Path, pagecache: str, heap: str):
190
347
  "dbms.security.procedures.unrestricted": "graphpop.*",
191
348
  }
192
349
 
350
+ # Add port settings if non-default
351
+ if bolt_port != DEFAULT_BOLT_PORT:
352
+ settings["server.bolt.listen_address"] = f":{bolt_port}"
353
+ if http_port != DEFAULT_HTTP_PORT:
354
+ settings["server.http.listen_address"] = f":{http_port}"
355
+
193
356
  # Update or append settings
194
357
  updated_keys = set()
195
358
  new_lines = []
@@ -224,10 +387,10 @@ def _check_java():
224
387
  m = re.search(r'"(\d+)', output)
225
388
  if m and int(m.group(1)) < 21:
226
389
  click.echo(
227
- " Warning: Java 21+ is required by Neo4j. "
228
- "Found version {m.group(1)}.\n"
229
- " Install via: conda install -c conda-forge openjdk=21\n"
230
- " Or: sudo apt install openjdk-21-jre-headless",
390
+ f" Warning: Java 21+ is required by Neo4j. "
391
+ f"Found version {m.group(1)}.\n"
392
+ f" Install via: conda install -c conda-forge openjdk=21\n"
393
+ f" Or: sudo apt install openjdk-21-jre-headless",
231
394
  err=True,
232
395
  )
233
396
  except FileNotFoundError:
@@ -0,0 +1,22 @@
1
+ """Shared validation utilities for GraphPop CLI commands."""
2
+ from __future__ import annotations
3
+
4
+ import re
5
+
6
+ import click
7
+
8
+ _IDENT_RE = re.compile(r'^[A-Za-z0-9_-]+$')
9
+
10
+
11
+ def validate_identifier(value: str, label: str = "identifier") -> str:
12
+ """Validate that a value is safe for use as a Cypher property name.
13
+
14
+ Only alphanumeric characters, hyphens, and underscores are allowed.
15
+ Raises click.BadParameter if the value contains unsafe characters.
16
+ """
17
+ if not _IDENT_RE.match(value):
18
+ raise click.BadParameter(
19
+ f"Invalid {label}: {value!r}. Only alphanumeric, hyphen, "
20
+ "and underscore characters are allowed."
21
+ )
22
+ return value
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: graphpop-cli
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Graph database-native population genomics CLI with O(V*K) complexity
5
5
  Author: Jianfeng Mao
6
6
  License: MIT
@@ -5,6 +5,7 @@ src/graphpop_cli/cli.py
5
5
  src/graphpop_cli/config.py
6
6
  src/graphpop_cli/connection.py
7
7
  src/graphpop_cli/formatters.py
8
+ src/graphpop_cli/validators.py
8
9
  src/graphpop_cli.egg-info/PKG-INFO
9
10
  src/graphpop_cli.egg-info/SOURCES.txt
10
11
  src/graphpop_cli.egg-info/dependency_links.txt
@@ -20,6 +21,7 @@ src/graphpop_cli/commands/converge.py
20
21
  src/graphpop_cli/commands/db.py
21
22
  src/graphpop_cli/commands/divergence.py
22
23
  src/graphpop_cli/commands/diversity.py
24
+ src/graphpop_cli/commands/doctor.py
23
25
  src/graphpop_cli/commands/dump.py
24
26
  src/graphpop_cli/commands/export_bed.py
25
27
  src/graphpop_cli/commands/export_windows.py
File without changes
File without changes