graphpop-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. graphpop_cli/__init__.py +2 -0
  2. graphpop_cli/cli.py +161 -0
  3. graphpop_cli/commands/__init__.py +1 -0
  4. graphpop_cli/commands/aggregate.py +206 -0
  5. graphpop_cli/commands/batch.py +155 -0
  6. graphpop_cli/commands/compare.py +118 -0
  7. graphpop_cli/commands/config_cmd.py +117 -0
  8. graphpop_cli/commands/converge.py +156 -0
  9. graphpop_cli/commands/db.py +188 -0
  10. graphpop_cli/commands/divergence.py +37 -0
  11. graphpop_cli/commands/diversity.py +36 -0
  12. graphpop_cli/commands/dump.py +210 -0
  13. graphpop_cli/commands/export_bed.py +170 -0
  14. graphpop_cli/commands/export_windows.py +91 -0
  15. graphpop_cli/commands/extract.py +271 -0
  16. graphpop_cli/commands/filter_results.py +165 -0
  17. graphpop_cli/commands/garud_h.py +30 -0
  18. graphpop_cli/commands/genome_scan.py +41 -0
  19. graphpop_cli/commands/ihs.py +29 -0
  20. graphpop_cli/commands/import_data.py +266 -0
  21. graphpop_cli/commands/inventory.py +160 -0
  22. graphpop_cli/commands/joint_sfs.py +38 -0
  23. graphpop_cli/commands/ld.py +35 -0
  24. graphpop_cli/commands/lookup.py +207 -0
  25. graphpop_cli/commands/neighbors.py +175 -0
  26. graphpop_cli/commands/nsl.py +29 -0
  27. graphpop_cli/commands/plot.py +1066 -0
  28. graphpop_cli/commands/pop_summary.py +30 -0
  29. graphpop_cli/commands/query.py +15 -0
  30. graphpop_cli/commands/rank_genes.py +177 -0
  31. graphpop_cli/commands/report.py +264 -0
  32. graphpop_cli/commands/roh.py +30 -0
  33. graphpop_cli/commands/run_all.py +276 -0
  34. graphpop_cli/commands/server.py +98 -0
  35. graphpop_cli/commands/setup.py +299 -0
  36. graphpop_cli/commands/sfs.py +38 -0
  37. graphpop_cli/commands/validate.py +167 -0
  38. graphpop_cli/commands/xpehh.py +31 -0
  39. graphpop_cli/config.py +57 -0
  40. graphpop_cli/connection.py +52 -0
  41. graphpop_cli/formatters.py +81 -0
  42. graphpop_cli-0.1.0.dist-info/METADATA +73 -0
  43. graphpop_cli-0.1.0.dist-info/RECORD +46 -0
  44. graphpop_cli-0.1.0.dist-info/WHEEL +5 -0
  45. graphpop_cli-0.1.0.dist-info/entry_points.txt +2 -0
  46. graphpop_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,207 @@
1
+ """graphpop lookup — query genes, pathways, variants, and regions in the graph."""
2
+ from __future__ import annotations
3
+
4
+ import click
5
+
6
+ from ..cli import pass_ctx
7
+ from ..formatters import format_output
8
+
9
+
10
@click.group()
def lookup():
    """Look up genes, pathways, variants, or genomic regions.

    \b
    Subcommands:
      gene Look up a gene by symbol or ID
      pathway Look up a pathway by name
      variant Look up a variant by ID
      region Look up genes and stats in a genomic region

    \b
    Examples:
      graphpop lookup gene KCNE1
      graphpop lookup pathway "Cardiac repolarization"
      graphpop lookup variant chr22:16050075:A:G
      graphpop lookup region chr6 9000000 9600000
    """
    # Pure container group: the subcommands registered on it below do the
    # work, so the docstring (used by click as help text) is the whole body.
29
+
30
+
31
@lookup.command("gene")
@click.argument("gene_name")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def lookup_gene(ctx, gene_name, output_path, fmt):
    """Look up a gene: variant count, consequences, pathways, and selection stats.

    GENE_NAME can be a gene symbol (e.g., KCNE1) or gene ID (e.g., ENSG00000180509).

    \b
    Examples:
      graphpop lookup gene KCNE1
      graphpop lookup gene GW5 -o gw5_info.tsv
      graphpop lookup gene ENSG00000180509 --format json
    """
    # One output row per (gene, variant) pair; the gene's pathway names are
    # collected into a list, and any variant properties whose keys start with
    # 'ihs_' / 'xpehh_' are rendered as "key=value" strings.
    cypher = """
        MATCH (g:Gene)
        WHERE g.symbol = $gene_name OR g.geneId = $gene_name
        OPTIONAL MATCH (v:Variant)-[:HAS_CONSEQUENCE]->(g)
        OPTIONAL MATCH (g)-[:IN_PATHWAY]->(pw:Pathway)
        WITH g, v, COLLECT(DISTINCT pw.name) AS pathways
        RETURN g.symbol AS gene,
               g.geneId AS gene_id,
               g.chr AS chr,
               g.start AS start,
               g.end AS end,
               v.variantId AS variant_id,
               v.pos AS pos,
               v.ref AS ref,
               v.alt AS alt,
               pathways,
               CASE WHEN v IS NOT NULL THEN [k IN keys(v) WHERE k STARTS WITH 'ihs_' | k + '=' + toString(v[k])] ELSE [] END AS ihs_scores,
               CASE WHEN v IS NOT NULL THEN [k IN keys(v) WHERE k STARTS WITH 'xpehh_' | k + '=' + toString(v[k])] ELSE [] END AS xpehh_scores
        ORDER BY v.pos
    """
    records = ctx.run(cypher, {"gene_name": gene_name})

    if not records:
        click.echo(f"Gene '{gene_name}' not found in the graph.", err=True)
        return

    # A gene without any linked variant still produces one row (the OPTIONAL
    # MATCH leaves the variant columns null), so count only rows that actually
    # carry a variant instead of reporting len(records).
    variant_count = sum(
        1 for rec in records if rec.get("variant_id") is not None
    )
    click.echo(f"Found {variant_count} variants for gene {gene_name}.", err=True)
    format_output(records, output_path, fmt, "lookup gene",
                  {"gene": gene_name})
76
+
77
+
78
@lookup.command("pathway")
@click.argument("pw_name")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def lookup_pathway(ctx, pw_name, output_path, fmt):
    """Look up a pathway: member genes and variant counts.

    PW_NAME is matched as a substring (CONTAINS) against pathway names.

    \b
    Examples:
      graphpop lookup pathway "Cardiac repolarization"
      graphpop lookup pathway "starch" -o starch_pathway.tsv
    """
    # One row per (pathway, member gene) with the number of distinct variants
    # linked to that gene through HAS_CONSEQUENCE.
    query = """
        MATCH (pw:Pathway)
        WHERE pw.name CONTAINS $pw_name
        OPTIONAL MATCH (g:Gene)-[:IN_PATHWAY]->(pw)
        OPTIONAL MATCH (v:Variant)-[:HAS_CONSEQUENCE]->(g)
        WITH pw, g, COUNT(DISTINCT v) AS variant_count
        RETURN pw.name AS pathway,
               pw.pathwayId AS pathway_id,
               g.symbol AS gene,
               g.geneId AS gene_id,
               g.chr AS chr,
               g.start AS gene_start,
               g.end AS gene_end,
               variant_count
        ORDER BY pw.name, g.symbol
    """
    rows = ctx.run(query, {"pw_name": pw_name})

    if rows:
        # Status goes to stderr so stdout stays clean for the formatted table.
        click.echo(
            f"Found {len(rows)} gene entries across matching pathways.",
            err=True,
        )
        format_output(rows, output_path, fmt, "lookup pathway",
                      {"pathway": pw_name})
    else:
        click.echo(f"No pathways matching '{pw_name}' found.", err=True)
118
+
119
+
120
@lookup.command("variant")
@click.argument("var_id")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def lookup_variant(ctx, var_id, output_path, fmt):
    """Full annotation for a single variant.

    VAR_ID format: chr:pos:ref:alt (e.g., chr22:16050075:A:G).

    \b
    Examples:
      graphpop lookup variant chr22:16050075:A:G
      graphpop lookup variant Chr01:12345:A:T --format json
    """
    query = """
        MATCH (v:Variant {variantId: $var_id})
        OPTIONAL MATCH (v)-[:HAS_CONSEQUENCE]->(g:Gene)
        OPTIONAL MATCH (g)-[:IN_PATHWAY]->(pw:Pathway)
        RETURN v AS variant_props,
               g.symbol AS gene,
               g.geneId AS gene_id,
               COLLECT(DISTINCT pw.name) AS pathways
    """
    results = ctx.run(query, {"var_id": var_id})

    if not results:
        click.echo(f"Variant '{var_id}' not found.", err=True)
        return

    def _flatten(result):
        # Spread the variant's own properties into top-level columns, then
        # add the gene/pathway columns (these win on a key collision).
        props = result.get("variant_props", {})
        flat = dict(props.items()) if props else {}
        flat["gene"] = result.get("gene")
        flat["gene_id"] = result.get("gene_id")
        flat["pathways"] = result.get("pathways", [])
        return flat

    flat_rows = [_flatten(result) for result in results]

    format_output(flat_rows, output_path, fmt, "lookup variant",
                  {"variant_id": var_id})
165
+
166
+
167
@lookup.command("region")
@click.argument("chr")
@click.argument("start", type=int)
@click.argument("end", type=int)
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def lookup_region(ctx, chr, start, end, output_path, fmt):
    """Genes and summary stats in a genomic region.

    Returns per-gene variant counts and the first/last variant position
    observed for each gene in the region. Variants without a linked gene
    are grouped under 'intergenic'.

    \b
    Examples:
      graphpop lookup region chr6 9000000 9600000
      graphpop lookup region chr22 16000000 17000000 -o region.tsv
    """
    # An inverted range can never match; report it as a usage error rather
    # than the misleading "No variants found" message.
    if start > end:
        raise click.UsageError(
            f"START ({start}) must not be greater than END ({end})."
        )

    # Both bounds are inclusive. Grouping is by gene; a null gene (variant
    # with no HAS_CONSEQUENCE edge) is labelled 'intergenic' in the output.
    cypher = """
        MATCH (v:Variant)
        WHERE v.chr = $chr AND v.pos >= $start AND v.pos <= $end
        OPTIONAL MATCH (v)-[:HAS_CONSEQUENCE]->(g:Gene)
        WITH g, COUNT(DISTINCT v) AS variant_count,
             MIN(v.pos) AS min_pos, MAX(v.pos) AS max_pos
        RETURN COALESCE(g.symbol, 'intergenic') AS gene,
               g.geneId AS gene_id,
               g.start AS gene_start,
               g.end AS gene_end,
               variant_count,
               min_pos,
               max_pos
        ORDER BY min_pos
    """
    records = ctx.run(cypher, {"chr": chr, "start": start, "end": end})

    if not records:
        click.echo(f"No variants found in {chr}:{start}-{end}.", err=True)
        return

    click.echo(f"Found {len(records)} genes/regions in {chr}:{start}-{end}.", err=True)
    format_output(records, output_path, fmt, "lookup region",
                  {"chr": chr, "start": start, "end": end})
@@ -0,0 +1,175 @@
1
+ """graphpop neighbors -- explore the graph neighborhood around a gene."""
2
+ from __future__ import annotations
3
+
4
+ import click
5
+
6
+ from ..cli import pass_ctx
7
+ from ..formatters import format_output
8
+
9
+
10
@click.command()
@click.argument("gene")
@click.option("--hops", default=1, type=click.IntRange(1, 3),
              help="Number of hops to traverse (default: 1, max: 3)")
@click.option("--via", default="IN_PATHWAY",
              type=click.Choice(["IN_PATHWAY", "LD", "HAS_GO_TERM"],
                                case_sensitive=False),
              help="Relationship type to traverse (default: IN_PATHWAY)")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv",
              type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def neighbors(ctx, gene, hops, via, output_path, fmt):
    """Explore the graph neighborhood around a gene.

    Traverses shared pathways, LD edges, or GO terms to find related genes.

    \b
    Examples:
      graphpop neighbors KCNE1 -o neighbors.tsv
      graphpop neighbors KCNE1 --hops 2 -o neighbors_2hop.tsv
      graphpop neighbors GW5 --via HAS_GO_TERM --format json
    """
    via = via.upper()

    # Map each supported relationship type to the helper that builds its query.
    query_builders = {
        "IN_PATHWAY": _pathway_query,
        "LD": _ld_query,
        "HAS_GO_TERM": _go_query,
    }
    builder = query_builders.get(via)
    if builder is None:
        # Defensive fallback; click.Choice should already reject other values.
        click.echo(f"Unsupported --via type: {via}", err=True)
        raise SystemExit(1)

    cypher, params = builder(hops)
    params["gene"] = gene
    found = ctx.run(cypher, params)

    summary = f"via {via} ({hops} hop(s))."
    if found:
        click.echo(f"Found {len(found)} neighbor(s) for {gene} {summary}", err=True)
        format_output(found, output_path, fmt, "neighbors",
                      {"gene": gene, "hops": hops, "via": via})
    else:
        click.echo(f"No neighbors found for gene '{gene}' {summary}", err=True)
57
+
58
+
59
+ def _pathway_query(hops: int) -> tuple[str, dict]:
60
+ """Build pathway-based neighbor query."""
61
+ if hops == 1:
62
+ return (
63
+ "MATCH (g1:Gene)-[:IN_PATHWAY]->(p:Pathway)<-[:IN_PATHWAY]-(g2:Gene) "
64
+ "WHERE (g1.symbol = $gene OR g1.geneId = $gene) AND g1 <> g2 "
65
+ "RETURN DISTINCT g2.symbol AS gene, p.name AS shared_pathway, "
66
+ "g2.chr AS chr, g2.start AS start, g2.end AS end "
67
+ "ORDER BY gene",
68
+ {},
69
+ )
70
+ elif hops == 2:
71
+ return (
72
+ "MATCH (g1:Gene)-[:IN_PATHWAY]->(p1:Pathway)<-[:IN_PATHWAY]-(g2:Gene)"
73
+ "-[:IN_PATHWAY]->(p2:Pathway)<-[:IN_PATHWAY]-(g3:Gene) "
74
+ "WHERE (g1.symbol = $gene OR g1.geneId = $gene) "
75
+ "AND g1 <> g2 AND g1 <> g3 AND g2 <> g3 "
76
+ "RETURN DISTINCT g3.symbol AS gene, "
77
+ "g2.symbol AS via_gene, p1.name AS pathway_1, p2.name AS pathway_2, "
78
+ "g3.chr AS chr, g3.start AS start, g3.end AS end "
79
+ "ORDER BY gene",
80
+ {},
81
+ )
82
+ else: # hops == 3
83
+ return (
84
+ "MATCH path = (g1:Gene)"
85
+ "(-[:IN_PATHWAY]->(:Pathway)<-[:IN_PATHWAY]-(:Gene)){3} "
86
+ "WHERE (g1.symbol = $gene OR g1.geneId = $gene) "
87
+ "WITH g1, last(nodes(path)) AS g_end, "
88
+ "[n IN nodes(path) WHERE 'Pathway' IN labels(n) | n.name] AS pws, "
89
+ "[n IN nodes(path) WHERE 'Gene' IN labels(n) | n.symbol] AS genes "
90
+ "WHERE g1 <> g_end "
91
+ "RETURN DISTINCT g_end.symbol AS gene, "
92
+ "g_end.chr AS chr, g_end.start AS start, g_end.end AS end, "
93
+ "pws AS pathways, genes AS via_genes "
94
+ "ORDER BY gene "
95
+ "LIMIT 500",
96
+ {},
97
+ )
98
+
99
+
100
+ def _ld_query(hops: int) -> tuple[str, dict]:
101
+ """Build LD-based neighbor query."""
102
+ if hops == 1:
103
+ return (
104
+ "MATCH (g1:Gene)<-[:HAS_CONSEQUENCE]-(v1:Variant)"
105
+ "-[ld:LD]-(v2:Variant)-[:HAS_CONSEQUENCE]->(g2:Gene) "
106
+ "WHERE (g1.symbol = $gene OR g1.geneId = $gene) AND g1 <> g2 "
107
+ "RETURN DISTINCT g2.symbol AS gene, "
108
+ "max(ld.r2) AS max_r2, g2.chr AS chr "
109
+ "ORDER BY max_r2 DESC",
110
+ {},
111
+ )
112
+ elif hops == 2:
113
+ return (
114
+ "MATCH (g1:Gene)<-[:HAS_CONSEQUENCE]-(v1:Variant)"
115
+ "-[:LD]-(v2:Variant)-[:LD]-(v3:Variant)"
116
+ "-[:HAS_CONSEQUENCE]->(g2:Gene) "
117
+ "WHERE (g1.symbol = $gene OR g1.geneId = $gene) AND g1 <> g2 "
118
+ "RETURN DISTINCT g2.symbol AS gene, g2.chr AS chr "
119
+ "ORDER BY gene "
120
+ "LIMIT 500",
121
+ {},
122
+ )
123
+ else: # hops == 3
124
+ return (
125
+ "MATCH (g1:Gene)<-[:HAS_CONSEQUENCE]-(v1:Variant)"
126
+ "-[:LD]-(v2:Variant)-[:LD]-(v3:Variant)-[:LD]-(v4:Variant)"
127
+ "-[:HAS_CONSEQUENCE]->(g2:Gene) "
128
+ "WHERE (g1.symbol = $gene OR g1.geneId = $gene) AND g1 <> g2 "
129
+ "RETURN DISTINCT g2.symbol AS gene, g2.chr AS chr "
130
+ "ORDER BY gene "
131
+ "LIMIT 500",
132
+ {},
133
+ )
134
+
135
+
136
+ def _go_query(hops: int) -> tuple[str, dict]:
137
+ """Build GO term-based neighbor query."""
138
+ if hops == 1:
139
+ return (
140
+ "MATCH (g1:Gene)-[:HAS_GO_TERM]->(go:GOTerm)<-[:HAS_GO_TERM]-(g2:Gene) "
141
+ "WHERE (g1.symbol = $gene OR g1.geneId = $gene) AND g1 <> g2 "
142
+ "RETURN DISTINCT g2.symbol AS gene, go.name AS shared_go_term, "
143
+ "go.goId AS go_id, g2.chr AS chr "
144
+ "ORDER BY gene",
145
+ {},
146
+ )
147
+ elif hops == 2:
148
+ return (
149
+ "MATCH (g1:Gene)-[:HAS_GO_TERM]->(:GOTerm)<-[:HAS_GO_TERM]-(g2:Gene)"
150
+ "-[:HAS_GO_TERM]->(go2:GOTerm)<-[:HAS_GO_TERM]-(g3:Gene) "
151
+ "WHERE (g1.symbol = $gene OR g1.geneId = $gene) "
152
+ "AND g1 <> g2 AND g1 <> g3 AND g2 <> g3 "
153
+ "RETURN DISTINCT g3.symbol AS gene, "
154
+ "g2.symbol AS via_gene, go2.name AS go_term, "
155
+ "g3.chr AS chr "
156
+ "ORDER BY gene "
157
+ "LIMIT 500",
158
+ {},
159
+ )
160
+ else: # hops == 3
161
+ return (
162
+ "MATCH path = (g1:Gene)"
163
+ "(-[:HAS_GO_TERM]->(:GOTerm)<-[:HAS_GO_TERM]-(:Gene)){3} "
164
+ "WHERE (g1.symbol = $gene OR g1.geneId = $gene) "
165
+ "WITH g1, last(nodes(path)) AS g_end, "
166
+ "[n IN nodes(path) WHERE 'GOTerm' IN labels(n) | n.name] AS terms, "
167
+ "[n IN nodes(path) WHERE 'Gene' IN labels(n) | n.symbol] AS genes "
168
+ "WHERE g1 <> g_end "
169
+ "RETURN DISTINCT g_end.symbol AS gene, "
170
+ "g_end.chr AS chr, "
171
+ "terms AS go_terms, genes AS via_genes "
172
+ "ORDER BY gene "
173
+ "LIMIT 500",
174
+ {},
175
+ )
@@ -0,0 +1,29 @@
1
+ """graphpop nsl — number of segregating sites by length."""
2
+ import click
3
+ from ..cli import pass_ctx
4
+ from ..config import build_options_map, build_cypher
5
+ from ..formatters import format_output
6
+
7
+
8
def _cypher_string_literal(value):
    """Return *value* as a single-quoted Cypher string literal, escaped."""
    # Escape backslashes first so the backslash added for quotes is not
    # doubled afterwards.
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


@click.command()
@click.argument("chr")
@click.argument("population")
@click.option("--min-af", type=float, help="Minimum allele frequency filter")
@click.option("--persist", is_flag=True, default=False,
              help="Write nSL scores to Variant nodes")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def nsl(ctx, chr, population, min_af, persist, output_path, fmt):
    """Compute number of segregating sites by length (nSL)."""
    opts = build_options_map(min_af=min_af, persist=persist)
    # CHR and POPULATION come straight from the command line and are embedded
    # in the query text as string literals, so quotes and backslashes must be
    # escaped; otherwise such a value breaks or alters the generated Cypher.
    cypher = build_cypher(
        "graphpop.nsl",
        [_cypher_string_literal(chr), _cypher_string_literal(population)],
        options=opts if opts else None,
        yield_cols=["variantId", "pos", "af", "nsl_unstd", "nsl"],
    )
    records = ctx.run(cypher)
    format_output(records, output_path, fmt, "nsl",
                  {"chr": chr, "pop": population, "min_af": min_af,
                   "persist": persist})