graphpop-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphpop_cli/__init__.py +2 -0
- graphpop_cli/cli.py +161 -0
- graphpop_cli/commands/__init__.py +1 -0
- graphpop_cli/commands/aggregate.py +206 -0
- graphpop_cli/commands/batch.py +155 -0
- graphpop_cli/commands/compare.py +118 -0
- graphpop_cli/commands/config_cmd.py +117 -0
- graphpop_cli/commands/converge.py +156 -0
- graphpop_cli/commands/db.py +188 -0
- graphpop_cli/commands/divergence.py +37 -0
- graphpop_cli/commands/diversity.py +36 -0
- graphpop_cli/commands/dump.py +210 -0
- graphpop_cli/commands/export_bed.py +170 -0
- graphpop_cli/commands/export_windows.py +91 -0
- graphpop_cli/commands/extract.py +271 -0
- graphpop_cli/commands/filter_results.py +165 -0
- graphpop_cli/commands/garud_h.py +30 -0
- graphpop_cli/commands/genome_scan.py +41 -0
- graphpop_cli/commands/ihs.py +29 -0
- graphpop_cli/commands/import_data.py +266 -0
- graphpop_cli/commands/inventory.py +160 -0
- graphpop_cli/commands/joint_sfs.py +38 -0
- graphpop_cli/commands/ld.py +35 -0
- graphpop_cli/commands/lookup.py +207 -0
- graphpop_cli/commands/neighbors.py +175 -0
- graphpop_cli/commands/nsl.py +29 -0
- graphpop_cli/commands/plot.py +1066 -0
- graphpop_cli/commands/pop_summary.py +30 -0
- graphpop_cli/commands/query.py +15 -0
- graphpop_cli/commands/rank_genes.py +177 -0
- graphpop_cli/commands/report.py +264 -0
- graphpop_cli/commands/roh.py +30 -0
- graphpop_cli/commands/run_all.py +276 -0
- graphpop_cli/commands/server.py +98 -0
- graphpop_cli/commands/setup.py +299 -0
- graphpop_cli/commands/sfs.py +38 -0
- graphpop_cli/commands/validate.py +167 -0
- graphpop_cli/commands/xpehh.py +31 -0
- graphpop_cli/config.py +57 -0
- graphpop_cli/connection.py +52 -0
- graphpop_cli/formatters.py +81 -0
- graphpop_cli-0.1.0.dist-info/METADATA +73 -0
- graphpop_cli-0.1.0.dist-info/RECORD +46 -0
- graphpop_cli-0.1.0.dist-info/WHEEL +5 -0
- graphpop_cli-0.1.0.dist-info/entry_points.txt +2 -0
- graphpop_cli-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""graphpop lookup — query genes, pathways, variants, and regions in the graph."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from ..cli import pass_ctx
|
|
7
|
+
from ..formatters import format_output
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@click.group()
def lookup():
    """Look up genes, pathways, variants, or genomic regions.

    \b
    Subcommands:
      gene Look up a gene by symbol or ID
      pathway Look up a pathway by name
      variant Look up a variant by ID
      region Look up genes and stats in a genomic region

    \b
    Examples:
      graphpop lookup gene KCNE1
      graphpop lookup pathway "Cardiac repolarization"
      graphpop lookup variant chr22:16050075:A:G
      graphpop lookup region chr6 9000000 9600000
    """
    # Intentionally empty: this callable only serves as the click group that
    # the gene / pathway / variant / region subcommands register themselves
    # on. The docstring above is what click prints for `--help`; the `\b`
    # markers keep click from re-wrapping the two lists.
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@lookup.command("gene")
@click.argument("gene_name")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def lookup_gene(ctx, gene_name, output_path, fmt):
    """Look up a gene: variant count, consequences, pathways, and selection stats.

    GENE_NAME can be a gene symbol (e.g., KCNE1) or gene ID (e.g., ENSG00000180509).

    \b
    Examples:
      graphpop lookup gene KCNE1
      graphpop lookup gene GW5 -o gw5_info.tsv
      graphpop lookup gene ENSG00000180509 --format json
    """
    # One output row per (gene, variant) pair. Both OPTIONAL MATCH clauses keep
    # the gene row alive when it has no variants or no pathways, in which case
    # the variant columns come back as null and the score lists are empty.
    # Any variant property whose key starts with 'ihs_' / 'xpehh_' is rendered
    # as a "key=value" string in the corresponding list column.
    cypher = """
    MATCH (g:Gene)
    WHERE g.symbol = $gene_name OR g.geneId = $gene_name
    OPTIONAL MATCH (v:Variant)-[:HAS_CONSEQUENCE]->(g)
    OPTIONAL MATCH (g)-[:IN_PATHWAY]->(pw:Pathway)
    WITH g, v, COLLECT(DISTINCT pw.name) AS pathways
    RETURN g.symbol AS gene,
           g.geneId AS gene_id,
           g.chr AS chr,
           g.start AS start,
           g.end AS end,
           v.variantId AS variant_id,
           v.pos AS pos,
           v.ref AS ref,
           v.alt AS alt,
           pathways,
           CASE WHEN v IS NOT NULL THEN [k IN keys(v) WHERE k STARTS WITH 'ihs_' | k + '=' + toString(v[k])] ELSE [] END AS ihs_scores,
           CASE WHEN v IS NOT NULL THEN [k IN keys(v) WHERE k STARTS WITH 'xpehh_' | k + '=' + toString(v[k])] ELSE [] END AS xpehh_scores
    ORDER BY v.pos
    """
    records = ctx.run(cypher, {"gene_name": gene_name})

    if not records:
        click.echo(f"Gene '{gene_name}' not found in the graph.", err=True)
        return

    # A gene without variants still yields one row (with a null variant), so
    # len(records) would over-report; count only rows that carry a variant.
    variant_count = sum(
        1 for rec in records if rec.get("variant_id") is not None
    )
    click.echo(f"Found {variant_count} variants for gene {gene_name}.", err=True)
    format_output(records, output_path, fmt, "lookup gene",
                  {"gene": gene_name})
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@lookup.command("pathway")
@click.argument("pw_name")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def lookup_pathway(ctx, pw_name, output_path, fmt):
    """Look up a pathway: member genes and variant counts.

    PW_NAME is matched as a substring (CONTAINS) against pathway names.

    \b
    Examples:
      graphpop lookup pathway "Cardiac repolarization"
      graphpop lookup pathway "starch" -o starch_pathway.tsv
    """
    # One row per (pathway, member gene); variant_count is the number of
    # distinct variants attached to that gene via HAS_CONSEQUENCE. A pathway
    # with no member genes still produces a row with null gene columns.
    pathway_query = """
    MATCH (pw:Pathway)
    WHERE pw.name CONTAINS $pw_name
    OPTIONAL MATCH (g:Gene)-[:IN_PATHWAY]->(pw)
    OPTIONAL MATCH (v:Variant)-[:HAS_CONSEQUENCE]->(g)
    WITH pw, g, COUNT(DISTINCT v) AS variant_count
    RETURN pw.name AS pathway,
           pw.pathwayId AS pathway_id,
           g.symbol AS gene,
           g.geneId AS gene_id,
           g.chr AS chr,
           g.start AS gene_start,
           g.end AS gene_end,
           variant_count
    ORDER BY pw.name, g.symbol
    """
    query_params = {"pw_name": pw_name}
    rows = ctx.run(pathway_query, query_params)

    if rows:
        # Status goes to stderr so it never mixes with data written to stdout.
        click.echo(f"Found {len(rows)} gene entries across matching pathways.", err=True)
        format_output(rows, output_path, fmt, "lookup pathway",
                      {"pathway": pw_name})
    else:
        click.echo(f"No pathways matching '{pw_name}' found.", err=True)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@lookup.command("variant")
@click.argument("var_id")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def lookup_variant(ctx, var_id, output_path, fmt):
    """Full annotation for a single variant.

    VAR_ID format: chr:pos:ref:alt (e.g., chr22:16050075:A:G).

    \b
    Examples:
      graphpop lookup variant chr22:16050075:A:G
      graphpop lookup variant Chr01:12345:A:T --format json
    """
    # One row per gene the variant has a consequence on, each carrying the
    # whole variant node plus the distinct pathway names of that gene.
    variant_query = """
    MATCH (v:Variant {variantId: $var_id})
    OPTIONAL MATCH (v)-[:HAS_CONSEQUENCE]->(g:Gene)
    OPTIONAL MATCH (g)-[:IN_PATHWAY]->(pw:Pathway)
    RETURN v AS variant_props,
           g.symbol AS gene,
           g.geneId AS gene_id,
           COLLECT(DISTINCT pw.name) AS pathways
    """
    matches = ctx.run(variant_query, {"var_id": var_id})

    if not matches:
        click.echo(f"Variant '{var_id}' not found.", err=True)
        return

    def _flatten(rec):
        # Spread the variant's own properties into top-level columns first,
        # then add the gene/pathway columns (which win on a name clash).
        props = rec.get("variant_props", {})
        flat = dict(props.items()) if props else {}
        flat["gene"] = rec.get("gene")
        flat["gene_id"] = rec.get("gene_id")
        flat["pathways"] = rec.get("pathways", [])
        return flat

    format_output([_flatten(rec) for rec in matches], output_path, fmt,
                  "lookup variant", {"variant_id": var_id})
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@lookup.command("region")
@click.argument("chr")
@click.argument("start", type=int)
@click.argument("end", type=int)
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def lookup_region(ctx, chr, start, end, output_path, fmt):
    """Genes and summary stats in a genomic region.

    Returns per-gene variant counts and the span of variant positions
    (min/max) in the region. Variants without a gene are grouped as
    'intergenic'. START and END are inclusive.

    \b
    Examples:
      graphpop lookup region chr6 9000000 9600000
      graphpop lookup region chr22 16000000 17000000 -o region.tsv
    """
    # NOTE: the `chr` parameter shadows the builtin, but its name is tied to
    # the click argument declared above and is therefore left unchanged.

    # An inverted range can never match anything; reject it up front instead
    # of reporting a misleading "No variants found".
    if start > end:
        raise click.BadParameter(
            f"START ({start}) must not be greater than END ({end})."
        )

    # Variants are selected by position first and then grouped by the gene
    # they have a consequence on; the OPTIONAL MATCH keeps gene-less variants,
    # which COALESCE labels as 'intergenic'.
    cypher = """
    MATCH (v:Variant)
    WHERE v.chr = $chr AND v.pos >= $start AND v.pos <= $end
    OPTIONAL MATCH (v)-[:HAS_CONSEQUENCE]->(g:Gene)
    WITH g, COUNT(DISTINCT v) AS variant_count,
         MIN(v.pos) AS min_pos, MAX(v.pos) AS max_pos
    RETURN COALESCE(g.symbol, 'intergenic') AS gene,
           g.geneId AS gene_id,
           g.start AS gene_start,
           g.end AS gene_end,
           variant_count,
           min_pos,
           max_pos
    ORDER BY min_pos
    """
    records = ctx.run(cypher, {"chr": chr, "start": start, "end": end})

    if not records:
        click.echo(f"No variants found in {chr}:{start}-{end}.", err=True)
        return

    click.echo(f"Found {len(records)} genes/regions in {chr}:{start}-{end}.", err=True)
    format_output(records, output_path, fmt, "lookup region",
                  {"chr": chr, "start": start, "end": end})
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""graphpop neighbors -- explore the graph neighborhood around a gene."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from ..cli import pass_ctx
|
|
7
|
+
from ..formatters import format_output
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@click.command()
@click.argument("gene")
@click.option("--hops", default=1, type=click.IntRange(1, 3),
              help="Number of hops to traverse (default: 1, max: 3)")
@click.option("--via", default="IN_PATHWAY",
              type=click.Choice(["IN_PATHWAY", "LD", "HAS_GO_TERM"],
                                case_sensitive=False),
              help="Relationship type to traverse (default: IN_PATHWAY)")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv",
              type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def neighbors(ctx, gene, hops, via, output_path, fmt):
    """Explore the graph neighborhood around a gene.

    Traverses shared pathways, LD edges, or GO terms to find related genes.

    \b
    Examples:
      graphpop neighbors KCNE1 -o neighbors.tsv
      graphpop neighbors KCNE1 --hops 2 -o neighbors_2hop.tsv
      graphpop neighbors GW5 --via HAS_GO_TERM --format json
    """
    # Relationship type -> function that builds the matching Cypher query.
    query_builders = {
        "IN_PATHWAY": _pathway_query,
        "LD": _ld_query,
        "HAS_GO_TERM": _go_query,
    }

    # --via is matched case-insensitively, so normalise before dispatching.
    relationship = via.upper()
    build_query = query_builders.get(relationship)
    if build_query is None:
        # Defensive fallback; click.Choice should already have rejected this.
        click.echo(f"Unsupported --via type: {relationship}", err=True)
        raise SystemExit(1)

    cypher, params = build_query(hops)
    # Every builder references the seed gene through the $gene parameter.
    params["gene"] = gene
    found = ctx.run(cypher, params)

    hop_label = f"({hops} hop(s))."
    if not found:
        click.echo(f"No neighbors found for gene '{gene}' via {relationship} "
                   + hop_label, err=True)
        return

    click.echo(f"Found {len(found)} neighbor(s) for {gene} via {relationship} "
               + hop_label, err=True)
    format_output(found, output_path, fmt, "neighbors",
                  {"gene": gene, "hops": hops, "via": relationship})
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _pathway_query(hops: int) -> tuple[str, dict]:
    """Build the Cypher query that finds genes linked through shared pathways.

    Args:
        hops: Number of gene -> pathway -> gene steps to follow. 1 and 2 are
            handled explicitly; any other value yields the 3-hop query.

    Returns:
        A ``(cypher, params)`` pair. The query refers to the seed gene as
        ``$gene`` (matched against symbol or geneId); ``params`` is a fresh
        empty dict that the caller fills in.
    """
    if hops == 1:
        return (
            "MATCH (g1:Gene)-[:IN_PATHWAY]->(p:Pathway)<-[:IN_PATHWAY]-(g2:Gene) "
            "WHERE (g1.symbol = $gene OR g1.geneId = $gene) AND g1 <> g2 "
            "RETURN DISTINCT g2.symbol AS gene, p.name AS shared_pathway, "
            "g2.chr AS chr, g2.start AS start, g2.end AS end "
            "ORDER BY gene",
            {},
        )
    if hops == 2:
        return (
            "MATCH (g1:Gene)-[:IN_PATHWAY]->(p1:Pathway)<-[:IN_PATHWAY]-(g2:Gene)"
            "-[:IN_PATHWAY]->(p2:Pathway)<-[:IN_PATHWAY]-(g3:Gene) "
            "WHERE (g1.symbol = $gene OR g1.geneId = $gene) "
            "AND g1 <> g2 AND g1 <> g3 AND g2 <> g3 "
            "RETURN DISTINCT g3.symbol AS gene, "
            "g2.symbol AS via_gene, p1.name AS pathway_1, p2.name AS pathway_2, "
            "g3.chr AS chr, g3.start AS start, g3.end AS end "
            "ORDER BY gene",
            {},
        )
    # hops == 3: quantified path pattern. The repeated sub-pattern has to
    # start and end with a node pattern, so each repetition is written as
    # (:Gene)-...->(:Pathway)<-...-(:Gene) and juxtaposed with the seed node
    # (g1), which is unified with the first repetition's leading gene.
    return (
        "MATCH path = (g1:Gene) "
        "((:Gene)-[:IN_PATHWAY]->(:Pathway)<-[:IN_PATHWAY]-(:Gene)){3} "
        "WHERE (g1.symbol = $gene OR g1.geneId = $gene) "
        "WITH g1, last(nodes(path)) AS g_end, "
        "[n IN nodes(path) WHERE 'Pathway' IN labels(n) | n.name] AS pws, "
        "[n IN nodes(path) WHERE 'Gene' IN labels(n) | n.symbol] AS genes "
        "WHERE g1 <> g_end "
        "RETURN DISTINCT g_end.symbol AS gene, "
        "g_end.chr AS chr, g_end.start AS start, g_end.end AS end, "
        "pws AS pathways, genes AS via_genes "
        "ORDER BY gene "
        "LIMIT 500",
        {},
    )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _ld_query(hops: int) -> tuple[str, dict]:
    """Assemble the Cypher query that finds genes reachable over LD edges.

    The path always runs seed gene <- variant -LD- ... -LD- variant -> gene.
    ``hops`` is the number of LD edges: 1 and 2 are handled explicitly and
    every other value produces the 3-edge query. Returns ``(cypher, params)``
    where the query expects a ``$gene`` parameter and ``params`` is a new,
    empty dict.
    """
    seed_side = "MATCH (g1:Gene)<-[:HAS_CONSEQUENCE]-(v1:Variant)"
    target_side = "-[:HAS_CONSEQUENCE]->(g2:Gene) "
    seed_filter = "WHERE (g1.symbol = $gene OR g1.geneId = $gene) AND g1 <> g2 "

    if hops == 1:
        # Single hop is the only variant that names the LD edge, so that the
        # strongest r2 per neighbouring gene can be reported and sorted on.
        single_hop = "".join([
            seed_side,
            "-[ld:LD]-(v2:Variant)",
            target_side,
            seed_filter,
            "RETURN DISTINCT g2.symbol AS gene, ",
            "max(ld.r2) AS max_r2, g2.chr AS chr ",
            "ORDER BY max_r2 DESC",
        ])
        return single_hop, {}

    # Two LD edges for hops == 2, three for anything else.
    edge_count = 2 if hops == 2 else 3
    ld_chain = "".join(
        f"-[:LD]-(v{idx}:Variant)" for idx in range(2, edge_count + 2)
    )
    multi_hop = "".join([
        seed_side,
        ld_chain,
        target_side,
        seed_filter,
        "RETURN DISTINCT g2.symbol AS gene, g2.chr AS chr ",
        "ORDER BY gene ",
        "LIMIT 500",
    ])
    return multi_hop, {}
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _go_query(hops: int) -> tuple[str, dict]:
    """Build the Cypher query that finds genes linked through shared GO terms.

    Args:
        hops: Number of gene -> GO term -> gene steps to follow. 1 and 2 are
            handled explicitly; any other value yields the 3-hop query.

    Returns:
        A ``(cypher, params)`` pair. The query refers to the seed gene as
        ``$gene`` (matched against symbol or geneId); ``params`` is a fresh
        empty dict that the caller fills in.
    """
    if hops == 1:
        return (
            "MATCH (g1:Gene)-[:HAS_GO_TERM]->(go:GOTerm)<-[:HAS_GO_TERM]-(g2:Gene) "
            "WHERE (g1.symbol = $gene OR g1.geneId = $gene) AND g1 <> g2 "
            "RETURN DISTINCT g2.symbol AS gene, go.name AS shared_go_term, "
            "go.goId AS go_id, g2.chr AS chr "
            "ORDER BY gene",
            {},
        )
    if hops == 2:
        return (
            "MATCH (g1:Gene)-[:HAS_GO_TERM]->(:GOTerm)<-[:HAS_GO_TERM]-(g2:Gene)"
            "-[:HAS_GO_TERM]->(go2:GOTerm)<-[:HAS_GO_TERM]-(g3:Gene) "
            "WHERE (g1.symbol = $gene OR g1.geneId = $gene) "
            "AND g1 <> g2 AND g1 <> g3 AND g2 <> g3 "
            "RETURN DISTINCT g3.symbol AS gene, "
            "g2.symbol AS via_gene, go2.name AS go_term, "
            "g3.chr AS chr "
            "ORDER BY gene "
            "LIMIT 500",
            {},
        )
    # hops == 3: quantified path pattern. The repeated sub-pattern has to
    # start and end with a node pattern, so each repetition is written as
    # (:Gene)-...->(:GOTerm)<-...-(:Gene) and juxtaposed with the seed node
    # (g1), which is unified with the first repetition's leading gene.
    return (
        "MATCH path = (g1:Gene) "
        "((:Gene)-[:HAS_GO_TERM]->(:GOTerm)<-[:HAS_GO_TERM]-(:Gene)){3} "
        "WHERE (g1.symbol = $gene OR g1.geneId = $gene) "
        "WITH g1, last(nodes(path)) AS g_end, "
        "[n IN nodes(path) WHERE 'GOTerm' IN labels(n) | n.name] AS terms, "
        "[n IN nodes(path) WHERE 'Gene' IN labels(n) | n.symbol] AS genes "
        "WHERE g1 <> g_end "
        "RETURN DISTINCT g_end.symbol AS gene, "
        "g_end.chr AS chr, "
        "terms AS go_terms, genes AS via_genes "
        "ORDER BY gene "
        "LIMIT 500",
        {},
    )
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""graphpop nsl — number of segregating sites by length."""
|
|
2
|
+
import click
|
|
3
|
+
from ..cli import pass_ctx
|
|
4
|
+
from ..config import build_options_map, build_cypher
|
|
5
|
+
from ..formatters import format_output
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@click.command()
@click.argument("chr")
@click.argument("population")
@click.option("--min-af", type=float, help="Minimum allele frequency filter")
@click.option("--persist", is_flag=True, default=False,
              help="Write nSL scores to Variant nodes")
@click.option("-o", "--output", "output_path", help="Output file (default: stdout)")
@click.option("--format", "fmt", default="tsv", type=click.Choice(["tsv", "csv", "json"]))
@pass_ctx
def nsl(ctx, chr, population, min_af, persist, output_path, fmt):
    """Compute number of segregating sites by length (nSL)."""
    # NOTE: the `chr` parameter shadows the builtin, but its name is tied to
    # the click argument declared above and is therefore left unchanged.
    opts = build_options_map(min_af=min_af, persist=persist)

    # The chromosome and population come straight from the command line, so
    # they are handed to the database as query parameters rather than being
    # spliced into the Cypher text inside quotes: a value containing a quote
    # can then neither break the statement nor inject Cypher.
    # NOTE(review): relies on build_cypher inserting positional arguments
    # verbatim, as the previous pre-quoted literals suggest -- confirm.
    cypher = build_cypher(
        "graphpop.nsl",
        ["$chr", "$population"],
        options=opts if opts else None,
        yield_cols=["variantId", "pos", "af", "nsl_unstd", "nsl"],
    )
    records = ctx.run(cypher, {"chr": chr, "population": population})
    format_output(records, output_path, fmt, "nsl",
                  {"chr": chr, "pop": population, "min_af": min_af,
                   "persist": persist})
|