merge-cli 3.5.1__tar.gz → 3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- merge_cli-3.7/MANIFEST.in +1 -0
- merge_cli-3.7/PKG-INFO +88 -0
- merge_cli-3.7/README.md +61 -0
- merge_cli-3.7/merge_cli/__init__.py +1 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/api.py +29 -5
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/cli.py +328 -29
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/config.py +3 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/local_engine.py +412 -22
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/output.py +36 -2
- merge_cli-3.7/merge_cli.egg-info/PKG-INFO +88 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli.egg-info/SOURCES.txt +2 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli.egg-info/requires.txt +1 -11
- merge_cli-3.7/pyproject.toml +46 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/setup.cfg +4 -4
- merge_cli-3.5.1/PKG-INFO +0 -20
- merge_cli-3.5.1/merge_cli/__init__.py +0 -1
- merge_cli-3.5.1/merge_cli.egg-info/PKG-INFO +0 -20
- merge_cli-3.5.1/pyproject.toml +0 -55
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/cli_env_patch.py +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/__init__.py +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/.gitkeep +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/BestModel_coding.pkl +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/BestModel_noncoding.pkl +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/BestModel_splice.pkl +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/__init__.py +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/ensemble_predict.py +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/ensemble_predict.py +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli.egg-info/dependency_links.txt +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli.egg-info/entry_points.txt +0 -0
- {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
recursive-include merge_cli/data/models *.pkl *.py .gitkeep
|
merge_cli-3.7/PKG-INFO
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: merge-cli
|
|
3
|
+
Version: 3.7
|
|
4
|
+
Summary: MERGE variant pathogenicity prediction CLI with precomputed VCF cache and local/remote model integrations
|
|
5
|
+
Author: MERGE Team
|
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
|
7
|
+
Project-URL: Homepage, https://merge.fanglab.cn
|
|
8
|
+
Keywords: bioinformatics,variant,pathogenicity,genomics,cli
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: click>=8.1
|
|
18
|
+
Requires-Dist: rich>=13.0
|
|
19
|
+
Requires-Dist: requests>=2.31
|
|
20
|
+
Requires-Dist: pysam>=0.22
|
|
21
|
+
Requires-Dist: numpy>=1.26
|
|
22
|
+
Requires-Dist: pandas>=2.1
|
|
23
|
+
Requires-Dist: scikit-learn>=1.4
|
|
24
|
+
Requires-Dist: joblib>=1.3
|
|
25
|
+
Requires-Dist: matplotlib>=3.8
|
|
26
|
+
Requires-Dist: shap>=0.44
|
|
27
|
+
|
|
28
|
+
# merge-cli 3.7
|
|
29
|
+
|
|
30
|
+
MERGE variant pathogenicity prediction CLI.
|
|
31
|
+
|
|
32
|
+
## Highlights
|
|
33
|
+
|
|
34
|
+
- Remote and local prediction modes.
|
|
35
|
+
- Local precomputed VCF cache lookup for hg38/hg19 coding and splicing variants.
|
|
36
|
+
- Local/remote model switches for AlphaGenome, HyenaDNA, NT, Evo2, Enformer, GENERATOR, GENERATOR-v2, and NT-v2.
|
|
37
|
+
- Bundled MERGE ensemble model files.
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install merge-cli
|
|
43
|
+
merge --help
|
|
44
|
+
merge predict --chrom chr1 --pos 69428 --ref T --alt G --genome hg38
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Precomputed VCF Cache
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
merge precomputed configure --data-dir /path/to/precomputed
|
|
51
|
+
merge precomputed status
|
|
52
|
+
merge precomputed download --genome all --variant-type all
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Expected files include `coding_merged.vcf.gz`, `splicing_merged.vcf.gz`, their hg19 counterparts, and `.tbi` indices.
|
|
56
|
+
|
|
57
|
+
## GENERATOR / GENERATOR-v2 / NT-v2
|
|
58
|
+
|
|
59
|
+
By default, these three models share a single external service at `http://127.0.0.1:18004`.
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
merge generator-ntv2 configure --url http://127.0.0.1:18004
|
|
63
|
+
merge generator-ntv2 status
|
|
64
|
+
merge predict --chrom chr1 --pos 69428 --ref T --alt G --generator --generator-v2 --nt-v2
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Local mode uses the same service URL:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
merge predict --local --chrom chr1 --pos 69428 --ref T --alt G --ensemble-type coding --generator --generator-v2 --nt-v2
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Service contract:
|
|
74
|
+
|
|
75
|
+
- `GET /health`
|
|
76
|
+
- `POST /predict` with `chrom`, `pos`, `ref`, `alt`, `genome_version`, `models`
|
|
77
|
+
- `POST /batch_predict` with `variants`, `models`
|
|
78
|
+
|
|
79
|
+
Model keys: `generator`, `generator_v2`, `nt_v2`.
|
|
80
|
+
|
|
81
|
+
## Enformer
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
merge enformer configure --url http://localhost:5004
|
|
85
|
+
merge enformer status
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
If local services are unavailable, the errors are reported under `prediction.errors` and the CLI still returns whatever results are available.
|
merge_cli-3.7/README.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# merge-cli 3.7
|
|
2
|
+
|
|
3
|
+
MERGE variant pathogenicity prediction CLI.
|
|
4
|
+
|
|
5
|
+
## Highlights
|
|
6
|
+
|
|
7
|
+
- Remote and local prediction modes.
|
|
8
|
+
- Local precomputed VCF cache lookup for hg38/hg19 coding and splicing variants.
|
|
9
|
+
- Local/remote model switches for AlphaGenome, HyenaDNA, NT, Evo2, Enformer, GENERATOR, GENERATOR-v2, and NT-v2.
|
|
10
|
+
- Bundled MERGE ensemble model files.
|
|
11
|
+
|
|
12
|
+
## Quick Start
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install merge-cli
|
|
16
|
+
merge --help
|
|
17
|
+
merge predict --chrom chr1 --pos 69428 --ref T --alt G --genome hg38
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Precomputed VCF Cache
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
merge precomputed configure --data-dir /path/to/precomputed
|
|
24
|
+
merge precomputed status
|
|
25
|
+
merge precomputed download --genome all --variant-type all
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Expected files include `coding_merged.vcf.gz`, `splicing_merged.vcf.gz`, their hg19 counterparts, and `.tbi` indices.
|
|
29
|
+
|
|
30
|
+
## GENERATOR / GENERATOR-v2 / NT-v2
|
|
31
|
+
|
|
32
|
+
By default, these three models share a single external service at `http://127.0.0.1:18004`.
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
merge generator-ntv2 configure --url http://127.0.0.1:18004
|
|
36
|
+
merge generator-ntv2 status
|
|
37
|
+
merge predict --chrom chr1 --pos 69428 --ref T --alt G --generator --generator-v2 --nt-v2
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Local mode uses the same service URL:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
merge predict --local --chrom chr1 --pos 69428 --ref T --alt G --ensemble-type coding --generator --generator-v2 --nt-v2
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Service contract:
|
|
47
|
+
|
|
48
|
+
- `GET /health`
|
|
49
|
+
- `POST /predict` with `chrom`, `pos`, `ref`, `alt`, `genome_version`, `models`
|
|
50
|
+
- `POST /batch_predict` with `variants`, `models`
|
|
51
|
+
|
|
52
|
+
Model keys: `generator`, `generator_v2`, `nt_v2`.
|
|
53
|
+
|
|
54
|
+
## Enformer
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
merge enformer configure --url http://localhost:5004
|
|
58
|
+
merge enformer status
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
If local services are unavailable, the errors are reported under `prediction.errors` and the CLI still returns whatever results are available.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "3.7"
|
|
@@ -20,6 +20,14 @@ def _headers() -> dict:
|
|
|
20
20
|
return {"Accept": "application/json"}
|
|
21
21
|
|
|
22
22
|
|
|
23
|
+
def _session() -> requests.Session:
|
|
24
|
+
session = requests.Session()
|
|
25
|
+
# The MERGE endpoint is fixed and reachable directly. Some cluster nodes set
|
|
26
|
+
# HTTP(S)_PROXY values that break Python TLS while curl still works.
|
|
27
|
+
session.trust_env = False
|
|
28
|
+
return session
|
|
29
|
+
|
|
30
|
+
|
|
23
31
|
# ─────────────────────────────────────────────────────────────
|
|
24
32
|
# 1. Single-variant prediction POST /predict/
|
|
25
33
|
# ─────────────────────────────────────────────────────────────
|
|
@@ -34,7 +42,11 @@ def predict_single(
|
|
|
34
42
|
use_gpn_msa: bool = True,
|
|
35
43
|
use_popeve: bool = True,
|
|
36
44
|
use_evo2: bool = True,
|
|
37
|
-
|
|
45
|
+
use_enformer: bool = True,
|
|
46
|
+
use_generator: bool = False,
|
|
47
|
+
use_generator_v2: bool = False,
|
|
48
|
+
use_nt_v2: bool = False,
|
|
49
|
+
use_evo1: bool = True
|
|
38
50
|
) -> dict:
|
|
39
51
|
payload = {
|
|
40
52
|
"chr": chrom, "pos": pos, "ref": ref, "alt": alt,
|
|
@@ -47,9 +59,13 @@ def predict_single(
|
|
|
47
59
|
"use_gpn_msa": str(use_gpn_msa).lower(),
|
|
48
60
|
"use_popeve": str(use_popeve).lower(),
|
|
49
61
|
"use_evo2": str(use_evo2).lower(),
|
|
62
|
+
"use_enformer": str(use_enformer).lower(),
|
|
63
|
+
"use_generator": str(use_generator).lower(),
|
|
64
|
+
"use_generator_v2": str(use_generator_v2).lower(),
|
|
65
|
+
"use_nt_v2": str(use_nt_v2).lower(),
|
|
50
66
|
"use_evo1": str(use_evo1).lower(),
|
|
51
67
|
}
|
|
52
|
-
resp =
|
|
68
|
+
resp = _session().post(
|
|
53
69
|
f"{_BASE}/predict/",
|
|
54
70
|
json=payload,
|
|
55
71
|
headers=_headers(),
|
|
@@ -89,7 +105,11 @@ def submit_batch(
|
|
|
89
105
|
use_gpn_msa: bool = True,
|
|
90
106
|
use_popeve: bool = True,
|
|
91
107
|
use_evo2: bool = True,
|
|
92
|
-
|
|
108
|
+
use_enformer: bool = True,
|
|
109
|
+
use_generator: bool = False,
|
|
110
|
+
use_generator_v2: bool = False,
|
|
111
|
+
use_nt_v2: bool = False,
|
|
112
|
+
use_evo1: bool = True
|
|
93
113
|
) -> dict:
|
|
94
114
|
data = {
|
|
95
115
|
"notify_email": email,
|
|
@@ -102,10 +122,14 @@ def submit_batch(
|
|
|
102
122
|
"use_gpn_msa": str(use_gpn_msa).lower(),
|
|
103
123
|
"use_popeve": str(use_popeve).lower(),
|
|
104
124
|
"use_evo2": str(use_evo2).lower(),
|
|
125
|
+
"use_enformer": str(use_enformer).lower(),
|
|
126
|
+
"use_generator": str(use_generator).lower(),
|
|
127
|
+
"use_generator_v2": str(use_generator_v2).lower(),
|
|
128
|
+
"use_nt_v2": str(use_nt_v2).lower(),
|
|
105
129
|
"use_evo1": str(use_evo1).lower(),
|
|
106
130
|
}
|
|
107
131
|
with open(vcf_path, "rb") as f:
|
|
108
|
-
resp =
|
|
132
|
+
resp = _session().post(
|
|
109
133
|
f"{_BASE}/api/submit_batch_job/",
|
|
110
134
|
data=data,
|
|
111
135
|
files={"vcf_file": (vcf_path.split("/")[-1], f)},
|
|
@@ -120,7 +144,7 @@ def submit_batch(
|
|
|
120
144
|
# 4. Query batch job status GET /api/batch_job/<job_id>/
|
|
121
145
|
# ─────────────────────────────────────────────────────────────
|
|
122
146
|
def get_batch_status(job_id: str) -> dict:
|
|
123
|
-
resp =
|
|
147
|
+
resp = _session().get(
|
|
124
148
|
f"{_BASE}/api/batch_job/{job_id}/",
|
|
125
149
|
headers=_headers(),
|
|
126
150
|
timeout=30,
|