merge-cli 3.5.1__tar.gz → 3.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. merge_cli-3.7/MANIFEST.in +1 -0
  2. merge_cli-3.7/PKG-INFO +88 -0
  3. merge_cli-3.7/README.md +61 -0
  4. merge_cli-3.7/merge_cli/__init__.py +1 -0
  5. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/api.py +29 -5
  6. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/cli.py +328 -29
  7. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/config.py +3 -0
  8. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/local_engine.py +412 -22
  9. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/output.py +36 -2
  10. merge_cli-3.7/merge_cli.egg-info/PKG-INFO +88 -0
  11. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli.egg-info/SOURCES.txt +2 -0
  12. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli.egg-info/requires.txt +1 -11
  13. merge_cli-3.7/pyproject.toml +46 -0
  14. {merge_cli-3.5.1 → merge_cli-3.7}/setup.cfg +4 -4
  15. merge_cli-3.5.1/PKG-INFO +0 -20
  16. merge_cli-3.5.1/merge_cli/__init__.py +0 -1
  17. merge_cli-3.5.1/merge_cli.egg-info/PKG-INFO +0 -20
  18. merge_cli-3.5.1/pyproject.toml +0 -55
  19. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/cli_env_patch.py +0 -0
  20. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/__init__.py +0 -0
  21. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/.gitkeep +0 -0
  22. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/BestModel_coding.pkl +0 -0
  23. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/BestModel_noncoding.pkl +0 -0
  24. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/BestModel_splice.pkl +0 -0
  25. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/__init__.py +0 -0
  26. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/data/models/ensemble_predict.py +0 -0
  27. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli/ensemble_predict.py +0 -0
  28. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli.egg-info/dependency_links.txt +0 -0
  29. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli.egg-info/entry_points.txt +0 -0
  30. {merge_cli-3.5.1 → merge_cli-3.7}/merge_cli.egg-info/top_level.txt +0 -0
@@ -0,0 +1 @@
1
+ recursive-include merge_cli/data/models *.pkl *.py .gitkeep
merge_cli-3.7/PKG-INFO ADDED
@@ -0,0 +1,88 @@
1
+ Metadata-Version: 2.4
2
+ Name: merge-cli
3
+ Version: 3.7
4
+ Summary: MERGE variant pathogenicity prediction CLI with precomputed VCF cache and local/remote model integrations
5
+ Author: MERGE Team
6
+ License-Expression: LicenseRef-Proprietary
7
+ Project-URL: Homepage, https://merge.fanglab.cn
8
+ Keywords: bioinformatics,variant,pathogenicity,genomics,cli
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Environment :: Console
14
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: click>=8.1
18
+ Requires-Dist: rich>=13.0
19
+ Requires-Dist: requests>=2.31
20
+ Requires-Dist: pysam>=0.22
21
+ Requires-Dist: numpy>=1.26
22
+ Requires-Dist: pandas>=2.1
23
+ Requires-Dist: scikit-learn>=1.4
24
+ Requires-Dist: joblib>=1.3
25
+ Requires-Dist: matplotlib>=3.8
26
+ Requires-Dist: shap>=0.44
27
+
28
+ # merge-cli 3.7
29
+
30
+ MERGE variant pathogenicity prediction CLI.
31
+
32
+ ## Highlights
33
+
34
+ - Remote and local prediction modes.
35
+ - Local precomputed VCF cache lookup for hg38/hg19 coding and splicing variants.
36
+ - Local/remote model switches for AlphaGenome, HyenaDNA, NT, Evo2, Enformer, GENERATOR, GENERATOR-v2, and NT-v2.
37
+ - Bundled MERGE ensemble model files.
38
+
39
+ ## Quick Start
40
+
41
+ ```bash
42
+ pip install merge-cli
43
+ merge --help
44
+ merge predict --chrom chr1 --pos 69428 --ref T --alt G --genome hg38
45
+ ```
46
+
47
+ ## Precomputed VCF Cache
48
+
49
+ ```bash
50
+ merge precomputed configure --data-dir /path/to/precomputed
51
+ merge precomputed status
52
+ merge precomputed download --genome all --variant-type all
53
+ ```
54
+
55
+ Expected files include `coding_merged.vcf.gz`, `splicing_merged.vcf.gz`, their hg19 counterparts, and `.tbi` indices.
56
+
57
+ ## GENERATOR / GENERATOR-v2 / NT-v2
58
+
59
+ These three models share a single external service, which defaults to `http://127.0.0.1:18004`.
60
+
61
+ ```bash
62
+ merge generator-ntv2 configure --url http://127.0.0.1:18004
63
+ merge generator-ntv2 status
64
+ merge predict --chrom chr1 --pos 69428 --ref T --alt G --generator --generator-v2 --nt-v2
65
+ ```
66
+
67
+ Local mode uses the same service URL:
68
+
69
+ ```bash
70
+ merge predict --local --chrom chr1 --pos 69428 --ref T --alt G --ensemble-type coding --generator --generator-v2 --nt-v2
71
+ ```
72
+
73
+ Service contract:
74
+
75
+ - `GET /health`
76
+ - `POST /predict` with `chrom`, `pos`, `ref`, `alt`, `genome_version`, `models`
77
+ - `POST /batch_predict` with `variants`, `models`
78
+
79
+ Model keys: `generator`, `generator_v2`, `nt_v2`.
80
+
81
+ ## Enformer
82
+
83
+ ```bash
84
+ merge enformer configure --url http://localhost:5004
85
+ merge enformer status
86
+ ```
87
+
88
+ If a local service is unavailable, the error is reported under `prediction.errors` and the CLI continues, returning whatever results are available.
@@ -0,0 +1,61 @@
1
+ # merge-cli 3.7
2
+
3
+ MERGE variant pathogenicity prediction CLI.
4
+
5
+ ## Highlights
6
+
7
+ - Remote and local prediction modes.
8
+ - Local precomputed VCF cache lookup for hg38/hg19 coding and splicing variants.
9
+ - Local/remote model switches for AlphaGenome, HyenaDNA, NT, Evo2, Enformer, GENERATOR, GENERATOR-v2, and NT-v2.
10
+ - Bundled MERGE ensemble model files.
11
+
12
+ ## Quick Start
13
+
14
+ ```bash
15
+ pip install merge-cli
16
+ merge --help
17
+ merge predict --chrom chr1 --pos 69428 --ref T --alt G --genome hg38
18
+ ```
19
+
20
+ ## Precomputed VCF Cache
21
+
22
+ ```bash
23
+ merge precomputed configure --data-dir /path/to/precomputed
24
+ merge precomputed status
25
+ merge precomputed download --genome all --variant-type all
26
+ ```
27
+
28
+ Expected files include `coding_merged.vcf.gz`, `splicing_merged.vcf.gz`, their hg19 counterparts, and `.tbi` indices.
29
+
30
+ ## GENERATOR / GENERATOR-v2 / NT-v2
31
+
32
+ These three models share a single external service, which defaults to `http://127.0.0.1:18004`.
33
+
34
+ ```bash
35
+ merge generator-ntv2 configure --url http://127.0.0.1:18004
36
+ merge generator-ntv2 status
37
+ merge predict --chrom chr1 --pos 69428 --ref T --alt G --generator --generator-v2 --nt-v2
38
+ ```
39
+
40
+ Local mode uses the same service URL:
41
+
42
+ ```bash
43
+ merge predict --local --chrom chr1 --pos 69428 --ref T --alt G --ensemble-type coding --generator --generator-v2 --nt-v2
44
+ ```
45
+
46
+ Service contract:
47
+
48
+ - `GET /health`
49
+ - `POST /predict` with `chrom`, `pos`, `ref`, `alt`, `genome_version`, `models`
50
+ - `POST /batch_predict` with `variants`, `models`
51
+
52
+ Model keys: `generator`, `generator_v2`, `nt_v2`.
53
+
54
+ ## Enformer
55
+
56
+ ```bash
57
+ merge enformer configure --url http://localhost:5004
58
+ merge enformer status
59
+ ```
60
+
61
+ If a local service is unavailable, the error is reported under `prediction.errors` and the CLI continues, returning whatever results are available.
@@ -0,0 +1 @@
1
+ __version__ = "3.7"
@@ -20,6 +20,14 @@ def _headers() -> dict:
20
20
  return {"Accept": "application/json"}
21
21
 
22
22
 
23
+ def _session() -> requests.Session:
24
+ session = requests.Session()
25
+ # The MERGE endpoint is fixed and reachable directly. Some cluster nodes set
26
+ # HTTP(S)_PROXY values that break Python TLS while curl still works.
27
+ session.trust_env = False
28
+ return session
29
+
30
+
23
31
  # ─────────────────────────────────────────────────────────────
24
32
  # 1. 单变异预测 POST /predict/
25
33
  # ─────────────────────────────────────────────────────────────
@@ -34,7 +42,11 @@ def predict_single(
34
42
  use_gpn_msa: bool = True,
35
43
  use_popeve: bool = True,
36
44
  use_evo2: bool = True,
37
- use_evo1: bool = True,
45
+ use_enformer: bool = True,
46
+ use_generator: bool = False,
47
+ use_generator_v2: bool = False,
48
+ use_nt_v2: bool = False,
49
+ use_evo1: bool = True
38
50
  ) -> dict:
39
51
  payload = {
40
52
  "chr": chrom, "pos": pos, "ref": ref, "alt": alt,
@@ -47,9 +59,13 @@ def predict_single(
47
59
  "use_gpn_msa": str(use_gpn_msa).lower(),
48
60
  "use_popeve": str(use_popeve).lower(),
49
61
  "use_evo2": str(use_evo2).lower(),
62
+ "use_enformer": str(use_enformer).lower(),
63
+ "use_generator": str(use_generator).lower(),
64
+ "use_generator_v2": str(use_generator_v2).lower(),
65
+ "use_nt_v2": str(use_nt_v2).lower(),
50
66
  "use_evo1": str(use_evo1).lower(),
51
67
  }
52
- resp = requests.post(
68
+ resp = _session().post(
53
69
  f"{_BASE}/predict/",
54
70
  json=payload,
55
71
  headers=_headers(),
@@ -89,7 +105,11 @@ def submit_batch(
89
105
  use_gpn_msa: bool = True,
90
106
  use_popeve: bool = True,
91
107
  use_evo2: bool = True,
92
- use_evo1: bool = True,
108
+ use_enformer: bool = True,
109
+ use_generator: bool = False,
110
+ use_generator_v2: bool = False,
111
+ use_nt_v2: bool = False,
112
+ use_evo1: bool = True
93
113
  ) -> dict:
94
114
  data = {
95
115
  "notify_email": email,
@@ -102,10 +122,14 @@ def submit_batch(
102
122
  "use_gpn_msa": str(use_gpn_msa).lower(),
103
123
  "use_popeve": str(use_popeve).lower(),
104
124
  "use_evo2": str(use_evo2).lower(),
125
+ "use_enformer": str(use_enformer).lower(),
126
+ "use_generator": str(use_generator).lower(),
127
+ "use_generator_v2": str(use_generator_v2).lower(),
128
+ "use_nt_v2": str(use_nt_v2).lower(),
105
129
  "use_evo1": str(use_evo1).lower(),
106
130
  }
107
131
  with open(vcf_path, "rb") as f:
108
- resp = requests.post(
132
+ resp = _session().post(
109
133
  f"{_BASE}/api/submit_batch_job/",
110
134
  data=data,
111
135
  files={"vcf_file": (vcf_path.split("/")[-1], f)},
@@ -120,7 +144,7 @@ def submit_batch(
120
144
  # 4. 查询批量任务状态 GET /api/batch_job/<job_id>/
121
145
  # ─────────────────────────────────────────────────────────────
122
146
  def get_batch_status(job_id: str) -> dict:
123
- resp = requests.get(
147
+ resp = _session().get(
124
148
  f"{_BASE}/api/batch_job/{job_id}/",
125
149
  headers=_headers(),
126
150
  timeout=30,