merge-cli 3.2.0.tar.gz → 3.3.0.tar.gz
This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- {merge_cli-3.2.0 → merge_cli-3.3.0}/PKG-INFO +1 -1
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/api.py +7 -1
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/cli.py +14 -6
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/config.py +3 -1
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/local_engine.py +274 -22
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/PKG-INFO +1 -1
- {merge_cli-3.2.0 → merge_cli-3.3.0}/pyproject.toml +1 -1
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/__init__.py +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/cli_env_patch.py +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/__init__.py +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/.gitkeep +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/BestModel_Clinvar-noncoding.pkl +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/BestModel_Clinvar.pkl +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/BestModel_Splice_Unsupervised Only.pkl +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/__init__.py +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/ensemble_predict.py +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/ensemble_predict.py +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/output.py +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/SOURCES.txt +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/dependency_links.txt +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/entry_points.txt +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/requires.txt +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/top_level.txt +0 -0
- {merge_cli-3.2.0 → merge_cli-3.3.0}/setup.cfg +0 -0
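To reproduce the comparison locally, both sdists can be pulled from the registry and diffed directly. A minimal sketch, assuming `pip` and a `diff` binary are on PATH and both versions are still published:

import pathlib
import subprocess
import tarfile

# Fetch both source distributions, without dependencies or wheels.
for version in ("3.2.0", "3.3.0"):
    subprocess.run(
        ["pip", "download", f"merge-cli=={version}",
         "--no-deps", "--no-binary", ":all:", "-d", "sdists"],
        check=True,
    )

# Unpack each tarball and compare the two source trees.
for sdist in pathlib.Path("sdists").glob("merge_cli-*.tar.gz"):
    with tarfile.open(sdist) as tf:
        tf.extractall("sdists/unpacked")
subprocess.run(["diff", "-ru",
                "sdists/unpacked/merge_cli-3.2.0",
                "sdists/unpacked/merge_cli-3.3.0"])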
--- merge_cli-3.2.0/merge_cli/api.py
+++ merge_cli-3.3.0/merge_cli/api.py
@@ -62,8 +62,14 @@ def predict_single(
 # ─────────────────────────────────────────────────────────────
 # 2. Ensemble model scoring  POST /ensemble/
 # ─────────────────────────────────────────────────────────────
-def predict_ensemble(
+def predict_ensemble(
+    prediction: dict,
+    all_transcripts: list,
+    ensemble_type: Optional[str] = None,
+) -> dict:
     payload = {"prediction": prediction, "all_transcripts": all_transcripts}
+    if ensemble_type:
+        payload["ensemble_type"] = ensemble_type
     resp = requests.post(
         f"{_BASE}/ensemble/",
         json=payload,
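The new optional `ensemble_type` argument is simply added to the POST payload when provided, so existing call sites keep working. A minimal call-site sketch (the `dbnsfp` content is hypothetical, and the response is only assumed to expose the `success` flag that cli.py checks):

from merge_cli import api

prediction = {"dbnsfp": {"score": 0.92}}   # hypothetical per-variant scores
transcripts = [prediction["dbnsfp"]]

# Omit ensemble_type to keep the 3.2.0 behaviour; pass coding / noncoding / splice to force a model.
resp = api.predict_ensemble(prediction, transcripts, ensemble_type="noncoding")
if resp.get("success"):
    print(resp)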
--- merge_cli-3.2.0/merge_cli/cli.py
+++ merge_cli-3.3.0/merge_cli/cli.py
@@ -206,7 +206,7 @@ def config_show():
               help="Manually specify the variant type (skip ANNOVAR)")
 @_add_model_options
 def predict(chrom, pos, ref, alt, genome, fmt, no_ensemble,
-            use_local, ensemble_type,
+            use_local, ensemble_type, **kwargs):
     """Single-variant pathogenicity prediction (remote / local mode).
 
     \b
@@ -256,10 +256,6 @@ def predict(chrom, pos, ref, alt, genome, fmt, no_ensemble,
         dbnsfp = prediction.get("dbnsfp") or {}
         transcripts = [dbnsfp] if dbnsfp else []
         # FIX #7: determine the variant type automatically via remote ANNOVAR
-        if not ensemble_type:
-            from . import local_engine as _le
-            ensemble_type = _le.get_variant_type(
-                chrom, pos, ref, alt, genome)
         ens_resp = api.predict_ensemble(prediction, transcripts,
                                         ensemble_type=ensemble_type)
         if ens_resp.get("success"):
@@ -604,7 +600,7 @@ def local_download(file_type):
               help="Manually specify the variant type (skip ANNOVAR): coding / noncoding / splice")
 @_add_model_options
 def local_predict(chrom, pos, ref, alt, genome, fmt,
-                  no_ensemble, ensemble_type,
+                  no_ensemble, ensemble_type, **kwargs):
     """Run single-variant prediction entirely locally, with no rate limit.
 
     \b
@@ -819,6 +815,12 @@ def local_env_setup(model_name, evo2_size, local_model_path):
     """
     from .local_engine import setup_model_env
 
+    if model_name in ("all", "evo2") and evo2_size == "1b_base":
+        console.print(
+            "[yellow]Note: per the official docs, `evo2_1b_base` needs FP8 / Transformer Engine "
+            "and a Hopper GPU; it is not a lightweight substitute for the 7B model.[/yellow]"
+        )
+
     targets = (["evo2", "hyenadna", "nt"] if model_name == "all" else [model_name])
     for m in targets:
         ok = setup_model_env(
@@ -850,6 +852,12 @@ def local_env_start(model_name):
     """
     from .local_engine import start_local_service
 
+    if model_name == "all":
+        console.print(
+            "[yellow]Note: starting hyenadna / nt / evo2 together is easily OOM-killed on CPU-only "
+            "or low-memory machines. If `Killed` appears in the logs, prefer starting only the model you need.[/yellow]"
+        )
+
     targets = (["hyenadna", "nt", "evo2"] if model_name == "all" else [model_name])
     for m in targets:
         start_local_service(m, console=console)
--- merge_cli-3.2.0/merge_cli/config.py
+++ merge_cli-3.3.0/merge_cli/config.py
@@ -49,7 +49,9 @@ _LOCAL_DEFAULTS = {
     "annovar_dir": "",
     "hyenadna_url": "http://localhost:5001",
     "nt_url": "http://localhost:5002",
-    "evo2_url": "http://localhost:
+    "evo2_url": "http://localhost:5003",
+    "evo2_model_dir": "",
+    "evo2_model_name": "evo2_7b",
 }
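For reference, the local_engine.py hunks below resolve the two new keys with a data-dir fallback, roughly as follows (this assumes `get_local_config` returns these defaults merged with any user overrides):

import os
from merge_cli.config import get_local_config

cfg = get_local_config()
evo2_model_name = cfg.get("evo2_model_name", "evo2_7b")
# An empty evo2_model_dir falls back to <data_dir>/<model name>.
model_dir = cfg.get("evo2_model_dir") or os.path.join(cfg["data_dir"], evo2_model_name)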
--- merge_cli-3.2.0/merge_cli/local_engine.py
+++ merge_cli-3.3.0/merge_cli/local_engine.py
@@ -16,6 +16,7 @@ from __future__ import annotations
 
 import importlib.resources
 import importlib.util
+import json
 import logging
 import os
 import shutil
@@ -28,7 +29,7 @@ from typing import Optional
 
 import requests
 
-from .config import get_local_config,
+from .config import FIXED_API_URL, get_local_config, set_local_config
 
 logger = logging.getLogger(__name__)
 
@@ -456,12 +457,87 @@ def call_local_service(model_name: str, chrom, pos, ref, alt,
         return {"error": f"{model_name} call failed: {exc}"}
 
 
-def
+def _conda_env_map() -> dict[str, str]:
+    for cmd in (
+        ["conda", "env", "list", "--json"],
+        ["conda", "info", "--envs", "--json"],
+    ):
+        try:
+            result = subprocess.run(
+                cmd, capture_output=True, text=True, timeout=30,
+            )
+            if result.returncode != 0 or not result.stdout.strip():
+                continue
+            data = json.loads(result.stdout)
+            envs = {}
+            for prefix in data.get("envs", []):
+                env_name = Path(prefix).name
+                if env_name:
+                    envs[env_name] = prefix
+            if envs:
+                return envs
+        except Exception:
+            continue
+
     try:
         out = subprocess.check_output(["conda", "env", "list"], text=True, timeout=15)
-        return name in out
     except Exception:
-        return
+        return {}
+
+    envs = {}
+    for raw_line in out.splitlines():
+        line = raw_line.strip()
+        if not line or line.startswith("#"):
+            continue
+        parts = [p for p in line.split() if p != "*"]
+        if not parts:
+            continue
+        prefix = parts[-1]
+        if os.path.sep in prefix or "/" in prefix or "\\" in prefix:
+            env_name = Path(prefix).name
+            if env_name:
+                envs[env_name] = prefix
+            continue
+        envs[parts[0]] = parts[0]
+    return envs
+
+
+def _get_conda_env_prefix(name: str) -> Optional[str]:
+    return _conda_env_map().get(name)
+
+
+def _get_conda_env_python(name: str) -> Optional[str]:
+    prefix = _get_conda_env_prefix(name)
+    if not prefix:
+        return None
+
+    candidates = [
+        Path(prefix) / "python.exe",
+        Path(prefix) / "bin" / "python",
+        Path(prefix) / "Scripts" / "python.exe",
+    ]
+    for candidate in candidates:
+        if candidate.exists():
+            return str(candidate)
+    return None
+
+
+def _run_python_in_conda_env(env_name: str, args: list[str], **kwargs):
+    python_exe = _get_conda_env_python(env_name)
+    if not python_exe:
+        raise FileNotFoundError(f"python for conda env '{env_name}' not found")
+    return subprocess.run([python_exe] + list(args), **kwargs)
+
+
+def _popen_python_in_conda_env(env_name: str, args: list[str], **kwargs):
+    python_exe = _get_conda_env_python(env_name)
+    if not python_exe:
+        raise FileNotFoundError(f"python for conda env '{env_name}' not found")
+    return subprocess.Popen([python_exe] + list(args), **kwargs)
+
+
+def _conda_env_exists(name: str) -> bool:
+    return _get_conda_env_prefix(name) is not None
 
 
 # ════════════════════════════════════════════════════════════════
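For context, `conda env list --json` prints a JSON object whose `envs` key lists environment prefixes, which is what `_conda_env_map` keys by directory name. A standalone sketch of the same parsing (the sample paths are made up):

import json
from pathlib import Path

# Shape of `conda env list --json` output; paths here are hypothetical.
sample = '{"envs": ["/home/user/miniconda3", "/home/user/miniconda3/envs/evo2"]}'

envs = {Path(p).name: p for p in json.loads(sample).get("envs", []) if Path(p).name}
print(envs.get("evo2"))  # -> /home/user/miniconda3/envs/evo2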
@@ -528,15 +604,18 @@ def _pip_install(env_name: str, pkgs: list, console=None, timeout=900) -> bool:
     all_pkgs = base_pkgs + [p for p in pkgs if p not in base_pkgs]
 
     try:
-        result =
-
-
+        result = _run_python_in_conda_env(
+            env_name,
+            ["-m", "pip", "install", "--quiet"] + all_pkgs,
             capture_output=False, timeout=timeout,
         )
         if result.returncode != 0:
             _log(f"  [red]✗ pip install failed (exit {result.returncode})[/red]")
             return False
         return True
+    except FileNotFoundError as e:
+        _log(f"  [red]✗ {e}[/red]")
+        return False
     except subprocess.CalledProcessError as e:
         _log(f"  [red]✗ pip install failed: {e}[/red]")
         return False
@@ -618,7 +697,7 @@ def _setup_nt(console=None, **_) -> bool:
     return True
 
 
-def
+def _setup_evo2_legacy(console=None, local_model_path=None, model_size="7b") -> bool:
     if not shutil.which("conda"):
         if console:
             console.print("[red]✗ conda not found[/red]")
@@ -741,6 +820,143 @@ def _setup_evo2(console=None, local_model_path=None, model_size="7b") -> bool:
     return True
 
 
+def _setup_evo2(console=None, local_model_path=None, model_size="7b") -> bool:
+    if not shutil.which("conda"):
+        if console:
+            console.print("[red]✗ conda not found[/red]")
+        return False
+
+    def _log(message):
+        if console:
+            console.print(message)
+
+    evo2_model_name = f"evo2_{model_size}"
+    install_mode = "light" if model_size == "7b" else "full"
+    python_ver = "3.11" if install_mode == "light" else "3.12"
+    required_gb = 18 if install_mode == "light" else 35
+    torch_spec = os.environ.get("MERGE_EVO2_TORCH_SPEC", "torch==2.7.1")
+    torch_index_url = os.environ.get(
+        "MERGE_EVO2_TORCH_INDEX_URL",
+        "https://download.pytorch.org/whl/cu128",
+    )
+
+    if local_model_path and not os.path.exists(local_model_path):
+        _log(f"  [red]✗ Local model path does not exist: {local_model_path}[/red]")
+        return False
+
+    _log(f"\n[bold cyan]═══ Evo2 environment setup (official {install_mode} install) ═══[/bold cyan]")
+    if model_size != "7b":
+        _log("  [yellow]⚠ evo2_1b_base officially requires FP8 / Transformer Engine and a Hopper GPU.[/yellow]")
+
+    conda_dir = str(Path.home() / "miniconda3" / "envs")
+    ok, free_gb = check_disk_space(conda_dir, required_gb)
+    _log(f"  Free disk space: {free_gb} GB")
+    if install_mode == "light":
+        _log("  Install mode: 7B light mode (PyTorch -> flash-attn -> pip install evo2)")
+        _log("  Space estimate: ~8-12 GB for the environment; weights are not force-predownloaded during setup")
+    else:
+        _log("  Install mode: full mode (with Transformer Engine, for 1b_base / FP8)")
+        _log("  Space estimate: 20+ GB for the environment, plus a large weight download on first run")
+    if not ok:
+        _log(f"  [red]✗ Disk space is low ({free_gb} GB free; at least {required_gb} GB recommended)[/red]")
+        if not _yn_prompt("Disk space may be insufficient, continue anyway?"):
+            return False
+
+    total_steps = 4 if install_mode == "light" else 5
+    _log(f"\n[bold]1/{total_steps}[/bold] Creating conda env evo2 (python={python_ver})…")
+    if not _create_conda_env("evo2", python_ver, console):
+        return False
+
+    _log(f"\n[bold]2/{total_steps}[/bold] Installing PyTorch and base dependencies…")
+    install_steps = [
+        (["-m", "pip", "install", "--quiet", "--upgrade", "pip", "setuptools", "wheel"], 600),
+        (["-m", "pip", "install", "--quiet", torch_spec, "--index-url", torch_index_url], 1800),
+        ([
+            "-m", "pip", "install", "--quiet",
+            "psutil", "requests", "urllib3", "fastapi", "uvicorn[standard]", "biopython",
+        ], 1200),
+    ]
+    for args, timeout in install_steps:
+        try:
+            result = _run_python_in_conda_env(
+                "evo2", args, capture_output=False, timeout=timeout,
+            )
+        except FileNotFoundError as e:
+            _log(f"  [red]✗ {e}[/red]")
+            return False
+        if result.returncode != 0:
+            _log(f"  [red]✗ Command failed (exit {result.returncode})[/red]")
+            return False
+
+    current_step = 3
+    if install_mode == "full":
+        _log(f"\n[bold]{current_step}/{total_steps}[/bold] Installing Transformer Engine dependencies…")
+        for cmd in [
+            ["conda", "install", "-n", "evo2", "-c", "nvidia",
+             "cuda-nvcc", "cuda-cudart-dev", "-y", "--quiet"],
+            ["conda", "install", "-n", "evo2", "-c", "conda-forge",
+             "transformer-engine-torch=2.3.0", "-y", "--quiet"],
+        ]:
+            result = subprocess.run(cmd, timeout=1800, capture_output=False)
+            if result.returncode != 0:
+                _log(f"  [red]✗ Command failed (exit {result.returncode})[/red]")
+                return False
+        current_step += 1
+
+    _log(f"\n[bold]{current_step}/{total_steps}[/bold] Installing Flash Attention…")
+    try:
+        result = _run_python_in_conda_env(
+            "evo2",
+            ["-m", "pip", "install", "flash-attn==2.8.0.post2", "--no-build-isolation"],
+            capture_output=False,
+            timeout=3600,
+        )
+    except FileNotFoundError as e:
+        _log(f"  [red]✗ {e}[/red]")
+        return False
+    if result.returncode != 0:
+        _log("  [red]✗ flash-attn installation failed[/red]")
+        return False
+    current_step += 1
+
+    _log(f"\n[bold]{current_step}/{total_steps}[/bold] Installing evo2…")
+    try:
+        result = _run_python_in_conda_env(
+            "evo2",
+            ["-m", "pip", "install", "--quiet", "evo2"],
+            capture_output=False,
+            timeout=1800,
+        )
+    except FileNotFoundError as e:
+        _log(f"  [red]✗ {e}[/red]")
+        return False
+    if result.returncode != 0:
+        _log("  [red]✗ pip install evo2 failed[/red]")
+        return False
+
+    cfg = _cfg()
+    default_model_dir = os.path.join(cfg["data_dir"], evo2_model_name)
+    stored_model_dir = ""
+    if local_model_path:
+        stored_model_dir = os.path.abspath(local_model_path)
+        _log(f"  [green]✓[/green] Recorded local weight directory: {stored_model_dir}")
+    elif os.path.isdir(default_model_dir):
+        stored_model_dir = default_model_dir
+
+    set_local_config("evo2_model_name", evo2_model_name)
+    set_local_config("evo2_model_dir", stored_model_dir)
+    set_local_config("evo2_url", "http://localhost:5003")
+
+    if stored_model_dir:
+        _log(f"  [dim]On startup, weights will be loaded from the local directory first: {stored_model_dir}[/dim]")
+    else:
+        _log("  [dim]Weights were not pre-downloaded; on first start the model is fetched from HuggingFace, as in the official workflow.[/dim]")
+
+    _log("\n[bold green]✓ Evo2 setup complete![/bold green]")
+    _log("  Next step: [bold]merge local env start --model evo2[/bold]")
+    return True
+
+
 def _yn_prompt(msg: str) -> bool:
     try:
         ans = input(f"{msg} [y/N] ").strip().lower()
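The two environment variables introduced above let users swap the pinned PyTorch build without editing the package; for example (the alternate index URL is only illustrative):

import os

# Point the Evo2 setup at a different torch wheel before running it in the same process.
os.environ["MERGE_EVO2_TORCH_SPEC"] = "torch==2.7.1"
os.environ["MERGE_EVO2_TORCH_INDEX_URL"] = "https://download.pytorch.org/whl/cu121"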
@@ -787,7 +1003,8 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
 print("[HyenaDNA] tokenizer loaded", flush=True)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME, trust_remote_code=True,
-
+    dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
+    low_cpu_mem_usage=True,
 ).to(DEVICE).eval()
 elapsed = time.time() - t0
 print(f"[HyenaDNA] step 3/3: model loaded (took {elapsed:.1f}s)", flush=True)
@@ -867,7 +1084,8 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 print("[NT] tokenizer loaded", flush=True)
 model = AutoModelForMaskedLM.from_pretrained(
     MODEL_NAME, output_hidden_states=True,
-
+    dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
+    low_cpu_mem_usage=True,
 ).to(DEVICE).eval()
 elapsed = time.time() - t0
 print(f"[NT] step 3/3: model loaded (took {elapsed:.1f}s)", flush=True)
@@ -920,7 +1138,8 @@ if __name__ == "__main__":
 '''
 
     elif model_name == "evo2":
-
+        evo2_model_name = cfg.get("evo2_model_name", "evo2_7b")
+        model_dir = cfg.get("evo2_model_dir") or os.path.join(cfg["data_dir"], evo2_model_name)
         # FIX #1 + #3
         code = f'''#!/usr/bin/env python3
 """Evo2 local inference service - MERGE CLI v3.2.0"""
@@ -938,6 +1157,7 @@ from pydantic import BaseModel
 
 app = FastAPI()
 _model = None
+MODEL_NAME = "{evo2_model_name}"
 
 
 def _load():
@@ -946,7 +1166,8 @@ def _load():
         return
     print("[Evo2] step 2/3: loading the Evo2 7b model...", flush=True)
     LOCAL_DIR = "{model_dir}"
-    pt_path = os.path.
+    pt_path = (LOCAL_DIR if os.path.isfile(LOCAL_DIR)
+               else os.path.join(LOCAL_DIR, f"{{MODEL_NAME}}.pt"))
     if os.path.exists(pt_path):
         print(f"[Evo2] using local weights: {{pt_path}}", flush=True)
         print("[Evo2] (loading the 14GB weights takes about 2-5 minutes, please be patient)", flush=True)
@@ -958,9 +1179,9 @@ def _load():
     t0 = time.time()
     try:
         from evo2.models import Evo2
-        _model = (Evo2(model_name=
+        _model = (Evo2(model_name=MODEL_NAME, local_path=pt_path)
                   if os.path.exists(pt_path)
-                  else Evo2(model_name=
+                  else Evo2(model_name=MODEL_NAME))
     finally:
         torch.set_default_dtype(torch.float32)
     elapsed = time.time() - t0
@@ -978,7 +1199,7 @@ class V(BaseModel):
 
 @app.get("/health")
 def health():
-    return {{"status": "ok", "model":
+    return {{"status": "ok", "model": MODEL_NAME,
             "device": "cuda" if __import__("torch").cuda.is_available() else "cpu"}}
@@ -1010,19 +1231,31 @@ if __name__ == "__main__":
     else:
         raise ValueError(f"Unknown model: {model_name}")
 
-    p.write_text(code.strip())
+    p.write_text(code.strip(), encoding="utf-8")
     return str(p)
 
 
+def _wait_for_service_stability(model_name: str, checks: int = 3, delay_s: int = 2) -> bool:
+    for _ in range(checks):
+        time.sleep(delay_s)
+        if not check_service_health(model_name).get("online"):
+            return False
+    return True
+
+
 def start_local_service(model_name: str, console=None) -> bool:
     ports = {"hyenadna": 5001, "nt": 5002, "evo2": 5003}
     env_name = ENV_NAMES.get(model_name, model_name)
     port = ports.get(model_name, 5001)
+    svc = _LOCAL_SERVICES.get(model_name)
 
     def _log(m):
         if console:
             console.print(m)
 
+    if svc:
+        set_local_config(svc["url_key"], svc["default"])
+
     h = check_service_health(model_name)
     if h["online"]:
         _log(f"[yellow]⚠[/yellow] {model_name} is already running ({h['url']}), skipping")
@@ -1042,17 +1275,20 @@ def start_local_service(model_name: str, console=None) -> bool:
     load_hints = {
         "hyenadna": "HyenaDNA first start takes about 1-3 minutes (downloads/loads ~400MB of weights)",
         "nt": "NT first start takes about 3-8 minutes (downloads/loads ~2GB of weights)",
-        "evo2": "Evo2
+        "evo2": "Evo2 first start takes about 2-5 minutes (without local weights, they are downloaded from HuggingFace as in the official workflow)",
     }
     _log(f"  [dim]{load_hints.get(model_name, '')}[/dim]")
     _log(f"  [dim]Live log: tail -f {log_file}[/dim]")
 
     try:
         with open(log_file, "a") as lf:
-
-
-
-                stdout=lf,
+            proc = _popen_python_in_conda_env(
+                env_name,
+                ["-u", script],
+                stdout=lf,
+                stderr=lf,
+                stdin=subprocess.DEVNULL,
+                start_new_session=True,
             )
     except Exception as e:
         _log(f"[red]✗ Failed to start: {e}[/red]")
@@ -1062,9 +1298,18 @@ def start_local_service(model_name: str, console=None) -> bool:
     deadline = time.time() + 300
     dots = 0
     while time.time() < deadline:
+        if proc.poll() is not None:
+            _log(f"\n[red]✗ The {model_name} process has exited (exit {proc.returncode}); check the log: {log_file}[/red]")
+            _log("  [dim]If `Killed` appears in the log, this is usually an OOM kill caused by insufficient system memory.[/dim]")
+            return False
         h = check_service_health(model_name)
         if h["online"]:
+            if not _wait_for_service_stability(model_name):
+                _log(f"\n[red]✗ The {model_name} process exited shortly after starting; check the log: {log_file}[/red]")
+                _log("  [dim]This is usually out-of-memory (OOM) or the process being killed during model loading.[/dim]")
+                return False
             _log(f"\n[green]✓ {model_name} ready ({h['url']}, {h['latency_ms']} ms)[/green]")
+            _log(f"  [dim]PID: {proc.pid}[/dim]")
             return True
         if console:
             console.print(f"  waiting{'.' * (dots % 4 + 1)} (up to {int(deadline - time.time())}s left)",
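Once a service reports online, a quick manual check against its /health endpoint is possible from outside the CLI; a minimal sketch (port 5003 is the new evo2_url default, and the keys follow the handler shown earlier):

import requests

resp = requests.get("http://localhost:5003/health", timeout=5)
info = resp.json()
print(info.get("status"), info.get("model"), info.get("device"))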
@@ -1313,12 +1558,19 @@ def _call_alphagenome_remote(chrom, pos, ref, alt, genome) -> dict:
 def check_local_files() -> dict:
     cfg = _cfg()
     data_dir = cfg["data_dir"]
+    evo2_model_name = cfg.get("evo2_model_name", "evo2_7b")
+    evo2_model_dir = cfg.get("evo2_model_dir") or os.path.join(data_dir, evo2_model_name)
+    evo2_weight_path = (
+        evo2_model_dir
+        if os.path.isfile(evo2_model_dir)
+        else os.path.join(evo2_model_dir, f"{evo2_model_name}.pt")
+    )
     files = {
         "dbNSFP (hg38)": os.path.join(data_dir, "dbNSFP5.3a_grch38.gz"),
         "dbNSFP (hg19)": os.path.join(data_dir, "dbNSFP5.3a_grch37.gz"),
         "GPN-MSA": os.path.join(data_dir, "scores.tsv.bgz"),
         "popEVE": os.path.join(data_dir, "grch38_popEVE_ukbb_20250715.vcf.gz"),
-        "Evo2
+        f"Evo2 local weights ({evo2_model_name}, optional)": evo2_weight_path,
     }
     result = {}
     for label, path in files.items():