merge-cli 3.2.0__tar.gz → 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {merge_cli-3.2.0 → merge_cli-3.3.0}/PKG-INFO +1 -1
  2. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/api.py +7 -1
  3. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/cli.py +14 -6
  4. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/config.py +3 -1
  5. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/local_engine.py +274 -22
  6. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/PKG-INFO +1 -1
  7. {merge_cli-3.2.0 → merge_cli-3.3.0}/pyproject.toml +1 -1
  8. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/__init__.py +0 -0
  9. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/cli_env_patch.py +0 -0
  10. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/__init__.py +0 -0
  11. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/.gitkeep +0 -0
  12. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/BestModel_Clinvar-noncoding.pkl +0 -0
  13. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/BestModel_Clinvar.pkl +0 -0
  14. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/BestModel_Splice_Unsupervised Only.pkl +0 -0
  15. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/__init__.py +0 -0
  16. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/data/models/ensemble_predict.py +0 -0
  17. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/ensemble_predict.py +0 -0
  18. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli/output.py +0 -0
  19. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/SOURCES.txt +0 -0
  20. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/dependency_links.txt +0 -0
  21. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/entry_points.txt +0 -0
  22. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/requires.txt +0 -0
  23. {merge_cli-3.2.0 → merge_cli-3.3.0}/merge_cli.egg-info/top_level.txt +0 -0
  24. {merge_cli-3.2.0 → merge_cli-3.3.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: merge-cli
3
- Version: 3.2.0
3
+ Version: 3.3.0
4
4
  Summary: MERGE 变异致病性预测 CLI(服务器固定,集成模型内嵌,无需手动配置)
5
5
  Project-URL: Homepage, https://merge.fanglab.cn
6
6
  Requires-Python: >=3.11
@@ -62,8 +62,14 @@ def predict_single(
62
62
  # ─────────────────────────────────────────────────────────────
63
63
  # 2. 集成模型评分 POST /ensemble/
64
64
  # ─────────────────────────────────────────────────────────────
65
- def predict_ensemble(prediction: dict, all_transcripts: list) -> dict:
65
+ def predict_ensemble(
66
+ prediction: dict,
67
+ all_transcripts: list,
68
+ ensemble_type: Optional[str] = None,
69
+ ) -> dict:
66
70
  payload = {"prediction": prediction, "all_transcripts": all_transcripts}
71
+ if ensemble_type:
72
+ payload["ensemble_type"] = ensemble_type
67
73
  resp = requests.post(
68
74
  f"{_BASE}/ensemble/",
69
75
  json=payload,
@@ -206,7 +206,7 @@ def config_show():
206
206
  help="手动指定变异类型(跳过 ANNOVAR)")
207
207
  @_add_model_options
208
208
  def predict(chrom, pos, ref, alt, genome, fmt, no_ensemble,
209
- use_local, ensemble_type, local_genome_path, **kwargs):
209
+ use_local, ensemble_type, **kwargs):
210
210
  """单变异致病性预测(远程 / 本地模式)。
211
211
 
212
212
  \b
@@ -256,10 +256,6 @@ def predict(chrom, pos, ref, alt, genome, fmt, no_ensemble,
256
256
  dbnsfp = prediction.get("dbnsfp") or {}
257
257
  transcripts = [dbnsfp] if dbnsfp else []
258
258
  # FIX #7: 自动通过远程 ANNOVAR 判断变异类型
259
- if not ensemble_type:
260
- from . import local_engine as _le
261
- ensemble_type = _le.get_variant_type(
262
- chrom, pos, ref, alt, genome)
263
259
  ens_resp = api.predict_ensemble(prediction, transcripts,
264
260
  ensemble_type=ensemble_type)
265
261
  if ens_resp.get("success"):
@@ -604,7 +600,7 @@ def local_download(file_type):
604
600
  help="手动指定变异类型(跳过 ANNOVAR):coding / noncoding / splice")
605
601
  @_add_model_options
606
602
  def local_predict(chrom, pos, ref, alt, genome, fmt,
607
- no_ensemble, ensemble_type, local_genome_path=None, **kwargs):
603
+ no_ensemble, ensemble_type, **kwargs):
608
604
  """完全在本地运行单变异预测,无速率限制。
609
605
 
610
606
  \b
@@ -819,6 +815,12 @@ def local_env_setup(model_name, evo2_size, local_model_path):
819
815
  """
820
816
  from .local_engine import setup_model_env
821
817
 
818
+ if model_name in ("all", "evo2") and evo2_size == "1b_base":
819
+ console.print(
820
+ "[yellow]提示:官方文档里 `evo2_1b_base` 需要 FP8 / Transformer Engine 和 Hopper GPU,"
821
+ "并不是 7B 的轻量替代。[/yellow]"
822
+ )
823
+
822
824
  targets = (["evo2", "hyenadna", "nt"] if model_name == "all" else [model_name])
823
825
  for m in targets:
824
826
  ok = setup_model_env(
@@ -850,6 +852,12 @@ def local_env_start(model_name):
850
852
  """
851
853
  from .local_engine import start_local_service
852
854
 
855
+ if model_name == "all":
856
+ console.print(
857
+ "[yellow]提示:同时启动 hyenadna / nt / evo2 在 CPU 或小内存机器上很容易被系统 OOM kill。"
858
+ " 如果日志里出现 `Killed`,优先单独启动需要的模型。[/yellow]"
859
+ )
860
+
853
861
  targets = (["hyenadna", "nt", "evo2"] if model_name == "all" else [model_name])
854
862
  for m in targets:
855
863
  start_local_service(m, console=console)
@@ -49,7 +49,9 @@ _LOCAL_DEFAULTS = {
49
49
  "annovar_dir": "",
50
50
  "hyenadna_url": "http://localhost:5001",
51
51
  "nt_url": "http://localhost:5002",
52
- "evo2_url": "http://localhost:8082",
52
+ "evo2_url": "http://localhost:5003",
53
+ "evo2_model_dir": "",
54
+ "evo2_model_name": "evo2_7b",
53
55
  }
54
56
 
55
57
 
@@ -16,6 +16,7 @@ from __future__ import annotations
16
16
 
17
17
  import importlib.resources
18
18
  import importlib.util
19
+ import json
19
20
  import logging
20
21
  import os
21
22
  import shutil
@@ -28,7 +29,7 @@ from typing import Optional
28
29
 
29
30
  import requests
30
31
 
31
- from .config import get_local_config, FIXED_API_URL
32
+ from .config import FIXED_API_URL, get_local_config, set_local_config
32
33
 
33
34
  logger = logging.getLogger(__name__)
34
35
 
@@ -456,12 +457,87 @@ def call_local_service(model_name: str, chrom, pos, ref, alt,
456
457
  return {"error": f"{model_name} 调用失败:{exc}"}
457
458
 
458
459
 
459
- def _conda_env_exists(name: str) -> bool:
460
+ def _conda_env_map() -> dict[str, str]:
461
+ for cmd in (
462
+ ["conda", "env", "list", "--json"],
463
+ ["conda", "info", "--envs", "--json"],
464
+ ):
465
+ try:
466
+ result = subprocess.run(
467
+ cmd, capture_output=True, text=True, timeout=30,
468
+ )
469
+ if result.returncode != 0 or not result.stdout.strip():
470
+ continue
471
+ data = json.loads(result.stdout)
472
+ envs = {}
473
+ for prefix in data.get("envs", []):
474
+ env_name = Path(prefix).name
475
+ if env_name:
476
+ envs[env_name] = prefix
477
+ if envs:
478
+ return envs
479
+ except Exception:
480
+ continue
481
+
460
482
  try:
461
483
  out = subprocess.check_output(["conda", "env", "list"], text=True, timeout=15)
462
- return name in out
463
484
  except Exception:
464
- return False
485
+ return {}
486
+
487
+ envs = {}
488
+ for raw_line in out.splitlines():
489
+ line = raw_line.strip()
490
+ if not line or line.startswith("#"):
491
+ continue
492
+ parts = [p for p in line.split() if p != "*"]
493
+ if not parts:
494
+ continue
495
+ prefix = parts[-1]
496
+ if os.path.sep in prefix or "/" in prefix or "\\" in prefix:
497
+ env_name = Path(prefix).name
498
+ if env_name:
499
+ envs[env_name] = prefix
500
+ continue
501
+ envs[parts[0]] = parts[0]
502
+ return envs
503
+
504
+
505
+ def _get_conda_env_prefix(name: str) -> Optional[str]:
506
+ return _conda_env_map().get(name)
507
+
508
+
509
+ def _get_conda_env_python(name: str) -> Optional[str]:
510
+ prefix = _get_conda_env_prefix(name)
511
+ if not prefix:
512
+ return None
513
+
514
+ candidates = [
515
+ Path(prefix) / "python.exe",
516
+ Path(prefix) / "bin" / "python",
517
+ Path(prefix) / "Scripts" / "python.exe",
518
+ ]
519
+ for candidate in candidates:
520
+ if candidate.exists():
521
+ return str(candidate)
522
+ return None
523
+
524
+
525
+ def _run_python_in_conda_env(env_name: str, args: list[str], **kwargs):
526
+ python_exe = _get_conda_env_python(env_name)
527
+ if not python_exe:
528
+ raise FileNotFoundError(f"找不到 conda 环境 '{env_name}' 的 python")
529
+ return subprocess.run([python_exe] + list(args), **kwargs)
530
+
531
+
532
+ def _popen_python_in_conda_env(env_name: str, args: list[str], **kwargs):
533
+ python_exe = _get_conda_env_python(env_name)
534
+ if not python_exe:
535
+ raise FileNotFoundError(f"找不到 conda 环境 '{env_name}' 的 python")
536
+ return subprocess.Popen([python_exe] + list(args), **kwargs)
537
+
538
+
539
+ def _conda_env_exists(name: str) -> bool:
540
+ return _get_conda_env_prefix(name) is not None
465
541
 
466
542
 
467
543
  # ════════════════════════════════════════════════════════════════
@@ -528,15 +604,18 @@ def _pip_install(env_name: str, pkgs: list, console=None, timeout=900) -> bool:
528
604
  all_pkgs = base_pkgs + [p for p in pkgs if p not in base_pkgs]
529
605
 
530
606
  try:
531
- result = subprocess.run(
532
- ["conda", "run", "-n", env_name, "--no-capture-output",
533
- "pip", "install", "--quiet"] + all_pkgs,
607
+ result = _run_python_in_conda_env(
608
+ env_name,
609
+ ["-m", "pip", "install", "--quiet"] + all_pkgs,
534
610
  capture_output=False, timeout=timeout,
535
611
  )
536
612
  if result.returncode != 0:
537
613
  _log(f" [red]✗ pip install 失败(exit {result.returncode})[/red]")
538
614
  return False
539
615
  return True
616
+ except FileNotFoundError as e:
617
+ _log(f" [red]✗ {e}[/red]")
618
+ return False
540
619
  except subprocess.CalledProcessError as e:
541
620
  _log(f" [red]✗ pip install 失败:{e}[/red]")
542
621
  return False
@@ -618,7 +697,7 @@ def _setup_nt(console=None, **_) -> bool:
618
697
  return True
619
698
 
620
699
 
621
- def _setup_evo2(console=None, local_model_path=None, model_size="7b") -> bool:
700
+ def _setup_evo2_legacy(console=None, local_model_path=None, model_size="7b") -> bool:
622
701
  if not shutil.which("conda"):
623
702
  if console:
624
703
  console.print("[red]✗ 未找到 conda[/red]")
@@ -741,6 +820,143 @@ def _setup_evo2(console=None, local_model_path=None, model_size="7b") -> bool:
741
820
  return True
742
821
 
743
822
 
823
+ def _setup_evo2(console=None, local_model_path=None, model_size="7b") -> bool:
824
+ if not shutil.which("conda"):
825
+ if console:
826
+ console.print("[red]✗ 未找到 conda[/red]")
827
+ return False
828
+
829
+ def _log(message):
830
+ if console:
831
+ console.print(message)
832
+
833
+ evo2_model_name = f"evo2_{model_size}"
834
+ install_mode = "light" if model_size == "7b" else "full"
835
+ python_ver = "3.11" if install_mode == "light" else "3.12"
836
+ required_gb = 18 if install_mode == "light" else 35
837
+ torch_spec = os.environ.get("MERGE_EVO2_TORCH_SPEC", "torch==2.7.1")
838
+ torch_index_url = os.environ.get(
839
+ "MERGE_EVO2_TORCH_INDEX_URL",
840
+ "https://download.pytorch.org/whl/cu128",
841
+ )
842
+
843
+ if local_model_path and not os.path.exists(local_model_path):
844
+ _log(f" [red]✗ 本地模型路径不存在:{local_model_path}[/red]")
845
+ return False
846
+
847
+ _log(f"\n[bold cyan]═══ Evo2 环境部署(official {install_mode} install)═══[/bold cyan]")
848
+ if model_size != "7b":
849
+ _log(" [yellow]⚠ evo2_1b_base 按官方要求需要 FP8 / Transformer Engine 和 Hopper GPU。[/yellow]")
850
+
851
+ conda_dir = str(Path.home() / "miniconda3" / "envs")
852
+ ok, free_gb = check_disk_space(conda_dir, required_gb)
853
+ _log(f" 磁盘剩余:{free_gb} GB")
854
+ if install_mode == "light":
855
+ _log(" 安装模式:7B 轻量模式(PyTorch -> flash-attn -> pip install evo2)")
856
+ _log(" 空间估计:环境约 8-12 GB;权重默认不在 setup 阶段强制预下载")
857
+ else:
858
+ _log(" 安装模式:完整模式(含 Transformer Engine,适用于 1b_base / FP8)")
859
+ _log(" 空间估计:环境约 20+ GB,首次运行还会下载较大权重")
860
+ if not ok:
861
+ _log(f" [red]✗ 磁盘空间偏低(剩余 {free_gb} GB,建议至少 {required_gb} GB)[/red]")
862
+ if not _yn_prompt("磁盘空间可能不足,是否继续?"):
863
+ return False
864
+
865
+ total_steps = 4 if install_mode == "light" else 5
866
+ _log(f"\n[bold]1/{total_steps}[/bold] 创建 conda 环境 evo2 (python={python_ver})…")
867
+ if not _create_conda_env("evo2", python_ver, console):
868
+ return False
869
+
870
+ _log(f"\n[bold]2/{total_steps}[/bold] 安装 PyTorch 与基础依赖…")
871
+ install_steps = [
872
+ (["-m", "pip", "install", "--quiet", "--upgrade", "pip", "setuptools", "wheel"], 600),
873
+ (["-m", "pip", "install", "--quiet", torch_spec, "--index-url", torch_index_url], 1800),
874
+ ([
875
+ "-m", "pip", "install", "--quiet",
876
+ "psutil", "requests", "urllib3", "fastapi", "uvicorn[standard]", "biopython",
877
+ ], 1200),
878
+ ]
879
+ for args, timeout in install_steps:
880
+ try:
881
+ result = _run_python_in_conda_env(
882
+ "evo2", args, capture_output=False, timeout=timeout,
883
+ )
884
+ except FileNotFoundError as e:
885
+ _log(f" [red]✗ {e}[/red]")
886
+ return False
887
+ if result.returncode != 0:
888
+ _log(f" [red]✗ 命令失败(exit {result.returncode})[/red]")
889
+ return False
890
+
891
+ current_step = 3
892
+ if install_mode == "full":
893
+ _log(f"\n[bold]{current_step}/{total_steps}[/bold] 安装 Transformer Engine 依赖…")
894
+ for cmd in [
895
+ ["conda", "install", "-n", "evo2", "-c", "nvidia",
896
+ "cuda-nvcc", "cuda-cudart-dev", "-y", "--quiet"],
897
+ ["conda", "install", "-n", "evo2", "-c", "conda-forge",
898
+ "transformer-engine-torch=2.3.0", "-y", "--quiet"],
899
+ ]:
900
+ result = subprocess.run(cmd, timeout=1800, capture_output=False)
901
+ if result.returncode != 0:
902
+ _log(f" [red]✗ 命令失败(exit {result.returncode})[/red]")
903
+ return False
904
+ current_step += 1
905
+
906
+ _log(f"\n[bold]{current_step}/{total_steps}[/bold] 安装 Flash Attention…")
907
+ try:
908
+ result = _run_python_in_conda_env(
909
+ "evo2",
910
+ ["-m", "pip", "install", "flash-attn==2.8.0.post2", "--no-build-isolation"],
911
+ capture_output=False,
912
+ timeout=3600,
913
+ )
914
+ except FileNotFoundError as e:
915
+ _log(f" [red]✗ {e}[/red]")
916
+ return False
917
+ if result.returncode != 0:
918
+ _log(" [red]✗ flash-attn 安装失败[/red]")
919
+ return False
920
+ current_step += 1
921
+
922
+ _log(f"\n[bold]{current_step}/{total_steps}[/bold] 安装 evo2…")
923
+ try:
924
+ result = _run_python_in_conda_env(
925
+ "evo2",
926
+ ["-m", "pip", "install", "--quiet", "evo2"],
927
+ capture_output=False,
928
+ timeout=1800,
929
+ )
930
+ except FileNotFoundError as e:
931
+ _log(f" [red]✗ {e}[/red]")
932
+ return False
933
+ if result.returncode != 0:
934
+ _log(" [red]✗ pip install evo2 失败[/red]")
935
+ return False
936
+
937
+ cfg = _cfg()
938
+ default_model_dir = os.path.join(cfg["data_dir"], evo2_model_name)
939
+ stored_model_dir = ""
940
+ if local_model_path:
941
+ stored_model_dir = os.path.abspath(local_model_path)
942
+ _log(f" [green]✓[/green] 已记录本地权重目录:{stored_model_dir}")
943
+ elif os.path.isdir(default_model_dir):
944
+ stored_model_dir = default_model_dir
945
+
946
+ set_local_config("evo2_model_name", evo2_model_name)
947
+ set_local_config("evo2_model_dir", stored_model_dir)
948
+ set_local_config("evo2_url", "http://localhost:5003")
949
+
950
+ if stored_model_dir:
951
+ _log(f" [dim]启动时将优先从本地目录加载:{stored_model_dir}[/dim]")
952
+ else:
953
+ _log(" [dim]未强制预下载权重;首次启动时将按官方方式从 HuggingFace 获取模型。[/dim]")
954
+
955
+ _log("\n[bold green]✓ Evo2 部署完成![/bold green]")
956
+ _log(" 下一步:[bold]merge local env start --model evo2[/bold]")
957
+ return True
958
+
959
+
744
960
  def _yn_prompt(msg: str) -> bool:
745
961
  try:
746
962
  ans = input(f"{msg} [y/N] ").strip().lower()
@@ -787,7 +1003,8 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
787
1003
  print("[HyenaDNA] tokenizer 已加载", flush=True)
788
1004
  model = AutoModelForCausalLM.from_pretrained(
789
1005
  MODEL_NAME, trust_remote_code=True,
790
- torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
1006
+ dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
1007
+ low_cpu_mem_usage=True,
791
1008
  ).to(DEVICE).eval()
792
1009
  elapsed = time.time() - t0
793
1010
  print(f"[HyenaDNA] 步骤 3/3: 模型加载完成(耗时 {elapsed:.1f}s)", flush=True)
@@ -867,7 +1084,8 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
867
1084
  print("[NT] tokenizer 已加载", flush=True)
868
1085
  model = AutoModelForMaskedLM.from_pretrained(
869
1086
  MODEL_NAME, output_hidden_states=True,
870
- torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
1087
+ dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
1088
+ low_cpu_mem_usage=True,
871
1089
  ).to(DEVICE).eval()
872
1090
  elapsed = time.time() - t0
873
1091
  print(f"[NT] 步骤 3/3: 模型加载完成(耗时 {elapsed:.1f}s)", flush=True)
@@ -920,7 +1138,8 @@ if __name__ == "__main__":
920
1138
  '''
921
1139
 
922
1140
  elif model_name == "evo2":
923
- model_dir = os.path.join(cfg["data_dir"], "evo2_7b")
1141
+ evo2_model_name = cfg.get("evo2_model_name", "evo2_7b")
1142
+ model_dir = cfg.get("evo2_model_dir") or os.path.join(cfg["data_dir"], evo2_model_name)
924
1143
  # FIX #1 + #3
925
1144
  code = f'''#!/usr/bin/env python3
926
1145
  """Evo2 本地推理服务 - MERGE CLI v3.2.0"""
@@ -938,6 +1157,7 @@ from pydantic import BaseModel
938
1157
 
939
1158
  app = FastAPI()
940
1159
  _model = None
1160
+ MODEL_NAME = "{evo2_model_name}"
941
1161
 
942
1162
 
943
1163
  def _load():
@@ -946,7 +1166,8 @@ def _load():
946
1166
  return
947
1167
  print("[Evo2] 步骤 2/3: 加载 Evo2 7b 模型...", flush=True)
948
1168
  LOCAL_DIR = "{model_dir}"
949
- pt_path = os.path.join(LOCAL_DIR, "evo2_7b.pt")
1169
+ pt_path = (LOCAL_DIR if os.path.isfile(LOCAL_DIR)
1170
+ else os.path.join(LOCAL_DIR, f"{{MODEL_NAME}}.pt"))
950
1171
  if os.path.exists(pt_path):
951
1172
  print(f"[Evo2] 使用本地权重:{{pt_path}}", flush=True)
952
1173
  print("[Evo2] (14GB 权重加载约需 2-5 分钟,请耐心等待)", flush=True)
@@ -958,9 +1179,9 @@ def _load():
958
1179
  t0 = time.time()
959
1180
  try:
960
1181
  from evo2.models import Evo2
961
- _model = (Evo2(model_name="evo2_7b", local_path=pt_path)
1182
+ _model = (Evo2(model_name=MODEL_NAME, local_path=pt_path)
962
1183
  if os.path.exists(pt_path)
963
- else Evo2(model_name="evo2_7b"))
1184
+ else Evo2(model_name=MODEL_NAME))
964
1185
  finally:
965
1186
  torch.set_default_dtype(torch.float32)
966
1187
  elapsed = time.time() - t0
@@ -978,7 +1199,7 @@ class V(BaseModel):
978
1199
 
979
1200
  @app.get("/health")
980
1201
  def health():
981
- return {{"status": "ok", "model": "evo2_7b",
1202
+ return {{"status": "ok", "model": MODEL_NAME,
982
1203
  "device": "cuda" if __import__("torch").cuda.is_available() else "cpu"}}
983
1204
 
984
1205
 
@@ -1010,19 +1231,31 @@ if __name__ == "__main__":
1010
1231
  else:
1011
1232
  raise ValueError(f"未知模型:{model_name}")
1012
1233
 
1013
- p.write_text(code.strip())
1234
+ p.write_text(code.strip(), encoding="utf-8")
1014
1235
  return str(p)
1015
1236
 
1016
1237
 
1238
+ def _wait_for_service_stability(model_name: str, checks: int = 3, delay_s: int = 2) -> bool:
1239
+ for _ in range(checks):
1240
+ time.sleep(delay_s)
1241
+ if not check_service_health(model_name).get("online"):
1242
+ return False
1243
+ return True
1244
+
1245
+
1017
1246
  def start_local_service(model_name: str, console=None) -> bool:
1018
1247
  ports = {"hyenadna": 5001, "nt": 5002, "evo2": 5003}
1019
1248
  env_name = ENV_NAMES.get(model_name, model_name)
1020
1249
  port = ports.get(model_name, 5001)
1250
+ svc = _LOCAL_SERVICES.get(model_name)
1021
1251
 
1022
1252
  def _log(m):
1023
1253
  if console:
1024
1254
  console.print(m)
1025
1255
 
1256
+ if svc:
1257
+ set_local_config(svc["url_key"], svc["default"])
1258
+
1026
1259
  h = check_service_health(model_name)
1027
1260
  if h["online"]:
1028
1261
  _log(f"[yellow]⚠[/yellow] {model_name} 已在运行({h['url']}),跳过")
@@ -1042,17 +1275,20 @@ def start_local_service(model_name: str, console=None) -> bool:
1042
1275
  load_hints = {
1043
1276
  "hyenadna": "HyenaDNA 首次启动约需 1-3 分钟(下载/加载约 400MB 权重)",
1044
1277
  "nt": "NT 首次启动约需 3-8 分钟(下载/加载约 2GB 权重)",
1045
- "evo2": "Evo2 7b 首次启动约需 2-5 分钟(加载 14GB 本地权重)",
1278
+ "evo2": "Evo2 首次启动约需 2-5 分钟(如未提供本地权重,将按官方方式从 HuggingFace 下载)",
1046
1279
  }
1047
1280
  _log(f" [dim]{load_hints.get(model_name, '')}[/dim]")
1048
1281
  _log(f" [dim]实时日志:tail -f {log_file}[/dim]")
1049
1282
 
1050
1283
  try:
1051
1284
  with open(log_file, "a") as lf:
1052
- subprocess.Popen(
1053
- ["conda", "run", "-n", env_name, "--no-capture-output",
1054
- "python", script],
1055
- stdout=lf, stderr=lf, start_new_session=True,
1285
+ proc = _popen_python_in_conda_env(
1286
+ env_name,
1287
+ ["-u", script],
1288
+ stdout=lf,
1289
+ stderr=lf,
1290
+ stdin=subprocess.DEVNULL,
1291
+ start_new_session=True,
1056
1292
  )
1057
1293
  except Exception as e:
1058
1294
  _log(f"[red]✗ 启动失败:{e}[/red]")
@@ -1062,9 +1298,18 @@ def start_local_service(model_name: str, console=None) -> bool:
1062
1298
  deadline = time.time() + 300
1063
1299
  dots = 0
1064
1300
  while time.time() < deadline:
1301
+ if proc.poll() is not None:
1302
+ _log(f"\n[red]✗ {model_name} 进程已退出(exit {proc.returncode}),请查看日志:{log_file}[/red]")
1303
+ _log(" [dim]若日志中出现 `Killed`,通常是系统内存不足导致的 OOM。[/dim]")
1304
+ return False
1065
1305
  h = check_service_health(model_name)
1066
1306
  if h["online"]:
1307
+ if not _wait_for_service_stability(model_name):
1308
+ _log(f"\n[red]✗ {model_name} 进程刚启动后很快退出,请查看日志:{log_file}[/red]")
1309
+ _log(" [dim]这通常是内存不足(OOM)或模型加载阶段被系统 kill。[/dim]")
1310
+ return False
1067
1311
  _log(f"\n[green]✓ {model_name} 就绪({h['url']},{h['latency_ms']} ms)[/green]")
1312
+ _log(f" [dim]PID: {proc.pid}[/dim]")
1068
1313
  return True
1069
1314
  if console:
1070
1315
  console.print(f" 等待中{'.' * (dots % 4 + 1)} (最多等待 {int(deadline - time.time())}s)",
@@ -1313,12 +1558,19 @@ def _call_alphagenome_remote(chrom, pos, ref, alt, genome) -> dict:
1313
1558
  def check_local_files() -> dict:
1314
1559
  cfg = _cfg()
1315
1560
  data_dir = cfg["data_dir"]
1561
+ evo2_model_name = cfg.get("evo2_model_name", "evo2_7b")
1562
+ evo2_model_dir = cfg.get("evo2_model_dir") or os.path.join(data_dir, evo2_model_name)
1563
+ evo2_weight_path = (
1564
+ evo2_model_dir
1565
+ if os.path.isfile(evo2_model_dir)
1566
+ else os.path.join(evo2_model_dir, f"{evo2_model_name}.pt")
1567
+ )
1316
1568
  files = {
1317
1569
  "dbNSFP (hg38)": os.path.join(data_dir, "dbNSFP5.3a_grch38.gz"),
1318
1570
  "dbNSFP (hg19)": os.path.join(data_dir, "dbNSFP5.3a_grch37.gz"),
1319
1571
  "GPN-MSA": os.path.join(data_dir, "scores.tsv.bgz"),
1320
1572
  "popEVE": os.path.join(data_dir, "grch38_popEVE_ukbb_20250715.vcf.gz"),
1321
- "Evo2 7b 权重": os.path.join(data_dir, "evo2_7b", "evo2_7b.pt"),
1573
+ f"Evo2 本地权重 ({evo2_model_name}, optional)": evo2_weight_path,
1322
1574
  }
1323
1575
  result = {}
1324
1576
  for label, path in files.items():
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: merge-cli
3
- Version: 3.2.0
3
+ Version: 3.3.0
4
4
  Summary: MERGE 变异致病性预测 CLI(服务器固定,集成模型内嵌,无需手动配置)
5
5
  Project-URL: Homepage, https://merge.fanglab.cn
6
6
  Requires-Python: >=3.11
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "merge-cli"
7
- version = "3.2.0"
7
+ version = "3.3.0"
8
8
  description = "MERGE 变异致病性预测 CLI(服务器固定,集成模型内嵌,无需手动配置)"
9
9
  requires-python = ">=3.11"
10
10
  dependencies = [
File without changes
File without changes