PyPI - merge-cli - Versions diffs - 3.5.0.tar.gz → 3.6.tar.gz - Mend

merge-cli 3.5.0.tar.gz → 3.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

{merge_cli-3.5.0 → merge_cli-3.6}/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: merge-cli
-Version: 3.5.0
-Summary: MERGE 变异致病性预测 CLI（服务器固定，集成模型内嵌，无需手动配置）
+Version: 3.6
+Summary: MERGE variant pathogenicity prediction CLI (fixed server, integrated model embedded, no manual configuration required)
 Project-URL: Homepage, https://merge.fanglab.cn
 Requires-Python: >=3.11
 Requires-Dist: click>=8.1

{merge_cli-3.5.0 → merge_cli-3.6}/merge_cli/cli.py RENAMED Viewed

@@ -7,7 +7,8 @@ Changes (v3.5.0):
   3. Added --genome-ref option to specify reference genome FASTA path
   4. Added --no-shap option for skipping SHAP analysis
 """
-import os, sys, time
+import os, sys, time, subprocess
+import requests
 import click
 from rich.console import Console
 from rich.table import Table
@@ -251,46 +252,62 @@ def predict(chrom, pos, ref, alt, genome, fmt, no_ensemble,
             skip_shap = not click.confirm("\nGenerate SHAP analysis plot?", default=True)
         variant_info = f"{chrom}:{pos} {ref}>{alt}"
-        # ── Determine variant type ────────────────────────────────────
-        # Priority: CLI --ensemble-type > prediction["_variant_type"] > remote ANNOVAR API
-        effective_ensemble_type = ensemble_type
-        if not effective_ensemble_type:
-            effective_ensemble_type = prediction.get("_variant_type")
-        if not effective_ensemble_type:
-            with console.status("[bold cyan]Detecting variant type...[/bold cyan]"):
+        # ── Pre-computed fast path ────────────────────────────────
+        # Both remote (/predict/ returns prediction["precomputed"]) and local
+        # (predict_local returns prediction["precomputed"]) use the same key.
+        precomp = prediction.get("precomputed")
+        if precomp:
+            model_key = precomp.get("model_used", "MERGE_Precomputed")
+            ensemble = {
+                "success":        True,
+                "ensemble_results": {model_key: precomp},
+                "raw_features":   {},
+                "from_cache":     True,
+            }
+            prediction["_variant_type"] = precomp.get("variant_type")
+            console.print("[dim]⚡ Pre-computed score retrieved from cache[/dim]")
+        # ─────────────────────────────────────────────────────────
+        else:
+            # ── Determine variant type ────────────────────────────────────
+            # Priority: CLI --ensemble-type > prediction["_variant_type"] > remote ANNOVAR API
+            effective_ensemble_type = ensemble_type
+            if not effective_ensemble_type:
+                effective_ensemble_type = prediction.get("_variant_type")
+            if not effective_ensemble_type:
+                with console.status("[bold cyan]Detecting variant type...[/bold cyan]"):
+                    try:
+                        from . import local_engine as _le
+                        effective_ensemble_type = _le.get_variant_type(chrom, pos, ref, alt, genome)
+                    except Exception:
+                        effective_ensemble_type = None
+            if effective_ensemble_type:
+                prediction["_variant_type"] = effective_ensemble_type
+            with console.status("[bold cyan]Computing MERGE ensemble score...[/bold cyan]"):
                 try:
-                    from . import local_engine as _le
-                    effective_ensemble_type = _le.get_variant_type(chrom, pos, ref, alt, genome)
-                except Exception:
-                    effective_ensemble_type = None
-        if effective_ensemble_type:
-            prediction["_variant_type"] = effective_ensemble_type
-        with console.status("[bold cyan]Computing MERGE ensemble score...[/bold cyan]"):
-            try:
-                if is_local:
-                    from . import local_engine
-                    dbnsfp      = prediction.get("dbnsfp") or {}
-                    transcripts = dbnsfp.get("transcripts") or ([dbnsfp] if dbnsfp else [])
-                    ens_result, ens_err = local_engine.run_local_ensemble(
-                        prediction, transcripts, ensemble_type=effective_ensemble_type,
-                        skip_shap=skip_shap, variant_info=variant_info)
-                    if ens_err:
-                        console.print(f"[yellow]Ensemble model warning: {ens_err}[/yellow]")
-                    ensemble = {"success": True, **ens_result}
-                else:
-                    dbnsfp      = prediction.get("dbnsfp") or {}
-                    transcripts = [dbnsfp] if dbnsfp else []
-                    ens_resp = api.predict_ensemble(prediction, transcripts,
-                                                    ensemble_type=effective_ensemble_type)
-                    if ens_resp.get("success"):
-                        ensemble = ens_resp
-                        for _v in (ensemble.get("ensemble_results") or {}).values():
-                            if isinstance(_v, dict) and _v.get("variant_type"):
-                                prediction["_variant_type"] = _v["variant_type"]
-                                break
-            except Exception as e:
-                console.print(f"[yellow]Ensemble score failed (raw scores unaffected): {e}[/yellow]")
+                    if is_local:
+                        from . import local_engine
+                        dbnsfp      = prediction.get("dbnsfp") or {}
+                        transcripts = dbnsfp.get("transcripts") or ([dbnsfp] if dbnsfp else [])
+                        ens_result, ens_err = local_engine.run_local_ensemble(
+                            prediction, transcripts, ensemble_type=effective_ensemble_type,
+                            skip_shap=skip_shap, variant_info=variant_info)
+                        if ens_err:
+                            console.print(f"[yellow]Ensemble model warning: {ens_err}[/yellow]")
+                        ensemble = {"success": True, **ens_result}
+                    else:
+                        dbnsfp      = prediction.get("dbnsfp") or {}
+                        transcripts = [dbnsfp] if dbnsfp else []
+                        ens_resp = api.predict_ensemble(prediction, transcripts,
+                                                        ensemble_type=effective_ensemble_type)
+                        if ens_resp.get("success"):
+                            ensemble = ens_resp
+                            for _v in (ensemble.get("ensemble_results") or {}).values():
+                                if isinstance(_v, dict) and _v.get("variant_type"):
+                                    prediction["_variant_type"] = _v["variant_type"]
+                                    break
+                except Exception as e:
+                    console.print(f"[yellow]Ensemble score failed (raw scores unaffected): {e}[/yellow]")
     output.render_single(prediction, ensemble, fmt=fmt,
                          errors=prediction.get("errors"))
@@ -428,15 +445,33 @@ def local_setup():
 # ── merge local download ──────────────────────────────────────
 @local.command("download")
 @click.option("--file", "file_type",
-              type=click.Choice(["all", "dbnsfp", "gpn-msa", "popeve"]),
+              type=click.Choice(["all", "dbnsfp", "gpn-msa", "popeve", "precomputed"]),
               default="all",
-              help="Show download instructions for specific file")
-def local_download(file_type):
-    """Show official download URLs for pre-computed data files."""
+              help="Show download instructions for specific file (or auto-download precomputed VCF)")
+@click.option("--genome", default="all", type=click.Choice(["all", "hg38", "hg19"]),
+              help="Genome version for precomputed VCF download (default: both)")
+@click.option("--variant-type", "variant_type", default="all",
+              type=click.Choice(["all", "coding", "splicing"]),
+              help="Variant type for precomputed VCF download (default: both)")
+def local_download(file_type, genome, variant_type):
+    """Show official download URLs for pre-computed data files.
+    \b
+    The precomputed VCF cache files are auto-downloaded from merge.fanglab.cn:
+      merge local download --file precomputed              # download all 4 files
+      merge local download --file precomputed --genome hg38
+      merge local download --file precomputed --genome hg38 --variant-type coding
+    """
     cfg = get_local_config()
     data_dir = cfg["data_dir"]
     os.makedirs(data_dir, exist_ok=True)
+    # ── Auto-download precomputed VCF files ───────────────────────
+    if file_type == "precomputed":
+        _download_precomputed_vcfs(data_dir, genome, variant_type)
+        return
+    # ─────────────────────────────────────────────────────────────
     INSTRUCTIONS = {
         "dbnsfp": {
             "label": "dbNSFP (ESM1b + AlphaMissense)",
@@ -472,6 +507,8 @@ def local_download(file_type):
         "\n[bold green]✓ Ensemble models (pkl files)[/bold green] are bundled with pip install, "
         "no manual download needed.\n"
         "  If model files are missing, run: [bold]pip install --force-reinstall merge-cli[/bold]\n"
+        "\n[bold yellow]⚡ Pre-computed VCF cache[/bold yellow] can be auto-downloaded:\n"
+        "  [bold]merge local download --file precomputed[/bold]\n"
     )
     show_keys = (list(INSTRUCTIONS.keys()) if file_type == "all"
@@ -487,6 +524,129 @@ def local_download(file_type):
         console.print()
+def _download_precomputed_vcfs(data_dir: str, genome: str = "all", variant_type: str = "all"):
+    """Auto-download pre-computed VCF files + tabix indices from merge.fanglab.cn."""
+    import shutil
+    # Files hosted at: https://merge.fanglab.cn/static/precomputed/<filename>
+    # Adjust BASE_STATIC_URL if you move the files elsewhere (e.g. OSS / GitHub Releases)
+    BASE_STATIC_URL = f"{FIXED_API_URL}/static/precomputed"
+    ALL_FILES = {
+        ("hg38", "coding"):   "coding_merged.vcf.gz",
+        ("hg38", "splicing"): "splicing_merged.vcf.gz",
+        ("hg19", "coding"):   "coding_merged_hg19.vcf.gz",
+        ("hg19", "splicing"): "splicing_merged_hg19.vcf.gz",
+    }
+    # Filter by --genome and --variant-type
+    targets = {
+        (g, vt): fname for (g, vt), fname in ALL_FILES.items()
+        if (genome == "all" or g == genome)
+        and (variant_type == "all" or vt == variant_type)
+    }
+    if not targets:
+        console.print("[red]No files match the specified genome/variant-type combination.[/red]")
+        return
+    has_tabix = bool(shutil.which("tabix"))
+    if not has_tabix:
+        console.print(
+            "[yellow]⚠ tabix not found in PATH. VCF files will be downloaded but NOT indexed.\n"
+            "  Install htslib (conda install -c bioconda htslib) then run this command again\n"
+            "  or index manually: tabix -p vcf <file.vcf.gz>[/yellow]\n"
+        )
+    ok_count = 0
+    for (g, vt), fname in targets.items():
+        dest = os.path.join(data_dir, fname)
+        dest_tbi = dest + ".tbi"
+        url     = f"{BASE_STATIC_URL}/{fname}"
+        url_tbi = f"{BASE_STATIC_URL}/{fname}.tbi"
+        console.print(f"\n[bold cyan]── {g} {vt}: {fname} ──[/bold cyan]")
+        # ── Download .vcf.gz ──────────────────────────────────────
+        if os.path.exists(dest):
+            size_mb = os.path.getsize(dest) / 1e6
+            console.print(f"  [green]✓ Already exists ({size_mb:.0f} MB), skipping.[/green]")
+        else:
+            console.print(f"  Downloading {url} …")
+            success = _stream_download(url, dest)
+            if not success:
+                console.print(f"  [red]✗ Download failed. Check URL or network.[/red]")
+                continue
+        # ── Download .tbi (pre-built index) ──────────────────────
+        if os.path.exists(dest_tbi):
+            console.print(f"  [green]✓ Index already exists, skipping.[/green]")
+        else:
+            console.print(f"  Downloading index {url_tbi} …")
+            idx_ok = _stream_download(url_tbi, dest_tbi)
+            if not idx_ok:
+                # Fall back to local tabix
+                if has_tabix:
+                    console.print("  Pre-built index not found, building with local tabix…")
+                    result = subprocess.run(
+                        ["tabix", "-p", "vcf", dest],
+                        capture_output=True, text=True, timeout=300,
+                    )
+                    if result.returncode == 0:
+                        console.print("  [green]✓ Index built.[/green]")
+                    else:
+                        console.print(f"  [red]✗ tabix failed: {result.stderr.strip()}[/red]")
+                        continue
+                else:
+                    console.print(
+                        "  [red]✗ No pre-built index and tabix not found.\n"
+                        "     Install htslib and run: tabix -p vcf " + dest + "[/red]"
+                    )
+                    continue
+        console.print(f"  [green]✓ Ready: {dest}[/green]")
+        ok_count += 1
+    console.print(f"\n[bold green]✓ Done: {ok_count}/{len(targets)} files ready in {data_dir}[/bold green]")
+    if ok_count > 0:
+        console.print(
+            "  Pre-computed scores will now be used automatically for common variants.\n"
+            "  Run [bold]merge local status[/bold] to verify."
+        )
+def _stream_download(url: str, dest: str, chunk_size: int = 1 << 20) -> bool:
+    """Download url → dest with a progress bar. Returns True on success."""
+    import math
+    tmp = dest + ".part"
+    try:
+        r = requests.get(url, stream=True, timeout=30)
+        if r.status_code == 404:
+            return False
+        r.raise_for_status()
+        total = int(r.headers.get("content-length", 0))
+        downloaded = 0
+        with open(tmp, "wb") as f:
+            for chunk in r.iter_content(chunk_size=chunk_size):
+                if chunk:
+                    f.write(chunk)
+                    downloaded += len(chunk)
+                    if total:
+                        pct  = downloaded / total * 100
+                        done = int(pct / 5)
+                        bar  = "█" * done + "░" * (20 - done)
+                        mb   = downloaded / 1e6
+                        print(f"\r    [{bar}] {pct:5.1f}%  {mb:.0f} MB", end="", flush=True)
+        print()  # newline after progress bar
+        os.replace(tmp, dest)
+        return True
+    except Exception as exc:
+        if os.path.exists(tmp):
+            os.unlink(tmp)
+        console.print(f"\n  [red]Download error: {exc}[/red]")
+        return False
 # ── merge local predict ───────────────────────────────────────
 @local.command("predict")
 @click.option("--chrom",   required=True)

{merge_cli-3.5.0 → merge_cli-3.6}/merge_cli/data/models/ensemble_predict.py RENAMED Viewed

@@ -122,9 +122,9 @@ sys.modules['__main__'].FeatureEngineer = FeatureEngineer
 _THIS_DIR = os.path.dirname(os.path.abspath(__file__))
 MODEL_PATHS = {
-    'ClinVar':               os.path.join(_THIS_DIR, 'BestModel_Clinvar.pkl'),
-    'Splice_ClinVar_GnomAD': os.path.join(_THIS_DIR, 'BestModel_Splice_Unsupervised Only.pkl'),
-    'NonCoding_ClinVar':     os.path.join(_THIS_DIR, 'BestModel_Clinvar-noncoding.pkl'),
+    'ClinVar':               os.path.join(_THIS_DIR, 'BestModel_coding.pkl'),
+    'Splice_ClinVar_GnomAD': os.path.join(_THIS_DIR, 'BestModel_splice.pkl'),
+    'NonCoding_ClinVar':     os.path.join(_THIS_DIR, 'BestModel_noncoding.pkl'),
 }
 for _name, _path in MODEL_PATHS.items():

{merge_cli-3.5.0 → merge_cli-3.6}/merge_cli/ensemble_predict.py RENAMED Viewed

@@ -122,9 +122,9 @@ sys.modules['__main__'].FeatureEngineer = FeatureEngineer
 _THIS_DIR = os.path.dirname(os.path.abspath(__file__))
 MODEL_PATHS = {
-    'ClinVar':               os.path.join(_THIS_DIR, 'BestModel_Clinvar.pkl'),
-    'Splice_ClinVar_GnomAD': os.path.join(_THIS_DIR, 'BestModel_Splice_Unsupervised Only.pkl'),
-    'NonCoding_ClinVar':     os.path.join(_THIS_DIR, 'BestModel_Clinvar-noncoding.pkl'),
+    'ClinVar':               os.path.join(_THIS_DIR, 'BestModel_coding.pkl'),
+    'Splice_ClinVar_GnomAD': os.path.join(_THIS_DIR, 'BestModel_splice.pkl'),
+    'NonCoding_ClinVar':     os.path.join(_THIS_DIR, 'BestModel_noncoding.pkl'),
 }
 for _name, _path in MODEL_PATHS.items():

{merge_cli-3.5.0 → merge_cli-3.6}/merge_cli/local_engine.py RENAMED Viewed

@@ -1251,11 +1251,190 @@ def run_local_ensemble(pred_data, all_transcripts, ensemble_type=None, skip_shap
     return {chosen_key: result}, None
+# ─── Pre-computed VCF Cache ───────────────────────────────────────
+# VCF filenames mirror the server-side PRECOMPUTED_VCFS config in views.py
+_PRECOMPUTED_VCF_NAMES = {
+    "hg38": {
+        "coding":    "coding_merged.vcf.gz",
+        "splicing":  "splicing_merged.vcf.gz",
+    },
+    "hg19": {
+        "coding":    "coding_merged_hg19.vcf.gz",
+        "splicing":  "splicing_merged_hg19.vcf.gz",
+    },
+}
+def query_precomputed_local(chrom: str, pos, ref: str, alt: str,
+                            genome: str = "hg38") -> Optional[dict]:
+    """
+    Query the local pre-computed VCF cache files (same format as the server).
+    Splicing is checked first to prevent coding-model mis-classification.
+    Returns the same dict shape as views.query_precomputed_vcf on hit, or
+    None on miss / unavailable.
+    INFO fields expected (set by the pre-computation pipeline):
+      ENSEMBLE  – MERGE score (0-1)
+      INTERP    – interpretation label (spaces encoded as underscores)
+      HYENA     – HyenaDNA score
+      NT        – Nucleotide Transformer score
+      GPN       – GPN-MSA score
+      POPEVE    – popEVE score
+      EVO2      – Evo2 LLR score
+      AM        – AlphaMissense score
+      ESM1B     – ESM1b score
+      AG_SPLICE           – AlphaGenome splicing composite (splice variants)
+      AG_RAW_MEAN / MAX / MIN         – AlphaGenome raw scores (coding)
+      AG_Q_MEAN / MAX / MIN           – AlphaGenome quantile scores (coding)
+    """
+    try:
+        import pysam
+    except ImportError:
+        return None  # pysam unavailable; remote fallback will be used
+    data_dir = _cfg()["data_dir"]
+    names = _PRECOMPUTED_VCF_NAMES.get(genome, {})
+    for vtype in ("splicing", "coding"):   # splicing first — mirrors server logic
+        fname = names.get(vtype)
+        if not fname:
+            continue
+        fpath = os.path.join(data_dir, fname)
+        if not os.path.exists(fpath):
+            continue
+        try:
+            tbx = pysam.TabixFile(fpath)
+            pos_int = int(pos)
+            chrom_queries = [chrom,
+                             ("chr" + chrom) if not chrom.startswith("chr") else chrom.lstrip("chr")]
+            records = []
+            for cq in chrom_queries:
+                try:
+                    records = list(tbx.fetch(cq, pos_int - 1, pos_int))
+                    if records:
+                        break
+                except Exception:
+                    continue
+            tbx.close()
+            for record in records:
+                parts = record.split("\t")
+                if len(parts) < 8 or parts[3] != ref or parts[4] != alt:
+                    continue
+                info: dict = {}
+                for item in parts[7].split(";"):
+                    if "=" in item:
+                        k, v = item.split("=", 1)
+                        info[k] = v
+                if "ENSEMBLE" not in info:
+                    continue
+                score_val  = _safe_float(info["ENSEMBLE"]) or 0.0
+                interp_val = info.get("INTERP", "Precomputed").replace("_", " ")
+                sub_models = {
+                    "alphagenome": {"statistics": {}},
+                    "hyenadna":    {"score": _safe_float(info.get("HYENA"))},
+                    "nt":          {"score": _safe_float(info.get("NT"))},
+                    "gpn_msa":     {"score": _safe_float(info.get("GPN"))},
+                    "popeve":      {"score": _safe_float(info.get("POPEVE"))},
+                    "evo2": {
+                        "llr_score":      _safe_float(info.get("EVO2")),
+                        "ref_score":      None,
+                        "var_score":      None,
+                        "context_length": 8192,
+                        "interpretation": "Precomputed cache",
+                        "score_class": (
+                            "benign"     if (_safe_float(info.get("EVO2")) or 0) > 0
+                            else "deleterious" if (_safe_float(info.get("EVO2")) or 0) < 0
+                            else "unknown"
+                        ),
+                    },
+                    "AlphaMissense": {"score": _safe_float(info.get("AM"))},
+                    "ESM1b":         {"score": _safe_float(info.get("ESM1B"))},
+                }
+                if vtype == "splicing":
+                    sub_models["alphagenome"]["statistics"]["alphagenome_splicing"] = \
+                        _safe_float(info.get("AG_SPLICE"))
+                else:
+                    sub_models["alphagenome"]["statistics"].update({
+                        "raw_score_mean":      _safe_float(info.get("AG_RAW_MEAN")),
+                        "raw_score_max":       _safe_float(info.get("AG_RAW_MAX")),
+                        "raw_score_min":       _safe_float(info.get("AG_RAW_MIN")),
+                        "quantile_score_mean": _safe_float(info.get("AG_Q_MEAN")),
+                        "quantile_score_max":  _safe_float(info.get("AG_Q_MAX")),
+                        "quantile_score_min":  _safe_float(info.get("AG_Q_MIN")),
+                    })
+                return {
+                    "ensemble": {
+                        "score":        score_val,
+                        "interpretation": {
+                            "label":  interp_val,
+                            "badge":  "⚡ Cache",
+                            "color":  "#ff6b6b" if score_val > 0.5 else "#6bcf7f",
+                        },
+                        "variant_type": "splice" if vtype == "splicing" else "coding",
+                        "model_used":   ("Splice_ClinVar_GnomAD"
+                                         if vtype == "splicing" else "ClinVar"),
+                        "from_cache":        True,
+                        "features_raw_aligned": {},
+                        "imputed_features":     {},
+                        "shap_plot":            None,
+                    },
+                    "sub_models": sub_models,
+                }
+        except Exception as exc:
+            logger.debug(f"[precomputed_local] {vtype}/{genome}: {exc}")
+            continue
+    return None   # cache miss
 # ─── Top-level Prediction Entry ───────────────────────────────────
 def predict_local(chrom, pos, ref, alt, genome="hg38", ensemble_type=None, **flags) -> dict:
     vtype = ensemble_type or get_variant_type(chrom, pos, ref, alt, genome)
+    # ── Pre-computed cache fast path ──────────────────────────────
+    cached = query_precomputed_local(chrom, pos, ref, alt, genome)
+    if cached:
+        logger.debug(f"[precomputed_local] cache hit: {chrom}:{pos} {ref}>{alt}")
+        ens    = cached["ensemble"]
+        subs   = cached["sub_models"]
+        return {
+            "success":   True,
+            "from_cache": True,
+            "prediction": {
+                "input":        {"chrom": chrom, "pos": pos, "ref": ref, "alt": alt},
+                "genome_version": genome,
+                "_variant_type": ens.get("variant_type", vtype),
+                "precomputed":   ens,
+                "dbnsfp": {
+                    "annotations": {},
+                    "dl_models":   {
+                        "AlphaMissense": subs.get("AlphaMissense", {}),
+                        "ESM1b":         subs.get("ESM1b", {}),
+                    },
+                },
+                "alphagenome": subs.get("alphagenome"),
+                "hyenadna":    subs.get("hyenadna"),
+                "nt":          subs.get("nt"),
+                "gpn_msa":     subs.get("gpn_msa"),
+                "popeve":      subs.get("popeve"),
+                "evo2":        subs.get("evo2"),
+                "errors":      {},
+            },
+        }
+    # ─────────────────────────────────────────────────────────────
     tasks = {
         "dbnsfp":   lambda: query_dbnsfp_local(chrom, pos, ref, alt, genome),
         "hyenadna": lambda: call_local_service("hyenadna", chrom, pos, ref, alt, genome),
@@ -1317,6 +1496,42 @@ def predict_local_batch(vcf_path: str, genome: str = "hg38",
         _log(f"  [{i}/{len(variants)}] {chrom}:{pos} {ref}>{alt}…")
         try:
             vtype = get_variant_type(chrom, pos, ref, alt, genome, ensemble_type)
+            # ── Pre-computed cache fast path ──────────────────────
+            cached = query_precomputed_local(chrom, pos, ref, alt, genome)
+            if cached:
+                _log(f"    ⚡ Pre-computed cache hit")
+                ens  = cached["ensemble"]
+                subs = cached["sub_models"]
+                ag_stats = (subs.get("alphagenome") or {}).get("statistics", {})
+                row = {
+                    "chrom": chrom, "pos": pos, "ref": ref, "alt": alt,
+                    "genome":         genome,
+                    "variant_type":   ens.get("variant_type", vtype),
+                    "gene":           "-",
+                    "transcript":     "-",
+                    "merge_ensemble_score": ens.get("score"),
+                    "merge_label":    (ens.get("interpretation") or {}).get("label"),
+                    "from_cache":     True,
+                    "alphamissense_score": (subs.get("AlphaMissense") or {}).get("score"),
+                    "esm1b_score":         (subs.get("ESM1b") or {}).get("score"),
+                    "gpn_msa_score":  (subs.get("gpn_msa") or {}).get("score"),
+                    "popeve_score":   (subs.get("popeve") or {}).get("score"),
+                    "hyenadna_score": (subs.get("hyenadna") or {}).get("score"),
+                    "nt_score":       (subs.get("nt") or {}).get("score"),
+                    "evo2_score":     (subs.get("evo2") or {}).get("llr_score"),
+                    "alphagenome_raw_max":       ag_stats.get("raw_score_max"),
+                    "alphagenome_raw_min":       ag_stats.get("raw_score_min"),
+                    "alphagenome_raw_mean":      ag_stats.get("raw_score_mean"),
+                    "alphagenome_quantile_max":  ag_stats.get("quantile_score_max"),
+                    "alphagenome_quantile_min":  ag_stats.get("quantile_score_min"),
+                    "alphagenome_quantile_mean": ag_stats.get("quantile_score_mean"),
+                    "alphagenome_splicing":      ag_stats.get("alphagenome_splicing"),
+                    "errors": "",
+                }
+                results_rows.append(row)
+                continue
+            # ─────────────────────────────────────────────────────
             resp  = predict_local(chrom, pos, ref, alt, genome,
                                   local_genome_path=None, ensemble_type=vtype, **model_flags)
             pred  = resp.get("prediction", {})
@@ -1399,6 +1614,12 @@ def check_local_files() -> dict:
         "dbNSFP (hg19)": os.path.join(data_dir, "dbNSFP5.3a_grch37.gz"),
         "GPN-MSA":       os.path.join(data_dir, "scores.tsv.bgz"),
         "popEVE":        os.path.join(data_dir, "grch38_popEVE_ukbb_20250715.vcf.gz"),
+        # ── Pre-computed VCF cache ──────────────────────────────────
+        "Pre-computed coding (hg38)":    os.path.join(data_dir, "coding_merged.vcf.gz"),
+        "Pre-computed splicing (hg38)":  os.path.join(data_dir, "splicing_merged.vcf.gz"),
+        "Pre-computed coding (hg19)":    os.path.join(data_dir, "coding_merged_hg19.vcf.gz"),
+        "Pre-computed splicing (hg19)":  os.path.join(data_dir, "splicing_merged_hg19.vcf.gz"),
+        # ───────────────────────────────────────────────────────────
         f"Evo2 local weights ({evo2_model_name}, optional)": evo2_weight_path,
     }
     result = {}

{merge_cli-3.5.0 → merge_cli-3.6}/merge_cli/output.py RENAMED Viewed

@@ -62,11 +62,13 @@ def _extract_flat(prediction: dict, ensemble=None) -> dict:
     ens_score = ens_label = ens_model = ens_variant_type = None
     imputed_feats = {}
+    _from_cache = False
     if ensemble:
         ens_results = ensemble.get("ensemble_results") or {
             k: v for k, v in ensemble.items() if k not in ("success", "error")
         }
+        _from_cache = bool(ensemble.get("from_cache"))
         for _key, _val in ens_results.items():
             if isinstance(_val, dict):
                 ens_score        = _val.get("score")
@@ -74,6 +76,8 @@ def _extract_flat(prediction: dict, ensemble=None) -> dict:
                 ens_model        = _key
                 ens_variant_type = _val.get("variant_type")
                 imputed_feats    = _val.get("imputed_features", {})
+                if _val.get("from_cache"):
+                    _from_cache = True
                 break
     def _get_val(raw_val, imputed_key):
@@ -115,6 +119,7 @@ def _extract_flat(prediction: dict, ensemble=None) -> dict:
         "AG_raw_mean":     _ag("alphagenome_raw_score_mean",     "raw_score_mean"),
         "AG_quantile_mean":_ag("alphagenome_quantile_score_mean","quantile_score_mean"),
         "AG_Splicing":     _ag("alphagenome_splicing",            "alphagenome_splicing"),
+        "from_cache":      _from_cache,
     }
@@ -172,6 +177,17 @@ def render_single(prediction: dict, ensemble=None, fmt="table", errors=None) ->
         vt_style = {"coding": "bold magenta", "noncoding": "bold blue", "splice": "bold yellow"}.get(str(vt).lower(), "white")
         t.add_row("Variant Type", Text(str(vt), style=vt_style))
+    # Cache indicator
+    _is_cached = (
+        (ensemble or {}).get("from_cache") or
+        any(
+            isinstance(v, dict) and v.get("from_cache")
+            for v in ((ensemble or {}).get("ensemble_results") or {}).values()
+        )
+    )
+    if _is_cached:
+        t.add_row("Score Source", Text("⚡ Pre-computed cache", style="bold yellow"))
     # MERGE ensemble
     t.add_section()
     t.add_row("MERGE Pathogenicity", _score_text(flat["MERGE_Score"], flat["MERGE_Label"]))

{merge_cli-3.5.0 → merge_cli-3.6}/merge_cli.egg-info/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: merge-cli
-Version: 3.5.0
-Summary: MERGE 变异致病性预测 CLI（服务器固定，集成模型内嵌，无需手动配置）
+Version: 3.6
+Summary: MERGE variant pathogenicity prediction CLI (fixed server, integrated model embedded, no manual configuration required)
 Project-URL: Homepage, https://merge.fanglab.cn
 Requires-Python: >=3.11
 Requires-Dist: click>=8.1

{merge_cli-3.5.0 → merge_cli-3.6}/pyproject.toml RENAMED Viewed

@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "merge-cli"
-version = "3.5.0"
-description = "MERGE 变异致病性预测 CLI（服务器固定，集成模型内嵌，无需手动配置）"
+version = "3.6"
+description = "MERGE variant pathogenicity prediction CLI (fixed server, integrated model embedded, no manual configuration required)"
 requires-python = ">=3.11"
 dependencies = [
     # 核心 CLI 框架