PyPI - amina-cli - Versions diffs - 0.4.2__tar.gz → 0.5.1__tar.gz - Mend

amina-cli 0.4.2.tar.gz → 0.5.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107) hide show

{amina_cli-0.4.2 → amina_cli-0.5.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: amina-cli
-Version: 0.4.2
+Version: 0.5.1
 Summary: CLI for AminoAnalytica protein engineering platform
 Project-URL: Homepage, https://aminoanalytica.com
 Project-URL: Documentation, https://docs.aminoanalytica.com

{amina_cli-0.4.2 → amina_cli-0.5.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "amina-cli"
-version = "0.4.2"
+version = "0.5.1"
 description = "CLI for AminoAnalytica protein engineering platform"
 readme = "README.md"
 license = {text = "Apache-2.0"}

{amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/__init__.py RENAMED Viewed

@@ -9,4 +9,4 @@ Quick start:
     amina run esmfold --sequence "MKFLILLFNILCLFPVLAADNH"
 """
-__version__ = "0.4.2"
+__version__ = "0.5.1"

{amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/jobs_cmd.py RENAMED Viewed

@@ -775,7 +775,7 @@ def download(
     response_path.write_text(json.dumps(result, indent=2, default=str))
     try:
-        downloaded = download_results(result, output)
+        downloaded, failed = download_results(result, output)
         if downloaded:
             console.print(f"[green]\u2713[/green] Downloaded {len(downloaded)} file(s) to {output}/")
             for path in downloaded:
@@ -797,8 +797,20 @@ def download(
                 tool_metadata = get_tool(job_info.get("tool_name", ""))
                 render_tool_output(result, tool_metadata)
-            else:
+            elif not failed:
                 console.print("[dim]No output files to download.[/dim]")
+        # Per-file failures are reported separately from total-failure (StorageError).
+        # A non-empty `failed` dict means a partial success — the caller (often an
+        # agent) needs to know which files are missing so it can rerun `amina jobs
+        # download` to mint fresh signed URLs and retry just the gaps.
+        if failed:
+            console.print(f"[yellow]Warning:[/yellow] {len(failed)} file(s) failed to download:")
+            for file_type, err in failed.items():
+                console.print(f"  - {file_type}: {err}")
+            console.print(
+                "[dim]Re-run `amina jobs download` to mint fresh signed URLs and retry the missing files.[/dim]"
+            )
+            raise typer.Exit(1)
     except StorageError as e:
         # Show signed URLs as fallback
         signed_urls = result.get("signed_urls", {})
@@ -819,6 +831,40 @@ def download(
 # ═══════════════════════════════════════════════════════════════════════════════
+def _missing_artifacts(response_path: Path, dir_path: Path) -> list[str]:
+    """Return artifact file_types in ``response.json`` whose local file is missing.
+    Used by ``reconcile`` to detect partial downloads: a response.json on disk
+    with declared ``output_files`` but no corresponding local files indicates
+    the original download partially failed (a stale signed URL, an HTTP 400,
+    a network blip). The dir needs a retry with fresh URLs, not a skip.
+    Returns an empty list when:
+      - response.json doesn't exist or is malformed (caller already handled)
+      - the result declared no ``output_files`` (e.g. data-only tools)
+      - every declared file is on disk
+    Returns a list of ``file_type`` keys (e.g. ``["pdb_filepath",
+    "csv_filepath"]``) for each missing artifact.
+    """
+    try:
+        result = json.loads(response_path.read_text())
+    except (OSError, json.JSONDecodeError):
+        # If we can't parse it, downstream code will re-resolve from scratch.
+        return []
+    output_files = result.get("output_files") or {}
+    if not isinstance(output_files, dict):
+        return []
+    missing: list[str] = []
+    for file_type, remote_path in output_files.items():
+        if not remote_path:
+            continue
+        local = dir_path / Path(remote_path).name
+        if not local.exists():
+            missing.append(file_type)
+    return missing
 def _find_submission_files(root: Path, recursive: bool) -> list[tuple[str, Path]]:
     """Walk ``root`` for ``submission.json`` files and extract job_id from each.
@@ -945,11 +991,23 @@ def reconcile(
         response_path = dir_path / "response.json"
         entry: dict = {"job_id": job_id, "dir": str(dir_path)}
+        # Idempotency check has two parts: response.json must exist AND every
+        # declared artifact must be on local disk. Checking only response.json
+        # leaves silent gaps when the original download partially failed
+        # (HTTP 400 on a single file, stale URL after >1h, etc.) — reconcile
+        # would forever mark such dirs as `already_done` despite missing PDB
+        # or TRB outputs.
         if response_path.exists():
-            summary["already_done"] += 1
-            entry["action"] = "skipped_response_json_exists"
-            per_job.append(entry)
-            continue
+            missing = _missing_artifacts(response_path, dir_path)
+            if not missing:
+                summary["already_done"] += 1
+                entry["action"] = "skipped_response_json_exists"
+                per_job.append(entry)
+                continue
+            # Fall through to re-resolve status. _resolve_job_status will
+            # mint fresh signed URLs (any in the on-disk response.json are
+            # likely expired) and we'll retry the download below.
+            entry["missing_artifacts_before_retry"] = missing
         job_info = get_job_info(job_id)
         if not job_info:
@@ -1012,8 +1070,15 @@ def reconcile(
             entry["action"] = "wrote_response_json"
             if download:
                 try:
-                    downloaded = download_results(result_payload, dir_path)
+                    downloaded, failed = download_results(result_payload, dir_path)
                     entry["downloaded_files"] = [p.name for p in downloaded]
+                    # Per-file failures are partial successes; count and surface
+                    # them so the agent can detect missing artifacts without
+                    # post-hoc disk inspection. Idempotency check above will
+                    # also catch this on the next pass and retry the gaps.
+                    if failed:
+                        summary["download_failed"] += 1
+                        entry["download_failures"] = failed
                 except StorageError as dl_err:
                     summary["download_failed"] += 1
                     entry["download_error"] = str(dl_err)

{amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/__init__.py RENAMED Viewed

@@ -382,12 +382,15 @@ def run_tool_with_progress(
             progress.update(task, description="Downloading results...")
-            # Download output files
-            downloaded = []
+            # Download output files. download_results returns (downloaded, failed):
+            # per-file errors land in `failed` rather than aborting the loop, so a
+            # single stale signed URL no longer silently abandons the other files.
+            downloaded: list[Path] = []
+            failed: dict[str, str] = {}
             try:
-                downloaded = download_results(result, output_dir)
+                downloaded, failed = download_results(result, output_dir)
             except StorageError as e:
-                # If signed_urls are available, show them as fallback
+                # Total failure (e.g. no signed URLs and no Supabase credentials).
                 signed_urls = result.get("signed_urls", {})
                 if signed_urls:
                     console.print("\n[yellow]Warning:[/yellow] Could not download files automatically.")
@@ -398,6 +401,16 @@ def run_tool_with_progress(
                 else:
                     console.print(f"\n[yellow]Warning:[/yellow] Download failed: {e}")
+            # Surface per-file failures so agents don't silently end up with
+            # response.json present but artifacts missing.
+            if failed:
+                console.print(f"\n[yellow]Warning:[/yellow] {len(failed)} file(s) failed to download:")
+                for file_type, err in failed.items():
+                    console.print(f"  - {file_type}: {err}")
+                console.print(
+                    "[dim]Re-run `amina jobs download` to mint fresh signed URLs and retry the missing files.[/dim]"
+                )
             # Persist the structured response payload (success or failure) so agents
             # and scripts can read metrics/warnings/cost without re-parsing stdout.
             # Soft-fail on filesystem errors — the job has already run and been

{amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/docs/proteinmpnn.yaml RENAMED Viewed

@@ -28,41 +28,43 @@ when_not_to_use: |
 tool_algorithm: |
   ProteinMPNN is an autoregressive message-passing neural network that designs
   amino acid sequences conditioned on a protein backbone structure. The model
-  operates on a k-nearest-neighbor graph constructed from C-alpha coordinates
-  and iteratively passes messages between nodes (residues) and edges (spatial
-  neighbors).
+  is backbone-only (it never sees side-chains) and operates on a
+  k-nearest-neighbor graph over Cα atoms.
   Key stages:
   1. **Graph construction**: The input PDB backbone is converted to a
-     k-nearest-neighbor graph based on C-alpha distances.
-  2. **Encoder**: A series of message-passing layers encode structural features
-     (backbone dihedrals, inter-residue distances, orientations) into node and
-     edge embeddings.
-  3. **Decoder**: An autoregressive decoder samples amino acid identities one
-     residue at a time, conditioned on the structural encoding and previously
-     sampled residues.
+     k-nearest-neighbor graph (k=48) over Cα atoms; edge features encode
+     distances between N, Cα, C, O, and a virtual Cβ.
+  2. **Encoder**: 3 message-passing layers with hidden dim 128 encode
+     backbone geometry into node and edge embeddings.
+  3. **Decoder**: 3 message-passing layers autoregressively sample amino-acid
+     identities one residue at a time in a random decoding order, conditioned
+     on the structural encoding and previously sampled residues.
   4. **Temperature sampling**: The softmax temperature controls sequence
      diversity — lower temperatures produce sequences closer to the model's
      most confident prediction; higher temperatures increase diversity.
-  5. **Scoring**: Each designed sequence receives a negative log-likelihood
-     score (lower is better) and a sequence recovery metric measuring
-     similarity to the original sequence.
+  5. **Scoring**: Each designed sequence receives an average negative
+     log-likelihood score (always ≥ 0; lower is better) and a sequence
+     recovery metric measuring similarity to the original sequence.
 additional_context: |
-  - The **vanilla** model is trained on the full PDB and is the default choice
-    for most design tasks.
+  - The **vanilla** model is a backbone-atom model (N, Cα, C, O, virtual Cβ)
+    trained on PDB biological assemblies as of Aug 2021 (≤3.5 Å resolution,
+    <10,000 residues). It is the default choice for most design tasks.
   - The **soluble** model is trained only on soluble proteins and may produce
     sequences with better solubility characteristics.
-  - The **ca_only** model uses only C-alpha coordinates, useful when full
-    backbone atom positions are unavailable or unreliable.
-  - Lower sampling temperatures (0.1-0.3) produce more conservative, high-
-    confidence designs. Higher temperatures (0.5-1.0) increase diversity.
+  - The **ca_only** model uses only Cα coordinates, useful when full backbone
+    atom positions are unavailable or unreliable.
+  - Lower sampling temperatures (upstream recommends 0.1–0.3) produce more
+    conservative, high-confidence designs. Higher temperatures (0.5–1.0)
+    increase diversity but at T=1.0 sequences approach random.
   - Fixed residues are excluded from the design process entirely — their
     identity is held constant and used as context for designing other positions.
-  - Sequence recovery measures the fraction of designed residues that match the
-    original sequence. High recovery (~0.4-0.6) is typical for well-designed
-    backbones; very high recovery (>0.8) may indicate the backbone strongly
-    constrains the sequence space.
+  - Sequence recovery measures the fraction of designed residues that match
+    the original sequence. The paper reports ~52.4% mean recovery on native
+    backbones, so ~0.5 is the typical center for well-folded proteins; very
+    high recovery (>0.7) may indicate the backbone strongly constrains the
+    sequence space.
 # ─── Parameters ───
 # Parameter definitions are canonical here — keep in sync with proteinmpnn.py
@@ -101,9 +103,10 @@ parameters:
     default: vanilla
     description: |
       Model variant to use:
-      - **vanilla**: Full-atom model trained on the entire PDB (default)
+      - **vanilla**: Backbone-atom model (N, Cα, C, O, virtual Cβ) trained on
+        PDB biological assemblies as of Aug 2021, ≤3.5 Å (default)
       - **soluble**: Trained on soluble proteins only
-      - **ca_only**: Uses only C-alpha coordinates
+      - **ca_only**: Uses only Cα coordinates
   num-sequences:
     type: integer
@@ -115,11 +118,13 @@ parameters:
   temperature:
     type: float
-    default: 1.0
+    default: 0.1
     range: [0.01, 2.0]
     description: |
-      Sampling temperature controlling sequence diversity. Lower values
-      produce more conservative designs; higher values increase diversity.
+      Sampling temperature controlling sequence diversity. Upstream
+      ProteinMPNN's default and recommended range is 0.1–0.3; values
+      approaching 1.0 produce near-random sequences. Lower = more
+      conservative designs; higher = more diverse.
   seed:
     type: integer
@@ -169,14 +174,15 @@ output_metrics:
   best_score:
     display_name: Best Score
     description: |
-      **Best score** is the lowest (most favorable) negative log-likelihood
-      across all designed sequences. The score measures how well the designed
-      sequence fits the input backbone according to the ProteinMPNN model.
-      Lower values indicate higher model confidence that the sequence will
-      fold into the target structure.
+      **Best score** is the lowest (most favorable) average negative
+      log-likelihood across all designed sequences. The score measures how
+      well the designed sequence fits the input backbone according to the
+      ProteinMPNN model. The score is always ≥ 0 (since it is an NLL); lower
+      values indicate higher model confidence that the sequence will fold
+      into the target structure.
     interpretation: |
-      - Lower is better — there is no fixed scale
-      - Typical values range from roughly -3.0 to 0.0 depending on structure
+      - Lower is better; always ≥ 0
+      - Typical values per the upstream README examples are ~0.7–1.5
       - Compare across sequences within the same run for relative ranking
   mean_score:
@@ -198,8 +204,9 @@ output_metrics:
       designed positions, averaged across all generated sequences.
     range: [0, 1]
     interpretation: |
-      - 0.3–0.5: Typical for well-folded natural proteins
-      - >0.5: High recovery — backbone strongly constrains sequence space
+      - 0.4–0.55: Typical for well-folded native backbones (the paper reports
+        a 52.4% mean recovery)
+      - >0.6: High recovery — backbone strongly constrains sequence space
       - <0.3: Low recovery — backbone may accommodate diverse sequences
       - Very high recovery (>0.8) may indicate a rigid, highly constrained fold

{amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/docs/rfdiffusion.yaml RENAMED Viewed

@@ -47,7 +47,7 @@ tool_algorithm: |
   - **unconditional**: Base_ckpt.pt
   - **binder-design**: Complex_base_ckpt.pt (default), Complex_beta_ckpt.pt (diverse topologies)
   - **binder-redesign**: Base_ckpt.pt
-  - **motif-scaffolding**: Base_ckpt.pt, ActiveSite_ckpt.pt (small motifs), InpaintSeq_ckpt.pt
+  - **motif-scaffolding**: Base_ckpt.pt, ActiveSite_ckpt.pt (small motifs), InpaintSeq_ckpt.pt, InpaintSeq_Fold_ckpt.pt (when both inpaint_seq and fold_conditioning are set)
   - **partial-diffusion**: Base_ckpt.pt
   - **custom-contigs**: Any of 8 available checkpoints
@@ -60,9 +60,23 @@ additional_context: |
     binders but may have lower success rates.
   - Fold conditioning (--fold-conditioning) biases the diffusion toward specific
     fold topologies using PDB scaffold templates.
-  - Noise level for partial-diffusion and binder-redesign controls the degree of
-    structural diversification: lower noise (1-10) = subtle changes, higher noise
-    (25-50) = major backbone rearrangement.
+  - The `--noise` flag maps directly to RFdiffusion's `diffuser.partial_T`,
+    which is a discrete diffusion-timestep count (total `diffuser.T=50`).
+    Upstream guidance is `partial_T≈20` as a typical starting point; values
+    approaching T mean nearly full re-diffusion. Lower values stay closer to
+    the input backbone; higher values introduce more change.
+  - **binder-redesign accepts any input numbering.** Partial diffusion internally
+    requires each chain numbered contiguously from 1, but binder-redesign now
+    renumbers the input for you and restores the original chain IDs and residue
+    numbers on the output — so a target keeping its biological numbering (e.g.
+    residues 257-364) works as-is, and the output is returned with that numbering.
+  - **partial-diffusion still requires contiguous-from-1 numbering.** It maps input
+    residue positions directly onto the output, so every chain must be numbered
+    contiguously starting at residue 1 with no gaps; a chain starting at, say, 50
+    is rejected. Renumber first (e.g. the **PDB cleaner** tool, or `pdbtools`'
+    `pdb_reres`). This does not apply to
+    binder-design, which generates the binder de novo and references the target
+    as an explicit motif, so offset target numbering is fine there.
 # ─── Parameters ───
 # Parameter definitions are canonical here — keep in sync with rfdiffusion.py
@@ -85,6 +99,9 @@ parameters:
       Input PDB file. Required for binder-design, binder-redesign,
       motif-scaffolding, and partial-diffusion modes. Optional for
       custom-contigs.
+      For partial-diffusion, every chain must be numbered contiguously from
+      residue 1 (no gaps). binder-redesign handles any numbering automatically
+      (renumbered internally, restored on output) — see additional notes.
   num-designs:
     type: integer
@@ -109,8 +126,10 @@ parameters:
     required: false
     description: |
       Symmetry type for unconditional and motif-scaffolding modes.
-      Supports cyclic (c2, c3, c4...), dihedral (d2, d3...), and
-      tetrahedral symmetry.
+      Upstream RFdiffusion currently implements cyclic (c2, c3, c4…),
+      dihedral (d2, d3…), and tetrahedral symmetry. The schema validator
+      also accepts octahedral and icosahedral but upstream marks these
+      as "future".
     examples:
       - "c4"
       - "d2"
@@ -162,29 +181,38 @@ parameters:
     type: string
     required: false
     description: |
-      Target chain ID for binder-redesign mode. Defaults to 'B'.
+      Target chain ID(s) for binder-redesign mode. Defaults to 'B'. Accepts one
+      or more chains, comma-separated, to redesign the binder against a
+      multi-chain target. Each target chain is kept fixed during redesign.
     examples:
       - "B"
+      - "A,B"
   noise:
     type: integer
     required: false
     range: [1, 50]
     description: |
-      Noise level for partial-diffusion and binder-redesign modes.
-      Controls the degree of structural diversification. Lower values
-      (1-10) produce subtle changes; higher values (25-50) cause
-      major backbone rearrangement. Required for these modes.
+      Partial diffusion timestep count (maps to RFdiffusion's
+      `diffuser.partial_T`; total `diffuser.T=50`). Required for
+      partial-diffusion and binder-redesign modes. Upstream guidance is
+      `partial_T≈20` as a typical starting point — lower values stay
+      closer to the input backbone; values approaching 50 mean nearly
+      full re-diffusion.
   preserve:
     type: string
     required: false
     description: |
-      Residues to preserve during partial-diffusion and binder-redesign.
-      Comma-separated list of residue identifiers or ranges.
+      Residues whose sequence is preserved during partial-diffusion and
+      binder-redesign. Comma-separated list of residues or ranges, given as
+      PDB residue numbers. A chain-prefixed range (e.g. 'X10-25') targets that
+      chain; a bare range (e.g. '10-25') applies to the binder chain in
+      binder-redesign, or the first chain in partial-diffusion. These are
+      mapped internally to the contig positions RFdiffusion expects.
     examples:
-      - "10,20,30"
-      - "A10-25"
+      - "10-25"
+      - "X10-25,X40-50"
   contigs:
     type: string
@@ -259,6 +287,8 @@ examples:
     command: amina run rfdiffusion -m binder-design -i target.pdb --hotspots A30,A33 --binder-length 80-120 -o ./out/
   - title: Binder redesign
     command: amina run rfdiffusion -m binder-redesign -i complex.pdb --noise 25 --binder-chain A --target-chain B -o ./out/
+  - title: Binder redesign against a multi-chain target, preserving part of the binder
+    command: amina run rfdiffusion -m binder-redesign -i complex.pdb --noise 25 --binder-chain X --target-chain A,B --preserve X1-30 -o ./out/
   - title: Motif scaffolding
     command: amina run rfdiffusion -m motif-scaffolding -i motif.pdb --contigs "10-40/A163-181/10-40" -o ./out/
   - title: Partial diffusion
@@ -268,5 +298,7 @@ examples:
 # ─── References ───
 references:
-  - title: "RFDiffusion: De novo protein design by deep network hallucination (Watson et al., 2023)"
+  - title: "De novo design of protein structure and function with RFdiffusion (Watson et al., Nature 620, 1089–1100, 2023)"
+    url: "https://doi.org/10.1038/s41586-023-06415-8"
+  - title: "RFdiffusion GitHub source (RosettaCommons)"
     url: "https://github.com/RosettaCommons/RFdiffusion"

{amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/proteinmpnn.py RENAMED Viewed

@@ -52,9 +52,9 @@ def register(app: typer.Typer):
             help="Number of sequences to generate (1-50)",
         ),
         temperature: float = typer.Option(
-            1.0,
+            0.1,
             "--temperature",
-            help="Sampling temperature (0.01-2.0)",
+            help="Sampling temperature (0.01-2.0; upstream default 0.1, recommended 0.1-0.3)",
         ),
         seed: int = typer.Option(
             37,

{amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/rfdiffusion.py RENAMED Viewed

@@ -118,7 +118,7 @@ def register(app: typer.Typer):
             None,
             "--target-chain",
             "-tc",
-            help="Target chain ID (binder-redesign)",
+            help="Target chain ID(s) (binder-redesign). Comma-separate for multiple, e.g. 'A,B'",
         ),
         # Partial diffusion / redesign options
         noise_level: Optional[int] = typer.Option(
@@ -222,7 +222,8 @@ def register(app: typer.Typer):
                 raise typer.Exit(1)
             params["pdb_content"] = pdb_content
             params["binder_chain"] = binder_chain or "A"
-            params["target_chain"] = target_chain or "B"
+            # Accept one or more target chains, comma-separated (e.g. "A,B").
+            params["target_chain"] = [c.strip() for c in target_chain.split(",")] if target_chain else ["B"]
             params["noise_level"] = noise_level
             if preserve:
                 params["preserve_sequences"] = [p.strip() for p in preserve.split(",")]

{amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/boltz2.py RENAMED Viewed

@@ -287,7 +287,7 @@ def register(app: typer.Typer):
             "recycling_steps": recycling_steps,
             "sampling_steps": sampling_steps,
             "diffusion_samples": diffusion_samples,
-            "step_scale": 1.638,
+            "step_scale": 1.5,
             "use_potentials": False,
             "enable_affinity_prediction": enable_affinity,
         }

{amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/docs/boltz2.yaml RENAMED Viewed

@@ -19,7 +19,7 @@ when_to_use: |
   - Multi-chain protein complex modeling
   - Protein-nucleic acid (DNA/RNA) complex structures
   - When you need confidence metrics for predicted interfaces (ipTM, ipSAE)
-  - When binding affinity prediction (pKd) is needed alongside structure
+  - When binding affinity prediction (log10 IC50) is needed alongside structure
 when_not_to_use: |
   - Single-chain protein folding only → use **ESMFold** (faster, no MSA needed)
@@ -38,7 +38,7 @@ tool_algorithm: |
   1. **Input processing**: Sequences are tokenized; MSAs are generated or loaded;
      ligands are parsed from SMILES/CCD.
   2. **Trunk module**: Processes pairwise and MSA representations through
-     Evoformer-like attention blocks.
+     PairFormer attention blocks (PairformerArgsV2, 64 blocks).
   3. **Diffusion sampling**: Starting from noise, atom positions are refined
      over N denoising steps (default 200). More steps = finer detail.
   4. **Recycling**: The predicted structure is fed back through the trunk for
@@ -46,8 +46,9 @@ tool_algorithm: |
   5. **Confidence estimation**: PAE (Predicted Aligned Error), pLDDT, pTM, and
      ipTM are computed from internal representations. Additional metrics (ipSAE,
      LIS, pDockQ) are derived from the PAE matrix.
-  6. **Affinity prediction** (optional): A separate head predicts binding
-     affinity (pKd) for protein-ligand complexes.
+  6. **Affinity prediction** (optional): A separate PairFormer-based head
+     predicts a binary binding likelihood and an IC50-like affinity value
+     (log10 IC50 in μM) for protein–ligand complexes.
 additional_context: |
   - MSA generation adds ~60s to runtime for sequences without precomputed
@@ -157,8 +158,8 @@ parameters:
     type: boolean
     default: false
     description: |
-      Enable binding affinity prediction (pKd). Only works for
-      protein-ligand complexes — requires at least one `--ligand`.
+      Enable binding affinity prediction (log10 IC50 in μM). Only works
+      for protein-ligand complexes — requires at least one `--ligand`.
   job-name:
     type: string
@@ -199,9 +200,9 @@ output_metrics:
   ptm:
     display_name: pTM
     description: |
-      **pTM** (predicted TM-score) estimates the global fold quality of the
-      entire predicted structure. Derived from the PAE matrix, measuring
-      overall structural similarity to the ground truth.
+      **pTM** (predicted TM-score) is the model's estimate of the TM-score its
+      structure would achieve against ground truth, using only the model's own
+      PAE matrix (no ground-truth structure is available at inference).
     range: [0, 1]
     interpretation: |
       - >0.8: High-quality global fold prediction
@@ -224,8 +225,9 @@ output_metrics:
   confidence_score:
     display_name: Confidence Score
     description: |
-      Combined confidence metric computed as `0.2 * ipTM + 0.8 * pTM`.
-      Gives a single overall quality score weighted toward global fold quality.
+      Aggregated ranking score used by Boltz-2 to order samples. Computed as
+      `0.8 * complex_plddt + 0.2 * ipTM` (Boltz-2 falls back to `0.8 * pLDDT
+      + 0.2 * pTM` when there are no interfaces). Weighted toward pLDDT.
     range: [0, 1]
     interpretation: |
       - >0.8: High overall prediction confidence
@@ -274,44 +276,52 @@ output_metrics:
   lis:
     display_name: LIS
     description: |
-      **LIS** (Local Interface Score) evaluates interface quality using
-      local PAE values between interface residue pairs. Focuses on nearby
-      contacts rather than global alignment.
+      **LIS** (Local Interaction Score) evaluates protein-protein interaction
+      quality from inverted PAE values within the local interaction area
+      (residue pairs with PAE ≤ 12 Å). Introduced by Kim et al. 2024
+      (bioRxiv 2024.02.19.580970).
     range: [0, 1]
     interpretation: |
-      - >0.5: Good interface contacts predicted
-      - <0.5: Weak interface prediction
+      - Higher is better; AFM-LIS benchmarks Best LIS ≥ 0.203 / Average LIS
+        ≥ 0.073 as positive-call thresholds.
   pdockq:
     display_name: pDockQ
     description: |
       **pDockQ** (predicted DockQ) estimates the quality of a predicted
       protein-protein docked complex, calibrated against the DockQ benchmark.
-      Computed from the number of interface contacts and average interface pLDDT.
+      Bryant et al. 2022 define it as a sigmoidal fit on `average interface
+      pLDDT × log(interface contacts)`.
     range: [0, 1]
     interpretation: |
-      - >0.5: Acceptable docking quality
-      - >0.23: Possible interaction
-      - <0.23: Unlikely to be a correct docked pose
+      - >0.5: confidently predicted interface
+      - 0.23–0.5: borderline (pDockQ is noisy in this range; 0.23 is the
+        DockQ-acceptable calibration target, not a pDockQ cutoff)
+      - <0.23: unlikely interaction
   pdockq2:
     display_name: pDockQ2
     description: |
-      **pDockQ2** is an improved version of pDockQ that uses a multi-model
-      scoring approach for better discrimination of correct interfaces.
+      **pDockQ2** augments pDockQ by incorporating the predicted aligned
+      error (PAE) between chains, giving better calibration on multimers
+      and homomers where pDockQ can over-score large but incorrect
+      interfaces (Zhu et al. 2023, Bioinformatics btad424).
     range: [0, 1]
     interpretation: |
-      - Higher is better; interpretation similar to pDockQ
+      - Higher is better; interpretation similar to pDockQ.
   affinity_pred_value:
-    display_name: Predicted pKd
+    display_name: log10(IC50, μM)
     description: |
-      Predicted binding affinity as pKd (negative log of dissociation constant).
-      Only available when `--affinity` is enabled for protein-ligand complexes.
+      Predicted binding affinity as `log10(IC50)` with IC50 in μM. Lower
+      values indicate stronger binding (e.g., −3 ≈ 1 nM, 0 ≈ 1 μM, 2 ≈
+      100 μM weak/decoy). Only available when `--affinity` is enabled for
+      protein–ligand complexes.
     interpretation: |
-      - Higher pKd = tighter binding (e.g., pKd 9 = 1 nM Kd)
-      - Typical drug-like: pKd 6–10
-      - Weak binder: pKd < 5
+      - Lower = tighter binding
+      - −3 to −1: strong binder (≈ 1 nM – 100 nM)
+      - −1 to 1: moderate (≈ 100 nM – 10 μM)
+      - >1: weak / non-binder
   affinity_probability_binary:
     display_name: Binding Probability
@@ -378,7 +388,7 @@ output_display:
     - title: Binding Affinity Prediction
       fields:
         - key: affinity_pred_value
-          label: Predicted pKd
+          label: log10(IC50, μM)
         - key: affinity_probability_binary
           label: Binding Probability
@@ -397,5 +407,15 @@ examples:
 # ─── References ───
 references:
-  - title: "Boltz-2: Biomolecular Interaction Modeling"
+  - title: "Boltz-2: Jointly Modeling Structure and Binding Affinity (Passaro et al., 2025)"
+    url: "https://doi.org/10.1101/2025.06.14.659707"
+  - title: "Boltz GitHub source"
     url: "https://github.com/jwohlwend/boltz"
+  - title: "ipSAE — Interaction prediction Score from Aligned Errors (Dunbrack 2025)"
+    url: "https://www.biorxiv.org/content/10.1101/2025.02.10.637595v1"
+  - title: "LIS — Local Interaction Score (Kim et al. 2024)"
+    url: "https://www.biorxiv.org/content/10.1101/2024.02.19.580970v1"
+  - title: "pDockQ (Bryant et al., Nat Commun 2022)"
+    url: "https://www.nature.com/articles/s41467-022-28865-w"
+  - title: "pDockQ2 (Zhu et al., Bioinformatics 2023, btad424)"
+    url: "https://academic.oup.com/bioinformatics/article/39/7/btad424/7219714"

amina-cli 0.4.2__tar.gz → 0.5.1__tar.gz

amina-cli 0.4.2.tar.gz → 0.5.1.tar.gz