amina-cli 0.4.2__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. {amina_cli-0.4.2 → amina_cli-0.5.1}/PKG-INFO +1 -1
  2. {amina_cli-0.4.2 → amina_cli-0.5.1}/pyproject.toml +1 -1
  3. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/__init__.py +1 -1
  4. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/jobs_cmd.py +72 -7
  5. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/__init__.py +17 -4
  6. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/docs/proteinmpnn.yaml +44 -37
  7. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/docs/rfdiffusion.yaml +48 -16
  8. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/proteinmpnn.py +2 -2
  9. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/rfdiffusion.py +3 -2
  10. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/boltz2.py +1 -1
  11. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/docs/boltz2.yaml +51 -31
  12. amina_cli-0.5.1/src/amina_cli/commands/tools/folding/docs/esmfold2.yaml +447 -0
  13. amina_cli-0.5.1/src/amina_cli/commands/tools/folding/esmfold2.py +317 -0
  14. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/storage.py +28 -12
  15. {amina_cli-0.4.2 → amina_cli-0.5.1}/.gitignore +0 -0
  16. {amina_cli-0.4.2 → amina_cli-0.5.1}/LICENSE +0 -0
  17. {amina_cli-0.4.2 → amina_cli-0.5.1}/README.md +0 -0
  18. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/auth.py +0 -0
  19. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/client.py +0 -0
  20. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/__init__.py +0 -0
  21. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/auth_cmd.py +0 -0
  22. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/run_cmd.py +0 -0
  23. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/__init__.py +0 -0
  24. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/docs/hydrophobicity.yaml +0 -0
  25. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/docs/mmseqs2_cluster.yaml +0 -0
  26. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/docs/residue_accessibility.yaml +0 -0
  27. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/docs/rmsd.yaml +0 -0
  28. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/docs/sasa.yaml +0 -0
  29. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/docs/simple_rmsd.yaml +0 -0
  30. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/docs/surface_charge.yaml +0 -0
  31. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/docs/usalign.yaml +0 -0
  32. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/hydrophobicity.py +0 -0
  33. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/mmseqs2_cluster.py +0 -0
  34. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/residue_accessibility.py +0 -0
  35. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/rmsd.py +0 -0
  36. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/sasa.py +0 -0
  37. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/simple_rmsd.py +0 -0
  38. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/surface_charge.py +0 -0
  39. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/analysis/usalign.py +0 -0
  40. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/__init__.py +0 -0
  41. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/docs/esm_if1.yaml +0 -0
  42. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/docs/protein_mc.yaml +0 -0
  43. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/esm_if1.py +0 -0
  44. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/design/protein_mc.py +0 -0
  45. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/display.py +0 -0
  46. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/doccard.py +0 -0
  47. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/__init__.py +0 -0
  48. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/docs/esmfold.yaml +0 -0
  49. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/docs/openfold3.yaml +0 -0
  50. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/docs/protenix.yaml +0 -0
  51. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/esmfold.py +0 -0
  52. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/openfold3.py +0 -0
  53. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/folding/protenix.py +0 -0
  54. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/__init__.py +0 -0
  55. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/autodock_vina.py +0 -0
  56. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/diffdock.py +0 -0
  57. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/dockq.py +0 -0
  58. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/docs/autodock_vina.yaml +0 -0
  59. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/docs/diffdock.yaml +0 -0
  60. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/docs/dockq.yaml +0 -0
  61. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/docs/emngly.yaml +0 -0
  62. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/docs/glycosylation_ensemble.yaml +0 -0
  63. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/docs/interface_identifier.yaml +0 -0
  64. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/docs/isoglyp.yaml +0 -0
  65. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/docs/lmngly.yaml +0 -0
  66. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/docs/p2rank.yaml +0 -0
  67. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/docs/pesto.yaml +0 -0
  68. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/emngly.py +0 -0
  69. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/glycosylation_ensemble.py +0 -0
  70. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/interface_identifier.py +0 -0
  71. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/isoglyp.py +0 -0
  72. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/lmngly.py +0 -0
  73. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/p2rank.py +0 -0
  74. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/interactions/pesto.py +0 -0
  75. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/properties/__init__.py +0 -0
  76. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/properties/aminosol.py +0 -0
  77. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/properties/docs/aminosol.yaml +0 -0
  78. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/properties/docs/esm1v.yaml +0 -0
  79. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/properties/docs/esm2_embedding.yaml +0 -0
  80. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/properties/esm1v.py +0 -0
  81. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/properties/esm2_embedding.py +0 -0
  82. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/__init__.py +0 -0
  83. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/activesite_verifier.py +0 -0
  84. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/chain_select.py +0 -0
  85. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/distance_calculator.py +0 -0
  86. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/activesite_verifier.yaml +0 -0
  87. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/chain_select.yaml +0 -0
  88. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/distance_calculator.yaml +0 -0
  89. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/maxit_convert.yaml +0 -0
  90. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/mol_size_calculator.yaml +0 -0
  91. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/obabel_convert.yaml +0 -0
  92. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/pdb_bfactor_overwrite.yaml +0 -0
  93. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/pdb_cleaner.yaml +0 -0
  94. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/pdb_quality_assessment.yaml +0 -0
  95. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/pdb_to_fasta.yaml +0 -0
  96. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/docs/protein_relaxer.yaml +0 -0
  97. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/maxit_convert.py +0 -0
  98. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/mol_size_calculator.py +0 -0
  99. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/obabel_convert.py +0 -0
  100. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/pdb_bfactor_overwrite.py +0 -0
  101. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/pdb_cleaner.py +0 -0
  102. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/pdb_quality_assessment.py +0 -0
  103. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/pdb_to_fasta.py +0 -0
  104. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools/utilities/protein_relaxer.py +0 -0
  105. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/commands/tools_cmd.py +0 -0
  106. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/main.py +0 -0
  107. {amina_cli-0.4.2 → amina_cli-0.5.1}/src/amina_cli/registry.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: amina-cli
3
- Version: 0.4.2
3
+ Version: 0.5.1
4
4
  Summary: CLI for AminoAnalytica protein engineering platform
5
5
  Project-URL: Homepage, https://aminoanalytica.com
6
6
  Project-URL: Documentation, https://docs.aminoanalytica.com
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "amina-cli"
3
- version = "0.4.2"
3
+ version = "0.5.1"
4
4
  description = "CLI for AminoAnalytica protein engineering platform"
5
5
  readme = "README.md"
6
6
  license = {text = "Apache-2.0"}
@@ -9,4 +9,4 @@ Quick start:
9
9
  amina run esmfold --sequence "MKFLILLFNILCLFPVLAADNH"
10
10
  """
11
11
 
12
- __version__ = "0.4.2"
12
+ __version__ = "0.5.1"
@@ -775,7 +775,7 @@ def download(
775
775
  response_path.write_text(json.dumps(result, indent=2, default=str))
776
776
 
777
777
  try:
778
- downloaded = download_results(result, output)
778
+ downloaded, failed = download_results(result, output)
779
779
  if downloaded:
780
780
  console.print(f"[green]\u2713[/green] Downloaded {len(downloaded)} file(s) to {output}/")
781
781
  for path in downloaded:
@@ -797,8 +797,20 @@ def download(
797
797
 
798
798
  tool_metadata = get_tool(job_info.get("tool_name", ""))
799
799
  render_tool_output(result, tool_metadata)
800
- else:
800
+ elif not failed:
801
801
  console.print("[dim]No output files to download.[/dim]")
802
+ # Per-file failures are reported separately from total-failure (StorageError).
803
+ # A non-empty `failed` dict means a partial success — the caller (often an
804
+ # agent) needs to know which files are missing so it can rerun `amina jobs
805
+ # download` to mint fresh signed URLs and retry just the gaps.
806
+ if failed:
807
+ console.print(f"[yellow]Warning:[/yellow] {len(failed)} file(s) failed to download:")
808
+ for file_type, err in failed.items():
809
+ console.print(f" - {file_type}: {err}")
810
+ console.print(
811
+ "[dim]Re-run `amina jobs download` to mint fresh signed URLs and retry the missing files.[/dim]"
812
+ )
813
+ raise typer.Exit(1)
802
814
  except StorageError as e:
803
815
  # Show signed URLs as fallback
804
816
  signed_urls = result.get("signed_urls", {})
@@ -819,6 +831,40 @@ def download(
819
831
  # ═══════════════════════════════════════════════════════════════════════════════
820
832
 
821
833
 
834
+ def _missing_artifacts(response_path: Path, dir_path: Path) -> list[str]:
835
+ """Return artifact file_types in ``response.json`` whose local file is missing.
836
+
837
+ Used by ``reconcile`` to detect partial downloads: a response.json on disk
838
+ with declared ``output_files`` but no corresponding local files indicates
839
+ the original download partially failed (a stale signed URL, an HTTP 400,
840
+ a network blip). The dir needs a retry with fresh URLs, not a skip.
841
+
842
+ Returns an empty list when:
843
+ - response.json doesn't exist or is malformed (caller already handled)
844
+ - the result declared no ``output_files`` (e.g. data-only tools)
845
+ - every declared file is on disk
846
+
847
+ Returns a list of ``file_type`` keys (e.g. ``["pdb_filepath",
848
+ "csv_filepath"]``) for each missing artifact.
849
+ """
850
+ try:
851
+ result = json.loads(response_path.read_text())
852
+ except (OSError, json.JSONDecodeError):
853
+ # If we can't parse it, downstream code will re-resolve from scratch.
854
+ return []
855
+ output_files = result.get("output_files") or {}
856
+ if not isinstance(output_files, dict):
857
+ return []
858
+ missing: list[str] = []
859
+ for file_type, remote_path in output_files.items():
860
+ if not remote_path:
861
+ continue
862
+ local = dir_path / Path(remote_path).name
863
+ if not local.exists():
864
+ missing.append(file_type)
865
+ return missing
866
+
867
+
822
868
  def _find_submission_files(root: Path, recursive: bool) -> list[tuple[str, Path]]:
823
869
  """Walk ``root`` for ``submission.json`` files and extract job_id from each.
824
870
 
@@ -945,11 +991,23 @@ def reconcile(
945
991
  response_path = dir_path / "response.json"
946
992
  entry: dict = {"job_id": job_id, "dir": str(dir_path)}
947
993
 
994
+ # Idempotency check has two parts: response.json must exist AND every
995
+ # declared artifact must be on local disk. Checking only response.json
996
+ # leaves silent gaps when the original download partially failed
997
+ # (HTTP 400 on a single file, stale URL after >1h, etc.) — reconcile
998
+ # would forever mark such dirs as `already_done` despite missing PDB
999
+ # or TRB outputs.
948
1000
  if response_path.exists():
949
- summary["already_done"] += 1
950
- entry["action"] = "skipped_response_json_exists"
951
- per_job.append(entry)
952
- continue
1001
+ missing = _missing_artifacts(response_path, dir_path)
1002
+ if not missing:
1003
+ summary["already_done"] += 1
1004
+ entry["action"] = "skipped_response_json_exists"
1005
+ per_job.append(entry)
1006
+ continue
1007
+ # Fall through to re-resolve status. _resolve_job_status will
1008
+ # mint fresh signed URLs (any in the on-disk response.json are
1009
+ # likely expired) and we'll retry the download below.
1010
+ entry["missing_artifacts_before_retry"] = missing
953
1011
 
954
1012
  job_info = get_job_info(job_id)
955
1013
  if not job_info:
@@ -1012,8 +1070,15 @@ def reconcile(
1012
1070
  entry["action"] = "wrote_response_json"
1013
1071
  if download:
1014
1072
  try:
1015
- downloaded = download_results(result_payload, dir_path)
1073
+ downloaded, failed = download_results(result_payload, dir_path)
1016
1074
  entry["downloaded_files"] = [p.name for p in downloaded]
1075
+ # Per-file failures are partial successes; count and surface
1076
+ # them so the agent can detect missing artifacts without
1077
+ # post-hoc disk inspection. Idempotency check above will
1078
+ # also catch this on the next pass and retry the gaps.
1079
+ if failed:
1080
+ summary["download_failed"] += 1
1081
+ entry["download_failures"] = failed
1017
1082
  except StorageError as dl_err:
1018
1083
  summary["download_failed"] += 1
1019
1084
  entry["download_error"] = str(dl_err)
@@ -382,12 +382,15 @@ def run_tool_with_progress(
382
382
 
383
383
  progress.update(task, description="Downloading results...")
384
384
 
385
- # Download output files
386
- downloaded = []
385
+ # Download output files. download_results returns (downloaded, failed):
386
+ # per-file errors land in `failed` rather than aborting the loop, so a
387
+ # single stale signed URL no longer silently abandons the other files.
388
+ downloaded: list[Path] = []
389
+ failed: dict[str, str] = {}
387
390
  try:
388
- downloaded = download_results(result, output_dir)
391
+ downloaded, failed = download_results(result, output_dir)
389
392
  except StorageError as e:
390
- # If signed_urls are available, show them as fallback
393
+ # Total failure (e.g. no signed URLs and no Supabase credentials).
391
394
  signed_urls = result.get("signed_urls", {})
392
395
  if signed_urls:
393
396
  console.print("\n[yellow]Warning:[/yellow] Could not download files automatically.")
@@ -398,6 +401,16 @@ def run_tool_with_progress(
398
401
  else:
399
402
  console.print(f"\n[yellow]Warning:[/yellow] Download failed: {e}")
400
403
 
404
+ # Surface per-file failures so agents don't silently end up with
405
+ # response.json present but artifacts missing.
406
+ if failed:
407
+ console.print(f"\n[yellow]Warning:[/yellow] {len(failed)} file(s) failed to download:")
408
+ for file_type, err in failed.items():
409
+ console.print(f" - {file_type}: {err}")
410
+ console.print(
411
+ "[dim]Re-run `amina jobs download` to mint fresh signed URLs and retry the missing files.[/dim]"
412
+ )
413
+
401
414
  # Persist the structured response payload (success or failure) so agents
402
415
  # and scripts can read metrics/warnings/cost without re-parsing stdout.
403
416
  # Soft-fail on filesystem errors — the job has already run and been
@@ -28,41 +28,43 @@ when_not_to_use: |
28
28
  tool_algorithm: |
29
29
  ProteinMPNN is an autoregressive message-passing neural network that designs
30
30
  amino acid sequences conditioned on a protein backbone structure. The model
31
- operates on a k-nearest-neighbor graph constructed from C-alpha coordinates
32
- and iteratively passes messages between nodes (residues) and edges (spatial
33
- neighbors).
31
+ is backbone-only (it never sees side-chains) and operates on a
32
+ k-nearest-neighbor graph over atoms.
34
33
 
35
34
  Key stages:
36
35
  1. **Graph construction**: The input PDB backbone is converted to a
37
- k-nearest-neighbor graph based on C-alpha distances.
38
- 2. **Encoder**: A series of message-passing layers encode structural features
39
- (backbone dihedrals, inter-residue distances, orientations) into node and
40
- edge embeddings.
41
- 3. **Decoder**: An autoregressive decoder samples amino acid identities one
42
- residue at a time, conditioned on the structural encoding and previously
43
- sampled residues.
36
+ k-nearest-neighbor graph (k=48) over Cα atoms; edge features encode
37
+ distances between N, Cα, C, O, and a virtual Cβ.
38
+ 2. **Encoder**: 3 message-passing layers with hidden dim 128 encode
39
+ backbone geometry into node and edge embeddings.
40
+ 3. **Decoder**: 3 message-passing layers autoregressively sample amino-acid
41
+ identities one residue at a time in a random decoding order, conditioned
42
+ on the structural encoding and previously sampled residues.
44
43
  4. **Temperature sampling**: The softmax temperature controls sequence
45
44
  diversity — lower temperatures produce sequences closer to the model's
46
45
  most confident prediction; higher temperatures increase diversity.
47
- 5. **Scoring**: Each designed sequence receives a negative log-likelihood
48
- score (lower is better) and a sequence recovery metric measuring
49
- similarity to the original sequence.
46
+ 5. **Scoring**: Each designed sequence receives an average negative
47
+ log-likelihood score (always ≥ 0; lower is better) and a sequence
48
+ recovery metric measuring similarity to the original sequence.
50
49
 
51
50
  additional_context: |
52
- - The **vanilla** model is trained on the full PDB and is the default choice
53
- for most design tasks.
51
+ - The **vanilla** model is a backbone-atom model (N, Cα, C, O, virtual Cβ)
52
+ trained on PDB biological assemblies as of Aug 2021 (≤3.5 Å resolution,
53
+ <10,000 residues). It is the default choice for most design tasks.
54
54
  - The **soluble** model is trained only on soluble proteins and may produce
55
55
  sequences with better solubility characteristics.
56
- - The **ca_only** model uses only C-alpha coordinates, useful when full
57
- backbone atom positions are unavailable or unreliable.
58
- - Lower sampling temperatures (0.1-0.3) produce more conservative, high-
59
- confidence designs. Higher temperatures (0.5-1.0) increase diversity.
56
+ - The **ca_only** model uses only Cα coordinates, useful when full backbone
57
+ atom positions are unavailable or unreliable.
58
+ - Lower sampling temperatures (upstream recommends 0.1–0.3) produce more
59
+ conservative, high-confidence designs. Higher temperatures (0.5–1.0)
60
+ increase diversity but at T=1.0 sequences approach random.
60
61
  - Fixed residues are excluded from the design process entirely — their
61
62
  identity is held constant and used as context for designing other positions.
62
- - Sequence recovery measures the fraction of designed residues that match the
63
- original sequence. High recovery (~0.4-0.6) is typical for well-designed
64
- backbones; very high recovery (>0.8) may indicate the backbone strongly
65
- constrains the sequence space.
63
+ - Sequence recovery measures the fraction of designed residues that match
64
+ the original sequence. The paper reports ~52.4% mean recovery on native
65
+ backbones, so ~0.5 is the typical center for well-folded proteins; very
66
+ high recovery (>0.7) may indicate the backbone strongly constrains the
67
+ sequence space.
66
68
 
67
69
  # ─── Parameters ───
68
70
  # Parameter definitions are canonical here — keep in sync with proteinmpnn.py
@@ -101,9 +103,10 @@ parameters:
101
103
  default: vanilla
102
104
  description: |
103
105
  Model variant to use:
104
- - **vanilla**: Full-atom model trained on the entire PDB (default)
106
+ - **vanilla**: Backbone-atom model (N, Cα, C, O, virtual Cβ) trained on
107
+ PDB biological assemblies as of Aug 2021, ≤3.5 Å (default)
105
108
  - **soluble**: Trained on soluble proteins only
106
- - **ca_only**: Uses only C-alpha coordinates
109
+ - **ca_only**: Uses only Cα coordinates
107
110
 
108
111
  num-sequences:
109
112
  type: integer
@@ -115,11 +118,13 @@ parameters:
115
118
 
116
119
  temperature:
117
120
  type: float
118
- default: 1.0
121
+ default: 0.1
119
122
  range: [0.01, 2.0]
120
123
  description: |
121
- Sampling temperature controlling sequence diversity. Lower values
122
- produce more conservative designs; higher values increase diversity.
124
+ Sampling temperature controlling sequence diversity. Upstream
125
+ ProteinMPNN's default and recommended range is 0.1–0.3; values
126
+ approaching 1.0 produce near-random sequences. Lower = more
127
+ conservative designs; higher = more diverse.
123
128
 
124
129
  seed:
125
130
  type: integer
@@ -169,14 +174,15 @@ output_metrics:
169
174
  best_score:
170
175
  display_name: Best Score
171
176
  description: |
172
- **Best score** is the lowest (most favorable) negative log-likelihood
173
- across all designed sequences. The score measures how well the designed
174
- sequence fits the input backbone according to the ProteinMPNN model.
175
- Lower values indicate higher model confidence that the sequence will
176
- fold into the target structure.
177
+ **Best score** is the lowest (most favorable) average negative
178
+ log-likelihood across all designed sequences. The score measures how
179
+ well the designed sequence fits the input backbone according to the
180
+ ProteinMPNN model. The score is always ≥ 0 (since it is a NLL); lower
181
+ values indicate higher model confidence that the sequence will fold
182
+ into the target structure.
177
183
  interpretation: |
178
- - Lower is better — there is no fixed scale
179
- - Typical values range from roughly -3.0 to 0.0 depending on structure
184
+ - Lower is better; always positive
185
+ - Typical values per the upstream README examples are ~0.7–1.5
180
186
  - Compare across sequences within the same run for relative ranking
181
187
 
182
188
  mean_score:
@@ -198,8 +204,9 @@ output_metrics:
198
204
  designed positions, averaged across all generated sequences.
199
205
  range: [0, 1]
200
206
  interpretation: |
201
- - 0.3–0.5: Typical for well-folded natural proteins
202
- - >0.5: High recovery — backbone strongly constrains sequence space
207
+ - 0.4–0.55: Typical for well-folded native backbones (the paper reports
208
+ a 52.4% mean recovery)
209
+ - >0.6: High recovery — backbone strongly constrains sequence space
203
210
  - <0.3: Low recovery — backbone may accommodate diverse sequences
204
211
  - Very high recovery (>0.8) may indicate a rigid, highly constrained fold
205
212
 
@@ -47,7 +47,7 @@ tool_algorithm: |
47
47
  - **unconditional**: Base_ckpt.pt
48
48
  - **binder-design**: Complex_base_ckpt.pt (default), Complex_beta_ckpt.pt (diverse topologies)
49
49
  - **binder-redesign**: Base_ckpt.pt
50
- - **motif-scaffolding**: Base_ckpt.pt, ActiveSite_ckpt.pt (small motifs), InpaintSeq_ckpt.pt
50
+ - **motif-scaffolding**: Base_ckpt.pt, ActiveSite_ckpt.pt (small motifs), InpaintSeq_ckpt.pt, InpaintSeq_Fold_ckpt.pt (when both inpaint_seq and fold_conditioning are set)
51
51
  - **partial-diffusion**: Base_ckpt.pt
52
52
  - **custom-contigs**: Any of 8 available checkpoints
53
53
 
@@ -60,9 +60,23 @@ additional_context: |
60
60
  binders but may have lower success rates.
61
61
  - Fold conditioning (--fold-conditioning) biases the diffusion toward specific
62
62
  fold topologies using PDB scaffold templates.
63
- - Noise level for partial-diffusion and binder-redesign controls the degree of
64
- structural diversification: lower noise (1-10) = subtle changes, higher noise
65
- (25-50) = major backbone rearrangement.
63
+ - The `--noise` flag maps directly to RFdiffusion's `diffuser.partial_T`,
64
+ which is a discrete diffusion-timestep count (total `diffuser.T=50`).
65
+ Upstream guidance is `partial_T≈20` as a typical starting point; values
66
+ approaching T mean nearly full re-diffusion. Lower values stay closer to
67
+ the input backbone; higher values introduce more change.
68
+ - **binder-redesign accepts any input numbering.** Partial diffusion internally
69
+ requires each chain numbered contiguously from 1, but binder-redesign now
70
+ renumbers the input for you and restores the original chain IDs and residue
71
+ numbers on the output — so a target keeping its biological numbering (e.g.
72
+ residues 257-364) works as-is, and the output is returned with that numbering.
73
+ - **partial-diffusion still requires contiguous-from-1 numbering.** It maps input
74
+ residue positions directly onto the output, so every chain must be numbered
75
+ contiguously starting at residue 1 with no gaps; a chain starting at, say, 50
76
+ is rejected. Renumber first (e.g. the **PDB cleaner** tool, or `pdbtools`'
77
+ `pdb_reres`). This does not apply to
78
+ binder-design, which generates the binder de novo and references the target
79
+ as an explicit motif, so offset target numbering is fine there.
66
80
 
67
81
  # ─── Parameters ───
68
82
  # Parameter definitions are canonical here — keep in sync with rfdiffusion.py
@@ -85,6 +99,9 @@ parameters:
85
99
  Input PDB file. Required for binder-design, binder-redesign,
86
100
  motif-scaffolding, and partial-diffusion modes. Optional for
87
101
  custom-contigs.
102
+ For partial-diffusion, every chain must be numbered contiguously from
103
+ residue 1 (no gaps). binder-redesign handles any numbering automatically
104
+ (renumbered internally, restored on output) — see additional notes.
88
105
 
89
106
  num-designs:
90
107
  type: integer
@@ -109,8 +126,10 @@ parameters:
109
126
  required: false
110
127
  description: |
111
128
  Symmetry type for unconditional and motif-scaffolding modes.
112
- Supports cyclic (c2, c3, c4...), dihedral (d2, d3...), and
113
- tetrahedral symmetry.
129
+ Upstream RFdiffusion currently implements cyclic (c2, c3, c4),
130
+ dihedral (d2, d3…), and tetrahedral symmetry. The schema validator
131
+ also accepts octahedral and icosahedral but upstream marks these
132
+ as "future".
114
133
  examples:
115
134
  - "c4"
116
135
  - "d2"
@@ -162,29 +181,38 @@ parameters:
162
181
  type: string
163
182
  required: false
164
183
  description: |
165
- Target chain ID for binder-redesign mode. Defaults to 'B'.
184
+ Target chain ID(s) for binder-redesign mode. Defaults to 'B'. Accepts one
185
+ or more chains, comma-separated, to redesign the binder against a
186
+ multi-chain target. Each target chain is kept fixed during redesign.
166
187
  examples:
167
188
  - "B"
189
+ - "A,B"
168
190
 
169
191
  noise:
170
192
  type: integer
171
193
  required: false
172
194
  range: [1, 50]
173
195
  description: |
174
- Noise level for partial-diffusion and binder-redesign modes.
175
- Controls the degree of structural diversification. Lower values
176
- (1-10) produce subtle changes; higher values (25-50) cause
177
- major backbone rearrangement. Required for these modes.
196
+ Partial diffusion timestep count (maps to RFdiffusion's
197
+ `diffuser.partial_T`; total `diffuser.T=50`). Required for
198
+ partial-diffusion and binder-redesign modes. Upstream guidance is
199
+ `partial_T≈20` as a typical starting point — lower values stay
200
+ closer to the input backbone; values approaching 50 mean nearly
201
+ full re-diffusion.
178
202
 
179
203
  preserve:
180
204
  type: string
181
205
  required: false
182
206
  description: |
183
- Residues to preserve during partial-diffusion and binder-redesign.
184
- Comma-separated list of residue identifiers or ranges.
207
+ Residues whose sequence is preserved during partial-diffusion and
208
+ binder-redesign. Comma-separated list of residues or ranges, given as
209
+ PDB residue numbers. A chain-prefixed range (e.g. 'X10-25') targets that
210
+ chain; a bare range (e.g. '10-25') applies to the binder chain in
211
+ binder-redesign, or the first chain in partial-diffusion. These are
212
+ mapped internally to the contig positions RFdiffusion expects.
185
213
  examples:
186
- - "10,20,30"
187
- - "A10-25"
214
+ - "10-25"
215
+ - "X10-25,X40-50"
188
216
 
189
217
  contigs:
190
218
  type: string
@@ -259,6 +287,8 @@ examples:
259
287
  command: amina run rfdiffusion -m binder-design -i target.pdb --hotspots A30,A33 --binder-length 80-120 -o ./out/
260
288
  - title: Binder redesign
261
289
  command: amina run rfdiffusion -m binder-redesign -i complex.pdb --noise 25 --binder-chain A --target-chain B -o ./out/
290
+ - title: Binder redesign against a multi-chain target, preserving part of the binder
291
+ command: amina run rfdiffusion -m binder-redesign -i complex.pdb --noise 25 --binder-chain X --target-chain A,B --preserve X1-30 -o ./out/
262
292
  - title: Motif scaffolding
263
293
  command: amina run rfdiffusion -m motif-scaffolding -i motif.pdb --contigs "10-40/A163-181/10-40" -o ./out/
264
294
  - title: Partial diffusion
@@ -268,5 +298,7 @@ examples:
268
298
 
269
299
  # ─── References ───
270
300
  references:
271
- - title: "RFDiffusion: De novo protein design by deep network hallucination (Watson et al., 2023)"
301
+ - title: "De novo design of protein structure and function with RFdiffusion (Watson et al., Nature 620, 1089–1100, 2023)"
302
+ url: "https://doi.org/10.1038/s41586-023-06415-8"
303
+ - title: "RFdiffusion GitHub source (RosettaCommons)"
272
304
  url: "https://github.com/RosettaCommons/RFdiffusion"
@@ -52,9 +52,9 @@ def register(app: typer.Typer):
52
52
  help="Number of sequences to generate (1-50)",
53
53
  ),
54
54
  temperature: float = typer.Option(
55
- 1.0,
55
+ 0.1,
56
56
  "--temperature",
57
- help="Sampling temperature (0.01-2.0)",
57
+ help="Sampling temperature (0.01-2.0; upstream default 0.1, recommended 0.1-0.3)",
58
58
  ),
59
59
  seed: int = typer.Option(
60
60
  37,
@@ -118,7 +118,7 @@ def register(app: typer.Typer):
118
118
  None,
119
119
  "--target-chain",
120
120
  "-tc",
121
- help="Target chain ID (binder-redesign)",
121
+ help="Target chain ID(s) (binder-redesign). Comma-separate for multiple, e.g. 'A,B'",
122
122
  ),
123
123
  # Partial diffusion / redesign options
124
124
  noise_level: Optional[int] = typer.Option(
@@ -222,7 +222,8 @@ def register(app: typer.Typer):
222
222
  raise typer.Exit(1)
223
223
  params["pdb_content"] = pdb_content
224
224
  params["binder_chain"] = binder_chain or "A"
225
- params["target_chain"] = target_chain or "B"
225
+ # Accept one or more target chains, comma-separated (e.g. "A,B").
226
+ params["target_chain"] = [c.strip() for c in target_chain.split(",")] if target_chain else ["B"]
226
227
  params["noise_level"] = noise_level
227
228
  if preserve:
228
229
  params["preserve_sequences"] = [p.strip() for p in preserve.split(",")]
@@ -287,7 +287,7 @@ def register(app: typer.Typer):
287
287
  "recycling_steps": recycling_steps,
288
288
  "sampling_steps": sampling_steps,
289
289
  "diffusion_samples": diffusion_samples,
290
- "step_scale": 1.638,
290
+ "step_scale": 1.5,
291
291
  "use_potentials": False,
292
292
  "enable_affinity_prediction": enable_affinity,
293
293
  }
@@ -19,7 +19,7 @@ when_to_use: |
19
19
  - Multi-chain protein complex modeling
20
20
  - Protein-nucleic acid (DNA/RNA) complex structures
21
21
  - When you need confidence metrics for predicted interfaces (ipTM, ipSAE)
22
- - When binding affinity prediction (pKd) is needed alongside structure
22
+ - When binding affinity prediction (log10 IC50) is needed alongside structure
23
23
 
24
24
  when_not_to_use: |
25
25
  - Single-chain protein folding only → use **ESMFold** (faster, no MSA needed)
@@ -38,7 +38,7 @@ tool_algorithm: |
38
38
  1. **Input processing**: Sequences are tokenized; MSAs are generated or loaded;
39
39
  ligands are parsed from SMILES/CCD.
40
40
  2. **Trunk module**: Processes pairwise and MSA representations through
41
- Evoformer-like attention blocks.
41
+ PairFormer attention blocks (PairformerArgsV2, 64 blocks).
42
42
  3. **Diffusion sampling**: Starting from noise, atom positions are refined
43
43
  over N denoising steps (default 200). More steps = finer detail.
44
44
  4. **Recycling**: The predicted structure is fed back through the trunk for
@@ -46,8 +46,9 @@ tool_algorithm: |
46
46
  5. **Confidence estimation**: PAE (Predicted Aligned Error), pLDDT, pTM, and
47
47
  ipTM are computed from internal representations. Additional metrics (ipSAE,
48
48
  LIS, pDockQ) are derived from the PAE matrix.
49
- 6. **Affinity prediction** (optional): A separate head predicts binding
50
- affinity (pKd) for protein-ligand complexes.
49
+ 6. **Affinity prediction** (optional): A separate PairFormer-based head
50
+ predicts a binary binding likelihood and an IC50-like affinity value
51
+ (log10 IC50 in μM) for protein–ligand complexes.
51
52
 
52
53
  additional_context: |
53
54
  - MSA generation adds ~60s to runtime for sequences without precomputed
@@ -157,8 +158,8 @@ parameters:
157
158
  type: boolean
158
159
  default: false
159
160
  description: |
160
- Enable binding affinity prediction (pKd). Only works for
161
- protein-ligand complexes — requires at least one `--ligand`.
161
+ Enable binding affinity prediction (log10 IC50 in μM). Only works
162
+ for protein-ligand complexes — requires at least one `--ligand`.
162
163
 
163
164
  job-name:
164
165
  type: string
@@ -199,9 +200,9 @@ output_metrics:
199
200
  ptm:
200
201
  display_name: pTM
201
202
  description: |
202
- **pTM** (predicted TM-score) estimates the global fold quality of the
203
- entire predicted structure. Derived from the PAE matrix, measuring
204
- overall structural similarity to the ground truth.
203
+ **pTM** (predicted TM-score) is the model's estimate of the TM-score its
204
+ structure would achieve against ground truth, using only the model's own
205
+ PAE matrix (no ground-truth structure is available at inference).
205
206
  range: [0, 1]
206
207
  interpretation: |
207
208
  - >0.8: High-quality global fold prediction
@@ -224,8 +225,9 @@ output_metrics:
224
225
  confidence_score:
225
226
  display_name: Confidence Score
226
227
  description: |
227
- Combined confidence metric computed as `0.2 * ipTM + 0.8 * pTM`.
228
- Gives a single overall quality score weighted toward global fold quality.
228
+ Aggregated ranking score used by Boltz-2 to order samples. Computed as
229
+ `0.8 * complex_plddt + 0.2 * ipTM` (Boltz-2 falls back to `0.8 * pLDDT
230
+ + 0.2 * pTM` when there are no interfaces). Weighted toward pLDDT.
229
231
  range: [0, 1]
230
232
  interpretation: |
231
233
  - >0.8: High overall prediction confidence
@@ -274,44 +276,52 @@ output_metrics:
274
276
  lis:
275
277
  display_name: LIS
276
278
  description: |
277
- **LIS** (Local Interface Score) evaluates interface quality using
278
- local PAE values between interface residue pairs. Focuses on nearby
279
- contacts rather than global alignment.
279
+ **LIS** (Local Interaction Score) evaluates protein-protein interaction
280
+ quality from inverted PAE values within the local interaction area
281
+ (residue pairs with PAE ≤ 12 Å). Introduced by Kim et al. 2024
282
+ (bioRxiv 2024.02.19.580970).
280
283
  range: [0, 1]
281
284
  interpretation: |
282
- - >0.5: Good interface contacts predicted
283
- - <0.5: Weak interface prediction
285
+ - Higher is better; AFM-LIS benchmarks Best LIS ≥ 0.203 / Average LIS
286
+ ≥ 0.073 as positive-call thresholds.
284
287
 
285
288
  pdockq:
286
289
  display_name: pDockQ
287
290
  description: |
288
291
  **pDockQ** (predicted DockQ) estimates the quality of a predicted
289
292
  protein-protein docked complex, calibrated against the DockQ benchmark.
290
- Computed from the number of interface contacts and average interface pLDDT.
293
+ Bryant et al. 2022 define it as a sigmoidal fit on `average interface
294
+ pLDDT × log(interface contacts)`.
291
295
  range: [0, 1]
292
296
  interpretation: |
293
- - >0.5: Acceptable docking quality
294
- - >0.23: Possible interaction
295
- - <0.23: Unlikely to be a correct docked pose
297
+ - >0.5: confidently predicted interface
298
+ - 0.23–0.5: borderline (pDockQ is noisy in this range; 0.23 is the
299
+ DockQ-acceptable calibration target, not a pDockQ cutoff)
300
+ - <0.23: unlikely interaction
296
301
 
297
302
  pdockq2:
298
303
  display_name: pDockQ2
299
304
  description: |
300
- **pDockQ2** is an improved version of pDockQ that uses a multi-model
301
- scoring approach for better discrimination of correct interfaces.
305
+ **pDockQ2** augments pDockQ by incorporating the predicted aligned
306
+ error (PAE) between chains, giving better calibration on multimers
307
+ and homomers where pDockQ can over-score large but incorrect
308
+ interfaces (Zhu et al. 2023, Bioinformatics btad424).
302
309
  range: [0, 1]
303
310
  interpretation: |
304
- - Higher is better; interpretation similar to pDockQ
311
+ - Higher is better; interpretation similar to pDockQ.
305
312
 
306
313
  affinity_pred_value:
307
- display_name: Predicted pKd
314
+ display_name: log10(IC50, μM)
308
315
  description: |
309
- Predicted binding affinity as pKd (negative log of dissociation constant).
310
- Only available when `--affinity` is enabled for protein-ligand complexes.
316
+ Predicted binding affinity as `log10(IC50)` with IC50 in μM. Lower
317
+ values indicate stronger binding (e.g., −3 ≈ 1 nM, 0 ≈ 1 μM, 2 ≈
318
+ 100 μM weak/decoy). Only available when `--affinity` is enabled for
319
+ protein–ligand complexes.
311
320
  interpretation: |
312
- - Higher pKd = tighter binding (e.g., pKd 9 = 1 nM Kd)
313
- Typical drug-like: pKd 6–10
314
- - Weak binder: pKd < 5
321
+ - Lower = tighter binding
322
+ - −3 to −1: strong binder (≈ 1 nM – 100 nM)
323
+ - −1 to 1: moderate (≈ 100 nM – 10 μM)
324
+ - >1: weak / non-binder
315
325
 
316
326
  affinity_probability_binary:
317
327
  display_name: Binding Probability
@@ -378,7 +388,7 @@ output_display:
378
388
  - title: Binding Affinity Prediction
379
389
  fields:
380
390
  - key: affinity_pred_value
381
- label: Predicted pKd
391
+ label: log10(IC50, μM)
382
392
  - key: affinity_probability_binary
383
393
  label: Binding Probability
384
394
 
@@ -397,5 +407,15 @@ examples:
397
407
 
398
408
  # ─── References ───
399
409
  references:
400
- - title: "Boltz-2: Biomolecular Interaction Modeling"
410
+ - title: "Boltz-2: Jointly Modeling Structure and Binding Affinity (Passaro et al., 2025)"
411
+ url: "https://doi.org/10.1101/2025.06.14.659707"
412
+ - title: "Boltz GitHub source"
401
413
  url: "https://github.com/jwohlwend/boltz"
414
+ - title: "ipSAE — Interaction prediction Score from Aligned Errors (Dunbrack 2025)"
415
+ url: "https://www.biorxiv.org/content/10.1101/2025.02.10.637595v1"
416
+ - title: "LIS — Local Interaction Score (Kim et al. 2024)"
417
+ url: "https://www.biorxiv.org/content/10.1101/2024.02.19.580970v1"
418
+ - title: "pDockQ (Bryant et al., Nat Commun 2022)"
419
+ url: "https://www.nature.com/articles/s41467-022-28865-w"
420
+ - title: "pDockQ2 (Zhu et al., Bioinformatics 2023, btad424)"
421
+ url: "https://academic.oup.com/bioinformatics/article/39/7/btad424/7219714"