boltz-vsynthes 1.0.6__tar.gz → 1.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. {boltz_vsynthes-1.0.6/src/boltz_vsynthes.egg-info → boltz_vsynthes-1.0.8}/PKG-INFO +1 -1
  2. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/pyproject.toml +1 -1
  3. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/mol.py +4 -0
  4. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/parse/schema.py +91 -265
  5. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/main.py +78 -61
  6. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8/src/boltz_vsynthes.egg-info}/PKG-INFO +1 -1
  7. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/LICENSE +0 -0
  8. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/README.md +0 -0
  9. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/setup.cfg +0 -0
  10. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/__init__.py +0 -0
  11. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/__init__.py +0 -0
  12. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/const.py +0 -0
  13. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/crop/__init__.py +0 -0
  14. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/crop/affinity.py +0 -0
  15. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/crop/boltz.py +0 -0
  16. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/crop/cropper.py +0 -0
  17. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/feature/__init__.py +0 -0
  18. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/feature/featurizer.py +0 -0
  19. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/feature/featurizerv2.py +0 -0
  20. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/feature/symmetry.py +0 -0
  21. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/__init__.py +0 -0
  22. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/dynamic/__init__.py +0 -0
  23. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/dynamic/date.py +0 -0
  24. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/dynamic/filter.py +0 -0
  25. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/dynamic/max_residues.py +0 -0
  26. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/dynamic/resolution.py +0 -0
  27. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/dynamic/size.py +0 -0
  28. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/dynamic/subset.py +0 -0
  29. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/static/__init__.py +0 -0
  30. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/static/filter.py +0 -0
  31. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/static/ligand.py +0 -0
  32. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/filter/static/polymer.py +0 -0
  33. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/module/__init__.py +0 -0
  34. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/module/inference.py +0 -0
  35. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/module/inferencev2.py +0 -0
  36. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/module/training.py +0 -0
  37. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/module/trainingv2.py +0 -0
  38. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/msa/__init__.py +0 -0
  39. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/msa/mmseqs2.py +0 -0
  40. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/pad.py +0 -0
  41. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/parse/__init__.py +0 -0
  42. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/parse/a3m.py +0 -0
  43. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/parse/csv.py +0 -0
  44. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/parse/fasta.py +0 -0
  45. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/parse/mmcif.py +0 -0
  46. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/parse/mmcif_with_constraints.py +0 -0
  47. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/parse/yaml.py +0 -0
  48. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/sample/__init__.py +0 -0
  49. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/sample/cluster.py +0 -0
  50. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/sample/distillation.py +0 -0
  51. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/sample/random.py +0 -0
  52. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/sample/sampler.py +0 -0
  53. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/tokenize/__init__.py +0 -0
  54. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/tokenize/boltz.py +0 -0
  55. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/tokenize/boltz2.py +0 -0
  56. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/tokenize/tokenizer.py +0 -0
  57. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/types.py +0 -0
  58. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/write/__init__.py +0 -0
  59. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/write/mmcif.py +0 -0
  60. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/write/pdb.py +0 -0
  61. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/write/utils.py +0 -0
  62. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/data/write/writer.py +0 -0
  63. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/__init__.py +0 -0
  64. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/__init__.py +0 -0
  65. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/attention.py +0 -0
  66. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/attentionv2.py +0 -0
  67. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/confidence_utils.py +0 -0
  68. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/dropout.py +0 -0
  69. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/initialize.py +0 -0
  70. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/outer_product_mean.py +0 -0
  71. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/pair_averaging.py +0 -0
  72. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/pairformer.py +0 -0
  73. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/relative.py +0 -0
  74. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/transition.py +0 -0
  75. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/triangular_attention/__init__.py +0 -0
  76. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/triangular_attention/attention.py +0 -0
  77. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/triangular_attention/primitives.py +0 -0
  78. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/triangular_attention/utils.py +0 -0
  79. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/layers/triangular_mult.py +0 -0
  80. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/loss/__init__.py +0 -0
  81. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/loss/bfactor.py +0 -0
  82. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/loss/confidence.py +0 -0
  83. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/loss/confidencev2.py +0 -0
  84. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/loss/diffusion.py +0 -0
  85. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/loss/diffusionv2.py +0 -0
  86. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/loss/distogram.py +0 -0
  87. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/loss/distogramv2.py +0 -0
  88. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/loss/validation.py +0 -0
  89. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/models/__init__.py +0 -0
  90. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/models/boltz1.py +0 -0
  91. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/models/boltz2.py +0 -0
  92. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/__init__.py +0 -0
  93. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/affinity.py +0 -0
  94. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/confidence.py +0 -0
  95. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/confidence_utils.py +0 -0
  96. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/confidencev2.py +0 -0
  97. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/diffusion.py +0 -0
  98. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/diffusion_conditioning.py +0 -0
  99. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/diffusionv2.py +0 -0
  100. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/encoders.py +0 -0
  101. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/encodersv2.py +0 -0
  102. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/transformers.py +0 -0
  103. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/transformersv2.py +0 -0
  104. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/trunk.py +0 -0
  105. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/trunkv2.py +0 -0
  106. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/modules/utils.py +0 -0
  107. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/optim/__init__.py +0 -0
  108. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/optim/ema.py +0 -0
  109. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/optim/scheduler.py +0 -0
  110. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/potentials/__init__.py +0 -0
  111. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/potentials/potentials.py +0 -0
  112. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz/model/potentials/schedules.py +0 -0
  113. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz_vsynthes.egg-info/SOURCES.txt +0 -0
  114. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz_vsynthes.egg-info/dependency_links.txt +0 -0
  115. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz_vsynthes.egg-info/entry_points.txt +0 -0
  116. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz_vsynthes.egg-info/requires.txt +0 -0
  117. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/src/boltz_vsynthes.egg-info/top_level.txt +0 -0
  118. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/tests/test_kernels.py +0 -0
  119. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/tests/test_regression.py +0 -0
  120. {boltz_vsynthes-1.0.6 → boltz_vsynthes-1.0.8}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: boltz-vsynthes
3
- Version: 1.0.6
3
+ Version: 1.0.8
4
4
  Summary: Boltz for V-Synthes
5
5
  Requires-Python: <3.13,>=3.10
6
6
  Description-Content-Type: text/markdown
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "boltz-vsynthes"
7
- version = "1.0.6"
7
+ version = "1.0.8"
8
8
  requires-python = ">=3.10,<3.13"
9
9
  description = "Boltz for V-Synthes"
10
10
  readme = "README.md"
@@ -30,6 +30,10 @@ def load_molecules(moldir: str, molecules: list[str]) -> dict[str, Mol]:
30
30
  """
31
31
  loaded_mols = {}
32
32
  for molecule in molecules:
33
+ # Skip if it's a SMILES string (starts with LIG)
34
+ if molecule.startswith("LIG"):
35
+ continue
36
+
33
37
  path = Path(moldir) / f"{molecule}.pkl"
34
38
  if not path.exists():
35
39
  msg = f"CCD component {molecule} not found!"
@@ -621,6 +621,9 @@ def get_mol(ccd: str, mols: dict, moldir: str) -> Mol:
621
621
  Return mol with ccd from mols if it is in mols. Otherwise load it from moldir,
622
622
  add it to mols, and return the mol.
623
623
  """
624
+ # Skip if it's a SMILES string (starts with LIG)
625
+ if ccd.startswith("LIG"):
626
+ return None
624
627
  mol = mols.get(ccd)
625
628
  if mol is None:
626
629
  mol = load_molecules(moldir, [ccd])[ccd]
@@ -655,6 +658,10 @@ def parse_ccd_residue(
655
658
  The output ParsedResidue, if successful.
656
659
 
657
660
  """
661
+ # Skip if it's a SMILES string (starts with LIG)
662
+ if name.startswith("LIG"):
663
+ return None
664
+
658
665
  unk_chirality = const.chirality_type_ids[const.unk_chirality_type]
659
666
 
660
667
  # Check if this is a single heavy atom CCD residue
@@ -929,86 +936,32 @@ def token_spec_to_ids(
929
936
  contacts.append((chain_to_idx[chain_name], residue_index_or_atom_name - 1))
930
937
 
931
938
 
932
- def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
933
- name: str,
934
- schema: dict,
935
- ccd: Mapping[str, Mol],
936
- mol_dir: Optional[Path] = None,
937
- boltz_2: bool = False,
938
- ) -> Target:
939
- """Parse a Boltz input yaml / json.
940
-
941
- The input file should be a dictionary with the following format:
942
-
943
- version: 1
944
- sequences:
945
- - protein:
946
- id: A
947
- sequence: "MADQLTEEQIAEFKEAFSLF" # or pdb: "1a2k" or pdb: "path/to/file.pdb"
948
- msa: path/to/msa1.a3m
949
- - protein:
950
- id: [B, C]
951
- sequence: "AKLSILPWGHC"
952
- msa: path/to/msa2.a3m
953
- - rna:
954
- id: D
955
- sequence: "GCAUAGC"
956
- - ligand:
957
- id: E
958
- smiles: "CC1=CC=CC=C1"
959
- constraints:
960
- - bond:
961
- atom1: [A, 1, CA]
962
- atom2: [A, 2, N]
963
- - pocket:
964
- binder: E
965
- contacts: [[B, 1], [B, 2]]
966
- max_distance: 6
967
- - contact:
968
- token1: [A, 1]
969
- token2: [B, 1]
970
- max_distance: 6
971
- templates:
972
- - cif: path/to/template.cif
973
- properties:
974
- - affinity:
975
- binder: E
939
+ def parse_boltz_schema(schema: dict) -> dict:
940
+ """Parse the Boltz input schema.
976
941
 
977
942
  Parameters
978
943
  ----------
979
- name : str
980
- A name for the input.
981
944
  schema : dict
982
945
  The input schema.
983
- components : dict
984
- Dictionary of CCD components.
985
- mol_dir: Path
986
- Path to the directory containing the molecules.
987
- boltz2: bool
988
- Whether to parse the input for Boltz2.
989
946
 
990
947
  Returns
991
948
  -------
992
- Target
993
- The parsed target.
949
+ dict
950
+ The parsed schema.
994
951
 
995
952
  """
996
- # Assert version 1
997
- version = schema.get("version", 1)
998
- if version != 1:
999
- msg = f"Invalid version {version} in input!"
953
+ # Check version
954
+ if "version" not in schema:
955
+ msg = "Schema must have a version field"
1000
956
  raise ValueError(msg)
1001
957
 
1002
- # Disable rdkit warnings
1003
- blocker = rdBase.BlockLogs() # noqa: F841
1004
-
1005
- # First group items that have the same type, sequence and modifications
958
+ # Group items by entity type and sequence
1006
959
  items_to_group = {}
1007
960
  chain_name_to_entity_type = {}
1008
-
961
+
1009
962
  # Keep track of ligand IDs
1010
963
  ligand_id = 1
1011
- ligand_id_map = {} # Maps user-provided IDs to internal LIG1, LIG2, etc.
964
+ ligand_id_map = {}
1012
965
 
1013
966
  # Parse sequences
1014
967
  for item in schema["sequences"]:
@@ -1057,16 +1010,37 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
1057
1010
  msg = "Protein must have either 'sequence' or 'pdb' field"
1058
1011
  raise ValueError(msg)
1059
1012
  elif entity_type == "ligand":
1060
- assert "smiles" in item[entity_type] or "ccd" in item[entity_type]
1061
- assert "smiles" not in item[entity_type] or "ccd" not in item[entity_type]
1013
+ # Support for SMILES, CCD, and SDF
1062
1014
  if "smiles" in item[entity_type]:
1063
1015
  seq = str(item[entity_type]["smiles"])
1064
1016
  # Map user-provided ID to internal LIG1, LIG2, etc.
1065
1017
  for id in entity_id:
1066
1018
  ligand_id_map[id] = f"LIG{ligand_id}"
1067
1019
  ligand_id += 1
1068
- else:
1020
+ elif "ccd" in item[entity_type]:
1069
1021
  seq = str(item[entity_type]["ccd"])
1022
+ # For CCD ligands, use the CCD code as the internal ID
1023
+ for id in entity_id:
1024
+ ligand_id_map[id] = seq
1025
+ elif "sdf" in item[entity_type]:
1026
+ sdf_path = Path(item[entity_type]["sdf"])
1027
+ if not sdf_path.exists():
1028
+ msg = f"SDF file not found: {sdf_path}"
1029
+ raise FileNotFoundError(msg)
1030
+ # Read SDF and convert to SMILES
1031
+ from rdkit import Chem
1032
+ mol = Chem.SDMolSupplier(str(sdf_path))[0]
1033
+ if mol is None:
1034
+ msg = f"Failed to read SDF file: {sdf_path}"
1035
+ raise ValueError(msg)
1036
+ seq = Chem.MolToSmiles(mol)
1037
+ # Map user-provided ID to internal LIG1, LIG2, etc.
1038
+ for id in entity_id:
1039
+ ligand_id_map[id] = f"LIG{ligand_id}"
1040
+ ligand_id += 1
1041
+ else:
1042
+ msg = "Ligand must have either 'smiles', 'ccd', or 'sdf' field"
1043
+ raise ValueError(msg)
1070
1044
 
1071
1045
  # Group items by entity
1072
1046
  items_to_group.setdefault((entity_type, seq), []).append(item)
@@ -1077,208 +1051,60 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
1077
1051
  for chain_name in chain_names:
1078
1052
  chain_name_to_entity_type[chain_name] = entity_type
1079
1053
 
1080
- # Check if any affinity ligand is present
1081
- affinity_ligands = set()
1082
- properties = schema.get("properties", [])
1083
- for prop in properties:
1084
- if "affinity" in prop:
1085
- binder = prop["affinity"]["binder"]
1086
- if binder in ligand_id_map:
1087
- binder = ligand_id_map[binder] # Convert to internal LIG1, LIG2, etc.
1088
- affinity_ligands.add(binder)
1089
-
1090
- # Parse each group
1091
- chains = []
1092
- extra_mols = {}
1093
- for (entity_type, seq), items in items_to_group.items():
1094
- # Get entity id
1095
- entity_id = items[0][entity_type]["id"]
1054
+ # Get all proteins and ligands
1055
+ proteins = []
1056
+ ligands = []
1057
+ for item in schema["sequences"]:
1058
+ entity_type = list(item.keys())[0]
1059
+ entity_id = item[entity_type]["id"]
1096
1060
  entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
1097
-
1098
- # Check if this entity has affinity
1099
- affinity = any(entity in affinity_ligands for entity in entity_id)
1100
-
1101
- # Parse a protein
1102
1061
  if entity_type == "protein":
1103
- # Get MSA
1104
- msa = items[0][entity_type].get("msa")
1105
- if msa is not None:
1106
- msa = Path(msa)
1107
- if not msa.exists():
1108
- msg = f"MSA file not found: {msa}"
1109
- raise FileNotFoundError(msg)
1110
- with msa.open("r") as f:
1111
- msa_data = f.read()
1112
- else:
1113
- msa_data = None
1114
-
1115
- # Parse sequence
1116
- residues = []
1117
- for res_idx, code in enumerate(seq):
1118
- # Get mol
1119
- ref_mol = get_mol(code, ccd, mol_dir)
1120
-
1121
- # Parse residue
1122
- residue = parse_ccd_residue(
1123
- name=code,
1124
- ref_mol=ref_mol,
1125
- res_idx=res_idx,
1126
- )
1127
- residues.append(residue)
1128
-
1129
- # Create protein chain
1130
- parsed_chain = ParsedChain(
1131
- entity=entity_id,
1132
- residues=residues,
1133
- type=const.chain_type_ids["PROTEIN"],
1134
- cyclic_period=0,
1135
- sequence=seq,
1136
- affinity=affinity,
1137
- affinity_mw=None,
1138
- )
1139
-
1140
- # Parse a non-polymer
1141
- elif (entity_type == "ligand") and "ccd" in (items[0][entity_type]):
1142
- seq = items[0][entity_type]["ccd"]
1143
-
1144
- if isinstance(seq, str):
1145
- seq = [seq]
1146
-
1147
- if affinity and len(seq) > 1:
1148
- msg = "Cannot compute affinity for multi residue ligands!"
1149
- raise ValueError(msg)
1150
-
1151
- residues = []
1152
- affinity_mw = None
1153
- for res_idx, code in enumerate(seq):
1154
- # Get mol
1155
- ref_mol = get_mol(code, ccd, mol_dir)
1156
-
1157
- if affinity:
1158
- affinity_mw = AllChem.Descriptors.MolWt(ref_mol)
1159
-
1160
- # Parse residue
1161
- residue = parse_ccd_residue(
1162
- name=code,
1163
- ref_mol=ref_mol,
1164
- res_idx=res_idx,
1165
- )
1166
- residues.append(residue)
1167
-
1168
- # Create multi ligand chain
1169
- parsed_chain = ParsedChain(
1170
- entity=entity_id,
1171
- residues=residues,
1172
- type=const.chain_type_ids["NONPOLYMER"],
1173
- cyclic_period=0,
1174
- sequence=None,
1175
- affinity=affinity,
1176
- affinity_mw=affinity_mw,
1177
- )
1178
-
1179
- assert not items[0][entity_type].get(
1180
- "cyclic", False
1181
- ), "Cyclic flag is not supported for ligands"
1182
-
1183
- elif (entity_type == "ligand") and ("smiles" in items[0][entity_type]):
1184
- seq = items[0][entity_type]["smiles"]
1185
-
1186
- if affinity:
1187
- seq = standardize(seq)
1188
-
1189
- mol = AllChem.MolFromSmiles(seq)
1190
- mol = AllChem.AddHs(mol)
1062
+ proteins.extend(entity_id)
1063
+ elif entity_type == "ligand":
1064
+ ligands.extend(entity_id)
1191
1065
 
1192
- # Set atom names
1193
- canonical_order = AllChem.CanonicalRankAtoms(mol)
1194
- for atom, can_idx in zip(mol.GetAtoms(), canonical_order):
1195
- atom_name = atom.GetSymbol().upper() + str(can_idx + 1)
1196
- if len(atom_name) > 4:
1197
- msg = (
1198
- f"{seq} has an atom with a name longer than "
1199
- f"4 characters: {atom_name}."
1200
- )
1066
+ # Generate properties for each protein-ligand pair
1067
+ new_properties = []
1068
+ for prop in schema.get("properties", []):
1069
+ if "affinity" in prop:
1070
+ affinity = prop["affinity"]
1071
+ # Handle protein as binder
1072
+ if "protein" in affinity:
1073
+ binder = affinity["protein"]
1074
+ if binder not in proteins:
1075
+ msg = f"Protein {binder} not found in sequences"
1201
1076
  raise ValueError(msg)
1202
- atom.SetProp("name", atom_name)
1203
-
1204
- success = compute_3d_conformer(mol)
1205
- if not success:
1206
- msg = f"Failed to compute 3D conformer for {seq}"
1207
- raise ValueError(msg)
1208
-
1209
- mol_no_h = AllChem.RemoveHs(mol, sanitize=False)
1210
- affinity_mw = AllChem.Descriptors.MolWt(mol_no_h) if affinity else None
1211
-
1212
- # Use the mapped internal ID (LIG1, LIG2, etc.)
1213
- internal_id = ligand_id_map[entity_id[0]]
1214
- extra_mols[internal_id] = mol_no_h
1215
- residue = parse_ccd_residue(
1216
- name=internal_id,
1217
- ref_mol=mol,
1218
- res_idx=0,
1219
- )
1220
-
1221
- parsed_chain = ParsedChain(
1222
- entity=entity_id,
1223
- residues=[residue],
1224
- type=const.chain_type_ids["NONPOLYMER"],
1225
- cyclic_period=0,
1226
- sequence=None,
1227
- affinity=affinity,
1228
- affinity_mw=affinity_mw,
1229
- )
1230
-
1231
- assert not items[0][entity_type].get(
1232
- "cyclic", False
1233
- ), "Cyclic flag is not supported for ligands"
1234
-
1235
- else:
1236
- msg = f"Invalid entity type: {entity_type}"
1237
- raise ValueError(msg)
1238
-
1239
- chains.append(parsed_chain)
1240
-
1241
- # Parse constraints
1242
- constraints = []
1243
- for constraint in schema.get("constraints", []):
1244
- if "bond" in constraint:
1245
- atom1 = constraint["bond"]["atom1"]
1246
- atom2 = constraint["bond"]["atom2"]
1247
- constraints.append(ParsedBond(atom1, atom2))
1248
- elif "pocket" in constraint:
1249
- binder = constraint["pocket"]["binder"]
1250
- if binder in ligand_id_map:
1251
- binder = ligand_id_map[binder] # Convert to internal LIG1, LIG2, etc.
1252
- contacts = constraint["pocket"]["contacts"]
1253
- max_distance = constraint["pocket"].get("max_distance", 6.0)
1254
- constraints.append(ParsedPocket(binder, contacts, max_distance))
1255
- elif "contact" in constraint:
1256
- token1 = constraint["contact"]["token1"]
1257
- token2 = constraint["contact"]["token2"]
1258
- max_distance = constraint["contact"].get("max_distance", 6.0)
1259
- constraints.append(ParsedContact(token1, token2, max_distance))
1260
- else:
1261
- msg = f"Invalid constraint type: {list(constraint.keys())[0]}"
1262
- raise ValueError(msg)
1263
-
1264
- # Parse templates
1265
- templates = []
1266
- for template in schema.get("templates", []):
1267
- cif = template["cif"]
1268
- chain_id = template.get("chain_id")
1269
- template_id = template.get("template_id")
1270
- templates.append(ParsedTemplate(cif, chain_id, template_id))
1271
-
1272
- # Create target
1273
- target = Target(
1274
- name=name,
1275
- chains=chains,
1276
- constraints=constraints,
1277
- templates=templates,
1278
- extra_mols=extra_mols,
1279
- )
1280
-
1281
- return target
1077
+ # Generate pairs with all ligands
1078
+ for ligand in ligands:
1079
+ if ligand in ligand_id_map:
1080
+ ligand = ligand_id_map[ligand] # Convert to internal LIG1, LIG2, etc.
1081
+ new_properties.append({
1082
+ "affinity": {
1083
+ "binder": binder,
1084
+ "ligand": ligand
1085
+ }
1086
+ })
1087
+ # Handle ligand as binder (backward compatibility)
1088
+ elif "binder" in affinity:
1089
+ binder = affinity["binder"]
1090
+ if binder not in proteins:
1091
+ msg = f"Protein {binder} not found in sequences"
1092
+ raise ValueError(msg)
1093
+ # Generate pairs with all ligands
1094
+ for ligand in ligands:
1095
+ if ligand in ligand_id_map:
1096
+ ligand = ligand_id_map[ligand] # Convert to internal LIG1, LIG2, etc.
1097
+ new_properties.append({
1098
+ "affinity": {
1099
+ "binder": binder,
1100
+ "ligand": ligand
1101
+ }
1102
+ })
1103
+
1104
+ # Update schema with generated properties
1105
+ schema["properties"] = new_properties
1106
+
1107
+ return schema
1282
1108
 
1283
1109
 
1284
1110
  def standardize(smiles: str) -> Optional[str]:
@@ -1235,72 +1235,89 @@ def predict( # noqa: C901, PLR0915, PLR0912
1235
1235
  # Print header
1236
1236
  click.echo("\nPredicting property: affinity\n")
1237
1237
 
1238
- # Validate inputs
1239
- manifest_filtered = filter_inputs_affinity(
1240
- manifest=manifest,
1241
- outdir=out_dir,
1242
- override=override,
1243
- )
1244
- if not manifest_filtered.records:
1245
- click.echo("Found existing affinity predictions for all inputs, skipping.")
1246
- return
1247
-
1248
- msg = f"Running affinity prediction for {len(manifest_filtered.records)} input"
1249
- msg += "s." if len(manifest_filtered.records) > 1 else "."
1250
- click.echo(msg)
1251
-
1252
- pred_writer = BoltzAffinityWriter(
1253
- data_dir=processed.targets_dir,
1254
- output_dir=out_dir / "predictions",
1255
- )
1238
+ # Group records by protein-ligand pairs
1239
+ affinity_groups = {}
1240
+ for record in manifest.records:
1241
+ if record.affinity:
1242
+ key = (record.affinity["binder"], record.affinity["ligand"])
1243
+ if key not in affinity_groups:
1244
+ affinity_groups[key] = []
1245
+ affinity_groups[key].append(record)
1246
+
1247
+ # Process each protein-ligand pair
1248
+ for (binder, ligand), records in affinity_groups.items():
1249
+ # Create subfolder for this protein-ligand pair
1250
+ pair_dir = out_dir / "predictions" / f"{binder}_{ligand}"
1251
+ pair_dir.mkdir(parents=True, exist_ok=True)
1252
+
1253
+ # Create manifest for this pair
1254
+ pair_manifest = Manifest(records)
1255
+
1256
+ # Validate inputs
1257
+ pair_manifest_filtered = filter_inputs_affinity(
1258
+ manifest=pair_manifest,
1259
+ outdir=pair_dir,
1260
+ override=override,
1261
+ )
1262
+ if not pair_manifest_filtered.records:
1263
+ click.echo(f"Found existing affinity predictions for {binder}_{ligand}, skipping.")
1264
+ continue
1256
1265
 
1257
- data_module = Boltz2InferenceDataModule(
1258
- manifest=manifest_filtered,
1259
- target_dir=out_dir / "predictions",
1260
- msa_dir=processed.msa_dir,
1261
- mol_dir=mol_dir,
1262
- num_workers=num_workers,
1263
- constraints_dir=processed.constraints_dir,
1264
- template_dir=processed.template_dir,
1265
- extra_mols_dir=processed.extra_mols_dir,
1266
- override_method="other",
1267
- affinity=True,
1268
- )
1266
+ msg = f"Running affinity prediction for {binder} with {ligand}"
1267
+ click.echo(msg)
1269
1268
 
1270
- predict_affinity_args = {
1271
- "recycling_steps": 5,
1272
- "sampling_steps": sampling_steps_affinity,
1273
- "diffusion_samples": diffusion_samples_affinity,
1274
- "max_parallel_samples": 1,
1275
- "write_confidence_summary": False,
1276
- "write_full_pae": False,
1277
- "write_full_pde": False,
1278
- }
1269
+ pred_writer = BoltzAffinityWriter(
1270
+ data_dir=processed.targets_dir,
1271
+ output_dir=pair_dir,
1272
+ )
1279
1273
 
1280
- # Load affinity model
1281
- if affinity_checkpoint is None:
1282
- affinity_checkpoint = cache / "boltz2_aff.ckpt"
1274
+ data_module = Boltz2InferenceDataModule(
1275
+ manifest=pair_manifest_filtered,
1276
+ target_dir=out_dir / "predictions",
1277
+ msa_dir=processed.msa_dir,
1278
+ mol_dir=mol_dir,
1279
+ num_workers=num_workers,
1280
+ constraints_dir=processed.constraints_dir,
1281
+ template_dir=processed.template_dir,
1282
+ extra_mols_dir=processed.extra_mols_dir,
1283
+ override_method="other",
1284
+ affinity=True,
1285
+ )
1283
1286
 
1284
- model_module = Boltz2.load_from_checkpoint(
1285
- affinity_checkpoint,
1286
- strict=True,
1287
- predict_args=predict_affinity_args,
1288
- map_location="cpu",
1289
- diffusion_process_args=asdict(diffusion_params),
1290
- ema=False,
1291
- pairformer_args=asdict(pairformer_args),
1292
- msa_args=asdict(msa_args),
1293
- steering_args={"fk_steering": False, "guidance_update": False},
1294
- affinity_mw_correction=affinity_mw_correction,
1295
- )
1296
- model_module.eval()
1287
+ predict_affinity_args = {
1288
+ "recycling_steps": 5,
1289
+ "sampling_steps": sampling_steps_affinity,
1290
+ "diffusion_samples": diffusion_samples_affinity,
1291
+ "max_parallel_samples": 1,
1292
+ "write_confidence_summary": False,
1293
+ "write_full_pae": False,
1294
+ "write_full_pde": False,
1295
+ }
1296
+
1297
+ # Load affinity model
1298
+ if affinity_checkpoint is None:
1299
+ affinity_checkpoint = cache / "boltz2_aff.ckpt"
1300
+
1301
+ model_module = Boltz2.load_from_checkpoint(
1302
+ affinity_checkpoint,
1303
+ strict=True,
1304
+ predict_args=predict_affinity_args,
1305
+ map_location="cpu",
1306
+ diffusion_process_args=asdict(diffusion_params),
1307
+ ema=False,
1308
+ pairformer_args=asdict(pairformer_args),
1309
+ msa_args=asdict(msa_args),
1310
+ steering_args={"fk_steering": False, "guidance_update": False},
1311
+ affinity_mw_correction=affinity_mw_correction,
1312
+ )
1313
+ model_module.eval()
1297
1314
 
1298
- trainer.callbacks[0] = pred_writer
1299
- trainer.predict(
1300
- model_module,
1301
- datamodule=data_module,
1302
- return_predictions=False,
1303
- )
1315
+ trainer.callbacks[0] = pred_writer
1316
+ trainer.predict(
1317
+ model_module,
1318
+ datamodule=data_module,
1319
+ return_predictions=False,
1320
+ )
1304
1321
 
1305
1322
 
1306
1323
  if __name__ == "__main__":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: boltz-vsynthes
3
- Version: 1.0.6
3
+ Version: 1.0.8
4
4
  Summary: Boltz for V-Synthes
5
5
  Requires-Python: <3.13,>=3.10
6
6
  Description-Content-Type: text/markdown
File without changes
File without changes
File without changes