PyPI - boltz-vsynthes - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

boltz-vsynthes 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

boltz/data/module/inferencev2.py CHANGED Viewed

@@ -30,6 +30,7 @@ def load_input(
     msa_dir: Path,
     constraints_dir: Optional[Path] = None,
     template_dir: Optional[Path] = None,
+    only_prediction: bool = False,
     extra_mols_dir: Optional[Path] = None,
     affinity: bool = False,
 ) -> Input:
@@ -60,15 +61,18 @@ def load_input(
     """
     # Load the structure
     if affinity:
-        structure = StructureV2.load(
+        if only_prediction:
+            #TODO: Formalize this
+            if target_dir.name == "predictions":
+                target_dir = target_dir.parent / "processed"
+            structure = StructureV2.load(
+                target_dir / f"structures/{record.id}.npz"
+                )
+        else:
+            structure = StructureV2.load(
             target_dir / record.id / f"pre_affinity_{record.id}.npz"
         )
-    # if affinity:
-    #     if target_dir.name == "predictions":
-    #         target_dir = target_dir.parent / "processed"
-    #     structure = StructureV2.load(
-    #         target_dir / f"structures/{record.id}.npz"
-    #     )
     else:
         structure = StructureV2.load(target_dir / f"{record.id}.npz")
@@ -171,6 +175,7 @@ class PredictionDataset(torch.utils.data.Dataset):
         mol_dir: Path,
         constraints_dir: Optional[Path] = None,
         template_dir: Optional[Path] = None,
+        only_prediction: bool = False,
         extra_mols_dir: Optional[Path] = None,
         override_method: Optional[str] = None,
         affinity: bool = False,
@@ -203,6 +208,7 @@ class PredictionDataset(torch.utils.data.Dataset):
         self.tokenizer = Boltz2Tokenizer()
         self.featurizer = Boltz2Featurizer()
         self.canonicals = load_canonicals(self.mol_dir)
+        self.only_prediction = only_prediction
         self.extra_mols_dir = extra_mols_dir
         self.override_method = override_method
         self.affinity = affinity
@@ -228,6 +234,7 @@ class PredictionDataset(torch.utils.data.Dataset):
             msa_dir=self.msa_dir,
             constraints_dir=self.constraints_dir,
             template_dir=self.template_dir,
+            only_prediction=self.only_prediction,
             extra_mols_dir=self.extra_mols_dir,
             affinity=self.affinity,
         )
@@ -329,6 +336,7 @@ class Boltz2InferenceDataModule(pl.LightningDataModule):
         constraints_dir: Optional[Path] = None,
         template_dir: Optional[Path] = None,
         extra_mols_dir: Optional[Path] = None,
+        only_prediction: bool = False,
         override_method: Optional[str] = None,
         affinity: bool = False,
     ) -> None:
@@ -365,6 +373,7 @@ class Boltz2InferenceDataModule(pl.LightningDataModule):
         self.constraints_dir = constraints_dir
         self.template_dir = template_dir
         self.extra_mols_dir = extra_mols_dir
+        self.only_prediction = only_prediction
         self.override_method = override_method
         self.affinity = affinity
@@ -384,6 +393,7 @@ class Boltz2InferenceDataModule(pl.LightningDataModule):
             mol_dir=self.mol_dir,
             constraints_dir=self.constraints_dir,
             template_dir=self.template_dir,
+            only_prediction=self.only_prediction,
             extra_mols_dir=self.extra_mols_dir,
             override_method=self.override_method,
             affinity=self.affinity,

boltz/data/parse/pdb.py CHANGED Viewed

@@ -8,6 +8,9 @@ from Bio.PDB.Polypeptide import PPBuilder
 from Bio.Data.IUPACData import protein_letters_3to1
 from rdkit import Chem
 from rdkit.Chem.rdchem import Mol
+from Bio.SeqUtils import seq1
+from collections import defaultdict
 from boltz.data.types import Target
 from boltz.data.parse.schema import parse_boltz_schema
@@ -38,27 +41,35 @@ def parse_pdb(
         Dictionary containing sequences and bonds.
     """
     # Read PDB file
-    parser = PDBParser(QUIET=True)
-    structure = parser.get_structure("protein", str(pdb_path))
-    ppb = PPBuilder()
-    # Convert to yaml format
     sequences = []
-    for model in structure:
-        for chain in model:
-            for pp in ppb.build_peptides(chain):
-                seq = str(pp.get_sequence())
-                if seq:  # Only add if sequence is not empty
-                    sequences.append({
-                        "protein": {
-                            "id": chain.id,
-                            "sequence": seq,
-                            "modifications": [],
-                        }
-                    })
+    sequence_by_chain = defaultdict(list)
+    # Parse SEQRES records directly
+    with open(pdb_path) as f:
+        for line in f:
+            if line.startswith("SEQRES"):
+                parts = line.split()
+                chain_id = parts[2]
+                residues = parts[4:]
+                for res in residues:
+                    try:
+                        aa = seq1(res)
+                    except KeyError:
+                        aa = 'X'
+                    sequence_by_chain[chain_id].append(aa)
+    # Convert to yaml-style list
+    for chain_id, aa_list in sequence_by_chain.items():
+        sequences.append({
+            "protein": {
+                "id": chain_id,
+                "sequence": ''.join(aa_list),
+                "modifications": [],
+            }
+        })
+    print(sequences)
     return {
         "sequences": sequences,
         "bonds": [],
         "version": 1,
-    }
+    }

boltz/data/parse/pdb_download.py CHANGED Viewed

@@ -8,6 +8,8 @@ from Bio.PDB.PDBParser import PDBParser
 from Bio.PDB.Polypeptide import PPBuilder
 from rdkit import Chem
 from rdkit.Chem.rdchem import Mol
+from Bio.SeqUtils import seq1
+from collections import defaultdict
 from boltz.data.types import Target
 from boltz.data.parse.schema import parse_boltz_schema
@@ -79,28 +81,36 @@ def parse_pdb_id(
     # Download PDB file
     pdb_path = download_pdb(pdb_id, cache_dir)
-    # Read PDB file
-    parser = PDBParser(QUIET=True)
-    structure = parser.get_structure("protein", str(pdb_path))
-    ppb = PPBuilder()
-    # Convert to yaml format
     sequences = []
-    for model in structure:
-        for chain in model:
-            for pp in ppb.build_peptides(chain):
-                seq = str(pp.get_sequence())
-                if seq:  # Only add if sequence is not empty
-                    sequences.append({
-                        "protein": {
-                            "id": chain.id,
-                            "sequence": seq,
-                            "modifications": [],
-                        }
-                    })
+    sequence_by_chain = defaultdict(list)
+    # Parse SEQRES records directly
+    with open(pdb_path) as f:
+        for line in f:
+            if line.startswith("SEQRES"):
+                parts = line.split()
+                chain_id = parts[2]
+                residues = parts[4:]
+                for res in residues:
+                    try:
+                        aa = seq1(res)
+                    except KeyError:
+                        aa = 'X'
+                    sequence_by_chain[chain_id].append(aa)
+    # Convert to yaml-style list
+    for chain_id, aa_list in sequence_by_chain.items():
+        sequences.append({
+            "protein": {
+                "id": chain_id,
+                "sequence": ''.join(aa_list),
+                "modifications": [],
+            }
+        })
+    print(sequences)
     return {
         "sequences": sequences,
         "bonds": [],
         "version": 1,
-    }
+    }

boltz/main.py CHANGED Viewed

@@ -935,6 +935,11 @@ def cli() -> None:
     is_flag=True,
     help="Whether to disable the kernels. Default False",
 )
+@click.option(
+    "--only_prediction",
+    is_flag=True,
+    help="Run only prediction. Default False",
+)
 def predict(  # noqa: C901, PLR0915, PLR0912
     data: str,
     out_dir: str,
@@ -968,6 +973,7 @@ def predict(  # noqa: C901, PLR0915, PLR0912
     subsample_msa: bool = True,
     num_subsampled_msa: int = 1024,
     no_kernels: bool = False,
+    only_prediction: bool = False,
 ) -> None:
     """Run predictions with Boltz."""
     # If cpu, write a friendly warning
@@ -1176,6 +1182,7 @@ def predict(  # noqa: C901, PLR0915, PLR0912
                 constraints_dir=processed.constraints_dir,
                 template_dir=processed.template_dir,
                 extra_mols_dir=processed.extra_mols_dir,
+                only_prediction=only_prediction,
                 override_method=method,
             )
         else:
@@ -1224,15 +1231,16 @@ def predict(  # noqa: C901, PLR0915, PLR0912
         model_module.eval()
         print(f"[{datetime.now().strftime('%H:%M:%S')}] Model loaded in {time.time() - t_model:.2f} seconds")
-        # 9. Before and after compute structure predictions (predict)
-        t_predict = time.time()
-        print(f"[{datetime.now().strftime('%H:%M:%S')}] Computing structure predictions...")
-        trainer.predict(
-            model_module,
-            datamodule=data_module,
-            return_predictions=False,
-        )
-        print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure predictions computed in {time.time() - t_predict:.2f} seconds")
+        if not only_prediction:
+            # 9. Before and after compute structure predictions (predict)
+            t_predict = time.time()
+            print(f"[{datetime.now().strftime('%H:%M:%S')}] Computing structure predictions...")
+            trainer.predict(
+                model_module,
+                datamodule=data_module,
+                return_predictions=False,
+            )
+            print(f"[{datetime.now().strftime('%H:%M:%S')}] Structure predictions computed in {time.time() - t_predict:.2f} seconds")
     # Check if affinity predictions are needed
     if any(r.affinity for r in manifest.records):
@@ -1266,6 +1274,7 @@ def predict(  # noqa: C901, PLR0915, PLR0912
             num_workers=num_workers,
             constraints_dir=processed.constraints_dir,
             template_dir=processed.template_dir,
+            only_prediction=only_prediction,
             extra_mols_dir=processed.extra_mols_dir,
             override_method="other",
             affinity=True,

{boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: boltz-vsynthes
-Version: 0.1.2
+Version: 0.1.4
 Summary: Boltz for VSYNTHES
 Requires-Python: <3.13,>=3.10
 Description-Content-Type: text/markdown

{boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
-boltz/main.py,sha256=nJVS1bG79_pqbMUCiRs0ak7WeOrPtIsJ_LeuHbsa_ms,42544
+boltz/main.py,sha256=CQ4lGxzXPw9kvnWYpuh0KRLtfc2Qmk1KcSTo4GquVv4,42848
 boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
 boltz/data/mol.py,sha256=maOpPHEGX1VVXCIFY6pQNGF7gUBZPAfgSvuPf2QO1yc,34268
@@ -27,7 +27,7 @@ boltz/data/filter/static/ligand.py,sha256=LamC-Z9IjYj3DmfxwMFmPbKBBhRMby3uWQj74w
 boltz/data/filter/static/polymer.py,sha256=LNsQMsOOnhYpeKuM9AStktoTQPMZE3H0yu4mRg-jwPc,9386
 boltz/data/module/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/data/module/inference.py,sha256=xk8ZJ8UhjPiPTdOluH_v4gnV8GtTX3sr1WZ1s5Ox8I8,8100
-boltz/data/module/inferencev2.py,sha256=aLUm1WR6E1814JUrF6sJfoe5y8y7d_s4zlQ3pdFBVy8,12742
+boltz/data/module/inferencev2.py,sha256=83GMvlQihS0QzTd51TB6sFeErQ4xFyCn2M1BD2z0bZE,13125
 boltz/data/module/training.py,sha256=iNzmq9ufs20S4M947CCzdYzGTFjmCTf2tFExJ2PtXnA,22428
 boltz/data/module/trainingv2.py,sha256=ZsYUHYXxfuPgIpbTwCj5QLO0XK__xjsqIw6GARSNGW0,21276
 boltz/data/msa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -38,8 +38,8 @@ boltz/data/parse/csv.py,sha256=Hcq8rJW2njczahEr8jfd_o-zxLaNSgJ3YIoC9srIqpw,2518
 boltz/data/parse/fasta.py,sha256=taI4s_CqPtyF0XaLJAsVAJHCL0GXm2g1g8Qeccdxikk,3906
 boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,36822
 boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
-boltz/data/parse/pdb.py,sha256=iybk4p2UgUy_ABGprDq_xxyPSdm1HAZsGTM0lhxVEwM,1654
-boltz/data/parse/pdb_download.py,sha256=wge-scX-lOatX0q83W1wOsaql99rYp-6uGWSHEc995M,2718
+boltz/data/parse/pdb.py,sha256=873jPx4D-OTBTd4lIn3GquYt0OLX4gbfzyAldxtzIIA,1913
+boltz/data/parse/pdb_download.py,sha256=Ys2fepXD6RLYyq6xjHFNsoAStxISuvQ_EUEt0JwmrK0,2958
 boltz/data/parse/schema.py,sha256=kNu28U2_MGiecwWNlcxgaDH3WOcO0P-q2LdoSPSb66w,63826
 boltz/data/parse/sdf.py,sha256=fs3MQVClDcCzxJaeVYiDuoh-fUrYc8Tcd5Bz8ws3FKI,2052
 boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
@@ -110,9 +110,9 @@ boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRp
 boltz/utils/sdf_splitter.py,sha256=ZHn_syOcmm-fDnJ3YEGyGv_vYz2IRzUW7vbbMSU2JBY,2108
 boltz/utils/sdf_to_pre_affinity_npz.py,sha256=ro0KGe24JexbJm47J8S8w8Lmr_KaQbzOAb_dKZO2G9I,40384
 boltz/utils/yaml_generator.py,sha256=ermWIG-BE6nNWHFvpEwpk92N9J-YATpGXZGLvD1I2oQ,4012
-boltz_vsynthes-0.1.2.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
-boltz_vsynthes-0.1.2.dist-info/METADATA,sha256=RUv2LCKZ3YPmu2-YqB2EI-Pbe45u2SlXnAoTtgmlEOM,7234
-boltz_vsynthes-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-boltz_vsynthes-0.1.2.dist-info/entry_points.txt,sha256=nZNYPKKrmAr-MVA0K-ClNRT2p90FV1_14d7HpsESZFQ,211
-boltz_vsynthes-0.1.2.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
-boltz_vsynthes-0.1.2.dist-info/RECORD,,
+boltz_vsynthes-0.1.4.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
+boltz_vsynthes-0.1.4.dist-info/METADATA,sha256=_MxyMdy8K71CIe85VHRqIvHArfBjy7pd-fZjoDpInJo,7234
+boltz_vsynthes-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+boltz_vsynthes-0.1.4.dist-info/entry_points.txt,sha256=nZNYPKKrmAr-MVA0K-ClNRT2p90FV1_14d7HpsESZFQ,211
+boltz_vsynthes-0.1.4.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
+boltz_vsynthes-0.1.4.dist-info/RECORD,,

{boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{boltz_vsynthes-0.1.2.dist-info → boltz_vsynthes-0.1.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

boltz-vsynthes 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

boltz-vsynthes 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl