PyPI - boltz-vsynthes - Versions diffs - 1.0.15__py3-none-any.whl → 1.0.17__py3-none-any.whl - Mend

boltz-vsynthes 1.0.15__py3-none-any.whl → 1.0.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

boltz/data/parse/pdb.py CHANGED Viewed

@@ -1,17 +1,19 @@
+import os
 from pathlib import Path
 from typing import Optional
 from Bio import PDB
-from Bio.Data.IUPACData import protein_letters_3to1
+from Bio.PDB.PDBParser import PDBParser
+from Bio.PDB.Polypeptide import PPBuilder
 from rdkit import Chem
 from rdkit.Chem.rdchem import Mol
 from boltz.data.types import Target
-from boltz.data.parse.yaml import parse_boltz_schema
+from boltz.data.parse.schema import parse_boltz_schema
 def parse_pdb(
-    path: Path,
+    pdb_path: Path,
     ccd: dict[str, Mol],
     mol_dir: Path,
     boltz2: bool = False,
@@ -20,14 +22,14 @@ def parse_pdb(
     Parameters
     ----------
-    path : Path
+    pdb_path : Path
         Path to the PDB file.
     ccd : Dict
         Dictionary of CCD components.
     mol_dir : Path
         Path to the directory containing the molecules.
-    boltz2 : bool
-        Whether to parse the input for Boltz2.
+    boltz2 : bool, optional
+        Whether to use Boltz2 format, by default False.
     Returns
     -------
@@ -35,31 +37,25 @@ def parse_pdb(
         The parsed target.
     """
     # Read PDB file
-    parser = PDB.PDBParser(QUIET=True)
-    structure = parser.get_structure("protein", str(path))
+    parser = PDBParser(QUIET=True)
+    structure = parser.get_structure("protein", str(pdb_path))
+    ppb = PPBuilder()
     # Convert to yaml format
     sequences = []
     for model in structure:
         for chain in model:
-            # Get chain sequence
-            seq = ""
-            for residue in chain:
-                if residue.id[0] == " ":  # Only standard residues
-                    try:
-                        seq += protein_letters_3to1[residue.resname]
-                    except KeyError:
-                        continue
-            if seq:  # Only add if sequence is not empty
-                molecule = {
-                    "protein": {
-                        "id": chain.id,
-                        "sequence": seq,
-                        "modifications": [],
-                    },
-                }
-                sequences.append(molecule)
+            for pp in ppb.build_peptides(chain):
+                seq = str(pp.get_sequence())
+                if seq:  # Only add if sequence is not empty
+                    molecule = {
+                        "protein": {
+                            "id": chain.id,
+                            "sequence": seq,
+                            "modifications": [],
+                        },
+                    }
+                    sequences.append(molecule)
     data = {
         "sequences": sequences,
@@ -67,5 +63,4 @@ def parse_pdb(
         "version": 1,
     }
-    name = path.stem
-    return parse_boltz_schema(name, data, ccd, mol_dir, boltz2)
+    return parse_boltz_schema(pdb_path.stem, data, ccd, mol_dir, boltz2)

boltz/data/parse/pdb_download.py CHANGED Viewed

@@ -4,12 +4,13 @@ from typing import Optional
 import requests
 from Bio import PDB
-from Bio.Data.IUPACData import protein_letters_3to1
+from Bio.PDB.PDBParser import PDBParser
+from Bio.PDB.PPBuilder import PPBuilder
 from rdkit import Chem
 from rdkit.Chem.rdchem import Mol
 from boltz.data.types import Target
-from boltz.data.parse.yaml import parse_boltz_schema
+from boltz.data.parse.schema import parse_boltz_schema
 def download_pdb(pdb_id: str, cache_dir: Path) -> Path:
@@ -67,8 +68,8 @@ def parse_pdb_id(
         Path to the directory containing the molecules.
     cache_dir : Path
         The directory to cache downloaded PDB files.
-    boltz2 : bool
-        Whether to parse the input for Boltz2.
+    boltz2 : bool, optional
+        Whether to use Boltz2 format, by default False.
     Returns
     -------
@@ -79,31 +80,25 @@ def parse_pdb_id(
     pdb_path = download_pdb(pdb_id, cache_dir)
     # Read PDB file
-    parser = PDB.PDBParser(QUIET=True)
+    parser = PDBParser(QUIET=True)
     structure = parser.get_structure("protein", str(pdb_path))
+    ppb = PPBuilder()
     # Convert to yaml format
     sequences = []
     for model in structure:
         for chain in model:
-            # Get chain sequence
-            seq = ""
-            for residue in chain:
-                if residue.id[0] == " ":  # Only standard residues
-                    try:
-                        seq += protein_letters_3to1[residue.resname]
-                    except KeyError:
-                        continue
-            if seq:  # Only add if sequence is not empty
-                molecule = {
-                    "protein": {
-                        "id": chain.id,
-                        "sequence": seq,
-                        "modifications": [],
-                    },
-                }
-                sequences.append(molecule)
+            for pp in ppb.build_peptides(chain):
+                seq = str(pp.get_sequence())
+                if seq:  # Only add if sequence is not empty
+                    molecule = {
+                        "protein": {
+                            "id": chain.id,
+                            "sequence": seq,
+                            "modifications": [],
+                        },
+                    }
+                    sequences.append(molecule)
     data = {
         "sequences": sequences,

boltz/data/parse/schema.py CHANGED Viewed

@@ -1024,12 +1024,14 @@ def parse_boltz_schema(  # noqa: C901, PLR0915, PLR0912
                     # This is a PDB ID
                     from boltz.data.parse.pdb_download import parse_pdb_id
                     target = parse_pdb_id(pdb_path.stem, ccd, mol_dir, pdb_path.parent)
+                    # Get sequence from the first chain
+                    seq = target.sequences[0]["protein"]["sequence"]
                 else:
                     # This is a PDB file
                     from boltz.data.parse.pdb import parse_pdb
                     target = parse_pdb(pdb_path, ccd, mol_dir)
-                # Get sequence from the first chain
-                seq = target.sequences[0]
+                    # Get sequence from the first chain
+                    seq = target.sequences[0]["protein"]["sequence"]
             else:
                 msg = f"Protein must have either 'sequence' or 'pdb' field: {item}"
                 raise ValueError(msg)
@@ -1042,7 +1044,7 @@ def parse_boltz_schema(  # noqa: C901, PLR0915, PLR0912
                 from boltz.data.parse.sdf import parse_sdf
                 target = parse_sdf(sdf_path, ccd, mol_dir)
                 # Get sequence from the first ligand
-                seq = target.sequences[0]
+                seq = target.sequences[0]["ligand"]["sequence"]
             elif "ccd" in item[entity_type]:
                 seq = str(item[entity_type]["ccd"])
             else:

boltz/data/parse/sdf.py CHANGED Viewed

@@ -1,15 +1,49 @@
+import os
 from pathlib import Path
 from typing import Optional
 from rdkit import Chem
+from rdkit.Chem import AllChem
 from rdkit.Chem.rdchem import Mol
+import rdkit.Chem.rdmolfiles as rdmolfiles
 from boltz.data.types import Target
-from boltz.data.parse.yaml import parse_boltz_schema
+from boltz.data.parse.schema import parse_boltz_schema
+def _process_sdf(sdf_path: str) -> dict[str, str]:
+    """Process an SDF file and extract SMILES strings.
+    Parameters
+    ----------
+    sdf_path : str
+        Path to the SDF file.
+    Returns
+    -------
+    dict[str, str]
+        Dictionary mapping molecule names to SMILES strings.
+    """
+    output_dict = {}
+    suppl = rdmolfiles.ForwardSDMolSupplier(sdf_path)
+    for mol in suppl:
+        if mol is not None:
+            mol_smiles = rdmolfiles.MolToSmiles(mol)
+            if mol.HasProp("_Name"):
+                mol_name = mol.GetProp("_Name")
+                if mol_name == "":
+                    mol_name = mol_smiles
+            else:
+                mol_name = mol_smiles
+            output_dict[mol_name] = mol_smiles
+    return output_dict
 def parse_sdf(
-    path: Path,
+    sdf_path: Path,
     ccd: dict[str, Mol],
     mol_dir: Path,
     boltz2: bool = False,
@@ -18,37 +52,34 @@ def parse_sdf(
     Parameters
     ----------
-    path : Path
+    sdf_path : Path
         Path to the SDF file.
     ccd : Dict
         Dictionary of CCD components.
     mol_dir : Path
         Path to the directory containing the molecules.
-    boltz2 : bool
-        Whether to parse the input for Boltz2.
+    boltz2 : bool, optional
+        Whether to use Boltz2 format, by default False.
     Returns
     -------
     Target
         The parsed target.
     """
-    # Read SDF file
-    supplier = Chem.SDMolSupplier(str(path))
+    # Process SDF file
+    mol_dict = _process_sdf(str(sdf_path))
     # Convert to yaml format
     sequences = []
-    for i, mol in enumerate(supplier):
-        if mol is not None:
-            # Get SMILES
-            smiles = Chem.MolToSmiles(mol)
-            molecule = {
-                "ligand": {
-                    "id": f"L{i+1}",  # Use L1, L2, etc. as chain IDs
-                    "smiles": smiles,
-                },
-            }
-            sequences.append(molecule)
+    for mol_name, smiles in mol_dict.items():
+        molecule = {
+            "ligand": {
+                "id": mol_name,
+                "sequence": smiles,
+                "modifications": [],
+            },
+        }
+        sequences.append(molecule)
     data = {
         "sequences": sequences,
@@ -56,5 +87,4 @@ def parse_sdf(
         "version": 1,
     }
-    name = path.stem
-    return parse_boltz_schema(name, data, ccd, mol_dir, boltz2)
+    return parse_boltz_schema(sdf_path.stem, data, ccd, mol_dir, boltz2)

{boltz_vsynthes-1.0.15.dist-info → boltz_vsynthes-1.0.17.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: boltz-vsynthes
-Version: 1.0.15
+Version: 1.0.17
 Summary: Boltz for VSYNTHES
 Requires-Python: <3.13,>=3.10
 Description-Content-Type: text/markdown

{boltz_vsynthes-1.0.15.dist-info → boltz_vsynthes-1.0.17.dist-info}/RECORD RENAMED Viewed

@@ -38,10 +38,10 @@ boltz/data/parse/csv.py,sha256=Hcq8rJW2njczahEr8jfd_o-zxLaNSgJ3YIoC9srIqpw,2518
 boltz/data/parse/fasta.py,sha256=taI4s_CqPtyF0XaLJAsVAJHCL0GXm2g1g8Qeccdxikk,3906
 boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,36822
 boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
-boltz/data/parse/pdb.py,sha256=o2gGGVDxLaKhqPaZjuwfY6gdIyu4iUxczmkvDnEnNls,1808
-boltz/data/parse/pdb_download.py,sha256=FJTX7qHKJ_sBRcHg1PLKlqy6gVuluPDAWOAuYKI4-gM,2931
-boltz/data/parse/schema.py,sha256=uKv1toawysagqi7wnl1njCDrQKM1tNCNDxvcNc6VXK0,60801
-boltz/data/parse/sdf.py,sha256=myFA3bL6MkdPdMFfZHotxJ8yNMGpsc_u6w06YFadeiw,1364
+boltz/data/parse/pdb.py,sha256=R6rDCh6AKLn7n9ELltF1wDORDtR1CFgFnjFdYXKJCNg,1697
+boltz/data/parse/pdb_download.py,sha256=YEvSoYqKAan6EizCICMruC_3v-Vpvc4vpCpwmHPY67E,2808
+boltz/data/parse/schema.py,sha256=pq9PRrm3pyaOH0kJEjtG-rVHEPpr_KmiykvxKsMWnn0,60979
+boltz/data/parse/sdf.py,sha256=z7XTH-a38Rq-grC8bEZUPxGmLyP-g17LCxYWzVkBS_0,2107
 boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
 boltz/data/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/data/sample/cluster.py,sha256=9Sx8qP7zGZOAyEspwYFtCTbGTBZnuN-zfCKFbbA_6oI,8175
@@ -107,9 +107,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
 boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
 boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
-boltz_vsynthes-1.0.15.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
-boltz_vsynthes-1.0.15.dist-info/METADATA,sha256=dZV2p_UkKrGC3QZh40tY3ZEYLO7MylqdH77xA5pSe9I,7171
-boltz_vsynthes-1.0.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-boltz_vsynthes-1.0.15.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
-boltz_vsynthes-1.0.15.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
-boltz_vsynthes-1.0.15.dist-info/RECORD,,
+boltz_vsynthes-1.0.17.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
+boltz_vsynthes-1.0.17.dist-info/METADATA,sha256=s2lJPUPQJ4BMEFa2Hc57Q8e5EYMPuEKr5IO_SxCVTmc,7171
+boltz_vsynthes-1.0.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+boltz_vsynthes-1.0.17.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
+boltz_vsynthes-1.0.17.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
+boltz_vsynthes-1.0.17.dist-info/RECORD,,

{boltz_vsynthes-1.0.15.dist-info → boltz_vsynthes-1.0.17.dist-info}/WHEEL RENAMED Viewed

File without changes

{boltz_vsynthes-1.0.15.dist-info → boltz_vsynthes-1.0.17.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{boltz_vsynthes-1.0.15.dist-info → boltz_vsynthes-1.0.17.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{boltz_vsynthes-1.0.15.dist-info → boltz_vsynthes-1.0.17.dist-info}/top_level.txt RENAMED Viewed

File without changes

boltz-vsynthes 1.0.15__py3-none-any.whl → 1.0.17__py3-none-any.whl

boltz-vsynthes 1.0.15__py3-none-any.whl → 1.0.17__py3-none-any.whl