boltz-vsynthes 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boltz/data/parse/schema.py +144 -647
- boltz/main.py +5 -1
- {boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/METADATA +1 -1
- {boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/RECORD +8 -8
- {boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/WHEEL +0 -0
- {boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/entry_points.txt +0 -0
- {boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/licenses/LICENSE +0 -0
- {boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/top_level.txt +0 -0
boltz/data/parse/schema.py
CHANGED
@@ -1005,46 +1005,31 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
     # First group items that have the same type, sequence and modifications
     items_to_group = {}
     chain_name_to_entity_type = {}
+
+    # Keep track of ligand IDs
+    ligand_id = 1
+    ligand_id_map = {}
 
+    # Parse sequences
     for item in schema["sequences"]:
-
-
-
-            msg = f"Invalid entity type: {entity_type}"
-            raise ValueError(msg)
+        entity_type = list(item.keys())[0]
+        entity_id = item[entity_type]["id"]
+        entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
 
-        # Get sequence
-        if entity_type
+        # Get sequence
+        if entity_type == "protein":
             if "sequence" in item[entity_type]:
-                seq =
+                seq = item[entity_type]["sequence"]
             elif "pdb" in item[entity_type]:
-                pdb_input =
-
-
-
-
-
-
-
-
-
-                if pdb_cache_file.exists():
-                    # Use cached file
-                    with pdb_cache_file.open("r") as f:
-                        pdb_data = f.read()
-                else:
-                    # Download and cache
-                    import urllib.request
-                    pdb_url = f"https://files.rcsb.org/download/{pdb_input.lower()}.pdb"
-                    try:
-                        with urllib.request.urlopen(pdb_url) as response:
-                            pdb_data = response.read().decode()
-                        # Cache the downloaded data
-                        with pdb_cache_file.open("w") as f:
-                            f.write(pdb_data)
-                    except Exception as e:
-                        msg = f"Failed to download PDB {pdb_input}: {str(e)}"
-                        raise RuntimeError(msg) from e
+                pdb_input = item[entity_type]["pdb"]
+                if pdb_input.startswith(("http://", "https://")):
+                    # It's a PDB ID
+                    import requests
+                    response = requests.get(f"https://files.rcsb.org/download/{pdb_input}.pdb")
+                    if response.status_code != 200:
+                        msg = f"Failed to download PDB file: {pdb_input}"
+                        raise FileNotFoundError(msg)
+                    pdb_data = response.text
                 else:
                     # It's a file path
                     pdb_path = Path(pdb_input)
@@ -1076,8 +1061,15 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
             assert "smiles" not in item[entity_type] or "ccd" not in item[entity_type]
             if "smiles" in item[entity_type]:
                 seq = str(item[entity_type]["smiles"])
+                # Map user-provided ID to internal LIG1, LIG2, etc.
+                for id in entity_id:
+                    ligand_id_map[id] = f"LIG{ligand_id}"
+                    ligand_id += 1
             else:
                 seq = str(item[entity_type]["ccd"])
+                # For CCD ligands, use the CCD code as the internal ID
+                for id in entity_id:
+                    ligand_id_map[id] = seq
 
             # Group items by entity
             items_to_group.setdefault((entity_type, seq), []).append(item)
@@ -1091,140 +1083,97 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
     # Check if any affinity ligand is present
     affinity_ligands = set()
     properties = schema.get("properties", [])
-
-
-
-
+
+    # Get all ligands
+    ligands = []
+    for item in schema["sequences"]:
+        entity_type = list(item.keys())[0]
+        if entity_type == "ligand":
+            entity_id = item[entity_type]["id"]
+            entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
+            ligands.extend(entity_id)
+
+    # Get user-specified binders
+    specified_binders = set()
     for prop in properties:
-
-        if prop_type == "affinity":
+        if "affinity" in prop:
             binder = prop["affinity"]["binder"]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    for
-        # Get entity
-
-
-
-
-        for
-
-
-            elif isinstance(item[entity_type]["id"], list):
-                ids.extend(item[entity_type]["id"])
-
-        # Check if any affinity ligand is present
-        if len(ids) == 1:
-            affinity = ids[0] in affinity_ligands
-        elif (len(ids) > 1) and any(x in affinity_ligands for x in ids):
-            msg = "Cannot compute affinity for a ligand that has multiple copies!"
-            raise ValueError(msg)
-        else:
-            affinity = False
-
-        # Ensure all the items share the same msa
-        msa = -1
+            specified_binders.add(binder)
+
+    # If no binders specified, use all proteins
+    if not specified_binders:
+        for item in schema["sequences"]:
+            entity_type = list(item.keys())[0]
+            if entity_type == "protein":
+                entity_id = item[entity_type]["id"]
+                entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
+                specified_binders.update(entity_id)
+
+    # Generate protein-ligand pairs for specified binders
+    new_properties = []
+    for binder in specified_binders:
+        for ligand in ligands:
+            if ligand in ligand_id_map:
+                ligand = ligand_id_map[ligand]  # Convert to internal LIG1, LIG2, etc.
+            affinity_ligands.add(ligand)
+            new_properties.append({
+                "affinity": {
+                    "binder": binder,
+                    "ligand": ligand
+                }
+            })
+
+    # Update schema with generated properties
+    schema["properties"] = new_properties
+
+    # Parse each group
+    chains = []
+    extra_mols = {}
+    for (entity_type, seq), items in items_to_group.items():
+        # Get entity id
+        entity_id = items[0][entity_type]["id"]
+        entity_id = [entity_id] if isinstance(entity_id, str) else entity_id
+
+        # Check if this entity has affinity
+        affinity = any(entity in affinity_ligands for entity in entity_id)
+
+        # Parse a protein
         if entity_type == "protein":
-            # Get
-            msa = items[0][entity_type].get("msa"
-            if
-                msa =
-
-
-
-
-
-                    item_msa = 0
-
-                if item_msa != msa:
-                    msg = "All proteins with the same sequence must share the same MSA!"
-                    raise ValueError(msg)
-
-            # Set the MSA, warn if passed in single-sequence mode
-            if msa == "empty":
-                msa = -1
-                msg = (
-                    "Found explicit empty MSA for some proteins, will run "
-                    "these in single sequence mode. Keep in mind that the "
-                    "model predictions will be suboptimal without an MSA."
-                )
-                click.echo(msg)
-
-            if msa not in (0, -1):
-                is_msa_custom = True
-            elif msa == 0:
-                is_msa_auto = True
-
-        # Parse a polymer
-        if entity_type in {"protein", "dna", "rna"}:
-            # Get token map
-            if entity_type == "rna":
-                token_map = const.rna_letter_to_token
-            elif entity_type == "dna":
-                token_map = const.dna_letter_to_token
-            elif entity_type == "protein":
-                token_map = const.prot_letter_to_token
+            # Get MSA
+            msa = items[0][entity_type].get("msa")
+            if msa is not None:
+                msa = Path(msa)
+                if not msa.exists():
+                    msg = f"MSA file not found: {msa}"
+                    raise FileNotFoundError(msg)
+                with msa.open("r") as f:
+                    msa_data = f.read()
             else:
-
-                raise ValueError(msg)
-
-            # Get polymer info
-            chain_type = const.chain_type_ids[entity_type.upper()]
-            unk_token = const.unk_token[entity_type.upper()]
-
-            # Extract sequence
-            raw_seq = items[0][entity_type]["sequence"]
-            entity_to_seq[entity_id] = raw_seq
-
-            # Convert sequence to tokens
-            seq = [token_map.get(c, unk_token) for c in list(raw_seq)]
+                msa_data = None
 
-            #
-
-
-
-
+            # Parse sequence
+            residues = []
+            for res_idx, code in enumerate(seq):
+                # Get mol
+                ref_mol = get_mol(code, ccd, mol_dir)
 
-
+                # Parse residue
+                residue = parse_ccd_residue(
+                    name=code,
+                    ref_mol=ref_mol,
+                    res_idx=res_idx,
+                )
+                residues.append(residue)
 
-            #
-            parsed_chain =
-                sequence=seq,
-                raw_sequence=raw_seq,
+            # Create protein chain
+            parsed_chain = ParsedChain(
                 entity=entity_id,
-
-
-
-
+                residues=residues,
+                type=const.chain_type_ids["PROTEIN"],
+                cyclic_period=0,
+                sequence=seq,
+                affinity=affinity,
+                affinity_mw=None,
             )
 
         # Parse a non-polymer
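Restated outside the parser, the new affinity handling collects every ligand, takes the user-specified binders (or every protein chain when none are given), and emits one affinity property per binder/ligand pair with ligand IDs rewritten through ligand_id_map. A self-contained sketch with a made-up two-entity schema:

schema = {
    "sequences": [
        {"protein": {"id": "A", "sequence": "MKTAYIAK"}},
        {"ligand": {"id": "L1", "smiles": "CCO"}},
    ],
    "properties": [],
}
ligand_id_map = {"L1": "LIG1"}

# Collect all ligand IDs
ligands = []
for item in schema["sequences"]:
    entity_type = list(item.keys())[0]
    if entity_type == "ligand":
        entity_id = item[entity_type]["id"]
        ligands.extend([entity_id] if isinstance(entity_id, str) else entity_id)

# Binders explicitly requested via affinity properties, else every protein chain
specified_binders = {
    prop["affinity"]["binder"] for prop in schema["properties"] if "affinity" in prop
}
if not specified_binders:
    for item in schema["sequences"]:
        entity_type = list(item.keys())[0]
        if entity_type == "protein":
            entity_id = item[entity_type]["id"]
            specified_binders.update([entity_id] if isinstance(entity_id, str) else entity_id)

# One affinity property per binder/ligand pair, ligands renamed to internal IDs
new_properties = [
    {"affinity": {"binder": binder, "ligand": ligand_id_map.get(ligand, ligand)}}
    for binder in specified_binders
    for ligand in ligands
]
print(new_properties)  # [{'affinity': {'binder': 'A', 'ligand': 'LIG1'}}]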
@@ -1298,14 +1247,16 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
 
             mol_no_h = AllChem.RemoveHs(mol, sanitize=False)
             affinity_mw = AllChem.Descriptors.MolWt(mol_no_h) if affinity else None
-
+
+            # Use the mapped internal ID (LIG1, LIG2, etc.)
+            internal_id = ligand_id_map[entity_id[0]]
+            extra_mols[internal_id] = mol_no_h
             residue = parse_ccd_residue(
-                name=
+                name=internal_id,
                 ref_mol=mol,
                 res_idx=0,
             )
 
-            ligand_id += 1
             parsed_chain = ParsedChain(
                 entity=entity_id,
                 residues=[residue],
@@ -1324,504 +1275,50 @@ def parse_boltz_schema( # noqa: C901, PLR0915, PLR0912
             msg = f"Invalid entity type: {entity_type}"
             raise ValueError(msg)
 
-
-        for item in items:
-            ids = item[entity_type]["id"]
-            if isinstance(ids, str):
-                ids = [ids]
-            for chain_name in ids:
-                chains[chain_name] = parsed_chain
-                chain_to_msa[chain_name] = msa
-
-    # Check if msa is custom or auto
-    if is_msa_custom and is_msa_auto:
-        msg = "Cannot mix custom and auto-generated MSAs in the same input!"
-        raise ValueError(msg)
-
-    # If no chains parsed fail
-    if not chains:
-        msg = "No chains parsed!"
-        raise ValueError(msg)
-
-    # Create tables
-    atom_data = []
-    bond_data = []
-    res_data = []
-    chain_data = []
-    protein_chains = set()
-    affinity_info = None
-
-    rdkit_bounds_constraint_data = []
-    chiral_atom_constraint_data = []
-    stereo_bond_constraint_data = []
-    planar_bond_constraint_data = []
-    planar_ring_5_constraint_data = []
-    planar_ring_6_constraint_data = []
-
-    # Convert parsed chains to tables
-    atom_idx = 0
-    res_idx = 0
-    asym_id = 0
-    sym_count = {}
-    chain_to_idx = {}
-
-    # Keep a mapping of (chain_name, residue_idx, atom_name) to atom_idx
-    atom_idx_map = {}
-
-    for asym_id, (chain_name, chain) in enumerate(chains.items()):
-        # Compute number of atoms and residues
-        res_num = len(chain.residues)
-        atom_num = sum(len(res.atoms) for res in chain.residues)
-
-        # Save protein chains for later
-        if chain.type == const.chain_type_ids["PROTEIN"]:
-            protein_chains.add(chain_name)
-
-        # Add affinity info
-        if chain.affinity and affinity_info is not None:
-            msg = "Cannot compute affinity for multiple ligands!"
-            raise ValueError(msg)
-
-        if chain.affinity:
-            affinity_info = AffinityInfo(
-                chain_id=asym_id,
-                mw=chain.affinity_mw,
-            )
-
-        # Find all copies of this chain in the assembly
-        entity_id = int(chain.entity)
-        sym_id = sym_count.get(entity_id, 0)
-        chain_data.append(
-            (
-                chain_name,
-                chain.type,
-                entity_id,
-                sym_id,
-                asym_id,
-                atom_idx,
-                atom_num,
-                res_idx,
-                res_num,
-                chain.cyclic_period,
-            )
-        )
-        chain_to_idx[chain_name] = asym_id
-        sym_count[entity_id] = sym_id + 1
-
-        # Add residue, atom, bond, data
-        for res in chain.residues:
-            atom_center = atom_idx + res.atom_center
-            atom_disto = atom_idx + res.atom_disto
-            res_data.append(
-                (
-                    res.name,
-                    res.type,
-                    res.idx,
-                    atom_idx,
-                    len(res.atoms),
-                    atom_center,
-                    atom_disto,
-                    res.is_standard,
-                    res.is_present,
-                )
-            )
-
-            if res.rdkit_bounds_constraints is not None:
-                for constraint in res.rdkit_bounds_constraints:
-                    rdkit_bounds_constraint_data.append( # noqa: PERF401
-                        (
-                            tuple(
-                                c_atom_idx + atom_idx
-                                for c_atom_idx in constraint.atom_idxs
-                            ),
-                            constraint.is_bond,
-                            constraint.is_angle,
-                            constraint.upper_bound,
-                            constraint.lower_bound,
-                        )
-                    )
-            if res.chiral_atom_constraints is not None:
-                for constraint in res.chiral_atom_constraints:
-                    chiral_atom_constraint_data.append( # noqa: PERF401
-                        (
-                            tuple(
-                                c_atom_idx + atom_idx
-                                for c_atom_idx in constraint.atom_idxs
-                            ),
-                            constraint.is_reference,
-                            constraint.is_r,
-                        )
-                    )
-            if res.stereo_bond_constraints is not None:
-                for constraint in res.stereo_bond_constraints:
-                    stereo_bond_constraint_data.append( # noqa: PERF401
-                        (
-                            tuple(
-                                c_atom_idx + atom_idx
-                                for c_atom_idx in constraint.atom_idxs
-                            ),
-                            constraint.is_check,
-                            constraint.is_e,
-                        )
-                    )
-            if res.planar_bond_constraints is not None:
-                for constraint in res.planar_bond_constraints:
-                    planar_bond_constraint_data.append( # noqa: PERF401
-                        (
-                            tuple(
-                                c_atom_idx + atom_idx
-                                for c_atom_idx in constraint.atom_idxs
-                            ),
-                        )
-                    )
-            if res.planar_ring_5_constraints is not None:
-                for constraint in res.planar_ring_5_constraints:
-                    planar_ring_5_constraint_data.append( # noqa: PERF401
-                        (
-                            tuple(
-                                c_atom_idx + atom_idx
-                                for c_atom_idx in constraint.atom_idxs
-                            ),
-                        )
-                    )
-            if res.planar_ring_6_constraints is not None:
-                for constraint in res.planar_ring_6_constraints:
-                    planar_ring_6_constraint_data.append( # noqa: PERF401
-                        (
-                            tuple(
-                                c_atom_idx + atom_idx
-                                for c_atom_idx in constraint.atom_idxs
-                            ),
-                        )
-                    )
-
-            for bond in res.bonds:
-                atom_1 = atom_idx + bond.atom_1
-                atom_2 = atom_idx + bond.atom_2
-                bond_data.append(
-                    (
-                        asym_id,
-                        asym_id,
-                        res_idx,
-                        res_idx,
-                        atom_1,
-                        atom_2,
-                        bond.type,
-                    )
-                )
-
-            for atom in res.atoms:
-                # Add atom to map
-                atom_idx_map[(chain_name, res.idx, atom.name)] = (
-                    asym_id,
-                    res_idx,
-                    atom_idx,
-                )
-
-                # Add atom to data
-                atom_data.append(
-                    (
-                        atom.name,
-                        atom.element,
-                        atom.charge,
-                        atom.coords,
-                        atom.conformer,
-                        atom.is_present,
-                        atom.chirality,
-                    )
-                )
-                atom_idx += 1
-
-            res_idx += 1
+        chains.append(parsed_chain)
 
     # Parse constraints
-
-
-    contact_constraints = []
-    constraints = schema.get("constraints", [])
-    for constraint in constraints:
+    constraints = []
+    for constraint in schema.get("constraints", []):
         if "bond" in constraint:
-
-
-
-
-            c1, r1, a1 = tuple(constraint["bond"]["atom1"])
-            c2, r2, a2 = tuple(constraint["bond"]["atom2"])
-            c1, r1, a1 = atom_idx_map[(c1, r1 - 1, a1)]  # 1-indexed
-            c2, r2, a2 = atom_idx_map[(c2, r2 - 1, a2)]  # 1-indexed
-            connections.append((c1, c2, r1, r2, a1, a2))
+            atom1 = constraint["bond"]["atom1"]
+            atom2 = constraint["bond"]["atom2"]
+            constraints.append(ParsedBond(atom1, atom2))
         elif "pocket" in constraint:
-            if (
-                "binder" not in constraint["pocket"]
-                or "contacts" not in constraint["pocket"]
-            ):
-                msg = f"Pocket constraint was not properly specified"
-                raise ValueError(msg)
-
-            if len(pocket_constraints) > 0 and not boltz_2:
-                msg = f"Only one pocket binders is supported in Boltz-1!"
-                raise ValueError(msg)
-
-            max_distance = constraint["pocket"].get("max_distance", 6.0)
-            if max_distance != 6.0 and not boltz_2:
-                msg = f"Max distance != 6.0 is not supported in Boltz-1!"
-                raise ValueError(msg)
-
             binder = constraint["pocket"]["binder"]
-            binder
-
-            contacts = []
-
-
-            ]:
-                if chains[chain_name].type == const.chain_type_ids["NONPOLYMER"]:
-                    # Non-polymer chains are indexed by atom name
-                    _, _, atom_idx = atom_idx_map[
-                        (chain_name, 0, residue_index_or_atom_name)
-                    ]
-                    contact = (chain_to_idx[chain_name], atom_idx)
-                else:
-                    # Polymer chains are indexed by residue index
-                    contact = (chain_to_idx[chain_name], residue_index_or_atom_name - 1)
-                contacts.append(contact)
-
-            pocket_constraints.append((binder, contacts, max_distance))
+            if binder in ligand_id_map:
+                binder = ligand_id_map[binder]  # Convert to internal LIG1, LIG2, etc.
+            contacts = constraint["pocket"]["contacts"]
+            max_distance = constraint["pocket"].get("max_distance", 6.0)
+            constraints.append(ParsedPocket(binder, contacts, max_distance))
         elif "contact" in constraint:
-
-
-                or "token2" not in constraint["contact"]
-            ):
-                msg = f"Contact constraint was not properly specified"
-                raise ValueError(msg)
-
-            if not boltz_2:
-                msg = f"Contact constraint is not supported in Boltz-1!"
-                raise ValueError(msg)
-
+            token1 = constraint["contact"]["token1"]
+            token2 = constraint["contact"]["token2"]
             max_distance = constraint["contact"].get("max_distance", 6.0)
-
-            chain_name1, residue_index_or_atom_name1 = constraint["contact"]["token1"]
-            if chains[chain_name1].type == const.chain_type_ids["NONPOLYMER"]:
-                # Non-polymer chains are indexed by atom name
-                _, _, atom_idx = atom_idx_map[
-                    (chain_name1, 0, residue_index_or_atom_name1)
-                ]
-                token1 = (chain_to_idx[chain_name1], atom_idx)
-            else:
-                # Polymer chains are indexed by residue index
-                token1 = (chain_to_idx[chain_name1], residue_index_or_atom_name1 - 1)
-
-            pocket_constraints.append((binder, contacts, max_distance))
+            constraints.append(ParsedContact(token1, token2, max_distance))
         else:
-            msg = f"Invalid constraint: {constraint}"
+            msg = f"Invalid constraint type: {list(constraint.keys())[0]}"
             raise ValueError(msg)
 
-    # Get protein sequences in this YAML
-    protein_seqs = {name: chains[name].sequence for name in protein_chains}
-
     # Parse templates
-
-
-
-
-
-
-
-
-
-
-
-
-        path = template["cif"]
-        template_id = Path(path).stem
-        chain_ids = template.get("chain_id", None)
-        template_chain_ids = template.get("template_id", None)
-
-        # Check validity of input
-        matched = False
-
-        if chain_ids is not None and not isinstance(chain_ids, list):
-            chain_ids = [chain_ids]
-        if template_chain_ids is not None and not isinstance(template_chain_ids, list):
-            template_chain_ids = [template_chain_ids]
-
-        if (
-            template_chain_ids is not None
-            and chain_ids is not None
-            and len(template_chain_ids) != len(chain_ids)
-        ):
-            matched = True
-            if len(template_chain_ids) != len(chain_ids):
-                msg = (
-                    "When providing both the chain_id and template_id, the number of"
-                    "template_ids provided must match the number of chain_ids!"
-                )
-                raise ValueError(msg)
-
-        # Get relevant chains ids
-        if chain_ids is None:
-            chain_ids = list(protein_chains)
-
-        for chain_id in chain_ids:
-            if chain_id not in protein_chains:
-                msg = (
-                    f"Chain {chain_id} assigned for template"
-                    f"{template_id} is not one of the protein chains!"
-                )
-                raise ValueError(msg)
-
-        # Get relevant template chain ids
-        parsed_template = parse_mmcif(
-            path,
-            mols=ccd,
-            moldir=mol_dir,
-            use_assembly=False,
-            compute_interfaces=False,
-        )
-        template_proteins = {
-            str(c["name"])
-            for c in parsed_template.data.chains
-            if c["mol_type"] == const.chain_type_ids["PROTEIN"]
-        }
-        if template_chain_ids is None:
-            template_chain_ids = list(template_proteins)
-
-        for chain_id in template_chain_ids:
-            if chain_id not in template_proteins:
-                msg = (
-                    f"Template chain {chain_id} assigned for template"
-                    f"{template_id} is not one of the protein chains!"
-                )
-                raise ValueError(msg)
-
-        # Compute template records
-        if matched:
-            template_records.extend(
-                get_template_records_from_matching(
-                    template_id=template_id,
-                    chain_ids=chain_ids,
-                    sequences=protein_seqs,
-                    template_chain_ids=template_chain_ids,
-                    template_sequences=parsed_template.sequences,
-                )
-            )
-        else:
-            template_records.extend(
-                get_template_records_from_search(
-                    template_id=template_id,
-                    chain_ids=chain_ids,
-                    sequences=protein_seqs,
-                    template_chain_ids=template_chain_ids,
-                    template_sequences=parsed_template.sequences,
-                )
-            )
-        # Save template
-        templates[template_id] = parsed_template.data
-
-    # Convert into datatypes
-    residues = np.array(res_data, dtype=Residue)
-    chains = np.array(chain_data, dtype=Chain)
-    interfaces = np.array([], dtype=Interface)
-    mask = np.ones(len(chain_data), dtype=bool)
-    rdkit_bounds_constraints = np.array(
-        rdkit_bounds_constraint_data, dtype=RDKitBoundsConstraint
-    )
-    chiral_atom_constraints = np.array(
-        chiral_atom_constraint_data, dtype=ChiralAtomConstraint
-    )
-    stereo_bond_constraints = np.array(
-        stereo_bond_constraint_data, dtype=StereoBondConstraint
-    )
-    planar_bond_constraints = np.array(
-        planar_bond_constraint_data, dtype=PlanarBondConstraint
-    )
-    planar_ring_5_constraints = np.array(
-        planar_ring_5_constraint_data, dtype=PlanarRing5Constraint
-    )
-    planar_ring_6_constraints = np.array(
-        planar_ring_6_constraint_data, dtype=PlanarRing6Constraint
-    )
-
-    if boltz_2:
-        atom_data = [(a[0], a[3], a[5], 0.0, 1.0) for a in atom_data]
-        connections = [(*c, const.bond_type_ids["COVALENT"]) for c in connections]
-        bond_data = bond_data + connections
-        atoms = np.array(atom_data, dtype=AtomV2)
-        bonds = np.array(bond_data, dtype=BondV2)
-        coords = [(x,) for x in atoms["coords"]]
-        coords = np.array(coords, Coords)
-        ensemble = np.array([(0, len(coords))], dtype=Ensemble)
-        data = StructureV2(
-            atoms=atoms,
-            bonds=bonds,
-            residues=residues,
-            chains=chains,
-            interfaces=interfaces,
-            mask=mask,
-            coords=coords,
-            ensemble=ensemble,
-        )
-    else:
-        bond_data = [(b[4], b[5], b[6]) for b in bond_data]
-        atom_data = [(convert_atom_name(a[0]), *a[1:]) for a in atom_data]
-        atoms = np.array(atom_data, dtype=Atom)
-        bonds = np.array(bond_data, dtype=Bond)
-        connections = np.array(connections, dtype=Connection)
-        data = Structure(
-            atoms=atoms,
-            bonds=bonds,
-            residues=residues,
-            chains=chains,
-            connections=connections,
-            interfaces=interfaces,
-            mask=mask,
-        )
-
-    # Create metadata
-    struct_info = StructureInfo(num_chains=len(chains))
-    chain_infos = []
-    for chain in chains:
-        chain_info = ChainInfo(
-            chain_id=int(chain["asym_id"]),
-            chain_name=chain["name"],
-            mol_type=int(chain["mol_type"]),
-            cluster_id=-1,
-            msa_id=chain_to_msa[chain["name"]],
-            num_residues=int(chain["res_num"]),
-            valid=True,
-            entity_id=int(chain["entity_id"]),
-        )
-        chain_infos.append(chain_info)
-
-    options = InferenceOptions(pocket_constraints=pocket_constraints)
-    record = Record(
-        id=name,
-        structure=struct_info,
-        chains=chain_infos,
-        interfaces=[],
-        inference_options=options,
-        templates=template_records,
-        affinity=affinity_info,
-    )
-
-    residue_constraints = ResidueConstraints(
-        rdkit_bounds_constraints=rdkit_bounds_constraints,
-        chiral_atom_constraints=chiral_atom_constraints,
-        stereo_bond_constraints=stereo_bond_constraints,
-        planar_bond_constraints=planar_bond_constraints,
-        planar_ring_5_constraints=planar_ring_5_constraints,
-        planar_ring_6_constraints=planar_ring_6_constraints,
-    )
-
-    return Target(
-        record=record,
-        structure=data,
-        sequences=entity_to_seq,
-        residue_constraints=residue_constraints,
+    templates = []
+    for template in schema.get("templates", []):
+        cif = template["cif"]
+        chain_id = template.get("chain_id")
+        template_id = template.get("template_id")
+        templates.append(ParsedTemplate(cif, chain_id, template_id))
+
+    # Create target
+    target = Target(
+        name=name,
+        chains=chains,
+        constraints=constraints,
         templates=templates,
         extra_mols=extra_mols,
     )
 
+    return target
+
 
 def standardize(smiles: str) -> Optional[str]:
     """Standardize a molecule and return its SMILES and a flag indicating whether the molecule is valid.
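The constraint handling is now a straight mapping from the YAML entries to small parsed objects, with pocket binders rewritten through ligand_id_map. The sketch below restates that loop with stand-in dataclasses; ParsedBond, ParsedPocket, and ParsedContact are assumed shapes here, and the real classes defined elsewhere in the package may carry different fields.

from dataclasses import dataclass
from typing import Any


@dataclass
class ParsedBond:  # stand-in, assumed shape
    atom1: Any
    atom2: Any


@dataclass
class ParsedPocket:  # stand-in, assumed shape
    binder: str
    contacts: list
    max_distance: float


@dataclass
class ParsedContact:  # stand-in, assumed shape
    token1: Any
    token2: Any
    max_distance: float


def parse_constraints(schema: dict, ligand_id_map: dict) -> list:
    # Restatement of the simplified loop in the new parser
    constraints = []
    for constraint in schema.get("constraints", []):
        if "bond" in constraint:
            constraints.append(
                ParsedBond(constraint["bond"]["atom1"], constraint["bond"]["atom2"])
            )
        elif "pocket" in constraint:
            binder = constraint["pocket"]["binder"]
            binder = ligand_id_map.get(binder, binder)  # user ID -> internal LIG name
            contacts = constraint["pocket"]["contacts"]
            max_distance = constraint["pocket"].get("max_distance", 6.0)
            constraints.append(ParsedPocket(binder, contacts, max_distance))
        elif "contact" in constraint:
            token1 = constraint["contact"]["token1"]
            token2 = constraint["contact"]["token2"]
            max_distance = constraint["contact"].get("max_distance", 6.0)
            constraints.append(ParsedContact(token1, token2, max_distance))
        else:
            msg = f"Invalid constraint type: {list(constraint.keys())[0]}"
            raise ValueError(msg)
    return constraints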
boltz/main.py
CHANGED
@@ -742,7 +742,11 @@ def process_inputs(
 
         # Process this input file
         click.echo(f"Processing {input_file.name}")
-
+        try:
+            process_input_partial(input_file)
+        except Exception as e:
+            click.echo(f"Error processing {input_file.name}: {str(e)}")
+            continue
 
         # Copy MSA files to central MSA directory
         for msa_file in file_processed_msa_dir.glob("*.npz"):
{boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
-boltz/main.py,sha256=
+boltz/main.py,sha256=w7c8dpAR0_97HIS_u76wywC1lswL4XVg98CuCrrXLvQ,41515
 boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
 boltz/data/mol.py,sha256=maOpPHEGX1VVXCIFY6pQNGF7gUBZPAfgSvuPf2QO1yc,34268
@@ -38,7 +38,7 @@ boltz/data/parse/csv.py,sha256=Hcq8rJW2njczahEr8jfd_o-zxLaNSgJ3YIoC9srIqpw,2518
 boltz/data/parse/fasta.py,sha256=taI4s_CqPtyF0XaLJAsVAJHCL0GXm2g1g8Qeccdxikk,3906
 boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,36822
 boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
-boltz/data/parse/schema.py,sha256=
+boltz/data/parse/schema.py,sha256=DvMwh1Brn4ELzBuLEk89fdYv4XBx5bX3Fq2_TMeZ-08,43352
 boltz/data/parse/yaml.py,sha256=GRFRMtDD4PQ4PIpA_S1jj0vRaEu2LlZd_g4rN1zUrNo,1505
 boltz/data/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/data/sample/cluster.py,sha256=9Sx8qP7zGZOAyEspwYFtCTbGTBZnuN-zfCKFbbA_6oI,8175
@@ -104,9 +104,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
 boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
 boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
-boltz_vsynthes-1.0.
-boltz_vsynthes-1.0.
-boltz_vsynthes-1.0.
-boltz_vsynthes-1.0.
-boltz_vsynthes-1.0.
-boltz_vsynthes-1.0.
+boltz_vsynthes-1.0.7.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
+boltz_vsynthes-1.0.7.dist-info/METADATA,sha256=AQB7KiKkpIvaBZ2aMiTw1wfHE8_Vm_4D7cbJMN80J2U,7171
+boltz_vsynthes-1.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+boltz_vsynthes-1.0.7.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
+boltz_vsynthes-1.0.7.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
+boltz_vsynthes-1.0.7.dist-info/RECORD,,
{boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/WHEEL
File without changes
{boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/entry_points.txt
File without changes
{boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/licenses/LICENSE
File without changes
{boltz_vsynthes-1.0.5.dist-info → boltz_vsynthes-1.0.7.dist-info}/top_level.txt
File without changes