PyPI - boltz-vsynthes - Versions diffs - 1.0.38__py3-none-any.whl → 1.0.40__py3-none-any.whl - Mend

boltz-vsynthes 1.0.38__py3-none-any.whl → 1.0.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

boltz/data/parse/schema.py CHANGED Viewed

@@ -1326,6 +1326,60 @@ def parse_boltz_schema(  # noqa: C901, PLR0915, PLR0912
                 "cyclic", False
             ), "Cyclic flag is not supported for ligands"
+        elif (entity_type == "ligand") and ("sdf" in items[0][entity_type]):
+            # Handle SDF file
+            sdf_path = Path(items[0][entity_type]["sdf"])
+            from boltz.data.parse.sdf import parse_sdf
+            target = parse_sdf(sdf_path, ccd, mol_dir)
+            mol = target["sequences"][0]["ligand"]["smiles"]
+            if affinity:
+                mol = standardize(mol)
+            mol = AllChem.MolFromSmiles(mol)
+            mol = AllChem.AddHs(mol)
+            # Set atom names
+            canonical_order = AllChem.CanonicalRankAtoms(mol)
+            for atom, can_idx in zip(mol.GetAtoms(), canonical_order):
+                atom_name = atom.GetSymbol().upper() + str(can_idx + 1)
+                if len(atom_name) > 4:
+                    msg = (
+                        f"{mol} has an atom with a name longer than "
+                        f"4 characters: {atom_name}."
+                    )
+                    raise ValueError(msg)
+                atom.SetProp("name", atom_name)
+            success = compute_3d_conformer(mol)
+            if not success:
+                msg = f"Failed to compute 3D conformer for {mol}"
+                raise ValueError(msg)
+            mol_no_h = AllChem.RemoveHs(mol, sanitize=False)
+            affinity_mw = AllChem.Descriptors.MolWt(mol_no_h) if affinity else None
+            extra_mols[f"LIG{ligand_id}"] = mol_no_h
+            residue = parse_ccd_residue(
+                name=f"LIG{ligand_id}",
+                ref_mol=mol,
+                res_idx=0,
+            )
+            ligand_id += 1
+            parsed_chain = ParsedChain(
+                entity=entity_id,
+                residues=[residue],
+                type=const.chain_type_ids["NONPOLYMER"],
+                cyclic_period=0,
+                sequence=None,
+                affinity=affinity,
+                affinity_mw=affinity_mw,
+            )
+            assert not items[0][entity_type].get(
+                "cyclic", False
+            ), "Cyclic flag is not supported for ligands"
         else:
             msg = f"Invalid entity type: {entity_type}"
             raise ValueError(msg)
@@ -1393,15 +1447,6 @@ def parse_boltz_schema(  # noqa: C901, PLR0915, PLR0912
                 chain_id=asym_id,
                 mw=chain.affinity_mw,
             )
-            # Save affinity info if output directory is specified
-            if output_dir is not None:
-                affinity_path = subfolder / "affinity_info.json"
-                with open(affinity_path, "w") as f:
-                    json.dump({
-                        "chain_id": asym_id,
-                        "mw": chain.affinity_mw,
-                        "chain_name": chain_name
-                    }, f)
         # Find all copies of this chain in the assembly
         entity_id = int(chain.entity)

boltz/main.py CHANGED Viewed

@@ -498,13 +498,25 @@ def process_input(  # noqa: C901, PLR0912, PLR0915, D103
 ) -> None:
     try:
         # Parse data
-        if path.suffix in (".fa", ".fas", ".fasta"):
+        if path.is_dir():
+            # Process all YAML and FASTA files in the directory
+            targets = []
+            for file_path in path.glob("*"):
+                if file_path.suffix in (".fa", ".fas", ".fasta"):
+                    target = parse_fasta(file_path, ccd, mol_dir, boltz2)
+                    targets.append(target)
+                elif file_path.suffix in (".yml", ".yaml"):
+                    target = parse_yaml(file_path, ccd, mol_dir, boltz2)
+                    if not isinstance(target, list):
+                        target = [target]
+                    targets.extend(target)
+        elif path.suffix in (".fa", ".fas", ".fasta"):
             target = parse_fasta(path, ccd, mol_dir, boltz2)
+            targets = [target]
         elif path.suffix in (".yml", ".yaml"):
-            target = parse_yaml(path, ccd, mol_dir, boltz2)
-        elif path.is_dir():
-            msg = f"Found directory {path} instead of .fasta or .yaml, skipping."
-            raise RuntimeError(msg)  # noqa: TRY301
+            targets = parse_yaml(path, ccd, mol_dir, boltz2)
+            if not isinstance(targets, list):
+                targets = [targets]
         else:
             msg = (
                 f"Unable to parse filetype {path.suffix}, "
@@ -512,96 +524,98 @@ def process_input(  # noqa: C901, PLR0912, PLR0915, D103
             )
             raise RuntimeError(msg)  # noqa: TRY301
-        # Get target id
-        target_id = target.record.id
-        # Get all MSA ids and decide whether to generate MSA
-        to_generate = {}
-        prot_id = const.chain_type_ids["PROTEIN"]
-        for chain in target.record.chains:
-            # Add to generate list, assigning entity id
-            if (chain.mol_type == prot_id) and (chain.msa_id == 0):
-                entity_id = chain.entity_id
-                msa_id = f"{target_id}_{entity_id}"
-                to_generate[msa_id] = target.sequences[entity_id]
-                chain.msa_id = msa_dir / f"{msa_id}.csv"
-            # We do not support msa generation for non-protein chains
-            elif chain.msa_id == 0:
-                chain.msa_id = -1
-        # Generate MSA
-        if to_generate and not use_msa_server:
-            msg = "Missing MSA's in input and --use_msa_server flag not set."
-            raise RuntimeError(msg)  # noqa: TRY301
-        if to_generate:
-            msg = f"Generating MSA for {path} with {len(to_generate)} protein entities."
-            click.echo(msg)
-            compute_msa(
-                data=to_generate,
-                target_id=target_id,
-                msa_dir=msa_dir,
-                msa_server_url=msa_server_url,
-                msa_pairing_strategy=msa_pairing_strategy,
-            )
+        # Process each target
+        for target in targets:
+            # Get target id
+            target_id = target.record.id
+            # Get all MSA ids and decide whether to generate MSA
+            to_generate = {}
+            prot_id = const.chain_type_ids["PROTEIN"]
+            for chain in target.record.chains:
+                # Add to generate list, assigning entity id
+                if (chain.mol_type == prot_id) and (chain.msa_id == 0):
+                    entity_id = chain.entity_id
+                    msa_id = f"{target_id}_{entity_id}"
+                    to_generate[msa_id] = target.sequences[entity_id]
+                    chain.msa_id = msa_dir / f"{msa_id}.csv"
+                # We do not support msa generation for non-protein chains
+                elif chain.msa_id == 0:
+                    chain.msa_id = -1
+            # Generate MSA
+            if to_generate and not use_msa_server:
+                msg = "Missing MSA's in input and --use_msa_server flag not set."
+                raise RuntimeError(msg)  # noqa: TRY301
+            if to_generate:
+                msg = f"Generating MSA for {path} with {len(to_generate)} protein entities."
+                click.echo(msg)
+                compute_msa(
+                    data=to_generate,
+                    target_id=target_id,
+                    msa_dir=msa_dir,
+                    msa_server_url=msa_server_url,
+                    msa_pairing_strategy=msa_pairing_strategy,
+                )
-        # Parse MSA data
-        msas = sorted({c.msa_id for c in target.record.chains if c.msa_id != -1})
-        msa_id_map = {}
-        for msa_idx, msa_id in enumerate(msas):
-            # Check that raw MSA exists
-            msa_path = Path(msa_id)
-            if not msa_path.exists():
-                msg = f"MSA file {msa_path} not found."
-                raise FileNotFoundError(msg)  # noqa: TRY301
-            # Dump processed MSA
-            processed = processed_msa_dir / f"{target_id}_{msa_idx}.npz"
-            msa_id_map[msa_id] = f"{target_id}_{msa_idx}"
-            if not processed.exists():
-                # Parse A3M
-                if msa_path.suffix == ".a3m":
-                    msa: MSA = parse_a3m(
-                        msa_path,
-                        taxonomy=None,
-                        max_seqs=max_msa_seqs,
-                    )
-                elif msa_path.suffix == ".csv":
-                    msa: MSA = parse_csv(msa_path, max_seqs=max_msa_seqs)
-                else:
-                    msg = f"MSA file {msa_path} not supported, only a3m or csv."
-                    raise RuntimeError(msg)  # noqa: TRY301
-                msa.dump(processed)
-        # Modify records to point to processed MSA
-        for c in target.record.chains:
-            if (c.msa_id != -1) and (c.msa_id in msa_id_map):
-                c.msa_id = msa_id_map[c.msa_id]
-        # Dump templates
-        for template_id, template in target.templates.items():
-            name = f"{target.record.id}_{template_id}.npz"
-            template_path = processed_templates_dir / name
-            template.dump(template_path)
-        # Dump constraints
-        constraints_path = processed_constraints_dir / f"{target.record.id}.npz"
-        target.residue_constraints.dump(constraints_path)
-        # Dump extra molecules
-        Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)
-        with (processed_mols_dir / f"{target.record.id}.pkl").open("wb") as f:
-            pickle.dump(target.extra_mols, f)
-        # Dump structure
-        struct_path = structure_dir / f"{target.record.id}.npz"
-        target.structure.dump(struct_path)
-        # Dump record
-        record_path = records_dir / f"{target.record.id}.json"
-        target.record.dump(record_path)
+            # Parse MSA data
+            msas = sorted({c.msa_id for c in target.record.chains if c.msa_id != -1})
+            msa_id_map = {}
+            for msa_idx, msa_id in enumerate(msas):
+                # Check that raw MSA exists
+                msa_path = Path(msa_id)
+                if not msa_path.exists():
+                    msg = f"MSA file {msa_path} not found."
+                    raise FileNotFoundError(msg)  # noqa: TRY301
+                # Dump processed MSA
+                processed = processed_msa_dir / f"{target_id}_{msa_idx}.npz"
+                msa_id_map[msa_id] = f"{target_id}_{msa_idx}"
+                if not processed.exists():
+                    # Parse A3M
+                    if msa_path.suffix == ".a3m":
+                        msa: MSA = parse_a3m(
+                            msa_path,
+                            taxonomy=None,
+                            max_seqs=max_msa_seqs,
+                        )
+                    elif msa_path.suffix == ".csv":
+                        msa: MSA = parse_csv(msa_path, max_seqs=max_msa_seqs)
+                    else:
+                        msg = f"MSA file {msa_path} not supported, only a3m or csv."
+                        raise RuntimeError(msg)  # noqa: TRY301
+                    msa.dump(processed)
+            # Modify records to point to processed MSA
+            for c in target.record.chains:
+                if (c.msa_id != -1) and (c.msa_id in msa_id_map):
+                    c.msa_id = msa_id_map[c.msa_id]
+            # Dump templates
+            for template_id, template in target.templates.items():
+                name = f"{target.record.id}_{template_id}.npz"
+                template_path = processed_templates_dir / name
+                template.dump(template_path)
+            # Dump constraints
+            constraints_path = processed_constraints_dir / f"{target.record.id}.npz"
+            target.residue_constraints.dump(constraints_path)
+            # Dump extra molecules
+            Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)
+            with (processed_mols_dir / f"{target.record.id}.pkl").open("wb") as f:
+                pickle.dump(target.extra_mols, f)
+            # Dump structure
+            struct_path = structure_dir / f"{target.record.id}.npz"
+            target.structure.dump(struct_path)
+            # Dump record
+            record_path = records_dir / f"{target.record.id}.json"
+            target.record.dump(record_path)
     except Exception as e:  # noqa: BLE001
         import traceback

{boltz_vsynthes-1.0.38.dist-info → boltz_vsynthes-1.0.40.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: boltz-vsynthes
-Version: 1.0.38
+Version: 1.0.40
 Summary: Boltz for VSYNTHES
 Requires-Python: <3.13,>=3.10
 Description-Content-Type: text/markdown

{boltz_vsynthes-1.0.38.dist-info → boltz_vsynthes-1.0.40.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
-boltz/main.py,sha256=i5_15JZ9vjZ9RSLZb2F0a7scuQ0QfFkgUQVftTiD3h0,39945
+boltz/main.py,sha256=SHM-t-9wjwjTJmWR4N5SrAHxk2vgz7fTruz5shiixVc,40882
 boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
 boltz/data/mol.py,sha256=maOpPHEGX1VVXCIFY6pQNGF7gUBZPAfgSvuPf2QO1yc,34268
@@ -40,7 +40,7 @@ boltz/data/parse/mmcif.py,sha256=25kEXCkx-OuaawAs7cdz0fxdRu5_CCO0AV00u84PrjQ,368
 boltz/data/parse/mmcif_with_constraints.py,sha256=WHYZckSqUwu-Nb9vmVmxHmC7uxwVrF7AVUeVKsc5wGQ,51473
 boltz/data/parse/pdb.py,sha256=iybk4p2UgUy_ABGprDq_xxyPSdm1HAZsGTM0lhxVEwM,1654
 boltz/data/parse/pdb_download.py,sha256=wge-scX-lOatX0q83W1wOsaql99rYp-6uGWSHEc995M,2718
-boltz/data/parse/schema.py,sha256=b0Mh1eCg6gTyOQt7GkEFAQdYCZJ1jqAJbUy9Tv53K4E,64781
+boltz/data/parse/schema.py,sha256=p4KIAVzQAuApcxRLHc6-KKG7ICgLmEWVzE8Qqm6v04w,66402
 boltz/data/parse/sdf.py,sha256=fs3MQVClDcCzxJaeVYiDuoh-fUrYc8Tcd5Bz8ws3FKI,2052
 boltz/data/parse/yaml.py,sha256=M3dRQK2mMDue3bPSO_T2ThaVojSMrOV7rMY-KXQvaGQ,2047
 boltz/data/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -107,9 +107,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
 boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
 boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
-boltz_vsynthes-1.0.38.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
-boltz_vsynthes-1.0.38.dist-info/METADATA,sha256=HtZ8GekM9xBSPv24CUhVnarUX_GnVP-_tvIM8HfLCZc,7171
-boltz_vsynthes-1.0.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-boltz_vsynthes-1.0.38.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
-boltz_vsynthes-1.0.38.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
-boltz_vsynthes-1.0.38.dist-info/RECORD,,
+boltz_vsynthes-1.0.40.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
+boltz_vsynthes-1.0.40.dist-info/METADATA,sha256=z2kizv_5w3PrpKHsDV_GXjhzQDRxRCWWT2pOESvbcFU,7171
+boltz_vsynthes-1.0.40.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+boltz_vsynthes-1.0.40.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
+boltz_vsynthes-1.0.40.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
+boltz_vsynthes-1.0.40.dist-info/RECORD,,

{boltz_vsynthes-1.0.38.dist-info → boltz_vsynthes-1.0.40.dist-info}/WHEEL RENAMED Viewed

File without changes

{boltz_vsynthes-1.0.38.dist-info → boltz_vsynthes-1.0.40.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{boltz_vsynthes-1.0.38.dist-info → boltz_vsynthes-1.0.40.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{boltz_vsynthes-1.0.38.dist-info → boltz_vsynthes-1.0.40.dist-info}/top_level.txt RENAMED Viewed

File without changes

boltz-vsynthes 1.0.38__py3-none-any.whl → 1.0.40__py3-none-any.whl

boltz-vsynthes 1.0.38__py3-none-any.whl → 1.0.40__py3-none-any.whl