PyPI - boltz-vsynthes - Versions diffs - 1.0.37__py3-none-any.whl → 1.0.39__py3-none-any.whl - Mend

boltz-vsynthes 1.0.37__py3-none-any.whl → 1.0.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

boltz/main.py CHANGED Viewed

@@ -272,7 +272,7 @@ def get_cache_path() -> str:
 def check_inputs(data: Path) -> list[Path]:
-    """Check the input data and output directory.
+    """Check the input data.
     Parameters
     ----------
@@ -282,18 +282,21 @@ def check_inputs(data: Path) -> list[Path]:
     Returns
     -------
     list[Path]
-        The list of input data.
+        The list of input files.
     """
-    click.echo("Checking input data.")
     # Check if data is a directory
     if data.is_dir():
         data: list[Path] = list(data.glob("*"))
         # Filter out non .fasta or .yaml files, raise
         # an error on directory and other file types
+        filtered_data = []
         for d in data:
+            # Skip hidden files and directories
+            if d.name.startswith('.') or any(part.startswith('.') for part in d.parts):
+                continue
             if d.is_dir():
                 msg = f"Found directory {d} instead of .fasta or .yaml."
                 raise RuntimeError(msg)
@@ -303,6 +306,8 @@ def check_inputs(data: Path) -> list[Path]:
                     "please provide a .fasta or .yaml file."
                 )
                 raise RuntimeError(msg)
+            filtered_data.append(d)
+        data = filtered_data
     else:
         data = [data]
@@ -493,13 +498,25 @@ def process_input(  # noqa: C901, PLR0912, PLR0915, D103
 ) -> None:
     try:
         # Parse data
-        if path.suffix in (".fa", ".fas", ".fasta"):
+        if path.is_dir():
+            # Process all YAML and FASTA files in the directory
+            targets = []
+            for file_path in path.glob("*"):
+                if file_path.suffix in (".fa", ".fas", ".fasta"):
+                    target = parse_fasta(file_path, ccd, mol_dir, boltz2)
+                    targets.append(target)
+                elif file_path.suffix in (".yml", ".yaml"):
+                    target = parse_yaml(file_path, ccd, mol_dir, boltz2)
+                    if not isinstance(target, list):
+                        target = [target]
+                    targets.extend(target)
+        elif path.suffix in (".fa", ".fas", ".fasta"):
             target = parse_fasta(path, ccd, mol_dir, boltz2)
+            targets = [target]
         elif path.suffix in (".yml", ".yaml"):
-            target = parse_yaml(path, ccd, mol_dir, boltz2)
-        elif path.is_dir():
-            msg = f"Found directory {path} instead of .fasta or .yaml, skipping."
-            raise RuntimeError(msg)  # noqa: TRY301
+            targets = parse_yaml(path, ccd, mol_dir, boltz2)
+            if not isinstance(targets, list):
+                targets = [targets]
         else:
             msg = (
                 f"Unable to parse filetype {path.suffix}, "
@@ -507,96 +524,98 @@ def process_input(  # noqa: C901, PLR0912, PLR0915, D103
             )
             raise RuntimeError(msg)  # noqa: TRY301
-        # Get target id
-        target_id = target.record.id
-        # Get all MSA ids and decide whether to generate MSA
-        to_generate = {}
-        prot_id = const.chain_type_ids["PROTEIN"]
-        for chain in target.record.chains:
-            # Add to generate list, assigning entity id
-            if (chain.mol_type == prot_id) and (chain.msa_id == 0):
-                entity_id = chain.entity_id
-                msa_id = f"{target_id}_{entity_id}"
-                to_generate[msa_id] = target.sequences[entity_id]
-                chain.msa_id = msa_dir / f"{msa_id}.csv"
-            # We do not support msa generation for non-protein chains
-            elif chain.msa_id == 0:
-                chain.msa_id = -1
-        # Generate MSA
-        if to_generate and not use_msa_server:
-            msg = "Missing MSA's in input and --use_msa_server flag not set."
-            raise RuntimeError(msg)  # noqa: TRY301
-        if to_generate:
-            msg = f"Generating MSA for {path} with {len(to_generate)} protein entities."
-            click.echo(msg)
-            compute_msa(
-                data=to_generate,
-                target_id=target_id,
-                msa_dir=msa_dir,
-                msa_server_url=msa_server_url,
-                msa_pairing_strategy=msa_pairing_strategy,
-            )
+        # Process each target
+        for target in targets:
+            # Get target id
+            target_id = target.record.id
+            # Get all MSA ids and decide whether to generate MSA
+            to_generate = {}
+            prot_id = const.chain_type_ids["PROTEIN"]
+            for chain in target.record.chains:
+                # Add to generate list, assigning entity id
+                if (chain.mol_type == prot_id) and (chain.msa_id == 0):
+                    entity_id = chain.entity_id
+                    msa_id = f"{target_id}_{entity_id}"
+                    to_generate[msa_id] = target.sequences[entity_id]
+                    chain.msa_id = msa_dir / f"{msa_id}.csv"
+                # We do not support msa generation for non-protein chains
+                elif chain.msa_id == 0:
+                    chain.msa_id = -1
+            # Generate MSA
+            if to_generate and not use_msa_server:
+                msg = "Missing MSA's in input and --use_msa_server flag not set."
+                raise RuntimeError(msg)  # noqa: TRY301
+            if to_generate:
+                msg = f"Generating MSA for {path} with {len(to_generate)} protein entities."
+                click.echo(msg)
+                compute_msa(
+                    data=to_generate,
+                    target_id=target_id,
+                    msa_dir=msa_dir,
+                    msa_server_url=msa_server_url,
+                    msa_pairing_strategy=msa_pairing_strategy,
+                )
-        # Parse MSA data
-        msas = sorted({c.msa_id for c in target.record.chains if c.msa_id != -1})
-        msa_id_map = {}
-        for msa_idx, msa_id in enumerate(msas):
-            # Check that raw MSA exists
-            msa_path = Path(msa_id)
-            if not msa_path.exists():
-                msg = f"MSA file {msa_path} not found."
-                raise FileNotFoundError(msg)  # noqa: TRY301
-            # Dump processed MSA
-            processed = processed_msa_dir / f"{target_id}_{msa_idx}.npz"
-            msa_id_map[msa_id] = f"{target_id}_{msa_idx}"
-            if not processed.exists():
-                # Parse A3M
-                if msa_path.suffix == ".a3m":
-                    msa: MSA = parse_a3m(
-                        msa_path,
-                        taxonomy=None,
-                        max_seqs=max_msa_seqs,
-                    )
-                elif msa_path.suffix == ".csv":
-                    msa: MSA = parse_csv(msa_path, max_seqs=max_msa_seqs)
-                else:
-                    msg = f"MSA file {msa_path} not supported, only a3m or csv."
-                    raise RuntimeError(msg)  # noqa: TRY301
-                msa.dump(processed)
-        # Modify records to point to processed MSA
-        for c in target.record.chains:
-            if (c.msa_id != -1) and (c.msa_id in msa_id_map):
-                c.msa_id = msa_id_map[c.msa_id]
-        # Dump templates
-        for template_id, template in target.templates.items():
-            name = f"{target.record.id}_{template_id}.npz"
-            template_path = processed_templates_dir / name
-            template.dump(template_path)
-        # Dump constraints
-        constraints_path = processed_constraints_dir / f"{target.record.id}.npz"
-        target.residue_constraints.dump(constraints_path)
-        # Dump extra molecules
-        Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)
-        with (processed_mols_dir / f"{target.record.id}.pkl").open("wb") as f:
-            pickle.dump(target.extra_mols, f)
-        # Dump structure
-        struct_path = structure_dir / f"{target.record.id}.npz"
-        target.structure.dump(struct_path)
-        # Dump record
-        record_path = records_dir / f"{target.record.id}.json"
-        target.record.dump(record_path)
+            # Parse MSA data
+            msas = sorted({c.msa_id for c in target.record.chains if c.msa_id != -1})
+            msa_id_map = {}
+            for msa_idx, msa_id in enumerate(msas):
+                # Check that raw MSA exists
+                msa_path = Path(msa_id)
+                if not msa_path.exists():
+                    msg = f"MSA file {msa_path} not found."
+                    raise FileNotFoundError(msg)  # noqa: TRY301
+                # Dump processed MSA
+                processed = processed_msa_dir / f"{target_id}_{msa_idx}.npz"
+                msa_id_map[msa_id] = f"{target_id}_{msa_idx}"
+                if not processed.exists():
+                    # Parse A3M
+                    if msa_path.suffix == ".a3m":
+                        msa: MSA = parse_a3m(
+                            msa_path,
+                            taxonomy=None,
+                            max_seqs=max_msa_seqs,
+                        )
+                    elif msa_path.suffix == ".csv":
+                        msa: MSA = parse_csv(msa_path, max_seqs=max_msa_seqs)
+                    else:
+                        msg = f"MSA file {msa_path} not supported, only a3m or csv."
+                        raise RuntimeError(msg)  # noqa: TRY301
+                    msa.dump(processed)
+            # Modify records to point to processed MSA
+            for c in target.record.chains:
+                if (c.msa_id != -1) and (c.msa_id in msa_id_map):
+                    c.msa_id = msa_id_map[c.msa_id]
+            # Dump templates
+            for template_id, template in target.templates.items():
+                name = f"{target.record.id}_{template_id}.npz"
+                template_path = processed_templates_dir / name
+                template.dump(template_path)
+            # Dump constraints
+            constraints_path = processed_constraints_dir / f"{target.record.id}.npz"
+            target.residue_constraints.dump(constraints_path)
+            # Dump extra molecules
+            Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)
+            with (processed_mols_dir / f"{target.record.id}.pkl").open("wb") as f:
+                pickle.dump(target.extra_mols, f)
+            # Dump structure
+            struct_path = structure_dir / f"{target.record.id}.npz"
+            target.structure.dump(struct_path)
+            # Dump record
+            record_path = records_dir / f"{target.record.id}.json"
+            target.record.dump(record_path)
     except Exception as e:  # noqa: BLE001
         import traceback

{boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: boltz-vsynthes
-Version: 1.0.37
+Version: 1.0.39
 Summary: Boltz for VSYNTHES
 Requires-Python: <3.13,>=3.10
 Description-Content-Type: text/markdown

{boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 boltz/__init__.py,sha256=F_-so3S40iZrSZ89Ge4TS6aZqwWyZXq_H4AXGDlbA_g,187
-boltz/main.py,sha256=AMYdcqTLOL5Mbo8P2ix1KeNwTijH5fWNzKUnLHBNtn0,39735
+boltz/main.py,sha256=SHM-t-9wjwjTJmWR4N5SrAHxk2vgz7fTruz5shiixVc,40882
 boltz/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/data/const.py,sha256=1M-88Z6HkfKY6MkNtqcj3b9P-oX9xEXluh3qM_u8dNU,26779
 boltz/data/mol.py,sha256=maOpPHEGX1VVXCIFY6pQNGF7gUBZPAfgSvuPf2QO1yc,34268
@@ -107,9 +107,9 @@ boltz/model/optim/scheduler.py,sha256=nB4jz0CZ4pR4n08LQngExL_pNycIdYI8AXVoHPnZWQ
 boltz/model/potentials/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 boltz/model/potentials/potentials.py,sha256=vev8Vjfs-ML1hyrdv_R8DynG4wSFahJ6nzPWp7CYQqw,17507
 boltz/model/potentials/schedules.py,sha256=m7XJjfuF9uTX3bR9VisXv1rvzJjxiD8PobXRpcBBu1c,968
-boltz_vsynthes-1.0.37.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
-boltz_vsynthes-1.0.37.dist-info/METADATA,sha256=WhICTabdnBWjYVatPqvnCo9EutL801DxXi4tT5LcP2A,7171
-boltz_vsynthes-1.0.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-boltz_vsynthes-1.0.37.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
-boltz_vsynthes-1.0.37.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
-boltz_vsynthes-1.0.37.dist-info/RECORD,,
+boltz_vsynthes-1.0.39.dist-info/licenses/LICENSE,sha256=8GZ_1eZsUeG6jdqgJJxtciWzADfgLEV4LY8sKUOsJhc,1102
+boltz_vsynthes-1.0.39.dist-info/METADATA,sha256=s4YTm6DLYVooEmxNB0gz1_0aFspJeAqR5KWVZDyTEv4,7171
+boltz_vsynthes-1.0.39.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+boltz_vsynthes-1.0.39.dist-info/entry_points.txt,sha256=n5a5I35ntu9lmyr16oZgHPFY0b0YxjiixY7m7nbMTLc,41
+boltz_vsynthes-1.0.39.dist-info/top_level.txt,sha256=MgU3Jfb-ctWm07YGMts68PMjSh9v26D0gfG3dFRmVFA,6
+boltz_vsynthes-1.0.39.dist-info/RECORD,,

{boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/WHEEL RENAMED Viewed

File without changes

{boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{boltz_vsynthes-1.0.37.dist-info → boltz_vsynthes-1.0.39.dist-info}/top_level.txt RENAMED Viewed

File without changes

boltz-vsynthes 1.0.37__py3-none-any.whl → 1.0.39__py3-none-any.whl

boltz-vsynthes 1.0.37__py3-none-any.whl → 1.0.39__py3-none-any.whl