PyPI - openprotein-python - Versions diffs - 0.8.7__tar.gz → 0.8.9__tar.gz - Mend

openprotein-python 0.8.7 (tar.gz) → 0.8.9 (tar.gz)

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (88)

{openprotein_python-0.8.7 → openprotein_python-0.8.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openprotein-python
-Version: 0.8.7
+Version: 0.8.9
 Summary: OpenProtein Python interface.
 Author-email: Mark Gee <markgee@ne47.bio>, "Timothy Truong Jr." <ttruong@ne47.bio>, Tristan Bepler <tbepler@ne47.bio>
 License-Expression: MIT

{openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/api.py RENAMED Viewed

@@ -400,12 +400,6 @@ def prompt_post(
             "Either 'num_sequences' or 'num_residues' must be set, but not both."
         )
-    if num_sequences is not None and not (0 <= num_sequences < 100):
-        raise InvalidParameterError("The 'num_sequences' must be between 0 and 100.")
-    if num_residues is not None and not (0 <= num_residues < 24577):
-        raise InvalidParameterError("The 'num_residues' must be between 0 and 24577.")
     if random_seed is None:
         random_seed = random.randrange(2**32)

{openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/poet.py RENAMED Viewed

@@ -295,7 +295,11 @@ class PoETModel(EmbeddingModel):
         EmbeddingsGenerateFuture
             Future object representing the status and information about the generation job.
         """
-        prompt_id = prompt if isinstance(prompt, str) else prompt.id
+        if prompt is not None:
+            kwargs["prompt_id"] = prompt if isinstance(prompt, str) else prompt.id
+        else:
+            # NB: this is for handling PoET-2
+            assert self.model_id != "poet"
         return EmbeddingsGenerateFuture.create(
             session=self.session,
             job=api.request_generate_post(
@@ -307,7 +311,6 @@ class PoETModel(EmbeddingModel):
                 topp=topp,
                 max_length=max_length,
                 random_seed=seed,
-                prompt_id=prompt_id,
                 **kwargs,
             ),
         )

{openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/poet2.py RENAMED Viewed

@@ -287,7 +287,7 @@ class PoET2Model(PoETModel, EmbeddingModel):
     def generate(
         self,
-        prompt: str | Prompt,
+        prompt: str | Prompt | None,
         query: str | bytes | Protein | Query | None = None,
         use_query_structure_in_decoder: bool = True,
         num_samples: int = 100,
@@ -304,7 +304,7 @@ class PoET2Model(PoETModel, EmbeddingModel):
         Parameters
         ----------
-        prompt : str or Prompt
+        prompt : str or Prompt or None, optional
             Prompt from an align workflow to condition PoET model.
         query : str or bytes or Protein or Query or None, optional
             Query to use with prompt.
@@ -351,7 +351,8 @@ class PoET2Model(PoETModel, EmbeddingModel):
                     f"equal to the number of prompts ({prompt.num_replicates})"
                 )
         return super().generate(
-            prompt=prompt,
+            # NB: poet(-1) cannot use null prompt, so we don't change its .generate's type signature
+            prompt=prompt,  # type: ignore
             num_samples=num_samples,
             temperature=temperature,
             topk=topk,

{openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/__init__.py RENAMED Viewed

@@ -7,6 +7,7 @@ isort:skip_file
 from .schemas import FoldJob, FoldMetadata
 from .models import FoldModel
 from .esmfold import ESMFoldModel
+from .minifold import MiniFoldModel
 from .alphafold2 import AlphaFold2Model
 from .boltz import (
     Boltz1Model,
@@ -17,5 +18,6 @@ from .boltz import (
     BoltzConstraint,
     BoltzProperty,
 )
+from .rosettafold3 import RosettaFold3Model
 from .future import FoldResultFuture, FoldComplexResultFuture
 from .fold import FoldAPI

{openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/api.py RENAMED Viewed

@@ -1,7 +1,7 @@
 """Fold REST API interface for making HTTP calls to our fold backend."""
 import io
-from typing import Literal
+from typing import TYPE_CHECKING, Literal
 import numpy as np
 from pydantic import TypeAdapter
@@ -12,6 +12,9 @@ from openprotein.errors import HTTPError
 from .schemas import FoldJob, FoldMetadata
+if TYPE_CHECKING:
+    import pandas as pd
 PATH_PREFIX = "v1/fold"
@@ -160,8 +163,8 @@ def fold_get_complex_result(
 def fold_get_complex_extra_result(
     session: APISession,
     job_id: str,
-    key: Literal["pae", "pde", "plddt", "confidence", "affinity"],
-) -> np.ndarray | list[dict]:
+    key: Literal["pae", "pde", "plddt", "confidence", "affinity", "score", "metrics"],
+) -> "np.ndarray | list[dict] | pd.DataFrame":
     """
     Get extra result for a complex from the request ID.
@@ -183,6 +186,10 @@ def fold_get_complex_extra_result(
         formatter = lambda response: np.load(io.BytesIO(response.content))
     elif key in {"confidence", "affinity"}:
         formatter = lambda response: response.json()
+    elif key in {"score", "metrics"}:
+        import pandas as pd
+        formatter = lambda response: pd.read_csv(io.StringIO(response.content.decode()))
     else:
         raise ValueError(f"Unexpected key: {key}")
     endpoint = PATH_PREFIX + f"/{job_id}/complex/{key}"
@@ -194,7 +201,7 @@ def fold_get_complex_extra_result(
         if e.status_code == 400 and key == "affinity":
             raise ValueError("affinity not found for request") from None
         raise e
-    output: np.ndarray | list[dict] = formatter(response)
+    output = formatter(response)
     return output
@@ -254,34 +261,11 @@ def fold_models_post(
         sequences = kwargs["sequences"]
         # NOTE we are handling the boltz form here too
         sequences = [s.decode() if isinstance(s, bytes) else s for s in sequences]
-        body["sequences"] = sequences
-    if kwargs.get("msa_id"):
-        body["msa_id"] = kwargs["msa_id"]
-    if kwargs.get("num_recycles"):
-        body["num_recycles"] = kwargs["num_recycles"]
-    if kwargs.get("num_models"):
-        body["num_models"] = kwargs["num_models"]
-    if kwargs.get("num_relax"):
-        body["num_relax"] = kwargs["num_relax"]
-    if kwargs.get("use_potentials"):
-        body["use_potentials"] = kwargs["use_potentials"]
-    # boltz
-    if kwargs.get("diffusion_samples"):
-        body["diffusion_samples"] = kwargs["diffusion_samples"]
-    if kwargs.get("recycling_steps"):
-        body["recycling_steps"] = kwargs["recycling_steps"]
-    if kwargs.get("sampling_steps"):
-        body["sampling_steps"] = kwargs["sampling_steps"]
-    if kwargs.get("step_scale"):
-        body["step_scale"] = kwargs["step_scale"]
-    if kwargs.get("constraints"):
-        body["constraints"] = kwargs["constraints"]
-    if kwargs.get("templates"):
-        body["templates"] = kwargs["templates"]
-    if kwargs.get("properties"):
-        body["properties"] = kwargs["properties"]
-    if kwargs.get("method"):
-        body["method"] = kwargs["method"]
+        kwargs["sequences"] = sequences
+    # add non-None args - note this doesnt affect msa_id which is nested
+    for k, v in kwargs.items():
+        if v is not None:
+            body[k] = v
     response = session.post(endpoint, json=body)
     return FoldJob.model_validate(response.json())

{openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/boltz.py RENAMED Viewed

@@ -1,7 +1,7 @@
 """Community-based Boltz models for complex structure prediction with ligands/dna/rna."""
-import re
-import string
+import warnings
+from logging import warning
 from typing import Any
 from pydantic import BaseModel, Field, TypeAdapter, model_validator
@@ -13,67 +13,10 @@ from openprotein.common import ModelMetadata
 from openprotein.protein import Protein
 from . import api
+from .complex import id_generator
 from .future import FoldComplexResultFuture
 from .models import FoldModel
-valid_id_pattern = re.compile(r"^[A-Z]{1,5}$|^\d{1,5}$")
-def is_valid_id(id_str: str) -> bool:
-    """
-    Check if the id_str matches the valid pattern for IDs (1-5 uppercase or 1-5 digits).
-    """
-    if not id_str or len(id_str) > 5:
-        return False
-    return bool(valid_id_pattern.fullmatch(id_str))
-def id_generator(used_ids: list[str] | None = None, max_alpha_len=5, max_numeric=99999):
-    """
-    Yields new chain IDs, skipping any in 'used_ids'.
-    First A..Z, AA..ZZ, … up to max_alpha_len, then '1','2',… up to max_numeric.
-    """
-    used = set(tuple(used_ids or []))
-    letters = list(string.ascii_uppercase)
-    # --- Alphabetic IDs ---
-    curr_len = 1
-    curr_indices = [0] * curr_len  # start at 'A'
-    def bump_indices():
-        # lexicographically increment curr_indices; return False on overflow
-        for i in reversed(range(len(curr_indices))):
-            if curr_indices[i] < len(letters) - 1:
-                curr_indices[i] += 1
-                for j in range(i + 1, len(curr_indices)):
-                    curr_indices[j] = 0
-                return True
-        return False
-    while curr_len <= max_alpha_len:
-        candidate = "".join(letters[i] for i in curr_indices)
-        if candidate not in used:
-            used.add(candidate)
-            yield candidate
-        # bump
-        if not bump_indices():
-            curr_len += 1
-            if curr_len > max_alpha_len:
-                break
-            curr_indices = [0] * curr_len
-    # --- Numeric IDs ---
-    num = 1
-    while num <= max_numeric:
-        candidate = str(num)
-        num += 1
-        if candidate not in used:
-            used.add(candidate)
-            yield candidate
-    # exhausted
-    raise RuntimeError("exhausted all possible IDs")
 class BoltzModel(FoldModel):
     """
@@ -97,8 +40,8 @@ class BoltzModel(FoldModel):
         rnas: list[RNA] | None = None,
         ligands: list[Ligand] | None = None,
         diffusion_samples: int = 1,
-        recycling_steps: int = 3,
-        sampling_steps: int = 200,
+        num_recycles: int = 3,
+        num_steps: int = 200,
         step_scale: float = 1.638,
         use_potentials: bool = False,
         constraints: list[dict] | None = None,
@@ -119,9 +62,9 @@ class BoltzModel(FoldModel):
             List of ligands to include in folded output.
         diffusion_samples: int
             Number of diffusion samples to use
-        recycling_steps : int
+        num_recycles : int
             Number of recycling steps to use
-        sampling_steps : int
+        num_steps : int
             Number of sampling steps to use
         step_scale : float
             Scaling factor for diffusion steps.
@@ -133,6 +76,17 @@ class BoltzModel(FoldModel):
         FoldComplexResultFuture
             Future for the folding complex result.
         """
+        # migrate old parameter
+        if (recycling_steps := kwargs.get("recycling_steps")) is not None:
+            num_recycles = recycling_steps
+            warnings.warn(
+                "`recycling_steps` has been updated to `num_recycles`. The parameter will be auto-corrected for now but raise an exception in the future."
+            )
+        if (sampling_steps := kwargs.get("sampling_steps")) is not None:
+            num_steps = sampling_steps
+            warnings.warn(
+                "`sampling_steps` has been updated to `num_steps`. The parameter will be auto-corrected for now but raise an exception in the future."
+            )
         # validate constraints
         if constraints is not None:
             TypeAdapter(list[BoltzConstraint]).validate_python(constraints)
@@ -247,8 +201,8 @@ class BoltzModel(FoldModel):
                 model_id=self.model_id,
                 sequences=sequences,
                 diffusion_samples=diffusion_samples,
-                recycling_steps=recycling_steps,
-                sampling_steps=sampling_steps,
+                num_recycles=num_recycles,
+                num_steps=num_steps,
                 step_scale=step_scale,
                 constraints=constraints,
                 use_potentials=use_potentials,
@@ -276,8 +230,8 @@ class Boltz2Model(BoltzModel, FoldModel):
         rnas: list[RNA] | None = None,
         ligands: list[Ligand] | None = None,
         diffusion_samples: int = 1,
-        recycling_steps: int = 3,
-        sampling_steps: int = 200,
+        num_recycles: int = 3,
+        num_steps: int = 200,
         step_scale: float = 1.638,
         use_potentials: bool = False,
         constraints: list[dict] | None = None,
@@ -300,9 +254,9 @@ class Boltz2Model(BoltzModel, FoldModel):
             List of ligands to include in folded output.
         diffusion_samples: int
             Number of diffusion samples to use
-        recycling_steps : int
+        num_recycles : int
             Number of recycling steps to use
-        sampling_steps : int
+        num_steps : int
             Number of sampling steps to use
         step_scale : float
             Scaling factor for diffusion steps.
@@ -360,8 +314,8 @@ class Boltz2Model(BoltzModel, FoldModel):
             rnas=rnas,
             ligands=ligands,
             diffusion_samples=diffusion_samples,
-            recycling_steps=recycling_steps,
-            sampling_steps=sampling_steps,
+            num_recycles=num_recycles,
+            num_steps=num_steps,
             step_scale=step_scale,
             use_potentials=use_potentials,
             constraints=constraints,
@@ -385,8 +339,8 @@ class Boltz1xModel(BoltzModel, FoldModel):
         rnas: list[RNA] | None = None,
         ligands: list[Ligand] | None = None,
         diffusion_samples: int = 1,
-        recycling_steps: int = 3,
-        sampling_steps: int = 200,
+        num_recycles: int = 3,
+        num_steps: int = 200,
         step_scale: float = 1.638,
         constraints: list[dict] | None = None,
     ) -> FoldComplexResultFuture:
@@ -405,9 +359,9 @@ class Boltz1xModel(BoltzModel, FoldModel):
             List of ligands to include in folded output.
         diffusion_samples: int
             Number of diffusion samples to use
-        recycling_steps : int
+        num_recycles : int
             Number of recycling steps to use
-        sampling_steps : int
+        num_steps : int
             Number of sampling steps to use
         step_scale : float
             Scaling factor for diffusion steps.
@@ -426,8 +380,8 @@ class Boltz1xModel(BoltzModel, FoldModel):
             rnas=rnas,
             ligands=ligands,
             diffusion_samples=diffusion_samples,
-            recycling_steps=recycling_steps,
-            sampling_steps=sampling_steps,
+            num_recycles=num_recycles,
+            num_steps=num_steps,
             step_scale=step_scale,
             use_potentials=True,
             constraints=constraints,
@@ -448,8 +402,8 @@ class Boltz1Model(BoltzModel, FoldModel):
         rnas: list[RNA] | None = None,
         ligands: list[Ligand] | None = None,
         diffusion_samples: int = 1,
-        recycling_steps: int = 3,
-        sampling_steps: int = 200,
+        num_recycles: int = 3,
+        num_steps: int = 200,
         step_scale: float = 1.638,
         use_potentials: bool = False,
         constraints: list[dict] | None = None,
@@ -469,9 +423,9 @@ class Boltz1Model(BoltzModel, FoldModel):
             List of ligands to include in folded output.
         diffusion_samples: int
             Number of diffusion samples to use
-        recycling_steps : int
+        num_recycles : int
             Number of recycling steps to use
-        sampling_steps : int
+        num_steps : int
             Number of sampling steps to use
         step_scale : float
             Scaling factor for diffusion steps.
@@ -492,8 +446,8 @@ class Boltz1Model(BoltzModel, FoldModel):
             rnas=rnas,
             ligands=ligands,
             diffusion_samples=diffusion_samples,
-            recycling_steps=recycling_steps,
-            sampling_steps=sampling_steps,
+            num_recycles=num_recycles,
+            num_steps=num_steps,
             step_scale=step_scale,
             use_potentials=use_potentials,
             constraints=constraints,

openprotein_python-0.8.9/openprotein/fold/complex.py ADDED Viewed

@@ -0,0 +1,60 @@
+import re
+import string
+valid_id_pattern = re.compile(r"^[A-Z]{1,5}$|^\d{1,5}$")
+def is_valid_id(id_str: str) -> bool:
+    """
+    Check if the id_str matches the valid pattern for IDs (1-5 uppercase or 1-5 digits).
+    """
+    if not id_str or len(id_str) > 5:
+        return False
+    return bool(valid_id_pattern.fullmatch(id_str))
+def id_generator(used_ids: list[str] | None = None, max_alpha_len=5, max_numeric=99999):
+    """
+    Yields new chain IDs, skipping any in 'used_ids'.
+    First A..Z, AA..ZZ, … up to max_alpha_len, then '1','2',… up to max_numeric.
+    """
+    used = set(tuple(used_ids or []))
+    letters = list(string.ascii_uppercase)
+    # --- Alphabetic IDs ---
+    curr_len = 1
+    curr_indices = [0] * curr_len  # start at 'A'
+    def bump_indices():
+        # lexicographically increment curr_indices; return False on overflow
+        for i in reversed(range(len(curr_indices))):
+            if curr_indices[i] < len(letters) - 1:
+                curr_indices[i] += 1
+                for j in range(i + 1, len(curr_indices)):
+                    curr_indices[j] = 0
+                return True
+        return False
+    while curr_len <= max_alpha_len:
+        candidate = "".join(letters[i] for i in curr_indices)
+        if candidate not in used:
+            used.add(candidate)
+            yield candidate
+        # bump
+        if not bump_indices():
+            curr_len += 1
+            if curr_len > max_alpha_len:
+                break
+            curr_indices = [0] * curr_len
+    # --- Numeric IDs ---
+    num = 1
+    while num <= max_numeric:
+        candidate = str(num)
+        num += 1
+        if candidate not in used:
+            used.add(candidate)
+            yield candidate
+    # exhausted
+    raise RuntimeError("exhausted all possible IDs")

{openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/fold.py RENAMED Viewed

@@ -7,9 +7,11 @@ from .alphafold2 import AlphaFold2Model
 from .boltz import Boltz1Model, Boltz1xModel, Boltz2Model
 from .esmfold import ESMFoldModel
 from .future import FoldComplexResultFuture, FoldResultFuture
+from .minifold import MiniFoldModel
 from .models import (
     FoldModel,
 )
+from .rosettafold3 import RosettaFold3Model
 class FoldAPI:
@@ -26,11 +28,16 @@ class FoldAPI:
     #: Boltz-1 model
     boltz1: Boltz1Model
     boltz_1: Boltz1Model
-    af2: AlphaFold2Model
     #: AlphaFold-2 model
+    af2: AlphaFold2Model
     alphafold2: AlphaFold2Model
+    #: RosettaFold-3 model
+    rf3: RosettaFold3Model
+    rosettafold_3: RosettaFold3Model
     #: ESMFold model
     esmfold: ESMFoldModel
+    #: MiniFold model
+    minifold: MiniFoldModel
     def __init__(self, session: APISession):
         self.session = session
@@ -45,6 +52,8 @@ class FoldAPI:
         # Setup aliases safely
         if getattr(self, "alphafold2", None):
             self.af2 = self.alphafold2
+        if getattr(self, "rosettafold_3", None):
+            self.rf3 = self.rosettafold_3
         if getattr(self, "boltz_1", None):
             self.boltz1 = self.boltz_1
         if getattr(self, "boltz_1x", None):

{openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/future.py RENAMED Viewed

@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING, Literal
 import numpy as np
+import pandas as pd
 from pydantic.type_adapter import TypeAdapter
 from typing_extensions import Self
@@ -50,14 +51,14 @@ class FoldResultFuture(MappedFuture, Future):
         if metadata is None:
             if job is None or job.job_id is None:
                 raise ValueError("Expected fold metadata or job")
-            metadata = api.fold_get(session, job.job_id)
+            metadata = api.fold_get(session=session, job_id=job.job_id)
         self._metadata = metadata
         if job is None:
             jobs_api = getattr(session, "jobs", None)
             assert isinstance(jobs_api, JobsAPI)
             job = FoldJob.create(jobs_api.get_job(job_id=metadata.job_id))
         if sequences is None:
-            sequences = api.fold_get_sequences(self.session, job_id=job.job_id)
+            sequences = api.fold_get_sequences(session=session, job_id=job.job_id)
         self._sequences = sequences
         super().__init__(session, job, max_workers)
@@ -93,7 +94,11 @@ class FoldResultFuture(MappedFuture, Future):
         else:
             raise ValueError("Expected fold metadata or job")
         model_id = api.fold_get(session=session, job_id=job_id).model_id
-        if model_id.startswith("boltz") or model_id.startswith("alphafold"):
+        if (
+            model_id.startswith("boltz")
+            or model_id.startswith("alphafold")
+            or model_id.startswith("rosettafold")
+        ):
             return FoldComplexResultFuture(session=session, job=job, **kwargs)
         else:
             return cls(session=session, job=job, **kwargs)
@@ -124,7 +129,6 @@ class FoldResultFuture(MappedFuture, Future):
         """
         return self.job.job_id
     @property
     def metadata(self) -> FoldMetadata:
         """The fold metadata."""
@@ -243,6 +247,8 @@ class FoldComplexResultFuture(Future):
         self._pae: np.ndarray | None = None
         self._pde: np.ndarray | None = None
         self._plddt: np.ndarray | None = None
+        self._score: pd.DataFrame | None = None
+        self._metrics: pd.DataFrame | None = None
         self._confidence: list["BoltzConfidence"] | None = None
         self._affinity: "BoltzAffinity | None" = None
@@ -436,6 +442,56 @@ class FoldComplexResultFuture(Future):
             self._plddt = plddt
         return self._plddt
+    @property
+    def score(self) -> pd.DataFrame:
+        """
+        Get the predicted scores.
+        Returns
+        -------
+        pd.DataFrame
+            Structure prediction scores.
+        Raises
+        ------
+        AttributeError
+            If score is not supported for the model.
+        """
+        if self.model_id not in {"rosettafold-3"}:
+            raise AttributeError("score not supported for non-RosettaFold model")
+        if self._score is None:
+            score = api.fold_get_complex_extra_result(
+                session=self.session, job_id=self.job.job_id, key="score"
+            )
+            assert isinstance(score, pd.DataFrame)
+            self._score = score
+        return self._score
+    @property
+    def metrics(self) -> pd.DataFrame:
+        """
+        Get the predicted metrics.
+        Returns
+        -------
+        pd.DataFrame
+            Structure prediction metrics.
+        Raises
+        ------
+        AttributeError
+            If metrics is not supported for the model.
+        """
+        if self.model_id not in {"rosettafold-3"}:
+            raise AttributeError("metrics not supported for non-RosettaFold model")
+        if self._metrics is None:
+            metrics = api.fold_get_complex_extra_result(
+                session=self.session, job_id=self.job.job_id, key="metrics"
+            )
+            assert isinstance(metrics, pd.DataFrame)
+            self._metrics = metrics
+        return self._metrics
     @property
     def confidence(self) -> list["BoltzConfidence"]:
         """

openprotein_python-0.8.9/openprotein/fold/minifold.py ADDED Viewed

@@ -0,0 +1,54 @@
+from collections.abc import Sequence
+from openprotein.base import APISession
+from openprotein.common import ModelMetadata
+from . import api
+from .future import FoldResultFuture
+from .models import FoldModel
+class MiniFoldModel(FoldModel):
+    """
+    Class providing inference endpoints for MiniFold.
+    """
+    model_id: str = "minifold"
+    def __init__(
+        self,
+        session: APISession,
+        model_id: str,
+        metadata: ModelMetadata | None = None,
+    ):
+        super().__init__(session=session, model_id=model_id, metadata=metadata)
+    def fold(
+        self, sequences: Sequence[bytes | str], num_recycles: int | None = None
+    ) -> FoldResultFuture:
+        """
+        Fold sequences using this model.
+        Parameters
+        ----------
+        sequences : Sequence[bytes | str]
+            sequences to fold
+        num_recycles : int | None
+            number of times to recycle models
+        Returns
+        -------
+            FoldResultFuture
+        """
+        sequences = [s.decode() if isinstance(s, bytes) else s for s in sequences]
+        assert all(":" not in s for s in sequences), "minifold does not support ':'"
+        result = FoldResultFuture.create(
+            session=self.session,
+            job=api.fold_models_post(
+                session=self.session,
+                model_id=self.model_id,
+                sequences=sequences,
+                num_recycles=num_recycles,
+            ),
+        )
+        assert isinstance(result, FoldResultFuture)
+        return result

openprotein_python-0.8.9/openprotein/fold/rosettafold3.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""Community-based RosettaFold3 models for complex structure prediction with ligands/dna/rna."""
+from typing import Any
+from pydantic import BaseModel, Field, TypeAdapter, model_validator
+from openprotein.align import AlignAPI, MSAFuture
+from openprotein.base import APISession
+from openprotein.chains import Ligand
+from openprotein.common import ModelMetadata
+from openprotein.protein import Protein
+from . import api
+from .complex import id_generator
+from .future import FoldComplexResultFuture
+from .models import FoldModel
+class RosettaFold3Model(FoldModel):
+    """
+    Class providing inference endpoints for RosettaFold-3 structure prediction model.
+    """
+    model_id: str = "rosettafold-3"
+    def __init__(
+        self,
+        session: APISession,
+        model_id: str,
+        metadata: ModelMetadata | None = None,
+    ):
+        super().__init__(session, model_id, metadata)
+    def fold(
+        self,
+        proteins: list[Protein] | MSAFuture | None = None,
+        ligands: list[Ligand] | None = None,
+        diffusion_samples: int = 1,
+        num_recycles: int = 10,
+        num_steps: int = 50,
+        **kwargs,
+    ) -> FoldComplexResultFuture:
+        """
+        Request structure prediction with RosettaFold-3 model.
+        Parameters
+        ----------
+        proteins : List[Protein] | MSAFuture | None
+            List of protein sequences to include in folded output. `Protein` objects must be tagged with an `msa`, which can be a `Protein.single_sequence_mode` for single sequence mode. Alternatively, supply an `MSAFuture` to use all query sequences as a multimer.
+        ligands : List[Ligand] | None
+            List of ligands to include in folded output.
+        diffusion_samples: int
+            Number of diffusion samples to use
+        num_recycles : int
+            Number of recycling steps to use
+        num_steps : int
+            Number of sampling steps to use
+        Returns
+        -------
+        FoldComplexResultFuture
+            Future for the folding complex result.
+        """
+        # collate the id's used
+        used_ids = []
+        if isinstance(proteins, list):
+            for protein in proteins:
+                if isinstance(protein, Protein) and protein.chain_id is not None:
+                    if isinstance(protein.chain_id, str):
+                        used_ids.append(protein.chain_id)
+                    elif isinstance(protein.chain_id, list):
+                        used_ids.extend(protein.chain_id)
+        for ligand in ligands or []:
+            if isinstance(ligand.chain_id, str):
+                used_ids.append(ligand.chain_id)
+            elif isinstance(ligand.chain_id, list):
+                used_ids.extend(ligand.chain_id)
+        id_gen = id_generator(used_ids)
+        # build the proteins from msa
+        if isinstance(proteins, MSAFuture):
+            align_api = getattr(self.session, "align", None)
+            assert isinstance(align_api, AlignAPI)
+            msa = proteins  # rename
+            proteins = []  # convert back to list of proteins
+            seed = align_api.get_seed(job_id=msa.job.job_id)
+            query_seqs_cardinality: dict[str, int] = dict()
+            for seq in seed.split(":"):
+                query_seqs_cardinality[seq] = query_seqs_cardinality.get(seq, 0) + 1
+            for seq, card in query_seqs_cardinality.items():
+                protein = Protein(sequence=seq)
+                if card == 1:
+                    id = next(id_gen)
+                else:
+                    id = [next(id_gen) for _ in range(card)]
+                protein.chain_id = id
+                protein.msa = msa
+                proteins.append(protein)
+        # build the sequences input
+        sequences: list[dict[str, Any]] = []
+        for protein in proteins or []:
+            # check the msa
+            msa = protein.msa
+            if msa is None:
+                raise ValueError(
+                    "Expected all protein sequences to have `.msa` set with an `MSAFuture` or `Protein.single_sequence_mode` for single sequence mode."
+                )
+            # convert to msa id or null for single sequence mode
+            msa_id = (
+                msa
+                if isinstance(msa, str)
+                else msa.id if isinstance(msa, MSAFuture) else None
+            )
+            # add the protein in the expected format
+            p = {
+                "id": protein.chain_id or next(id_gen),
+                "msa_id": msa_id,
+                "sequence": protein.sequence.decode(),
+            }
+            if protein.cyclic:
+                p["cyclic"] = protein.cyclic
+            sequences.append({"protein": p})
+        for ligand in ligands or []:
+            ligand_: dict = {"id": ligand.chain_id or next(id_gen)}
+            if ligand.ccd:
+                ligand_["ccd"] = ligand.ccd
+            if ligand.smiles:
+                ligand_["smiles"] = ligand.smiles
+            sequences.append({"ligand": ligand_})
+        if len(sequences) == 0:
+            raise ValueError("Expected proteins or ligands")
+        return FoldComplexResultFuture.create(
+            session=self.session,
+            job=api.fold_models_post(
+                session=self.session,
+                model_id=self.model_id,
+                sequences=sequences,
+                diffusion_samples=diffusion_samples,
+                num_recycles=num_recycles,
+                num_steps=num_steps,
+                **kwargs,
+            ),
+            model_id=self.model_id,
+            proteins=proteins,
+            ligands=ligands,
+        )