PyPI - openprotein-python - Versions diffs - 0.8.2__1-py3-none-any.whl - Mend

openprotein-python 0.8.2__1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

openprotein/__init__.py +164 -0
openprotein/_version.py +48 -0
openprotein/align/__init__.py +8 -0
openprotein/align/align.py +395 -0
openprotein/align/api.py +428 -0
openprotein/align/future.py +55 -0
openprotein/align/msa.py +129 -0
openprotein/align/schemas.py +165 -0
openprotein/base.py +181 -0
openprotein/chains.py +88 -0
openprotein/common/__init__.py +5 -0
openprotein/common/features.py +7 -0
openprotein/common/model_metadata.py +33 -0
openprotein/common/reduction.py +8 -0
openprotein/config.py +9 -0
openprotein/csv.py +31 -0
openprotein/data/__init__.py +9 -0
openprotein/data/api.py +218 -0
openprotein/data/assaydataset.py +178 -0
openprotein/data/data.py +93 -0
openprotein/data/schemas.py +27 -0
openprotein/design/__init__.py +16 -0
openprotein/design/api.py +259 -0
openprotein/design/design.py +125 -0
openprotein/design/future.py +146 -0
openprotein/design/schemas.py +607 -0
openprotein/embeddings/__init__.py +27 -0
openprotein/embeddings/api.py +619 -0
openprotein/embeddings/embeddings.py +151 -0
openprotein/embeddings/esm.py +33 -0
openprotein/embeddings/future.py +146 -0
openprotein/embeddings/models.py +421 -0
openprotein/embeddings/openprotein.py +21 -0
openprotein/embeddings/poet.py +446 -0
openprotein/embeddings/poet2.py +505 -0
openprotein/embeddings/schemas.py +78 -0
openprotein/errors.py +76 -0
openprotein/fasta.py +92 -0
openprotein/fold/__init__.py +21 -0
openprotein/fold/alphafold2.py +131 -0
openprotein/fold/api.py +287 -0
openprotein/fold/boltz.py +691 -0
openprotein/fold/esmfold.py +54 -0
openprotein/fold/fold.py +107 -0
openprotein/fold/future.py +509 -0
openprotein/fold/models.py +139 -0
openprotein/fold/schemas.py +39 -0
openprotein/jobs/__init__.py +9 -0
openprotein/jobs/api.py +71 -0
openprotein/jobs/futures.py +746 -0
openprotein/jobs/jobs.py +69 -0
openprotein/jobs/schemas.py +135 -0
openprotein/models/__init__.py +4 -0
openprotein/models/base.py +63 -0
openprotein/models/foundation/rfdiffusion.py +283 -0
openprotein/models/models.py +33 -0
openprotein/predictor/__init__.py +25 -0
openprotein/predictor/api.py +384 -0
openprotein/predictor/models.py +374 -0
openprotein/predictor/prediction.py +79 -0
openprotein/predictor/predictor.py +242 -0
openprotein/predictor/schemas.py +113 -0
openprotein/predictor/validate.py +40 -0
openprotein/prompt/__init__.py +9 -0
openprotein/prompt/api.py +505 -0
openprotein/prompt/models.py +142 -0
openprotein/prompt/prompt.py +130 -0
openprotein/prompt/schemas.py +49 -0
openprotein/protein.py +587 -0
openprotein/svd/__init__.py +9 -0
openprotein/svd/api.py +206 -0
openprotein/svd/models.py +288 -0
openprotein/svd/schemas.py +31 -0
openprotein/svd/svd.py +134 -0
openprotein/umap/__init__.py +9 -0
openprotein/umap/api.py +259 -0
openprotein/umap/models.py +211 -0
openprotein/umap/schemas.py +35 -0
openprotein/umap/umap.py +175 -0
openprotein/utils/uuid.py +29 -0
openprotein_python-0.8.2.dist-info/METADATA +176 -0
openprotein_python-0.8.2.dist-info/RECORD +84 -0
openprotein_python-0.8.2.dist-info/WHEEL +4 -0
openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0

openprotein/embeddings/poet.py ADDED Viewed

@@ -0,0 +1,446 @@
+"""Original PoET model handling various protein engineering tasks."""
+from typing import TYPE_CHECKING
+from openprotein.base import APISession
+from openprotein.common import ModelMetadata, ReductionType
+from openprotein.data import AssayDataset, AssayMetadata
+from openprotein.prompt import Prompt
+from . import api
+from .future import (
+    EmbeddingsGenerateFuture,
+    EmbeddingsResultFuture,
+    EmbeddingsScoreFuture,
+)
+from .models import EmbeddingModel
+if TYPE_CHECKING:
+    from openprotein.predictor import PredictorModel
+    from openprotein.svd import SVDModel
+    from openprotein.umap import UMAPModel
+class PoETModel(EmbeddingModel):
+    """
+    Class for OpenProtein's foundation model PoET.
+    Every method that takes a ``prompt`` accepts either a prompt ID string or
+    a ``Prompt`` object and forwards only the resolved prompt ID.
+    Note
+    ----
+    PoET functions are dependent on a prompt supplied via the prompt endpoints.
+    Examples
+    --------
+    View specific model details (including supported tokens) with the `?` operator.
+        >>> import openprotein
+        >>> session = openprotein.connect(username="user", password="password")
+        >>> session.embedding.poet.<embeddings_method>
+    """
+    model_id = "poet"
+    # TODO - Add model to explicitly require prompt_id
+    def __init__(
+        self,
+        session: APISession,
+        model_id: str,
+        metadata: ModelMetadata | None = None,
+    ):
+        """
+        Initialize the PoET model handle.
+        Parameters
+        ----------
+        session : APISession
+            Session forwarded to the base ``EmbeddingModel`` initializer.
+        model_id : str
+            Model identifier forwarded to the base ``EmbeddingModel`` initializer.
+        metadata : ModelMetadata or None, optional
+            Model metadata forwarded to the base ``EmbeddingModel`` initializer.
+        """
+        super().__init__(session=session, model_id=model_id, metadata=metadata)
+    # Name-mangled (double underscore) so that neither the base class nor a
+    # subclass can accidentally shadow or be shadowed by this helper.
+    @staticmethod
+    def __resolve_prompt_id(prompt: str | Prompt | None) -> str | None:
+        """
+        Resolve a prompt argument to a prompt ID.
+        Parameters
+        ----------
+        prompt : str or Prompt or None
+            Prompt ID string, ``Prompt`` object, or None.
+        Returns
+        -------
+        str or None
+            ``prompt`` itself when it is None or a string, otherwise ``prompt.id``.
+        """
+        if prompt is None or isinstance(prompt, str):
+            return prompt
+        return prompt.id
+    def embed(
+        self,
+        sequences: list[bytes],
+        prompt: str | Prompt | None = None,
+        reduction: ReductionType | None = ReductionType.MEAN,
+        **kwargs,
+    ) -> EmbeddingsResultFuture:
+        """
+        Embed sequences using the PoET model.
+        Parameters
+        ----------
+        sequences : list of bytes
+            Sequences to embed.
+        prompt : str or Prompt or None, optional
+            Prompt from an align workflow to condition the PoET model.
+        reduction : ReductionType or None, optional
+            Embeddings reduction to use (e.g., mean). Default is ReductionType.MEAN.
+        **kwargs
+            Additional keyword arguments forwarded to the base ``embed``.
+        Returns
+        -------
+        EmbeddingsResultFuture
+            Future object that returns the embeddings of the submitted sequences.
+        """
+        return super().embed(
+            sequences=sequences,
+            reduction=reduction,
+            prompt_id=self.__resolve_prompt_id(prompt),
+            **kwargs,
+        )
+    def logits(
+        self,
+        sequences: list[bytes],
+        prompt: str | Prompt | None = None,
+        **kwargs,
+    ) -> EmbeddingsResultFuture:
+        """
+        Compute logits for sequences using the PoET model.
+        Parameters
+        ----------
+        sequences : list of bytes
+            Sequences to analyze.
+        prompt : str or Prompt or None, optional
+            Prompt from an align workflow to condition the PoET model.
+        **kwargs
+            Additional keyword arguments forwarded to the base ``logits``.
+        Returns
+        -------
+        EmbeddingsResultFuture
+            Future object that returns the logits of the submitted sequences.
+        """
+        return super().logits(
+            sequences=sequences,
+            prompt_id=self.__resolve_prompt_id(prompt),
+            **kwargs,
+        )
+    def attn(self, *args, **kwargs):
+        """
+        Attention is not available for PoET.
+        Any positional or keyword arguments are accepted and ignored, so a call
+        that passes arguments still fails with the documented ``ValueError``
+        instead of a ``TypeError`` about an unexpected argument.
+        Raises
+        ------
+        ValueError
+            Always raised, as attention is not supported for PoET.
+        :meta private:
+        """
+        raise ValueError("Attn not yet supported for poet")
+    def score(
+        self,
+        sequences: list[bytes],
+        prompt: str | Prompt | None = None,
+        **kwargs,
+    ) -> EmbeddingsScoreFuture:
+        """
+        Score query sequences using the specified prompt.
+        Parameters
+        ----------
+        sequences : list of bytes
+            Sequences to score.
+        prompt : str or Prompt or None, optional
+            Prompt from an align workflow to condition the PoET model.
+        **kwargs
+            Additional keyword arguments forwarded to ``api.request_score_post``.
+        Returns
+        -------
+        EmbeddingsScoreFuture
+            Future object that returns the scores of the submitted sequences.
+        """
+        job = api.request_score_post(
+            session=self.session,
+            model_id=self.id,
+            prompt_id=self.__resolve_prompt_id(prompt),
+            sequences=sequences,
+            **kwargs,
+        )
+        return EmbeddingsScoreFuture.create(session=self.session, job=job)
+    def indel(
+        self,
+        sequence: bytes,
+        prompt: str | Prompt | None = None,
+        insert: str | None = None,
+        delete: list[int] | None = None,
+        **kwargs,
+    ) -> EmbeddingsScoreFuture:
+        """
+        Score all indels of the query sequence using the specified prompt.
+        Parameters
+        ----------
+        sequence : bytes
+            Sequence to analyze.
+        prompt : str or Prompt or None, optional
+            Prompt from an align workflow to condition the PoET model.
+        insert : str or None, optional
+            Insertion fragment at each site.
+        delete : list of int or None, optional
+            Range of size of fragment to delete at each site.
+        **kwargs
+            Additional keyword arguments forwarded to ``api.request_score_indel_post``.
+        Returns
+        -------
+        EmbeddingsScoreFuture
+            Future object that returns the scores of the indel-ed sequence.
+        Raises
+        ------
+        ValueError
+            If neither insert nor delete is provided (empty values count as
+            not provided).
+        """
+        # Validate before touching the prompt so bad input fails fast.
+        if not (insert or delete):
+            raise ValueError("Expected insert and/or delete to be provided")
+        job = api.request_score_indel_post(
+            session=self.session,
+            model_id=self.id,
+            base_sequence=sequence,
+            prompt_id=self.__resolve_prompt_id(prompt),
+            insert=insert,
+            delete=delete,
+            **kwargs,
+        )
+        return EmbeddingsScoreFuture.create(session=self.session, job=job)
+    def single_site(
+        self,
+        sequence: bytes,
+        prompt: str | Prompt | None = None,
+        **kwargs,
+    ) -> EmbeddingsScoreFuture:
+        """
+        Score all single substitutions of the query sequence using the specified prompt.
+        Parameters
+        ----------
+        sequence : bytes
+            Sequence to analyze.
+        prompt : str or Prompt or None, optional
+            Prompt from an align workflow to condition the PoET model.
+        **kwargs
+            Additional keyword arguments forwarded to
+            ``api.request_score_single_site_post``.
+        Returns
+        -------
+        EmbeddingsScoreFuture
+            Future object that returns the scores of the mutated sequence.
+        """
+        job = api.request_score_single_site_post(
+            session=self.session,
+            model_id=self.id,
+            base_sequence=sequence,
+            prompt_id=self.__resolve_prompt_id(prompt),
+            **kwargs,
+        )
+        return EmbeddingsScoreFuture.create(session=self.session, job=job)
+    def generate(
+        self,
+        prompt: str | Prompt,
+        num_samples: int = 100,
+        temperature: float = 1.0,
+        topk: float | None = None,
+        topp: float | None = None,
+        max_length: int = 1000,
+        seed: int | None = None,
+        **kwargs,
+    ) -> EmbeddingsGenerateFuture:
+        """
+        Generate protein sequences conditioned on a prompt.
+        Parameters
+        ----------
+        prompt : str or Prompt
+            Prompt from an align workflow to condition the PoET model.
+        num_samples : int, optional
+            Number of samples to generate. Default is 100.
+        temperature : float, optional
+            Temperature for sampling. Higher values produce more random outputs. Default is 1.0.
+        topk : float or None, optional
+            Number of top-k residues to consider during sampling. Default is None.
+        topp : float or None, optional
+            Cumulative probability threshold for top-p sampling. Default is None.
+        max_length : int, optional
+            Maximum length of generated proteins. Default is 1000.
+        seed : int or None, optional
+            Seed for random number generation. Sent to the API as ``random_seed``.
+            Default is None.
+        **kwargs
+            Additional keyword arguments forwarded to ``api.request_generate_post``.
+        Returns
+        -------
+        EmbeddingsGenerateFuture
+            Future object representing the status and information about the generation job.
+        """
+        # A prompt is mandatory here, so it is resolved inline rather than
+        # through the None-tolerant helper used by the other methods.
+        prompt_id = prompt if isinstance(prompt, str) else prompt.id
+        job = api.request_generate_post(
+            session=self.session,
+            model_id=self.id,
+            num_samples=num_samples,
+            temperature=temperature,
+            topk=topk,
+            topp=topp,
+            max_length=max_length,
+            random_seed=seed,
+            prompt_id=prompt_id,
+            **kwargs,
+        )
+        return EmbeddingsGenerateFuture.create(session=self.session, job=job)
+    def fit_svd(
+        self,
+        prompt: str | Prompt | None = None,
+        sequences: list[bytes] | list[str] | None = None,
+        assay: AssayDataset | None = None,
+        n_components: int = 1024,
+        reduction: ReductionType | None = None,
+        **kwargs,
+    ) -> "SVDModel":
+        """
+        Fit an SVD on the embedding results of PoET.
+        This function creates an SVDModel based on the embeddings from this model
+        as well as the hyperparameters specified in the arguments.
+        Parameters
+        ----------
+        prompt : str or Prompt or None, optional
+            Prompt from an align workflow to condition the PoET model.
+        sequences : list of bytes or list of str or None, optional
+            Sequences to use for SVD.
+        assay : AssayDataset or None, optional
+            Assay dataset to use for SVD.
+        n_components : int, optional
+            Number of components in SVD. Determines output shapes. Default is 1024.
+        reduction : ReductionType or None, optional
+            Embeddings reduction to use (e.g., mean). Default is None.
+        **kwargs
+            Additional keyword arguments forwarded to the base ``fit_svd``.
+        Returns
+        -------
+        SVDModel
+            Future that represents the fitted SVD model.
+        """
+        return super().fit_svd(
+            sequences=sequences,
+            assay=assay,
+            n_components=n_components,
+            reduction=reduction,
+            prompt_id=self.__resolve_prompt_id(prompt),
+            **kwargs,
+        )
+    def fit_umap(
+        self,
+        prompt: str | Prompt | None = None,
+        sequences: list[bytes] | list[str] | None = None,
+        assay: AssayDataset | None = None,
+        n_components: int = 2,
+        reduction: ReductionType | None = ReductionType.MEAN,
+        **kwargs,
+    ) -> "UMAPModel":
+        """
+        Fit a UMAP on assay using PoET and hyperparameters.
+        This function creates a UMAP based on the embeddings from this PoET model
+        as well as the hyperparameters specified in the arguments.
+        Parameters
+        ----------
+        prompt : str or Prompt or None, optional
+            Prompt from an align workflow to condition the PoET model.
+        sequences : list of bytes or list of str or None, optional
+            Optional sequences to fit UMAP with. Either use sequences or assay. Sequences is preferred.
+        assay : AssayDataset or None, optional
+            Optional assay containing sequences to fit UMAP with. Either use sequences or assay. Ignored if sequences are provided.
+        n_components : int, optional
+            Number of components in UMAP fit. Determines output shapes. Default is 2.
+        reduction : ReductionType or None, optional
+            Embeddings reduction to use (e.g., mean). Default is ReductionType.MEAN.
+        **kwargs
+            Additional keyword arguments forwarded to the base ``fit_umap``.
+        Returns
+        -------
+        UMAPModel
+            Future that represents the fitted UMAP model.
+        """
+        return super().fit_umap(
+            sequences=sequences,
+            assay=assay,
+            n_components=n_components,
+            reduction=reduction,
+            prompt_id=self.__resolve_prompt_id(prompt),
+            **kwargs,
+        )
+    def fit_gp(
+        self,
+        assay: AssayMetadata | AssayDataset | str,
+        properties: list[str],
+        prompt: str | Prompt | None = None,
+        **kwargs,
+    ) -> "PredictorModel":
+        """
+        Fit a Gaussian Process (GP) on assay using this embedding model and hyperparameters.
+        Parameters
+        ----------
+        assay : AssayMetadata or AssayDataset or str
+            Assay to fit GP on.
+        properties : list of str
+            Properties in the assay to fit the GP on.
+        prompt : str or Prompt or None, optional
+            Prompt from an align workflow to condition the PoET model.
+        **kwargs
+            Additional keyword arguments forwarded to the base ``fit_gp``.
+        Returns
+        -------
+        PredictorModel
+            Future that represents the trained predictor model.
+        """
+        return super().fit_gp(
+            assay=assay,
+            properties=properties,
+            prompt_id=self.__resolve_prompt_id(prompt),
+            **kwargs,
+        )