PyPI - openprotein-python - Versions diffs - 0.8.1__tar.gz → 0.8.2__tar.gz - Mend

openprotein-python 0.8.1__tar.gz → 0.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87)

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openprotein-python
-Version: 0.8.1
+Version: 0.8.2
 Summary: OpenProtein Python interface.
 Author-email: Mark Gee <markgee@ne47.bio>, "Timothy Truong Jr." <ttruong@ne47.bio>, Tristan Bepler <tbepler@ne47.bio>
 License-Expression: MIT

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/__init__.py RENAMED Viewed

@@ -17,6 +17,7 @@ from openprotein.align import AlignAPI
 from openprotein.prompt import PromptAPI
 from openprotein.embeddings import EmbeddingsAPI
 from openprotein.fold import FoldAPI
+from openprotein.models import ModelsAPI
 from openprotein.svd import SVDAPI
 from openprotein.umap import UMAPAPI
 from openprotein.predictor import PredictorAPI
@@ -40,6 +41,7 @@ class OpenProtein(APISession):
     _fold = None
     _predictor = None
     _design = None
+    _models = None
     def wait(self, future: Future, *args, **kwargs):
         return future.wait(*args, **kwargs)
@@ -149,5 +151,14 @@ class OpenProtein(APISession):
             self._fold = FoldAPI(self)
         return self._fold
+    @property
+    def models(self) -> "ModelsAPI":
+        """
+        The models submodule provides a unified entry point to all protein models.
+        """
+        if self._models is None:
+            self._models = ModelsAPI(self)
+        return self._models
 connect = OpenProtein

openprotein_python-0.8.2/openprotein/_version.py ADDED Viewed

@@ -0,0 +1,48 @@
+"""Compute the version number and store it in the `__version__` variable.
+Based on <https://github.com/maresb/hatch-vcs-footgun-example>.
+"""
+def _get_hatch_version():
+    """Compute the most up-to-date version number in a development environment.
+    Returns `None` if Hatchling is not installed, e.g. in a production environment.
+    For more details, see <https://github.com/maresb/hatch-vcs-footgun-example/>.
+    """
+    import os
+    try:
+        from hatchling.metadata.core import ProjectMetadata
+        from hatchling.plugin.manager import PluginManager
+        from hatchling.utils.fs import locate_file
+    except ImportError:
+        # Hatchling is not installed, so probably we are not in
+        # a development environment.
+        return None
+    pyproject_toml = locate_file(__file__, "pyproject.toml")
+    if pyproject_toml is None:
+        raise RuntimeError("pyproject.toml not found although hatchling is installed")
+    root = os.path.dirname(pyproject_toml)
+    metadata = ProjectMetadata(root=root, plugin_manager=PluginManager())
+    # Version can be either statically set in pyproject.toml or computed dynamically:
+    return metadata.core.version or metadata.hatch.version.cached
+def _get_importlib_metadata_version():
+    """Compute the version number using importlib.metadata.
+    This is the official Pythonic way to get the version number of an installed
+    package. However, it is only updated when a package is installed. Thus, if a
+    package is installed in editable mode, and a different version is checked out,
+    then the version number will not be updated.
+    """
+    from importlib.metadata import version
+    __version__ = version(__package__)  # type: ignore
+    return __version__
+__version__ = _get_hatch_version() or _get_importlib_metadata_version()

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/align/msa.py RENAMED Viewed

@@ -22,33 +22,6 @@ from .schemas import (
 class MSAFuture(AlignFuture, Future):
     """
     Represents a future for MSA (Multiple Sequence Alignment) results.
-    Parameters
-    ----------
-    session : APISession
-        An instance of APISession for API interactions.
-    job : MSAJob
-        The MSA job.
-    page_size : int, optional
-        The number of results to fetch in a single page. Defaults to config.POET_PAGE_SIZE.
-    Attributes
-    ----------
-    session : APISession
-        An instance of APISession for API interactions.
-    job : MSAJob | MafftJob | ClustalOJob | AbNumberJob
-        The MSA job.
-    page_size : int
-        The number of results to fetch in a single page.
-    msa_id : str
-        The job ID for the MSA.
-    Methods
-    -------
-    get(verbose=False)
-        Retrieve the MSA of the job as an iterator over CSV rows.
-    sample_prompt(...)
-        Create a protein sequence prompt from the linked MSA for PoET Jobs.
     """
     job: MSAJob | MafftJob | ClustalOJob | AbNumberJob

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/base.py RENAMED Viewed

@@ -27,7 +27,7 @@ class BearerAuth(requests.auth.AuthBase):
         self.token = token
     def __call__(self, r):
-        r.headers["authorization"] = "Bearer " + self.token
+        r.headers["Authorization"] = "Bearer " + self.token
         return r

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/embeddings/embeddings.py RENAMED Viewed

@@ -43,16 +43,24 @@ class EmbeddingsAPI:
     # added for static typing, eg pylance, for autocomplete
     # at init these are all overwritten.
+    #: PoET-2 model
+    poet2: PoET2Model
+    #: PoET model
+    poet: PoETModel
+    #: Prot-seq model
     prot_seq: OpenProteinModel
+    #: Rotaprot model trained on UniRef50
     rotaprot_large_uniref50w: OpenProteinModel
+    #: Rotaprot model trained on UniRef90
     rotaprot_large_uniref90_ft: OpenProteinModel
-    poet: PoETModel
     poet_2: PoET2Model
-    poet2: PoET2Model
+    #: ESM1b model
     esm1b: ESMModel  # alias
     esm1b_t33_650M_UR50S: ESMModel
+    #: ESM1v model
     esm1v: ESMModel  # alias
     esm1v_t33_650M_UR90S_1: ESMModel
     esm1v_t33_650M_UR90S_2: ESMModel
@@ -60,6 +68,7 @@ class EmbeddingsAPI:
     esm1v_t33_650M_UR90S_4: ESMModel
     esm1v_t33_650M_UR90S_5: ESMModel
+    #: ESM2 model
     esm2: ESMModel  # alias
     esm2_t12_35M_UR50D: ESMModel
     esm2_t30_150M_UR50D: ESMModel

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/embeddings/esm.py RENAMED Viewed

@@ -1,9 +1,9 @@
 """Community-based ESM models."""
-from .models import EmbeddingModel
+from .models import AttnModel, EmbeddingModel
-class ESMModel(EmbeddingModel):
+class ESMModel(AttnModel, EmbeddingModel):
     """
     Class providing inference endpoints for Facebook's ESM protein language models.
@@ -13,9 +13,9 @@ class ESMModel(EmbeddingModel):
     .. code-block:: python
-    >>> import openprotein
-    >>> session = openprotein.connect(username="user", password="password")
-    >>> session.embedding.esm2_t12_35M_UR50D?
+        >>> import openprotein
+        >>> session = openprotein.connect(username="user", password="password")
+        >>> session.embedding.esm2_t12_35M_UR50D?
     """
     model_id = [

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/embeddings/future.py RENAMED Viewed

@@ -63,7 +63,15 @@ class EmbeddingsResultFuture(MappedFuture, Future):
     def id(self):
         return self.job.job_id
-    def keys(self):
+    def __keys__(self):
+        """
+        Get the list of sequences submitted for the embed request.
+        Returns
+        -------
+        list of bytes
+            List of sequences.
+        """
         return self.sequences
     def get_item(self, sequence: bytes) -> np.ndarray:

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/embeddings/models.py RENAMED Viewed

@@ -17,9 +17,10 @@ if TYPE_CHECKING:
 class EmbeddingModel:
+    """Base embeddings model used to understand and provide embeddings from sequences."""
     # overridden by subclasses
-    # used to get correct emb model
+    # used to get correct emb model during factory create
     model_id: list[str] | str = "protembed"
     def __init__(
@@ -78,9 +79,9 @@ class EmbeddingModel:
             The API session to use.
         model_id : str
             The model identifier.
-        default : type[EmbeddingModel] or None, optional
+        default : type variable of EmbeddingModel or None, optional
             Default EmbeddingModel subclass to use if no match is found.
-        **kwargs : dict, optional
+        kwargs :
             Additional keyword arguments to pass to the model constructor.
         Returns
@@ -149,8 +150,8 @@ class EmbeddingModel:
             Sequences to embed.
         reduction : ReductionType or None, optional
             Reduction to use (e.g. mean). Defaults to mean embedding.
-        **kwargs : dict, optional
-            Additional keyword arguments to pass to the embedding request.
+        kwargs:
+            Additional keyword arguments to be used from foundational models, e.g. prompt_id for PoET models.
         Returns
         -------
@@ -179,8 +180,8 @@ class EmbeddingModel:
         ----------
         sequences : list of bytes or list of str
             Sequences to compute logits for.
-        **kwargs : dict, optional
-            Additional keyword arguments to pass to the logits request.
+        kwargs :
+            Additional keyword arguments to be used from foundational models, e.g. prompt_id for PoET models.
         Returns
         -------
@@ -195,32 +196,6 @@ class EmbeddingModel:
             sequences=sequences,
         )
-    def attn(
-        self, sequences: list[bytes] | list[str], **kwargs
-    ) -> EmbeddingsResultFuture:
-        """
-        Compute attention embeddings for sequences using this model.
-        Parameters
-        ----------
-        sequences : list of bytes or list of str
-            Sequences to compute attention embeddings for.
-        **kwargs : dict, optional
-            Additional keyword arguments to pass to the attention request.
-        Returns
-        -------
-        EmbeddingsResultFuture
-            Future object representing the attention result.
-        """
-        return EmbeddingsResultFuture.create(
-            session=self.session,
-            job=api.request_attn_post(
-                session=self.session, model_id=self.id, sequences=sequences, **kwargs
-            ),
-            sequences=sequences,
-        )
     def fit_svd(
         self,
         sequences: list[bytes] | list[str] | None = None,
@@ -245,8 +220,8 @@ class EmbeddingModel:
             Number of components in SVD. Determines output shapes. Default is 1024.
         reduction : ReductionType or None, optional
             Embeddings reduction to use (e.g. mean).
-        **kwargs : dict, optional
-            Additional keyword arguments to pass to the SVD fitting.
+        kwargs :
+            Additional keyword arguments to be used from foundational models, e.g. prompt_id for PoET models.
         Returns
         -------
@@ -261,7 +236,7 @@ class EmbeddingModel:
         # local import for cyclic dep
         from openprotein.svd import SVDAPI
-        svd_api = getattr(self.session, "data", None)
+        svd_api = getattr(self.session, "svd", None)
         assert isinstance(svd_api, SVDAPI)
         # Ensure either or
@@ -273,10 +248,9 @@ class EmbeddingModel:
             )
         model_id = self.id
         return svd_api.fit_svd(
-            session=self.session,
             model_id=model_id,
             sequences=sequences,
-            assay_id=assay.id if assay is not None else None,
+            assay=assay,
             n_components=n_components,
             reduction=reduction,
             **kwargs,
@@ -306,8 +280,8 @@ class EmbeddingModel:
             Number of components in UMAP fit. Determines output shapes. Default is 2.
         reduction : ReductionType or None, optional
             Embeddings reduction to use (e.g. mean). Defaults to MEAN.
-        **kwargs : dict, optional
-            Additional keyword arguments to pass to the UMAP fitting.
+        kwargs :
+            Additional keyword arguments to be used from foundational models, e.g. prompt_id for PoET models.
         Returns
         -------
@@ -322,9 +296,8 @@ class EmbeddingModel:
         # local import for cyclic dep
         from openprotein.umap import UMAPAPI
-        umap_api = UMAPAPI(
-            session=self.session,
-        )
+        umap_api = getattr(self.session, "umap", None)
+        assert isinstance(umap_api, UMAPAPI)
         # Ensure either or
         if (assay is None and sequences is None) or (
@@ -335,7 +308,6 @@ class EmbeddingModel:
             )
         model_id = self.id
         return umap_api.fit_umap(
-            session=self.session,
             model_id=model_id,
             feature_type=FeatureType.PLM,
             sequences=sequences,
@@ -369,8 +341,8 @@ class EmbeddingModel:
             Optional name for the predictor model.
         description : str or None, optional
             Optional description for the predictor model.
-        **kwargs : dict, optional
-            Additional keyword arguments to pass to the GP fitting.
+        kwargs :
+            Additional keyword arguments to be used from foundational models, e.g. prompt_id for PoET models.
         Returns
         -------
@@ -391,11 +363,9 @@ class EmbeddingModel:
         predictor_api = getattr(self.session, "predictor", None)
         assert isinstance(predictor_api, PredictorAPI)
-        model_id = self.id
         # get assay if str
         assay = data_api.get(assay_id=assay) if isinstance(assay, str) else assay
         # extract assay_id
-        assay_id = assay.assay_id if isinstance(assay, AssayMetadata) else assay.id
         if len(properties) == 0:
             raise InvalidParameterError("Expected (at-least) 1 property to train")
         if not set(properties) <= set(assay.measurement_names):
@@ -410,12 +380,42 @@ class EmbeddingModel:
         # inject into predictor api
         return predictor_api.fit_gp(
-            assay_id=assay_id,
+            assay=assay,
             properties=properties,
             feature_type=FeatureType.PLM,
-            model_id=model_id,
+            model=self,
             reduction=reduction,
             name=name,
             description=description,
             **kwargs,
         )
+class AttnModel(EmbeddingModel):
+    """Embeddings model that provides attention computation."""
+    def attn(
+        self, sequences: list[bytes] | list[str], **kwargs
+    ) -> EmbeddingsResultFuture:
+        """
+        Compute attention embeddings for sequences using this model.
+        Parameters
+        ----------
+        sequences : list of bytes or list of str
+            Sequences to compute attention embeddings for.
+        kwargs :
+            Additional keyword arguments to be used from foundational models.
+        Returns
+        -------
+        EmbeddingsResultFuture
+            Future object representing the attention result.
+        """
+        return EmbeddingsResultFuture.create(
+            session=self.session,
+            job=api.request_attn_post(
+                session=self.session, model_id=self.id, sequences=sequences, **kwargs
+            ),
+            sequences=sequences,
+        )

openprotein_python-0.8.2/openprotein/embeddings/openprotein.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""OpenProtein-proprietary models."""
+from .models import AttnModel, EmbeddingModel
+class OpenProteinModel(AttnModel, EmbeddingModel):
+    """
+    Proprietary protein embedding models served by OpenProtein.
+    Examples
+    --------
+    View specific model details (inc supported tokens) with the `?` operator.
+    .. code-block:: python
+        >>> import openprotein
+        >>> session = openprotein.connect(username="user", password="password")
+        >>> session.embedding.prot_seq?
+    """
+    model_id = ["prot-seq", "rotaprot-large-uniref50w", "rotaprot_large_uniref90_ft"]

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/embeddings/poet.py RENAMED Viewed

@@ -33,9 +33,9 @@ class PoETModel(EmbeddingModel):
     --------
     View specific model details (including supported tokens) with the `?` operator.
-    >>> import openprotein
-    >>> session = openprotein.connect(username="user", password="password")
-    >>> session.embedding.poet.<embeddings_method>
+        >>> import openprotein
+        >>> session = openprotein.connect(username="user", password="password")
+        >>> session.embedding.poet.<embeddings_method>
     """
     model_id = "poet"
@@ -113,7 +113,7 @@ class PoETModel(EmbeddingModel):
             prompt_id = None
         else:
             prompt_id = prompt if isinstance(prompt, str) else prompt.id
-        return super().logits(sequences=sequences, prompt_id=prompt_id)
+        return super().logits(sequences=sequences, prompt_id=prompt_id, **kwargs)
     def attn(self):
         """
@@ -123,6 +123,8 @@ class PoETModel(EmbeddingModel):
         ------
         ValueError
             Always raised, as attention is not supported for PoET.
+        :meta private:
         """
         raise ValueError("Attn not yet supported for poet")

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/embeddings/poet2.py RENAMED Viewed

@@ -38,9 +38,11 @@ class PoET2Model(PoETModel, EmbeddingModel):
     Examples
     --------
-    >>> import openprotein
-    >>> session = openprotein.connect(username="user", password="password")
-    >>> session.embedding.poet2.<embeddings_method>
+    .. code-block:: python
+        >>> import openprotein
+        >>> session = openprotein.connect(username="user", password="password")
+        >>> session.embedding.poet2?
     """
     model_id = "poet-2"

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/fold/boltz.py RENAMED Viewed

@@ -102,11 +102,10 @@ class BoltzModel(FoldModel):
         step_scale: float = 1.638,
         use_potentials: bool = False,
         constraints: list[dict] | None = None,
-        force_single_sequence_mode: bool = False,
         **kwargs,
     ) -> FoldComplexResultFuture:
         """
-        Post sequences to boltz model.
+        Request structure prediction with boltz model.
         Parameters
         ----------
@@ -287,7 +286,7 @@ class Boltz2Model(BoltzModel, FoldModel):
         method: str | None = None,
     ) -> FoldComplexResultFuture:
         """
-        Post sequences to Boltz-2 model.
+        Request structure prediction with Boltz-2 model.
         Parameters
         ----------
@@ -392,7 +391,7 @@ class Boltz1xModel(BoltzModel, FoldModel):
         constraints: list[dict] | None = None,
     ) -> FoldComplexResultFuture:
         """
-        Post sequences to Boltz-1x model. Uses potentials with Boltz-1 model.
+        Request structure prediction with Boltz-1x model. Uses potentials with Boltz-1 model.
         Parameters
         ----------
@@ -456,7 +455,7 @@ class Boltz1Model(BoltzModel, FoldModel):
         constraints: list[dict] | None = None,
     ) -> FoldComplexResultFuture:
         """
-        Post sequences to Boltz-1 model.
+        Request structure prediction with Boltz-1 model.
         Parameters
         ----------

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/fold/fold.py RENAMED Viewed

@@ -17,15 +17,20 @@ class FoldAPI:
     Fold API provides a high level interface for making protein structure predictions.
     """
-    esmfold: ESMFoldModel
-    alphafold2: AlphaFold2Model
-    af2: AlphaFold2Model
-    boltz_1: Boltz1Model
-    boltz1: Boltz1Model
-    boltz_1x: Boltz1xModel
-    boltz1x: Boltz1xModel
-    boltz_2: Boltz2Model
+    #: Boltz-2 model
     boltz2: Boltz2Model
+    boltz_2: Boltz2Model
+    #: Boltz-1x model
+    boltz1x: Boltz1xModel
+    boltz_1x: Boltz1xModel
+    #: Boltz-1 model
+    boltz1: Boltz1Model
+    boltz_1: Boltz1Model
+    af2: AlphaFold2Model
+    #: AlphaFold-2 model
+    alphafold2: AlphaFold2Model
+    #: ESMFold model
+    esmfold: ESMFoldModel
     def __init__(self, session: APISession):
         self.session = session

{openprotein_python-0.8.1 → openprotein_python-0.8.2}/openprotein/fold/future.py RENAMED Viewed

@@ -129,13 +129,13 @@ class FoldResultFuture(MappedFuture, Future):
         """
         return self.job.job_id
-    def keys(self):
+    def __keys__(self):
         """
         Get the list of sequences submitted for the fold request.
         Returns
         -------
-        list[bytes]
+        list of bytes
             List of sequences.
         """
         return self.sequences

openprotein-python 0.8.1__tar.gz → 0.8.2__tar.gz

openprotein-python 0.8.1__tar.gz → 0.8.2__tar.gz