PyPI - openprotein-python - Versions diffs - 0.8.2__1-py3-none-any.whl - Mend

openprotein-python 0.8.2__1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

openprotein/__init__.py +164 -0
openprotein/_version.py +48 -0
openprotein/align/__init__.py +8 -0
openprotein/align/align.py +395 -0
openprotein/align/api.py +428 -0
openprotein/align/future.py +55 -0
openprotein/align/msa.py +129 -0
openprotein/align/schemas.py +165 -0
openprotein/base.py +181 -0
openprotein/chains.py +88 -0
openprotein/common/__init__.py +5 -0
openprotein/common/features.py +7 -0
openprotein/common/model_metadata.py +33 -0
openprotein/common/reduction.py +8 -0
openprotein/config.py +9 -0
openprotein/csv.py +31 -0
openprotein/data/__init__.py +9 -0
openprotein/data/api.py +218 -0
openprotein/data/assaydataset.py +178 -0
openprotein/data/data.py +93 -0
openprotein/data/schemas.py +27 -0
openprotein/design/__init__.py +16 -0
openprotein/design/api.py +259 -0
openprotein/design/design.py +125 -0
openprotein/design/future.py +146 -0
openprotein/design/schemas.py +607 -0
openprotein/embeddings/__init__.py +27 -0
openprotein/embeddings/api.py +619 -0
openprotein/embeddings/embeddings.py +151 -0
openprotein/embeddings/esm.py +33 -0
openprotein/embeddings/future.py +146 -0
openprotein/embeddings/models.py +421 -0
openprotein/embeddings/openprotein.py +21 -0
openprotein/embeddings/poet.py +446 -0
openprotein/embeddings/poet2.py +505 -0
openprotein/embeddings/schemas.py +78 -0
openprotein/errors.py +76 -0
openprotein/fasta.py +92 -0
openprotein/fold/__init__.py +21 -0
openprotein/fold/alphafold2.py +131 -0
openprotein/fold/api.py +287 -0
openprotein/fold/boltz.py +691 -0
openprotein/fold/esmfold.py +54 -0
openprotein/fold/fold.py +107 -0
openprotein/fold/future.py +509 -0
openprotein/fold/models.py +139 -0
openprotein/fold/schemas.py +39 -0
openprotein/jobs/__init__.py +9 -0
openprotein/jobs/api.py +71 -0
openprotein/jobs/futures.py +746 -0
openprotein/jobs/jobs.py +69 -0
openprotein/jobs/schemas.py +135 -0
openprotein/models/__init__.py +4 -0
openprotein/models/base.py +63 -0
openprotein/models/foundation/rfdiffusion.py +283 -0
openprotein/models/models.py +33 -0
openprotein/predictor/__init__.py +25 -0
openprotein/predictor/api.py +384 -0
openprotein/predictor/models.py +374 -0
openprotein/predictor/prediction.py +79 -0
openprotein/predictor/predictor.py +242 -0
openprotein/predictor/schemas.py +113 -0
openprotein/predictor/validate.py +40 -0
openprotein/prompt/__init__.py +9 -0
openprotein/prompt/api.py +505 -0
openprotein/prompt/models.py +142 -0
openprotein/prompt/prompt.py +130 -0
openprotein/prompt/schemas.py +49 -0
openprotein/protein.py +587 -0
openprotein/svd/__init__.py +9 -0
openprotein/svd/api.py +206 -0
openprotein/svd/models.py +288 -0
openprotein/svd/schemas.py +31 -0
openprotein/svd/svd.py +134 -0
openprotein/umap/__init__.py +9 -0
openprotein/umap/api.py +259 -0
openprotein/umap/models.py +211 -0
openprotein/umap/schemas.py +35 -0
openprotein/umap/umap.py +175 -0
openprotein/utils/uuid.py +29 -0
openprotein_python-0.8.2.dist-info/METADATA +176 -0
openprotein_python-0.8.2.dist-info/RECORD +84 -0
openprotein_python-0.8.2.dist-info/WHEEL +4 -0
openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0

openprotein/design/api.py ADDED Viewed

@@ -0,0 +1,259 @@
+"""Design REST API for making HTTP calls to our design backend."""
+from typing import Iterator
+import numpy as np
+from pydantic import TypeAdapter
+from openprotein import csv
+from openprotein.base import APISession
+from .schemas import (
+    Criteria,
+    Criterion,
+    Design,
+    DesignConstraint,
+    DesignJob,
+    DesignResult,
+    Job,
+    Subcriterion,
+)
+PATH_PREFIX = "v1/designer/design"
+def designs_list(session: APISession) -> list[Design]:
+    """
+    List designs.
+    Parameters
+    ----------
+    session : APISession
+        Session object for API communication.
+    Returns
+    -------
+    list[Design]
+        List of designs.
+    """
+    endpoint = PATH_PREFIX
+    response = session.get(endpoint)
+    return TypeAdapter(list[Design]).validate_python(response.json())
+def design_get(session: APISession, design_id: str) -> Design:
+    """
+    Get design.
+    Parameters
+    ----------
+    session : APISession
+        Session object for API communication.
+    design_id: str
+        ID of design to get.
+    Returns
+    -------
+    Design
+        Design metadata.
+    """
+    endpoint = PATH_PREFIX + f"/{design_id}"
+    response = session.get(endpoint)
+    return TypeAdapter(Design).validate_python(response.json())
+def designer_create_genetic_algorithm(
+    session: APISession,
+    assay_id: str,
+    criteria: Criteria | Subcriterion | Criterion,
+    num_steps: int = 25,
+    pop_size: int = 1024,  # TODO - rename to library_size
+    n_offsprings: int = 5120,
+    crossover_prob: float = 1.0,
+    crossover_prob_pointwise: float = 0.2,
+    mutation_average_mutations_per_seq: int = 1,
+    allowed_tokens: DesignConstraint | dict[int, list[str]] = {},
+) -> Job:
+    """
+    Create design using genetic algorithm.
+    Parameters
+    ----------
+    session : APISession
+        Session object for API communication.
+    assay_id : str
+        Assay ID to fit GP on.
+    criteria: list[list[DesignCriterion]]
+        List of list of design criteria, logically grouping by OR then AND.
+    num_steps: int, optional
+        The number of steps in the genetic algorithm. Default is 8.
+    pop_size: int, optional
+        The population size for the genetic algorithm. Default is 256.
+    n_offsprings: int, optional
+        The number of offspring for the genetic algorithm. Default is 5120.
+    crossover_prob: float, optional
+        The crossover probability for the genetic algorithm. Default is 1.
+    crossover_prob_pointwise: float, optional
+        The pointwise crossover probability for the genetic algorithm. Default is 0.2.
+    mutation_average_mutations_per_seq: int, optional
+        The average number of mutations per sequence. Default is 1.
+    allowed_tokens: DesignConstraint | dict[int, list[str]]
+        A dict of positions and allows tokens (e.g. *{1:['G','L']})* ) designating how mutations may occur. Defaults to empty dict.
+    Returns
+    -------
+    DesignJob
+    """
+    if isinstance(criteria, Subcriterion):
+        criteria = Criteria([Criterion([criteria])])
+    elif isinstance(criteria, Criterion):
+        criteria = Criteria([criteria])
+    if isinstance(allowed_tokens, DesignConstraint):
+        allowed_tokens = allowed_tokens.as_dict()
+    endpoint = PATH_PREFIX + "/genetic-algorithm"
+    body = {
+        "assay_id": assay_id,
+        "criteria": criteria.model_dump(),
+        "num_steps": num_steps,
+        "pop_size": pop_size,
+        "n_offsprings": n_offsprings,
+        "crossover_prob": crossover_prob,
+        "crossover_prob_pointwise": crossover_prob_pointwise,
+        "mutation_average_mutations_per_seq": mutation_average_mutations_per_seq,
+        "allowed_tokens": allowed_tokens,
+    }
+    response = session.post(endpoint, json=body)
+    return DesignJob.model_validate(response.json())
+def design_delete(session: APISession, design_id: str):
+    raise NotImplementedError()
+def designer_get_design_results(
+    session: APISession,
+    design_id: str,
+    step: int | None = None,
+) -> Iterator[list[str]]:
+    """
+    Get csv encoded results for a design ID.
+    Parameters
+    ----------
+    session : APISession
+        Session object for API communication.
+    design_id : str
+        Design ID to retrieve results from.
+    step: int | None, optional
+        Step of the design whose results to fetch. Defaults to -1, which refers to the last step.
+    Returns
+    -------
+    bytes
+    """
+    params = {}
+    if step is not None:
+        if step != -1:
+            step -= 1
+        params["step"] = step
+    endpoint = PATH_PREFIX + f"/{design_id}/results"
+    response = session.get(endpoint, params=params, stream=True)
+    return csv.parse_stream(response.iter_lines())
+def decode_design_result(
+    row: list[str],
+    score_start_index: int,
+    subscore_start_index: int,
+    pred_start_index: int,
+) -> DesignResult:
+    """
+    Decode prediction scores.
+    Args:
+        data (bytes): raw bytes encoding the array received over the API
+        batched (bool): whether or not the result was batched. affects the retrieved csv format whether they contain additional columns and header rows.
+    Returns:
+        mus (np.ndarray): decoded array of means
+        vars (np.ndarray): decoded array of variances
+    """
+    scores = np.array(
+        [float(score) for score in row[score_start_index:subscore_start_index]]
+    )
+    subscores = np.array(
+        [float(subscore) for subscore in row[subscore_start_index:pred_start_index]]
+    )
+    preds = np.array([float(pred) for pred in row[pred_start_index:]])
+    result = DesignResult(
+        step=int(row[0]) + 1,
+        sample_index=int(row[1]) + 1,
+        sequence=row[2],
+        scores=scores,
+        subscores=subscores,
+        means=preds[::2],
+        vars=preds[1::2],
+    )
+    return result
+def decode_design_results_stream(
+    data: Iterator[list[str]], header: list[str] | None = None
+) -> Iterator[DesignResult]:
+    """
+    Decode design results.
+    Args:
+        data: Iterator[list[str]]
+            Data in the form of an iterator of list of string-encoded values
+        header: list[str] | None, optional
+            Headers describing the data. Should be same length as each row returned from the data iterator.
+            Defaults to None, which means the first row in the iterator should be header.
+    Returns:
+        step: int
+            Step index of the design.
+        sample_index: int
+            Index of the sample in the overall design.
+        sequence: str
+            Output designed sequence.
+        scores: np.ndarray[float]
+            M array of scores based on provided criteria (M groups of subcriteria).
+        subscores: np.ndarray[float]
+            N array of subscores based on provided criteria (flattened N subcriteria).
+        means: np.ndarray[float]
+            K array of means for each model subscriterion.
+        vars: np.ndarray[float]
+            K array of variances for each model subscriterion.
+        vars (np.ndarray): decoded array of variances
+    """
+    if header is None:
+        header = next(data)
+        if header[0].isnumeric():
+            raise ValueError(
+                "Expected first row in data to be header of 'step','sample_index',..."
+            )
+    score_start_index = subscore_start_index = pred_start_index = len(header)
+    # first start indices
+    for i, col_name in enumerate(header):
+        if col_name.startswith("score"):
+            score_start_index = i
+            break
+    for i, col_name in enumerate(header[score_start_index:]):
+        if col_name.endswith("score"):
+            subscore_start_index = score_start_index + i
+            break
+    for i, col_name in enumerate(header[subscore_start_index:]):
+        if col_name.endswith("y_mu"):
+            pred_start_index = subscore_start_index + i
+            break
+    for row in data:
+        yield decode_design_result(
+            row=row,
+            score_start_index=score_start_index,
+            subscore_start_index=subscore_start_index,
+            pred_start_index=pred_start_index,
+        )

openprotein/design/design.py ADDED Viewed

@@ -0,0 +1,125 @@
+"""Design API providing the interface to design novel proteins based on your design criteria."""
+from openprotein.base import APISession
+from openprotein.data import AssayDataset, DataAPI
+from openprotein.jobs import JobsAPI
+from . import api
+from .future import DesignFuture
+from .schemas import Criteria, Criterion, DesignConstraint, Subcriterion
+class DesignAPI:
+    """Design API providing the interface to design novel proteins based on your design criteria."""
+    def __init__(
+        self,
+        session: APISession,
+    ):
+        self.session = session
+    def list_designs(self) -> list[DesignFuture]:
+        """
+        List all designs.
+        Returns
+        -------
+        list of DesignFuture
+            A list of DesignFuture objects representing all designs.
+        """
+        return [
+            DesignFuture(
+                session=self.session,
+                metadata=m,
+            )
+            for m in api.designs_list(session=self.session)
+        ]
+    def get_design(self, design_id: str) -> DesignFuture:
+        """
+        Retrieve a specific design by its ID.
+        Parameters
+        ----------
+        design_id : str
+            ID of the design to retrieve.
+        Returns
+        -------
+        DesignFuture
+            A future object representing the design job and its results.
+        """
+        return DesignFuture(
+            session=self.session,
+            metadata=api.design_get(session=self.session, design_id=design_id),
+        )
+    def create_genetic_algorithm_design(
+        self,
+        assay: AssayDataset,
+        criteria: Criteria | Subcriterion | Criterion,
+        num_steps: int = 25,
+        pop_size: int = 1024,
+        n_offsprings: int = 5120,
+        crossover_prob: float = 1.0,
+        crossover_prob_pointwise: float = 0.2,
+        mutation_average_mutations_per_seq: int = 1,
+        allowed_tokens: DesignConstraint | dict[int, list[str]] = {},
+    ) -> DesignFuture:
+        """
+        Start a protein design job using a genetic algorithm based on assay data, a trained ML model, and specified criteria.
+        Parameters
+        ----------
+        assay : AssayDataset
+            The AssayDataset to design from.
+        criteria : Criteria or Subcriterion or Criterion
+            Criteria for evaluating the design.
+        num_steps : int, optional
+            The number of steps in the genetic algorithm. Default is 25.
+        pop_size : int, optional
+            The population size for the genetic algorithm. Default is 1024.
+        n_offsprings : int, optional
+            The number of offspring for the genetic algorithm. Default is 5120.
+        crossover_prob : float, optional
+            The crossover probability for the genetic algorithm. Default is 1.0.
+        crossover_prob_pointwise : float, optional
+            The pointwise crossover probability for the genetic algorithm. Default is 0.2.
+        mutation_average_mutations_per_seq : int, optional
+            The average number of mutations per sequence. Default is 1.
+        allowed_tokens : DesignConstraint or dict of int to list of str, optional
+            A dict of positions and allowed tokens (e.g. {1: ['G', 'L']}) designating how mutations may occur. Defaults to empty dict.
+        Returns
+        -------
+        DesignFuture
+            A future object representing the design job and its results.
+        """
+        return DesignFuture.create(
+            session=self.session,
+            job=api.designer_create_genetic_algorithm(
+                self.session,
+                assay_id=assay.id,
+                criteria=criteria,
+                num_steps=num_steps,
+                pop_size=pop_size,
+                n_offsprings=n_offsprings,
+                crossover_prob=crossover_prob,
+                crossover_prob_pointwise=crossover_prob_pointwise,
+                mutation_average_mutations_per_seq=mutation_average_mutations_per_seq,
+                allowed_tokens=allowed_tokens,
+            ),
+        )
+    def create_design_job(
+        self,
+        *args,
+    ):
+        raise AttributeError(
+            "create_design_job belongs to the deprecated design interface. Use create_genetic_algorithm_design instead in the new design interface."
+        )
+    def get_design_results(self, *args):
+        raise AttributeError(
+            "get_design_results belongs to the deprecated design interface. Use get_design and wait instead in the new design interface."
+        )

openprotein/design/future.py ADDED Viewed

@@ -0,0 +1,146 @@
+"""Design results represented as futures."""
+from typing import Iterator
+from openprotein.base import APISession
+from openprotein.data import AssayDataset, DataAPI
+from openprotein.jobs import Future, JobsAPI, StreamingFuture
+from . import api
+from .schemas import Criteria, Design, DesignAlgorithm, DesignJob, DesignResult
+class DesignFuture(StreamingFuture, Future):
+    """A future object that will hold the results of the design job."""
+    job: DesignJob
+    def __init__(
+        self,
+        session: APISession,
+        job: DesignJob | None = None,
+        metadata: Design | None = None,
+    ):
+        """
+        Construct a future for a design job.
+        Takes in either a design job, or the design metadata.
+        :meta private:
+        """
+        self._design_assay = None
+        # initialize the metadata
+        if metadata is None:
+            if job is None:
+                raise ValueError("Expected design metadata or job")
+            metadata = api.design_get(session=session, design_id=job.job_id)
+        self._metadata = metadata
+        if job is None:
+            jobs_api = getattr(session, "jobs", None)
+            assert isinstance(jobs_api, JobsAPI)
+            job = DesignJob.create(jobs_api.get_job(job_id=metadata.id))
+        super().__init__(session, job)
+    @property
+    def id(self):
+        """ID of the design."""
+        return self._metadata.id
+    @property
+    def assay(self) -> AssayDataset:
+        """Assay used in the design."""
+        if self._design_assay is None:
+            self._design_assay = self.get_assay()
+        return self._design_assay
+    @property
+    def algorithm(self) -> DesignAlgorithm:
+        """Algorithm used in the design."""
+        return self._metadata.algorithm
+    @property
+    def criteria(self) -> Criteria:
+        """Criteria used in the design."""
+        return self._metadata.criteria
+    @property
+    def num_steps(self):
+        """Number of steps used in the design."""
+        return self._metadata.num_steps
+    @property
+    def num_rows(self):
+        """Number of rows in the total design output (across all steps)."""
+        return self._metadata.num_rows
+    @property
+    def allowed_tokens(self) -> dict[str, list[str]] | None:
+        """Allowed tokens used in the design."""
+        return self._metadata.allowed_tokens
+    @property
+    def pop_size(self) -> int:
+        """Population size used in the design."""
+        return self._metadata.pop_size
+    @property
+    def n_offsprings(self) -> int:
+        """Number of offsprings used in the design."""
+        return self._metadata.n_offsprings
+    @property
+    def crossover_prob(self) -> float:
+        """Crossover probability used in the design."""
+        return self._metadata.crossover_prob
+    @property
+    def crossover_prob_pointwise(self) -> float:
+        """Crossover probability pointwise used in the design."""
+        return self._metadata.crossover_prob_pointwise
+    @property
+    def mutation_average_mutations_per_seq(self) -> int:
+        """Average mutations per sequence used in the design."""
+        return self._metadata.mutation_average_mutations_per_seq
+    @property
+    def metadata(self):
+        """Design metadata."""
+        self._refresh_metadata()
+        return self._metadata
+    def _refresh_metadata(self):
+        if not self._metadata.is_done():
+            self._metadata = api.design_get(
+                session=self.session, design_id=self._metadata.id
+            )
+    def __delete(self) -> bool:
+        """
+        Delete this design.
+        TODO - implementation
+        """
+        return api.design_delete(session=self.session, design_id=self.id)
+    def stream(self, step: int | None = None) -> Iterator[DesignResult]:
+        stream = api.designer_get_design_results(
+            session=self.session, design_id=self.id, step=step
+        )
+        return api.decode_design_results_stream(data=stream)
+    def get(self, verbose: bool = False, **kwargs) -> list[DesignResult]:
+        return super().get(verbose, **kwargs)
+    def get_assay(self) -> AssayDataset:
+        """
+        Get assay used for design job.
+        Returns
+        -------
+        AssayDataset
+            Assay dataset used for design.
+        """
+        data_api = getattr(self.session, "data", None)
+        assert isinstance(data_api, DataAPI)
+        return data_api.get(self._metadata.assay_id)