openprotein-python 0.8.4__tar.gz → 0.8.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/PKG-INFO +9 -9
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/README.md +8 -8
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/common/__init__.py +2 -2
- openprotein_python-0.8.5/openprotein/common/features.py +15 -0
- openprotein_python-0.8.5/openprotein/common/reduction.py +14 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/data/api.py +13 -2
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/data/data.py +9 -2
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/embeddings/models.py +37 -28
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/fold/future.py +2 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/predictor/api.py +2 -2
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/predictor/predictor.py +40 -12
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/predictor/schemas.py +2 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/protein.py +53 -36
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/svd/svd.py +6 -4
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/umap/umap.py +43 -14
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/pyproject.toml +1 -18
- openprotein_python-0.8.4/openprotein/common/features.py +0 -7
- openprotein_python-0.8.4/openprotein/common/reduction.py +0 -8
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/.gitignore +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/LICENSE.txt +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/_version.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/align/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/align/align.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/align/api.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/align/future.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/align/msa.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/align/schemas.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/base.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/chains.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/common/model_metadata.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/config.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/csv.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/data/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/data/assaydataset.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/data/schemas.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/design/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/design/api.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/design/design.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/design/future.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/design/schemas.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/embeddings/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/embeddings/api.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/embeddings/embeddings.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/embeddings/esm.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/embeddings/future.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/embeddings/openprotein.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/embeddings/poet.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/embeddings/poet2.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/embeddings/schemas.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/errors.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/fasta.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/fold/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/fold/alphafold2.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/fold/api.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/fold/boltz.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/fold/esmfold.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/fold/fold.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/fold/models.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/fold/schemas.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/jobs/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/jobs/api.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/jobs/futures.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/jobs/jobs.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/jobs/schemas.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/models/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/models/base.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/models/foundation/rfdiffusion.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/models/models.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/predictor/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/predictor/models.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/predictor/prediction.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/predictor/validate.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/prompt/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/prompt/api.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/prompt/models.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/prompt/prompt.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/prompt/schemas.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/svd/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/svd/api.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/svd/models.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/svd/schemas.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/umap/__init__.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/umap/api.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/umap/models.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/umap/schemas.py +0 -0
- {openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/utils/uuid.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openprotein-python
|
|
3
|
-
Version: 0.8.4
|
|
3
|
+
Version: 0.8.5
|
|
4
4
|
Summary: OpenProtein Python interface.
|
|
5
5
|
Author-email: Mark Gee <markgee@ne47.bio>, "Timothy Truong Jr." <ttruong@ne47.bio>, Tristan Bepler <tbepler@ne47.bio>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -28,14 +28,14 @@ The OpenProtein.AI Python Interface provides a user-friendly library to interact
|
|
|
28
28
|
|
|
29
29
|
# Table of Contents
|
|
30
30
|
|
|
31
|
-
| | Workflow
|
|
32
|
-
|
|
33
|
-
| 0 | [`Quick start`](#Quick-start)
|
|
34
|
-
| 1 | [`Installation`](https://docs.openprotein.ai/api-python/installation.html)
|
|
35
|
-
| 2 | [`Session management`](https://docs.openprotein.ai/api-python/overview.html)
|
|
36
|
-
| 3 | [`Asssay-based Sequence Learning`](https://docs.openprotein.ai/api-python/core_workflow.html)
|
|
37
|
-
| 4 | [`De Novo prediction & generative models (PoET)`](https://docs.openprotein.ai/api-python/poet_workflow.html) | Covers PoET, a protein LLM for *de novo* scoring, as well as sequence generation.
|
|
38
|
-
| 5 | [`Protein Language Models & Embeddings`](https://docs.openprotein.ai/api-python/embedding_workflow.html)
|
|
31
|
+
| | Workflow | Description |
|
|
32
|
+
|---|--------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
|
|
33
|
+
| 0 | [`Quick start`](#Quick-start) | Quick start guide |
|
|
34
|
+
| 1 | [`Installation`](https://docs.openprotein.ai/api-python/installation.html) | Install guide for pip and conda. |
|
|
35
|
+
| 2 | [`Session management`](https://docs.openprotein.ai/api-python/overview.html) | An overview of the OpenProtein Python Client & the asynchronous jobs system. |
|
|
36
|
+
| 3 | [`Asssay-based Sequence Learning`](https://docs.openprotein.ai/api-python/core_workflow.html) | Covers core tasks such as data upload, model training & prediction, and sequence design. |
|
|
37
|
+
| 4 | [`De Novo prediction & generative models (PoET)`](https://docs.openprotein.ai/api-python/poet_workflow.html) | Covers PoET, a protein LLM for *de novo* scoring, as well as sequence generation. |
|
|
38
|
+
| 5 | [`Protein Language Models & Embeddings`](https://docs.openprotein.ai/api-python/embedding_workflow.html) | Covers methods for creating sequence embeddings with proprietary & open-source models. |
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
# Quick-start
|
|
@@ -10,14 +10,14 @@ The OpenProtein.AI Python Interface provides a user-friendly library to interact
|
|
|
10
10
|
|
|
11
11
|
# Table of Contents
|
|
12
12
|
|
|
13
|
-
| | Workflow
|
|
14
|
-
|
|
15
|
-
| 0 | [`Quick start`](#Quick-start)
|
|
16
|
-
| 1 | [`Installation`](https://docs.openprotein.ai/api-python/installation.html)
|
|
17
|
-
| 2 | [`Session management`](https://docs.openprotein.ai/api-python/overview.html)
|
|
18
|
-
| 3 | [`Asssay-based Sequence Learning`](https://docs.openprotein.ai/api-python/core_workflow.html)
|
|
19
|
-
| 4 | [`De Novo prediction & generative models (PoET)`](https://docs.openprotein.ai/api-python/poet_workflow.html) | Covers PoET, a protein LLM for *de novo* scoring, as well as sequence generation.
|
|
20
|
-
| 5 | [`Protein Language Models & Embeddings`](https://docs.openprotein.ai/api-python/embedding_workflow.html)
|
|
13
|
+
| | Workflow | Description |
|
|
14
|
+
|---|--------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
|
|
15
|
+
| 0 | [`Quick start`](#Quick-start) | Quick start guide |
|
|
16
|
+
| 1 | [`Installation`](https://docs.openprotein.ai/api-python/installation.html) | Install guide for pip and conda. |
|
|
17
|
+
| 2 | [`Session management`](https://docs.openprotein.ai/api-python/overview.html) | An overview of the OpenProtein Python Client & the asynchronous jobs system. |
|
|
18
|
+
| 3 | [`Asssay-based Sequence Learning`](https://docs.openprotein.ai/api-python/core_workflow.html) | Covers core tasks such as data upload, model training & prediction, and sequence design. |
|
|
19
|
+
| 4 | [`De Novo prediction & generative models (PoET)`](https://docs.openprotein.ai/api-python/poet_workflow.html) | Covers PoET, a protein LLM for *de novo* scoring, as well as sequence generation. |
|
|
20
|
+
| 5 | [`Protein Language Models & Embeddings`](https://docs.openprotein.ai/api-python/embedding_workflow.html) | Covers methods for creating sequence embeddings with proprietary & open-source models. |
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
# Quick-start
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
"""Common classes and utilities for OpenProtein."""
|
|
2
2
|
|
|
3
|
-
from .features import FeatureType
|
|
3
|
+
from .features import Feature, FeatureType
|
|
4
4
|
from .model_metadata import ModelDescription, ModelMetadata, TokenInfo
|
|
5
|
-
from .reduction import ReductionType
|
|
5
|
+
from .reduction import Reduction, ReductionType
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Feature types used in OpenProtein."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class FeatureType(str, Enum):
|
|
8
|
+
|
|
9
|
+
PLM = "PLM"
|
|
10
|
+
SVD = "SVD"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# NOTE: only works with python 3.12+
|
|
14
|
+
# Feature = Literal[*tuple([r.value for r in FeatureType])]
|
|
15
|
+
Feature = Literal["PLM", "SVD"]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Reduction types used in OpenProtein."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ReductionType(str, Enum):
|
|
8
|
+
MEAN = "MEAN"
|
|
9
|
+
SUM = "SUM"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# NOTE: only works with python 3.12+
|
|
13
|
+
# Reduction = Literal[*tuple([r.value for r in ReductionType])]
|
|
14
|
+
Reduction = Literal["MEAN", "SUM"]
|
|
@@ -64,7 +64,9 @@ def assaydata_post(
|
|
|
64
64
|
raise APIError(f"Unable to post assay data: {response.text}")
|
|
65
65
|
|
|
66
66
|
|
|
67
|
-
def assaydata_list(session: APISession) -> list[AssayMetadata]:
|
|
67
|
+
def assaydata_list(
|
|
68
|
+
session: APISession, limit: int | None = None, offset: int | None = None
|
|
69
|
+
) -> list[AssayMetadata]:
|
|
68
70
|
"""
|
|
69
71
|
Get a list of all assay metadata.
|
|
70
72
|
|
|
@@ -72,6 +74,10 @@ def assaydata_list(session: APISession) -> list[AssayMetadata]:
|
|
|
72
74
|
----------
|
|
73
75
|
session : APISession
|
|
74
76
|
Session object for API communication.
|
|
77
|
+
limit : int, optional
|
|
78
|
+
Limit the number of assays to return.
|
|
79
|
+
offset : int, optional
|
|
80
|
+
Offset of assays to retrieve. Useful with limit.
|
|
75
81
|
|
|
76
82
|
Returns
|
|
77
83
|
-------
|
|
@@ -84,7 +90,12 @@ def assaydata_list(session: APISession) -> list[AssayMetadata]:
|
|
|
84
90
|
If an error occurs during the API request.
|
|
85
91
|
"""
|
|
86
92
|
endpoint = "v1/assaydata"
|
|
87
|
-
|
|
93
|
+
params = {}
|
|
94
|
+
if limit is not None:
|
|
95
|
+
params["limit"] = limit
|
|
96
|
+
if offset is not None:
|
|
97
|
+
params["offset"] = offset
|
|
98
|
+
response = session.get(endpoint, params=params)
|
|
88
99
|
if response.status_code == 200:
|
|
89
100
|
return TypeAdapter(list[AssayMetadata]).validate_python(response.json())
|
|
90
101
|
else:
|
|
@@ -14,16 +14,23 @@ class DataAPI:
|
|
|
14
14
|
def __init__(self, session: APISession):
|
|
15
15
|
self.session = session
|
|
16
16
|
|
|
17
|
-
def list(
|
|
17
|
+
def list(
|
|
18
|
+
self, limit: int | None = None, offset: int | None = None
|
|
19
|
+
) -> list[AssayDataset]:
|
|
18
20
|
"""
|
|
19
21
|
List all assay datasets.
|
|
20
22
|
|
|
23
|
+
limit : int, optional
|
|
24
|
+
Limit the number of assays to return.
|
|
25
|
+
offset : int, optional
|
|
26
|
+
Offset of assays to retrieve. Useful with limit.
|
|
27
|
+
|
|
21
28
|
Returns
|
|
22
29
|
-------
|
|
23
30
|
List[AssayDataset]
|
|
24
31
|
List of all assay datasets.
|
|
25
32
|
"""
|
|
26
|
-
metadata = api.assaydata_list(self.session)
|
|
33
|
+
metadata = api.assaydata_list(session=self.session, limit=limit, offset=offset)
|
|
27
34
|
return [AssayDataset(self.session, x) for x in metadata]
|
|
28
35
|
|
|
29
36
|
def create(
|
|
@@ -3,7 +3,13 @@
|
|
|
3
3
|
from typing import TYPE_CHECKING
|
|
4
4
|
|
|
5
5
|
from openprotein.base import APISession
|
|
6
|
-
from openprotein.common import
|
|
6
|
+
from openprotein.common import (
|
|
7
|
+
Feature,
|
|
8
|
+
FeatureType,
|
|
9
|
+
ModelMetadata,
|
|
10
|
+
Reduction,
|
|
11
|
+
ReductionType,
|
|
12
|
+
)
|
|
7
13
|
from openprotein.data import AssayDataset, AssayMetadata, DataAPI
|
|
8
14
|
from openprotein.errors import InvalidParameterError
|
|
9
15
|
|
|
@@ -199,9 +205,9 @@ class EmbeddingModel:
|
|
|
199
205
|
def fit_svd(
|
|
200
206
|
self,
|
|
201
207
|
sequences: list[bytes] | list[str] | None = None,
|
|
202
|
-
assay: AssayDataset | None = None,
|
|
208
|
+
assay: AssayDataset | AssayMetadata | None = None,
|
|
203
209
|
n_components: int = 1024,
|
|
204
|
-
reduction: ReductionType | None = None,
|
|
210
|
+
reduction: Reduction | ReductionType | None = None,
|
|
205
211
|
**kwargs,
|
|
206
212
|
) -> "SVDModel":
|
|
207
213
|
"""
|
|
@@ -236,6 +242,11 @@ class EmbeddingModel:
|
|
|
236
242
|
# local import for cyclic dep
|
|
237
243
|
from openprotein.svd import SVDAPI
|
|
238
244
|
|
|
245
|
+
# runtime check on value
|
|
246
|
+
if isinstance(reduction, str):
|
|
247
|
+
reduction = ReductionType(reduction)
|
|
248
|
+
reduction = reduction.value
|
|
249
|
+
|
|
239
250
|
svd_api = getattr(self.session, "svd", None)
|
|
240
251
|
assert isinstance(svd_api, SVDAPI)
|
|
241
252
|
|
|
@@ -246,9 +257,8 @@ class EmbeddingModel:
|
|
|
246
257
|
raise InvalidParameterError(
|
|
247
258
|
"Expected either assay or sequences to fit SVD on!"
|
|
248
259
|
)
|
|
249
|
-
model_id = self.id
|
|
250
260
|
return svd_api.fit_svd(
|
|
251
|
-
|
|
261
|
+
model=self,
|
|
252
262
|
sequences=sequences,
|
|
253
263
|
assay=assay,
|
|
254
264
|
n_components=n_components,
|
|
@@ -259,9 +269,9 @@ class EmbeddingModel:
|
|
|
259
269
|
def fit_umap(
|
|
260
270
|
self,
|
|
261
271
|
sequences: list[bytes] | list[str] | None = None,
|
|
262
|
-
assay: AssayDataset | None = None,
|
|
272
|
+
assay: AssayDataset | AssayMetadata | None = None,
|
|
263
273
|
n_components: int = 2,
|
|
264
|
-
reduction:
|
|
274
|
+
reduction: Reduction | ReductionType = "MEAN",
|
|
265
275
|
**kwargs,
|
|
266
276
|
) -> "UMAPModel":
|
|
267
277
|
"""
|
|
@@ -274,11 +284,11 @@ class EmbeddingModel:
|
|
|
274
284
|
----------
|
|
275
285
|
sequences : list of bytes or list of str or None, optional
|
|
276
286
|
Optional sequences to fit UMAP with. Either use sequences or assay. Sequences is preferred.
|
|
277
|
-
assay : AssayDataset or None, optional
|
|
287
|
+
assay : AssayDataset or AssayMetadata or None, optional
|
|
278
288
|
Optional assay containing sequences to fit UMAP with. Either use sequences or assay. Ignored if sequences are provided.
|
|
279
289
|
n_components : int, optional
|
|
280
290
|
Number of components in UMAP fit. Determines output shapes. Default is 2.
|
|
281
|
-
reduction : ReductionType or None, optional
|
|
291
|
+
reduction : Reduction or ReductionType or None, optional
|
|
282
292
|
Embeddings reduction to use (e.g. mean). Defaults to MEAN.
|
|
283
293
|
kwargs :
|
|
284
294
|
Additional keyword arguments to be used from foundational models, e.g. prompt_id for PoET models.
|
|
@@ -296,6 +306,16 @@ class EmbeddingModel:
|
|
|
296
306
|
# local import for cyclic dep
|
|
297
307
|
from openprotein.umap import UMAPAPI
|
|
298
308
|
|
|
309
|
+
if reduction is None:
|
|
310
|
+
raise InvalidParameterError(
|
|
311
|
+
"Expected reduction if using EmbeddingModel to fit UMAP"
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
# runtime check on value
|
|
315
|
+
if isinstance(reduction, str):
|
|
316
|
+
reduction = ReductionType(reduction)
|
|
317
|
+
reduction = reduction.value
|
|
318
|
+
|
|
299
319
|
umap_api = getattr(self.session, "umap", None)
|
|
300
320
|
assert isinstance(umap_api, UMAPAPI)
|
|
301
321
|
|
|
@@ -306,12 +326,18 @@ class EmbeddingModel:
|
|
|
306
326
|
raise InvalidParameterError(
|
|
307
327
|
"Expected either assay or sequences to fit UMAP on!"
|
|
308
328
|
)
|
|
329
|
+
# get assay_id
|
|
330
|
+
assay_id = (
|
|
331
|
+
assay.assay_id
|
|
332
|
+
if isinstance(assay, AssayMetadata)
|
|
333
|
+
else assay.id if isinstance(assay, AssayDataset) else assay
|
|
334
|
+
)
|
|
309
335
|
model_id = self.id
|
|
310
336
|
return umap_api.fit_umap(
|
|
311
337
|
model_id=model_id,
|
|
312
338
|
feature_type=FeatureType.PLM,
|
|
313
339
|
sequences=sequences,
|
|
314
|
-
assay_id=
|
|
340
|
+
assay_id=assay_id,
|
|
315
341
|
n_components=n_components,
|
|
316
342
|
reduction=reduction,
|
|
317
343
|
**kwargs,
|
|
@@ -319,7 +345,7 @@ class EmbeddingModel:
|
|
|
319
345
|
|
|
320
346
|
def fit_gp(
|
|
321
347
|
self,
|
|
322
|
-
assay:
|
|
348
|
+
assay: AssayDataset | AssayMetadata | str,
|
|
323
349
|
properties: list[str],
|
|
324
350
|
reduction: ReductionType,
|
|
325
351
|
name: str | None = None,
|
|
@@ -358,26 +384,9 @@ class EmbeddingModel:
|
|
|
358
384
|
# local import to resolve cyclic
|
|
359
385
|
from openprotein.predictor import PredictorAPI
|
|
360
386
|
|
|
361
|
-
data_api = getattr(self.session, "data", None)
|
|
362
|
-
assert isinstance(data_api, DataAPI)
|
|
363
387
|
predictor_api = getattr(self.session, "predictor", None)
|
|
364
388
|
assert isinstance(predictor_api, PredictorAPI)
|
|
365
389
|
|
|
366
|
-
# get assay if str
|
|
367
|
-
assay = data_api.get(assay_id=assay) if isinstance(assay, str) else assay
|
|
368
|
-
# extract assay_id
|
|
369
|
-
if len(properties) == 0:
|
|
370
|
-
raise InvalidParameterError("Expected (at-least) 1 property to train")
|
|
371
|
-
if not set(properties) <= set(assay.measurement_names):
|
|
372
|
-
raise InvalidParameterError(
|
|
373
|
-
f"Expected all provided properties to be a subset of assay's measurements: {assay.measurement_names}"
|
|
374
|
-
)
|
|
375
|
-
# TODO - support multitask
|
|
376
|
-
if len(properties) > 1:
|
|
377
|
-
raise InvalidParameterError(
|
|
378
|
-
"Training a multitask GP is not yet supported (i.e. number of properties should only be 1 for now)"
|
|
379
|
-
)
|
|
380
|
-
|
|
381
390
|
# inject into predictor api
|
|
382
391
|
return predictor_api.fit_gp(
|
|
383
392
|
assay=assay,
|
|
@@ -464,6 +464,8 @@ class FoldComplexResultFuture(Future):
|
|
|
464
464
|
AttributeError
|
|
465
465
|
If affinity is not supported for the model.
|
|
466
466
|
"""
|
|
467
|
+
from .boltz import BoltzAffinity
|
|
468
|
+
|
|
467
469
|
if self.model_id not in {"boltz-1", "boltz-1x", "boltz-2"}:
|
|
468
470
|
raise AttributeError("affinity not supported for non-Boltz model")
|
|
469
471
|
if self._affinity is None:
|
|
@@ -162,8 +162,8 @@ def predictor_fit_gp_post(
|
|
|
162
162
|
body["name"] = name
|
|
163
163
|
if description is not None:
|
|
164
164
|
body["description"] = description
|
|
165
|
-
# add kwargs for embeddings kwargs
|
|
166
|
-
body.update(kwargs)
|
|
165
|
+
# add kwargs for embeddings kwargs to features
|
|
166
|
+
body["features"].update(kwargs)
|
|
167
167
|
|
|
168
168
|
response = session.post(endpoint, json=body)
|
|
169
169
|
return PredictorTrainJob.model_validate(response.json())
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
"""Predictor API providing the interface to train and predict predictors."""
|
|
2
2
|
|
|
3
3
|
from openprotein.base import APISession
|
|
4
|
-
from openprotein.common import FeatureType, ReductionType
|
|
4
|
+
from openprotein.common import Feature, FeatureType, Reduction, ReductionType
|
|
5
5
|
from openprotein.data import (
|
|
6
6
|
AssayDataset,
|
|
7
7
|
AssayMetadata,
|
|
8
|
+
DataAPI,
|
|
8
9
|
)
|
|
9
10
|
from openprotein.embeddings import EmbeddingModel, EmbeddingsAPI
|
|
10
11
|
from openprotein.errors import InvalidParameterError
|
|
@@ -120,8 +121,8 @@ class PredictorAPI:
|
|
|
120
121
|
assay: AssayDataset | AssayMetadata | str,
|
|
121
122
|
properties: list[str],
|
|
122
123
|
model: EmbeddingModel | SVDModel | str,
|
|
123
|
-
feature_type: FeatureType | None = None,
|
|
124
|
-
reduction: ReductionType | None = None,
|
|
124
|
+
feature_type: Feature | FeatureType | None = None,
|
|
125
|
+
reduction: Reduction | ReductionType | None = None,
|
|
125
126
|
name: str | None = None,
|
|
126
127
|
description: str | None = None,
|
|
127
128
|
**kwargs,
|
|
@@ -139,10 +140,10 @@ class PredictorAPI:
|
|
|
139
140
|
Instance of either EmbeddingModel or SVDModel to use depending
|
|
140
141
|
on feature type. Can also be a str specifying the model id,
|
|
141
142
|
but then feature_type would have to be specified.
|
|
142
|
-
feature_type : FeatureType or None
|
|
143
|
+
feature_type : Feature or FeatureType or None
|
|
143
144
|
Type of features to use for encoding sequences. "SVD" or "PLM".
|
|
144
145
|
None would require model to be EmbeddingModel or SVDModel.
|
|
145
|
-
reduction :
|
|
146
|
+
reduction : Reduction or ReductionType or None, optional
|
|
146
147
|
Type of embedding reduction to use for computing features.
|
|
147
148
|
E.g. "MEAN" or "SUM". Used only if using EmbeddingModel, and
|
|
148
149
|
must be non-nil if using an EmbeddingModel. Defaults to None.
|
|
@@ -154,6 +155,29 @@ class PredictorAPI:
|
|
|
154
155
|
PredictorModel
|
|
155
156
|
The GP model being fit.
|
|
156
157
|
"""
|
|
158
|
+
data_api = getattr(self.session, "data", None)
|
|
159
|
+
assert isinstance(data_api, DataAPI)
|
|
160
|
+
# 1. Check assay data input
|
|
161
|
+
# get assay if str
|
|
162
|
+
assay = data_api.get(assay_id=assay) if isinstance(assay, str) else assay
|
|
163
|
+
# extract assay_id
|
|
164
|
+
assay_id = (
|
|
165
|
+
assay.assay_id
|
|
166
|
+
if isinstance(assay, AssayMetadata)
|
|
167
|
+
else assay.id if isinstance(assay, AssayDataset) else assay
|
|
168
|
+
)
|
|
169
|
+
if len(properties) == 0:
|
|
170
|
+
raise InvalidParameterError("Expected (at-least) 1 property to train")
|
|
171
|
+
if not set(properties) <= set(assay.measurement_names):
|
|
172
|
+
raise InvalidParameterError(
|
|
173
|
+
f"Expected all provided properties to be a subset of assay's measurements: {assay.measurement_names}"
|
|
174
|
+
)
|
|
175
|
+
# TODO - support multitask
|
|
176
|
+
if len(properties) > 1:
|
|
177
|
+
raise InvalidParameterError(
|
|
178
|
+
"Training a multitask GP is not yet supported (i.e. number of properties should only be 1 for now)"
|
|
179
|
+
)
|
|
180
|
+
# 2. Check features input
|
|
157
181
|
# extract feature type
|
|
158
182
|
feature_type = (
|
|
159
183
|
FeatureType.PLM
|
|
@@ -164,6 +188,15 @@ class PredictorAPI:
|
|
|
164
188
|
raise InvalidParameterError(
|
|
165
189
|
"Expected feature_type to be provided if passing str model_id as model"
|
|
166
190
|
)
|
|
191
|
+
# runtime check on value
|
|
192
|
+
if isinstance(feature_type, str):
|
|
193
|
+
feature_type = FeatureType(feature_type)
|
|
194
|
+
|
|
195
|
+
# 3. Check reduction
|
|
196
|
+
if isinstance(reduction, str):
|
|
197
|
+
reduction = ReductionType(reduction)
|
|
198
|
+
reduction = reduction.value
|
|
199
|
+
|
|
167
200
|
# get model if model_id
|
|
168
201
|
if feature_type == FeatureType.PLM:
|
|
169
202
|
if reduction is None:
|
|
@@ -183,19 +216,14 @@ class PredictorAPI:
|
|
|
183
216
|
model = svd_api.get_svd(model)
|
|
184
217
|
assert isinstance(model, SVDModel), "Expected SVDModel"
|
|
185
218
|
model_id = model.id
|
|
186
|
-
|
|
187
|
-
assay_id = (
|
|
188
|
-
assay.assay_id
|
|
189
|
-
if isinstance(assay, AssayMetadata)
|
|
190
|
-
else assay.id if isinstance(assay, AssayDataset) else assay
|
|
191
|
-
)
|
|
219
|
+
|
|
192
220
|
return PredictorModel(
|
|
193
221
|
session=self.session,
|
|
194
222
|
job=api.predictor_fit_gp_post(
|
|
195
223
|
session=self.session,
|
|
196
224
|
assay_id=assay_id,
|
|
197
225
|
properties=properties,
|
|
198
|
-
feature_type=feature_type,
|
|
226
|
+
feature_type=feature_type.value,
|
|
199
227
|
model_id=model_id,
|
|
200
228
|
reduction=reduction,
|
|
201
229
|
name=name,
|
|
@@ -29,38 +29,6 @@ _BACKBONE_ATOM_TYPES = ("N", "CA", "C")
|
|
|
29
29
|
_NAN_BFACTOR_VALUE = 9999.75 # can't/hard to use 9999.99 due to precision issues
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
def calc_rmsd(
|
|
33
|
-
xyz1: npt.NDArray[np.floating], xyz2: npt.NDArray[np.floating], eps: float = 1e-6
|
|
34
|
-
) -> tuple[float, npt.NDArray[np.floating]]:
|
|
35
|
-
"""
|
|
36
|
-
Calculates RMSD between two sets of atoms (L, 3)
|
|
37
|
-
Adapted from https://github.com/RosettaCommons/RFdiffusion/blob/b44206a2a79f219bb1a649ea50603a284c225050/rfdiffusion/util.py#L719
|
|
38
|
-
"""
|
|
39
|
-
# center to CA centroid
|
|
40
|
-
xyz1 = xyz1 - xyz1.mean(0)
|
|
41
|
-
xyz2 = xyz2 - xyz2.mean(0)
|
|
42
|
-
|
|
43
|
-
# Computation of the covariance matrix
|
|
44
|
-
C = xyz2.T @ xyz1
|
|
45
|
-
|
|
46
|
-
# Compute otimal rotation matrix using SVD
|
|
47
|
-
V, S, W = np.linalg.svd(C)
|
|
48
|
-
|
|
49
|
-
# get sign to ensure right-handedness
|
|
50
|
-
d = np.ones([3, 3])
|
|
51
|
-
d[:, -1] = np.sign(np.linalg.det(V) * np.linalg.det(W))
|
|
52
|
-
|
|
53
|
-
# Rotation matrix U
|
|
54
|
-
U = (d * V) @ W
|
|
55
|
-
|
|
56
|
-
# Rotate xyz2
|
|
57
|
-
xyz2_ = xyz2 @ U
|
|
58
|
-
L = xyz2_.shape[0]
|
|
59
|
-
rmsd = np.sqrt(np.sum((xyz2_ - xyz1) * (xyz2_ - xyz1), axis=(0, 1)) / L + eps)
|
|
60
|
-
|
|
61
|
-
return rmsd, U
|
|
62
|
-
|
|
63
|
-
|
|
64
32
|
class Protein:
|
|
65
33
|
"""
|
|
66
34
|
Represents a protein with optional sequence, atomic coordinates, per-residue
|
|
@@ -416,10 +384,12 @@ class Protein:
|
|
|
416
384
|
else:
|
|
417
385
|
atom.b_iso = _NAN_BFACTOR_VALUE
|
|
418
386
|
atom = residue.add_atom(atom)
|
|
419
|
-
block = structure.make_mmcif_block()
|
|
420
387
|
# NB: gemmi doesn't seem to write the _chem_comp category properly... it says
|
|
421
388
|
# the type is `.`, but is should be something like `L-PEPTIDE LINKING`...
|
|
422
|
-
|
|
389
|
+
# see also: https://github.com/project-gemmi/gemmi/discussions/362
|
|
390
|
+
block = structure.make_mmcif_block(
|
|
391
|
+
groups=gemmi.MmcifOutputGroups(True, chem_comp=False)
|
|
392
|
+
)
|
|
423
393
|
return block.as_string()
|
|
424
394
|
|
|
425
395
|
def make_fasta_bytes(self) -> bytes:
|
|
@@ -479,7 +449,6 @@ class Protein:
|
|
|
479
449
|
model_idx: int = 0,
|
|
480
450
|
verbose: bool = True,
|
|
481
451
|
) -> "Protein":
|
|
482
|
-
filestring = filestring if isinstance(filestring, str) else filestring.decode()
|
|
483
452
|
if format == "pdb":
|
|
484
453
|
structure = gemmi.read_pdb_string(filestring)
|
|
485
454
|
elif format == "cif":
|
|
@@ -507,7 +476,7 @@ class Protein:
|
|
|
507
476
|
structure.setup_entities()
|
|
508
477
|
structure.assign_label_seq_id()
|
|
509
478
|
if use_bfactor_as_plddt is None:
|
|
510
|
-
use_bfactor_as_plddt = structure
|
|
479
|
+
use_bfactor_as_plddt = _use_bfactor_as_plddt(structure=structure)
|
|
511
480
|
model = structure[model_idx]
|
|
512
481
|
chain = model.find_chain(chain_id)
|
|
513
482
|
assert chain is not None
|
|
@@ -585,3 +554,51 @@ def parse_fasta_as_proteins(path: str | Path) -> list[Protein]:
|
|
|
585
554
|
for name, sequence in fasta.parse_stream(fp):
|
|
586
555
|
proteins.append(Protein(name=name, sequence=sequence))
|
|
587
556
|
return proteins
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def _use_bfactor_as_plddt(structure: gemmi.Structure) -> bool:
|
|
560
|
+
"""
|
|
561
|
+
This heuristic decides whether to use B-factor as pLDDT.
|
|
562
|
+
It uses B-factor as pLDDT when all of the following fields are *not* set:
|
|
563
|
+
- structure resolution
|
|
564
|
+
- _pdbx_database_status.recvd_initial_deposition_date
|
|
565
|
+
This heuristic may be changed in the future.
|
|
566
|
+
"""
|
|
567
|
+
return (structure.resolution == 0.0) and (
|
|
568
|
+
structure.make_mmcif_block(
|
|
569
|
+
groups=gemmi.MmcifOutputGroups(False, database_status=True)
|
|
570
|
+
).find_value("_pdbx_database_status.recvd_initial_deposition_date")
|
|
571
|
+
is None
|
|
572
|
+
)
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def calc_rmsd(
|
|
576
|
+
xyz1: npt.NDArray[np.floating], xyz2: npt.NDArray[np.floating], eps: float = 1e-6
|
|
577
|
+
) -> tuple[float, npt.NDArray[np.floating]]:
|
|
578
|
+
"""
|
|
579
|
+
Calculates RMSD between two sets of atoms (L, 3)
|
|
580
|
+
Adapted from https://github.com/RosettaCommons/RFdiffusion/blob/b44206a2a79f219bb1a649ea50603a284c225050/rfdiffusion/util.py#L719
|
|
581
|
+
"""
|
|
582
|
+
# center to CA centroid
|
|
583
|
+
xyz1 = xyz1 - xyz1.mean(0)
|
|
584
|
+
xyz2 = xyz2 - xyz2.mean(0)
|
|
585
|
+
|
|
586
|
+
# Computation of the covariance matrix
|
|
587
|
+
C = xyz2.T @ xyz1
|
|
588
|
+
|
|
589
|
+
# Compute optimal rotation matrix using SVD
|
|
590
|
+
V, S, W = np.linalg.svd(C)
|
|
591
|
+
|
|
592
|
+
# get sign to ensure right-handedness
|
|
593
|
+
d = np.ones([3, 3])
|
|
594
|
+
d[:, -1] = np.sign(np.linalg.det(V) * np.linalg.det(W))
|
|
595
|
+
|
|
596
|
+
# Rotation matrix U
|
|
597
|
+
U = (d * V) @ W
|
|
598
|
+
|
|
599
|
+
# Rotate xyz2
|
|
600
|
+
xyz2_ = xyz2 @ U
|
|
601
|
+
L = xyz2_.shape[0]
|
|
602
|
+
rmsd = np.sqrt(np.sum((xyz2_ - xyz1) * (xyz2_ - xyz1), axis=(0, 1)) / L + eps)
|
|
603
|
+
|
|
604
|
+
return rmsd, U
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"""SVD API providing the interface for creating and using SVD models."""
|
|
2
2
|
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
3
5
|
from openprotein.base import APISession
|
|
4
6
|
from openprotein.common import ReductionType
|
|
5
7
|
from openprotein.data import AssayDataset, AssayMetadata
|
|
@@ -20,11 +22,11 @@ class SVDAPI:
|
|
|
20
22
|
|
|
21
23
|
def fit_svd(
|
|
22
24
|
self,
|
|
23
|
-
model_id: str,
|
|
25
|
+
model_id: str | EmbeddingModel,
|
|
24
26
|
sequences: list[bytes] | list[str] | None = None,
|
|
25
27
|
assay: AssayMetadata | AssayDataset | str | None = None,
|
|
26
28
|
n_components: int = 1024,
|
|
27
|
-
reduction:
|
|
29
|
+
reduction: Literal["MEAN", "SUM"] | None = None,
|
|
28
30
|
**kwargs,
|
|
29
31
|
) -> SVDModel:
|
|
30
32
|
"""
|
|
@@ -32,7 +34,7 @@ class SVDAPI:
|
|
|
32
34
|
|
|
33
35
|
Parameters
|
|
34
36
|
----------
|
|
35
|
-
model_id : str
|
|
37
|
+
model_id : str or EmbeddingModel
|
|
36
38
|
ID of embeddings model to use.
|
|
37
39
|
sequences : list of bytes or None, optional
|
|
38
40
|
Optional sequences to fit SVD with. Either use sequences or
|
|
@@ -43,7 +45,7 @@ class SVDAPI:
|
|
|
43
45
|
Ignored if sequences are provided.
|
|
44
46
|
n_components : int, optional
|
|
45
47
|
The number of components for the SVD. Defaults to 1024.
|
|
46
|
-
reduction : str or None, optional
|
|
48
|
+
reduction : str or ReductionType or None, optional
|
|
47
49
|
Type of embedding reduction to use for computing features.
|
|
48
50
|
E.g. "MEAN" or "SUM". Useful when dealing with variable length
|
|
49
51
|
sequence. Defaults to None.
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
"""UMAP API providing the interface to fit and run UMAP visualizations."""
|
|
2
2
|
|
|
3
|
+
import typing
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
3
6
|
from openprotein.base import APISession
|
|
4
|
-
from openprotein.common import FeatureType, ReductionType
|
|
7
|
+
from openprotein.common import Feature, FeatureType, Reduction, ReductionType
|
|
5
8
|
from openprotein.data import AssayDataset, AssayMetadata
|
|
6
9
|
from openprotein.embeddings import EmbeddingModel, EmbeddingsAPI
|
|
7
10
|
from openprotein.errors import InvalidParameterError
|
|
@@ -21,16 +24,35 @@ class UMAPAPI:
|
|
|
21
24
|
):
|
|
22
25
|
self.session = session
|
|
23
26
|
|
|
27
|
+
@typing.overload
|
|
28
|
+
def fit_umap(
|
|
29
|
+
self,
|
|
30
|
+
model: EmbeddingModel,
|
|
31
|
+
reduction: Reduction | ReductionType,
|
|
32
|
+
feature_type: Literal["PLM"] = "PLM",
|
|
33
|
+
sequences: list[bytes] | list[str] | None = None,
|
|
34
|
+
assay: AssayDataset | AssayMetadata | str | None = None,
|
|
35
|
+
n_components: int = 2,
|
|
36
|
+
n_neighbors: int = 15,
|
|
37
|
+
min_dist: float = 0.1,
|
|
38
|
+
) -> UMAPModel: ...
|
|
39
|
+
|
|
40
|
+
@typing.overload
|
|
41
|
+
def fit_umap(
|
|
42
|
+
self,
|
|
43
|
+
model: EmbeddingModel,
|
|
44
|
+
) -> UMAPModel: ...
|
|
45
|
+
|
|
24
46
|
def fit_umap(
|
|
25
47
|
self,
|
|
26
48
|
model: EmbeddingModel | SVDModel | str,
|
|
27
|
-
|
|
49
|
+
reduction: Reduction | ReductionType | None = None,
|
|
50
|
+
feature_type: Feature | FeatureType | None = None,
|
|
28
51
|
sequences: list[bytes] | list[str] | None = None,
|
|
29
|
-
assay:
|
|
52
|
+
assay: AssayDataset | AssayMetadata | str | None = None,
|
|
30
53
|
n_components: int = 2,
|
|
31
54
|
n_neighbors: int = 15,
|
|
32
55
|
min_dist: float = 0.1,
|
|
33
|
-
reduction: ReductionType | None = None,
|
|
34
56
|
**kwargs,
|
|
35
57
|
) -> UMAPModel:
|
|
36
58
|
"""
|
|
@@ -42,14 +64,14 @@ class UMAPAPI:
|
|
|
42
64
|
Optional sequences to fit UMAP with. Either use sequences or
|
|
43
65
|
assay. sequences is preferred.
|
|
44
66
|
assay : AssayMetadata or AssayDataset or str or None, optional
|
|
45
|
-
Optional assay containing sequences to fit
|
|
67
|
+
Optional assay containing sequences to fit UMAP with.
|
|
46
68
|
Or its assay_id. Either use sequences or assay.
|
|
47
69
|
Ignored if sequences are provided.
|
|
48
70
|
model : EmbeddingModel or SVDModel or str
|
|
49
71
|
Instance of either EmbeddingModel or SVDModel to use depending
|
|
50
72
|
on feature type. Can also be a str specifying the model id,
|
|
51
73
|
but then feature_type would have to be specified.
|
|
52
|
-
feature_type : FeatureType or None, optional
|
|
74
|
+
feature_type : str or FeatureType or None, optional
|
|
53
75
|
Type of features to use for encoding sequences. "SVD" or "PLM".
|
|
54
76
|
None would require model to be EmbeddingModel or SVDModel.
|
|
55
77
|
n_components : int, optional
|
|
@@ -58,7 +80,7 @@ class UMAPAPI:
|
|
|
58
80
|
Number of neighbors to use for fitting. Defaults to 15.
|
|
59
81
|
min_dist : float, optional
|
|
60
82
|
Minimum distance in UMAP fitting. Defaults to 0.1.
|
|
61
|
-
reduction : str or None, optional
|
|
83
|
+
reduction : str or ReductionType or None, optional
|
|
62
84
|
Type of embedding reduction to use for computing features.
|
|
63
85
|
E.g. "MEAN" or "SUM". Useful when dealing with variable length
|
|
64
86
|
sequence. Defaults to None.
|
|
@@ -70,6 +92,13 @@ class UMAPAPI:
|
|
|
70
92
|
UMAPModel
|
|
71
93
|
The UMAP model being fit.
|
|
72
94
|
"""
|
|
95
|
+
# 1. Check assay data input - just need the id
|
|
96
|
+
# get assay_id
|
|
97
|
+
assay_id = (
|
|
98
|
+
assay.assay_id
|
|
99
|
+
if isinstance(assay, AssayMetadata)
|
|
100
|
+
else assay.id if isinstance(assay, AssayDataset) else assay
|
|
101
|
+
)
|
|
73
102
|
# extract feature type
|
|
74
103
|
feature_type = (
|
|
75
104
|
FeatureType.PLM
|
|
@@ -80,11 +109,15 @@ class UMAPAPI:
|
|
|
80
109
|
raise InvalidParameterError(
|
|
81
110
|
"Expected feature_type to be provided if passing str model_id as model"
|
|
82
111
|
)
|
|
112
|
+
if isinstance(feature_type, str):
|
|
113
|
+
feature_type = FeatureType(feature_type)
|
|
114
|
+
if isinstance(reduction, str):
|
|
115
|
+
reduction = ReductionType(reduction)
|
|
83
116
|
# get model if model_id
|
|
84
117
|
if feature_type == FeatureType.PLM:
|
|
85
118
|
if reduction is None:
|
|
86
119
|
raise InvalidParameterError(
|
|
87
|
-
"Expected reduction if using
|
|
120
|
+
"Expected reduction if using embedding model"
|
|
88
121
|
)
|
|
89
122
|
if isinstance(model, str):
|
|
90
123
|
embeddings_api = getattr(self.session, "embedding", None)
|
|
@@ -93,18 +126,14 @@ class UMAPAPI:
|
|
|
93
126
|
assert isinstance(model, EmbeddingModel), "Expected EmbeddingModel"
|
|
94
127
|
model_id = model.id
|
|
95
128
|
elif feature_type == FeatureType.SVD:
|
|
129
|
+
if reduction is not None:
|
|
130
|
+
raise InvalidParameterError("Unexpected reduction when using SVD model")
|
|
96
131
|
if isinstance(model, str):
|
|
97
132
|
svd_api = getattr(self.session, "svd", None)
|
|
98
133
|
assert isinstance(svd_api, SVDAPI)
|
|
99
134
|
model = svd_api.get_svd(model)
|
|
100
135
|
assert isinstance(model, SVDModel), "Expected SVDModel"
|
|
101
136
|
model_id = model.id
|
|
102
|
-
# get assay_id
|
|
103
|
-
assay_id = (
|
|
104
|
-
assay.assay_id
|
|
105
|
-
if isinstance(assay, AssayMetadata)
|
|
106
|
-
else assay.id if isinstance(assay, AssayDataset) else assay
|
|
107
|
-
)
|
|
108
137
|
return UMAPModel(
|
|
109
138
|
session=self.session,
|
|
110
139
|
job=api.umap_fit_post(
|
|
@@ -35,6 +35,7 @@ dev = [
|
|
|
35
35
|
"matplotlib>=3.9.2,<4",
|
|
36
36
|
"scipy>=1.14.1,<2",
|
|
37
37
|
"hatchling>=1.26.1",
|
|
38
|
+
"hatch-vcs>=0.5,<1",
|
|
38
39
|
"editables>=0.5,<0.6",
|
|
39
40
|
"seaborn>=0.13.2,<0.14",
|
|
40
41
|
"jupyterlab>=4.4.1,<5",
|
|
@@ -55,24 +56,6 @@ jupyterinstall = "python -m ipykernel install --user --name=openprotein-python"
|
|
|
55
56
|
[tool.pixi.environments]
|
|
56
57
|
dev = ["dev"]
|
|
57
58
|
|
|
58
|
-
[tool.pixi.package]
|
|
59
|
-
name = "openprotein-python"
|
|
60
|
-
|
|
61
|
-
[tool.pixi.package.build]
|
|
62
|
-
backend = { name = "pixi-build-python", version = "0.1.*" }
|
|
63
|
-
channels = ["conda-forge"]
|
|
64
|
-
|
|
65
|
-
[tool.pixi.package.host-dependencies]
|
|
66
|
-
hatchling = "*"
|
|
67
|
-
|
|
68
|
-
[tool.pixi.package.run-dependencies]
|
|
69
|
-
requests = ">=2.32.3,<3"
|
|
70
|
-
pydantic = ">=2.5,<3"
|
|
71
|
-
tqdm = ">=4.66.5,<5"
|
|
72
|
-
pandas = ">=2.2.2,<3"
|
|
73
|
-
numpy = ">=1.9,<3"
|
|
74
|
-
gemmi = ">=0.7.0,<0.8"
|
|
75
|
-
|
|
76
59
|
[build-system]
|
|
77
60
|
requires = ["hatchling>=1.26.1", "hatch-vcs>=0.5.0"]
|
|
78
61
|
build-backend = "hatchling.build"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{openprotein_python-0.8.4 → openprotein_python-0.8.5}/openprotein/models/foundation/rfdiffusion.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|