openprotein-python 0.8.4__tar.gz → 0.8.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/PKG-INFO +9 -9
  2. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/README.md +8 -8
  3. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/common/__init__.py +2 -2
  4. openprotein_python-0.8.6/openprotein/common/features.py +15 -0
  5. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/common/model_metadata.py +1 -1
  6. openprotein_python-0.8.6/openprotein/common/reduction.py +14 -0
  7. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/data/api.py +13 -2
  8. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/data/data.py +9 -2
  9. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/embeddings/models.py +37 -28
  10. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/fold/alphafold2.py +7 -4
  11. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/fold/future.py +59 -33
  12. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/predictor/api.py +2 -2
  13. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/predictor/predictor.py +40 -12
  14. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/predictor/schemas.py +2 -0
  15. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/protein.py +53 -36
  16. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/svd/svd.py +6 -4
  17. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/umap/umap.py +43 -14
  18. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/pyproject.toml +1 -18
  19. openprotein_python-0.8.4/openprotein/common/features.py +0 -7
  20. openprotein_python-0.8.4/openprotein/common/reduction.py +0 -8
  21. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/.gitignore +0 -0
  22. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/LICENSE.txt +0 -0
  23. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/__init__.py +0 -0
  24. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/_version.py +0 -0
  25. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/align/__init__.py +0 -0
  26. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/align/align.py +0 -0
  27. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/align/api.py +0 -0
  28. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/align/future.py +0 -0
  29. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/align/msa.py +0 -0
  30. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/align/schemas.py +0 -0
  31. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/base.py +0 -0
  32. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/chains.py +0 -0
  33. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/config.py +0 -0
  34. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/csv.py +0 -0
  35. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/data/__init__.py +0 -0
  36. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/data/assaydataset.py +0 -0
  37. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/data/schemas.py +0 -0
  38. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/design/__init__.py +0 -0
  39. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/design/api.py +0 -0
  40. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/design/design.py +0 -0
  41. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/design/future.py +0 -0
  42. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/design/schemas.py +0 -0
  43. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/embeddings/__init__.py +0 -0
  44. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/embeddings/api.py +0 -0
  45. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/embeddings/embeddings.py +0 -0
  46. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/embeddings/esm.py +0 -0
  47. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/embeddings/future.py +0 -0
  48. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/embeddings/openprotein.py +0 -0
  49. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/embeddings/poet.py +0 -0
  50. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/embeddings/poet2.py +0 -0
  51. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/embeddings/schemas.py +0 -0
  52. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/errors.py +0 -0
  53. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/fasta.py +0 -0
  54. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/fold/__init__.py +0 -0
  55. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/fold/api.py +0 -0
  56. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/fold/boltz.py +0 -0
  57. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/fold/esmfold.py +0 -0
  58. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/fold/fold.py +0 -0
  59. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/fold/models.py +0 -0
  60. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/fold/schemas.py +0 -0
  61. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/jobs/__init__.py +0 -0
  62. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/jobs/api.py +0 -0
  63. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/jobs/futures.py +0 -0
  64. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/jobs/jobs.py +0 -0
  65. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/jobs/schemas.py +0 -0
  66. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/models/__init__.py +0 -0
  67. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/models/base.py +0 -0
  68. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/models/foundation/rfdiffusion.py +0 -0
  69. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/models/models.py +0 -0
  70. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/predictor/__init__.py +0 -0
  71. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/predictor/models.py +0 -0
  72. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/predictor/prediction.py +0 -0
  73. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/predictor/validate.py +0 -0
  74. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/prompt/__init__.py +0 -0
  75. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/prompt/api.py +0 -0
  76. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/prompt/models.py +0 -0
  77. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/prompt/prompt.py +0 -0
  78. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/prompt/schemas.py +0 -0
  79. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/svd/__init__.py +0 -0
  80. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/svd/api.py +0 -0
  81. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/svd/models.py +0 -0
  82. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/svd/schemas.py +0 -0
  83. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/umap/__init__.py +0 -0
  84. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/umap/api.py +0 -0
  85. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/umap/models.py +0 -0
  86. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/umap/schemas.py +0 -0
  87. {openprotein_python-0.8.4 → openprotein_python-0.8.6}/openprotein/utils/uuid.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openprotein-python
3
- Version: 0.8.4
3
+ Version: 0.8.6
4
4
  Summary: OpenProtein Python interface.
5
5
  Author-email: Mark Gee <markgee@ne47.bio>, "Timothy Truong Jr." <ttruong@ne47.bio>, Tristan Bepler <tbepler@ne47.bio>
6
6
  License-Expression: MIT
@@ -28,14 +28,14 @@ The OpenProtein.AI Python Interface provides a user-friendly library to interact
28
28
 
29
29
  # Table of Contents
30
30
 
31
- | | Workflow | Description |
32
- |---|----------------------------------------------------|------------------------------------------------------|
33
- | 0 | [`Quick start`](#Quick-start) | Quick start guide |
34
- | 1 | [`Installation`](https://docs.openprotein.ai/api-python/installation.html) | Install guide for pip and conda. |
35
- | 2 | [`Session management`](https://docs.openprotein.ai/api-python/overview.html) | An overview of the OpenProtein Python Client & the asynchronous jobs system. |
36
- | 3 | [`Asssay-based Sequence Learning`](https://docs.openprotein.ai/api-python/core_workflow.html) | Covers core tasks such as data upload, model training & prediction, and sequence design. |
37
- | 4 | [`De Novo prediction & generative models (PoET)`](https://docs.openprotein.ai/api-python/poet_workflow.html) | Covers PoET, a protein LLM for *de novo* scoring, as well as sequence generation. |
38
- | 5 | [`Protein Language Models & Embeddings`](https://docs.openprotein.ai/api-python/embedding_workflow.html) | Covers methods for creating sequence embeddings with proprietary & open-source models. |
31
+ | | Workflow | Description |
32
+ |---|--------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
33
+ | 0 | [`Quick start`](#Quick-start) | Quick start guide |
34
+ | 1 | [`Installation`](https://docs.openprotein.ai/api-python/installation.html) | Install guide for pip and conda. |
35
+ | 2 | [`Session management`](https://docs.openprotein.ai/api-python/overview.html) | An overview of the OpenProtein Python Client & the asynchronous jobs system. |
36
+ | 3 | [`Assay-based Sequence Learning`](https://docs.openprotein.ai/api-python/core_workflow.html) | Covers core tasks such as data upload, model training & prediction, and sequence design. |
37
+ | 4 | [`De Novo prediction & generative models (PoET)`](https://docs.openprotein.ai/api-python/poet_workflow.html) | Covers PoET, a protein LLM for *de novo* scoring, as well as sequence generation. |
38
+ | 5 | [`Protein Language Models & Embeddings`](https://docs.openprotein.ai/api-python/embedding_workflow.html) | Covers methods for creating sequence embeddings with proprietary & open-source models. |
39
39
 
40
40
 
41
41
  # Quick-start
@@ -10,14 +10,14 @@ The OpenProtein.AI Python Interface provides a user-friendly library to interact
10
10
 
11
11
  # Table of Contents
12
12
 
13
- | | Workflow | Description |
14
- |---|----------------------------------------------------|------------------------------------------------------|
15
- | 0 | [`Quick start`](#Quick-start) | Quick start guide |
16
- | 1 | [`Installation`](https://docs.openprotein.ai/api-python/installation.html) | Install guide for pip and conda. |
17
- | 2 | [`Session management`](https://docs.openprotein.ai/api-python/overview.html) | An overview of the OpenProtein Python Client & the asynchronous jobs system. |
18
- | 3 | [`Asssay-based Sequence Learning`](https://docs.openprotein.ai/api-python/core_workflow.html) | Covers core tasks such as data upload, model training & prediction, and sequence design. |
19
- | 4 | [`De Novo prediction & generative models (PoET)`](https://docs.openprotein.ai/api-python/poet_workflow.html) | Covers PoET, a protein LLM for *de novo* scoring, as well as sequence generation. |
20
- | 5 | [`Protein Language Models & Embeddings`](https://docs.openprotein.ai/api-python/embedding_workflow.html) | Covers methods for creating sequence embeddings with proprietary & open-source models. |
13
+ | | Workflow | Description |
14
+ |---|--------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------|
15
+ | 0 | [`Quick start`](#Quick-start) | Quick start guide |
16
+ | 1 | [`Installation`](https://docs.openprotein.ai/api-python/installation.html) | Install guide for pip and conda. |
17
+ | 2 | [`Session management`](https://docs.openprotein.ai/api-python/overview.html) | An overview of the OpenProtein Python Client & the asynchronous jobs system. |
18
+ | 3 | [`Assay-based Sequence Learning`](https://docs.openprotein.ai/api-python/core_workflow.html) | Covers core tasks such as data upload, model training & prediction, and sequence design. |
19
+ | 4 | [`De Novo prediction & generative models (PoET)`](https://docs.openprotein.ai/api-python/poet_workflow.html) | Covers PoET, a protein LLM for *de novo* scoring, as well as sequence generation. |
20
+ | 5 | [`Protein Language Models & Embeddings`](https://docs.openprotein.ai/api-python/embedding_workflow.html) | Covers methods for creating sequence embeddings with proprietary & open-source models. |
21
21
 
22
22
 
23
23
  # Quick-start
@@ -1,5 +1,5 @@
1
1
  """Common classes and utilities for OpenProtein."""
2
2
 
3
- from .features import FeatureType
3
+ from .features import Feature, FeatureType
4
4
  from .model_metadata import ModelDescription, ModelMetadata, TokenInfo
5
- from .reduction import ReductionType
5
+ from .reduction import Reduction, ReductionType
@@ -0,0 +1,15 @@
1
+ """Feature types used in OpenProtein."""
2
+
3
+ from enum import Enum
4
+ from typing import Literal
5
+
6
+
7
+ class FeatureType(str, Enum):
8
+
9
+ PLM = "PLM"
10
+ SVD = "SVD"
11
+
12
+
13
+ # NOTE: only works with python 3.12+
14
+ # Feature = Literal[*tuple([r.value for r in FeatureType])]
15
+ Feature = Literal["PLM", "SVD"]
@@ -28,6 +28,6 @@ class ModelMetadata(BaseModel):
28
28
  max_sequence_length: int | None = None
29
29
  dimension: int
30
30
  output_types: list[str]
31
- input_tokens: list[str]
31
+ input_tokens: list[str] | None
32
32
  output_tokens: list[str] | None = None
33
33
  token_descriptions: list[list[TokenInfo]]
@@ -0,0 +1,14 @@
1
+ """Reduction types used in OpenProtein."""
2
+
3
+ from enum import Enum
4
+ from typing import Literal
5
+
6
+
7
+ class ReductionType(str, Enum):
8
+ MEAN = "MEAN"
9
+ SUM = "SUM"
10
+
11
+
12
+ # NOTE: only works with python 3.12+
13
+ # Reduction = Literal[*tuple([r.value for r in ReductionType])]
14
+ Reduction = Literal["MEAN", "SUM"]
@@ -64,7 +64,9 @@ def assaydata_post(
64
64
  raise APIError(f"Unable to post assay data: {response.text}")
65
65
 
66
66
 
67
- def assaydata_list(session: APISession) -> list[AssayMetadata]:
67
+ def assaydata_list(
68
+ session: APISession, limit: int | None = None, offset: int | None = None
69
+ ) -> list[AssayMetadata]:
68
70
  """
69
71
  Get a list of all assay metadata.
70
72
 
@@ -72,6 +74,10 @@ def assaydata_list(session: APISession) -> list[AssayMetadata]:
72
74
  ----------
73
75
  session : APISession
74
76
  Session object for API communication.
77
+ limit : int, optional
78
+ Limit the number of assays to return.
79
+ offset : int, optional
80
+ Offset of assays to retrieve. Useful with limit.
75
81
 
76
82
  Returns
77
83
  -------
@@ -84,7 +90,12 @@ def assaydata_list(session: APISession) -> list[AssayMetadata]:
84
90
  If an error occurs during the API request.
85
91
  """
86
92
  endpoint = "v1/assaydata"
87
- response = session.get(endpoint)
93
+ params = {}
94
+ if limit is not None:
95
+ params["limit"] = limit
96
+ if offset is not None:
97
+ params["offset"] = offset
98
+ response = session.get(endpoint, params=params)
88
99
  if response.status_code == 200:
89
100
  return TypeAdapter(list[AssayMetadata]).validate_python(response.json())
90
101
  else:
@@ -14,16 +14,23 @@ class DataAPI:
14
14
  def __init__(self, session: APISession):
15
15
  self.session = session
16
16
 
17
- def list(self) -> list[AssayDataset]:
17
+ def list(
18
+ self, limit: int | None = None, offset: int | None = None
19
+ ) -> list[AssayDataset]:
18
20
  """
19
21
  List all assay datasets.
20
22
 
23
+ limit : int, optional
24
+ Limit the number of assays to return.
25
+ offset : int, optional
26
+ Offset of assays to retrieve. Useful with limit.
27
+
21
28
  Returns
22
29
  -------
23
30
  List[AssayDataset]
24
31
  List of all assay datasets.
25
32
  """
26
- metadata = api.assaydata_list(self.session)
33
+ metadata = api.assaydata_list(session=self.session, limit=limit, offset=offset)
27
34
  return [AssayDataset(self.session, x) for x in metadata]
28
35
 
29
36
  def create(
@@ -3,7 +3,13 @@
3
3
  from typing import TYPE_CHECKING
4
4
 
5
5
  from openprotein.base import APISession
6
- from openprotein.common import FeatureType, ModelMetadata, ReductionType
6
+ from openprotein.common import (
7
+ Feature,
8
+ FeatureType,
9
+ ModelMetadata,
10
+ Reduction,
11
+ ReductionType,
12
+ )
7
13
  from openprotein.data import AssayDataset, AssayMetadata, DataAPI
8
14
  from openprotein.errors import InvalidParameterError
9
15
 
@@ -199,9 +205,9 @@ class EmbeddingModel:
199
205
  def fit_svd(
200
206
  self,
201
207
  sequences: list[bytes] | list[str] | None = None,
202
- assay: AssayDataset | None = None,
208
+ assay: AssayDataset | AssayMetadata | None = None,
203
209
  n_components: int = 1024,
204
- reduction: ReductionType | None = None,
210
+ reduction: Reduction | ReductionType | None = None,
205
211
  **kwargs,
206
212
  ) -> "SVDModel":
207
213
  """
@@ -236,6 +242,11 @@ class EmbeddingModel:
236
242
  # local import for cyclic dep
237
243
  from openprotein.svd import SVDAPI
238
244
 
245
+ # runtime check on value
246
+ if isinstance(reduction, str):
247
+ reduction = ReductionType(reduction)
248
+ reduction = reduction.value
249
+
239
250
  svd_api = getattr(self.session, "svd", None)
240
251
  assert isinstance(svd_api, SVDAPI)
241
252
 
@@ -246,9 +257,8 @@ class EmbeddingModel:
246
257
  raise InvalidParameterError(
247
258
  "Expected either assay or sequences to fit SVD on!"
248
259
  )
249
- model_id = self.id
250
260
  return svd_api.fit_svd(
251
- model_id=model_id,
261
+ model=self,
252
262
  sequences=sequences,
253
263
  assay=assay,
254
264
  n_components=n_components,
@@ -259,9 +269,9 @@ class EmbeddingModel:
259
269
  def fit_umap(
260
270
  self,
261
271
  sequences: list[bytes] | list[str] | None = None,
262
- assay: AssayDataset | None = None,
272
+ assay: AssayDataset | AssayMetadata | None = None,
263
273
  n_components: int = 2,
264
- reduction: ReductionType | None = ReductionType.MEAN,
274
+ reduction: Reduction | ReductionType = "MEAN",
265
275
  **kwargs,
266
276
  ) -> "UMAPModel":
267
277
  """
@@ -274,11 +284,11 @@ class EmbeddingModel:
274
284
  ----------
275
285
  sequences : list of bytes or list of str or None, optional
276
286
  Optional sequences to fit UMAP with. Either use sequences or assay. Sequences is preferred.
277
- assay : AssayDataset or None, optional
287
+ assay : AssayDataset or AssayMetadata or None, optional
278
288
  Optional assay containing sequences to fit UMAP with. Either use sequences or assay. Ignored if sequences are provided.
279
289
  n_components : int, optional
280
290
  Number of components in UMAP fit. Determines output shapes. Default is 2.
281
- reduction : ReductionType or None, optional
291
+ reduction : Reduction or ReductionType, optional
282
292
  Embeddings reduction to use (e.g. mean). Defaults to MEAN.
283
293
  kwargs :
284
294
  Additional keyword arguments to be used from foundational models, e.g. prompt_id for PoET models.
@@ -296,6 +306,16 @@ class EmbeddingModel:
296
306
  # local import for cyclic dep
297
307
  from openprotein.umap import UMAPAPI
298
308
 
309
+ if reduction is None:
310
+ raise InvalidParameterError(
311
+ "Expected reduction if using EmbeddingModel to fit UMAP"
312
+ )
313
+
314
+ # runtime check on value
315
+ if isinstance(reduction, str):
316
+ reduction = ReductionType(reduction)
317
+ reduction = reduction.value
318
+
299
319
  umap_api = getattr(self.session, "umap", None)
300
320
  assert isinstance(umap_api, UMAPAPI)
301
321
 
@@ -306,12 +326,18 @@ class EmbeddingModel:
306
326
  raise InvalidParameterError(
307
327
  "Expected either assay or sequences to fit UMAP on!"
308
328
  )
329
+ # get assay_id
330
+ assay_id = (
331
+ assay.assay_id
332
+ if isinstance(assay, AssayMetadata)
333
+ else assay.id if isinstance(assay, AssayDataset) else assay
334
+ )
309
335
  model_id = self.id
310
336
  return umap_api.fit_umap(
311
337
  model_id=model_id,
312
338
  feature_type=FeatureType.PLM,
313
339
  sequences=sequences,
314
- assay_id=assay.id if assay is not None else None,
340
+ assay_id=assay_id,
315
341
  n_components=n_components,
316
342
  reduction=reduction,
317
343
  **kwargs,
@@ -319,7 +345,7 @@ class EmbeddingModel:
319
345
 
320
346
  def fit_gp(
321
347
  self,
322
- assay: AssayMetadata | AssayDataset | str,
348
+ assay: AssayDataset | AssayMetadata | str,
323
349
  properties: list[str],
324
350
  reduction: ReductionType,
325
351
  name: str | None = None,
@@ -358,26 +384,9 @@ class EmbeddingModel:
358
384
  # local import to resolve cyclic
359
385
  from openprotein.predictor import PredictorAPI
360
386
 
361
- data_api = getattr(self.session, "data", None)
362
- assert isinstance(data_api, DataAPI)
363
387
  predictor_api = getattr(self.session, "predictor", None)
364
388
  assert isinstance(predictor_api, PredictorAPI)
365
389
 
366
- # get assay if str
367
- assay = data_api.get(assay_id=assay) if isinstance(assay, str) else assay
368
- # extract assay_id
369
- if len(properties) == 0:
370
- raise InvalidParameterError("Expected (at-least) 1 property to train")
371
- if not set(properties) <= set(assay.measurement_names):
372
- raise InvalidParameterError(
373
- f"Expected all provided properties to be a subset of assay's measurements: {assay.measurement_names}"
374
- )
375
- # TODO - support multitask
376
- if len(properties) > 1:
377
- raise InvalidParameterError(
378
- "Training a multitask GP is not yet supported (i.e. number of properties should only be 1 for now)"
379
- )
380
-
381
390
  # inject into predictor api
382
391
  return predictor_api.fit_gp(
383
392
  assay=assay,
@@ -47,10 +47,8 @@ class AlphaFold2Model(FoldModel):
47
47
  number of times to recycle models
48
48
  num_models : int
49
49
  number of models to train - best model will be used
50
- max_msa : Union[str, int]
51
- maximum number of sequences in the msa to use.
52
- relax_max_iterations : int
53
- maximum number of iterations
50
+ num_relax : int
51
+ maximum number of iterations for relax
54
52
 
55
53
  Returns
56
54
  -------
@@ -61,6 +59,7 @@ class AlphaFold2Model(FoldModel):
61
59
  "Inputs to AlphaFold 2 have been updated. 'msa' should be supplied as 'proteins' argument. Support will be dropped in the future."
62
60
  )
63
61
  proteins = kwargs["msa"]
62
+ assert isinstance(proteins, MSAFuture), "Expected msa to be an MSAFuture"
64
63
  if "ligands" in kwargs or "dnas" in kwargs or "rnas" in kwargs:
65
64
  with warnings.catch_warnings():
66
65
  warnings.simplefilter("always") # Force warning to always show
@@ -73,6 +72,10 @@ class AlphaFold2Model(FoldModel):
73
72
  msa_to_seed: dict[str, Counter] = dict()
74
73
  for protein in proteins:
75
74
  if (msa := protein.msa) is not None:
75
+ if isinstance(msa, Protein.NullMSA):
76
+ raise ValueError(
77
+ "AlphaFold 2 expects MSA and does not support single sequence mode"
78
+ )
76
79
  msa_id = msa.id if isinstance(msa, MSAFuture) else msa
77
80
  if msa_id in msa_to_seed:
78
81
  seeds = msa_to_seed[msa_id]
@@ -9,11 +9,11 @@ from typing_extensions import Self
9
9
  from openprotein import config
10
10
  from openprotein.base import APISession
11
11
  from openprotein.chains import DNA, RNA, Ligand
12
- from openprotein.jobs import Future, MappedFuture
12
+ from openprotein.jobs import Future, JobsAPI, MappedFuture
13
13
  from openprotein.protein import Protein
14
14
 
15
15
  from . import api
16
- from .schemas import FoldJob
16
+ from .schemas import FoldJob, FoldMetadata
17
17
 
18
18
  if TYPE_CHECKING:
19
19
  from .boltz import BoltzAffinity, BoltzConfidence
@@ -34,34 +34,39 @@ class FoldResultFuture(MappedFuture, Future):
34
34
  def __init__(
35
35
  self,
36
36
  session: APISession,
37
- job: FoldJob,
37
+ job: FoldJob | None = None,
38
+ metadata: FoldMetadata | None = None,
38
39
  sequences: list[bytes] | None = None,
39
40
  max_workers: int = config.MAX_CONCURRENT_WORKERS,
40
41
  ):
41
42
  """
42
43
  Initialize a FoldResultFuture instance.
43
44
 
44
- Parameters
45
- ----------
46
- session : APISession
47
- The API session to use for requests.
48
- job : FoldJob
49
- The fold job associated with this future.
50
- sequences : list[bytes], optional
51
- List of sequences submitted for the fold request. If None, sequences will be fetched.
52
- max_workers : int, optional
53
- Maximum number of concurrent workers. Default is config.MAX_CONCURRENT_WORKERS.
45
+ Takes in either a fold job, or the fold job metadata.
46
+
47
+ :meta private:
54
48
  """
55
- super().__init__(session, job, max_workers)
49
+ # initialize the fold job metadata
50
+ if metadata is None:
51
+ if job is None or job.job_id is None:
52
+ raise ValueError("Expected fold metadata or job")
53
+ metadata = api.fold_get(session, job.job_id)
54
+ self._metadata = metadata
55
+ if job is None:
56
+ jobs_api = getattr(session, "jobs", None)
57
+ assert isinstance(jobs_api, JobsAPI)
58
+ job = FoldJob.create(jobs_api.get_job(job_id=metadata.job_id))
56
59
  if sequences is None:
57
60
  sequences = api.fold_get_sequences(self.session, job_id=job.job_id)
58
61
  self._sequences = sequences
62
+ super().__init__(session, job, max_workers)
59
63
 
60
64
  @classmethod
61
65
  def create(
62
66
  cls: type[Self],
63
67
  session: APISession,
64
- job: FoldJob,
68
+ job: FoldJob | None = None,
69
+ metadata: FoldMetadata | None = None,
65
70
  **kwargs,
66
71
  ) -> "Self | FoldComplexResultFuture":
67
72
  """
@@ -81,7 +86,13 @@ class FoldResultFuture(MappedFuture, Future):
81
86
  FoldResultFuture or FoldComplexResultFuture
82
87
  An instance of FoldResultFuture or FoldComplexResultFuture depending on the model.
83
88
  """
84
- model_id = api.fold_get(session=session, job_id=job.job_id).model_id
89
+ if job is not None:
90
+ job_id = job.job_id
91
+ elif metadata is not None:
92
+ job_id = metadata.job_id
93
+ else:
94
+ raise ValueError("Expected fold metadata or job")
95
+ model_id = api.fold_get(session=session, job_id=job_id).model_id
85
96
  if model_id.startswith("boltz") or model_id.startswith("alphafold"):
86
97
  return FoldComplexResultFuture(session=session, job=job, **kwargs)
87
98
  else:
@@ -101,22 +112,6 @@ class FoldResultFuture(MappedFuture, Future):
101
112
  self._sequences = api.fold_get_sequences(self.session, self.job.job_id)
102
113
  return self._sequences
103
114
 
104
- @property
105
- def model_id(self) -> str:
106
- """
107
- Get the model ID used for the fold request.
108
-
109
- Returns
110
- -------
111
- str
112
- Model ID.
113
- """
114
- if self._model_id is None:
115
- self._model_id = api.fold_get(
116
- session=self.session, job_id=self.job.job_id
117
- ).model_id
118
- return self._model_id
119
-
120
115
  @property
121
116
  def id(self):
122
117
  """
@@ -129,6 +124,17 @@ class FoldResultFuture(MappedFuture, Future):
129
124
  """
130
125
  return self.job.job_id
131
126
 
127
+
128
+ @property
129
+ def metadata(self) -> FoldMetadata:
130
+ """The fold metadata."""
131
+ return self._metadata
132
+
133
+ @property
134
+ def model_id(self) -> str:
135
+ """The fold model used."""
136
+ return self._metadata.model_id
137
+
132
138
  def __keys__(self):
133
139
  """
134
140
  Get the list of sequences submitted for the fold request.
@@ -189,7 +195,8 @@ class FoldComplexResultFuture(Future):
189
195
  def __init__(
190
196
  self,
191
197
  session: APISession,
192
- job: FoldJob,
198
+ job: FoldJob | None = None,
199
+ metadata: FoldMetadata | None = None,
193
200
  model_id: str | None = None,
194
201
  proteins: list[Protein] | None = None,
195
202
  ligands: list[Ligand] | None = None,
@@ -216,6 +223,16 @@ class FoldComplexResultFuture(Future):
216
223
  rnas : list[RNA], optional
217
224
  List of RNAs submitted for fold request.
218
225
  """
226
+ # initialize the fold job metadata
227
+ if metadata is None:
228
+ if job is None or job.job_id is None:
229
+ raise ValueError("Expected fold metadata or job")
230
+ metadata = api.fold_get(session, job.job_id)
231
+ self._metadata = metadata
232
+ if job is None:
233
+ jobs_api = getattr(session, "jobs", None)
234
+ assert isinstance(jobs_api, JobsAPI)
235
+ job = FoldJob.create(jobs_api.get_job(job_id=metadata.job_id))
219
236
  super().__init__(session, job)
220
237
  self._model_id = model_id
221
238
  self._proteins = proteins
@@ -229,6 +246,11 @@ class FoldComplexResultFuture(Future):
229
246
  self._confidence: list["BoltzConfidence"] | None = None
230
247
  self._affinity: "BoltzAffinity | None" = None
231
248
 
249
+ @property
250
+ def metadata(self) -> FoldMetadata:
251
+ """The fold metadata."""
252
+ return self._metadata
253
+
232
254
  @property
233
255
  def model_id(self) -> str:
234
256
  """
@@ -433,6 +455,8 @@ class FoldComplexResultFuture(Future):
433
455
  AttributeError
434
456
  If confidence is not supported for the model.
435
457
  """
458
+ from .boltz import BoltzConfidence
459
+
436
460
  if self.model_id not in {"boltz-1", "boltz-1x", "boltz-2"}:
437
461
  raise AttributeError("confidence not supported for non-Boltz model")
438
462
  if self._confidence is None:
@@ -464,6 +488,8 @@ class FoldComplexResultFuture(Future):
464
488
  AttributeError
465
489
  If affinity is not supported for the model.
466
490
  """
491
+ from .boltz import BoltzAffinity
492
+
467
493
  if self.model_id not in {"boltz-1", "boltz-1x", "boltz-2"}:
468
494
  raise AttributeError("affinity not supported for non-Boltz model")
469
495
  if self._affinity is None:
@@ -162,8 +162,8 @@ def predictor_fit_gp_post(
162
162
  body["name"] = name
163
163
  if description is not None:
164
164
  body["description"] = description
165
- # add kwargs for embeddings kwargs
166
- body.update(kwargs)
165
+ # add kwargs for embeddings kwargs to features
166
+ body["features"].update(kwargs)
167
167
 
168
168
  response = session.post(endpoint, json=body)
169
169
  return PredictorTrainJob.model_validate(response.json())
@@ -1,10 +1,11 @@
1
1
  """Predictor API providing the interface to train and predict predictors."""
2
2
 
3
3
  from openprotein.base import APISession
4
- from openprotein.common import FeatureType, ReductionType
4
+ from openprotein.common import Feature, FeatureType, Reduction, ReductionType
5
5
  from openprotein.data import (
6
6
  AssayDataset,
7
7
  AssayMetadata,
8
+ DataAPI,
8
9
  )
9
10
  from openprotein.embeddings import EmbeddingModel, EmbeddingsAPI
10
11
  from openprotein.errors import InvalidParameterError
@@ -120,8 +121,8 @@ class PredictorAPI:
120
121
  assay: AssayDataset | AssayMetadata | str,
121
122
  properties: list[str],
122
123
  model: EmbeddingModel | SVDModel | str,
123
- feature_type: FeatureType | None = None,
124
- reduction: ReductionType | None = None,
124
+ feature_type: Feature | FeatureType | None = None,
125
+ reduction: Reduction | ReductionType | None = None,
125
126
  name: str | None = None,
126
127
  description: str | None = None,
127
128
  **kwargs,
@@ -139,10 +140,10 @@ class PredictorAPI:
139
140
  Instance of either EmbeddingModel or SVDModel to use depending
140
141
  on feature type. Can also be a str specifying the model id,
141
142
  but then feature_type would have to be specified.
142
- feature_type : FeatureType or None
143
+ feature_type : Feature or FeatureType or None
143
144
  Type of features to use for encoding sequences. "SVD" or "PLM".
144
145
  None would require model to be EmbeddingModel or SVDModel.
145
- reduction : str or None, optional
146
+ reduction : Reduction or ReductionType or None, optional
146
147
  Type of embedding reduction to use for computing features.
147
148
  E.g. "MEAN" or "SUM". Used only if using EmbeddingModel, and
148
149
  must be non-nil if using an EmbeddingModel. Defaults to None.
@@ -154,6 +155,29 @@ class PredictorAPI:
154
155
  PredictorModel
155
156
  The GP model being fit.
156
157
  """
158
+ data_api = getattr(self.session, "data", None)
159
+ assert isinstance(data_api, DataAPI)
160
+ # 1. Check assay data input
161
+ # get assay if str
162
+ assay = data_api.get(assay_id=assay) if isinstance(assay, str) else assay
163
+ # extract assay_id
164
+ assay_id = (
165
+ assay.assay_id
166
+ if isinstance(assay, AssayMetadata)
167
+ else assay.id if isinstance(assay, AssayDataset) else assay
168
+ )
169
+ if len(properties) == 0:
170
+ raise InvalidParameterError("Expected (at-least) 1 property to train")
171
+ if not set(properties) <= set(assay.measurement_names):
172
+ raise InvalidParameterError(
173
+ f"Expected all provided properties to be a subset of assay's measurements: {assay.measurement_names}"
174
+ )
175
+ # TODO - support multitask
176
+ if len(properties) > 1:
177
+ raise InvalidParameterError(
178
+ "Training a multitask GP is not yet supported (i.e. number of properties should only be 1 for now)"
179
+ )
180
+ # 2. Check features input
157
181
  # extract feature type
158
182
  feature_type = (
159
183
  FeatureType.PLM
@@ -164,6 +188,15 @@ class PredictorAPI:
164
188
  raise InvalidParameterError(
165
189
  "Expected feature_type to be provided if passing str model_id as model"
166
190
  )
191
+ # runtime check on value
192
+ if isinstance(feature_type, str):
193
+ feature_type = FeatureType(feature_type)
194
+
195
+ # 3. Check reduction
196
+ if isinstance(reduction, str):
197
+ reduction = ReductionType(reduction)
198
+ reduction = reduction.value
199
+
167
200
  # get model if model_id
168
201
  if feature_type == FeatureType.PLM:
169
202
  if reduction is None:
@@ -183,19 +216,14 @@ class PredictorAPI:
183
216
  model = svd_api.get_svd(model)
184
217
  assert isinstance(model, SVDModel), "Expected SVDModel"
185
218
  model_id = model.id
186
- # get assay_id
187
- assay_id = (
188
- assay.assay_id
189
- if isinstance(assay, AssayMetadata)
190
- else assay.id if isinstance(assay, AssayDataset) else assay
191
- )
219
+
192
220
  return PredictorModel(
193
221
  session=self.session,
194
222
  job=api.predictor_fit_gp_post(
195
223
  session=self.session,
196
224
  assay_id=assay_id,
197
225
  properties=properties,
198
- feature_type=feature_type,
226
+ feature_type=feature_type.value,
199
227
  model_id=model_id,
200
228
  reduction=reduction,
201
229
  name=name,
@@ -29,6 +29,8 @@ class Features(BaseModel):
29
29
  model_id: str | None = None
30
30
  reduction: str | None = None
31
31
 
32
+ # TODO: model extra kwargs
33
+
32
34
  model_config = ConfigDict(protected_namespaces=())
33
35
 
34
36