openprotein-python 0.8.2__1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. openprotein/__init__.py +164 -0
  2. openprotein/_version.py +48 -0
  3. openprotein/align/__init__.py +8 -0
  4. openprotein/align/align.py +395 -0
  5. openprotein/align/api.py +428 -0
  6. openprotein/align/future.py +55 -0
  7. openprotein/align/msa.py +129 -0
  8. openprotein/align/schemas.py +165 -0
  9. openprotein/base.py +181 -0
  10. openprotein/chains.py +88 -0
  11. openprotein/common/__init__.py +5 -0
  12. openprotein/common/features.py +7 -0
  13. openprotein/common/model_metadata.py +33 -0
  14. openprotein/common/reduction.py +8 -0
  15. openprotein/config.py +9 -0
  16. openprotein/csv.py +31 -0
  17. openprotein/data/__init__.py +9 -0
  18. openprotein/data/api.py +218 -0
  19. openprotein/data/assaydataset.py +178 -0
  20. openprotein/data/data.py +93 -0
  21. openprotein/data/schemas.py +27 -0
  22. openprotein/design/__init__.py +16 -0
  23. openprotein/design/api.py +259 -0
  24. openprotein/design/design.py +125 -0
  25. openprotein/design/future.py +146 -0
  26. openprotein/design/schemas.py +607 -0
  27. openprotein/embeddings/__init__.py +27 -0
  28. openprotein/embeddings/api.py +619 -0
  29. openprotein/embeddings/embeddings.py +151 -0
  30. openprotein/embeddings/esm.py +33 -0
  31. openprotein/embeddings/future.py +146 -0
  32. openprotein/embeddings/models.py +421 -0
  33. openprotein/embeddings/openprotein.py +21 -0
  34. openprotein/embeddings/poet.py +446 -0
  35. openprotein/embeddings/poet2.py +505 -0
  36. openprotein/embeddings/schemas.py +78 -0
  37. openprotein/errors.py +76 -0
  38. openprotein/fasta.py +92 -0
  39. openprotein/fold/__init__.py +21 -0
  40. openprotein/fold/alphafold2.py +131 -0
  41. openprotein/fold/api.py +287 -0
  42. openprotein/fold/boltz.py +691 -0
  43. openprotein/fold/esmfold.py +54 -0
  44. openprotein/fold/fold.py +107 -0
  45. openprotein/fold/future.py +509 -0
  46. openprotein/fold/models.py +139 -0
  47. openprotein/fold/schemas.py +39 -0
  48. openprotein/jobs/__init__.py +9 -0
  49. openprotein/jobs/api.py +71 -0
  50. openprotein/jobs/futures.py +746 -0
  51. openprotein/jobs/jobs.py +69 -0
  52. openprotein/jobs/schemas.py +135 -0
  53. openprotein/models/__init__.py +4 -0
  54. openprotein/models/base.py +63 -0
  55. openprotein/models/foundation/rfdiffusion.py +283 -0
  56. openprotein/models/models.py +33 -0
  57. openprotein/predictor/__init__.py +25 -0
  58. openprotein/predictor/api.py +384 -0
  59. openprotein/predictor/models.py +374 -0
  60. openprotein/predictor/prediction.py +79 -0
  61. openprotein/predictor/predictor.py +242 -0
  62. openprotein/predictor/schemas.py +113 -0
  63. openprotein/predictor/validate.py +40 -0
  64. openprotein/prompt/__init__.py +9 -0
  65. openprotein/prompt/api.py +505 -0
  66. openprotein/prompt/models.py +142 -0
  67. openprotein/prompt/prompt.py +130 -0
  68. openprotein/prompt/schemas.py +49 -0
  69. openprotein/protein.py +587 -0
  70. openprotein/svd/__init__.py +9 -0
  71. openprotein/svd/api.py +206 -0
  72. openprotein/svd/models.py +288 -0
  73. openprotein/svd/schemas.py +31 -0
  74. openprotein/svd/svd.py +134 -0
  75. openprotein/umap/__init__.py +9 -0
  76. openprotein/umap/api.py +259 -0
  77. openprotein/umap/models.py +211 -0
  78. openprotein/umap/schemas.py +35 -0
  79. openprotein/umap/umap.py +175 -0
  80. openprotein/utils/uuid.py +29 -0
  81. openprotein_python-0.8.2.dist-info/METADATA +176 -0
  82. openprotein_python-0.8.2.dist-info/RECORD +84 -0
  83. openprotein_python-0.8.2.dist-info/WHEEL +4 -0
  84. openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0
@@ -0,0 +1,505 @@
1
+ """Proprietary PoET-2 model providing top-class performance on protein engineering tasks."""
2
+
3
+ from collections.abc import Sequence
4
+ from typing import TYPE_CHECKING, Literal
5
+
6
+ import numpy as np
7
+
8
+ from openprotein.base import APISession
9
+ from openprotein.common import ModelMetadata, ReductionType
10
+ from openprotein.data import AssayDataset, AssayMetadata
11
+ from openprotein.prompt import Prompt, PromptAPI, Query
12
+ from openprotein.protein import Protein
13
+ from openprotein.utils import uuid
14
+
15
+ from .future import (
16
+ EmbeddingsGenerateFuture,
17
+ EmbeddingsResultFuture,
18
+ EmbeddingsScoreFuture,
19
+ )
20
+ from .models import EmbeddingModel
21
+ from .poet import PoETModel
22
+
23
+ if TYPE_CHECKING:
24
+ from openprotein.predictor import PredictorModel
25
+ from openprotein.svd import SVDModel
26
+ from openprotein.umap import UMAPModel
27
+
28
+
29
class PoET2Model(PoETModel, EmbeddingModel):
    """
    Class for OpenProtein's foundation model PoET 2.

    PoET functions are dependent on a prompt supplied via the prompt endpoints.

    Every public method below accepts an optional ``query``; it is resolved to
    a query id and the call is then delegated to the parent implementation.

    Examples
    --------
    View specific model details (including supported tokens) with the `?` operator.

    .. code-block:: python

        >>> import openprotein
        >>> session = openprotein.connect(username="user", password="password")
        >>> session.embedding.poet2?
    """

    model_id = "poet-2"

    # TODO - Add model to explicitly require prompt_id
    def __init__(
        self,
        session: APISession,
        model_id: str,
        metadata: ModelMetadata | None = None,
    ):
        """
        Initialize the PoET-2 model wrapper.

        Parameters
        ----------
        session : APISession
            Session used to talk to the backend.
        model_id : str
            Identifier of the model.
        metadata : ModelMetadata or None, optional
            Model metadata, if already available. Default is None.
        """
        super().__init__(session=session, model_id=model_id, metadata=metadata)

    def __resolve_query(
        self,
        query: str | bytes | Protein | Query | None = None,
    ) -> str | None:
        """
        Turn a user-supplied query into a query id.

        Parameters
        ----------
        query : str or bytes or Protein or Query or None, optional
            ``None`` is passed through. A ``Protein``, ``bytes``, or a string
            that is not a valid UUID is registered through the session's
            prompt API (``create_query``). A UUID string is used as the id
            directly, and any other object is expected to expose ``.id``.

        Returns
        -------
        str or None
            The resolved query id, or None when no query was given.
        """
        if query is None:
            return None
        needs_creation = isinstance(query, (Protein, bytes)) or (
            isinstance(query, str) and not uuid.is_valid_uuid(query)
        )
        if needs_creation:
            # Raw content must first be registered as a query on the backend.
            prompt_api = getattr(self.session, "prompt", None)
            assert isinstance(prompt_api, PromptAPI)
            return prompt_api.create_query(query=query).id
        # Either already a query id (UUID string) or a Query-like object.
        return query if isinstance(query, str) else query.id

    def embed(
        self,
        sequences: list[bytes],
        reduction: ReductionType | None = ReductionType.MEAN,
        prompt: str | Prompt | None = None,
        query: str | bytes | Protein | Query | None = None,
        use_query_structure_in_decoder: bool = True,
        decoder_type: Literal["mlm", "clm"] | None = None,
    ) -> EmbeddingsResultFuture:
        """
        Embed sequences using this model.

        Parameters
        ----------
        sequences : list of bytes
            Sequences to embed.
        reduction : ReductionType or None, optional
            Embeddings reduction to use (e.g. mean). Default is ReductionType.MEAN.
        prompt : str or Prompt or None, optional
            Prompt or prompt_id or prompt from an align workflow to condition PoET model.
        query : str or bytes or Protein or Query or None, optional
            Query to use with prompt.
        use_query_structure_in_decoder : bool, optional
            Whether to use query structure in decoder. Default is True.
        decoder_type : {'mlm', 'clm'} or None, optional
            Decoder type. Default is None.

        Returns
        -------
        EmbeddingsResultFuture
            A future object that returns the embeddings of the submitted sequences.
        """
        query_id = self.__resolve_query(query=query)
        return super().embed(
            sequences=sequences,
            reduction=reduction,
            prompt=prompt,
            query_id=query_id,
            use_query_structure_in_decoder=use_query_structure_in_decoder,
            decoder_type=decoder_type,
        )

    def logits(
        self,
        sequences: list[bytes],
        prompt: str | Prompt | None = None,
        query: str | bytes | Protein | Query | None = None,
        use_query_structure_in_decoder: bool = True,
        decoder_type: Literal["mlm", "clm"] | None = None,
    ) -> EmbeddingsResultFuture:
        """
        Compute logit embeddings for sequences using this model.

        Parameters
        ----------
        sequences : list of bytes
            Sequences to analyze.
        prompt : str or Prompt or None, optional
            Prompt or prompt_id or prompt from an align workflow to condition PoET model.
        query : str or bytes or Protein or Query or None, optional
            Query to use with prompt.
        use_query_structure_in_decoder : bool, optional
            Whether to use query structure in decoder. Default is True.
        decoder_type : {'mlm', 'clm'} or None, optional
            Decoder type. Default is None.

        Returns
        -------
        EmbeddingsResultFuture
            A future object that returns the logits of the submitted sequences.
        """
        query_id = self.__resolve_query(query=query)
        return super().logits(
            sequences=sequences,
            prompt=prompt,
            query_id=query_id,
            use_query_structure_in_decoder=use_query_structure_in_decoder,
            decoder_type=decoder_type,
        )

    def score(
        self,
        sequences: list[bytes],
        prompt: str | Prompt | None = None,
        query: str | bytes | Protein | Query | None = None,
        use_query_structure_in_decoder: bool = True,
        decoder_type: Literal["mlm", "clm"] | None = None,
    ) -> EmbeddingsScoreFuture:
        """
        Score query sequences using the specified prompt.

        Parameters
        ----------
        sequences : list of bytes
            Sequences to score.
        prompt : str or Prompt or None, optional
            Prompt or prompt_id or prompt from an align workflow to condition PoET model.
        query : str or bytes or Protein or Query or None, optional
            Query to use with prompt.
        use_query_structure_in_decoder : bool, optional
            Whether to use query structure in decoder. Default is True.
        decoder_type : {'mlm', 'clm'} or None, optional
            Decoder type. Default is None.

        Returns
        -------
        EmbeddingsScoreFuture
            A future object that returns the scores of the submitted sequences.
        """
        query_id = self.__resolve_query(query=query)
        return super().score(
            sequences=sequences,
            prompt=prompt,
            query_id=query_id,
            use_query_structure_in_decoder=use_query_structure_in_decoder,
            decoder_type=decoder_type,
        )

    def indel(
        self,
        sequence: bytes,
        prompt: str | Prompt | None = None,
        query: str | bytes | Protein | Query | None = None,
        use_query_structure_in_decoder: bool = True,
        decoder_type: Literal["mlm", "clm"] | None = None,
        insert: str | None = None,
        delete: list[int] | None = None,
        **kwargs,
    ) -> EmbeddingsScoreFuture:
        """
        Score all indels of the query sequence using the specified prompt.

        Parameters
        ----------
        sequence : bytes
            Sequence to analyze.
        prompt : str or Prompt or None, optional
            Prompt from an align workflow to condition the PoET model.
        query : str or bytes or Protein or Query or None, optional
            Query to use with prompt.
        use_query_structure_in_decoder : bool, optional
            Whether to use query structure in decoder. Default is True.
        decoder_type : {'mlm', 'clm'} or None, optional
            Decoder type. Default is None.
        insert : str or None, optional
            Insertion fragment at each site.
        delete : list of int or None, optional
            Range of size of fragment to delete at each site.
        **kwargs
            Additional keyword arguments. Accepted but currently not
            forwarded to the parent implementation.

        Returns
        -------
        EmbeddingsScoreFuture
            A future object that returns the scores of the indel-ed sequence.

        Raises
        ------
        ValueError
            If neither insert nor delete is provided.
        """
        # NOTE(review): **kwargs is swallowed here, unlike fit_gp which forwards
        # it. Presumably unintentional; confirm against PoETModel.indel before
        # forwarding.
        query_id = self.__resolve_query(query=query)
        return super().indel(
            sequence=sequence,
            prompt=prompt,
            query_id=query_id,
            use_query_structure_in_decoder=use_query_structure_in_decoder,
            decoder_type=decoder_type,
            insert=insert,
            delete=delete,
        )

    def single_site(
        self,
        sequence: bytes,
        prompt: str | Prompt | None = None,
        query: str | bytes | Protein | Query | None = None,
        use_query_structure_in_decoder: bool = True,
        decoder_type: Literal["mlm", "clm"] | None = None,
    ) -> EmbeddingsScoreFuture:
        """
        Score all single substitutions of the query sequence using the specified prompt.

        Parameters
        ----------
        sequence : bytes
            Sequence to analyze.
        prompt : str or Prompt or None, optional
            Prompt or prompt_id or prompt from an align workflow to condition PoET model.
        query : str or bytes or Protein or Query or None, optional
            Query to use with prompt.
        use_query_structure_in_decoder : bool, optional
            Whether to use query structure in decoder. Default is True.
        decoder_type : {'mlm', 'clm'} or None, optional
            Decoder type. Default is None.

        Returns
        -------
        EmbeddingsScoreFuture
            A future object that returns the scores of the mutated sequence.
        """
        query_id = self.__resolve_query(query=query)
        return super().single_site(
            sequence=sequence,
            prompt=prompt,
            query_id=query_id,
            use_query_structure_in_decoder=use_query_structure_in_decoder,
            decoder_type=decoder_type,
        )

    def generate(
        self,
        prompt: str | Prompt,
        query: str | bytes | Protein | Query | None = None,
        use_query_structure_in_decoder: bool = True,
        num_samples: int = 100,
        temperature: float = 1.0,
        topk: float | None = None,
        topp: float | None = None,
        max_length: int = 1000,
        seed: int | None = None,
        ensemble_weights: Sequence[float] | None = None,
        ensemble_method: Literal["arithmetic", "geometric"] | None = None,
    ) -> EmbeddingsGenerateFuture:
        """
        Generate protein sequences conditioned on a prompt.

        Parameters
        ----------
        prompt : str or Prompt
            Prompt from an align workflow to condition PoET model.
        query : str or bytes or Protein or Query or None, optional
            Query to use with prompt.
        use_query_structure_in_decoder : bool, optional
            Whether to use query structure in decoder. Default is True.
        num_samples : int, optional
            The number of samples to generate. Default is 100.
        temperature : float, optional
            The temperature for sampling. Higher values produce more random outputs. Default is 1.0.
        topk : float or None, optional
            The number of top-k residues to consider during sampling. Default is None.
        topp : float or None, optional
            The cumulative probability threshold for top-p sampling. Default is None.
        max_length : int, optional
            The maximum length of generated proteins. Default is 1000.
        seed : int or None, optional
            Seed for random number generation. Default is None.
        ensemble_weights : Sequence of float or None, optional
            Weights for combining likelihoods from multiple prompts in the ensemble.
            The length of this sequence must match the number of prompts.
            All weights must be finite. If ensemble_method is "arithmetic", then weights
            must also be non-negative, and have a non-zero sum.
        ensemble_method : {'arithmetic', 'geometric'} or None, optional
            Method used to combine likelihoods from multiple prompts in the ensemble.
            If "arithmetic", the weighted mean is used; if "geometric", the weighted
            geometric mean is used. If None (default), the method defaults to
            "arithmetic", but this behavior may change in the future.

        Returns
        -------
        EmbeddingsGenerateFuture
            A future object representing the status and information about the generation job.

        Raises
        ------
        AssertionError
            If ``ensemble_weights`` violates the constraints described above.
        """
        query_id = self.__resolve_query(query=query)
        if ensemble_weights is not None:
            # NOTE: these checks are assert statements (kept so the raised
            # exception type is unchanged); they are skipped under `python -O`.
            # Finiteness is checked first so NaN/inf input is reported as such.
            assert np.isfinite(
                np.array(ensemble_weights)
            ).all(), "All ensemble weights must be finite"
            # NB: for now, ensemble_method is None -> ensemble_method == "arithmetic"
            if ensemble_method is None or ensemble_method == "arithmetic":
                assert all(
                    w >= 0 for w in ensemble_weights
                ), "Ensemble weights must be non-negative for the arithmetic method"
                # Strictly positive: with non-negative weights `>= 0` could
                # never fail, and an all-zero weighting has no weighted mean.
                assert (
                    sum(ensemble_weights) > 0
                ), "Ensemble weights must have a non-zero sum for the arithmetic method"
            # The prompt count is only known when an actual Prompt object is given.
            if isinstance(prompt, Prompt):
                assert len(ensemble_weights) == prompt.num_replicates, (
                    f"Number of ensemble weights ({len(ensemble_weights)}) must be "
                    f"equal to the number of prompts ({prompt.num_replicates})"
                )
        return super().generate(
            prompt=prompt,
            num_samples=num_samples,
            temperature=temperature,
            topk=topk,
            topp=topp,
            max_length=max_length,
            seed=seed,
            query_id=query_id,
            use_query_structure_in_decoder=use_query_structure_in_decoder,
            ensemble_weights=ensemble_weights,
            ensemble_method=ensemble_method,
        )

    def fit_svd(
        self,
        sequences: list[bytes] | list[str] | None = None,
        assay: AssayDataset | None = None,
        n_components: int = 1024,
        reduction: ReductionType | None = None,
        prompt: str | Prompt | None = None,
        query: str | bytes | Protein | Query | None = None,
        use_query_structure_in_decoder: bool = True,
    ) -> "SVDModel":
        """
        Fit an SVD on the embedding results of PoET.

        This function will create an SVDModel based on the embeddings from this model
        as well as the hyperparameters specified in the arguments.

        Parameters
        ----------
        sequences : list of bytes or list of str or None, optional
            Sequences to fit SVD. If None, assay must be provided.
        assay : AssayDataset or None, optional
            Assay containing sequences to fit SVD. Ignored if sequences are provided.
        n_components : int, optional
            Number of components in SVD. Determines output shapes. Default is 1024.
        reduction : ReductionType or None, optional
            Embeddings reduction to use (e.g. mean).
        prompt : str or Prompt or None, optional
            Prompt from an align workflow to condition PoET model.
        query : str or bytes or Protein or Query or None, optional
            Query to use with prompt.
        use_query_structure_in_decoder : bool, optional
            Whether to use query structure in decoder. Default is True.

        Returns
        -------
        SVDModel
            A future that represents the fitted SVD model.
        """
        query_id = self.__resolve_query(query=query)
        return super().fit_svd(
            sequences=sequences,
            assay=assay,
            n_components=n_components,
            reduction=reduction,
            prompt=prompt,
            query_id=query_id,
            use_query_structure_in_decoder=use_query_structure_in_decoder,
        )

    def fit_umap(
        self,
        sequences: list[bytes] | list[str] | None = None,
        assay: AssayDataset | None = None,
        n_components: int = 2,
        reduction: ReductionType | None = ReductionType.MEAN,
        prompt: str | Prompt | None = None,
        query: str | bytes | Protein | Query | None = None,
        use_query_structure_in_decoder: bool = True,
    ) -> "UMAPModel":
        """
        Fit a UMAP on assay using PoET and hyperparameters.

        This function will create a UMAP based on the embeddings from this PoET model
        as well as the hyperparameters specified in the arguments.

        Parameters
        ----------
        sequences : list of bytes or list of str or None, optional
            Sequences to fit UMAP. If None, assay must be provided.
        assay : AssayDataset or None, optional
            Assay containing sequences to fit UMAP. Ignored if sequences are provided.
        n_components : int, optional
            Number of components in UMAP fit. Determines output shapes. Default is 2.
        reduction : ReductionType or None, optional
            Embeddings reduction to use (e.g. mean). Default is ReductionType.MEAN.
        prompt : str or Prompt or None, optional
            Prompt from an align workflow to condition PoET model.
        query : str or bytes or Protein or Query or None, optional
            Query to use with prompt.
        use_query_structure_in_decoder : bool, optional
            Whether to use query structure in decoder. Default is True.

        Returns
        -------
        UMAPModel
            A future that represents the fitted UMAP model.
        """
        query_id = self.__resolve_query(query=query)
        return super().fit_umap(
            sequences=sequences,
            assay=assay,
            n_components=n_components,
            reduction=reduction,
            prompt=prompt,
            query_id=query_id,
            use_query_structure_in_decoder=use_query_structure_in_decoder,
        )

    def fit_gp(
        self,
        assay: AssayMetadata | AssayDataset | str,
        properties: list[str],
        prompt: str | Prompt | None = None,
        query: str | bytes | Protein | Query | None = None,
        use_query_structure_in_decoder: bool = True,
        **kwargs,
    ) -> "PredictorModel":
        """
        Fit a Gaussian Process (GP) on assay using this embedding model and hyperparameters.

        Parameters
        ----------
        assay : AssayMetadata or AssayDataset or str
            Assay to fit GP on.
        properties : list of str
            Properties in the assay to fit the GP on.
        prompt : str or Prompt or None, optional
            Prompt from an align workflow to condition PoET model.
        query : str or bytes or Protein or Query or None, optional
            Query to use with prompt.
        use_query_structure_in_decoder : bool, optional
            Whether to use query structure in decoder. Default is True.
        **kwargs
            Additional keyword arguments, forwarded to the parent implementation.

        Returns
        -------
        PredictorModel
            A future that represents the trained predictor model.
        """
        query_id = self.__resolve_query(query=query)
        return super().fit_gp(
            assay=assay,
            properties=properties,
            prompt=prompt,
            query_id=query_id,
            use_query_structure_in_decoder=use_query_structure_in_decoder,
            **kwargs,
        )
@@ -0,0 +1,78 @@
1
+ """Schemas for OpenProtein embeddings system."""
2
+
3
+ from typing import Literal
4
+
5
+ import numpy as np
6
+ from pydantic import BaseModel, ConfigDict, Field
7
+
8
+ from openprotein.jobs import BatchJob, Job, JobType
9
+
10
+
11
class EmbeddedSequence(BaseModel):
    """
    A sequence together with the embedding our models produced for it.

    Behaves like a two-element sequence: item 0 is the sequence and item 1
    is the embedding, for iteration, ``len()`` and integer indexing alike.
    """

    # Arbitrary types must be allowed so the numpy array field is accepted.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    sequence: bytes
    embedding: np.ndarray

    def __iter__(self):
        """Yield the sequence, then the embedding."""
        yield from (self.sequence, self.embedding)

    def __len__(self):
        """Return 2: one slot for the sequence and one for the embedding."""
        return 2

    def __getitem__(self, i):
        """Return the sequence for index 0 and the embedding for index 1."""
        if i == 0:
            return self.sequence
        if i == 1:
            return self.embedding
        # Only the two positions above are supported.
        raise IndexError("Index out of range")
36
+
37
+
38
class EmbeddingsJob(Job, BatchJob):
    # Batch job schema for embedding requests: job_type may be the plain or
    # the reduced embed type, and defaults to the plain one.
    job_type: Literal[
        JobType.embeddings_embed,
        JobType.embeddings_embed_reduced,
    ] = Field(default=JobType.embeddings_embed)
43
+
44
+
45
class AttnJob(Job, BatchJob):
    # Batch job schema pinned to the embeddings_attn job type.
    job_type: Literal[JobType.embeddings_attn] = Field(
        default=JobType.embeddings_attn,
    )
48
+
49
+
50
class LogitsJob(Job, BatchJob):
    # Batch job schema pinned to the embeddings_logits job type.
    job_type: Literal[JobType.embeddings_logits] = Field(default=JobType.embeddings_logits)
55
+
56
+
57
class ScoreJob(Job, BatchJob):
    # Batch job schema pinned to the poet_score job type.
    job_type: Literal[JobType.poet_score] = Field(
        default=JobType.poet_score,
    )
60
+
61
+
62
class ScoreIndelJob(Job, BatchJob):
    # Batch job schema pinned to the poet_score_indel job type.
    job_type: Literal[JobType.poet_score_indel] = Field(default=JobType.poet_score_indel)
67
+
68
+
69
class ScoreSingleSiteJob(Job, BatchJob):
    # Batch job schema pinned to the poet_single_site job type.
    job_type: Literal[JobType.poet_single_site] = Field(default=JobType.poet_single_site)
74
+
75
+
76
class GenerateJob(Job, BatchJob):
    # Batch job schema pinned to the poet_generate job type.
    job_type: Literal[JobType.poet_generate] = Field(
        default=JobType.poet_generate,
    )
openprotein/errors.py ADDED
@@ -0,0 +1,76 @@
1
+ from pydantic import BaseModel
2
+ from requests import Response
3
+
4
+
5
+ # Errors for OpenProtein
6
class InvalidParameterError(Exception):
    """Error signalling an invalid parameter; the text is kept on ``message``."""

    def __init__(self, message="Invalid parameter"):
        super().__init__(message)
        self.message = message
12
+
13
+
14
class MissingParameterError(Exception):
    """Error signalling a missing required parameter; the text is kept on ``message``."""

    def __init__(self, message="Required parameter is missing"):
        super().__init__(message)
        self.message = message
20
+
21
+
22
class RawAPIError(BaseModel):
    # Schema with a single string field, ``detail``.
    # NOTE(review): presumably mirrors the JSON error body returned by the
    # API; confirm against the call sites.
    detail: str
25
+
26
+
27
class APIError(Exception):
    """Base API error; the required message is kept on ``message``."""

    def __init__(self, message: str):
        super().__init__(message)
        self.message = message
33
+
34
+
35
class HTTPError(APIError):
    """
    API error built from an HTTP response.

    Keeps the response itself plus its status code, body text and URL as
    attributes, and composes the error message from those three values.
    """

    def __init__(self, response: Response):
        self.response = response
        self.status_code = response.status_code
        self.text = response.text
        self.url = response.url
        # Message layout: status line, resource line, then the response body.
        super().__init__(
            f"Status code {self.status_code}\non resource: {self.url}\n{self.text}"
        )
45
+
46
+
47
class AuthError(Exception):
    """Error signalling invalid authorization; the text is kept on ``message``."""

    def __init__(self, message="Invalid authorization"):
        self.message = message
        super().__init__(self.message)
53
+
54
+
55
class InvalidJob(Exception):
    """Error signalling that no such job exists; the text is kept on ``message``."""

    def __init__(self, message="No such job"):
        self.message = message
        super().__init__(self.message)
61
+
62
+
63
class TimeoutException(Exception):
    """Error signalling that a request timed out; the text is kept on ``message``."""

    def __init__(self, message="Request timed out!"):
        self.message = message
        super().__init__(self.message)
69
+
70
+
71
class DeprecationError(Exception):
    """Error telling the user that an interface is deprecated and must not be used anymore."""

    def __init__(self, message="This API is deprecated and no longer supported"):
        super().__init__(message)
        self.message = message