openprotein-python 0.8.2__1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openprotein/__init__.py +164 -0
- openprotein/_version.py +48 -0
- openprotein/align/__init__.py +8 -0
- openprotein/align/align.py +395 -0
- openprotein/align/api.py +428 -0
- openprotein/align/future.py +55 -0
- openprotein/align/msa.py +129 -0
- openprotein/align/schemas.py +165 -0
- openprotein/base.py +181 -0
- openprotein/chains.py +88 -0
- openprotein/common/__init__.py +5 -0
- openprotein/common/features.py +7 -0
- openprotein/common/model_metadata.py +33 -0
- openprotein/common/reduction.py +8 -0
- openprotein/config.py +9 -0
- openprotein/csv.py +31 -0
- openprotein/data/__init__.py +9 -0
- openprotein/data/api.py +218 -0
- openprotein/data/assaydataset.py +178 -0
- openprotein/data/data.py +93 -0
- openprotein/data/schemas.py +27 -0
- openprotein/design/__init__.py +16 -0
- openprotein/design/api.py +259 -0
- openprotein/design/design.py +125 -0
- openprotein/design/future.py +146 -0
- openprotein/design/schemas.py +607 -0
- openprotein/embeddings/__init__.py +27 -0
- openprotein/embeddings/api.py +619 -0
- openprotein/embeddings/embeddings.py +151 -0
- openprotein/embeddings/esm.py +33 -0
- openprotein/embeddings/future.py +146 -0
- openprotein/embeddings/models.py +421 -0
- openprotein/embeddings/openprotein.py +21 -0
- openprotein/embeddings/poet.py +446 -0
- openprotein/embeddings/poet2.py +505 -0
- openprotein/embeddings/schemas.py +78 -0
- openprotein/errors.py +76 -0
- openprotein/fasta.py +92 -0
- openprotein/fold/__init__.py +21 -0
- openprotein/fold/alphafold2.py +131 -0
- openprotein/fold/api.py +287 -0
- openprotein/fold/boltz.py +691 -0
- openprotein/fold/esmfold.py +54 -0
- openprotein/fold/fold.py +107 -0
- openprotein/fold/future.py +509 -0
- openprotein/fold/models.py +139 -0
- openprotein/fold/schemas.py +39 -0
- openprotein/jobs/__init__.py +9 -0
- openprotein/jobs/api.py +71 -0
- openprotein/jobs/futures.py +746 -0
- openprotein/jobs/jobs.py +69 -0
- openprotein/jobs/schemas.py +135 -0
- openprotein/models/__init__.py +4 -0
- openprotein/models/base.py +63 -0
- openprotein/models/foundation/rfdiffusion.py +283 -0
- openprotein/models/models.py +33 -0
- openprotein/predictor/__init__.py +25 -0
- openprotein/predictor/api.py +384 -0
- openprotein/predictor/models.py +374 -0
- openprotein/predictor/prediction.py +79 -0
- openprotein/predictor/predictor.py +242 -0
- openprotein/predictor/schemas.py +113 -0
- openprotein/predictor/validate.py +40 -0
- openprotein/prompt/__init__.py +9 -0
- openprotein/prompt/api.py +505 -0
- openprotein/prompt/models.py +142 -0
- openprotein/prompt/prompt.py +130 -0
- openprotein/prompt/schemas.py +49 -0
- openprotein/protein.py +587 -0
- openprotein/svd/__init__.py +9 -0
- openprotein/svd/api.py +206 -0
- openprotein/svd/models.py +288 -0
- openprotein/svd/schemas.py +31 -0
- openprotein/svd/svd.py +134 -0
- openprotein/umap/__init__.py +9 -0
- openprotein/umap/api.py +259 -0
- openprotein/umap/models.py +211 -0
- openprotein/umap/schemas.py +35 -0
- openprotein/umap/umap.py +175 -0
- openprotein/utils/uuid.py +29 -0
- openprotein_python-0.8.2.dist-info/METADATA +176 -0
- openprotein_python-0.8.2.dist-info/RECORD +84 -0
- openprotein_python-0.8.2.dist-info/WHEEL +4 -0
- openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0
|
@@ -0,0 +1,607 @@
|
|
|
1
|
+
"""Schemas for the OpenProtein design system."""
|
|
2
|
+
|
|
3
|
+
from collections import namedtuple
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from typing import Any, Literal, NamedTuple
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from pydantic import BaseModel, ConfigDict, Field, RootModel, model_serializer
|
|
10
|
+
|
|
11
|
+
from openprotein.jobs import Job, JobStatus, JobType
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class CriterionType(str, Enum):
    """
    String-valued enum naming the kinds of criterion.

    Attributes
    ----------
    model : str
        Tag used by model-based criteria (value ``"model"``).
    n_mutations : str
        Tag used by mutation count-based criteria (value ``"n_mutations"``).
    """

    # Members are also ``str`` instances because of the ``str`` mixin.
    model = "model"
    n_mutations = "n_mutations"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Subcriterion(BaseModel):
    """
    Base model shared by all subcriteria.

    Instances can be chained with ``&`` (yielding a ``Criterion``) and with
    ``|`` (yielding a ``Criteria``).

    Attributes
    ----------
    criterion_type : CriterionType
        The type of the criterion.
    """

    criterion_type: CriterionType

    def __and__(self, other: "Subcriterion | Criterion | Any") -> "Criterion":
        """
        AND this subcriterion with a subcriterion or a criterion.

        Parameters
        ----------
        other : Subcriterion or Criterion or Any
            Right-hand operand. A ``Criterion`` contributes every entry of
            its ``root`` list; a ``Subcriterion`` contributes itself.

        Returns
        -------
        Criterion
            A new Criterion whose list starts with ``self`` followed by the
            subcriteria taken from ``other``.

        Raises
        ------
        ValueError
            If `other` is neither a Subcriterion nor a Criterion.
        """
        if isinstance(other, Subcriterion):
            return Criterion([self, other])  # type: ignore - doesnt like Self
        if isinstance(other, Criterion):
            # Flatten the existing conjunction instead of nesting it.
            return Criterion([self, *other.root])  # type: ignore - doesnt like Self
        raise ValueError(
            f"Expected to chain only with criterion or subcriterion, got {type(other)}"
        )

    def __or__(self, other: "Subcriterion | Criterion | Any") -> "Criteria":
        """
        OR this subcriterion with a subcriterion or a criterion.

        Parameters
        ----------
        other : Subcriterion or Criterion or Any
            Right-hand operand. A bare ``Subcriterion`` is first wrapped in a
            single-element ``Criterion``.

        Returns
        -------
        Criteria
            A new Criteria holding two entries: ``self`` wrapped in a
            Criterion, then the (possibly wrapped) ``other``.

        Raises
        ------
        ValueError
            If `other` is neither a Subcriterion nor a Criterion.
        """
        if isinstance(other, Criterion):
            rhs = other
        elif isinstance(other, Subcriterion):
            rhs = Criterion([other])
        else:
            raise ValueError(
                f"Expected to chain only with criterion or subcriterion, got {type(other)}"
            )
        return Criteria([Criterion([self]), rhs])
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class ModelCriterion(Subcriterion):
    """
    Subcriterion expressed against a model's measurement.

    The comparison operators ``<``, ``>`` and ``==`` and the ``*`` operator
    are overloaded: they do not compare or multiply, they write the
    direction/target/weight into ``self.criterion`` and return ``self``.

    Attributes
    ----------
    criterion_type : CriterionType
        The type of the criterion (defaults to ``CriterionType.model``).
    model_id : str
        The identifier of the model.
    measurement_name : str
        The name of the measurement.
    criterion : ModelCriterion.Criterion
        Weight, direction and target of the criterion.
    """

    class Criterion(BaseModel):
        """
        Details of a model criterion.

        Attributes
        ----------
        weight : float
            The weight of the criterion (defaults to 1.0).
        direction : DirectionEnum or None
            The direction of the comparison; None until set.
        target : float or None
            The target value for the criterion; None until set.
        """

        class DirectionEnum(str, Enum):
            """
            Direction of comparison.

            Attributes
            ----------
            gt : str
                Greater than (``">"``).
            lt : str
                Less than (``"<"``).
            eq : str
                Equal to (``"="``).
            """

            gt = ">"
            lt = "<"
            eq = "="

        weight: float = 1.0
        direction: DirectionEnum | None = None
        target: float | None = None

    criterion_type: CriterionType = CriterionType.model
    model_id: str
    measurement_name: str
    criterion: Criterion = Criterion()

    # Empty protected namespaces; presumably so the ``model_id`` field does not
    # clash with pydantic's reserved ``model_`` prefix.
    model_config = ConfigDict(protected_namespaces=())

    def __mul__(self, weight: float) -> "ModelCriterion":
        """
        Store `weight` on the criterion (``criterion * w`` or ``w * criterion``).

        Parameters
        ----------
        weight : float
            The weight to set.

        Returns
        -------
        ModelCriterion
            This same instance, modified in place.
        """
        details = self.criterion
        details.weight = weight
        return self

    def __lt__(self, other: float) -> "ModelCriterion":
        """
        Configure the criterion as "less than `other`".

        Parameters
        ----------
        other : float
            The target value.

        Returns
        -------
        ModelCriterion
            This same instance, modified in place.
        """
        details = self.criterion
        details.target = other
        details.direction = ModelCriterion.Criterion.DirectionEnum.lt
        return self

    def __gt__(self, other: float) -> "ModelCriterion":
        """
        Configure the criterion as "greater than `other`".

        Parameters
        ----------
        other : float
            The target value.

        Returns
        -------
        ModelCriterion
            This same instance, modified in place.
        """
        details = self.criterion
        details.target = other
        details.direction = ModelCriterion.Criterion.DirectionEnum.gt
        return self

    def __eq__(self, other: float) -> "ModelCriterion":
        """
        Configure the criterion as "equal to `other`".

        Note that this replaces ordinary equality comparison for instances.

        Parameters
        ----------
        other : float
            The target value.

        Returns
        -------
        ModelCriterion
            This same instance, modified in place.
        """
        details = self.criterion
        details.target = other
        details.direction = ModelCriterion.Criterion.DirectionEnum.eq
        return self

    # Weighting is symmetric: ``2 * criterion`` behaves like ``criterion * 2``.
    __rmul__ = __mul__

    @model_serializer(mode="wrap")
    def validate_criterion_before_serialize(self, handler):
        """
        Refuse to serialize a criterion that is not fully specified.

        Parameters
        ----------
        handler : callable
            The wrapped serialization handler.

        Returns
        -------
        Any
            Whatever ``handler(self)`` produces.

        Raises
        ------
        ValueError
            If the criterion, its direction, or its target is None.
        """
        details = self.criterion
        incomplete = (
            details is None or details.direction is None or details.target is None
        )
        if incomplete:
            raise ValueError("Expected direction and target to be set")
        return handler(self)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
class NMutationCriterion(Subcriterion):
    """
    Subcriterion for mutation count-based criteria.

    Attributes
    ----------
    criterion_type : CriterionType
        The type of the criterion (defaults to ``CriterionType.n_mutations``).
    sequences : list of str
        List of sequences; defaults to an empty list.
    """

    criterion_type: CriterionType = CriterionType.n_mutations
    sequences: list[str] = Field(default_factory=list)

    @model_serializer(mode="wrap")
    def remove_empty_sequences(self, handler):
        """
        Drop the ``sequences`` entry from the serialized output when it is empty.

        Parameters
        ----------
        handler : callable
            The wrapped serialization handler.

        Returns
        -------
        dict
            The serialized object, without a ``sequences`` key if that key
            was missing or empty.
        """
        d = handler(self)
        # The key can already be absent (e.g. when the caller dumps with
        # ``exclude`` or ``exclude_unset``), so avoid indexing it directly,
        # which would raise KeyError.
        if not d.get("sequences"):
            d.pop("sequences", None)
        return d


# Lower-case alias for NMutationCriterion.
n_mutations = NMutationCriterion
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
class Criterion(RootModel):
    """
    Class representing a logical AND of subcriteria.

    Attributes
    ----------
    root : list of Subcriterion
        The list of subcriteria.
    """

    root: list[ModelCriterion | NMutationCriterion | Subcriterion]

    def __and__(self, other: "Criterion | Subcriterion") -> "Criterion":
        """
        Combine this criterion with another using logical AND.

        Parameters
        ----------
        other : Criterion or Subcriterion
            The other criterion or subcriterion to combine. A Criterion
            contributes every entry of its ``root`` list.

        Returns
        -------
        Criterion
            A new Criterion holding this criterion's subcriteria followed by
            those taken from ``other``.

        Raises
        ------
        ValueError
            If `other` is neither a Subcriterion nor a Criterion. Previously
            such operands (including a ``Criteria``) were silently dropped;
            this now matches ``Subcriterion.__and__``.
        """
        if isinstance(other, Subcriterion):
            others = [other]
        elif isinstance(other, Criterion):
            others = other.root
        else:
            raise ValueError(
                f"Expected to chain only with criterion or subcriterion, got {type(other)}"
            )

        return Criterion(self.root + others)

    def __or__(self, other: "Criterion | Subcriterion") -> "Criteria":
        """
        Combine this criterion with another using logical OR.

        Parameters
        ----------
        other : Criterion or Subcriterion
            The other criterion or subcriterion to combine. A Subcriterion is
            wrapped in a single-element Criterion first. Any other value is
            handed to ``Criteria`` unchanged.

        Returns
        -------
        Criteria
            A new Criteria with the two criteria OR-ed.
        """
        if isinstance(other, Subcriterion):
            other = Criterion([other])

        return Criteria([self, other])
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
class Criteria(RootModel):
    """
    Logical OR over a list of ``Criterion`` objects.

    Attributes
    ----------
    root : list of Criterion
        The criteria being OR-ed together.
    """

    root: list[Criterion]

    def __or__(self, other: "Criterion | Subcriterion | Criteria") -> "Criteria":
        """
        OR this criteria with a criterion, subcriterion, or another criteria.

        Parameters
        ----------
        other : Criterion or Subcriterion or Criteria
            Right-hand operand. A Criterion or Subcriterion is first wrapped
            so that it exposes a ``root`` list of Criterion objects.

        Returns
        -------
        Criteria
            A new Criteria whose list is this instance's ``root`` followed by
            the right-hand operand's ``root``.
        """
        rhs = other
        if isinstance(rhs, Criterion):
            rhs = Criteria([rhs])
        elif isinstance(rhs, Subcriterion):
            rhs = Criteria([Criterion([rhs])])
        # A Criteria operand is used as-is.

        combined = self.root + rhs.root
        return Criteria(combined)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
class DesignConstraint:
|
|
396
|
+
"""
|
|
397
|
+
Class for managing design constraints on a sequence.
|
|
398
|
+
|
|
399
|
+
Attributes
|
|
400
|
+
----------
|
|
401
|
+
sequence : str
|
|
402
|
+
The sequence to constrain.
|
|
403
|
+
mutations : dict of int to set of str
|
|
404
|
+
Allowed amino acids at each position.
|
|
405
|
+
"""
|
|
406
|
+
|
|
407
|
+
def __init__(self, sequence: str):
|
|
408
|
+
"""
|
|
409
|
+
Initialize the design constraint.
|
|
410
|
+
|
|
411
|
+
Parameters
|
|
412
|
+
----------
|
|
413
|
+
sequence : str
|
|
414
|
+
The sequence to constrain.
|
|
415
|
+
"""
|
|
416
|
+
self.sequence = sequence
|
|
417
|
+
self.mutations = self.initialize(sequence)
|
|
418
|
+
|
|
419
|
+
def initialize(self, sequence: str) -> dict[int, set[str]]:
|
|
420
|
+
"""
|
|
421
|
+
Initialize with no changes allowed to the sequence.
|
|
422
|
+
|
|
423
|
+
Parameters
|
|
424
|
+
----------
|
|
425
|
+
sequence : str
|
|
426
|
+
The sequence to constrain.
|
|
427
|
+
|
|
428
|
+
Returns
|
|
429
|
+
-------
|
|
430
|
+
dict of int to set of str
|
|
431
|
+
Allowed amino acids at each position.
|
|
432
|
+
"""
|
|
433
|
+
return {i: {aa} for i, aa in enumerate(sequence, start=1)}
|
|
434
|
+
|
|
435
|
+
def allow(
|
|
436
|
+
self,
|
|
437
|
+
amino_acids: list[str] | str | None = None,
|
|
438
|
+
positions: int | list[int] | None = None,
|
|
439
|
+
) -> None:
|
|
440
|
+
"""
|
|
441
|
+
Allow specific amino acids at given positions.
|
|
442
|
+
|
|
443
|
+
Parameters
|
|
444
|
+
----------
|
|
445
|
+
amino_acids : list of str or str or None, optional
|
|
446
|
+
Amino acids to allow. If None, allows all amino acids in the sequence.
|
|
447
|
+
positions : int or list of int or None, optional
|
|
448
|
+
Positions to allow amino acids at. If None, allows at all positions.
|
|
449
|
+
"""
|
|
450
|
+
if isinstance(positions, int):
|
|
451
|
+
positions = [positions]
|
|
452
|
+
elif positions is None:
|
|
453
|
+
positions = [i + 1 for i in range(len(self.sequence))]
|
|
454
|
+
if isinstance(amino_acids, str):
|
|
455
|
+
amino_acids = list(amino_acids)
|
|
456
|
+
elif amino_acids is None:
|
|
457
|
+
amino_acids = list(self.sequence)
|
|
458
|
+
|
|
459
|
+
for position in positions:
|
|
460
|
+
if position in self.mutations:
|
|
461
|
+
for aa in amino_acids:
|
|
462
|
+
self.mutations[position].add(aa)
|
|
463
|
+
else:
|
|
464
|
+
self.mutations[position] = set(amino_acids)
|
|
465
|
+
|
|
466
|
+
def remove(
|
|
467
|
+
self,
|
|
468
|
+
amino_acids: list[str] | str | None = None,
|
|
469
|
+
positions: int | list[int] | None = None,
|
|
470
|
+
) -> None:
|
|
471
|
+
"""
|
|
472
|
+
Remove specific amino acids from being allowed at given positions.
|
|
473
|
+
|
|
474
|
+
Parameters
|
|
475
|
+
----------
|
|
476
|
+
amino_acids : list of str or str or None, optional
|
|
477
|
+
Amino acids to remove. If None, removes all amino acids in the sequence.
|
|
478
|
+
positions : int or list of int or None, optional
|
|
479
|
+
Positions to remove amino acids from. If None, removes from all positions.
|
|
480
|
+
"""
|
|
481
|
+
if isinstance(positions, int):
|
|
482
|
+
positions = [positions]
|
|
483
|
+
elif positions is None:
|
|
484
|
+
positions = [i + 1 for i in range(len(self.sequence))]
|
|
485
|
+
if isinstance(amino_acids, str):
|
|
486
|
+
amino_acids = list(amino_acids)
|
|
487
|
+
elif amino_acids is None:
|
|
488
|
+
amino_acids = list(self.sequence)
|
|
489
|
+
|
|
490
|
+
for position in positions:
|
|
491
|
+
if position in self.mutations:
|
|
492
|
+
for aa in amino_acids:
|
|
493
|
+
if aa in self.mutations[position]:
|
|
494
|
+
self.mutations[position].remove(aa)
|
|
495
|
+
|
|
496
|
+
def as_dict(self) -> dict[int, list[str]]:
|
|
497
|
+
"""
|
|
498
|
+
Convert the internal mutations representation into a dictionary.
|
|
499
|
+
|
|
500
|
+
Returns
|
|
501
|
+
-------
|
|
502
|
+
dict of int to list of str
|
|
503
|
+
Allowed amino acids at each position.
|
|
504
|
+
"""
|
|
505
|
+
return {i: list(aa) for i, aa in self.mutations.items()}
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
class DesignAlgorithm(str, Enum):
    """
    String-valued enum of the available design algorithms.

    Attributes
    ----------
    genetic_algorithm : str
        Genetic algorithm (value ``"genetic-algorithm"``).
    """

    # Note the hyphen in the value versus the underscore in the member name.
    genetic_algorithm = "genetic-algorithm"
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
class Design(BaseModel):
    """
    Model describing a design and the settings it was created with.

    Attributes
    ----------
    id : str
        Identifier of the design.
    status : JobStatus
        Status of the design job.
    progress_counter : int
        Progress counter of the job.
    created_date : datetime
        When the design was created.
    algorithm : DesignAlgorithm
        Design algorithm used.
    num_rows : int
        Number of rows.
    num_steps : int
        Number of steps.
    assay_id : str
        Identifier of the assay.
    criteria : Criteria
        Design criteria.
    allowed_tokens : dict of str to list of str or None
        Allowed tokens for the design, or None.
    pop_size : int
        Population size.
    n_offsprings : int
        Number of offsprings (GA parameter).
    crossover_prob : float
        Crossover probability (GA parameter).
    crossover_prob_pointwise : float
        Pointwise crossover probability (GA parameter).
    mutation_average_mutations_per_seq : int
        Average number of mutations per sequence (GA parameter).
    """

    id: str
    status: JobStatus
    progress_counter: int
    created_date: datetime
    algorithm: DesignAlgorithm
    num_rows: int
    num_steps: int
    assay_id: str
    criteria: Criteria
    allowed_tokens: dict[str, list[str]] | None
    pop_size: int
    n_offsprings: int
    crossover_prob: float
    crossover_prob_pointwise: float
    mutation_average_mutations_per_seq: int

    def is_done(self):
        """
        Report whether the design job has finished.

        Returns
        -------
        bool
            The result of ``self.status.done()``.
        """
        current_status = self.status
        return current_status.done()
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
class DesignJob(Job):
    """
    Job model specialised for design jobs.

    Attributes
    ----------
    job_type : Literal[JobType.designer]
        The type of the job; restricted to ``JobType.designer``.
    """

    # The Literal annotation pins this job model to the designer job type.
    job_type: Literal[JobType.designer]
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
class DesignResult(NamedTuple):
    """
    Immutable record for one design result entry.

    Field order is part of the tuple layout and must not change.
    NOTE(review): the array fields presumably hold per-criterion values for
    the sequence; confirm shapes against the code that builds these records.
    """

    # Where the entry sits in the run.
    step: int
    sample_index: int
    # The sequence this entry refers to.
    sequence: str
    # Numeric results attached to the sequence.
    scores: np.ndarray
    subscores: np.ndarray
    means: np.ndarray
    vars: np.ndarray
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Embeddings module for using protein language models on OpenProtein.
|
|
3
|
+
|
|
4
|
+
isort:skip_file
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .embeddings import EmbeddingsAPI
|
|
8
|
+
from .models import EmbeddingModel
|
|
9
|
+
from .openprotein import OpenProteinModel
|
|
10
|
+
from .esm import ESMModel
|
|
11
|
+
from .poet import PoETModel
|
|
12
|
+
from .poet2 import PoET2Model
|
|
13
|
+
from .schemas import (
|
|
14
|
+
EmbeddedSequence,
|
|
15
|
+
EmbeddingsJob,
|
|
16
|
+
AttnJob,
|
|
17
|
+
LogitsJob,
|
|
18
|
+
ScoreJob,
|
|
19
|
+
ScoreIndelJob,
|
|
20
|
+
ScoreSingleSiteJob,
|
|
21
|
+
GenerateJob,
|
|
22
|
+
)
|
|
23
|
+
from .future import (
|
|
24
|
+
EmbeddingsGenerateFuture,
|
|
25
|
+
EmbeddingsResultFuture,
|
|
26
|
+
EmbeddingsScoreFuture,
|
|
27
|
+
)
|