openprotein-python 0.8.2__1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. openprotein/__init__.py +164 -0
  2. openprotein/_version.py +48 -0
  3. openprotein/align/__init__.py +8 -0
  4. openprotein/align/align.py +395 -0
  5. openprotein/align/api.py +428 -0
  6. openprotein/align/future.py +55 -0
  7. openprotein/align/msa.py +129 -0
  8. openprotein/align/schemas.py +165 -0
  9. openprotein/base.py +181 -0
  10. openprotein/chains.py +88 -0
  11. openprotein/common/__init__.py +5 -0
  12. openprotein/common/features.py +7 -0
  13. openprotein/common/model_metadata.py +33 -0
  14. openprotein/common/reduction.py +8 -0
  15. openprotein/config.py +9 -0
  16. openprotein/csv.py +31 -0
  17. openprotein/data/__init__.py +9 -0
  18. openprotein/data/api.py +218 -0
  19. openprotein/data/assaydataset.py +178 -0
  20. openprotein/data/data.py +93 -0
  21. openprotein/data/schemas.py +27 -0
  22. openprotein/design/__init__.py +16 -0
  23. openprotein/design/api.py +259 -0
  24. openprotein/design/design.py +125 -0
  25. openprotein/design/future.py +146 -0
  26. openprotein/design/schemas.py +607 -0
  27. openprotein/embeddings/__init__.py +27 -0
  28. openprotein/embeddings/api.py +619 -0
  29. openprotein/embeddings/embeddings.py +151 -0
  30. openprotein/embeddings/esm.py +33 -0
  31. openprotein/embeddings/future.py +146 -0
  32. openprotein/embeddings/models.py +421 -0
  33. openprotein/embeddings/openprotein.py +21 -0
  34. openprotein/embeddings/poet.py +446 -0
  35. openprotein/embeddings/poet2.py +505 -0
  36. openprotein/embeddings/schemas.py +78 -0
  37. openprotein/errors.py +76 -0
  38. openprotein/fasta.py +92 -0
  39. openprotein/fold/__init__.py +21 -0
  40. openprotein/fold/alphafold2.py +131 -0
  41. openprotein/fold/api.py +287 -0
  42. openprotein/fold/boltz.py +691 -0
  43. openprotein/fold/esmfold.py +54 -0
  44. openprotein/fold/fold.py +107 -0
  45. openprotein/fold/future.py +509 -0
  46. openprotein/fold/models.py +139 -0
  47. openprotein/fold/schemas.py +39 -0
  48. openprotein/jobs/__init__.py +9 -0
  49. openprotein/jobs/api.py +71 -0
  50. openprotein/jobs/futures.py +746 -0
  51. openprotein/jobs/jobs.py +69 -0
  52. openprotein/jobs/schemas.py +135 -0
  53. openprotein/models/__init__.py +4 -0
  54. openprotein/models/base.py +63 -0
  55. openprotein/models/foundation/rfdiffusion.py +283 -0
  56. openprotein/models/models.py +33 -0
  57. openprotein/predictor/__init__.py +25 -0
  58. openprotein/predictor/api.py +384 -0
  59. openprotein/predictor/models.py +374 -0
  60. openprotein/predictor/prediction.py +79 -0
  61. openprotein/predictor/predictor.py +242 -0
  62. openprotein/predictor/schemas.py +113 -0
  63. openprotein/predictor/validate.py +40 -0
  64. openprotein/prompt/__init__.py +9 -0
  65. openprotein/prompt/api.py +505 -0
  66. openprotein/prompt/models.py +142 -0
  67. openprotein/prompt/prompt.py +130 -0
  68. openprotein/prompt/schemas.py +49 -0
  69. openprotein/protein.py +587 -0
  70. openprotein/svd/__init__.py +9 -0
  71. openprotein/svd/api.py +206 -0
  72. openprotein/svd/models.py +288 -0
  73. openprotein/svd/schemas.py +31 -0
  74. openprotein/svd/svd.py +134 -0
  75. openprotein/umap/__init__.py +9 -0
  76. openprotein/umap/api.py +259 -0
  77. openprotein/umap/models.py +211 -0
  78. openprotein/umap/schemas.py +35 -0
  79. openprotein/umap/umap.py +175 -0
  80. openprotein/utils/uuid.py +29 -0
  81. openprotein_python-0.8.2.dist-info/METADATA +176 -0
  82. openprotein_python-0.8.2.dist-info/RECORD +84 -0
  83. openprotein_python-0.8.2.dist-info/WHEEL +4 -0
  84. openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0
@@ -0,0 +1,607 @@
1
+ """Schemas for the OpenProtein design system."""
2
+
3
+ from collections import namedtuple
4
+ from datetime import datetime
5
+ from enum import Enum
6
+ from typing import Any, Literal, NamedTuple
7
+
8
+ import numpy as np
9
+ from pydantic import BaseModel, ConfigDict, Field, RootModel, model_serializer
10
+
11
+ from openprotein.jobs import Job, JobStatus, JobType
12
+
13
+
14
class CriterionType(str, Enum):
    """
    Kinds of subcriteria that can appear in a design objective.

    Every member is also a ``str``, so it compares equal to its plain
    string value.

    Attributes
    ----------
    model : str
        Tag with value ``"model"``, used for model-based criteria.
    n_mutations : str
        Tag with value ``"n_mutations"``, used for mutation count-based
        criteria.
    """

    model = "model"
    n_mutations = "n_mutations"
28
+
29
+
30
class Subcriterion(BaseModel):
    """
    Common base for the atomic conditions of a design objective.

    Subcriteria are composed with operators: ``a & b`` builds a
    ``Criterion`` (logical AND) and ``a | b`` builds a ``Criteria``
    (logical OR).

    Attributes
    ----------
    criterion_type : CriterionType
        The type of the criterion.
    """

    criterion_type: CriterionType

    def __and__(self, other: "Subcriterion | Criterion | Any") -> "Criterion":
        """
        AND this subcriterion with another subcriterion or criterion.

        Parameters
        ----------
        other : Subcriterion or Criterion or Any
            Right-hand operand. A Criterion contributes all of its
            subcriteria; a Subcriterion contributes itself.

        Returns
        -------
        Criterion
            A new Criterion holding this subcriterion followed by the
            subcriteria taken from `other`.

        Raises
        ------
        ValueError
            If `other` is neither a Subcriterion nor a Criterion.
        """
        if isinstance(other, Subcriterion):
            return Criterion([self, other])  # type: ignore - Self vs. root item type
        if isinstance(other, Criterion):
            return Criterion([self, *other.root])  # type: ignore - Self vs. root item type
        raise ValueError(
            f"Expected to chain only with criterion or subcriterion, got {type(other)}"
        )

    def __or__(self, other: "Subcriterion | Criterion | Any") -> "Criteria":
        """
        OR this subcriterion with another subcriterion or criterion.

        Parameters
        ----------
        other : Subcriterion or Criterion or Any
            Right-hand operand. A bare Subcriterion is first wrapped in a
            single-element Criterion.

        Returns
        -------
        Criteria
            A new Criteria with two alternatives: this subcriterion (wrapped
            in its own Criterion) and `other`.

        Raises
        ------
        ValueError
            If `other` is neither a Subcriterion nor a Criterion.
        """
        if isinstance(other, Subcriterion):
            alternative = Criterion([other])
        elif isinstance(other, Criterion):
            alternative = other
        else:
            raise ValueError(
                f"Expected to chain only with criterion or subcriterion, got {type(other)}"
            )
        return Criteria([Criterion([self]), alternative])
100
+
101
+
102
class ModelCriterion(Subcriterion):
    """
    Subcriterion tied to a model and one of its measurements.

    The arithmetic and comparison operators are used as a small builder
    DSL rather than for arithmetic or comparison: ``crit * w`` sets the
    weight, while ``crit < x``, ``crit > x`` and ``crit == x`` set the
    target together with the direction. Each of these mutates this instance
    in place and returns it. Because ``__eq__`` is overridden this way,
    ``==`` does not test equality between two ModelCriterion objects.

    Attributes
    ----------
    criterion_type : CriterionType
        The type of the criterion (defaults to ``CriterionType.model``).
    model_id : str
        The identifier of the model.
    measurement_name : str
        The name of the measurement.
    criterion : ModelCriterion.Criterion
        Weight, direction and target of the criterion.
    """

    class Criterion(BaseModel):
        """
        Weight, direction and target of a model criterion.

        Attributes
        ----------
        weight : float
            The weight of the criterion (defaults to 1.0).
        direction : DirectionEnum or None
            The direction of the comparison; None until configured.
        target : float or None
            The target value for the criterion; None until configured.
        """

        class DirectionEnum(str, Enum):
            """
            Direction of the comparison against the target.

            Attributes
            ----------
            gt : str
                Greater than (``">"``).
            lt : str
                Less than (``"<"``).
            eq : str
                Equal to (``"="``).
            """

            gt = ">"
            lt = "<"
            eq = "="

        weight: float = 1.0
        direction: DirectionEnum | None = None
        target: float | None = None

    criterion_type: CriterionType = CriterionType.model
    model_id: str
    measurement_name: str
    criterion: Criterion = Criterion()

    model_config = ConfigDict(protected_namespaces=())

    def __mul__(self, weight: float) -> "ModelCriterion":
        """
        Set the weight of the criterion.

        Parameters
        ----------
        weight : float
            The weight to store on ``criterion``.

        Returns
        -------
        ModelCriterion
            This same instance, after the update.
        """
        details = self.criterion
        details.weight = weight
        return self

    def __lt__(self, other: float) -> "ModelCriterion":
        """
        Configure the criterion as "less than `other`".

        Parameters
        ----------
        other : float
            The target value.

        Returns
        -------
        ModelCriterion
            This same instance, after the update.
        """
        details = self.criterion
        details.direction = ModelCriterion.Criterion.DirectionEnum.lt
        details.target = other
        return self

    def __gt__(self, other: float) -> "ModelCriterion":
        """
        Configure the criterion as "greater than `other`".

        Parameters
        ----------
        other : float
            The target value.

        Returns
        -------
        ModelCriterion
            This same instance, after the update.
        """
        details = self.criterion
        details.direction = ModelCriterion.Criterion.DirectionEnum.gt
        details.target = other
        return self

    def __eq__(self, other: float) -> "ModelCriterion":
        """
        Configure the criterion as "equal to `other`".

        Parameters
        ----------
        other : float
            The target value.

        Returns
        -------
        ModelCriterion
            This same instance, after the update.
        """
        details = self.criterion
        details.direction = ModelCriterion.Criterion.DirectionEnum.eq
        details.target = other
        return self

    # ``weight * crit`` behaves exactly like ``crit * weight``.
    __rmul__ = __mul__

    @model_serializer(mode="wrap")
    def validate_criterion_before_serialize(self, handler):
        """
        Refuse to serialize a criterion that has not been fully configured.

        Parameters
        ----------
        handler : callable
            The wrapped serialization handler.

        Returns
        -------
        Any
            Whatever `handler` produces for this instance.

        Raises
        ------
        ValueError
            If `criterion` is None, or its direction or target is not set.
        """
        details = self.criterion
        is_configured = (
            details is not None
            and details.direction is not None
            and details.target is not None
        )
        if not is_configured:
            raise ValueError("Expected direction and target to be set")
        return handler(self)
261
+
262
+
263
class NMutationCriterion(Subcriterion):
    """
    Subcriterion for mutation count-based criteria.

    Attributes
    ----------
    criterion_type : CriterionType
        The type of the criterion (defaults to ``CriterionType.n_mutations``).
    sequences : list of str
        List of sequences; defaults to an empty list. An empty list is
        left out of the serialized output.
    """

    criterion_type: CriterionType = CriterionType.n_mutations
    sequences: list[str] = Field(default_factory=list)

    @model_serializer(mode="wrap")
    def remove_empty_sequences(self, handler):
        """
        Drop the ``sequences`` key from the serialized output when it is empty.

        Parameters
        ----------
        handler : callable
            The wrapped serialization handler.

        Returns
        -------
        dict
            The serialized object, without ``sequences`` if that entry is
            empty or missing.
        """
        serialized = handler(self)
        # The key may already be absent (e.g. the caller excluded the field),
        # so look it up defensively instead of indexing, which would raise
        # KeyError.
        if not serialized.get("sequences"):
            serialized.pop("sequences", None)
        return serialized


# Lowercase alias for NMutationCriterion.
n_mutations = NMutationCriterion
300
+
301
+
302
class Criterion(RootModel):
    """
    Class representing a logical AND of subcriteria.

    ``criterion & x`` extends the conjunction, while ``criterion | x``
    starts a ``Criteria`` (logical OR) with this criterion as its first
    alternative.

    Attributes
    ----------
    root : list of Subcriterion
        The list of subcriteria that are AND-ed together.
    """

    root: list[ModelCriterion | NMutationCriterion | Subcriterion]

    def __and__(self, other: "Criterion | Subcriterion") -> "Criterion":
        """
        Combine this criterion with another using logical AND.

        Parameters
        ----------
        other : Criterion or Subcriterion
            The other criterion or subcriterion to combine.

        Returns
        -------
        Criterion
            A new Criterion holding this criterion's subcriteria followed
            by those contributed by `other`.

        Raises
        ------
        ValueError
            If `other` is not a Subcriterion or Criterion. Unsupported
            operands are rejected rather than silently dropped, matching
            ``Subcriterion.__and__``.
        """
        if isinstance(other, Subcriterion):
            others = [other]
        elif isinstance(other, Criterion):
            others = other.root
        else:
            raise ValueError(
                f"Expected to chain only with criterion or subcriterion, got {type(other)}"
            )

        return Criterion(self.root + others)

    def __or__(self, other: "Criterion | Subcriterion | Criteria") -> "Criteria":
        """
        Combine this criterion with another using logical OR.

        Parameters
        ----------
        other : Criterion or Subcriterion or Criteria
            The other operand. A Subcriterion is wrapped in a
            single-element Criterion; a Criteria contributes all of its
            alternatives.

        Returns
        -------
        Criteria
            A new Criteria whose first alternative is this criterion,
            followed by the alternative(s) from `other`.

        Raises
        ------
        ValueError
            If `other` is not a Subcriterion, Criterion or Criteria.
        """
        if isinstance(other, Criteria):
            # Flatten so that ``c | (a | b)`` yields one flat list of
            # alternatives instead of failing validation.
            return Criteria([self] + other.root)
        if isinstance(other, Subcriterion):
            other = Criterion([other])
        elif not isinstance(other, Criterion):
            raise ValueError(
                f"Expected to chain only with criterion or subcriterion, got {type(other)}"
            )

        return Criteria([self, other])
357
+
358
+
359
class Criteria(RootModel):
    """
    Class representing a logical OR of criteria.

    Attributes
    ----------
    root : list of Criterion
        The list of alternative criteria.
    """

    root: list[Criterion]

    def __or__(self, other: "Criterion | Subcriterion | Criteria") -> "Criteria":
        """
        Combine this criteria with another using logical OR.

        Parameters
        ----------
        other : Criterion or Subcriterion or Criteria
            The other operand. A Criteria contributes all of its
            alternatives, a Criterion contributes itself, and a
            Subcriterion is wrapped in a single-element Criterion first.

        Returns
        -------
        Criteria
            A new Criteria holding this object's alternatives followed by
            those contributed by `other`.

        Raises
        ------
        ValueError
            If `other` is not a Criteria, Criterion or Subcriterion.
        """
        if isinstance(other, Criteria):
            extra = other.root
        elif isinstance(other, Criterion):
            extra = [other]
        elif isinstance(other, Subcriterion):
            extra = [Criterion([other])]
        else:
            # Fail with an explicit message instead of an incidental
            # AttributeError on ``other.root``.
            raise ValueError(
                f"Expected to chain only with criteria, criterion or subcriterion, got {type(other)}"
            )

        return Criteria(self.root + extra)
393
+
394
+
395
+ class DesignConstraint:
396
+ """
397
+ Class for managing design constraints on a sequence.
398
+
399
+ Attributes
400
+ ----------
401
+ sequence : str
402
+ The sequence to constrain.
403
+ mutations : dict of int to set of str
404
+ Allowed amino acids at each position.
405
+ """
406
+
407
+ def __init__(self, sequence: str):
408
+ """
409
+ Initialize the design constraint.
410
+
411
+ Parameters
412
+ ----------
413
+ sequence : str
414
+ The sequence to constrain.
415
+ """
416
+ self.sequence = sequence
417
+ self.mutations = self.initialize(sequence)
418
+
419
+ def initialize(self, sequence: str) -> dict[int, set[str]]:
420
+ """
421
+ Initialize with no changes allowed to the sequence.
422
+
423
+ Parameters
424
+ ----------
425
+ sequence : str
426
+ The sequence to constrain.
427
+
428
+ Returns
429
+ -------
430
+ dict of int to set of str
431
+ Allowed amino acids at each position.
432
+ """
433
+ return {i: {aa} for i, aa in enumerate(sequence, start=1)}
434
+
435
+ def allow(
436
+ self,
437
+ amino_acids: list[str] | str | None = None,
438
+ positions: int | list[int] | None = None,
439
+ ) -> None:
440
+ """
441
+ Allow specific amino acids at given positions.
442
+
443
+ Parameters
444
+ ----------
445
+ amino_acids : list of str or str or None, optional
446
+ Amino acids to allow. If None, allows all amino acids in the sequence.
447
+ positions : int or list of int or None, optional
448
+ Positions to allow amino acids at. If None, allows at all positions.
449
+ """
450
+ if isinstance(positions, int):
451
+ positions = [positions]
452
+ elif positions is None:
453
+ positions = [i + 1 for i in range(len(self.sequence))]
454
+ if isinstance(amino_acids, str):
455
+ amino_acids = list(amino_acids)
456
+ elif amino_acids is None:
457
+ amino_acids = list(self.sequence)
458
+
459
+ for position in positions:
460
+ if position in self.mutations:
461
+ for aa in amino_acids:
462
+ self.mutations[position].add(aa)
463
+ else:
464
+ self.mutations[position] = set(amino_acids)
465
+
466
+ def remove(
467
+ self,
468
+ amino_acids: list[str] | str | None = None,
469
+ positions: int | list[int] | None = None,
470
+ ) -> None:
471
+ """
472
+ Remove specific amino acids from being allowed at given positions.
473
+
474
+ Parameters
475
+ ----------
476
+ amino_acids : list of str or str or None, optional
477
+ Amino acids to remove. If None, removes all amino acids in the sequence.
478
+ positions : int or list of int or None, optional
479
+ Positions to remove amino acids from. If None, removes from all positions.
480
+ """
481
+ if isinstance(positions, int):
482
+ positions = [positions]
483
+ elif positions is None:
484
+ positions = [i + 1 for i in range(len(self.sequence))]
485
+ if isinstance(amino_acids, str):
486
+ amino_acids = list(amino_acids)
487
+ elif amino_acids is None:
488
+ amino_acids = list(self.sequence)
489
+
490
+ for position in positions:
491
+ if position in self.mutations:
492
+ for aa in amino_acids:
493
+ if aa in self.mutations[position]:
494
+ self.mutations[position].remove(aa)
495
+
496
+ def as_dict(self) -> dict[int, list[str]]:
497
+ """
498
+ Convert the internal mutations representation into a dictionary.
499
+
500
+ Returns
501
+ -------
502
+ dict of int to list of str
503
+ Allowed amino acids at each position.
504
+ """
505
+ return {i: list(aa) for i, aa in self.mutations.items()}
506
+
507
+
508
class DesignAlgorithm(str, Enum):
    """
    Algorithms available for running a design.

    Every member is also a ``str``, so it compares equal to its plain
    string value.

    Attributes
    ----------
    genetic_algorithm : str
        Genetic algorithm (value ``"genetic-algorithm"``).
    """

    genetic_algorithm = "genetic-algorithm"
519
+
520
+
521
class Design(BaseModel):
    """
    Record describing a design and the settings it was run with.

    Attributes
    ----------
    id : str
        The design identifier.
    status : JobStatus
        The status of the design job.
    progress_counter : int
        The progress counter.
    created_date : datetime
        The creation date.
    algorithm : DesignAlgorithm
        The design algorithm used.
    num_rows : int
        The number of rows.
    num_steps : int
        The number of steps.
    assay_id : str
        The assay identifier.
    criteria : Criteria
        The design criteria.
    allowed_tokens : dict of str to list of str or None
        Allowed tokens for the design.
    pop_size : int
        Population size.
    n_offsprings : int
        Number of offsprings (GA parameter).
    crossover_prob : float
        Crossover probability (GA parameter).
    crossover_prob_pointwise : float
        Pointwise crossover probability (GA parameter).
    mutation_average_mutations_per_seq : int
        Average number of mutations per sequence (GA parameter).
    """

    # Identity and job state.
    id: str
    status: JobStatus
    progress_counter: int
    created_date: datetime

    # What was designed and against which objective.
    algorithm: DesignAlgorithm
    num_rows: int
    num_steps: int
    assay_id: str
    criteria: Criteria
    allowed_tokens: dict[str, list[str]] | None

    # Algorithm parameters.
    pop_size: int
    n_offsprings: int
    crossover_prob: float
    crossover_prob_pointwise: float
    mutation_average_mutations_per_seq: int

    def is_done(self):
        """
        Report whether the design job is done.

        Returns
        -------
        bool
            The result of ``status.done()``: True if the job is done,
            False otherwise.
        """
        current_status = self.status
        return current_status.done()
585
+
586
+
587
class DesignJob(Job):
    """
    Job specialization for design jobs.

    Attributes
    ----------
    job_type : Literal[JobType.designer]
        The type of the job; restricted to ``JobType.designer``.
    """

    job_type: Literal[JobType.designer]
598
+
599
+
600
class DesignResult(NamedTuple):
    """
    One row of design output.

    NOTE(review): the field meanings below are read off the field names
    only; array shapes and dtypes are not established in this file and
    should be confirmed against the code that builds these tuples.

    Attributes
    ----------
    step : int
        Presumably the design step the row belongs to.
    sample_index : int
        Presumably the index of the sample within that step.
    sequence : str
        The sequence for this row.
    scores : np.ndarray
        Scores for the sequence.
    subscores : np.ndarray
        Subscores for the sequence.
    means : np.ndarray
        Means for the sequence (presumably predicted means - confirm).
    vars : np.ndarray
        Variances for the sequence (presumably predicted variances - confirm).
    """

    step: int
    sample_index: int
    sequence: str
    scores: np.ndarray
    subscores: np.ndarray
    means: np.ndarray
    vars: np.ndarray
@@ -0,0 +1,27 @@
1
+ """
2
+ Embeddings module for using protein language models on OpenProtein.
3
+
4
+ isort:skip_file
5
+ """
6
+
7
+ from .embeddings import EmbeddingsAPI
8
+ from .models import EmbeddingModel
9
+ from .openprotein import OpenProteinModel
10
+ from .esm import ESMModel
11
+ from .poet import PoETModel
12
+ from .poet2 import PoET2Model
13
+ from .schemas import (
14
+ EmbeddedSequence,
15
+ EmbeddingsJob,
16
+ AttnJob,
17
+ LogitsJob,
18
+ ScoreJob,
19
+ ScoreIndelJob,
20
+ ScoreSingleSiteJob,
21
+ GenerateJob,
22
+ )
23
+ from .future import (
24
+ EmbeddingsGenerateFuture,
25
+ EmbeddingsResultFuture,
26
+ EmbeddingsScoreFuture,
27
+ )