openprotein-python 0.8.2__1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. openprotein/__init__.py +164 -0
  2. openprotein/_version.py +48 -0
  3. openprotein/align/__init__.py +8 -0
  4. openprotein/align/align.py +395 -0
  5. openprotein/align/api.py +428 -0
  6. openprotein/align/future.py +55 -0
  7. openprotein/align/msa.py +129 -0
  8. openprotein/align/schemas.py +165 -0
  9. openprotein/base.py +181 -0
  10. openprotein/chains.py +88 -0
  11. openprotein/common/__init__.py +5 -0
  12. openprotein/common/features.py +7 -0
  13. openprotein/common/model_metadata.py +33 -0
  14. openprotein/common/reduction.py +8 -0
  15. openprotein/config.py +9 -0
  16. openprotein/csv.py +31 -0
  17. openprotein/data/__init__.py +9 -0
  18. openprotein/data/api.py +218 -0
  19. openprotein/data/assaydataset.py +178 -0
  20. openprotein/data/data.py +93 -0
  21. openprotein/data/schemas.py +27 -0
  22. openprotein/design/__init__.py +16 -0
  23. openprotein/design/api.py +259 -0
  24. openprotein/design/design.py +125 -0
  25. openprotein/design/future.py +146 -0
  26. openprotein/design/schemas.py +607 -0
  27. openprotein/embeddings/__init__.py +27 -0
  28. openprotein/embeddings/api.py +619 -0
  29. openprotein/embeddings/embeddings.py +151 -0
  30. openprotein/embeddings/esm.py +33 -0
  31. openprotein/embeddings/future.py +146 -0
  32. openprotein/embeddings/models.py +421 -0
  33. openprotein/embeddings/openprotein.py +21 -0
  34. openprotein/embeddings/poet.py +446 -0
  35. openprotein/embeddings/poet2.py +505 -0
  36. openprotein/embeddings/schemas.py +78 -0
  37. openprotein/errors.py +76 -0
  38. openprotein/fasta.py +92 -0
  39. openprotein/fold/__init__.py +21 -0
  40. openprotein/fold/alphafold2.py +131 -0
  41. openprotein/fold/api.py +287 -0
  42. openprotein/fold/boltz.py +691 -0
  43. openprotein/fold/esmfold.py +54 -0
  44. openprotein/fold/fold.py +107 -0
  45. openprotein/fold/future.py +509 -0
  46. openprotein/fold/models.py +139 -0
  47. openprotein/fold/schemas.py +39 -0
  48. openprotein/jobs/__init__.py +9 -0
  49. openprotein/jobs/api.py +71 -0
  50. openprotein/jobs/futures.py +746 -0
  51. openprotein/jobs/jobs.py +69 -0
  52. openprotein/jobs/schemas.py +135 -0
  53. openprotein/models/__init__.py +4 -0
  54. openprotein/models/base.py +63 -0
  55. openprotein/models/foundation/rfdiffusion.py +283 -0
  56. openprotein/models/models.py +33 -0
  57. openprotein/predictor/__init__.py +25 -0
  58. openprotein/predictor/api.py +384 -0
  59. openprotein/predictor/models.py +374 -0
  60. openprotein/predictor/prediction.py +79 -0
  61. openprotein/predictor/predictor.py +242 -0
  62. openprotein/predictor/schemas.py +113 -0
  63. openprotein/predictor/validate.py +40 -0
  64. openprotein/prompt/__init__.py +9 -0
  65. openprotein/prompt/api.py +505 -0
  66. openprotein/prompt/models.py +142 -0
  67. openprotein/prompt/prompt.py +130 -0
  68. openprotein/prompt/schemas.py +49 -0
  69. openprotein/protein.py +587 -0
  70. openprotein/svd/__init__.py +9 -0
  71. openprotein/svd/api.py +206 -0
  72. openprotein/svd/models.py +288 -0
  73. openprotein/svd/schemas.py +31 -0
  74. openprotein/svd/svd.py +134 -0
  75. openprotein/umap/__init__.py +9 -0
  76. openprotein/umap/api.py +259 -0
  77. openprotein/umap/models.py +211 -0
  78. openprotein/umap/schemas.py +35 -0
  79. openprotein/umap/umap.py +175 -0
  80. openprotein/utils/uuid.py +29 -0
  81. openprotein_python-0.8.2.dist-info/METADATA +176 -0
  82. openprotein_python-0.8.2.dist-info/RECORD +84 -0
  83. openprotein_python-0.8.2.dist-info/WHEEL +4 -0
  84. openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0
@@ -0,0 +1,151 @@
1
+ """Embeddings API providing the interface for creating embeddings using protein language models."""
2
+
3
+ from openprotein.base import APISession
4
+
5
+ from . import api
6
+ from .esm import ESMModel
7
+ from .future import EmbeddingsResultFuture
8
+ from .models import EmbeddingModel
9
+ from .openprotein import OpenProteinModel
10
+ from .poet import PoETModel
11
+ from .poet2 import PoET2Model
12
+
13
+
14
class EmbeddingsAPI:
    """
    Entry point for creating embeddings with protein language models.

    Every model reported by the backend is attached to this object as an
    attribute named after its model ID, with hyphens replaced by underscores.
    A model can therefore be reached either through :meth:`get_model` or
    directly as an attribute, e.g. ``session.embedding.prot_seq.attn()``.

    Examples
    --------
    Calling a method on a model attribute:

    .. code-block:: python

        import openprotein
        session = openprotein.connect(username="user", password="password")
        # attention method of the protein sequence model
        session.embedding.prot_seq.attn()

    Looking a model up by name with :meth:`get_model`:

    .. code-block:: python

        import openprotein
        session = openprotein.connect(username="user", password="password")
        # list available models
        print(session.embedding.list_models())
        # fetch one of them by its ID
        model = session.embedding.get_model('prot-seq')
    """

    # The annotations below exist for static typing / autocomplete only
    # (e.g. pylance). They create no attributes by themselves; the real
    # values are assigned by ``_load_models`` during ``__init__``.

    #: PoET-2 model (alias of ``poet_2``)
    poet2: PoET2Model
    #: PoET model
    poet: PoETModel
    #: Prot-seq model
    prot_seq: OpenProteinModel
    #: Rotaprot model trained on UniRef50
    rotaprot_large_uniref50w: OpenProteinModel
    #: Rotaprot model trained on UniRef90
    rotaprot_large_uniref90_ft: OpenProteinModel
    #: PoET-2 model under its hyphen-normalized ID
    poet_2: PoET2Model

    #: ESM1b model
    esm1b: ESMModel  # alias
    esm1b_t33_650M_UR50S: ESMModel

    #: ESM1v model
    esm1v: ESMModel  # alias
    esm1v_t33_650M_UR90S_1: ESMModel
    esm1v_t33_650M_UR90S_2: ESMModel
    esm1v_t33_650M_UR90S_3: ESMModel
    esm1v_t33_650M_UR90S_4: ESMModel
    esm1v_t33_650M_UR90S_5: ESMModel

    #: ESM2 model
    esm2: ESMModel  # alias
    esm2_t12_35M_UR50D: ESMModel
    esm2_t30_150M_UR50D: ESMModel
    esm2_t33_650M_UR50D: ESMModel
    esm2_t36_3B_UR50D: ESMModel
    esm2_t6_8M_UR50D: ESMModel

    def __init__(self, session: APISession):
        """
        Bind the API to a session and load the available models.

        Parameters
        ----------
        session : APISession
            Session used for all requests issued by this API.
        """
        self.session = session
        # Populate the model attributes from the backend's model list.
        self._load_models()

    def _load_models(self):
        """Attach each listed model as an attribute, then set up short aliases."""
        for loaded in self.list_models():
            # Hyphens are not valid in attribute names, so normalize the ID.
            setattr(self, loaded.id.replace("-", "_"), loaded)

        # (alias, target) pairs; an alias is only created when its target
        # attribute exists and is truthy, so a missing model is not an error.
        alias_targets = (
            ("esm1b", "esm1b_t33_650M_UR50S"),
            ("esm1v", "esm1v_t33_650M_UR90S_1"),
            ("esm2", "esm2_t33_650M_UR50D"),
            ("poet2", "poet_2"),
        )
        for alias, target in alias_targets:
            if getattr(self, target, None):
                setattr(self, alias, getattr(self, target))

    def list_models(self) -> list[EmbeddingModel]:
        """
        List the models available for creating embeddings of your sequences.

        Returns
        -------
        list of EmbeddingModel
            One model object per model ID returned by ``api.list_models``,
            built through ``EmbeddingModel.create``.
        """
        return [
            EmbeddingModel.create(
                session=self.session, model_id=model_id, default=EmbeddingModel
            )
            for model_id in api.list_models(self.session)
        ]

    def get_model(self, name: str) -> EmbeddingModel:
        """
        Get a loaded model by its model ID.

        The lookup is a plain attribute access on this object after
        replacing hyphens in ``name`` with underscores; no request is made.

        Parameters
        ----------
        name : str
            The model identifier, e.g. ``'prot-seq'``.

        Returns
        -------
        EmbeddingModel
            The model stored under the normalized attribute name.

        Raises
        ------
        AttributeError
            If this object has no attribute with the normalized name.
        """
        return getattr(self, name.replace("-", "_"))

    def __get_results(self, job) -> EmbeddingsResultFuture:
        """
        Wrap an embedding job in a future for retrieving its results.

        Parameters
        ----------
        job : Job
            The embedding job whose results are to be retrieved.

        Returns
        -------
        EmbeddingsResultFuture
            A future bound to ``job`` and to this API's session.
        """
        return EmbeddingsResultFuture(job=job, session=self.session)
@@ -0,0 +1,33 @@
1
+ """Community-based ESM models."""
2
+
3
+ from .models import AttnModel, EmbeddingModel
4
+
5
+
6
class ESMModel(AttnModel, EmbeddingModel):
    """
    Inference endpoints for Facebook's ESM protein language models.

    Examples
    --------
    In IPython/Jupyter, append ``?`` to a model attribute to view its
    details (including supported tokens).

    .. code-block:: python

        >>> import openprotein
        >>> session = openprotein.connect(username="user", password="password")
        >>> session.embedding.esm2_t12_35M_UR50D?
    """

    # Model IDs handled by this class, grouped by ESM family.
    model_id = [
        # ESM-1b
        "esm1b_t33_650M_UR50S",
        # ESM-1v
        "esm1v_t33_650M_UR90S_1",
        "esm1v_t33_650M_UR90S_2",
        "esm1v_t33_650M_UR90S_3",
        "esm1v_t33_650M_UR90S_4",
        "esm1v_t33_650M_UR90S_5",
        # ESM-2
        "esm2_t12_35M_UR50D",
        "esm2_t30_150M_UR50D",
        "esm2_t33_650M_UR50D",
        "esm2_t36_3B_UR50D",
        "esm2_t6_8M_UR50D",
    ]
@@ -0,0 +1,146 @@
1
+ """Future for embeddings-related jobs."""
2
+
3
+ from collections import namedtuple
4
+ from typing import Generator
5
+
6
+ import numpy as np
7
+
8
+ from openprotein import config
9
+
10
+ """Embeddings results represented as futures."""
11
+
12
+ from openprotein.base import APISession
13
+ from openprotein.jobs import Future, MappedFuture, StreamingFuture
14
+
15
+ from . import api
16
+ from .schemas import (
17
+ AttnJob,
18
+ EmbeddingsJob,
19
+ GenerateJob,
20
+ JobType,
21
+ LogitsJob,
22
+ ScoreIndelJob,
23
+ ScoreJob,
24
+ ScoreSingleSiteJob,
25
+ )
26
+
27
+
28
class EmbeddingsResultFuture(MappedFuture, Future):
    """Future for working with the results of embeddings-related requests."""

    job: EmbeddingsJob | AttnJob | LogitsJob

    def __init__(
        self,
        session: APISession,
        job: EmbeddingsJob | AttnJob | LogitsJob,
        sequences: list[bytes] | list[str] | None = None,
        max_workers: int = config.MAX_CONCURRENT_WORKERS,
    ):
        """
        Create the future for a submitted job.

        Parameters
        ----------
        session : APISession
            Session used for all follow-up requests.
        job : EmbeddingsJob or AttnJob or LogitsJob
            The job this future tracks.
        sequences : list of bytes or list of str, optional
            Sequences submitted with the job. When omitted they are fetched
            on first access of :attr:`sequences`.
        max_workers : int
            Forwarded to the parent constructor.
        """
        super().__init__(session=session, job=job, max_workers=max_workers)
        if sequences is None:
            normalized = None
        else:
            # Store everything as bytes, encoding any str entries.
            normalized = [
                item.encode() if isinstance(item, str) else item
                for item in sequences
            ]
        self._sequences = normalized

    def stream(self):
        """Return the embeddings stream for this job as provided by the API layer."""
        return api.request_get_embeddings_stream(session=self.session, job_id=self.id)

    def get(self, verbose=False) -> list[np.ndarray]:
        """
        Retrieve the results via the parent implementation.

        Parameters
        ----------
        verbose : bool
            Forwarded unchanged to the parent ``get``.

        Returns
        -------
        list of np.ndarray
            The results returned by the parent ``get``.
        """
        return super().get(verbose=verbose)

    @property
    def sequences(self) -> list[bytes] | list[str]:
        """Sequences of the request, fetched once from the API if not supplied."""
        if self._sequences is not None:
            return self._sequences
        # Not supplied at construction: fetch and cache for later accesses.
        self._sequences = api.get_request_sequences(
            session=self.session, job_id=self.job.job_id, job_type=self.job.job_type
        )
        return self._sequences

    @property
    def id(self):
        """Job ID of the underlying job."""
        return self.job.job_id

    def __keys__(self):
        """
        Get the list of sequences submitted for the embed request.

        Returns
        -------
        list of bytes
            List of sequences.
        """
        return self.sequences

    def get_item(self, sequence: bytes) -> np.ndarray:
        """
        Get embedding results for the specified sequence.

        Parameters
        ----------
        sequence : bytes
            Sequence to fetch results for.

        Returns
        -------
        np.ndarray
            The decoded result for ``sequence``.
        """
        return api.result_decode(
            api.request_get_sequence_result(
                session=self.session,
                job_id=self.job.job_id,
                sequence=sequence,
                job_type=self.job.job_type,
            )
        )
94
+
95
+
96
class EmbeddingsScoreFuture(StreamingFuture, Future):
    """Future for manipulating results for embeddings score-related requests."""

    job: ScoreJob | ScoreIndelJob | ScoreSingleSiteJob

    def __init__(
        self,
        session: APISession,
        # ScoreIndelJob added so the signature agrees with the class-level
        # ``job`` annotation; GenerateJob is kept for the generate subclass.
        job: ScoreJob | ScoreIndelJob | ScoreSingleSiteJob | GenerateJob,
        sequences: list[bytes] | list[str] | None = None,
    ):
        """
        Create the future for a submitted scoring job.

        Parameters
        ----------
        session : APISession
            Session used for all follow-up requests.
        job : ScoreJob or ScoreIndelJob or ScoreSingleSiteJob or GenerateJob
            The job this future tracks.
        sequences : list of bytes or list of str, optional
            Sequences submitted with the job. When omitted they are fetched
            on first access of :attr:`sequences`.
        """
        super().__init__(session=session, job=job)
        self._sequences = sequences

    @property
    def sequences(self) -> list[bytes] | list[str]:
        """Sequences of the request, fetched once from the API if not supplied."""
        if self._sequences is None:
            self._sequences = api.get_request_sequences(self.session, self.job.job_id)
        return self._sequences

    def stream(self) -> Generator:
        """
        Yield one ``Score`` namedtuple per result row.

        The first row of the underlying stream is treated as a header. All
        columns before the first one whose name starts with ``"score"``
        become individual fields; the remaining columns are converted to
        floats and combined into a single numpy array stored in the
        ``score`` field. If no column name starts with ``"score"``, every
        column is treated as a score.

        Yields
        ------
        Score
            Namedtuple of the leading columns plus ``score`` (np.ndarray).
            Nothing is yielded when the underlying stream is empty.
        """
        # NOTE(review): ``self.job_type`` and ``self.id`` are not defined in
        # this class; presumably provided by the parent futures - confirm.
        if self.job_type == JobType.poet_generate:
            rows = api.request_get_generate_result(
                session=self.session, job_id=self.id
            )
        else:
            rows = api.request_get_score_result(session=self.session, job_id=self.id)

        # mut_code, ... for ssp
        # name, sequence, ... for score
        # A bare ``next(rows)`` on an empty stream would raise StopIteration
        # inside this generator, which Python converts to RuntimeError
        # (PEP 479). Use a default and finish cleanly instead.
        header = next(rows, None)
        if header is None:
            return

        score_start_index = 0
        for i, col_name in enumerate(header):
            if col_name.startswith("score"):
                score_start_index = i
                break
        Score = namedtuple("Score", header[:score_start_index] + ["score"])
        for line in rows:
            # combine scores into numpy array
            scores = np.array([float(s) for s in line[score_start_index:]])
            yield Score(*line[:score_start_index], scores)
137
+
138
+
139
class EmbeddingsGenerateFuture(EmbeddingsScoreFuture, StreamingFuture, Future):
    """Future for working with the results of embeddings generate requests."""

    job: GenerateJob

    @property
    def sequences(self):
        """
        Not available for generate jobs.

        Raises
        ------
        Exception
            Always, since a generate job does not support listing sequences.
        """
        raise Exception("generate job does not support listing sequences")