openprotein-python 0.8.2__1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. openprotein/__init__.py +164 -0
  2. openprotein/_version.py +48 -0
  3. openprotein/align/__init__.py +8 -0
  4. openprotein/align/align.py +395 -0
  5. openprotein/align/api.py +428 -0
  6. openprotein/align/future.py +55 -0
  7. openprotein/align/msa.py +129 -0
  8. openprotein/align/schemas.py +165 -0
  9. openprotein/base.py +181 -0
  10. openprotein/chains.py +88 -0
  11. openprotein/common/__init__.py +5 -0
  12. openprotein/common/features.py +7 -0
  13. openprotein/common/model_metadata.py +33 -0
  14. openprotein/common/reduction.py +8 -0
  15. openprotein/config.py +9 -0
  16. openprotein/csv.py +31 -0
  17. openprotein/data/__init__.py +9 -0
  18. openprotein/data/api.py +218 -0
  19. openprotein/data/assaydataset.py +178 -0
  20. openprotein/data/data.py +93 -0
  21. openprotein/data/schemas.py +27 -0
  22. openprotein/design/__init__.py +16 -0
  23. openprotein/design/api.py +259 -0
  24. openprotein/design/design.py +125 -0
  25. openprotein/design/future.py +146 -0
  26. openprotein/design/schemas.py +607 -0
  27. openprotein/embeddings/__init__.py +27 -0
  28. openprotein/embeddings/api.py +619 -0
  29. openprotein/embeddings/embeddings.py +151 -0
  30. openprotein/embeddings/esm.py +33 -0
  31. openprotein/embeddings/future.py +146 -0
  32. openprotein/embeddings/models.py +421 -0
  33. openprotein/embeddings/openprotein.py +21 -0
  34. openprotein/embeddings/poet.py +446 -0
  35. openprotein/embeddings/poet2.py +505 -0
  36. openprotein/embeddings/schemas.py +78 -0
  37. openprotein/errors.py +76 -0
  38. openprotein/fasta.py +92 -0
  39. openprotein/fold/__init__.py +21 -0
  40. openprotein/fold/alphafold2.py +131 -0
  41. openprotein/fold/api.py +287 -0
  42. openprotein/fold/boltz.py +691 -0
  43. openprotein/fold/esmfold.py +54 -0
  44. openprotein/fold/fold.py +107 -0
  45. openprotein/fold/future.py +509 -0
  46. openprotein/fold/models.py +139 -0
  47. openprotein/fold/schemas.py +39 -0
  48. openprotein/jobs/__init__.py +9 -0
  49. openprotein/jobs/api.py +71 -0
  50. openprotein/jobs/futures.py +746 -0
  51. openprotein/jobs/jobs.py +69 -0
  52. openprotein/jobs/schemas.py +135 -0
  53. openprotein/models/__init__.py +4 -0
  54. openprotein/models/base.py +63 -0
  55. openprotein/models/foundation/rfdiffusion.py +283 -0
  56. openprotein/models/models.py +33 -0
  57. openprotein/predictor/__init__.py +25 -0
  58. openprotein/predictor/api.py +384 -0
  59. openprotein/predictor/models.py +374 -0
  60. openprotein/predictor/prediction.py +79 -0
  61. openprotein/predictor/predictor.py +242 -0
  62. openprotein/predictor/schemas.py +113 -0
  63. openprotein/predictor/validate.py +40 -0
  64. openprotein/prompt/__init__.py +9 -0
  65. openprotein/prompt/api.py +505 -0
  66. openprotein/prompt/models.py +142 -0
  67. openprotein/prompt/prompt.py +130 -0
  68. openprotein/prompt/schemas.py +49 -0
  69. openprotein/protein.py +587 -0
  70. openprotein/svd/__init__.py +9 -0
  71. openprotein/svd/api.py +206 -0
  72. openprotein/svd/models.py +288 -0
  73. openprotein/svd/schemas.py +31 -0
  74. openprotein/svd/svd.py +134 -0
  75. openprotein/umap/__init__.py +9 -0
  76. openprotein/umap/api.py +259 -0
  77. openprotein/umap/models.py +211 -0
  78. openprotein/umap/schemas.py +35 -0
  79. openprotein/umap/umap.py +175 -0
  80. openprotein/utils/uuid.py +29 -0
  81. openprotein_python-0.8.2.dist-info/METADATA +176 -0
  82. openprotein_python-0.8.2.dist-info/RECORD +84 -0
  83. openprotein_python-0.8.2.dist-info/WHEEL +4 -0
  84. openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0
@@ -0,0 +1,259 @@
1
+ """UMAP REST API for making HTTP calls to our UMAP backend."""
2
+
3
+ import io
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from pydantic import TypeAdapter
8
+
9
+ from openprotein.base import APISession
10
+ from openprotein.errors import APIError, InvalidParameterError
11
+
12
+ from .schemas import FeatureType, UMAPEmbeddingsJob, UMAPFitJob, UMAPMetadata
13
+
14
# Base REST path for the UMAP backend; every endpoint in this module is built from it.
PATH_PREFIX = "v1/umap"
15
+
16
+
17
def umap_list_get(session: APISession) -> list[UMAPMetadata]:
    """
    List the metadata of all UMAPs.

    Parameters
    ----------
    session : APISession
        Session object for API communication.

    Returns
    -------
    list[UMAPMetadata]
        Validated metadata, one entry per UMAP in the backend response.
    """
    payload = session.get(PATH_PREFIX).json()
    return TypeAdapter(list[UMAPMetadata]).validate_python(payload)
22
+
23
+
24
def umap_get(session: APISession, umap_id: str) -> UMAPMetadata:
    """
    Fetch the metadata of a single UMAP.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    umap_id : str
        ID of the UMAP to look up.

    Returns
    -------
    UMAPMetadata
        Validated metadata parsed from the backend response.
    """
    payload = session.get(f"{PATH_PREFIX}/{umap_id}").json()
    return UMAPMetadata.model_validate(payload)
29
+
30
+
31
def umap_get_sequences(session: APISession, umap_id: str) -> list[bytes]:
    """
    Fetch the sequences that were used to fit a UMAP.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    umap_id : str
        ID of the UMAP whose sequences to fetch.

    Returns
    -------
    list[bytes]
        The sequences from the backend response, validated as bytes.
    """
    payload = session.get(f"{PATH_PREFIX}/{umap_id}/sequences").json()
    return TypeAdapter(list[bytes]).validate_python(payload)
49
+
50
+
51
def embed_get_sequence_result(
    session: APISession, job_id: str, sequence: str | bytes
) -> bytes:
    """
    Fetch the encoded UMAP embedding of one sequence from a job.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    job_id : str
        Job ID to retrieve the result from.
    sequence : str or bytes
        Sequence to retrieve the result for. Bytes are decoded to text
        before being placed in the request path.

    Returns
    -------
    bytes
        Raw response content; decode it with `embed_decode`.
    """
    seq_text = sequence.decode() if isinstance(sequence, bytes) else sequence
    return session.get(f"{PATH_PREFIX}/embed/{job_id}/{seq_text}").content
75
+
76
+
77
def embed_get_batch_result(session: APISession, job_id: str) -> bytes:
    """
    Fetch the encoded UMAP embeddings of a whole job as one CSV payload.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    job_id : str
        Job ID to retrieve results from.

    Returns
    -------
    bytes
        Raw response content of the job's ``csv`` endpoint; decode it with
        `embed_batch_decode`.
    """
    return session.get(f"{PATH_PREFIX}/embed/{job_id}/csv").content
95
+
96
+
97
def embed_decode(data: bytes) -> np.ndarray:
    """
    Decode a single embedding into a numpy array.

    Parameters
    ----------
    data : bytes
        Raw bytes encoding the array, as received over the API.

    Returns
    -------
    np.ndarray
        The decoded array.
    """
    # allow_pickle=False: never unpickle objects from data received over the API
    return np.load(io.BytesIO(data), allow_pickle=False)
111
+
112
+
113
def embed_batch_decode(data: bytes) -> np.ndarray:
    """
    Decode a batched CSV result into an array of UMAP embeddings.

    The payload is parsed as CSV with a header row. The first column is
    dropped and all remaining columns are returned, one row per CSV record.

    Parameters
    ----------
    data : bytes
        Raw CSV bytes, as received over the API.

    Returns
    -------
    np.ndarray
        Values of every column after the first, in row order.
    """
    # should contain header and sequence column
    frame = pd.read_csv(io.BytesIO(data))
    # drop the leading (sequence) column, keep only the projected values
    return frame.iloc[:, 1:].values
130
+
131
+
132
def umap_delete(session: APISession, umap_id: str) -> bool:
    """
    Delete a UMAP model.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    umap_id : str
        ID of the UMAP model to delete.

    Returns
    -------
    bool
        True when the backend answers with a 2xx status code.

    Raises
    ------
    APIError
        If the response status code is outside the 2xx range; the error
        carries the response text.
    """
    response = session.delete(f"{PATH_PREFIX}/{umap_id}")
    if not (200 <= response.status_code < 300):
        raise APIError(response.text)
    return True
154
+
155
+
156
def umap_fit_post(
    session: APISession,
    model_id: str,
    feature_type: str,
    sequences: list[bytes] | list[str] | None = None,
    assay_id: str | None = None,
    n_components: int = 2,
    n_neighbors: int = 15,
    min_dist: float = 0.1,
    reduction: str | None = None,
    **kwargs,
) -> UMAPFitJob:
    """
    Submit a UMAP fit job.

    Exactly one of `sequences` and `assay_id` must be given.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    model_id : str
        Model to use. Can be either svd_id or id of a foundational model.
    feature_type : str
        Type of feature to use for fitting UMAP. Either PLM or SVD.
    sequences : list[bytes] | list[str] | None, optional
        Sequences to fit the UMAP with. Bytes entries are decoded to text
        before being sent.
    assay_id : str | None, optional
        ID of an assay containing the sequences to fit the UMAP with.
    n_components : int
        Number of UMAP components to fit. Defaults to 2.
    n_neighbors : int
        Number of neighbors to use for fitting. Defaults to 15.
    min_dist : float
        Minimum distance in UMAP fitting. Defaults to 0.1.
    reduction : str | None
        Embedding reduction to use for fitting the UMAP. Only sent when
        not None. Defaults to None.
    kwargs :
        Additional keyword arguments merged into the request body, e.g.
        prompt_id for PoET models.

    Returns
    -------
    UMAPFitJob
        The fit job parsed from the backend response.

    Raises
    ------
    InvalidParameterError
        If both or neither of `sequences` and `assay_id` are provided.
    """
    has_sequences = sequences is not None
    has_assay = assay_id is not None
    # validate the input source before building the request
    if has_sequences and has_assay:
        raise InvalidParameterError("Expected only either sequences or assay_id")
    if not has_sequences and not has_assay:
        raise InvalidParameterError("Expected either sequences or assay_id")

    body = {
        "model_id": model_id,
        "feature_type": feature_type,
        "n_components": n_components,
        "n_neighbors": n_neighbors,
        "min_dist": min_dist,
    }
    if reduction is not None:
        body["reduction"] = reduction
    if sequences is not None:
        body["sequences"] = [
            seq if isinstance(seq, str) else seq.decode() for seq in sequences
        ]
    else:
        body["assay_id"] = assay_id
    # extra embedding options travel alongside the standard fields
    body.update(kwargs)

    response = session.post(PATH_PREFIX, json=body)
    # return job for metadata
    return UMAPFitJob.model_validate(response.json())
230
+
231
+
232
def umap_embed_post(
    session: APISession, umap_id: str, sequences: list[bytes] | list[str], **kwargs
) -> UMAPEmbeddingsJob:
    """
    POST a request for embeddings from the given UMAP model.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    umap_id : str
        UMAP model to use.
    sequences : list[bytes] | list[str]
        Sequences to project with the UMAP. Bytes entries are decoded to
        text before being sent.
    kwargs :
        Additional keyword arguments merged into the request body, the same
        way `umap_fit_post` handles them. Accepting them keeps callers that
        forward extra options from failing with a TypeError.

    Returns
    -------
    UMAPEmbeddingsJob
        The embeddings job parsed from the backend response.
    """
    endpoint = PATH_PREFIX + f"/{umap_id}/embed"

    sequences_unicode = [(s if isinstance(s, str) else s.decode()) for s in sequences]
    body = {
        "sequences": sequences_unicode,
    }
    # extra options are passed through to the backend untouched
    body.update(kwargs)
    response = session.post(endpoint, json=body)

    return UMAPEmbeddingsJob.model_validate(response.json())
@@ -0,0 +1,211 @@
1
+ """UMAP models on the OpenProtein system which can be used directly to create projected embeddings useful for visualization."""
2
+
3
+ import numpy as np
4
+
5
+ from openprotein import config
6
+ from openprotein.base import APISession
7
+ from openprotein.embeddings import EmbeddingModel, EmbeddingsResultFuture
8
+ from openprotein.jobs import Future, JobsAPI
9
+
10
+ from . import api
11
+ from .schemas import UMAPEmbeddingsJob, UMAPFitJob, UMAPMetadata
12
+
13
+
14
class UMAPModel(Future):
    """
    UMAP model that can be used to create projected embeddings.

    The model is also implemented as a `Future` to allow waiting for a fit job.
    The projected embeddings of the sequences used to fit the UMAP can be
    accessed using `embeddings`.
    """

    job: UMAPFitJob

    def __init__(
        self,
        session: APISession,
        job: UMAPFitJob | None = None,
        metadata: UMAPMetadata | None = None,
    ):
        """
        Build the model from a fit job, from UMAP metadata, or from both.

        Parameters
        ----------
        session : APISession
            Session object for API communication.
        job : UMAPFitJob or None, optional
            Fit job of the UMAP. When omitted it is looked up through
            ``session.jobs`` using the metadata id.
        metadata : UMAPMetadata or None, optional
            Metadata of the UMAP. When omitted it is fetched using the
            job id.

        Raises
        ------
        ValueError
            If neither `job` nor `metadata` is provided.
        """
        # Initializes with either job get or umap metadata get.
        if metadata is None:
            # use job to fetch metadata
            if job is None:
                raise ValueError("Expected umap metadata or job")
            metadata = api.umap_get(session, job.job_id)
        self._metadata = metadata
        if job is None:
            jobs_api = getattr(session, "jobs", None)
            assert isinstance(jobs_api, JobsAPI)
            job = UMAPFitJob.create(jobs_api.get_job(job_id=metadata.id))
        # lazily populated caches for the `sequences` / `embeddings` properties
        self._sequences = None
        self._embeddings = None
        super().__init__(session, job)

    def __str__(self) -> str:
        return str(self.metadata)

    def __repr__(self) -> str:
        return repr(self.metadata)

    @property
    def id(self):
        """UMAP unique identifier."""
        return self._metadata.id

    @property
    def n_components(self):
        """Number of components specified for the UMAP."""
        return self._metadata.n_components

    @property
    def n_neighbors(self):
        """Number of neighbors specified for the UMAP."""
        return self._metadata.n_neighbors

    @property
    def min_dist(self):
        """Minimum distance specified for the UMAP."""
        return self._metadata.min_dist

    @property
    def sequence_length(self):
        """Sequence length constraint of the UMAP."""
        return self._metadata.sequence_length

    @property
    def reduction(self):
        """Reduction used to fit the UMAP."""
        return self._metadata.reduction

    @property
    def metadata(self):
        """Metadata of the UMAP, re-fetched first while the UMAP is not done."""
        self._refresh_metadata()
        return self._metadata

    @property
    def sequences(self):
        """The sequences used to fit the UMAP (fetched once, then cached)."""
        if self._sequences is None:
            self._sequences = self.get_inputs()
        return self._sequences

    @property
    def embeddings(self):
        """
        The projected embeddings of the sequences used to fit the UMAP.

        Returned as a list of ``(sequence, embedding)`` pairs; fetched once,
        then cached.
        """
        if self._embeddings is None:
            data = api.embed_get_batch_result(session=self.session, job_id=self.id)
            # pair each fitted sequence with its decoded row, in order
            self._embeddings = list(
                zip(self.sequences, api.embed_batch_decode(data))
            )
        return self._embeddings

    def _refresh_metadata(self):
        """Re-fetch the metadata unless it already reports a done status."""
        if not self._metadata.is_done():
            self._metadata = api.umap_get(self.session, self._metadata.id)

    def get_model(self) -> EmbeddingModel:
        """
        Create the base embeddings model used for the UMAP.

        The lookup uses the metadata's `model_id` (the model the UMAP was
        fit on), not the UMAP's own id.
        """
        return EmbeddingModel.create(
            session=self.session, model_id=self._metadata.model_id
        )

    @property
    def model(self) -> EmbeddingModel:
        """Base embeddings model used for the UMAP."""
        return self.get_model()

    def delete(self) -> bool:
        """
        Delete this UMAP model.
        """
        return api.umap_delete(self.session, self.id)

    def get(self, verbose: bool = False):
        """Retrieve this UMAP model itself."""
        return self

    def get_inputs(self) -> list[bytes]:
        """
        Get sequences used for umap job.

        Returns
        -------
        list[bytes]
            list of sequences
        """
        return api.umap_get_sequences(session=self.session, umap_id=self.id)

    def embed(
        self, sequences: list[bytes] | list[str], **kwargs
    ) -> "UMAPEmbeddingsResultFuture":
        """
        Use this UMAP model to get projected embeddings from input sequences.

        Parameters
        ----------
        sequences : list[bytes] | list[str]
            List of protein sequences.
        kwargs :
            Additional keyword arguments forwarded to `api.umap_embed_post`.

        Returns
        -------
        UMAPEmbeddingsResultFuture
            Future result containing the projected embeddings.
        """
        return UMAPEmbeddingsResultFuture.create(
            session=self.session,
            job=api.umap_embed_post(
                session=self.session, umap_id=self.id, sequences=sequences, **kwargs
            ),
            sequences=sequences,
        )
176
+
177
+
178
class UMAPEmbeddingsResultFuture(EmbeddingsResultFuture, Future):
    """Future representing the results of a UMAP embeddings job."""

    job: UMAPEmbeddingsJob

    def wait(
        self,
        interval: int = config.POLLING_INTERVAL,
        timeout: int | None = None,
        verbose: bool = False,
    ) -> list[np.ndarray]:
        """
        Wait for the UMAP embeddings job and retrieve the embeddings.

        All arguments are handed to the parent implementation unchanged.
        """
        return super().wait(interval, timeout, verbose)

    def get(self, verbose=False) -> list[np.ndarray]:
        """Get all the UMAP projected embeddings from the job."""
        return super().get(verbose)

    def get_item(self, sequence: bytes) -> np.ndarray:
        """
        Get the UMAP embedding of one sequence.

        Parameters
        ----------
        sequence : bytes
            Sequence to fetch UMAP embeddings for.

        Returns
        -------
        np.ndarray
            UMAP embedding decoded into a numpy array.
        """
        return api.embed_decode(
            api.embed_get_sequence_result(self.session, self.job.job_id, sequence)
        )
@@ -0,0 +1,35 @@
1
+ """Schemas for OpenProtein UMAP system."""
2
+
3
+ from datetime import datetime
4
+ from typing import Literal
5
+
6
+ from pydantic import BaseModel, ConfigDict
7
+
8
+ from openprotein.common import FeatureType
9
+ from openprotein.jobs import BatchJob, Job, JobStatus, JobType
10
+
11
+
12
class UMAPMetadata(BaseModel):
    """Metadata describing a UMAP and the parameters it was fit with."""

    # `model_id` starts with pydantic's protected "model_" prefix, so the
    # protected namespaces are cleared.
    model_config = ConfigDict(protected_namespaces=())

    # identity and job state
    id: str
    status: JobStatus
    created_date: datetime | None = None
    # model and feature type the UMAP was fit on
    model_id: str
    feature_type: FeatureType
    # fit hyperparameters
    n_components: int = 2
    n_neighbors: int = 15
    min_dist: float = 0.1
    reduction: str | None = None
    sequence_length: int | None = None

    def is_done(self):
        """Report whether `status` is a done status."""
        return self.status.done()
28
+
29
+
30
class UMAPFitJob(Job):
    """Job schema for a UMAP fit; `job_type` is pinned to `JobType.umap_fit`."""

    job_type: Literal[JobType.umap_fit]
32
+
33
+
34
class UMAPEmbeddingsJob(Job, BatchJob):
    """Job schema for UMAP embeddings; `job_type` is pinned to `JobType.umap_embed`."""

    job_type: Literal[JobType.umap_embed]
@@ -0,0 +1,175 @@
1
+ """UMAP API providing the interface to fit and run UMAP visualizations."""
2
+
3
+ from openprotein.base import APISession
4
+ from openprotein.common import FeatureType, ReductionType
5
+ from openprotein.data import AssayDataset, AssayMetadata
6
+ from openprotein.embeddings import EmbeddingModel, EmbeddingsAPI
7
+ from openprotein.errors import InvalidParameterError
8
+ from openprotein.jobs import JobsAPI
9
+ from openprotein.svd import SVDAPI, SVDModel
10
+
11
+ from . import api
12
+ from .models import UMAPModel
13
+
14
+
15
class UMAPAPI:
    """UMAP API providing the interface to fit and run UMAP visualizations."""

    def __init__(
        self,
        session: APISession,
    ):
        self.session = session

    def fit_umap(
        self,
        model: EmbeddingModel | SVDModel | str,
        feature_type: FeatureType | None = None,
        sequences: list[bytes] | list[str] | None = None,
        assay: AssayMetadata | AssayDataset | str | None = None,
        n_components: int = 2,
        n_neighbors: int = 15,
        min_dist: float = 0.1,
        reduction: ReductionType | None = None,
        **kwargs,
    ) -> UMAPModel:
        """
        Fit an UMAP on the sequences with the specified model and hyperparameters.

        Parameters
        ----------
        model : EmbeddingModel or SVDModel or str
            Instance of either EmbeddingModel or SVDModel to use depending
            on feature type. Can also be a str specifying the model id,
            but then feature_type would have to be specified.
        feature_type : FeatureType or None, optional
            Type of features to use for encoding sequences. "SVD" or "PLM".
            None would require model to be EmbeddingModel or SVDModel.
            When model is an EmbeddingModel or SVDModel the feature type is
            taken from the model and this argument is ignored.
        sequences : list of bytes or str, or None, optional
            Optional sequences to fit UMAP with. Either use sequences or
            assay.
        assay : AssayMetadata or AssayDataset or str or None, optional
            Optional assay containing sequences to fit UMAP with.
            Or its assay_id. Either use sequences or assay.
        n_components : int, optional
            Number of UMAP components to fit. Defaults to 2.
        n_neighbors : int, optional
            Number of neighbors to use for fitting. Defaults to 15.
        min_dist : float, optional
            Minimum distance in UMAP fitting. Defaults to 0.1.
        reduction : str or None, optional
            Type of embedding reduction to use for computing features.
            E.g. "MEAN" or "SUM". Useful when dealing with variable length
            sequence. Required when the feature type is PLM.
            Defaults to None.
        kwargs :
            Additional keyword arguments to be passed to foundational models, e.g. prompt_id for PoET models.

        Returns
        -------
        UMAPModel
            The UMAP model being fit.

        Raises
        ------
        InvalidParameterError
            If feature_type cannot be determined, is not a supported
            feature type, or if reduction is missing for PLM features.
        """
        # a model instance dictates the feature type; a str id relies on the argument
        if isinstance(model, EmbeddingModel):
            feature_type = FeatureType.PLM
        elif isinstance(model, SVDModel):
            feature_type = FeatureType.SVD
        if feature_type is None:
            raise InvalidParameterError(
                "Expected feature_type to be provided if passing str model_id as model"
            )
        # resolve the model (fetching it when only an id was given) and take its id
        if feature_type == FeatureType.PLM:
            if reduction is None:
                raise InvalidParameterError(
                    "Expected reduction if using EmbeddingModel"
                )
            if isinstance(model, str):
                embeddings_api = getattr(self.session, "embedding", None)
                assert isinstance(embeddings_api, EmbeddingsAPI)
                model = embeddings_api.get_model(model)
            assert isinstance(model, EmbeddingModel), "Expected EmbeddingModel"
            model_id = model.id
        elif feature_type == FeatureType.SVD:
            if isinstance(model, str):
                svd_api = getattr(self.session, "svd", None)
                assert isinstance(svd_api, SVDAPI)
                model = svd_api.get_svd(model)
            assert isinstance(model, SVDModel), "Expected SVDModel"
            model_id = model.id
        else:
            # without this branch model_id would be unbound below
            raise InvalidParameterError(
                f"Unsupported feature_type: {feature_type!r}"
            )
        # get assay_id
        if isinstance(assay, AssayMetadata):
            assay_id = assay.assay_id
        elif isinstance(assay, AssayDataset):
            assay_id = assay.id
        else:
            assay_id = assay
        return UMAPModel(
            session=self.session,
            job=api.umap_fit_post(
                session=self.session,
                model_id=model_id,
                feature_type=feature_type,
                sequences=sequences,
                assay_id=assay_id,
                n_components=n_components,
                n_neighbors=n_neighbors,
                min_dist=min_dist,
                reduction=reduction,
                **kwargs,
            ),
        )

    def get_umap(self, umap_id: str) -> UMAPModel:
        """
        Get a UMAP model by id, including UMAP dimension and sequence lengths.

        Parameters
        ----------
        umap_id : str
            The ID of the UMAP job.

        Returns
        -------
        UMAPModel
            The model with the UMAP fit.
        """
        metadata = api.umap_get(self.session, umap_id)
        return UMAPModel(session=self.session, metadata=metadata)

    def __delete_umap(self, umap_id: str) -> bool:
        """
        Delete UMAP model.

        Parameters
        ----------
        umap_id : str
            The ID of the UMAP job.

        Returns
        -------
        bool
            True: successful deletion
        """
        return api.umap_delete(self.session, umap_id)

    def list_umap(self) -> list[UMAPModel]:
        """
        List UMAP models made by user.

        Takes no args.

        Returns
        -------
        list[UMAPModel]
            UMAPModels
        """
        # check up front that the session exposes a JobsAPI as `jobs`
        jobs_api = getattr(self.session, "jobs", None)
        assert isinstance(jobs_api, JobsAPI)
        return [
            UMAPModel(session=self.session, metadata=metadata)
            for metadata in api.umap_list_get(self.session)
        ]
@@ -0,0 +1,29 @@
1
+ from uuid import UUID
2
+
3
+
4
def is_valid_uuid(uuid_to_test: str, version=4):
    """
    Check if uuid_to_test is a valid UUID in canonical string form.

    Parameters
    ----------
    uuid_to_test : str
        Candidate string. Non-string input is never a valid UUID and
        yields `False` instead of raising.
    version : int, optional
        UUID version to validate against. Defaults to 4. A version that
        `uuid.UUID` rejects with `ValueError` yields `False`.

    Returns
    -------
    `True` if uuid_to_test is a valid UUID, otherwise `False`.

    Examples
    --------
    >>> is_valid_uuid('c9bf9e57-1685-4c89-bafb-ff5af830be8a')
    True
    >>> is_valid_uuid('c9bf9e58')
    False
    >>> is_valid_uuid(None)
    False
    """
    # UUID() raises TypeError/AttributeError (not ValueError) on non-str input
    if not isinstance(uuid_to_test, str):
        return False
    try:
        uuid_obj = UUID(uuid_to_test, version=version)
    except ValueError:
        return False
    # UUID() overwrites the version bits, so a round-trip mismatch means the
    # input was not a canonical UUID of the requested version
    return str(uuid_obj) == uuid_to_test