openprotein-python 0.4.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/PKG-INFO +2 -2
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/api/align.py +1 -1
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/api/data.py +3 -3
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/api/design.py +1 -1
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/api/embedding.py +8 -16
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/api/fold.py +1 -1
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/api/jobs.py +6 -6
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/api/poet.py +1 -1
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/api/predict.py +72 -27
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/api/train.py +2 -2
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/futures.py +12 -9
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/jobs.py +2 -2
- openprotein_python-0.4.2/openprotein/pydantic.py +20 -0
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/schemas.py +1 -1
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/pyproject.toml +2 -2
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/LICENSE.txt +0 -0
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/README.md +0 -0
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/__init__.py +0 -0
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/_version.py +0 -0
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/api/__init__.py +0 -0
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/base.py +0 -0
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/config.py +0 -0
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/errors.py +0 -0
- {openprotein_python-0.4.1 → openprotein_python-0.4.2}/openprotein/fasta.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: openprotein-python
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: OpenProtein Python interface.
|
|
5
5
|
Home-page: https://docs.openprotein.ai/
|
|
6
6
|
License: MIT
|
|
@@ -16,7 +16,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
17
17
|
Classifier: Programming Language :: Python :: 3
|
|
18
18
|
Requires-Dist: pandas (>=1)
|
|
19
|
-
Requires-Dist: pydantic (>=1)
|
|
19
|
+
Requires-Dist: pydantic (>=1.0.0,<2.0.0)
|
|
20
20
|
Requires-Dist: requests (>=2)
|
|
21
21
|
Requires-Dist: tqdm (>=4)
|
|
22
22
|
Description-Content-Type: text/markdown
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from typing import Iterator, Optional, List, BinaryIO, Literal, Union
|
|
2
|
-
from pydantic import BaseModel, Field, validator, root_validator
|
|
2
|
+
from openprotein.pydantic import BaseModel, Field, validator, root_validator
|
|
3
3
|
from enum import Enum
|
|
4
4
|
from io import BytesIO
|
|
5
5
|
import random
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
|
-
import pydantic
|
|
3
|
-
from pydantic import BaseModel
|
|
2
|
+
import openprotein.pydantic as pydantic
|
|
3
|
+
from openprotein.pydantic import BaseModel
|
|
4
4
|
from typing import Optional, List, Union
|
|
5
5
|
from datetime import datetime
|
|
6
6
|
from io import BytesIO
|
|
@@ -468,7 +468,7 @@ class DataAPI:
|
|
|
468
468
|
metadata.sequence_length = len(table["sequence"].values[0])
|
|
469
469
|
return AssayDataset(self.session, metadata)
|
|
470
470
|
|
|
471
|
-
def get(self, assay_id: str) -> AssayMetadata:
|
|
471
|
+
def get(self, assay_id: str, verbose: bool = False) -> AssayMetadata:
|
|
472
472
|
"""
|
|
473
473
|
Get an assay dataset by its ID.
|
|
474
474
|
|
|
@@ -9,7 +9,7 @@ from openprotein.jobs import JobType, Job
|
|
|
9
9
|
|
|
10
10
|
from openprotein.errors import APIError
|
|
11
11
|
from openprotein.futures import FutureFactory, FutureBase
|
|
12
|
-
from pydantic import BaseModel, Field, validator
|
|
12
|
+
from openprotein.pydantic import BaseModel, Field, validator
|
|
13
13
|
from datetime import datetime
|
|
14
14
|
import re
|
|
15
15
|
|
|
@@ -11,7 +11,7 @@ from openprotein.api.poet import (
|
|
|
11
11
|
)
|
|
12
12
|
from openprotein.futures import FutureBase, FutureFactory
|
|
13
13
|
|
|
14
|
-
from pydantic import BaseModel, parse_obj_as
|
|
14
|
+
from openprotein.pydantic import BaseModel, parse_obj_as
|
|
15
15
|
import numpy as np
|
|
16
16
|
from typing import Optional, List, Union, Any
|
|
17
17
|
import io
|
|
@@ -247,7 +247,7 @@ class EmbeddingResultFuture(MappedAsyncJobFuture, FutureBase):
|
|
|
247
247
|
|
|
248
248
|
def get(self, verbose=False) -> List:
|
|
249
249
|
return super().get(verbose=verbose)
|
|
250
|
-
|
|
250
|
+
|
|
251
251
|
@property
|
|
252
252
|
def sequences(self):
|
|
253
253
|
if self._sequences is None:
|
|
@@ -305,9 +305,7 @@ def embedding_model_post(
|
|
|
305
305
|
"""
|
|
306
306
|
endpoint = PATH_PREFIX + f"/models/{model_id}/embed"
|
|
307
307
|
|
|
308
|
-
sequences_unicode = [
|
|
309
|
-
(s if isinstance(s, str) else s.decode()) for s in sequences
|
|
310
|
-
]
|
|
308
|
+
sequences_unicode = [(s if isinstance(s, str) else s.decode()) for s in sequences]
|
|
311
309
|
body = {
|
|
312
310
|
"sequences": sequences_unicode,
|
|
313
311
|
}
|
|
@@ -345,9 +343,7 @@ def embedding_model_logits_post(
|
|
|
345
343
|
"""
|
|
346
344
|
endpoint = PATH_PREFIX + f"/models/{model_id}/logits"
|
|
347
345
|
|
|
348
|
-
sequences_unicode = [
|
|
349
|
-
(s if isinstance(s, str) else s.decode()) for s in sequences
|
|
350
|
-
]
|
|
346
|
+
sequences_unicode = [(s if isinstance(s, str) else s.decode()) for s in sequences]
|
|
351
347
|
body = {
|
|
352
348
|
"sequences": sequences_unicode,
|
|
353
349
|
}
|
|
@@ -385,9 +381,7 @@ def embedding_model_attn_post(
|
|
|
385
381
|
"""
|
|
386
382
|
endpoint = PATH_PREFIX + f"/models/{model_id}/attn"
|
|
387
383
|
|
|
388
|
-
sequences_unicode = [
|
|
389
|
-
(s if isinstance(s, str) else s.decode()) for s in sequences
|
|
390
|
-
]
|
|
384
|
+
sequences_unicode = [(s if isinstance(s, str) else s.decode()) for s in sequences]
|
|
391
385
|
body = {
|
|
392
386
|
"sequences": sequences_unicode,
|
|
393
387
|
}
|
|
@@ -500,9 +494,7 @@ def svd_embed_post(session: APISession, svd_id: str, sequences: List[bytes]) ->
|
|
|
500
494
|
"""
|
|
501
495
|
endpoint = PATH_PREFIX + f"/svd/{svd_id}/embed"
|
|
502
496
|
|
|
503
|
-
sequences_unicode = [
|
|
504
|
-
(s if isinstance(s, str) else s.decode()) for s in sequences
|
|
505
|
-
]
|
|
497
|
+
sequences_unicode = [(s if isinstance(s, str) else s.decode()) for s in sequences]
|
|
506
498
|
body = {
|
|
507
499
|
"sequences": sequences_unicode,
|
|
508
500
|
}
|
|
@@ -715,7 +707,7 @@ class SVDModel(AsyncJobFuture, FutureBase):
|
|
|
715
707
|
"""Get job associated with this SVD model"""
|
|
716
708
|
return job_get(self.session, self.id)
|
|
717
709
|
|
|
718
|
-
def get(self):
|
|
710
|
+
def get(self, verbose: bool = False):
|
|
719
711
|
# overload for AsyncJobFuture
|
|
720
712
|
return self
|
|
721
713
|
|
|
@@ -963,7 +955,7 @@ class PoETModel(OpenProteinModel, EmbBase):
|
|
|
963
955
|
sequences: List[bytes],
|
|
964
956
|
n_components: int = 1024,
|
|
965
957
|
reduction: Optional[str] = None,
|
|
966
|
-
) -> SVDModel:
|
|
958
|
+
) -> SVDModel: # type: ignore
|
|
967
959
|
"""
|
|
968
960
|
Fit an SVD on the embedding results of this model.
|
|
969
961
|
|
|
@@ -3,7 +3,7 @@ from openprotein.api.jobs import Job, MappedAsyncJobFuture
|
|
|
3
3
|
import openprotein.config as config
|
|
4
4
|
from openprotein.api.embedding import ModelMetadata
|
|
5
5
|
from openprotein.api.align import validate_msa, MSAFuture
|
|
6
|
-
import pydantic
|
|
6
|
+
import openprotein.pydantic as pydantic
|
|
7
7
|
from typing import Optional, List, Union, Tuple
|
|
8
8
|
from openprotein.futures import FutureBase, FutureFactory
|
|
9
9
|
from abc import ABC, abstractmethod
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# Jobs and job centric flows
|
|
2
2
|
|
|
3
3
|
|
|
4
|
-
from typing import List, Union
|
|
4
|
+
from typing import List, Union, Optional
|
|
5
5
|
import concurrent.futures
|
|
6
6
|
import time
|
|
7
7
|
|
|
8
8
|
import tqdm
|
|
9
|
-
import pydantic
|
|
9
|
+
import openprotein.pydantic as pydantic
|
|
10
10
|
|
|
11
11
|
from openprotein.base import APISession
|
|
12
12
|
import openprotein.config as config
|
|
@@ -105,7 +105,7 @@ class JobsAPI:
|
|
|
105
105
|
more_recent_than=more_recent_than,
|
|
106
106
|
)
|
|
107
107
|
|
|
108
|
-
def get(self, job_id) -> Job:
|
|
108
|
+
def get(self, job_id: str, verbose: bool = False) -> Job:
|
|
109
109
|
"""get Job by ID"""
|
|
110
110
|
return load_job(self.session, job_id)
|
|
111
111
|
# return job_get(self.session, job_id)
|
|
@@ -150,7 +150,7 @@ class AsyncJobFuture:
|
|
|
150
150
|
def cancelled(self):
|
|
151
151
|
return self.job.cancelled()
|
|
152
152
|
|
|
153
|
-
def get(self, verbose=False):
|
|
153
|
+
def get(self, verbose: bool = False):
|
|
154
154
|
raise NotImplementedError()
|
|
155
155
|
|
|
156
156
|
def wait_until_done(
|
|
@@ -176,7 +176,7 @@ class AsyncJobFuture:
|
|
|
176
176
|
def wait(
|
|
177
177
|
self,
|
|
178
178
|
interval: int = config.POLLING_INTERVAL,
|
|
179
|
-
timeout: int = None,
|
|
179
|
+
timeout: Optional[int] = None,
|
|
180
180
|
verbose: bool = False,
|
|
181
181
|
):
|
|
182
182
|
"""
|
|
@@ -195,7 +195,7 @@ class AsyncJobFuture:
|
|
|
195
195
|
self.session, interval=interval, timeout=timeout, verbose=verbose
|
|
196
196
|
)
|
|
197
197
|
self.job = job
|
|
198
|
-
return self.get(
|
|
198
|
+
return self.get()
|
|
199
199
|
|
|
200
200
|
|
|
201
201
|
class StreamingAsyncJobFuture(AsyncJobFuture):
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from typing import Optional, List, Union, Any, Dict, Literal
|
|
2
|
-
from pydantic import BaseModel
|
|
2
|
+
from openprotein.pydantic import BaseModel, root_validator
|
|
3
3
|
|
|
4
4
|
from openprotein.base import APISession
|
|
5
5
|
from openprotein.api.jobs import AsyncJobFuture
|
|
@@ -16,6 +16,29 @@ class SequenceDataset(BaseModel):
|
|
|
16
16
|
sequences: List[str]
|
|
17
17
|
|
|
18
18
|
|
|
19
|
+
class _Prediction(BaseModel):
|
|
20
|
+
"""Prediction details."""
|
|
21
|
+
|
|
22
|
+
@root_validator(pre=True)
|
|
23
|
+
def extract_pred(cls, values):
|
|
24
|
+
p = values.pop("properties")
|
|
25
|
+
name = list(p.keys())[0]
|
|
26
|
+
ymu = p[name]["y_mu"]
|
|
27
|
+
yvar = p[name]["y_var"]
|
|
28
|
+
p["name"] = name
|
|
29
|
+
p["y_mu"] = ymu
|
|
30
|
+
p["y_var"] = yvar
|
|
31
|
+
|
|
32
|
+
values.update(p)
|
|
33
|
+
return values
|
|
34
|
+
|
|
35
|
+
model_id: str
|
|
36
|
+
model_name: str
|
|
37
|
+
y_mu: Optional[float] = None
|
|
38
|
+
y_var: Optional[float] = None
|
|
39
|
+
name: Optional[str]
|
|
40
|
+
|
|
41
|
+
|
|
19
42
|
class Prediction(BaseModel):
|
|
20
43
|
"""Prediction details."""
|
|
21
44
|
|
|
@@ -35,6 +58,17 @@ class PredictJobBase(Job):
|
|
|
35
58
|
class PredictJob(PredictJobBase):
|
|
36
59
|
"""Properties about predict job returned via API."""
|
|
37
60
|
|
|
61
|
+
@root_validator(pre=True)
|
|
62
|
+
def extract_pred(cls, values):
|
|
63
|
+
# Extracting 'predictions' and 'sequences' from the input values
|
|
64
|
+
v = values.pop("result")
|
|
65
|
+
preds = [i["predictions"] for i in v]
|
|
66
|
+
seqs = [i["sequence"] for i in v]
|
|
67
|
+
values["result"] = [
|
|
68
|
+
{"sequence": i, "predictions": p} for i, p in zip(seqs, preds)
|
|
69
|
+
]
|
|
70
|
+
return values
|
|
71
|
+
|
|
38
72
|
class SequencePrediction(BaseModel):
|
|
39
73
|
"""Sequence prediction."""
|
|
40
74
|
|
|
@@ -42,7 +76,7 @@ class PredictJob(PredictJobBase):
|
|
|
42
76
|
predictions: List[Prediction] = []
|
|
43
77
|
|
|
44
78
|
result: Optional[List[SequencePrediction]] = None
|
|
45
|
-
job_type:
|
|
79
|
+
job_type: str
|
|
46
80
|
|
|
47
81
|
|
|
48
82
|
@register_job_type(JobType.worflow_predict_single_site)
|
|
@@ -128,9 +162,9 @@ def _create_predict_job(
|
|
|
128
162
|
def create_predict_job(
|
|
129
163
|
session: APISession,
|
|
130
164
|
sequences: SequenceDataset,
|
|
131
|
-
train_job: Any,
|
|
165
|
+
train_job: Optional[Any] = None,
|
|
132
166
|
model_ids: Optional[List[str]] = None,
|
|
133
|
-
) ->
|
|
167
|
+
) -> FutureBase:
|
|
134
168
|
"""
|
|
135
169
|
Creates a predict job with a given set of sequences and a train job.
|
|
136
170
|
|
|
@@ -167,8 +201,9 @@ def create_predict_job(
|
|
|
167
201
|
model_ids = [model_ids]
|
|
168
202
|
endpoint = "v1/workflow/predict"
|
|
169
203
|
payload = {"sequences": sequences.sequences}
|
|
204
|
+
train_job_id = train_job.id if train_job is not None else None
|
|
170
205
|
return _create_predict_job(
|
|
171
|
-
session, endpoint, payload, model_ids=model_ids, train_job_id=
|
|
206
|
+
session, endpoint, payload, model_ids=model_ids, train_job_id=train_job_id
|
|
172
207
|
)
|
|
173
208
|
|
|
174
209
|
|
|
@@ -177,7 +212,7 @@ def create_predict_single_site(
|
|
|
177
212
|
sequence: SequenceData,
|
|
178
213
|
train_job: Any,
|
|
179
214
|
model_ids: Optional[List[str]] = None,
|
|
180
|
-
) ->
|
|
215
|
+
) -> FutureBase:
|
|
181
216
|
"""
|
|
182
217
|
Creates a predict job for single site mutants with a given sequence and a train job.
|
|
183
218
|
|
|
@@ -318,6 +353,7 @@ class PredictFutureMixin:
|
|
|
318
353
|
|
|
319
354
|
session: APISession
|
|
320
355
|
job: PredictJob
|
|
356
|
+
id: Optional[str] = None
|
|
321
357
|
|
|
322
358
|
def get_results(
|
|
323
359
|
self, page_size: Optional[int] = None, page_offset: Optional[int] = None
|
|
@@ -344,6 +380,7 @@ class PredictFutureMixin:
|
|
|
344
380
|
HTTPError
|
|
345
381
|
If the GET request does not succeed.
|
|
346
382
|
"""
|
|
383
|
+
assert self.id is not None
|
|
347
384
|
if "single_site" in self.job.job_type:
|
|
348
385
|
return get_single_site_prediction_results(
|
|
349
386
|
self.session, self.id, page_size, page_offset
|
|
@@ -352,7 +389,7 @@ class PredictFutureMixin:
|
|
|
352
389
|
return get_prediction_results(self.session, self.id, page_size, page_offset)
|
|
353
390
|
|
|
354
391
|
|
|
355
|
-
class PredictFuture(PredictFutureMixin, AsyncJobFuture, FutureBase):
|
|
392
|
+
class PredictFuture(PredictFutureMixin, AsyncJobFuture, FutureBase): # type: ignore
|
|
356
393
|
"""Future Job for manipulating results"""
|
|
357
394
|
|
|
358
395
|
job_type = [JobType.workflow_predict, JobType.worflow_predict_single_site]
|
|
@@ -372,24 +409,29 @@ class PredictFuture(PredictFutureMixin, AsyncJobFuture, FutureBase):
|
|
|
372
409
|
return self.job.job_id
|
|
373
410
|
|
|
374
411
|
def _fmt_results(self, results):
|
|
375
|
-
properties = set(
|
|
412
|
+
properties = set(
|
|
413
|
+
list(i["properties"].keys())[0] for i in results[0].dict()["predictions"]
|
|
414
|
+
)
|
|
376
415
|
dict_results = {}
|
|
377
416
|
for p in properties:
|
|
378
417
|
dict_results[p] = {}
|
|
379
418
|
for i, r in enumerate(results):
|
|
380
419
|
s = r.sequence
|
|
381
|
-
props =
|
|
420
|
+
props = [i.properties[p] for i in r.predictions if p in i.properties][0]
|
|
382
421
|
dict_results[p][s] = {"mean": props["y_mu"], "variance": props["y_var"]}
|
|
422
|
+
dict_results
|
|
383
423
|
return dict_results
|
|
384
424
|
|
|
385
425
|
def _fmt_ssp_results(self, results):
|
|
386
|
-
properties = set(
|
|
426
|
+
properties = set(
|
|
427
|
+
list(i["properties"].keys())[0] for i in results[0].dict()["predictions"]
|
|
428
|
+
)
|
|
387
429
|
dict_results = {}
|
|
388
430
|
for p in properties:
|
|
389
431
|
dict_results[p] = {}
|
|
390
432
|
for i, r in enumerate(results):
|
|
391
|
-
s = f"{r.position+1}{r.amino_acid}"
|
|
392
|
-
props =
|
|
433
|
+
s = s = f"{r.position+1}{r.amino_acid}"
|
|
434
|
+
props = [i.properties[p] for i in r.predictions if p in i.properties][0]
|
|
393
435
|
dict_results[p][s] = {"mean": props["y_mu"], "variance": props["y_var"]}
|
|
394
436
|
return dict_results
|
|
395
437
|
|
|
@@ -408,19 +450,21 @@ class PredictFuture(PredictFutureMixin, AsyncJobFuture, FutureBase):
|
|
|
408
450
|
"""
|
|
409
451
|
step = self.page_size
|
|
410
452
|
|
|
411
|
-
results = []
|
|
453
|
+
results: List = []
|
|
412
454
|
num_returned = step
|
|
413
455
|
offset = 0
|
|
414
456
|
|
|
415
457
|
while num_returned >= step:
|
|
416
458
|
try:
|
|
417
459
|
response = self.get_results(page_offset=offset, page_size=step)
|
|
460
|
+
assert isinstance(response.result, list)
|
|
418
461
|
results += response.result
|
|
419
462
|
num_returned = len(response.result)
|
|
420
463
|
offset += num_returned
|
|
421
464
|
except APIError as exc:
|
|
422
465
|
if verbose:
|
|
423
466
|
print(f"Failed to get results: {exc}")
|
|
467
|
+
|
|
424
468
|
if self.job.job_type == JobType.workflow_predict:
|
|
425
469
|
return self._fmt_results(results)
|
|
426
470
|
else:
|
|
@@ -444,7 +488,7 @@ class PredictService:
|
|
|
444
488
|
def create_predict_job(
|
|
445
489
|
self,
|
|
446
490
|
sequences: List,
|
|
447
|
-
train_job: Any,
|
|
491
|
+
train_job: Optional[Any] = None,
|
|
448
492
|
model_ids: Optional[List[str]] = None,
|
|
449
493
|
) -> PredictFuture:
|
|
450
494
|
"""
|
|
@@ -475,17 +519,18 @@ class PredictService:
|
|
|
475
519
|
APIError
|
|
476
520
|
If the backend refuses the job (due to sequence length or invalid inputs)
|
|
477
521
|
"""
|
|
478
|
-
if train_job
|
|
479
|
-
if train_job.assaymetadata
|
|
480
|
-
if
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
522
|
+
if train_job is not None:
|
|
523
|
+
if train_job.assaymetadata is not None:
|
|
524
|
+
if train_job.assaymetadata.sequence_length is not None:
|
|
525
|
+
if any(
|
|
526
|
+
[
|
|
527
|
+
train_job.assaymetadata.sequence_length != len(s)
|
|
528
|
+
for s in sequences
|
|
529
|
+
]
|
|
530
|
+
):
|
|
531
|
+
raise InvalidParameterError(
|
|
532
|
+
f"Predict sequences length {len(sequences[0])} != training assaydata ({train_job.assaymetadata.sequence_length})"
|
|
533
|
+
)
|
|
489
534
|
if not train_job.done():
|
|
490
535
|
print(f"WARNING: training job has status {train_job.status}")
|
|
491
536
|
# raise InvalidParameterError(
|
|
@@ -494,7 +539,7 @@ class PredictService:
|
|
|
494
539
|
|
|
495
540
|
sequence_dataset = SequenceDataset(sequences=sequences)
|
|
496
541
|
return create_predict_job(
|
|
497
|
-
self.session, sequence_dataset, train_job, model_ids=model_ids
|
|
542
|
+
self.session, sequence_dataset, train_job, model_ids=model_ids # type: ignore
|
|
498
543
|
)
|
|
499
544
|
|
|
500
545
|
def create_predict_single_site(
|
|
@@ -546,5 +591,5 @@ class PredictService:
|
|
|
546
591
|
|
|
547
592
|
sequence_dataset = SequenceData(sequence=sequence)
|
|
548
593
|
return create_predict_single_site(
|
|
549
|
-
self.session, sequence_dataset, train_job, model_ids=model_ids
|
|
594
|
+
self.session, sequence_dataset, train_job, model_ids=model_ids # type: ignore
|
|
550
595
|
)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Optional, List, Union
|
|
2
|
-
from pydantic import BaseModel
|
|
2
|
+
from openprotein.pydantic import BaseModel
|
|
3
3
|
|
|
4
|
-
import pydantic
|
|
4
|
+
import openprotein.pydantic as pydantic
|
|
5
5
|
from openprotein.base import APISession
|
|
6
6
|
from openprotein.api.jobs import AsyncJobFuture, Job
|
|
7
7
|
from openprotein.futures import FutureFactory, FutureBase
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
# Store for Model and Future classes
|
|
2
2
|
from openprotein.jobs import job_get, ResultsParser
|
|
3
|
-
from typing import Optional
|
|
3
|
+
from typing import Optional, Any
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class FutureBase:
|
|
7
7
|
"""Base class for all Future classes.
|
|
8
|
-
|
|
8
|
+
|
|
9
9
|
This class needs to be directly inherited for class discovery."""
|
|
10
|
+
|
|
10
11
|
# overridden by subclasses
|
|
11
|
-
job_type = None
|
|
12
|
+
job_type: Optional[Any] = None
|
|
12
13
|
|
|
13
14
|
@classmethod
|
|
14
15
|
def get_job_type(cls):
|
|
@@ -23,7 +24,9 @@ class FutureFactory:
|
|
|
23
24
|
"""Factory class for creating Future instances based on job_type."""
|
|
24
25
|
|
|
25
26
|
@staticmethod
|
|
26
|
-
def create_future(
|
|
27
|
+
def create_future(
|
|
28
|
+
session, job_id: Optional[str] = None, response: Optional[dict] = None, **kwargs
|
|
29
|
+
):
|
|
27
30
|
"""
|
|
28
31
|
Create and return an instance of the appropriate Future class based on the job type.
|
|
29
32
|
|
|
@@ -36,22 +39,22 @@ class FutureFactory:
|
|
|
36
39
|
- An instance of the appropriate Future class.
|
|
37
40
|
"""
|
|
38
41
|
|
|
39
|
-
# parse job
|
|
42
|
+
# parse job
|
|
40
43
|
if job_id:
|
|
41
44
|
job = job_get(session, job_id)
|
|
42
45
|
else:
|
|
43
|
-
if
|
|
46
|
+
if "job" not in kwargs:
|
|
44
47
|
job = ResultsParser.parse_obj(response)
|
|
45
48
|
else:
|
|
46
49
|
job = kwargs.pop("job")
|
|
47
|
-
|
|
50
|
+
|
|
48
51
|
# Dynamically discover all subclasses of FutureBase
|
|
49
52
|
future_classes = FutureBase.__subclasses__()
|
|
50
|
-
kwargs = {k:v for k,v in kwargs.items() if v is not None}
|
|
53
|
+
kwargs = {k: v for k, v in kwargs.items() if v is not None}
|
|
51
54
|
|
|
52
55
|
# Find the Future class that matches the job type
|
|
53
56
|
for future_class in future_classes:
|
|
54
57
|
if job.job_type in future_class.get_job_type():
|
|
55
|
-
return future_class(session=session, job=job, **kwargs)
|
|
58
|
+
return future_class(session=session, job=job, **kwargs) # type: ignore
|
|
56
59
|
|
|
57
60
|
raise ValueError(f"Unsupported job type: {job.job_type}")
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from datetime import datetime
|
|
2
2
|
from typing import Optional, Literal
|
|
3
3
|
import time
|
|
4
|
-
from pydantic import BaseModel, Field
|
|
4
|
+
from openprotein.pydantic import BaseModel, Field
|
|
5
5
|
from openprotein.errors import TimeoutException
|
|
6
6
|
from openprotein.base import APISession
|
|
7
7
|
import openprotein.config as config
|
|
@@ -13,7 +13,7 @@ from openprotein.schemas import JobStatus, JobType
|
|
|
13
13
|
|
|
14
14
|
class Job(BaseModel):
|
|
15
15
|
status: JobStatus
|
|
16
|
-
job_id: str
|
|
16
|
+
job_id: Optional[str] # must be optional as predict can return None
|
|
17
17
|
# new emb service get doesnt have job_type
|
|
18
18
|
job_type: Optional[Literal[tuple(member.value for member in JobType.__members__.values())]] # type: ignore
|
|
19
19
|
created_date: Optional[datetime] = None
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
try:
|
|
2
|
+
from pydantic.v1 import (
|
|
3
|
+
BaseModel,
|
|
4
|
+
Field,
|
|
5
|
+
ConfigDict,
|
|
6
|
+
validator,
|
|
7
|
+
root_validator,
|
|
8
|
+
parse_obj_as,
|
|
9
|
+
)
|
|
10
|
+
import pydantic.v1 as pydantic
|
|
11
|
+
except ImportError:
|
|
12
|
+
from pydantic import (
|
|
13
|
+
BaseModel,
|
|
14
|
+
Field,
|
|
15
|
+
ConfigDict,
|
|
16
|
+
validator,
|
|
17
|
+
root_validator,
|
|
18
|
+
parse_obj_as,
|
|
19
|
+
)
|
|
20
|
+
import pydantic
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "openprotein_python"
|
|
3
3
|
packages = [{include = "openprotein"}]
|
|
4
|
-
version = "0.4.1"
|
|
4
|
+
version = "0.4.2"
|
|
5
5
|
description = "OpenProtein Python interface."
|
|
6
6
|
license = "MIT"
|
|
7
7
|
readme = "README.md"
|
|
@@ -15,7 +15,7 @@ classifiers = [
|
|
|
15
15
|
[tool.poetry.dependencies]
|
|
16
16
|
python = "^3.8"
|
|
17
17
|
requests = ">=2"
|
|
18
|
-
pydantic = ">=1"
|
|
18
|
+
pydantic = "==1.*"
|
|
19
19
|
tqdm = ">=4"
|
|
20
20
|
pandas = ">=1"
|
|
21
21
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|