openprotein-python 0.8.2__1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openprotein/__init__.py +164 -0
- openprotein/_version.py +48 -0
- openprotein/align/__init__.py +8 -0
- openprotein/align/align.py +395 -0
- openprotein/align/api.py +428 -0
- openprotein/align/future.py +55 -0
- openprotein/align/msa.py +129 -0
- openprotein/align/schemas.py +165 -0
- openprotein/base.py +181 -0
- openprotein/chains.py +88 -0
- openprotein/common/__init__.py +5 -0
- openprotein/common/features.py +7 -0
- openprotein/common/model_metadata.py +33 -0
- openprotein/common/reduction.py +8 -0
- openprotein/config.py +9 -0
- openprotein/csv.py +31 -0
- openprotein/data/__init__.py +9 -0
- openprotein/data/api.py +218 -0
- openprotein/data/assaydataset.py +178 -0
- openprotein/data/data.py +93 -0
- openprotein/data/schemas.py +27 -0
- openprotein/design/__init__.py +16 -0
- openprotein/design/api.py +259 -0
- openprotein/design/design.py +125 -0
- openprotein/design/future.py +146 -0
- openprotein/design/schemas.py +607 -0
- openprotein/embeddings/__init__.py +27 -0
- openprotein/embeddings/api.py +619 -0
- openprotein/embeddings/embeddings.py +151 -0
- openprotein/embeddings/esm.py +33 -0
- openprotein/embeddings/future.py +146 -0
- openprotein/embeddings/models.py +421 -0
- openprotein/embeddings/openprotein.py +21 -0
- openprotein/embeddings/poet.py +446 -0
- openprotein/embeddings/poet2.py +505 -0
- openprotein/embeddings/schemas.py +78 -0
- openprotein/errors.py +76 -0
- openprotein/fasta.py +92 -0
- openprotein/fold/__init__.py +21 -0
- openprotein/fold/alphafold2.py +131 -0
- openprotein/fold/api.py +287 -0
- openprotein/fold/boltz.py +691 -0
- openprotein/fold/esmfold.py +54 -0
- openprotein/fold/fold.py +107 -0
- openprotein/fold/future.py +509 -0
- openprotein/fold/models.py +139 -0
- openprotein/fold/schemas.py +39 -0
- openprotein/jobs/__init__.py +9 -0
- openprotein/jobs/api.py +71 -0
- openprotein/jobs/futures.py +746 -0
- openprotein/jobs/jobs.py +69 -0
- openprotein/jobs/schemas.py +135 -0
- openprotein/models/__init__.py +4 -0
- openprotein/models/base.py +63 -0
- openprotein/models/foundation/rfdiffusion.py +283 -0
- openprotein/models/models.py +33 -0
- openprotein/predictor/__init__.py +25 -0
- openprotein/predictor/api.py +384 -0
- openprotein/predictor/models.py +374 -0
- openprotein/predictor/prediction.py +79 -0
- openprotein/predictor/predictor.py +242 -0
- openprotein/predictor/schemas.py +113 -0
- openprotein/predictor/validate.py +40 -0
- openprotein/prompt/__init__.py +9 -0
- openprotein/prompt/api.py +505 -0
- openprotein/prompt/models.py +142 -0
- openprotein/prompt/prompt.py +130 -0
- openprotein/prompt/schemas.py +49 -0
- openprotein/protein.py +587 -0
- openprotein/svd/__init__.py +9 -0
- openprotein/svd/api.py +206 -0
- openprotein/svd/models.py +288 -0
- openprotein/svd/schemas.py +31 -0
- openprotein/svd/svd.py +134 -0
- openprotein/umap/__init__.py +9 -0
- openprotein/umap/api.py +259 -0
- openprotein/umap/models.py +211 -0
- openprotein/umap/schemas.py +35 -0
- openprotein/umap/umap.py +175 -0
- openprotein/utils/uuid.py +29 -0
- openprotein_python-0.8.2.dist-info/METADATA +176 -0
- openprotein_python-0.8.2.dist-info/RECORD +84 -0
- openprotein_python-0.8.2.dist-info/WHEEL +4 -0
- openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0
openprotein/jobs/jobs.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
|
|
3
|
+
from openprotein import config
|
|
4
|
+
from openprotein.base import APISession
|
|
5
|
+
|
|
6
|
+
from . import api
|
|
7
|
+
from .futures import Future
|
|
8
|
+
from .schemas import Job, JobStatus, JobType
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class JobsAPI:
    """Session-bound accessor for listing, fetching and waiting on jobs."""

    def __init__(self, session: APISession):
        # Every call below is issued through this session.
        self.session = session

    def list(
        self,
        status: JobStatus | None = None,
        job_type: JobType | None = None,
        assay_id: str | None = None,
        more_recent_than: datetime | str | None = None,
        limit: int = 100,
    ) -> list[Job]:
        """
        List jobs, optionally filtered.

        Parameters
        ----------
        status : JobStatus or None
            Forwarded as the ``status`` filter.
        job_type : JobType or None
            Forwarded as the ``job_type`` filter.
        assay_id : str or None
            Forwarded as the ``assay_id`` filter.
        more_recent_than : datetime or str or None
            A ``datetime`` is converted with ``isoformat()``; a string or
            ``None`` is forwarded unchanged.
        limit : int
            Forwarded as the ``limit`` argument (default 100).

        Returns
        -------
        list[Job]
            One ``Job.create`` result per item returned by ``api.jobs_list``.
        """
        if isinstance(more_recent_than, datetime):
            cutoff = more_recent_than.isoformat()
        else:
            cutoff = more_recent_than
        raw_jobs = api.jobs_list(
            self.session,
            status=status,
            job_type=job_type,
            assay_id=assay_id,
            more_recent_than=cutoff,
            limit=limit,
        )
        return [Job.create(raw) for raw in raw_jobs]

    def get_job(self, job_id: str) -> Job:
        """Return the result of ``api.job_get`` for ``job_id``."""
        return api.job_get(session=self.session, job_id=job_id)

    def get(self, job_id: str, verbose: bool = False) -> Future:
        """
        Get job by ID.

        Notes
        -----
        The job is loaded as a future so that `wait` and `get` can be called
        on the result. ``verbose`` is accepted but not used by this method.
        """
        return self.__load(job_id=job_id)

    def __load(self, job_id: str) -> Future:
        """Build the future for ``job_id`` via ``Future.create``."""
        return Future.create(session=self.session, job_id=job_id)

    def wait(
        self,
        future: Future,
        interval=config.POLLING_INTERVAL,
        timeout: int | None = None,
        verbose: bool = False,
    ):
        """Delegate to ``future.wait`` with the given options and return its result."""
        return future.wait(interval=interval, timeout=timeout, verbose=verbose)
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Union
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, ConfigDict, TypeAdapter
|
|
7
|
+
from requests import Response
|
|
8
|
+
from typing_extensions import Self
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class JobType(str, Enum):
    """
    Type of job.

    Each member's value is the job-type string for that kind of job; most
    look like URL paths (e.g. ``"/poet/score"``).
    """

    # placeholder
    stub = "stub"

    # workflow jobs
    workflow_preprocess = "/workflow/preprocess"
    workflow_train = "/workflow/train"
    workflow_embed_umap = "/workflow/embed/umap"
    workflow_predict = "/workflow/predict"
    workflow_predict_single_site = "/workflow/predict/single_site"
    workflow_crossvalidate = "/workflow/crossvalidate"
    workflow_evaluate = "/workflow/evaluate"
    workflow_design = "/workflow/design"

    # alignment jobs
    align_align = "/align/align"
    align_prompt = "/align/prompt"

    clustalo = "/align/clustalo"
    mafft = "/align/mafft"
    abnumber = "/align/abnumber"

    # poet jobs
    poet = "/poet"
    poet_score = "/poet/score"
    poet_single_site = "/poet/single_site"
    poet_generate = "/poet/generate"
    poet_score_indel = "/poet/score/indel"

    # embeddings jobs
    embeddings_embed = "/embeddings/embed"
    embeddings_svd = "/embeddings/svd"
    embeddings_attn = "/embeddings/attn"
    embeddings_logits = "/embeddings/logits"
    embeddings_embed_reduced = "/embeddings/embed_reduced"

    # svd jobs
    svd_fit = "/svd/fit"
    svd_embed = "/svd/embed"

    # umap jobs
    umap_fit = "/umap/fit"
    umap_embed = "/umap/embed"

    # fold jobs
    embeddings_fold = "/embeddings/fold"

    # predictor jobs
    predictor_train = "/predictor/train"
    predictor_predict = "/predictor/predict"
    predictor_crossvalidate = "/predictor/crossvalidate"
    predictor_predict_single_site = "/predictor/predict_single_site"
    predictor_predict_multi = "/predictor/predict_multi"
    predictor_predict_multi_single_site = "/predictor/predict_multi_single_site"

    # designer
    designer = "/design"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class JobStatus(str, Enum):
    """Status of a job; each member's value equals its own name."""

    PENDING = "PENDING"
    RUNNING = "RUNNING"
    SUCCESS = "SUCCESS"
    FAILURE = "FAILURE"
    RETRYING = "RETRYING"
    CANCELED = "CANCELED"

    def done(self):
        """Return True when the status is SUCCESS, FAILURE or CANCELED."""
        finished = (JobStatus.SUCCESS, JobStatus.FAILURE, JobStatus.CANCELED)
        return self in finished

    def cancelled(self):
        """Return True only when the status is CANCELED."""
        return self is JobStatus.CANCELED
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class Job(BaseModel):
    """
    Base job schema.

    Unknown fields are kept (``extra="allow"``) so that subclass-specific
    fields survive when a job is created through the `create` factory.
    """

    # allows to carry over subclassed job fields when factory creating
    model_config = ConfigDict(extra="allow")

    job_id: str
    # new emb service get doesnt have job_type
    job_type: str
    status: JobStatus
    created_date: datetime
    start_date: datetime | None = None
    end_date: datetime | None = None
    prerequisite_job_id: str | None = None
    progress_message: str | None = None
    progress_counter: int | None = None
    sequence_length: int | None = None

    @classmethod
    def create(cls, obj: "Job | Response | dict", **kwargs) -> Self:
        """
        Build the most specific job model available for ``obj``.

        Parameters
        ----------
        obj : Job or Response or dict
            A ``Response`` is read with ``.json()``, a ``Job`` is converted
            with ``model_dump()``, and anything else is used as-is.
        **kwargs
            Extra values merged over the payload (``payload | kwargs``)
            before validation.

        Returns
        -------
        Job
            An instance of one of the direct subclasses of ``Job`` when the
            payload validates against their union, otherwise a plain ``Job``.

        Raises
        ------
        Exception
            Errors from reading the payload (e.g. ``obj.json()``) or from the
            final ``Job.model_validate`` call propagate to the caller.
        """
        # Extract the payload outside the try block: if this step fails the
        # caller sees the real error rather than an unbound-variable error
        # raised from the fallback path.
        if isinstance(obj, Response):
            data = obj.json()
        elif isinstance(obj, Job):
            data = obj.model_dump()
        else:
            data = obj
        payload = data | kwargs

        job_classes = Job.__subclasses__()
        # Union[()] is invalid, so only attempt subclass parsing when at
        # least one subclass has been defined.
        if job_classes:
            try:
                return TypeAdapter(Union[tuple(job_classes)]).validate_python(payload)  # type: ignore
            except Exception:
                # Best effort: any failure falls back to the base schema.
                logger.debug(
                    "Could not parse job as a Job subclass; using base Job",
                    exc_info=True,
                )
        return Job.model_validate(payload)  # type: ignore - static checker cannot know runtime type

    # hide extra allowed fields
    def __repr_args__(self):
        """Yield ``(name, value)`` pairs for declared and computed fields whose ``repr`` flag is set."""
        # Read field metadata from the class; instance access to these
        # attributes is deprecated in newer pydantic v2 releases.
        model_cls = type(self)
        declared = model_cls.model_fields
        for name, value in self.__dict__.items():
            info = declared.get(name)
            if info and info.repr:
                yield name, value

        for name, computed in model_cls.model_computed_fields.items():
            if computed.repr:
                yield name, getattr(self, name)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class BatchJob(BaseModel):
    # Optional record count; None when not provided.
    num_records: int | None = None
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Base protein models for working with proteins."""
|
|
2
|
+
|
|
3
|
+
from openprotein.base import APISession
|
|
4
|
+
from openprotein.common import ModelMetadata
|
|
5
|
+
from openprotein.jobs import Future
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ProteinModel:
    """
    Base class for a protein model bound to an API session.

    Constructing an instance formats ``__doc__`` from the model metadata, so
    the metadata is resolved (via `get_metadata` when none was passed in)
    during ``__init__``.
    """

    def __init__(
        self,
        session: APISession,
        model_id: str,
        metadata: ModelMetadata | None = None,
    ):
        self.session = session
        self.id = model_id
        self._metadata = metadata
        # Reads `self.metadata`, which may call `get_metadata`.
        self.__doc__ = self.__fmt_doc()

    def __fmt_doc(self):
        """Render the per-instance docstring from the model metadata."""
        meta = self.metadata
        lines = (
            f"\t{str(meta.description.summary)}",
            f"\t max_sequence_length = {meta.max_sequence_length}",
            f"\t supported outputs = {meta.output_types}",
            f"\t supported tokens = {meta.input_tokens}",
        )
        return "\n".join(lines) + "\n"

    def __str__(self) -> str:
        return self.id

    def __repr__(self) -> str:
        return self.id

    @property
    def metadata(self):
        """
        ModelMetadata for this model.

        Returns
        -------
        ModelMetadata
            The stored metadata; fetched once through `get_metadata` and
            kept on the instance when none was stored yet.
        """
        cached = self._metadata
        if cached is None:
            cached = self.get_metadata()
            self._metadata = cached
        return cached

    def get_metadata(self) -> ModelMetadata:
        """
        Get model metadata for this model.

        Returns
        -------
        ModelMetadata
            The metadata associated with this model.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError("`get_metadata` not implemented for this model")

    def predict(self, *args, **kwargs) -> Future:
        """
        Run the model's prediction.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError("`predict` not implemented for this model")
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""RFdiffusion model for protein structure and sequence design."""
|
|
2
|
+
|
|
3
|
+
from typing import BinaryIO, Literal
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
from openprotein.base import APISession
|
|
8
|
+
from openprotein.common import ModelMetadata
|
|
9
|
+
from openprotein.common.model_metadata import ModelDescription
|
|
10
|
+
from openprotein.jobs import Future, Job
|
|
11
|
+
from openprotein.models.base import ProteinModel
|
|
12
|
+
from openprotein.protein import Protein
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Contig(BaseModel):
    """Defines a contig segment for protein design."""

    # Required field.
    length: str = Field(description="Length range, e.g., '10-20' or '100'")
    # Optional; defaults to None.
    chain: str | None = Field(default=None, description="Chain to sample from")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Hotspot(BaseModel):
    """Specifies a hotspot residue constraint."""

    # Required field.
    res_id: str = Field(
        description="Residue identifier, e.g., 'A100' for chain A, residue 100"
    )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class RFdiffusionRequest(BaseModel):
    """Specification for an RFdiffusion request."""

    # Defaults to a single design.
    n: int = 1

    # Structure input is carried as text (typed alternative: `protein: Protein`).
    structure_text: str | None = None

    # Contigs are carried as a string (typed alternative: `contigs: list[Contig]`).
    contigs: str | None = None
    inpaint_seq: str | None = None
    provide_seq: str | None = None

    # Hotspots are carried as a string (typed alternative: `hotspots: list[Hotspot]`).
    hotspot: str | None = None

    T: int | None = None
    partial_T: int | None = None
    use_active_site_model: bool | None = None
    use_beta_model: bool | None = None

    # Simplified symmetry options
    symmetry: Literal["cyclic", "dihedral", "tetrahedral"] | None = None
    order: int | None = None
    add_potential: bool | None = None

    # Fold conditioning
    scaffold_target_structure_text: str | None = None
    scaffold_target_use_struct: bool = False
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class RFdiffusionJob(Job):
    """Job schema for an RFdiffusion request."""

    # Only this exact job-type string validates as an RFdiffusion job.
    job_type: Literal["/models/rfdiffusion"]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class RFdiffusionFuture(Future):
    """Future for handling the results of an RFdiffusion job."""

    job: RFdiffusionJob

    def get_pdb(self, replicate: int = 0) -> str:
        """
        Retrieve the PDB text for one replicate of this job.

        Args:
            replicate (int): Forwarded as the ``replicate`` query parameter
                of the results request (default 0).

        Returns:
            str: The response text of the results request.
        """
        pdb_text = _rfdiffusion_api_result_get(
            session=self.session,
            job_id=self.id,
            replicate=replicate,
        )
        return pdb_text

    def get(self, replicate: int = 0):
        """Default result accessor; same as `get_pdb` (replicate 0 unless given)."""
        # TODO handle different design index
        return self.get_pdb(replicate=replicate)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _rfdiffusion_api_post(
    session: APISession, request: RFdiffusionRequest, **kwargs
) -> RFdiffusionJob:
    """
    Submit an RFdiffusion design request.

    The JSON body is the request dumped without ``None`` fields, with any
    ``kwargs`` merged on top (overriding same-named request fields).

    Returns the job parsed from the response JSON.
    """
    payload = request.model_dump(exclude_none=True)
    # kwargs win over same-named request fields.
    payload.update(kwargs)
    reply = session.post("v1/design/models/rfdiffusion", json=payload)
    return RFdiffusionJob.model_validate(reply.json())
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _rfdiffusion_api_get_metadata(session: APISession) -> ModelMetadata:
    """
    GET the RFdiffusion model endpoint.

    Returns the response JSON validated as ``ModelMetadata``.
    """
    reply = session.get("v1/design/models/rfdiffusion")
    return ModelMetadata.model_validate(reply.json())
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _rfdiffusion_api_result_get(
    session: APISession, job_id: str, replicate: int = 0
) -> str:
    """
    GET the results of a design job.

    ``replicate`` is sent as a query parameter. Returns the response text.
    """
    query = {"replicate": replicate}
    return session.get(f"v1/design/{job_id}/results", params=query).text
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _read_structure_text(structure: str | bytes | BinaryIO) -> str:
    """Return structure content as text from a string, bytes, or readable file object."""
    if isinstance(structure, str):
        return structure
    if isinstance(structure, bytes):
        return structure.decode()
    content = structure.read()
    # A file opened in text mode already yields str; only decode otherwise.
    return content if isinstance(content, str) else content.decode()


class RFdiffusionModel(ProteinModel):
    """
    RFdiffusion model for generating de novo protein structures.

    This model supports functionalities like unconditional design, scaffolding,
    and binder design.
    """

    model_id: str = "rfdiffusion"

    def __init__(self, session: APISession, model_id: str = "rfdiffusion"):
        # The model_id from the API might be more specific, e.g., "rfdiffusion-v1.1"
        super().__init__(session, model_id)

    def get_metadata(self) -> ModelMetadata:
        """Return fixed, locally constructed metadata (no API call is made)."""
        return ModelMetadata(
            model_id="rfdiffusion",
            description=ModelDescription(summary="RFdiffusion"),
            dimension=0,
            output_types=["pdb"],
            input_tokens=[],
            token_descriptions=[[]],
        )

    def generate(
        self,
        n: int = 1,
        structure_file: str | bytes | BinaryIO | None = None,
        contigs: int | str | None = None,
        inpaint_seq: str | None = None,
        provide_seq: str | None = None,
        hotspot: str | None = None,
        T: int | None = None,
        partial_T: int | None = None,
        use_active_site_model: bool | None = None,
        use_beta_model: bool | None = None,
        # Symmetry options
        symmetry: Literal["cyclic", "dihedral", "tetrahedral"] | None = None,
        order: int | None = None,
        add_potential: bool | None = None,
        # Fold conditioning
        scaffold_target_structure_file: str | bytes | BinaryIO | None = None,
        scaffold_target_use_struct: bool = False,
        **kwargs,
    ) -> RFdiffusionFuture:
        """
        Run a protein structure generate job using RFdiffusion.

        Parameters
        ----------
        n : int, optional
            The number of unique design trajectories to run (default is 1).
        structure_file : str, bytes, BinaryIO, optional
            An input PDB file (as the text string, bytes, or a file-like
            object) used for inpainting or other guided design tasks where
            parts of an existing structure are provided.
        contigs : int, str, optional
            Defines the lengths and connectivity of chain segments for the desired
            structure, specified in RFdiffusion's contig string format.
            Required for most design tasks. Example: 150, '10-20/A100-110/10-20' for a
            binder design. An integer ``k`` is sent as the string ``"k-k"``.
        inpaint_seq : str, optional
            A string specifying the regions in the input structure to mask for
            in-painting. Example: 'A1-A10/A30-40'.
        provide_seq : str, optional
            A string specifying which segments of the contig have a provided
            sequence. Example: 'A1-A10/A30-40'.
        hotspot : str, optional
            A string specifying hotspot residues to constrain during design,
            typically for functional sites. Example: 'A10,A12,A14'.
        T : int, optional
            The number of timesteps for the diffusion process.
        partial_T : int, optional
            The number of timesteps for partial diffusion.
        use_active_site_model : bool, optional
            If True, uses the active site model checkpoint, which has been finetuned to
            better keep very small motifs in place in the output for motif scaffolding.
            Left out of the request when None (the default).
        use_beta_model : bool, optional
            If True, uses the complex beta model checkpoint, which generates a
            greater diversity of topologies but has not been extensively
            experimentally validated. Left out of the request when None
            (the default).
        symmetry : {"cyclic", "dihedral", "tetrahedral"}, optional
            The type of symmetry to apply to the design.
        order : int, optional
            The order of the symmetry (e.g., 3 for C3 or D3 symmetry).
            Must be provided if `symmetry` is set.
        add_potential : bool, optional
            A flag to toggle an additional potential to guide the design.
            This defaults to true in the case of symmetric design.
        scaffold_target_structure_file : str, bytes, BinaryIO, optional
            A PDB file (which can be the text string or bytes or the file-like
            object) containing a scaffold structure to be used as a structural
            guide. It could also be used as a target when doing scaffold guided
            binder design with `scaffold_target_use_struct`.
        scaffold_target_use_struct : bool, optional
            Whether or not to use the provided scaffold structure as a target.
            Otherwise, it is used only as a topology guide.

        Other Parameters
        ----------------
        **kwargs : dict
            Additional keyword args that are passed directly to the rfdiffusion
            inference script. Overwrites any preceding options.

        Returns
        -------
        RFdiffusionFuture
            A future object that can be used to retrieve the results of the design
            job upon completion.
        """
        # A bare integer length means a fixed-length contig range.
        if isinstance(contigs, int):
            contigs = f"{contigs}-{contigs}"
        request = RFdiffusionRequest(
            n=n,
            contigs=contigs,
            inpaint_seq=inpaint_seq,
            provide_seq=provide_seq,
            hotspot=hotspot,
            T=T,
            partial_T=partial_T,
            use_active_site_model=use_active_site_model,
            use_beta_model=use_beta_model,
            symmetry=symmetry,
            order=order,
            add_potential=add_potential,
            scaffold_target_use_struct=scaffold_target_use_struct,
        )
        # Structure inputs are normalized to text only when supplied, so the
        # corresponding request fields stay None otherwise.
        if structure_file is not None:
            request.structure_text = _read_structure_text(structure_file)
        if scaffold_target_structure_file is not None:
            request.scaffold_target_structure_text = _read_structure_text(
                scaffold_target_structure_file
            )

        # Submit the job via the private API function
        job = _rfdiffusion_api_post(
            session=self.session,
            request=request,
            **kwargs,
        )

        # Return the future object
        return RFdiffusionFuture(session=self.session, job=job)

    # `predict` is the same function object as `generate`.
    predict = generate
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""The ModelsAPI class, providing access to all protein models."""
|
|
2
|
+
|
|
3
|
+
from openprotein.base import APISession
|
|
4
|
+
|
|
5
|
+
from .foundation.rfdiffusion import RFdiffusionModel
|
|
6
|
+
|
|
7
|
+
# In the future, we would import other models here:
|
|
8
|
+
# from .foundation.esm import ESMModel
|
|
9
|
+
# from .foundation.alphafold import AlphaFoldModel
|
|
10
|
+
# from .custom.gp import GaussianProcessModel
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ModelsAPI:
    """
    Single entry point that groups the available protein models.

    Each model is exposed as an attribute of this accessor; at present that
    is `rfdiffusion` only.
    """

    def __init__(self, session: APISession):
        """
        Create the accessor and instantiate each available model.

        Args:
            session: The APISession handed to every model instance.
        """
        # New models are added by instantiating them here, e.g.:
        #   self.esm = ESMModel(session)
        #   self.alphafold = AlphaFoldModel(session)
        #   self.gp = GaussianProcessModel(session)
        self.rfdiffusion = RFdiffusionModel(session)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Predictor module for training predictors on OpenProtein.
|
|
3
|
+
|
|
4
|
+
isort:skip_file
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .schemas import (
|
|
8
|
+
Kernel,
|
|
9
|
+
Constraints,
|
|
10
|
+
Features,
|
|
11
|
+
Dataset,
|
|
12
|
+
PredictorMetadata,
|
|
13
|
+
PredictorType,
|
|
14
|
+
PredictorArgs,
|
|
15
|
+
PredictJob,
|
|
16
|
+
PredictMultiJob,
|
|
17
|
+
PredictMultiSingleSiteJob,
|
|
18
|
+
PredictSingleSiteJob,
|
|
19
|
+
PredictorTrainJob,
|
|
20
|
+
PredictorEnsembleJob,
|
|
21
|
+
PredictorCVJob,
|
|
22
|
+
)
|
|
23
|
+
from .models import PredictorModel
|
|
24
|
+
from .prediction import PredictionResultFuture
|
|
25
|
+
from .predictor import PredictorAPI
|