openprotein-python 0.8.2__1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openprotein/__init__.py +164 -0
- openprotein/_version.py +48 -0
- openprotein/align/__init__.py +8 -0
- openprotein/align/align.py +395 -0
- openprotein/align/api.py +428 -0
- openprotein/align/future.py +55 -0
- openprotein/align/msa.py +129 -0
- openprotein/align/schemas.py +165 -0
- openprotein/base.py +181 -0
- openprotein/chains.py +88 -0
- openprotein/common/__init__.py +5 -0
- openprotein/common/features.py +7 -0
- openprotein/common/model_metadata.py +33 -0
- openprotein/common/reduction.py +8 -0
- openprotein/config.py +9 -0
- openprotein/csv.py +31 -0
- openprotein/data/__init__.py +9 -0
- openprotein/data/api.py +218 -0
- openprotein/data/assaydataset.py +178 -0
- openprotein/data/data.py +93 -0
- openprotein/data/schemas.py +27 -0
- openprotein/design/__init__.py +16 -0
- openprotein/design/api.py +259 -0
- openprotein/design/design.py +125 -0
- openprotein/design/future.py +146 -0
- openprotein/design/schemas.py +607 -0
- openprotein/embeddings/__init__.py +27 -0
- openprotein/embeddings/api.py +619 -0
- openprotein/embeddings/embeddings.py +151 -0
- openprotein/embeddings/esm.py +33 -0
- openprotein/embeddings/future.py +146 -0
- openprotein/embeddings/models.py +421 -0
- openprotein/embeddings/openprotein.py +21 -0
- openprotein/embeddings/poet.py +446 -0
- openprotein/embeddings/poet2.py +505 -0
- openprotein/embeddings/schemas.py +78 -0
- openprotein/errors.py +76 -0
- openprotein/fasta.py +92 -0
- openprotein/fold/__init__.py +21 -0
- openprotein/fold/alphafold2.py +131 -0
- openprotein/fold/api.py +287 -0
- openprotein/fold/boltz.py +691 -0
- openprotein/fold/esmfold.py +54 -0
- openprotein/fold/fold.py +107 -0
- openprotein/fold/future.py +509 -0
- openprotein/fold/models.py +139 -0
- openprotein/fold/schemas.py +39 -0
- openprotein/jobs/__init__.py +9 -0
- openprotein/jobs/api.py +71 -0
- openprotein/jobs/futures.py +746 -0
- openprotein/jobs/jobs.py +69 -0
- openprotein/jobs/schemas.py +135 -0
- openprotein/models/__init__.py +4 -0
- openprotein/models/base.py +63 -0
- openprotein/models/foundation/rfdiffusion.py +283 -0
- openprotein/models/models.py +33 -0
- openprotein/predictor/__init__.py +25 -0
- openprotein/predictor/api.py +384 -0
- openprotein/predictor/models.py +374 -0
- openprotein/predictor/prediction.py +79 -0
- openprotein/predictor/predictor.py +242 -0
- openprotein/predictor/schemas.py +113 -0
- openprotein/predictor/validate.py +40 -0
- openprotein/prompt/__init__.py +9 -0
- openprotein/prompt/api.py +505 -0
- openprotein/prompt/models.py +142 -0
- openprotein/prompt/prompt.py +130 -0
- openprotein/prompt/schemas.py +49 -0
- openprotein/protein.py +587 -0
- openprotein/svd/__init__.py +9 -0
- openprotein/svd/api.py +206 -0
- openprotein/svd/models.py +288 -0
- openprotein/svd/schemas.py +31 -0
- openprotein/svd/svd.py +134 -0
- openprotein/umap/__init__.py +9 -0
- openprotein/umap/api.py +259 -0
- openprotein/umap/models.py +211 -0
- openprotein/umap/schemas.py +35 -0
- openprotein/umap/umap.py +175 -0
- openprotein/utils/uuid.py +29 -0
- openprotein_python-0.8.2.dist-info/METADATA +176 -0
- openprotein_python-0.8.2.dist-info/RECORD +84 -0
- openprotein_python-0.8.2.dist-info/WHEEL +4 -0
- openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
from openprotein.base import APISession
|
|
2
|
+
from openprotein.jobs import Future, JobsAPI
|
|
3
|
+
from openprotein.protein import Protein
|
|
4
|
+
|
|
5
|
+
from . import api
|
|
6
|
+
from .schemas import PromptJob, PromptMetadata, QueryMetadata
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Prompt(Future):
    """A set of sequences and/or structures used to condition the PoET models.

    Wraps a ``PromptMetadata`` record together with the (optional) job that
    produces the prompt, so that it can be awaited like any other ``Future``.
    """

    metadata: PromptMetadata
    session: APISession
    job: PromptJob | None

    def __init__(
        self,
        session: APISession,
        job: PromptJob | None = None,
        metadata: PromptMetadata | None = None,
        num_replicates: int | None = None,
    ):
        """
        Build a Prompt from its metadata, or from the job that creates it.

        Parameters
        ----------
        session : APISession
            Session used for every API call made by this object.
        job : PromptJob | None
            Job backing the prompt. Required when ``metadata`` is not given.
        metadata : PromptMetadata | None
            Metadata of the prompt. When omitted it is derived from ``job``.
        num_replicates : int | None
            Only consulted when ``metadata`` is omitted. If given, the metadata
            is assembled locally from ``job``; otherwise it is fetched through
            ``api.get_prompt_metadata``.

        Raises
        ------
        ValueError
            If neither ``metadata`` nor ``job`` is supplied.
        AssertionError
            If the metadata references a job but ``session.jobs`` is not a
            ``JobsAPI`` instance.
        """
        if metadata is None:
            # Without metadata the job is the only source of information.
            if job is None:
                raise ValueError("Expected prompt metadata or job")
            # Knowing the replicate count lets us skip the metadata API call;
            # the conditional expression only evaluates the selected branch.
            metadata = (
                api.get_prompt_metadata(session=session, prompt_id=job.job_id)
                if num_replicates is None
                else PromptMetadata(
                    id=job.job_id,
                    name=job.job_id,
                    description=None,
                    created_date=job.created_date,
                    num_replicates=num_replicates,
                    job_id=job.job_id,
                    status=job.status,
                )
            )

        self.metadata = metadata
        self.session = session

        linked_job_id = self.metadata.job_id
        if linked_job_id is not None:
            # The metadata points at a job: load it through the session's
            # jobs interface, replacing whatever ``job`` was passed in.
            jobs_api = getattr(session, "jobs", None)
            assert isinstance(jobs_api, JobsAPI)
            job = PromptJob.create(jobs_api.get_job(job_id=linked_job_id))

        super().__init__(session, job)

    def __str__(self) -> str:
        """Return the string form of the underlying metadata."""
        return str(self.metadata)

    def __repr__(self) -> str:
        """Return the repr of the underlying metadata."""
        return repr(self.metadata)

    def get(self) -> list[list[Protein]]:
        """Fetch the prompt content for this prompt's ID via ``api.get_prompt``."""
        return api.get_prompt(session=self.session, prompt_id=str(self.id))

    def _wait_job(self, **kwargs):
        """Wait on the backing job; return ``None`` right away if there is none."""
        return None if self.job is None else super()._wait_job(**kwargs)

    @property
    def id(self):
        """Prompt identifier, taken from the metadata."""
        return self.metadata.id

    @property
    def name(self):
        """Prompt name, taken from the metadata."""
        return self.metadata.name

    @property
    def description(self):
        """Prompt description, taken from the metadata."""
        return self.metadata.description

    @property
    def created_date(self):
        """Creation date, taken from the metadata."""
        return self.metadata.created_date

    @property
    def num_replicates(self):
        """Number of replicates, taken from the metadata."""
        return self.metadata.num_replicates

    @property
    def status(self):
        """Status from the parent class when a job is attached, else from the metadata."""
        if self.job is None:
            return self.metadata.status
        return super().status
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class Query:
    """A sequence/structure used to query the PoET-2 model.

    Holds the query's ``QueryMetadata`` and the session used to retrieve the
    query content on demand.
    """

    metadata: QueryMetadata
    session: APISession

    def __init__(self, session: APISession, metadata: QueryMetadata):
        """
        Build a Query from its metadata.

        Parameters
        ----------
        session : APISession
            Session used for every API call made by this object.
        metadata : QueryMetadata
            Metadata describing the query.
        """
        self.metadata = metadata
        self.session = session

    def __str__(self) -> str:
        """Return the string form of the underlying metadata."""
        return str(self.metadata)

    def __repr__(self) -> str:
        """Return the repr of the underlying metadata."""
        return repr(self.metadata)

    def get(self) -> Protein:
        """Fetch the query content for this query's ID via ``api.get_query``."""
        return api.get_query(session=self.session, query_id=str(self.id))

    @property
    def id(self):
        """Query identifier, taken from the metadata."""
        return self.metadata.id

    @property
    def created_date(self):
        """Creation date, taken from the metadata."""
        return self.metadata.created_date
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Prompt API providing the interface to create prompts for use with PoET models."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Sequence
|
|
4
|
+
|
|
5
|
+
from openprotein.base import APISession
|
|
6
|
+
from openprotein.protein import Protein
|
|
7
|
+
|
|
8
|
+
from . import api
|
|
9
|
+
from .models import Prompt, Query
|
|
10
|
+
from .schemas import Context
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PromptAPI:
    """Prompt API providing the interface to create prompts for use with PoET models."""

    def __init__(self, session: APISession):
        """
        Bind the API to a session.

        Parameters
        ----------
        session : APISession
            Session used for every API call made through this interface.
        """
        self.session = session

    def create_prompt(
        self,
        context: Context | Sequence[Context],
        name: str | None = None,
        description: str | None = None,
    ) -> Prompt:
        """
        Create a prompt.

        Parameters
        ----------
        context : Context | Sequence[Context]
            Context or list of contexts, where each context is a Sequence of
            str, bytes, and/or Protein.
        name : str | None
            Name of the prompt.
        description : str | None
            Description of the prompt.

        Returns
        -------
        Prompt
            The created prompt, wrapping the metadata returned by the API.
        """
        return Prompt(
            session=self.session,
            metadata=api.create_prompt(
                session=self.session,
                context=context,
                name=name,
                description=description,
            ),
        )

    def get_prompt(self, prompt_id: str) -> Prompt:
        """
        Get the prompt for a given prompt ID.

        Parameters
        ----------
        prompt_id : str
            The prompt ID.

        Returns
        -------
        Prompt
            The prompt, built from the metadata fetched for ``prompt_id``.
        """
        return Prompt(
            session=self.session,
            metadata=api.get_prompt_metadata(session=self.session, prompt_id=prompt_id),
        )

    def list_prompts(self) -> List[Prompt]:
        """
        List all prompts.

        Returns
        -------
        List[Prompt]
            One Prompt per metadata entry returned by the API.
        """
        return [
            Prompt(session=self.session, metadata=p)
            for p in api.list_prompts(session=self.session)
        ]

    def create_query(
        self,
        query: str | bytes | Protein,
    ) -> Query:
        """
        Create a query.

        Parameters
        ----------
        query : str | bytes | Protein
            Query provided as sequence/structure.

        Returns
        -------
        Query
            The created query, wrapping the metadata returned by the API.
        """
        return Query(
            session=self.session,
            metadata=api.create_query(
                session=self.session,
                query=query,
            ),
        )

    def get_query(self, query_id: str) -> Query:
        """
        Get the query for a given query ID.

        Parameters
        ----------
        query_id : str
            The query ID.

        Returns
        -------
        Query
            The query, built from the metadata fetched for ``query_id``.
        """
        return Query(
            session=self.session,
            metadata=api.get_query_metadata(session=self.session, query_id=query_id),
        )
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Literal, Sequence
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
from openprotein.jobs import Job, JobStatus, JobType
|
|
7
|
+
from openprotein.protein import Protein
|
|
8
|
+
|
|
9
|
+
# A single prompt context: a sequence whose items are bytes, str, or Protein.
Context = Sequence[bytes | str | Protein]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PromptJob(Job):
    """A representation of a prompt job."""

    # Restricts this job model to the ``JobType.align_prompt`` job type.
    job_type: Literal[JobType.align_prompt]

    @property
    def msa_id(self):
        """ID of the underlying MSA.

        NOTE(review): the getter returns ``self.msa_id``, i.e. it reads the
        very property it defines. Unless the base ``Job`` model intercepts
        this lookup (not visible here), accessing it recurses until a
        ``RecursionError`` is raised. Confirm where the MSA ID is meant to
        come from and fix.
        """
        return self.msa_id

    @property
    def prompt_id(self):
        """Prompt ID; this is the job's own ``job_id``."""
        return self.job_id
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class PromptMetadata(BaseModel):
    """Descriptive record for a prompt: identity, provenance and status."""

    id: str = Field(
        description="Prompt unique identifier.",
    )
    name: str = Field(
        description="Name of the prompt",
    )
    description: str | None = Field(
        default=None,
        description="Description of the prompt",
    )
    created_date: datetime = Field(
        description="The date the prompt was created.",
    )
    num_replicates: int = Field(
        description="Number of replicates provided as context.",
    )
    job_id: str | None = Field(
        default=None,
        description="The job_id of the sampling job, if it exists.",
    )
    status: JobStatus = Field(
        description="The status of the prompt.",
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class QueryMetadata(BaseModel):
    """Descriptive record for a query: its identifier and creation date."""

    id: str = Field(
        description="Query unique identifier.",
    )
    created_date: datetime = Field(
        description="The date the query was created.",
    )
|