openprotein-python 0.8.2__1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openprotein/__init__.py +164 -0
- openprotein/_version.py +48 -0
- openprotein/align/__init__.py +8 -0
- openprotein/align/align.py +395 -0
- openprotein/align/api.py +428 -0
- openprotein/align/future.py +55 -0
- openprotein/align/msa.py +129 -0
- openprotein/align/schemas.py +165 -0
- openprotein/base.py +181 -0
- openprotein/chains.py +88 -0
- openprotein/common/__init__.py +5 -0
- openprotein/common/features.py +7 -0
- openprotein/common/model_metadata.py +33 -0
- openprotein/common/reduction.py +8 -0
- openprotein/config.py +9 -0
- openprotein/csv.py +31 -0
- openprotein/data/__init__.py +9 -0
- openprotein/data/api.py +218 -0
- openprotein/data/assaydataset.py +178 -0
- openprotein/data/data.py +93 -0
- openprotein/data/schemas.py +27 -0
- openprotein/design/__init__.py +16 -0
- openprotein/design/api.py +259 -0
- openprotein/design/design.py +125 -0
- openprotein/design/future.py +146 -0
- openprotein/design/schemas.py +607 -0
- openprotein/embeddings/__init__.py +27 -0
- openprotein/embeddings/api.py +619 -0
- openprotein/embeddings/embeddings.py +151 -0
- openprotein/embeddings/esm.py +33 -0
- openprotein/embeddings/future.py +146 -0
- openprotein/embeddings/models.py +421 -0
- openprotein/embeddings/openprotein.py +21 -0
- openprotein/embeddings/poet.py +446 -0
- openprotein/embeddings/poet2.py +505 -0
- openprotein/embeddings/schemas.py +78 -0
- openprotein/errors.py +76 -0
- openprotein/fasta.py +92 -0
- openprotein/fold/__init__.py +21 -0
- openprotein/fold/alphafold2.py +131 -0
- openprotein/fold/api.py +287 -0
- openprotein/fold/boltz.py +691 -0
- openprotein/fold/esmfold.py +54 -0
- openprotein/fold/fold.py +107 -0
- openprotein/fold/future.py +509 -0
- openprotein/fold/models.py +139 -0
- openprotein/fold/schemas.py +39 -0
- openprotein/jobs/__init__.py +9 -0
- openprotein/jobs/api.py +71 -0
- openprotein/jobs/futures.py +746 -0
- openprotein/jobs/jobs.py +69 -0
- openprotein/jobs/schemas.py +135 -0
- openprotein/models/__init__.py +4 -0
- openprotein/models/base.py +63 -0
- openprotein/models/foundation/rfdiffusion.py +283 -0
- openprotein/models/models.py +33 -0
- openprotein/predictor/__init__.py +25 -0
- openprotein/predictor/api.py +384 -0
- openprotein/predictor/models.py +374 -0
- openprotein/predictor/prediction.py +79 -0
- openprotein/predictor/predictor.py +242 -0
- openprotein/predictor/schemas.py +113 -0
- openprotein/predictor/validate.py +40 -0
- openprotein/prompt/__init__.py +9 -0
- openprotein/prompt/api.py +505 -0
- openprotein/prompt/models.py +142 -0
- openprotein/prompt/prompt.py +130 -0
- openprotein/prompt/schemas.py +49 -0
- openprotein/protein.py +587 -0
- openprotein/svd/__init__.py +9 -0
- openprotein/svd/api.py +206 -0
- openprotein/svd/models.py +288 -0
- openprotein/svd/schemas.py +31 -0
- openprotein/svd/svd.py +134 -0
- openprotein/umap/__init__.py +9 -0
- openprotein/umap/api.py +259 -0
- openprotein/umap/models.py +211 -0
- openprotein/umap/schemas.py +35 -0
- openprotein/umap/umap.py +175 -0
- openprotein/utils/uuid.py +29 -0
- openprotein_python-0.8.2.dist-info/METADATA +176 -0
- openprotein_python-0.8.2.dist-info/RECORD +84 -0
- openprotein_python-0.8.2.dist-info/WHEEL +4 -0
- openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0
openprotein/__init__.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenProtein Python client.
|
|
3
|
+
|
|
4
|
+
A pythonic interface for interacting with our cutting-edge protein engineering platform.
|
|
5
|
+
|
|
6
|
+
isort:skip_file
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
import warnings
|
|
11
|
+
|
|
12
|
+
from openprotein._version import __version__
|
|
13
|
+
from openprotein.data import DataAPI
|
|
14
|
+
from openprotein.errors import DeprecationError
|
|
15
|
+
from openprotein.jobs import JobsAPI
|
|
16
|
+
from openprotein.align import AlignAPI
|
|
17
|
+
from openprotein.prompt import PromptAPI
|
|
18
|
+
from openprotein.embeddings import EmbeddingsAPI
|
|
19
|
+
from openprotein.fold import FoldAPI
|
|
20
|
+
from openprotein.models import ModelsAPI
|
|
21
|
+
from openprotein.svd import SVDAPI
|
|
22
|
+
from openprotein.umap import UMAPAPI
|
|
23
|
+
from openprotein.predictor import PredictorAPI
|
|
24
|
+
from openprotein.design import DesignAPI
|
|
25
|
+
from openprotein.jobs import Future
|
|
26
|
+
from openprotein.base import APISession
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class OpenProtein(APISession):
    """
    Session class through which the OpenProtein API submodules are reached.

    Every submodule is exposed as a property. The submodule client is built the
    first time its property is read and the same instance is returned afterwards.
    """

    # Per-submodule caches; each stays None until the matching property is first read.
    _data = None
    _jobs = None
    _align = None
    _prompt = None
    _embeddings = None
    _svd = None
    _umap = None
    _fold = None
    _predictor = None
    _design = None
    _models = None

    def wait(self, future: Future, *args, **kwargs):
        """Wait on `future` by delegating to `future.wait`, forwarding all extra arguments."""
        outcome = future.wait(*args, **kwargs)
        return outcome

    # Alternate name bound to the same function as `wait`.
    wait_until_done = wait

    def load_job(self, job_id):
        """Fetch a job by its ID through the jobs submodule."""
        jobs_api = self.jobs
        return jobs_api.get(job_id=job_id)

    @property
    def data(self) -> DataAPI:
        """
        The data submodule gives access to functionality for uploading and accessing user data.
        """
        if self._data is not None:
            return self._data
        self._data = DataAPI(self)
        return self._data

    @property
    def jobs(self) -> JobsAPI:
        """
        The jobs submodule gives access to functionality for listing jobs and checking their status.
        """
        if self._jobs is not None:
            return self._jobs
        self._jobs = JobsAPI(self)
        return self._jobs

    @property
    def align(self) -> AlignAPI:
        """
        The align submodule gives access to the sequence alignment capabilities by building MSAs and prompts that can be used with PoET.
        """
        if self._align is not None:
            return self._align
        self._align = AlignAPI(self)
        return self._align

    @property
    def prompt(self) -> PromptAPI:
        """
        The prompt submodule gives access to the prompt API (`PromptAPI`).
        """
        if self._prompt is not None:
            return self._prompt
        self._prompt = PromptAPI(self)
        return self._prompt

    @property
    def embedding(self) -> EmbeddingsAPI:
        """
        The embedding submodule gives access to protein embedding models and their inference endpoints.
        """
        if self._embeddings is not None:
            return self._embeddings
        self._embeddings = EmbeddingsAPI(self)
        return self._embeddings

    # Plural alias for the `embedding` property.
    embeddings = embedding

    @property
    def svd(self) -> SVDAPI:
        """
        The svd submodule gives access to the SVD API (`SVDAPI`).
        """
        if self._svd is not None:
            return self._svd
        self._svd = SVDAPI(
            session=self,
        )
        return self._svd

    @property
    def umap(self) -> UMAPAPI:
        """
        The umap submodule gives access to the UMAP API (`UMAPAPI`).
        """
        if self._umap is not None:
            return self._umap
        self._umap = UMAPAPI(
            session=self,
        )
        return self._umap

    @property
    def predictor(self) -> PredictorAPI:
        """
        The predictor submodule gives access to training and predicting with predictors built on top of embeddings.
        """
        if self._predictor is not None:
            return self._predictor
        self._predictor = PredictorAPI(
            session=self,
        )
        return self._predictor

    @property
    def design(self) -> DesignAPI:
        """
        The design submodule gives access to functionality for designing new sequences using models from predictor train.
        """
        if self._design is not None:
            return self._design
        self._design = DesignAPI(
            session=self,
        )
        return self._design

    @property
    def fold(self) -> FoldAPI:
        """
        The fold submodule gives access to functionality for folding sequences and returning PDBs.
        """
        if self._fold is not None:
            return self._fold
        self._fold = FoldAPI(self)
        return self._fold

    @property
    def models(self) -> "ModelsAPI":
        """
        The models submodule provides a unified entry point to all protein models.
        """
        if self._models is not None:
            return self._models
        self._models = ModelsAPI(self)
        return self._models
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# Convenience alias: calling `connect(...)` constructs an `OpenProtein` session.
connect = OpenProtein
|
openprotein/_version.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Compute the version number and store it in the `__version__` variable.
|
|
2
|
+
|
|
3
|
+
Based on <https://github.com/maresb/hatch-vcs-footgun-example>.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _get_hatch_version():
    """Return the project version as computed by Hatchling, if it is available.

    Intended for development environments, where the version derived from the
    project metadata is more current than the installed package metadata.

    Returns `None` when Hatchling cannot be imported, e.g. in a production
    environment. Raises `RuntimeError` when Hatchling is importable but no
    `pyproject.toml` can be located starting from this file.

    For more details, see <https://github.com/maresb/hatch-vcs-footgun-example/>.
    """
    import os

    try:
        from hatchling.metadata.core import ProjectMetadata
        from hatchling.plugin.manager import PluginManager
        from hatchling.utils.fs import locate_file
    except ImportError:
        # No Hatchling: most likely not a development environment.
        return None

    config_path = locate_file(__file__, "pyproject.toml")
    if config_path is None:
        raise RuntimeError("pyproject.toml not found although hatchling is installed")

    project = ProjectMetadata(
        root=os.path.dirname(config_path), plugin_manager=PluginManager()
    )
    # A version set statically in pyproject.toml wins; otherwise use the
    # dynamically computed one.
    static_version = project.core.version
    if static_version:
        return static_version
    return project.hatch.version.cached
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _get_importlib_metadata_version():
    """Return the version recorded in the installed package metadata.

    The lookup goes through `importlib.metadata` using this module's
    `__package__` as the distribution name. The metadata is only refreshed
    when the package is installed, so for an editable install with a different
    version checked out the value returned here will not reflect the checkout.
    """
    from importlib import metadata

    return metadata.version(__package__)  # type: ignore
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Prefer the Hatchling-computed version; fall back to the installed package
# metadata when that lookup yields a falsy value (e.g. None without Hatchling).
__version__ = _get_hatch_version() or _get_importlib_metadata_version()
|
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
"""Align API interface for creating alignments and MSAs (multiple sequence alignments) which can be used for other protein tasks."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
from io import BytesIO
|
|
5
|
+
from typing import BinaryIO, Iterator
|
|
6
|
+
|
|
7
|
+
from openprotein.base import APISession
|
|
8
|
+
from openprotein.errors import DeprecationError
|
|
9
|
+
from openprotein.jobs import Job
|
|
10
|
+
from openprotein.protein import Protein
|
|
11
|
+
|
|
12
|
+
from . import api
|
|
13
|
+
from .msa import MSAFuture
|
|
14
|
+
from .schemas import AbNumberScheme, AlignType
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AlignAPI:
    """Align API interface for creating alignments and MSAs (multiple sequence alignments) which can be used for other protein tasks."""

    def __init__(self, session: APISession):
        """
        Bind the API to a session.

        Parameters
        ----------
        session : APISession
            Session passed to every underlying API call.
        """
        self.session = session

    @staticmethod
    def _sequences_to_stream(
        sequences: Sequence[bytes | str],
        names: Sequence[str] | None = None,
    ) -> BytesIO:
        """
        Serialize sequences (and optional names) into an in-memory upload payload.

        Without names, the payload is one sequence per line (CSV with a single
        column). With names, the payload is FASTA: a ``>name`` line followed by
        the sequence line for each entry. ``str`` values are encoded to bytes;
        ``bytes`` values are used as-is.

        Parameters
        ----------
        sequences : Sequence[bytes or str]
            Sequences to serialize.
        names : Sequence[str], optional
            Names matching `sequences` one-to-one.

        Returns
        -------
        BytesIO
            Stream positioned at the start of the serialized payload.

        Raises
        ------
        ValueError
            If names and sequences are not the same length.
        """
        if names is not None and len(names) != len(sequences):
            raise ValueError(
                f"Names and sequences must be same length, but were {len(names)} and {len(sequences)}"
            )

        def _as_bytes(value):
            return value.encode() if isinstance(value, str) else value

        if names is None:
            # as CSV
            lines = [_as_bytes(sequence) for sequence in sequences]
        else:
            # as fasta
            lines = []
            for name, sequence in zip(names, sequences):
                lines.append(b">" + _as_bytes(name))
                lines.append(_as_bytes(sequence))
        return BytesIO(b"\n".join(lines))

    def mafft(
        self,
        sequences: Sequence[bytes | str],
        names: Sequence[str] | None = None,
        auto: bool = True,
        ep: float | None = None,
        op: float | None = None,
    ) -> MSAFuture:
        """
        Align sequences using the `mafft` algorithm.

        Set `auto` to True to automatically attempt the best parameters. Leave a parameter as None to use system defaults.

        Parameters
        ----------
        sequences : Sequence[bytes or str]
            Sequences to align.
        names : Sequence[str], optional
            Optional list of sequence names, must be the same length as sequences if provided.
        auto : bool, default=True
            Set to True to automatically set algorithm parameters.
        ep : float, optional
            MAFFT "ep" parameter. Sets the offset value for the scoring matrix; lower values make gap opening more difficult. If None, uses system default.
        op : float, optional
            MAFFT "op" parameter. Sets the gap opening penalty; higher values increase the cost of opening gaps. If None, uses system default.

        Returns
        -------
        MSAFuture
            Future object for the submitted alignment job.

        Raises
        ------
        ValueError
            If names and sequences are not the same length.
        """
        stream = self._sequences_to_stream(sequences, names)
        return self.mafft_file(stream, auto=auto, ep=ep, op=op)

    def mafft_file(
        self,
        file,
        auto: bool = True,
        ep: float | None = None,
        op: float | None = None,
    ) -> MSAFuture:
        """
        Align sequences using the `mafft` algorithm. Sequences can be provided as FASTA or CSV formats.
        If CSV, the file must be headerless with either a single sequence column or name, sequence columns.

        Set `auto` to True to automatically attempt the best parameters. Leave a parameter as None to use system defaults.

        Parameters
        ----------
        file : file-like object
            Sequences to align in FASTA or CSV format.
        auto : bool, default=True
            Set to True to automatically set algorithm parameters.
        ep : float, optional
            MAFFT "ep" parameter. Sets the offset value for the scoring matrix; lower values make gap opening more difficult. If None, uses system default.
        op : float, optional
            MAFFT "op" parameter. Sets the gap opening penalty; higher values increase the cost of opening gaps. If None, uses system default.

        Returns
        -------
        MSAFuture
            Future object for the submitted alignment job.
        """
        job = api.mafft_post(self.session, file, auto=auto, ep=ep, op=op)
        return MSAFuture.create(session=self.session, job=job)

    def clustalo(
        self,
        sequences: Sequence[bytes | str],
        names: Sequence[str] | None = None,
        clustersize: int | None = None,
        iterations: int | None = None,
    ) -> MSAFuture:
        """
        Align sequences using the `clustal omega` algorithm.

        Parameters
        ----------
        sequences : Sequence[bytes or str]
            Sequences to align.
        names : Sequence[str], optional
            Optional list of sequence names, must be the same length as sequences if provided.
        clustersize : int, optional
            Maximum number of sequences per cluster during guide tree generation. If None, uses the default value.
        iterations : int, optional
            Number of refinement iterations performed during alignment. If None, uses the default value.

        Returns
        -------
        MSAFuture
            Future object for the submitted alignment job.

        Raises
        ------
        ValueError
            If names and sequences are not the same length.
        """
        stream = self._sequences_to_stream(sequences, names)
        return self.clustalo_file(
            stream, clustersize=clustersize, iterations=iterations
        )

    def clustalo_file(
        self,
        file,
        clustersize: int | None = None,
        iterations: int | None = None,
    ) -> MSAFuture:
        """
        Align sequences using the `clustal omega` algorithm.

        Sequences can be provided as FASTA or CSV formats. If CSV, the file must be headerless with either a single sequence column or name, sequence columns.

        Parameters
        ----------
        file : file-like object
            Sequences to align in FASTA or CSV format.
        clustersize : int, optional
            Maximum number of sequences per cluster during guide tree generation. If None, uses the default value.
        iterations : int, optional
            Number of refinement iterations performed during alignment. If None, uses the default value.

        Returns
        -------
        MSAFuture
            Future object for the submitted alignment job.
        """
        job = api.clustalo_post(
            self.session, file, clustersize=clustersize, iterations=iterations
        )
        return MSAFuture.create(session=self.session, job=job)

    def abnumber(
        self,
        sequences: Sequence[bytes | str],
        names: Sequence[str] | None = None,
        scheme: AbNumberScheme = AbNumberScheme.CHOTHIA,
    ) -> MSAFuture:
        """
        Align antibody sequences using `AbNumber`.

        The antibody numbering scheme can be specified.

        Parameters
        ----------
        sequences : Sequence[bytes or str]
            Sequences to align.
        names : Sequence[str], optional
            Optional list of sequence names, must be the same length as sequences if provided.
        scheme : AbNumberScheme, default=AbNumberScheme.CHOTHIA
            Antibody numbering scheme.

        Returns
        -------
        MSAFuture
            Future object for the submitted alignment job.

        Raises
        ------
        ValueError
            If names and sequences are not the same length.
        """
        stream = self._sequences_to_stream(sequences, names)
        return self.abnumber_file(stream, scheme=scheme)

    def abnumber_file(
        self, file, scheme: AbNumberScheme = AbNumberScheme.CHOTHIA
    ) -> MSAFuture:
        """
        Align antibody sequences using `AbNumber`.

        Sequences can be provided as FASTA or CSV formats. If CSV, the file must be headerless with either a single sequence column or name, sequence columns.

        The antibody numbering scheme can be specified.

        Parameters
        ----------
        file : file-like object
            Sequences to align in FASTA or CSV format.
        scheme : AbNumberScheme, default=AbNumberScheme.CHOTHIA
            Antibody numbering scheme.

        Returns
        -------
        MSAFuture
            Future object for the submitted alignment job.
        """
        job = api.abnumber_post(self.session, file, scheme=scheme)
        return MSAFuture.create(session=self.session, job=job)

    def upload_msa(self, msa_file: BinaryIO) -> MSAFuture:
        """
        Upload an MSA from a file.

        Parameters
        ----------
        msa_file : BinaryIO
            Binary file-like object containing a ready-made MSA.

        Returns
        -------
        MSAFuture
            Future object awaiting the contents of the MSA upload.

        Raises
        ------
        APIError
            If there is an issue with the API request.
        """
        job = api.msa_post(self.session, msa_file=msa_file)
        return MSAFuture.create(session=self.session, job=job)

    def create_msa(self, seed: bytes) -> MSAFuture:
        """
        Construct an MSA via homology search with the seed sequence.

        Parameters
        ----------
        seed : bytes
            Seed sequence for the MSA construction.

        Returns
        -------
        MSAFuture
            Future object for the submitted MSA job.

        Raises
        ------
        APIError
            If there is an issue with the API request.
        """
        job = api.msa_post(self.session, seed=seed)
        return MSAFuture.create(session=self.session, job=job)

    def upload_prompt(self, prompt_file: BinaryIO):
        """
        Directly upload a prompt.

        This method is deprecated and always raises. Use `create_prompt` on the `prompt` module instead.

        Parameters
        ----------
        prompt_file : BinaryIO
            Binary I/O object representing the prompt file (unused).

        Raises
        ------
        DeprecationError
            Always; this method is no longer supported.
        """
        raise DeprecationError(
            "This method is no longer supported! Use `create_prompt` on the `prompt` module instead."
        )

    def get_prompt(
        self, job: Job, prompt_index: int | None = None
    ) -> Iterator[list[str]]:
        """
        Get prompts for a given job.

        This method is deprecated and always raises. Use `get_prompt` on the `prompt` module instead.

        Parameters
        ----------
        job : Job
            The job for which to retrieve data (unused).
        prompt_index : int, optional
            The replicate number for the prompt (unused).

        Raises
        ------
        DeprecationError
            Always; this method is no longer supported.
        """
        raise DeprecationError(
            "This method is no longer supported! Use `get_prompt` on the `prompt` module instead."
        )

    def get_seed(self, job_id: str) -> str:
        """
        Get seed sequence for a given MSA job.

        Parameters
        ----------
        job_id : str
            ID of the MSA job for which to retrieve the seed.

        Returns
        -------
        str
            Seed sequence that was used to generate the MSA.
        """
        return api.get_seed(session=self.session, job_id=job_id)

    def get_msa(self, job_id: str) -> Iterator[tuple[str, str]]:
        """
        Get generated MSA for a given job.

        Parameters
        ----------
        job_id : str
            ID of the job for which to retrieve the MSA.

        Returns
        -------
        Iterator[tuple[str, str]]
            An iterator over names and sequences of the MSA data.
        """
        return api.get_msa(session=self.session, job_id=job_id)
|