openprotein-python 0.1.1b4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openprotein_python-0.1.1b4/LICENSE.txt +30 -0
- openprotein_python-0.1.1b4/PKG-INFO +124 -0
- openprotein_python-0.1.1b4/README.md +100 -0
- openprotein_python-0.1.1b4/openprotein_python/__init__.py +70 -0
- openprotein_python-0.1.1b4/openprotein_python/_version.py +2 -0
- openprotein_python-0.1.1b4/openprotein_python/api/__init__.py +6 -0
- openprotein_python-0.1.1b4/openprotein_python/api/data.py +507 -0
- openprotein_python-0.1.1b4/openprotein_python/api/design.py +259 -0
- openprotein_python-0.1.1b4/openprotein_python/api/embedding.py +834 -0
- openprotein_python-0.1.1b4/openprotein_python/api/jobs.py +473 -0
- openprotein_python-0.1.1b4/openprotein_python/api/poet.py +1427 -0
- openprotein_python-0.1.1b4/openprotein_python/api/predict.py +567 -0
- openprotein_python-0.1.1b4/openprotein_python/api/train.py +674 -0
- openprotein_python-0.1.1b4/openprotein_python/base.py +100 -0
- openprotein_python-0.1.1b4/openprotein_python/config.py +9 -0
- openprotein_python-0.1.1b4/openprotein_python/errors.py +36 -0
- openprotein_python-0.1.1b4/openprotein_python/fasta.py +39 -0
- openprotein_python-0.1.1b4/openprotein_python/models.py +377 -0
- openprotein_python-0.1.1b4/pyproject.toml +25 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
Copyright (c) 2021-2023, OpenProtein.
|
|
2
|
+
All rights reserved.
|
|
3
|
+
|
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
|
5
|
+
modification, are permitted provided that the following conditions are
|
|
6
|
+
met:
|
|
7
|
+
|
|
8
|
+
* Redistributions of source code must retain the above copyright
|
|
9
|
+
notice, this list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
* Redistributions in binary form must reproduce the above
|
|
12
|
+
copyright notice, this list of conditions and the following
|
|
13
|
+
disclaimer in the documentation and/or other materials provided
|
|
14
|
+
with the distribution.
|
|
15
|
+
|
|
16
|
+
* Neither the name of the OpenProtein Developers nor the names of any
|
|
17
|
+
contributors may be used to endorse or promote products derived
|
|
18
|
+
from this software without specific prior written permission.
|
|
19
|
+
|
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
21
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
22
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
23
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
24
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
25
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
26
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
27
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
28
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
29
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
30
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: openprotein-python
|
|
3
|
+
Version: 0.1.1b4
|
|
4
|
+
Summary: OpenProtein Python interface.
|
|
5
|
+
Home-page: https://docs.openprotein.ai/
|
|
6
|
+
License: MIT
|
|
7
|
+
Author: OpenProtein
|
|
8
|
+
Author-email: info@ne47.bio
|
|
9
|
+
Requires-Python: >=3.7,<4.0
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Requires-Dist: pandas (>=1.2,<2.0)
|
|
20
|
+
Requires-Dist: pydantic (>=1.8,<2.0)
|
|
21
|
+
Requires-Dist: requests (>=2.25,<3.0)
|
|
22
|
+
Requires-Dist: tqdm (>=4.61,<5.0)
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# openprotein-python
|
|
26
|
+
Python interface for the OpenProtein.AI REST API.
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
You can install with pip:
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
pip install openprotein-python
|
|
34
|
+
```
|
|
35
|
+
## Getting started
|
|
36
|
+
|
|
37
|
+
First, create a session using your login credentials.
|
|
38
|
+
```
|
|
39
|
+
import openprotein
|
|
40
|
+
session = openprotein.connect(USERNAME, PASSWORD)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Async calls return `AsyncJobFuture` objects that allow tracking the status of the job and retrieving the result when it's ready.
|
|
44
|
+
|
|
45
|
+
Given a future, you can check its status and retrieve its results:
|
|
46
|
+
```
|
|
47
|
+
future.refresh() # call the backend to update the job status
|
|
48
|
+
future.done() # returns True if the job is done, meaning the status could be SUCCESS, FAILED, or CANCELLED
|
|
49
|
+
future.wait() # wait until done, then fetch results; verbosity is controlled with the verbose arg
|
|
50
|
+
result = future.get() # get the result from a finished job
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
### Jobs interface
|
|
55
|
+
|
|
56
|
+
List your jobs, optionally filtered by date, job type, and status.
|
|
57
|
+
```
|
|
58
|
+
session.jobs.list() # list all jobs
|
|
59
|
+
session.jobs.get(JOB_ID) # get a specific job
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Resume an `AsyncJobFuture` from where you left off with each API's load_job:
|
|
63
|
+
|
|
64
|
+
For example, for training jobs:
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
session.train.load_job(JOB_ID)
|
|
68
|
+
```
|
|
69
|
+
### PoET interface
|
|
70
|
+
|
|
71
|
+
Score sequences using the PoET interface.
|
|
72
|
+
```
|
|
73
|
+
prompt_seqs = b'MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN'
|
|
74
|
+
|
|
75
|
+
prompt = session.poet.upload_prompt(prompt_seqs)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
queries = [
|
|
80
|
+
b'MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN',
|
|
81
|
+
b'MALWMRLLPLLVLLALWGPDPASAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN',
|
|
82
|
+
b'MALWTRLRPLLALLALWPPPPARAFVNQHLCGSHLVEALYLVCGERGFFYTPKARREVEGPQVGALELAGGPGAGGLEGPPQKRGIVEQCCASVCSLYQLENYCN',
|
|
83
|
+
b'MALWIRSLPLLALLVFSGPGTSYAAANQHLCGSHLVEALYLVCGERGFFYSPKARRDVEQPLVSSPLRGEAGVLPFQQEEYEKVKRGIVEQCCHNTCSLYQLENYCN',
|
|
84
|
+
b'MALWMRLLPLLALLALWAPAPTRAFVNQHLCGSHLVEALYLVCGERGFFYTPKARREVEDLQVRDVELAGAPGEGGLQPLALEGALQKRGIVEQCCTSICSLYQLENYCN',
|
|
85
|
+
]
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
future = session.poet.score(prompt, queries)
|
|
90
|
+
result = future.wait()
|
|
91
|
+
# result is a list of (sequence, score) pydantic objects
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Score single site variants using the PoET interface.
|
|
95
|
+
```
|
|
96
|
+
sequence = "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN"
|
|
97
|
+
future = session.poet.single_site(prompt, sequence, prompt_is_seed=True)
|
|
98
|
+
result = future.wait()
|
|
99
|
+
# result is a dictionary of {variant: score}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Generate sequences from the PoET model.
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
future = session.poet.generate(
|
|
106
|
+
prompt,
|
|
107
|
+
max_seqs_from_msa=1024,
|
|
108
|
+
num_samples=100,
|
|
109
|
+
temperature=1.0,
|
|
110
|
+
topk=15
|
|
111
|
+
)
|
|
112
|
+
samples = future.wait()
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Retrieve the prompt, MSA, or input (seed) sequences for a PoET job.
|
|
116
|
+
```
|
|
117
|
+
future.get_input(INPUT_TYPE)
|
|
118
|
+
# or, functions for each type
|
|
119
|
+
future.get_prompt()
|
|
120
|
+
future.get_msa()
|
|
121
|
+
future.get_seed()
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
See more at our [Homepage](https://docs.openprotein.ai/)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# openprotein-python
|
|
2
|
+
Python interface for the OpenProtein.AI REST API.
|
|
3
|
+
|
|
4
|
+
## Installation
|
|
5
|
+
|
|
6
|
+
You can install with pip:
|
|
7
|
+
|
|
8
|
+
```
|
|
9
|
+
pip install openprotein-python
|
|
10
|
+
```
|
|
11
|
+
## Getting started
|
|
12
|
+
|
|
13
|
+
First, create a session using your login credentials.
|
|
14
|
+
```
|
|
15
|
+
import openprotein
|
|
16
|
+
session = openprotein.connect(USERNAME, PASSWORD)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Async calls return `AsyncJobFuture` objects that allow tracking the status of the job and retrieving the result when it's ready.
|
|
20
|
+
|
|
21
|
+
Given a future, you can check its status and retrieve its results:
|
|
22
|
+
```
|
|
23
|
+
future.refresh() # call the backend to update the job status
|
|
24
|
+
future.done() # returns True if the job is done, meaning the status could be SUCCESS, FAILED, or CANCELLED
|
|
25
|
+
future.wait() # wait until done, then fetch results; verbosity is controlled with the verbose arg
|
|
26
|
+
result = future.get() # get the result from a finished job
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
### Jobs interface
|
|
31
|
+
|
|
32
|
+
List your jobs, optionally filtered by date, job type, and status.
|
|
33
|
+
```
|
|
34
|
+
session.jobs.list() # list all jobs
|
|
35
|
+
session.jobs.get(JOB_ID) # get a specific job
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Resume an `AsyncJobFuture` from where you left off with each API's load_job:
|
|
39
|
+
|
|
40
|
+
For example, for training jobs:
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
session.train.load_job(JOB_ID)
|
|
44
|
+
```
|
|
45
|
+
### PoET interface
|
|
46
|
+
|
|
47
|
+
Score sequences using the PoET interface.
|
|
48
|
+
```
|
|
49
|
+
prompt_seqs = b'MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN'
|
|
50
|
+
|
|
51
|
+
prompt = session.poet.upload_prompt(prompt_seqs)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
queries = [
|
|
56
|
+
b'MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN',
|
|
57
|
+
b'MALWMRLLPLLVLLALWGPDPASAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN',
|
|
58
|
+
b'MALWTRLRPLLALLALWPPPPARAFVNQHLCGSHLVEALYLVCGERGFFYTPKARREVEGPQVGALELAGGPGAGGLEGPPQKRGIVEQCCASVCSLYQLENYCN',
|
|
59
|
+
b'MALWIRSLPLLALLVFSGPGTSYAAANQHLCGSHLVEALYLVCGERGFFYSPKARRDVEQPLVSSPLRGEAGVLPFQQEEYEKVKRGIVEQCCHNTCSLYQLENYCN',
|
|
60
|
+
b'MALWMRLLPLLALLALWAPAPTRAFVNQHLCGSHLVEALYLVCGERGFFYTPKARREVEDLQVRDVELAGAPGEGGLQPLALEGALQKRGIVEQCCTSICSLYQLENYCN',
|
|
61
|
+
]
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
future = session.poet.score(prompt, queries)
|
|
66
|
+
result = future.wait()
|
|
67
|
+
# result is a list of (sequence, score) pydantic objects
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Score single site variants using the PoET interface.
|
|
71
|
+
```
|
|
72
|
+
sequence = "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN"
|
|
73
|
+
future = session.poet.single_site(prompt, sequence, prompt_is_seed=True)
|
|
74
|
+
result = future.wait()
|
|
75
|
+
# result is a dictionary of {variant: score}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Generate sequences from the PoET model.
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
future = session.poet.generate(
|
|
82
|
+
prompt,
|
|
83
|
+
max_seqs_from_msa=1024,
|
|
84
|
+
num_samples=100,
|
|
85
|
+
temperature=1.0,
|
|
86
|
+
topk=15
|
|
87
|
+
)
|
|
88
|
+
samples = future.wait()
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Retrieve the prompt, MSA, or input (seed) sequences for a PoET job.
|
|
92
|
+
```
|
|
93
|
+
future.get_input(INPUT_TYPE)
|
|
94
|
+
# or, functions for each type
|
|
95
|
+
future.get_prompt()
|
|
96
|
+
future.get_msa()
|
|
97
|
+
future.get_seed()
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
See more at our [Homepage](https://docs.openprotein.ai/)
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from openprotein._version import __version__
|
|
2
|
+
|
|
3
|
+
from openprotein.base import APISession
|
|
4
|
+
from openprotein.api.jobs import JobsAPI, Job
|
|
5
|
+
from openprotein.api.data import DataAPI
|
|
6
|
+
from openprotein.api.poet import PoetAPI
|
|
7
|
+
from openprotein.api.embedding import EmbeddingAPI
|
|
8
|
+
from openprotein.api.train import TrainingAPI
|
|
9
|
+
from openprotein.api.design import DesignAPI
|
|
10
|
+
from openprotein.api.predict import PredictAPI
|
|
11
|
+
class OpenProtein(APISession):
    """
    Session class that serves as the entry point to the OpenProtein API.

    Each API area is exposed as a read-only property that constructs the
    matching API object bound to this session.
    """

    def wait(self, job: Job, *args, **kwargs):
        """
        Wait on ``job`` by delegating to ``job.wait``.

        This session is passed as the first argument, followed by any extra
        positional and keyword arguments. Whatever ``job.wait`` returns is
        returned unchanged.
        """
        return job.wait(self, *args, **kwargs)

    # Second name bound to the same function as ``wait``.
    wait_until_done = wait

    @property
    def jobs(self) -> JobsAPI:
        """
        Jobs submodule: listing jobs and checking their status.
        """
        return JobsAPI(self)

    @property
    def data(self) -> DataAPI:
        """
        Data submodule: uploading and accessing user data.
        """
        return DataAPI(self)

    @property
    def train(self) -> TrainingAPI:
        """
        Train submodule: training and validating ML models.
        """
        return TrainingAPI(self)

    @property
    def poet(self) -> PoetAPI:
        """
        PoET submodule: the PoET generative model plus the MSA and prompt
        creation interfaces.
        """
        return PoetAPI(self)

    @property
    def embedding(self) -> EmbeddingAPI:
        """
        Embedding submodule: protein embedding models and their inference
        endpoints.
        """
        return EmbeddingAPI(self)

    @property
    def predict(self) -> PredictAPI:
        """
        Predict submodule: sequence predictions using models from train.
        """
        return PredictAPI(self)

    @property
    def design(self) -> DesignAPI:
        """
        Design submodule: designing new sequences using models from train.
        """
        return DesignAPI(self)
|
|
69
|
+
|
|
70
|
+
# Alias for the session class, so a session can be created with
# `openprotein.connect(USERNAME, PASSWORD)` as shown in the README.
connect = OpenProtein
|