biolmai 0.1.5__py2.py3-none-any.whl → 0.1.7__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biolmai might be problematic. Click here for more details.
- biolmai/__init__.py +3 -7
- biolmai/api.py +93 -89
- biolmai/asynch.py +65 -67
- biolmai/auth.py +47 -44
- biolmai/biolmai.py +1 -3
- biolmai/cli.py +30 -22
- biolmai/cls.py +33 -36
- biolmai/const.py +13 -11
- biolmai/payloads.py +9 -10
- biolmai/validate.py +55 -28
- {biolmai-0.1.5.dist-info → biolmai-0.1.7.dist-info}/METADATA +1 -1
- biolmai-0.1.7.dist-info/RECORD +18 -0
- {biolmai-0.1.5.dist-info → biolmai-0.1.7.dist-info}/WHEEL +1 -1
- biolmai-0.1.5.dist-info/RECORD +0 -18
- {biolmai-0.1.5.dist-info → biolmai-0.1.7.dist-info}/AUTHORS.rst +0 -0
- {biolmai-0.1.5.dist-info → biolmai-0.1.7.dist-info}/LICENSE +0 -0
- {biolmai-0.1.5.dist-info → biolmai-0.1.7.dist-info}/entry_points.txt +0 -0
- {biolmai-0.1.5.dist-info → biolmai-0.1.7.dist-info}/top_level.txt +0 -0
biolmai/auth.py
CHANGED
|
@@ -6,20 +6,19 @@ import stat
|
|
|
6
6
|
import click
|
|
7
7
|
import requests
|
|
8
8
|
|
|
9
|
-
from biolmai.const import ACCESS_TOK_PATH, BASE_DOMAIN, GEN_TOKEN_URL,
|
|
10
|
-
USER_BIOLM_DIR
|
|
9
|
+
from biolmai.const import ACCESS_TOK_PATH, BASE_DOMAIN, GEN_TOKEN_URL, USER_BIOLM_DIR
|
|
11
10
|
|
|
12
11
|
|
|
13
12
|
def validate_user_auth(api_token=None, access=None, refresh=None):
|
|
14
13
|
"""Validates an API token, to be used as 'Authorization: Token 1235abc'
|
|
15
14
|
authentication method."""
|
|
16
|
-
url = f
|
|
15
|
+
url = f"{BASE_DOMAIN}/api/v1/auth/login-check/"
|
|
17
16
|
if api_token is not None:
|
|
18
|
-
headers = {
|
|
17
|
+
headers = {"Authorization": f"Token {api_token}"}
|
|
19
18
|
else:
|
|
20
19
|
headers = {
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
"Cookie": f"access={access};refresh={refresh}",
|
|
21
|
+
"Content-Type": "application/json",
|
|
23
22
|
}
|
|
24
23
|
try:
|
|
25
24
|
r = requests.post(url=url, headers=headers)
|
|
@@ -36,28 +35,25 @@ def validate_user_auth(api_token=None, access=None, refresh=None):
|
|
|
36
35
|
def refresh_access_token(refresh):
|
|
37
36
|
"""Attempt to refresh temporary user access token, by using their refresh
|
|
38
37
|
token, which has a longer TTL."""
|
|
39
|
-
url = f
|
|
40
|
-
headers = {
|
|
41
|
-
'Cookie': 'refresh={}'.format(refresh),
|
|
42
|
-
'Content-Type': 'application/json'
|
|
43
|
-
}
|
|
38
|
+
url = f"{BASE_DOMAIN}/api/auth/token/refresh/"
|
|
39
|
+
headers = {"Cookie": f"refresh={refresh}", "Content-Type": "application/json"}
|
|
44
40
|
r = requests.post(url=url, headers=headers)
|
|
45
41
|
json_response = r.json()
|
|
46
|
-
if r.status_code != 200 or (r.status_code == 200 and
|
|
42
|
+
if r.status_code != 200 or (r.status_code == 200 and "code" in r.json()):
|
|
47
43
|
pretty_json = pprint.pformat(json_response, indent=2)
|
|
48
44
|
click.echo(pretty_json)
|
|
49
|
-
click.echo(
|
|
50
|
-
|
|
45
|
+
click.echo(
|
|
46
|
+
"Token refresh failed! Please login by " "running `biolmai login`.\n"
|
|
47
|
+
)
|
|
51
48
|
return False
|
|
52
49
|
else:
|
|
53
|
-
access_refresh_dict = {
|
|
54
|
-
'refresh': refresh}
|
|
50
|
+
access_refresh_dict = {"access": json_response["access"], "refresh": refresh}
|
|
55
51
|
save_access_refresh_token(access_refresh_dict)
|
|
56
52
|
return True
|
|
57
53
|
|
|
58
54
|
|
|
59
55
|
def get_auth_status():
|
|
60
|
-
environ_token = os.environ.get(
|
|
56
|
+
environ_token = os.environ.get("BIOLMAI_TOKEN", None)
|
|
61
57
|
if environ_token:
|
|
62
58
|
msg = "Environment variable BIOLMAI_TOKEN detected. Validating token..."
|
|
63
59
|
click.echo(msg)
|
|
@@ -65,12 +61,14 @@ def get_auth_status():
|
|
|
65
61
|
elif os.path.exists(ACCESS_TOK_PATH):
|
|
66
62
|
msg = f"Credentials file found {ACCESS_TOK_PATH}. Validating token..."
|
|
67
63
|
click.echo(msg)
|
|
68
|
-
with open(ACCESS_TOK_PATH
|
|
64
|
+
with open(ACCESS_TOK_PATH) as f:
|
|
69
65
|
access_refresh_dict = json.load(f)
|
|
70
|
-
access = access_refresh_dict.get(
|
|
71
|
-
refresh = access_refresh_dict.get(
|
|
66
|
+
access = access_refresh_dict.get("access")
|
|
67
|
+
refresh = access_refresh_dict.get("refresh")
|
|
72
68
|
resp = validate_user_auth(access=access, refresh=refresh)
|
|
73
|
-
if resp.status_code != 200 or (
|
|
69
|
+
if resp.status_code != 200 or (
|
|
70
|
+
resp.status_code == 200 and "code" in resp.json()
|
|
71
|
+
):
|
|
74
72
|
click.echo("Access token validation failed. Attempting to refresh token...")
|
|
75
73
|
# Attempt to use the 'refresh' token to get a new 'access' token
|
|
76
74
|
if not refresh_access_token(refresh):
|
|
@@ -78,9 +76,11 @@ def get_auth_status():
|
|
|
78
76
|
else:
|
|
79
77
|
click.echo("Access token refresh was successful.")
|
|
80
78
|
else:
|
|
81
|
-
msg =
|
|
82
|
-
|
|
83
|
-
|
|
79
|
+
msg = (
|
|
80
|
+
f"No https://biolm.ai credentials found. Please "
|
|
81
|
+
f"set the environment variable BIOLMAI_TOKEN to a token from "
|
|
82
|
+
f"{GEN_TOKEN_URL}, or login by running `biolmai login`."
|
|
83
|
+
)
|
|
84
84
|
click.echo(msg)
|
|
85
85
|
|
|
86
86
|
|
|
@@ -95,9 +95,9 @@ def generate_access_token(uname, password):
|
|
|
95
95
|
more permanent auth method for the API, use an API token by setting the
|
|
96
96
|
BIOLMAI_TOKEN environment variable.
|
|
97
97
|
"""
|
|
98
|
-
url = f
|
|
98
|
+
url = f"{BASE_DOMAIN}/api/auth/token/"
|
|
99
99
|
try:
|
|
100
|
-
r = requests.post(url=url, data={
|
|
100
|
+
r = requests.post(url=url, data={"username": uname, "password": password})
|
|
101
101
|
json_response = r.json()
|
|
102
102
|
except Exception:
|
|
103
103
|
click.echo("Login failed!\n")
|
|
@@ -118,12 +118,12 @@ def save_access_refresh_token(access_refresh_dict):
|
|
|
118
118
|
use."""
|
|
119
119
|
os.makedirs(USER_BIOLM_DIR, exist_ok=True)
|
|
120
120
|
# Save token
|
|
121
|
-
with open(ACCESS_TOK_PATH,
|
|
121
|
+
with open(ACCESS_TOK_PATH, "w") as f:
|
|
122
122
|
json.dump(access_refresh_dict, f)
|
|
123
123
|
os.chmod(ACCESS_TOK_PATH, stat.S_IRUSR | stat.S_IWUSR)
|
|
124
124
|
# Validate token and print user info
|
|
125
|
-
access = access_refresh_dict.get(
|
|
126
|
-
refresh = access_refresh_dict.get(
|
|
125
|
+
access = access_refresh_dict.get("access")
|
|
126
|
+
refresh = access_refresh_dict.get("refresh")
|
|
127
127
|
validate_user_auth(access=access, refresh=refresh)
|
|
128
128
|
|
|
129
129
|
|
|
@@ -134,13 +134,13 @@ def get_api_token():
|
|
|
134
134
|
"""
|
|
135
135
|
url = "https://biolm.ai/api/auth/token/"
|
|
136
136
|
|
|
137
|
-
payload = json.dumps(
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
}
|
|
137
|
+
payload = json.dumps(
|
|
138
|
+
{
|
|
139
|
+
"username": os.environ.get("BIOLM_USER"),
|
|
140
|
+
"password": os.environ.get("BIOLM_PASSWORD"),
|
|
141
|
+
}
|
|
142
|
+
)
|
|
143
|
+
headers = {"Content-Type": "application/json"}
|
|
144
144
|
|
|
145
145
|
response = requests.request("POST", url, headers=headers, data=payload)
|
|
146
146
|
response_json = response.json()
|
|
@@ -152,19 +152,22 @@ def get_user_auth_header():
|
|
|
152
152
|
"""Returns a dict with the appropriate Authorization header, either using
|
|
153
153
|
an API token from BIOLMAI_TOKEN environment variable, or by reading the
|
|
154
154
|
credentials file at ~/.biolmai/credntials next."""
|
|
155
|
-
api_token = os.environ.get(
|
|
155
|
+
api_token = os.environ.get("BIOLMAI_TOKEN", None)
|
|
156
156
|
if api_token:
|
|
157
|
-
headers = {
|
|
157
|
+
headers = {"Authorization": f"Token {api_token}"}
|
|
158
158
|
elif os.path.exists(ACCESS_TOK_PATH):
|
|
159
|
-
with open(ACCESS_TOK_PATH
|
|
159
|
+
with open(ACCESS_TOK_PATH) as f:
|
|
160
160
|
access_refresh_dict = json.load(f)
|
|
161
|
-
access = access_refresh_dict.get(
|
|
162
|
-
refresh = access_refresh_dict.get(
|
|
161
|
+
access = access_refresh_dict.get("access")
|
|
162
|
+
refresh = access_refresh_dict.get("refresh")
|
|
163
163
|
headers = {
|
|
164
|
-
|
|
165
|
-
|
|
164
|
+
"Cookie": f"access={access};refresh={refresh}",
|
|
165
|
+
"Content-Type": "application/json",
|
|
166
166
|
}
|
|
167
167
|
else:
|
|
168
|
-
err =
|
|
168
|
+
err = (
|
|
169
|
+
"No https://biolm.ai credentials found. Please run "
|
|
170
|
+
"`biolmai status` to debug."
|
|
171
|
+
)
|
|
169
172
|
raise AssertionError(err)
|
|
170
173
|
return headers
|
biolmai/biolmai.py
CHANGED
biolmai/cli.py
CHANGED
|
@@ -1,37 +1,43 @@
|
|
|
1
1
|
"""Console script for biolmai."""
|
|
2
|
+
import os
|
|
2
3
|
import sys
|
|
4
|
+
|
|
3
5
|
import click
|
|
4
|
-
|
|
5
|
-
from biolmai.auth import
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
|
|
7
|
+
from biolmai.auth import (
|
|
8
|
+
generate_access_token,
|
|
9
|
+
get_auth_status,
|
|
10
|
+
save_access_refresh_token,
|
|
11
|
+
)
|
|
12
|
+
from biolmai.const import ACCESS_TOK_PATH, BASE_API_URL, MULTIPROCESS_THREADS
|
|
8
13
|
|
|
9
14
|
|
|
10
15
|
@click.command()
|
|
11
16
|
def main(args=None):
|
|
12
17
|
"""Console script for biolmai."""
|
|
13
|
-
click.echo("Replace this message by putting your code into "
|
|
14
|
-
"biolmai.cli.main")
|
|
18
|
+
click.echo("Replace this message by putting your code into " "biolmai.cli.main")
|
|
15
19
|
click.echo("See click documentation at https://click.palletsprojects.com/")
|
|
16
20
|
return 0
|
|
17
21
|
|
|
18
22
|
|
|
19
23
|
@click.group()
|
|
20
|
-
@click.option(
|
|
24
|
+
@click.option("--debug/--no-debug", default=False)
|
|
21
25
|
def cli(debug):
|
|
22
26
|
pass
|
|
23
27
|
|
|
24
28
|
|
|
25
29
|
def echo_env_vars():
|
|
26
|
-
env_var_tok = os.environ.get(
|
|
30
|
+
env_var_tok = os.environ.get("BIOLMAI_TOKEN", "")[:6]
|
|
27
31
|
if env_var_tok and len(env_var_tok) == 6:
|
|
28
|
-
env_var_tok +=
|
|
29
|
-
s =
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
env_var_tok += "*****************"
|
|
33
|
+
s = "\n".join(
|
|
34
|
+
[
|
|
35
|
+
f"BIOLMAI_TOKEN={env_var_tok}",
|
|
36
|
+
f"BIOLMAI_ACCESS_CRED={ACCESS_TOK_PATH}",
|
|
37
|
+
"BIOLMAI_THREADS={}".format(MULTIPROCESS_THREADS or ""),
|
|
38
|
+
f"BIOLMAI_BASE_API_URL={BASE_API_URL}",
|
|
39
|
+
]
|
|
40
|
+
)
|
|
35
41
|
click.echo(s)
|
|
36
42
|
|
|
37
43
|
|
|
@@ -43,17 +49,19 @@ def status():
|
|
|
43
49
|
|
|
44
50
|
@cli.command()
|
|
45
51
|
def login():
|
|
46
|
-
uname = click.prompt(
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
52
|
+
uname = click.prompt(
|
|
53
|
+
"Username", default=None, hide_input=False, confirmation_prompt=False, type=str
|
|
54
|
+
)
|
|
55
|
+
password = click.prompt(
|
|
56
|
+
"Password", default=None, hide_input=True, confirmation_prompt=False, type=str
|
|
57
|
+
)
|
|
50
58
|
access_refresh_tok_dict = generate_access_token(uname, password)
|
|
51
59
|
try:
|
|
52
|
-
|
|
53
|
-
|
|
60
|
+
assert access_refresh_tok_dict.get("access") is not None
|
|
61
|
+
assert access_refresh_tok_dict.get("refresh") is not None
|
|
54
62
|
click.echo("Saving new access and refresh token.")
|
|
55
63
|
save_access_refresh_token(access_refresh_tok_dict)
|
|
56
|
-
except Exception
|
|
64
|
+
except Exception:
|
|
57
65
|
click.echo("Unhandled login exception!")
|
|
58
66
|
raise
|
|
59
67
|
|
biolmai/cls.py
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
"""API inference classes."""
|
|
2
|
-
from biolmai.api import APIEndpoint, PredictAction, TransformAction
|
|
3
|
-
from biolmai.validate import
|
|
2
|
+
from biolmai.api import APIEndpoint, GenerateAction, PredictAction, TransformAction
|
|
3
|
+
from biolmai.validate import ExtendedAAPlusExtra, SingleOccurrenceOf, UnambiguousAA
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
class ESMFoldSingleChain(APIEndpoint):
|
|
7
|
-
slug =
|
|
8
|
-
action_classes = (PredictAction,
|
|
9
|
-
seq_classes = (UnambiguousAA(),
|
|
7
|
+
slug = "esmfold-singlechain"
|
|
8
|
+
action_classes = (PredictAction,)
|
|
9
|
+
seq_classes = (UnambiguousAA(),)
|
|
10
10
|
batch_size = 2
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class ESMFoldMultiChain(APIEndpoint):
|
|
14
|
-
slug =
|
|
15
|
-
action_classes = (PredictAction,
|
|
16
|
-
seq_classes = (ExtendedAAPlusExtra(extra=[
|
|
14
|
+
slug = "esmfold-multichain"
|
|
15
|
+
action_classes = (PredictAction,)
|
|
16
|
+
seq_classes = (ExtendedAAPlusExtra(extra=[":"]),)
|
|
17
17
|
batch_size = 2
|
|
18
18
|
|
|
19
19
|
|
|
@@ -29,9 +29,10 @@ class ESM2Embeddings(APIEndpoint):
|
|
|
29
29
|
}]
|
|
30
30
|
}
|
|
31
31
|
"""
|
|
32
|
-
|
|
32
|
+
|
|
33
|
+
slug = "esm2_t33_650M_UR50D"
|
|
33
34
|
action_classes = (TransformAction,)
|
|
34
|
-
seq_classes = (UnambiguousAA(),
|
|
35
|
+
seq_classes = (UnambiguousAA(),)
|
|
35
36
|
batch_size = 1
|
|
36
37
|
|
|
37
38
|
|
|
@@ -47,54 +48,50 @@ class ESM1v1(APIEndpoint):
|
|
|
47
48
|
}]
|
|
48
49
|
}
|
|
49
50
|
"""
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
51
|
+
|
|
52
|
+
slug = "esm1v_t33_650M_UR90S_1"
|
|
53
|
+
action_classes = (PredictAction,)
|
|
54
|
+
seq_classes = (SingleOccurrenceOf("<mask>"), ExtendedAAPlusExtra(extra=["<mask>"]))
|
|
54
55
|
batch_size = 5
|
|
55
56
|
|
|
56
57
|
|
|
57
58
|
class ESM1v2(APIEndpoint):
|
|
58
|
-
slug =
|
|
59
|
-
action_classes = (PredictAction,
|
|
60
|
-
seq_classes = (SingleOccurrenceOf(
|
|
61
|
-
ExtendedAAPlusExtra(extra=['<mask>']))
|
|
59
|
+
slug = "esm1v_t33_650M_UR90S_2"
|
|
60
|
+
action_classes = (PredictAction,)
|
|
61
|
+
seq_classes = (SingleOccurrenceOf("<mask>"), ExtendedAAPlusExtra(extra=["<mask>"]))
|
|
62
62
|
batch_size = 5
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
class ESM1v3(APIEndpoint):
|
|
66
|
-
slug =
|
|
67
|
-
action_classes = (PredictAction,
|
|
68
|
-
seq_classes = (SingleOccurrenceOf(
|
|
69
|
-
ExtendedAAPlusExtra(extra=['<mask>']))
|
|
66
|
+
slug = "esm1v_t33_650M_UR90S_3"
|
|
67
|
+
action_classes = (PredictAction,)
|
|
68
|
+
seq_classes = (SingleOccurrenceOf("<mask>"), ExtendedAAPlusExtra(extra=["<mask>"]))
|
|
70
69
|
batch_size = 5
|
|
71
70
|
|
|
72
71
|
|
|
73
72
|
class ESM1v4(APIEndpoint):
|
|
74
|
-
slug =
|
|
75
|
-
action_classes = (PredictAction,
|
|
76
|
-
seq_classes = (SingleOccurrenceOf(
|
|
77
|
-
ExtendedAAPlusExtra(extra=['<mask>']))
|
|
73
|
+
slug = "esm1v_t33_650M_UR90S_4"
|
|
74
|
+
action_classes = (PredictAction,)
|
|
75
|
+
seq_classes = (SingleOccurrenceOf("<mask>"), ExtendedAAPlusExtra(extra=["<mask>"]))
|
|
78
76
|
batch_size = 5
|
|
79
77
|
|
|
80
78
|
|
|
81
79
|
class ESM1v5(APIEndpoint):
|
|
82
|
-
slug =
|
|
83
|
-
action_classes = (PredictAction,
|
|
84
|
-
seq_classes = (SingleOccurrenceOf(
|
|
85
|
-
ExtendedAAPlusExtra(extra=['<mask>']))
|
|
80
|
+
slug = "esm1v_t33_650M_UR90S_5"
|
|
81
|
+
action_classes = (PredictAction,)
|
|
82
|
+
seq_classes = (SingleOccurrenceOf("<mask>"), ExtendedAAPlusExtra(extra=["<mask>"]))
|
|
86
83
|
batch_size = 5
|
|
87
84
|
|
|
88
85
|
|
|
89
86
|
class ESMIF1(APIEndpoint):
|
|
90
|
-
slug =
|
|
91
|
-
action_classes = (GenerateAction,
|
|
92
|
-
seq_classes =
|
|
87
|
+
slug = "esmif1"
|
|
88
|
+
action_classes = (GenerateAction,)
|
|
89
|
+
seq_classes = ()
|
|
93
90
|
batch_size = 2
|
|
94
91
|
|
|
95
92
|
|
|
96
93
|
class Progen2(APIEndpoint):
|
|
97
|
-
slug =
|
|
98
|
-
action_classes = (GenerateAction,
|
|
99
|
-
seq_classes =
|
|
94
|
+
slug = "progen2"
|
|
95
|
+
action_classes = (GenerateAction,)
|
|
96
|
+
seq_classes = ()
|
|
100
97
|
batch_size = 1
|
biolmai/const.py
CHANGED
|
@@ -1,27 +1,29 @@
|
|
|
1
|
-
import os
|
|
2
1
|
import multiprocessing
|
|
2
|
+
import os
|
|
3
3
|
|
|
4
4
|
cpu_count = multiprocessing.cpu_count()
|
|
5
5
|
max_threads = cpu_count * 4
|
|
6
6
|
|
|
7
|
-
if os.environ.get(
|
|
7
|
+
if os.environ.get("BIOLMAI_LOCAL", False):
|
|
8
8
|
# For local development and tests only
|
|
9
|
-
BASE_DOMAIN =
|
|
9
|
+
BASE_DOMAIN = "http://localhost:8000"
|
|
10
10
|
else:
|
|
11
|
-
BASE_DOMAIN =
|
|
11
|
+
BASE_DOMAIN = "https://biolm.ai"
|
|
12
12
|
|
|
13
|
-
USER_BIOLM_DIR = os.path.join(os.path.expanduser(
|
|
14
|
-
ACCESS_TOK_PATH = os.path.join(USER_BIOLM_DIR,
|
|
15
|
-
GEN_TOKEN_URL = f
|
|
16
|
-
MULTIPROCESS_THREADS = os.environ.get(
|
|
13
|
+
USER_BIOLM_DIR = os.path.join(os.path.expanduser("~"), ".biolmai")
|
|
14
|
+
ACCESS_TOK_PATH = os.path.join(USER_BIOLM_DIR, "credentials")
|
|
15
|
+
GEN_TOKEN_URL = f"{BASE_DOMAIN}/ui/accounts/user-api-tokens/"
|
|
16
|
+
MULTIPROCESS_THREADS = os.environ.get("BIOLMAI_THREADS", 1)
|
|
17
17
|
if isinstance(MULTIPROCESS_THREADS, str) and not MULTIPROCESS_THREADS:
|
|
18
18
|
MULTIPROCESS_THREADS = 1
|
|
19
19
|
if int(MULTIPROCESS_THREADS) > max_threads or int(MULTIPROCESS_THREADS) > 128:
|
|
20
|
-
err =
|
|
21
|
-
|
|
20
|
+
err = (
|
|
21
|
+
f"Maximum threads allowed is 4x number of CPU cores ("
|
|
22
|
+
f"{max_threads}) or 128, whichever is lower."
|
|
23
|
+
)
|
|
22
24
|
err += " Please update environment variable BIOLMAI_THREADS."
|
|
23
25
|
raise ValueError(err)
|
|
24
26
|
elif int(MULTIPROCESS_THREADS) <= 0:
|
|
25
27
|
err = "Environment variable BIOLMAI_THREADS must be a positive integer."
|
|
26
28
|
raise ValueError(err)
|
|
27
|
-
BASE_API_URL = f
|
|
29
|
+
BASE_API_URL = f"{BASE_DOMAIN}/api/v1"
|
biolmai/payloads.py
CHANGED
|
@@ -1,29 +1,28 @@
|
|
|
1
1
|
def INST_DAT_TXT(batch, include_batch_size=False):
|
|
2
2
|
d = {"instances": []}
|
|
3
|
-
for
|
|
3
|
+
for _, row in batch.iterrows():
|
|
4
4
|
inst = {"data": {"text": row.text}}
|
|
5
|
-
d[
|
|
5
|
+
d["instances"].append(inst)
|
|
6
6
|
if include_batch_size is True:
|
|
7
|
-
d[
|
|
7
|
+
d["batch_size"] = len(d["instances"])
|
|
8
8
|
return d
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
def predict_resp_many_in_one_to_many_singles(
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
def predict_resp_many_in_one_to_many_singles(
|
|
12
|
+
resp_json, status_code, batch_id, local_err, batch_size
|
|
13
|
+
):
|
|
14
|
+
expected_root_key = "predictions"
|
|
14
15
|
to_ret = []
|
|
15
16
|
if not local_err and status_code and status_code == 200:
|
|
16
17
|
list_of_individual_seq_results = resp_json[expected_root_key]
|
|
17
18
|
elif local_err:
|
|
18
|
-
list_of_individual_seq_results = [{
|
|
19
|
+
list_of_individual_seq_results = [{"error": resp_json}]
|
|
19
20
|
elif status_code and status_code != 200 and isinstance(resp_json, dict):
|
|
20
21
|
list_of_individual_seq_results = [resp_json] * batch_size
|
|
21
22
|
else:
|
|
22
23
|
raise ValueError("Unexpected response in parser")
|
|
23
24
|
for idx, item in enumerate(list_of_individual_seq_results):
|
|
24
|
-
d = {
|
|
25
|
-
'batch_id': batch_id,
|
|
26
|
-
'batch_item': idx}
|
|
25
|
+
d = {"status_code": status_code, "batch_id": batch_id, "batch_item": idx}
|
|
27
26
|
if not status_code or status_code != 200:
|
|
28
27
|
d.update(item) # Put all resp keys at root there
|
|
29
28
|
else:
|
biolmai/validate.py
CHANGED
|
@@ -1,28 +1,47 @@
|
|
|
1
1
|
import re
|
|
2
2
|
|
|
3
|
-
UNAMBIGUOUS_AA = (
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
UNAMBIGUOUS_AA = (
|
|
4
|
+
"A",
|
|
5
|
+
"C",
|
|
6
|
+
"D",
|
|
7
|
+
"E",
|
|
8
|
+
"F",
|
|
9
|
+
"G",
|
|
10
|
+
"H",
|
|
11
|
+
"I",
|
|
12
|
+
"K",
|
|
13
|
+
"L",
|
|
14
|
+
"M",
|
|
15
|
+
"N",
|
|
16
|
+
"P",
|
|
17
|
+
"Q",
|
|
18
|
+
"R",
|
|
19
|
+
"S",
|
|
20
|
+
"T",
|
|
21
|
+
"V",
|
|
22
|
+
"W",
|
|
23
|
+
"Y",
|
|
24
|
+
)
|
|
25
|
+
AAs = "".join(UNAMBIGUOUS_AA)
|
|
7
26
|
# Let's use extended list for ESM-1v
|
|
8
|
-
AAs_EXTENDED =
|
|
27
|
+
AAs_EXTENDED = "ACDEFGHIKLMNPQRSTVWYBXZJUO"
|
|
9
28
|
|
|
10
29
|
|
|
11
|
-
UNAMBIGUOUS_DNA = (
|
|
12
|
-
AMBIGUOUS_DNA = (
|
|
30
|
+
UNAMBIGUOUS_DNA = ("A", "C", "T", "G")
|
|
31
|
+
AMBIGUOUS_DNA = ("A", "C", "T", "G", "X", "N", "U")
|
|
13
32
|
|
|
14
33
|
|
|
15
34
|
regexes = {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
35
|
+
"empty_or_unambiguous_aa_validator": re.compile(f"^[{AAs}]*$"),
|
|
36
|
+
"empty_or_unambiguous_dna_validator": re.compile(r"^[ACGT]*$"),
|
|
37
|
+
"extended_aa_validator": re.compile(f"^[{AAs_EXTENDED}]+$"),
|
|
38
|
+
"unambiguous_aa_validator": re.compile(f"^[{AAs}]+$"),
|
|
39
|
+
"unambiguous_dna_validator": re.compile(r"^[ACGT]+$"),
|
|
21
40
|
}
|
|
22
41
|
|
|
23
42
|
|
|
24
43
|
def empty_or_unambiguous_aa_validator(txt):
|
|
25
|
-
r = regexes[
|
|
44
|
+
r = regexes["empty_or_unambiguous_aa_validator"]
|
|
26
45
|
if not bool(r.match(txt)):
|
|
27
46
|
err = f"Residues can only be represented with '{AAs}' characters"
|
|
28
47
|
raise AssertionError(err)
|
|
@@ -30,36 +49,40 @@ def empty_or_unambiguous_aa_validator(txt):
|
|
|
30
49
|
|
|
31
50
|
|
|
32
51
|
def empty_or_unambiguous_dna_validator(txt):
|
|
33
|
-
r = regexes[
|
|
52
|
+
r = regexes["empty_or_unambiguous_dna_validator"]
|
|
34
53
|
if not bool(r.match(txt)):
|
|
35
|
-
err =
|
|
54
|
+
err = "Nucleotides can only be represented with 'ACTG' characters"
|
|
36
55
|
raise AssertionError(err)
|
|
37
56
|
return txt
|
|
38
57
|
|
|
39
58
|
|
|
40
59
|
def extended_aa_validator(txt):
|
|
41
|
-
r = regexes[
|
|
60
|
+
r = regexes["extended_aa_validator"]
|
|
42
61
|
if not bool(r.match(txt)):
|
|
43
|
-
err =
|
|
44
|
-
|
|
62
|
+
err = (
|
|
63
|
+
f"Extended residues can only be represented with "
|
|
64
|
+
f"'{AAs_EXTENDED}' characters"
|
|
65
|
+
)
|
|
45
66
|
raise AssertionError(err)
|
|
46
67
|
return txt
|
|
47
68
|
|
|
48
69
|
|
|
49
70
|
def unambiguous_aa_validator(txt):
|
|
50
|
-
r = regexes[
|
|
71
|
+
r = regexes["unambiguous_aa_validator"]
|
|
51
72
|
if not bool(r.match(txt)):
|
|
52
|
-
err =
|
|
53
|
-
|
|
73
|
+
err = (
|
|
74
|
+
f"Unambiguous residues can only be represented with '{AAs}' " f"characters"
|
|
75
|
+
)
|
|
54
76
|
raise AssertionError(err)
|
|
55
77
|
return txt
|
|
56
78
|
|
|
57
79
|
|
|
58
80
|
def unambiguous_dna_validator(txt):
|
|
59
|
-
r = regexes[
|
|
81
|
+
r = regexes["unambiguous_dna_validator"]
|
|
60
82
|
if not bool(r.match(txt)):
|
|
61
|
-
err =
|
|
62
|
-
|
|
83
|
+
err = (
|
|
84
|
+
"Unambiguous nucleotides can only be represented with 'ACTG' " "characters"
|
|
85
|
+
)
|
|
63
86
|
raise AssertionError(err)
|
|
64
87
|
return txt
|
|
65
88
|
|
|
@@ -70,7 +93,9 @@ class UnambiguousAA:
|
|
|
70
93
|
|
|
71
94
|
|
|
72
95
|
class UnambiguousAAPlusExtra:
|
|
73
|
-
def __init__(self, extra=
|
|
96
|
+
def __init__(self, extra=None):
|
|
97
|
+
if extra is None:
|
|
98
|
+
extra = []
|
|
74
99
|
self.extra = extra
|
|
75
100
|
assert len(extra) > 0
|
|
76
101
|
assert isinstance(extra, list)
|
|
@@ -78,12 +103,14 @@ class UnambiguousAAPlusExtra:
|
|
|
78
103
|
def __call__(self, value):
|
|
79
104
|
txt_clean = value
|
|
80
105
|
for ex in self.extra:
|
|
81
|
-
txt_clean = value.replace(ex,
|
|
106
|
+
txt_clean = value.replace(ex, "")
|
|
82
107
|
_ = unambiguous_aa_validator(txt_clean)
|
|
83
108
|
|
|
84
109
|
|
|
85
110
|
class ExtendedAAPlusExtra:
|
|
86
|
-
def __init__(self, extra=
|
|
111
|
+
def __init__(self, extra=None):
|
|
112
|
+
if extra is None:
|
|
113
|
+
extra = []
|
|
87
114
|
self.extra = extra
|
|
88
115
|
assert len(extra) > 0
|
|
89
116
|
assert isinstance(extra, list)
|
|
@@ -91,7 +118,7 @@ class ExtendedAAPlusExtra:
|
|
|
91
118
|
def __call__(self, value):
|
|
92
119
|
txt_clean = value
|
|
93
120
|
for ex in self.extra:
|
|
94
|
-
txt_clean = value.replace(ex,
|
|
121
|
+
txt_clean = value.replace(ex, "")
|
|
95
122
|
_ = extended_aa_validator(txt_clean)
|
|
96
123
|
|
|
97
124
|
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
biolmai/__init__.py,sha256=lJ7PiA_IyjKhz3dI8nrnqy8S_wqAHtEM3iN3v3eArr0,136
|
|
2
|
+
biolmai/api.py,sha256=3DcXeTFwXdn2KpHrGPxFGN6bvzdFjK6_4KUZuaRe64w,10974
|
|
3
|
+
biolmai/asynch.py,sha256=ZLCiNdGDR2XvijM6jFB2IFl3bG7ROp4PxKbo1rI5s7A,8698
|
|
4
|
+
biolmai/auth.py,sha256=flI9KAD90qdXyLDnpJTrc9voKsiK0uWtD2ehsPBn8r4,6329
|
|
5
|
+
biolmai/biolmai.py,sha256=xwjAvuw6AtmQdkRf_usSGUZ-k2oU-fjl82_WAgfSvVE,74
|
|
6
|
+
biolmai/cli.py,sha256=bdb4q8QlN73A6Ttz0e-dBIwoct7PYqy5WSc52jCMIyU,1967
|
|
7
|
+
biolmai/cls.py,sha256=yacZIwDyDq3sgU3FSc-l8uld83lkwSTh4wiS-vGNT4I,2425
|
|
8
|
+
biolmai/const.py,sha256=kbpmBEm-bw7lhGIJcMFeq1pfsIYeRk01_JwBufjupXc,1111
|
|
9
|
+
biolmai/ltc.py,sha256=al7HZc5tLyUR5fmpIb95hOz5ctudVsc0xzjd_c2Ew3M,49
|
|
10
|
+
biolmai/payloads.py,sha256=WmFN9JUojbrdvd_By8WWURS6Gm5Bh1fPYK0UjLDCbzU,1356
|
|
11
|
+
biolmai/validate.py,sha256=QdPDuZodHn85p1Y7KGkxCDMuRcXBOzAB9lkNZpigw9g,3311
|
|
12
|
+
biolmai-0.1.7.dist-info/AUTHORS.rst,sha256=TB_ACuFPgVmxn1NspYwksTdT6jdZeShcxfafmi-XWKQ,158
|
|
13
|
+
biolmai-0.1.7.dist-info/LICENSE,sha256=8yt0SdP38I7a3g0zWqZjNe0VSDQhJA4bWLQSqqKtAVg,583
|
|
14
|
+
biolmai-0.1.7.dist-info/METADATA,sha256=S2JBm8gzzRm_Xsb0aY3LozcW9TSocbqFLZd8BsA7gQw,1929
|
|
15
|
+
biolmai-0.1.7.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
|
|
16
|
+
biolmai-0.1.7.dist-info/entry_points.txt,sha256=ylQnDpCYrxF1F9z_T7NRQcYMWYF5ia_KsTUuboxjEAM,44
|
|
17
|
+
biolmai-0.1.7.dist-info/top_level.txt,sha256=jyQO45JN3g_jbdI8WqMnb0aEIzf4h1MrmPAZkKgfnwY,8
|
|
18
|
+
biolmai-0.1.7.dist-info/RECORD,,
|