biolmai 0.1.4__py2.py3-none-any.whl → 0.1.7__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biolmai might be problematic. Click here for more details.
- biolmai/__init__.py +3 -11
- biolmai/api.py +163 -247
- biolmai/asynch.py +90 -53
- biolmai/auth.py +75 -29
- biolmai/biolmai.py +1 -149
- biolmai/cli.py +30 -22
- biolmai/cls.py +96 -0
- biolmai/const.py +13 -11
- biolmai/payloads.py +28 -3
- biolmai/validate.py +55 -28
- {biolmai-0.1.4.dist-info → biolmai-0.1.7.dist-info}/METADATA +1 -1
- biolmai-0.1.7.dist-info/RECORD +18 -0
- {biolmai-0.1.4.dist-info → biolmai-0.1.7.dist-info}/WHEEL +1 -1
- biolmai-0.1.4.dist-info/RECORD +0 -18
- {biolmai-0.1.4.dist-info → biolmai-0.1.7.dist-info}/AUTHORS.rst +0 -0
- {biolmai-0.1.4.dist-info → biolmai-0.1.7.dist-info}/LICENSE +0 -0
- {biolmai-0.1.4.dist-info → biolmai-0.1.7.dist-info}/entry_points.txt +0 -0
- {biolmai-0.1.4.dist-info → biolmai-0.1.7.dist-info}/top_level.txt +0 -0
biolmai/asynch.py
CHANGED
|
@@ -1,23 +1,15 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from asyncio import create_task, gather, run
|
|
3
|
+
from itertools import zip_longest
|
|
4
|
+
from typing import Dict, List
|
|
5
|
+
|
|
1
6
|
import aiohttp.resolver
|
|
7
|
+
from aiohttp import ClientSession
|
|
2
8
|
|
|
9
|
+
from biolmai.auth import get_user_auth_header
|
|
3
10
|
from biolmai.const import BASE_API_URL, MULTIPROCESS_THREADS
|
|
4
11
|
|
|
5
12
|
aiohttp.resolver.DefaultResolver = aiohttp.resolver.AsyncResolver
|
|
6
|
-
from aiohttp import ClientSession, TCPConnector
|
|
7
|
-
from typing import List
|
|
8
|
-
import json
|
|
9
|
-
import asyncio
|
|
10
|
-
|
|
11
|
-
from asyncio import create_task, gather, run, sleep
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
async def get_one(session: ClientSession, slug: str, action: str,
|
|
16
|
-
payload: dict, response_key: str):
|
|
17
|
-
pass
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
from aiohttp import ClientSession
|
|
21
13
|
|
|
22
14
|
|
|
23
15
|
async def get_one(session: ClientSession, url: str) -> None:
|
|
@@ -30,25 +22,31 @@ async def get_one(session: ClientSession, url: str) -> None:
|
|
|
30
22
|
return text_resp
|
|
31
23
|
|
|
32
24
|
|
|
33
|
-
async def get_one_biolm(
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
25
|
+
async def get_one_biolm(
|
|
26
|
+
session: ClientSession,
|
|
27
|
+
url: str,
|
|
28
|
+
pload: dict,
|
|
29
|
+
headers: dict,
|
|
30
|
+
response_key: str = None,
|
|
31
|
+
) -> None:
|
|
38
32
|
print("Requesting", url)
|
|
39
|
-
pload_batch = pload.pop(
|
|
40
|
-
pload_batch_size = pload.pop(
|
|
33
|
+
pload_batch = pload.pop("batch")
|
|
34
|
+
pload_batch_size = pload.pop("batch_size")
|
|
41
35
|
t = aiohttp.ClientTimeout(
|
|
42
|
-
total=
|
|
43
|
-
# total timeout (time consists connection establishment for
|
|
36
|
+
total=1600, # 27 mins
|
|
37
|
+
# total timeout (time consists connection establishment for
|
|
38
|
+
# a new connection or waiting for a free connection from a
|
|
39
|
+
# pool if pool connection limits are exceeded) default value
|
|
40
|
+
# is 5 minutes, set to `None` or `0` for unlimited timeout
|
|
44
41
|
sock_connect=None,
|
|
45
|
-
# Maximal number of seconds for connecting to a peer for a
|
|
42
|
+
# Maximal number of seconds for connecting to a peer for a
|
|
43
|
+
# new connection, not given from a pool. See also connect.
|
|
46
44
|
sock_read=None
|
|
47
45
|
# Maximal number of seconds for reading a portion of data from a peer
|
|
48
46
|
)
|
|
49
47
|
async with session.post(url, headers=headers, json=pload, timeout=t) as resp:
|
|
50
48
|
resp_json = await resp.json()
|
|
51
|
-
resp_json[
|
|
49
|
+
resp_json["batch"] = pload_batch
|
|
52
50
|
status_code = resp.status
|
|
53
51
|
expected_root_key = response_key
|
|
54
52
|
to_ret = []
|
|
@@ -61,9 +59,7 @@ async def get_one_biolm(session: ClientSession,
|
|
|
61
59
|
else:
|
|
62
60
|
raise ValueError("Unexpected response in parser")
|
|
63
61
|
for idx, item in enumerate(list_of_individual_seq_results):
|
|
64
|
-
d = {
|
|
65
|
-
'batch_id': pload_batch,
|
|
66
|
-
'batch_item': idx}
|
|
62
|
+
d = {"status_code": status_code, "batch_id": pload_batch, "batch_item": idx}
|
|
67
63
|
if not status_code or status_code != 200:
|
|
68
64
|
d.update(item) # Put all resp keys at root there
|
|
69
65
|
else:
|
|
@@ -77,16 +73,15 @@ async def get_one_biolm(session: ClientSession,
|
|
|
77
73
|
# await sleep(2) # for demo purposes
|
|
78
74
|
# text_resp = text.strip().split("\n", 1)[0]
|
|
79
75
|
# print("Got response from", url, text_resp)
|
|
80
|
-
return j
|
|
81
76
|
|
|
82
77
|
|
|
83
78
|
async def async_range(count):
|
|
84
79
|
for i in range(count):
|
|
85
|
-
yield(i)
|
|
80
|
+
yield (i)
|
|
86
81
|
await asyncio.sleep(0.0)
|
|
87
82
|
|
|
88
83
|
|
|
89
|
-
async def get_all(urls: List[str], num_concurrent: int) ->
|
|
84
|
+
async def get_all(urls: List[str], num_concurrent: int) -> list:
|
|
90
85
|
url_iterator = iter(urls)
|
|
91
86
|
keep_going = True
|
|
92
87
|
results = []
|
|
@@ -106,22 +101,26 @@ async def get_all(urls: List[str], num_concurrent: int) -> List:
|
|
|
106
101
|
return results
|
|
107
102
|
|
|
108
103
|
|
|
109
|
-
async def get_all_biolm(
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
104
|
+
async def get_all_biolm(
|
|
105
|
+
url: str,
|
|
106
|
+
ploads: List[Dict],
|
|
107
|
+
headers: dict,
|
|
108
|
+
num_concurrent: int,
|
|
109
|
+
response_key: str = None,
|
|
110
|
+
) -> list:
|
|
114
111
|
ploads_iterator = iter(ploads)
|
|
115
112
|
keep_going = True
|
|
116
113
|
results = []
|
|
117
|
-
connector = aiohttp.TCPConnector(limit=100,
|
|
118
|
-
limit_per_host=50,
|
|
119
|
-
ttl_dns_cache=60)
|
|
114
|
+
connector = aiohttp.TCPConnector(limit=100, limit_per_host=50, ttl_dns_cache=60)
|
|
120
115
|
ov_tout = aiohttp.ClientTimeout(
|
|
121
116
|
total=None,
|
|
122
|
-
# total timeout (time consists connection establishment for
|
|
117
|
+
# total timeout (time consists connection establishment for
|
|
118
|
+
# a new connection or waiting for a free connection from a
|
|
119
|
+
# pool if pool connection limits are exceeded) default value
|
|
120
|
+
# is 5 minutes, set to `None` or `0` for unlimited timeout
|
|
123
121
|
sock_connect=None,
|
|
124
|
-
# Maximal number of seconds for connecting to a peer for a
|
|
122
|
+
# Maximal number of seconds for connecting to a peer for a
|
|
123
|
+
# new connection, not given from a pool. See also connect.
|
|
125
124
|
sock_read=None
|
|
126
125
|
# Maximal number of seconds for reading a portion of data from a peer
|
|
127
126
|
)
|
|
@@ -134,35 +133,31 @@ async def get_all_biolm(url: str,
|
|
|
134
133
|
except StopIteration:
|
|
135
134
|
keep_going = False
|
|
136
135
|
break
|
|
137
|
-
new_task = create_task(
|
|
138
|
-
|
|
136
|
+
new_task = create_task(
|
|
137
|
+
get_one_biolm(session, url, pload, headers, response_key)
|
|
138
|
+
)
|
|
139
139
|
tasks.append(new_task)
|
|
140
140
|
res = await gather(*tasks)
|
|
141
141
|
results.extend(res)
|
|
142
142
|
return results
|
|
143
143
|
|
|
144
144
|
|
|
145
|
-
async def async_main(urls, concurrency) ->
|
|
145
|
+
async def async_main(urls, concurrency) -> list:
|
|
146
146
|
return await get_all(urls, concurrency)
|
|
147
147
|
|
|
148
148
|
|
|
149
|
-
async def async_api_calls(model_name,
|
|
150
|
-
action,
|
|
151
|
-
headers,
|
|
152
|
-
payloads,
|
|
153
|
-
response_key=None):
|
|
149
|
+
async def async_api_calls(model_name, action, headers, payloads, response_key=None):
|
|
154
150
|
"""Hit an arbitrary BioLM model inference API."""
|
|
155
151
|
# Normally would POST multiple sequences at once for greater efficiency,
|
|
156
152
|
# but for simplicity sake will do one at at time right now
|
|
157
|
-
url = f
|
|
153
|
+
url = f"{BASE_API_URL}/models/{model_name}/{action}/"
|
|
158
154
|
|
|
159
155
|
if not isinstance(payloads, (list, dict)):
|
|
160
156
|
err = "API request payload must be a list or dict, got {}"
|
|
161
157
|
raise AssertionError(err.format(type(payloads)))
|
|
162
158
|
|
|
163
159
|
concurrency = int(MULTIPROCESS_THREADS)
|
|
164
|
-
return await get_all_biolm(url, payloads, headers, concurrency,
|
|
165
|
-
response_key)
|
|
160
|
+
return await get_all_biolm(url, payloads, headers, concurrency, response_key)
|
|
166
161
|
|
|
167
162
|
# payload = json.dumps(payload)
|
|
168
163
|
# session = requests_retry_session()
|
|
@@ -183,3 +178,45 @@ async def async_api_calls(model_name,
|
|
|
183
178
|
# headers = get_user_auth_header() # Need to re-get these now
|
|
184
179
|
# response = retry_minutes(session, url, headers, payload, tout, mins=10)
|
|
185
180
|
# return response
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def async_api_call_wrapper(grouped_df, slug, action, payload_maker, response_key):
|
|
184
|
+
"""Wrap API calls to assist with sequence validation as a pre-cursor to
|
|
185
|
+
each API call.
|
|
186
|
+
"""
|
|
187
|
+
model_name = slug
|
|
188
|
+
# payload = payload_maker(grouped_df)
|
|
189
|
+
init_ploads = grouped_df.groupby("batch").apply(
|
|
190
|
+
payload_maker, include_batch_size=True
|
|
191
|
+
)
|
|
192
|
+
ploads = init_ploads.to_list()
|
|
193
|
+
init_ploads = init_ploads.to_frame(name="pload")
|
|
194
|
+
init_ploads["batch"] = init_ploads.index
|
|
195
|
+
init_ploads = init_ploads.reset_index(drop=True)
|
|
196
|
+
assert len(ploads) == init_ploads.shape[0]
|
|
197
|
+
for inst, b in zip_longest(ploads, init_ploads["batch"].to_list()):
|
|
198
|
+
if inst is None or b is None:
|
|
199
|
+
raise ValueError(
|
|
200
|
+
"ploads and init_ploads['batch'] are not of the same length"
|
|
201
|
+
)
|
|
202
|
+
inst["batch"] = b
|
|
203
|
+
|
|
204
|
+
headers = get_user_auth_header() # Need to pull each time
|
|
205
|
+
# urls = [
|
|
206
|
+
# "https://github.com",
|
|
207
|
+
# "https://stackoverflow.com",
|
|
208
|
+
# "https://python.org",
|
|
209
|
+
# ]
|
|
210
|
+
# concurrency = 3
|
|
211
|
+
api_resp = run(async_api_calls(model_name, action, headers, ploads, response_key))
|
|
212
|
+
api_resp = [item for sublist in api_resp for item in sublist]
|
|
213
|
+
api_resp = sorted(api_resp, key=lambda x: x["batch_id"])
|
|
214
|
+
# print(api_resp)
|
|
215
|
+
# api_resp = biolmai.api_call(model_name, action, headers, payload,
|
|
216
|
+
# response_key)
|
|
217
|
+
# resp_json = api_resp.json()
|
|
218
|
+
# batch_id = int(grouped_df.batch.iloc[0])
|
|
219
|
+
# batch_size = grouped_df.shape[0]
|
|
220
|
+
# response = predict_resp_many_in_one_to_many_singles(
|
|
221
|
+
# resp_json, api_resp.status_code, batch_id, None, batch_size)
|
|
222
|
+
return api_resp
|
biolmai/auth.py
CHANGED
|
@@ -6,20 +6,19 @@ import stat
|
|
|
6
6
|
import click
|
|
7
7
|
import requests
|
|
8
8
|
|
|
9
|
-
from biolmai.const import ACCESS_TOK_PATH, BASE_DOMAIN, GEN_TOKEN_URL,
|
|
10
|
-
USER_BIOLM_DIR
|
|
9
|
+
from biolmai.const import ACCESS_TOK_PATH, BASE_DOMAIN, GEN_TOKEN_URL, USER_BIOLM_DIR
|
|
11
10
|
|
|
12
11
|
|
|
13
12
|
def validate_user_auth(api_token=None, access=None, refresh=None):
|
|
14
13
|
"""Validates an API token, to be used as 'Authorization: Token 1235abc'
|
|
15
14
|
authentication method."""
|
|
16
|
-
url = f
|
|
15
|
+
url = f"{BASE_DOMAIN}/api/v1/auth/login-check/"
|
|
17
16
|
if api_token is not None:
|
|
18
|
-
headers = {
|
|
17
|
+
headers = {"Authorization": f"Token {api_token}"}
|
|
19
18
|
else:
|
|
20
19
|
headers = {
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
"Cookie": f"access={access};refresh={refresh}",
|
|
21
|
+
"Content-Type": "application/json",
|
|
23
22
|
}
|
|
24
23
|
try:
|
|
25
24
|
r = requests.post(url=url, headers=headers)
|
|
@@ -36,28 +35,25 @@ def validate_user_auth(api_token=None, access=None, refresh=None):
|
|
|
36
35
|
def refresh_access_token(refresh):
|
|
37
36
|
"""Attempt to refresh temporary user access token, by using their refresh
|
|
38
37
|
token, which has a longer TTL."""
|
|
39
|
-
url = f
|
|
40
|
-
headers = {
|
|
41
|
-
'Cookie': 'refresh={}'.format(refresh),
|
|
42
|
-
'Content-Type': 'application/json'
|
|
43
|
-
}
|
|
38
|
+
url = f"{BASE_DOMAIN}/api/auth/token/refresh/"
|
|
39
|
+
headers = {"Cookie": f"refresh={refresh}", "Content-Type": "application/json"}
|
|
44
40
|
r = requests.post(url=url, headers=headers)
|
|
45
41
|
json_response = r.json()
|
|
46
|
-
if r.status_code != 200 or (r.status_code == 200 and
|
|
42
|
+
if r.status_code != 200 or (r.status_code == 200 and "code" in r.json()):
|
|
47
43
|
pretty_json = pprint.pformat(json_response, indent=2)
|
|
48
44
|
click.echo(pretty_json)
|
|
49
|
-
click.echo(
|
|
50
|
-
|
|
45
|
+
click.echo(
|
|
46
|
+
"Token refresh failed! Please login by " "running `biolmai login`.\n"
|
|
47
|
+
)
|
|
51
48
|
return False
|
|
52
49
|
else:
|
|
53
|
-
access_refresh_dict = {
|
|
54
|
-
'refresh': refresh}
|
|
50
|
+
access_refresh_dict = {"access": json_response["access"], "refresh": refresh}
|
|
55
51
|
save_access_refresh_token(access_refresh_dict)
|
|
56
52
|
return True
|
|
57
53
|
|
|
58
54
|
|
|
59
55
|
def get_auth_status():
|
|
60
|
-
environ_token = os.environ.get(
|
|
56
|
+
environ_token = os.environ.get("BIOLMAI_TOKEN", None)
|
|
61
57
|
if environ_token:
|
|
62
58
|
msg = "Environment variable BIOLMAI_TOKEN detected. Validating token..."
|
|
63
59
|
click.echo(msg)
|
|
@@ -65,12 +61,14 @@ def get_auth_status():
|
|
|
65
61
|
elif os.path.exists(ACCESS_TOK_PATH):
|
|
66
62
|
msg = f"Credentials file found {ACCESS_TOK_PATH}. Validating token..."
|
|
67
63
|
click.echo(msg)
|
|
68
|
-
with open(ACCESS_TOK_PATH
|
|
64
|
+
with open(ACCESS_TOK_PATH) as f:
|
|
69
65
|
access_refresh_dict = json.load(f)
|
|
70
|
-
access = access_refresh_dict.get(
|
|
71
|
-
refresh = access_refresh_dict.get(
|
|
66
|
+
access = access_refresh_dict.get("access")
|
|
67
|
+
refresh = access_refresh_dict.get("refresh")
|
|
72
68
|
resp = validate_user_auth(access=access, refresh=refresh)
|
|
73
|
-
if resp.status_code != 200 or (
|
|
69
|
+
if resp.status_code != 200 or (
|
|
70
|
+
resp.status_code == 200 and "code" in resp.json()
|
|
71
|
+
):
|
|
74
72
|
click.echo("Access token validation failed. Attempting to refresh token...")
|
|
75
73
|
# Attempt to use the 'refresh' token to get a new 'access' token
|
|
76
74
|
if not refresh_access_token(refresh):
|
|
@@ -78,9 +76,11 @@ def get_auth_status():
|
|
|
78
76
|
else:
|
|
79
77
|
click.echo("Access token refresh was successful.")
|
|
80
78
|
else:
|
|
81
|
-
msg =
|
|
82
|
-
|
|
83
|
-
|
|
79
|
+
msg = (
|
|
80
|
+
f"No https://biolm.ai credentials found. Please "
|
|
81
|
+
f"set the environment variable BIOLMAI_TOKEN to a token from "
|
|
82
|
+
f"{GEN_TOKEN_URL}, or login by running `biolmai login`."
|
|
83
|
+
)
|
|
84
84
|
click.echo(msg)
|
|
85
85
|
|
|
86
86
|
|
|
@@ -95,9 +95,9 @@ def generate_access_token(uname, password):
|
|
|
95
95
|
more permanent auth method for the API, use an API token by setting the
|
|
96
96
|
BIOLMAI_TOKEN environment variable.
|
|
97
97
|
"""
|
|
98
|
-
url = f
|
|
98
|
+
url = f"{BASE_DOMAIN}/api/auth/token/"
|
|
99
99
|
try:
|
|
100
|
-
r = requests.post(url=url, data={
|
|
100
|
+
r = requests.post(url=url, data={"username": uname, "password": password})
|
|
101
101
|
json_response = r.json()
|
|
102
102
|
except Exception:
|
|
103
103
|
click.echo("Login failed!\n")
|
|
@@ -118,10 +118,56 @@ def save_access_refresh_token(access_refresh_dict):
|
|
|
118
118
|
use."""
|
|
119
119
|
os.makedirs(USER_BIOLM_DIR, exist_ok=True)
|
|
120
120
|
# Save token
|
|
121
|
-
with open(ACCESS_TOK_PATH,
|
|
121
|
+
with open(ACCESS_TOK_PATH, "w") as f:
|
|
122
122
|
json.dump(access_refresh_dict, f)
|
|
123
123
|
os.chmod(ACCESS_TOK_PATH, stat.S_IRUSR | stat.S_IWUSR)
|
|
124
124
|
# Validate token and print user info
|
|
125
|
-
access = access_refresh_dict.get(
|
|
126
|
-
refresh = access_refresh_dict.get(
|
|
125
|
+
access = access_refresh_dict.get("access")
|
|
126
|
+
refresh = access_refresh_dict.get("refresh")
|
|
127
127
|
validate_user_auth(access=access, refresh=refresh)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def get_api_token():
|
|
131
|
+
"""Get a BioLM API token to use with future API requests.
|
|
132
|
+
|
|
133
|
+
Copied from https://api.biolm.ai/#d7f87dfd-321f-45ae-99b6-eb203519ddeb.
|
|
134
|
+
"""
|
|
135
|
+
url = "https://biolm.ai/api/auth/token/"
|
|
136
|
+
|
|
137
|
+
payload = json.dumps(
|
|
138
|
+
{
|
|
139
|
+
"username": os.environ.get("BIOLM_USER"),
|
|
140
|
+
"password": os.environ.get("BIOLM_PASSWORD"),
|
|
141
|
+
}
|
|
142
|
+
)
|
|
143
|
+
headers = {"Content-Type": "application/json"}
|
|
144
|
+
|
|
145
|
+
response = requests.request("POST", url, headers=headers, data=payload)
|
|
146
|
+
response_json = response.json()
|
|
147
|
+
|
|
148
|
+
return response_json
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def get_user_auth_header():
|
|
152
|
+
"""Returns a dict with the appropriate Authorization header, either using
|
|
153
|
+
an API token from BIOLMAI_TOKEN environment variable, or by reading the
|
|
154
|
+
credentials file at ~/.biolmai/credntials next."""
|
|
155
|
+
api_token = os.environ.get("BIOLMAI_TOKEN", None)
|
|
156
|
+
if api_token:
|
|
157
|
+
headers = {"Authorization": f"Token {api_token}"}
|
|
158
|
+
elif os.path.exists(ACCESS_TOK_PATH):
|
|
159
|
+
with open(ACCESS_TOK_PATH) as f:
|
|
160
|
+
access_refresh_dict = json.load(f)
|
|
161
|
+
access = access_refresh_dict.get("access")
|
|
162
|
+
refresh = access_refresh_dict.get("refresh")
|
|
163
|
+
headers = {
|
|
164
|
+
"Cookie": f"access={access};refresh={refresh}",
|
|
165
|
+
"Content-Type": "application/json",
|
|
166
|
+
}
|
|
167
|
+
else:
|
|
168
|
+
err = (
|
|
169
|
+
"No https://biolm.ai credentials found. Please run "
|
|
170
|
+
"`biolmai status` to debug."
|
|
171
|
+
)
|
|
172
|
+
raise AssertionError(err)
|
|
173
|
+
return headers
|
biolmai/biolmai.py
CHANGED
|
@@ -1,153 +1,5 @@
|
|
|
1
1
|
"""Main module."""
|
|
2
|
-
import json
|
|
3
|
-
import os
|
|
4
|
-
import requests
|
|
5
|
-
import random
|
|
6
|
-
|
|
7
|
-
import json, os, requests
|
|
8
|
-
import urllib3
|
|
9
|
-
import datetime
|
|
10
|
-
import time
|
|
11
|
-
|
|
12
|
-
from requests.adapters import HTTPAdapter
|
|
13
|
-
from requests.packages.urllib3.util.retry import Retry
|
|
14
2
|
|
|
15
3
|
import logging
|
|
16
4
|
|
|
17
|
-
|
|
18
|
-
from biolmai.const import ACCESS_TOK_PATH, BASE_API_URL
|
|
19
|
-
|
|
20
|
-
log = logging.getLogger('biolm_util')
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def requests_retry_session(
|
|
24
|
-
retries=3,
|
|
25
|
-
backoff_factor=0.3,
|
|
26
|
-
status_forcelist=list(range(400, 599)),
|
|
27
|
-
session=None,
|
|
28
|
-
):
|
|
29
|
-
session = session or requests.Session()
|
|
30
|
-
retry = Retry(
|
|
31
|
-
total=retries,
|
|
32
|
-
read=retries,
|
|
33
|
-
connect=retries,
|
|
34
|
-
backoff_factor=backoff_factor,
|
|
35
|
-
status_forcelist=status_forcelist
|
|
36
|
-
)
|
|
37
|
-
adapter = HTTPAdapter(max_retries=retry)
|
|
38
|
-
session.mount('http://', adapter)
|
|
39
|
-
session.mount('https://', adapter)
|
|
40
|
-
return session
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def retry_minutes(sess, URL, HEADERS, dat, timeout, mins):
|
|
44
|
-
"""Retry for N minutes."""
|
|
45
|
-
HEADERS.update({'Content-Type': 'application/json'})
|
|
46
|
-
attempts, max_attempts = 0, 5
|
|
47
|
-
try:
|
|
48
|
-
now = datetime.datetime.now()
|
|
49
|
-
try_until = now + datetime.timedelta(minutes=mins)
|
|
50
|
-
while datetime.datetime.now() < try_until and attempts < max_attempts:
|
|
51
|
-
response = None
|
|
52
|
-
try:
|
|
53
|
-
log.info('Trying {}'.format(datetime.datetime.now()))
|
|
54
|
-
response = sess.post(
|
|
55
|
-
URL,
|
|
56
|
-
headers=HEADERS,
|
|
57
|
-
data=dat,
|
|
58
|
-
timeout=timeout
|
|
59
|
-
)
|
|
60
|
-
if response.status_code not in (400, 404):
|
|
61
|
-
response.raise_for_status()
|
|
62
|
-
if 'error' in response.json():
|
|
63
|
-
raise ValueError(response.json().dumps())
|
|
64
|
-
else:
|
|
65
|
-
break
|
|
66
|
-
except Exception as e:
|
|
67
|
-
log.warning(e)
|
|
68
|
-
if response:
|
|
69
|
-
log.warning(response.text)
|
|
70
|
-
time.sleep(5) # Wait 5 seconds between tries
|
|
71
|
-
attempts += 1
|
|
72
|
-
if response is None:
|
|
73
|
-
err = "Got Nonetype response"
|
|
74
|
-
raise ValueError(err)
|
|
75
|
-
elif 'Server Error' in response.text:
|
|
76
|
-
err = "Got Server Error"
|
|
77
|
-
raise ValueError(err)
|
|
78
|
-
except Exception as e:
|
|
79
|
-
return response
|
|
80
|
-
return response
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def get_user_auth_header():
|
|
84
|
-
"""Returns a dict with the appropriate Authorization header, either using
|
|
85
|
-
an API token from BIOLMAI_TOKEN environment variable, or by reading the
|
|
86
|
-
credentials file at ~/.biolmai/credntials next."""
|
|
87
|
-
api_token = os.environ.get('BIOLMAI_TOKEN', None)
|
|
88
|
-
if api_token:
|
|
89
|
-
headers = {'Authorization': f'Token {api_token}'}
|
|
90
|
-
elif os.path.exists(ACCESS_TOK_PATH):
|
|
91
|
-
with open(ACCESS_TOK_PATH, 'r') as f:
|
|
92
|
-
access_refresh_dict = json.load(f)
|
|
93
|
-
access = access_refresh_dict.get('access')
|
|
94
|
-
refresh = access_refresh_dict.get('refresh')
|
|
95
|
-
headers = {
|
|
96
|
-
'Cookie': 'access={};refresh={}'.format(access, refresh),
|
|
97
|
-
'Content-Type': 'application/json'
|
|
98
|
-
}
|
|
99
|
-
else:
|
|
100
|
-
err = "No https://biolm.ai credentials found. Please run `biolmai status` to debug."
|
|
101
|
-
raise AssertionError(err)
|
|
102
|
-
return headers
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def get_api_token():
|
|
106
|
-
"""Get a BioLM API token to use with future API requests.
|
|
107
|
-
|
|
108
|
-
Copied from https://api.biolm.ai/#d7f87dfd-321f-45ae-99b6-eb203519ddeb.
|
|
109
|
-
"""
|
|
110
|
-
url = "https://biolm.ai/api/auth/token/"
|
|
111
|
-
|
|
112
|
-
payload = json.dumps({
|
|
113
|
-
"username": os.environ.get("BIOLM_USER"),
|
|
114
|
-
"password": os.environ.get("BIOLM_PASSWORD")
|
|
115
|
-
})
|
|
116
|
-
headers = {
|
|
117
|
-
'Content-Type': 'application/json'
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
response = requests.request("POST", url, headers=headers, data=payload)
|
|
121
|
-
response_json = response.json()
|
|
122
|
-
|
|
123
|
-
return response_json
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
def api_call(model_name, action, headers, payload, response_key=None):
|
|
127
|
-
"""Hit an arbitrary BioLM model inference API."""
|
|
128
|
-
# Normally would POST multiple sequences at once for greater efficiency,
|
|
129
|
-
# but for simplicity sake will do one at at time right now
|
|
130
|
-
url = f'{BASE_API_URL}/models/{model_name}/{action}/'
|
|
131
|
-
|
|
132
|
-
if not isinstance(payload, (list, dict)):
|
|
133
|
-
err = "API request payload must be a list or dict, got {}"
|
|
134
|
-
raise AssertionError(err.format(type(payload)))
|
|
135
|
-
payload = json.dumps(payload)
|
|
136
|
-
session = requests_retry_session()
|
|
137
|
-
tout = urllib3.util.Timeout(total=180, read=180)
|
|
138
|
-
response = retry_minutes(session, url, headers, payload, tout, mins=10)
|
|
139
|
-
# If token expired / invalid, attempt to refresh.
|
|
140
|
-
if response.status_code == 401 and os.path.exists(ACCESS_TOK_PATH):
|
|
141
|
-
# Add jitter to slow down in case we're multiprocessing so all threads
|
|
142
|
-
# don't try to re-authenticate at once
|
|
143
|
-
time.sleep(random.random() * 4)
|
|
144
|
-
with open(ACCESS_TOK_PATH, 'r') as f:
|
|
145
|
-
access_refresh_dict = json.load(f)
|
|
146
|
-
refresh = access_refresh_dict.get('refresh')
|
|
147
|
-
if not refresh_access_token(refresh):
|
|
148
|
-
err = "Unauthenticated! Please run `biolmai status` to debug or " \
|
|
149
|
-
"`biolmai login`."
|
|
150
|
-
raise AssertionError(err)
|
|
151
|
-
headers = get_user_auth_header() # Need to re-get these now
|
|
152
|
-
response = retry_minutes(session, url, headers, payload, tout, mins=10)
|
|
153
|
-
return response
|
|
5
|
+
log = logging.getLogger("biolm_util")
|
biolmai/cli.py
CHANGED
|
@@ -1,37 +1,43 @@
|
|
|
1
1
|
"""Console script for biolmai."""
|
|
2
|
+
import os
|
|
2
3
|
import sys
|
|
4
|
+
|
|
3
5
|
import click
|
|
4
|
-
|
|
5
|
-
from biolmai.auth import
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
|
|
7
|
+
from biolmai.auth import (
|
|
8
|
+
generate_access_token,
|
|
9
|
+
get_auth_status,
|
|
10
|
+
save_access_refresh_token,
|
|
11
|
+
)
|
|
12
|
+
from biolmai.const import ACCESS_TOK_PATH, BASE_API_URL, MULTIPROCESS_THREADS
|
|
8
13
|
|
|
9
14
|
|
|
10
15
|
@click.command()
|
|
11
16
|
def main(args=None):
|
|
12
17
|
"""Console script for biolmai."""
|
|
13
|
-
click.echo("Replace this message by putting your code into "
|
|
14
|
-
"biolmai.cli.main")
|
|
18
|
+
click.echo("Replace this message by putting your code into " "biolmai.cli.main")
|
|
15
19
|
click.echo("See click documentation at https://click.palletsprojects.com/")
|
|
16
20
|
return 0
|
|
17
21
|
|
|
18
22
|
|
|
19
23
|
@click.group()
|
|
20
|
-
@click.option(
|
|
24
|
+
@click.option("--debug/--no-debug", default=False)
|
|
21
25
|
def cli(debug):
|
|
22
26
|
pass
|
|
23
27
|
|
|
24
28
|
|
|
25
29
|
def echo_env_vars():
|
|
26
|
-
env_var_tok = os.environ.get(
|
|
30
|
+
env_var_tok = os.environ.get("BIOLMAI_TOKEN", "")[:6]
|
|
27
31
|
if env_var_tok and len(env_var_tok) == 6:
|
|
28
|
-
env_var_tok +=
|
|
29
|
-
s =
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
env_var_tok += "*****************"
|
|
33
|
+
s = "\n".join(
|
|
34
|
+
[
|
|
35
|
+
f"BIOLMAI_TOKEN={env_var_tok}",
|
|
36
|
+
f"BIOLMAI_ACCESS_CRED={ACCESS_TOK_PATH}",
|
|
37
|
+
"BIOLMAI_THREADS={}".format(MULTIPROCESS_THREADS or ""),
|
|
38
|
+
f"BIOLMAI_BASE_API_URL={BASE_API_URL}",
|
|
39
|
+
]
|
|
40
|
+
)
|
|
35
41
|
click.echo(s)
|
|
36
42
|
|
|
37
43
|
|
|
@@ -43,17 +49,19 @@ def status():
|
|
|
43
49
|
|
|
44
50
|
@cli.command()
|
|
45
51
|
def login():
|
|
46
|
-
uname = click.prompt(
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
52
|
+
uname = click.prompt(
|
|
53
|
+
"Username", default=None, hide_input=False, confirmation_prompt=False, type=str
|
|
54
|
+
)
|
|
55
|
+
password = click.prompt(
|
|
56
|
+
"Password", default=None, hide_input=True, confirmation_prompt=False, type=str
|
|
57
|
+
)
|
|
50
58
|
access_refresh_tok_dict = generate_access_token(uname, password)
|
|
51
59
|
try:
|
|
52
|
-
|
|
53
|
-
|
|
60
|
+
assert access_refresh_tok_dict.get("access") is not None
|
|
61
|
+
assert access_refresh_tok_dict.get("refresh") is not None
|
|
54
62
|
click.echo("Saving new access and refresh token.")
|
|
55
63
|
save_access_refresh_token(access_refresh_tok_dict)
|
|
56
|
-
except Exception
|
|
64
|
+
except Exception:
|
|
57
65
|
click.echo("Unhandled login exception!")
|
|
58
66
|
raise
|
|
59
67
|
|