pybiolib 1.1.1629__py3-none-any.whl → 1.1.1881__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- biolib/__init__.py +11 -3
- biolib/_internal/data_record/__init__.py +1 -0
- biolib/_internal/data_record/data_record.py +153 -0
- biolib/_internal/data_record/remote_storage_endpoint.py +27 -0
- biolib/_internal/http_client.py +45 -15
- biolib/_internal/push_application.py +22 -37
- biolib/_internal/runtime.py +73 -0
- biolib/_internal/utils/__init__.py +18 -0
- biolib/api/client.py +12 -6
- biolib/app/app.py +6 -1
- biolib/app/search_apps.py +8 -12
- biolib/biolib_api_client/api_client.py +14 -9
- biolib/biolib_api_client/app_types.py +1 -0
- biolib/biolib_api_client/auth.py +0 -12
- biolib/biolib_api_client/biolib_app_api.py +53 -27
- biolib/biolib_api_client/biolib_job_api.py +11 -40
- biolib/biolib_binary_format/utils.py +19 -2
- biolib/cli/__init__.py +9 -3
- biolib/cli/auth.py +58 -0
- biolib/cli/data_record.py +43 -0
- biolib/cli/download_container.py +3 -1
- biolib/cli/init.py +1 -0
- biolib/cli/lfs.py +39 -9
- biolib/cli/push.py +1 -1
- biolib/cli/run.py +3 -2
- biolib/cli/start.py +1 -0
- biolib/compute_node/cloud_utils/cloud_utils.py +38 -65
- biolib/compute_node/job_worker/cache_state.py +1 -1
- biolib/compute_node/job_worker/executors/docker_executor.py +10 -8
- biolib/compute_node/job_worker/job_storage.py +9 -13
- biolib/compute_node/job_worker/job_worker.py +10 -4
- biolib/compute_node/remote_host_proxy.py +48 -11
- biolib/compute_node/webserver/worker_thread.py +2 -2
- biolib/jobs/job.py +33 -32
- biolib/lfs/__init__.py +0 -2
- biolib/lfs/utils.py +23 -115
- biolib/runtime/__init__.py +13 -1
- biolib/sdk/__init__.py +17 -4
- biolib/user/sign_in.py +8 -12
- biolib/utils/__init__.py +17 -45
- biolib/utils/app_uri.py +11 -4
- biolib/utils/cache_state.py +2 -2
- biolib/utils/multipart_uploader.py +42 -68
- biolib/utils/seq_util.py +47 -9
- biolib/utils/zip/remote_zip.py +9 -17
- {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/METADATA +1 -2
- {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/RECORD +50 -46
- {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/WHEEL +1 -1
- biolib/biolib_api_client/biolib_account_api.py +0 -21
- biolib/biolib_api_client/biolib_large_file_system_api.py +0 -53
- biolib/runtime/results.py +0 -20
- {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/entry_points.txt +0 -0
biolib/lfs/utils.py
CHANGED
```diff
@@ -1,31 +1,15 @@
 import io
-import json
 import os
 import zipfile as zf
-from collections import namedtuple
 from pathlib import Path
-from struct import Struct
 
-import requests
-
-from biolib import utils
-from biolib.app import BioLibApp
-from biolib.biolib_api_client.biolib_account_api import BiolibAccountApi
-from biolib.biolib_api_client.biolib_large_file_system_api import BiolibLargeFileSystemApi
+from biolib import utils, api
 from biolib.biolib_api_client import BiolibApiClient
+from biolib.biolib_api_client.lfs_types import LargeFileSystem, LargeFileSystemVersion
 from biolib.biolib_logging import logger
 from biolib.biolib_errors import BioLibError
 from biolib.typing_utils import List, Tuple, Iterator, Optional
-from biolib.utils.zip.remote_zip import RemoteZip
-
-
-def _get_lfs_info_from_uri(lfs_uri):
-    lfs_uri_parts = lfs_uri.split('/')
-    lfs_uri_parts = [uri_part for uri_part in lfs_uri_parts if '@' not in uri_part]  # Remove hostname
-    team_account_handle = lfs_uri_parts[0]
-    lfs_name = lfs_uri_parts[1]
-    account = BiolibAccountApi.fetch_by_handle(team_account_handle)
-    return account, lfs_name
+from biolib.utils.app_uri import parse_app_uri
 
 
 def get_files_and_size_of_directory(directory: str) -> Tuple[List[str], int]:
@@ -99,14 +83,23 @@ def get_iterable_zip_stream(files: List[str], chunk_size: int) -> Iterator[bytes]:
             yield chunk
 
 
-def create_large_file_system(lfs_uri: str):
+def create_large_file_system(lfs_uri: str) -> str:
     BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Large File System')
-    lfs_account, lfs_name = _get_lfs_info_from_uri(lfs_uri)
-    lfs_resource = BiolibLargeFileSystemApi.create(account_uuid=lfs_account['public_id'], name=lfs_name)
-    logger.info(f"Successfully created new Large File System '{lfs_resource['uri']}'")
 
+    uri_parsed = parse_app_uri(lfs_uri)
+    response = api.client.post(
+        path='/lfs/',
+        data={
+            'account_handle': uri_parsed['account_handle_normalized'],
+            'name': uri_parsed['app_name'],
+        },
+    )
+    lfs: LargeFileSystem = response.json()
+    logger.info(f"Successfully created new Large File System '{lfs['uri']}'")
+    return lfs['uri']
 
-def push_large_file_system(lfs_uri: str, input_dir: str, chunk_size_in_mb: Optional[int] = None):
+
+def push_large_file_system(lfs_uri: str, input_dir: str, chunk_size_in_mb: Optional[int] = None) -> str:
     BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Large File System')
 
     if not os.path.isdir(input_dir):
@@ -115,8 +108,6 @@ def push_large_file_system(lfs_uri: str, input_dir: str, chunk_size_in_mb: Optional[int] = None):
     if os.path.realpath(input_dir) == '/':
         raise BioLibError('Pushing your root directory is not possible')
 
-    lfs_resource = BioLibApp(lfs_uri)
-
     original_working_dir = os.getcwd()
     os.chdir(input_dir)
     files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
@@ -138,108 +129,25 @@ def push_large_file_system(lfs_uri: str, input_dir: str, chunk_size_in_mb: Optional[int] = None):
     data_size_in_mb = round(data_size_in_bytes / 10 ** 6)
     print(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
 
-
-
-
+    response = api.client.post(path='/lfs/versions/', data={'resource_uri': lfs_uri})
+    lfs_version: LargeFileSystemVersion = response.json()
     iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
 
-    base_url = BiolibApiClient.get().base_url
     multipart_uploader = utils.MultiPartUploader(
         use_process_pool=True,
         get_presigned_upload_url_request=dict(
            headers=None,
            requires_biolib_auth=True,
-
+           path=f"/lfs/versions/{lfs_version['uuid']}/presigned_upload_url/",
        ),
        complete_upload_request=dict(
            headers=None,
            requires_biolib_auth=True,
-
+           path=f"/lfs/versions/{lfs_version['uuid']}/complete_upload/",
        ),
    )
 
    multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
-    logger.info(f"Successfully pushed a new LFS version '{lfs_resource_version['uri']}'")
    os.chdir(original_working_dir)
-
-
-def describe_large_file_system(lfs_uri: str, output_as_json: bool = False) -> None:
-    BiolibApiClient.assert_is_signed_in(authenticated_action_description='describe a Large File System')
-    lfs_resource = BioLibApp(lfs_uri)
-    lfs_version = BiolibLargeFileSystemApi.fetch_version(lfs_version_uuid=lfs_resource.version['public_id'])
-
-    files = []
-    total_size = 0
-    with RemoteZip(url=lfs_version['presigned_download_url']) as remote_zip:
-        central_directory = remote_zip.get_central_directory()
-        for file in central_directory.values():
-            files.append(dict(path=file['filename'], size_bytes=file['file_size']))
-            total_size += file['file_size']
-
-    lfs_version_metadata = dict(files=files, **lfs_version)
-    lfs_version_metadata['size_bytes'] = total_size
-
-    if output_as_json:
-        print(json.dumps(lfs_version_metadata, indent=4))
-    else:
-        print(f"Large File System {lfs_version_metadata['uri']}\ntotal {lfs_version_metadata['size_bytes']} bytes\n")
-        print('size bytes path')
-        for file in files:
-            size_string = str(file['size_bytes'])
-            leading_space_string = ' ' * (10 - len(size_string))
-            print(f"{leading_space_string}{size_string} {file['path']}")
-
-
-def get_file_data_from_large_file_system(lfs_uri: str, file_path: str) -> bytes:
-    BiolibApiClient.assert_is_signed_in(authenticated_action_description='get file from a Large File System')
-    lfs_resource = BioLibApp(lfs_uri)
-    lfs_version = BiolibLargeFileSystemApi.fetch_version(lfs_version_uuid=lfs_resource.version['public_id'])
-    lfs_url = lfs_version['presigned_download_url']
-
-    with RemoteZip(lfs_url) as remote_zip:
-        central_directory = remote_zip.get_central_directory()
-        if file_path not in central_directory:
-            raise Exception('File not found in Large File System')
-
-        file_info = central_directory[file_path]
-
-    local_file_header_signature_bytes = b'\x50\x4b\x03\x04'
-    local_file_header_struct = Struct('<H2sHHHIIIHH')
-    LocalFileHeader = namedtuple('LocalFileHeader', (
-        'version',
-        'flags',
-        'compression_raw',
-        'mod_time',
-        'mod_date',
-        'crc_32_expected',
-        'compressed_size_raw',
-        'uncompressed_size_raw',
-        'file_name_len',
-        'extra_field_len',
-    ))
-
-    local_file_header_start = file_info['header_offset'] + len(local_file_header_signature_bytes)
-    local_file_header_end = local_file_header_start + local_file_header_struct.size
-
-    local_file_header_response = requests.get(
-        url=lfs_url,
-        stream=True,
-        headers={'range': f'bytes={local_file_header_start}-{local_file_header_end - 1}'},
-        timeout=300,
-    )
-    local_file_header_response.raise_for_status()
-    local_file_header_bytes: bytes = local_file_header_response.raw.data
-    local_file_header = LocalFileHeader._make(local_file_header_struct.unpack(local_file_header_bytes))
-
-    file_start = local_file_header_end + local_file_header.file_name_len + local_file_header.extra_field_len
-    file_end = file_start + file_info['file_size']
-
-    response = requests.get(
-        url=lfs_url,
-        stream=True,
-        headers={'range': f'bytes={file_start}-{file_end - 1}'},
-        timeout=300,  # timeout after 5 min
-    )
-    response.raise_for_status()
-    data: bytes = response.raw.data
-    return data
+    logger.info(f"Successfully pushed a new LFS version '{lfs_version['uri']}'")
+    return lfs_version['uri']
```
biolib/runtime/__init__.py
CHANGED
```diff
@@ -1 +1,13 @@
-
+import warnings
+from biolib.sdk import Runtime as _Runtime
+
+
+def set_main_result_prefix(result_prefix: str) -> None:
+    warnings.warn(
+        'The "biolib.runtime.set_main_result_prefix" function is deprecated. '
+        'It will be removed in future releases from mid 2024. '
+        'Please use "from biolib.sdk import Runtime" and then "Runtime.set_main_result_prefix" instead.',
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    _Runtime.set_main_result_prefix(result_prefix)
```
biolib/sdk/__init__.py
CHANGED
```diff
@@ -1,24 +1,33 @@
+# Imports to hide and use as private internal utils
+from biolib._internal.data_record import DataRecord as _DataRecord
 from biolib._internal.push_application import push_application as _push_application
 from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
-
 from biolib.app import BioLibApp as _BioLibApp
+from biolib.typing_utils import Optional as _Optional
+
+# Imports to expose as public API
+from biolib._internal.runtime import Runtime
+
 
 
 def push_app_version(uri: str, path: str) -> _BioLibApp:
     push_data = _push_application(
         app_uri=uri,
         app_path=path,
         app_version_to_copy_images_from=None,
-        is_dev_version=True)
+        is_dev_version=True,
+    )
     uri = f'{push_data["app_uri"]}:{push_data["sematic_version"]}'
     return _BioLibApp(uri)
 
+
 def set_app_version_as_default(app_version: _BioLibApp) -> None:
     app_version_uuid = app_version.version['public_id']
     _set_app_version_as_active(app_version_uuid)
 
+
 def get_app_version_pytest_plugin(app_version: _BioLibApp):
     try:
-        import pytest
+        import pytest  # type: ignore # pylint: disable=import-outside-toplevel,import-error
     except BaseException:
         raise Exception('Failed to import pytest; please make sure it is installed') from None
@@ -27,7 +36,11 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
             self.app_version_ref = app_version_ref
 
         @pytest.fixture(scope='session')
-        def app_version(self, request):
+        def app_version(self, request):  # pylint: disable=unused-argument
             return self.app_version_ref
 
     return AppVersionFixturePlugin(app_version)
+
+
+def create_data_record(destination: str, data_path: str, name: _Optional[str] = None) -> _DataRecord:
+    return _DataRecord.create(destination, data_path, name)
```
biolib/user/sign_in.py
CHANGED
```diff
@@ -1,19 +1,11 @@
 import time
-import uuid
+import webbrowser
 
 from biolib.biolib_api_client import BiolibApiClient
 from biolib.biolib_api_client.auth import BiolibAuthChallengeApi
 from biolib.biolib_logging import logger_no_user_data
 from biolib.utils import IS_RUNNING_IN_NOTEBOOK
-
-
-def _open_browser_window(url_to_open: str) -> None:
-    from IPython.display import display, Javascript, update_display  # type:ignore # pylint: disable=import-error, import-outside-toplevel
-
-    display_id = str(uuid.uuid4())
-    display(Javascript(f'window.open("{url_to_open}");'), display_id=display_id)
-    time.sleep(1)
-    update_display(Javascript(''), display_id=display_id)
+from biolib._internal.utils import open_browser_window_from_notebook
 
 
 def sign_out() -> None:
@@ -21,7 +13,7 @@ def sign_out() -> None:
     api_client.sign_out()
 
 
-def sign_in() -> None:
+def sign_in(open_in_default_browser: bool = False) -> None:
     api_client = BiolibApiClient.get()
     if api_client.is_signed_in:
         logger_no_user_data.info('Already signed in')
@@ -37,7 +29,11 @@ def sign_in() -> None:
     if IS_RUNNING_IN_NOTEBOOK:
         print(f'Opening authorization page at: {frontend_sign_in_url}')
         print('If your browser does not open automatically, click on the link above.')
-        _open_browser_window(frontend_sign_in_url)
+        open_browser_window_from_notebook(frontend_sign_in_url)
+    elif open_in_default_browser:
+        print(f'Opening authorization page at: {frontend_sign_in_url}')
+        print('If your browser does not open automatically, click on the link above.')
+        webbrowser.open(frontend_sign_in_url)
     else:
         print('Please copy and paste the following link into your browser:')
         print(frontend_sign_in_url)
```
biolib/utils/__init__.py
CHANGED
```diff
@@ -1,22 +1,19 @@
 import collections.abc
 import multiprocessing
 import os
-import time
 import socket
 import sys
-from urllib.parse import urlparse
 
-import requests
 from importlib_metadata import version, PackageNotFoundError
 
+from biolib.typing_utils import Optional
 from biolib.utils.seq_util import SeqUtil, SeqUtilRecord
-
-# try fetching version, if it fails (usually when in dev), add default
-from biolib.biolib_errors import BioLibError
+from biolib._internal.http_client import HttpClient
 from biolib.biolib_logging import logger_no_user_data, logger
 from biolib.typing_utils import Tuple, Iterator
 from .multipart_uploader import MultiPartUploader, get_chunk_iterator_from_bytes
 
+# try fetching version, if it fails (usually when in dev), add default
 try:
     BIOLIB_PACKAGE_VERSION = version('pybiolib')
 except PackageNotFoundError:
@@ -25,7 +22,7 @@ except PackageNotFoundError:
 IS_DEV = os.getenv('BIOLIB_DEV', '').upper() == 'TRUE'
 
 
-def _get_base_url() -> str:
+def load_base_url_from_env() -> str:
     base_url = os.getenv('BIOLIB_BASE_URL')
     if base_url:
         return base_url.lower().rstrip('/')
@@ -53,8 +50,8 @@ def _get_base_url() -> str:
     return 'https://biolib.com'
 
 
-BIOLIB_BASE_URL =
-BIOLIB_SITE_HOSTNAME =
+BIOLIB_BASE_URL: Optional[str] = None
+BIOLIB_SITE_HOSTNAME: Optional[str] = None
 
 BIOLIB_CLOUD_BASE_URL = os.getenv('BIOLIB_CLOUD_BASE_URL', '').lower()
 
@@ -66,8 +63,7 @@ BIOLIB_SECRETS_TMPFS_PATH = os.environ.get('BIOLIB_SECRETS_TMPFS_PATH')
 
 IS_RUNNING_IN_CLOUD = BIOLIB_CLOUD_ENVIRONMENT == 'non-enclave'
 
-BASE_URL_IS_PUBLIC_BIOLIB = \
-    os.environ.get('BIOLIB_ENVIRONMENT_IS_PUBLIC_BIOLIB', '').upper() == 'TRUE'
+BASE_URL_IS_PUBLIC_BIOLIB: Optional[bool] = None
 
 # sys.stdout is an instance of OutStream in Jupyter and Colab which does not have .buffer
 if not hasattr(sys.stdout, 'buffer'):
@@ -88,38 +84,17 @@ DownloadChunkInputTuple = Tuple[ByteRangeTuple, str]
 
 
 def _download_chunk(input_tuple: DownloadChunkInputTuple) -> bytes:
-    max_download_retries = 10
-
     byte_range, presigned_url = input_tuple
     start, end = byte_range
 
-
-
-
-
-
-
-
-
-            timeout=300,  # timeout after 5 min
-        )
-        if response.ok:
-            return_value: bytes = response.raw.data
-            logger_no_user_data.debug(f'Returning raw data for part {start}')
-            return return_value
-        else:
-            logger_no_user_data.warning(
-                f'Got not ok response when downloading part {start}:{end}. '
-                f'Got response status {response.status_code} and content: {response.content.decode()} '
-                f'Retrying...'
-            )
-        except Exception:  # pylint: disable=broad-except
-            logger_no_user_data.warning(f'Encountered error when downloading part {start}:{end}. Retrying...')
-
-        time.sleep(5)
-
-    logger_no_user_data.debug(f'Max retries hit, when downloading part {start}:{end}. Exiting...')
-    raise BioLibError(f'Max retries hit, when downloading part {start}:{end}. Exiting...')
+    response = HttpClient.request(
+        url=presigned_url,
+        headers={'range': f'bytes={start}-{end}'},
+        timeout_in_seconds=300,  # timeout after 5 min
+        retries=10,
+    )
+    logger_no_user_data.debug(f'Returning raw data for part {start}')
+    return response.content
 
 
 class ChunkIterator(collections.abc.Iterator):
@@ -154,11 +129,8 @@ class ChunkIterator(collections.abc.Iterator):
 def download_presigned_s3_url(presigned_url: str, output_file_path: str) -> None:
     chunk_size = 50_000_000
 
-
-
-        raise Exception(f'Got response status code {response.status_code} and content {response.content.decode()}')
-
-    file_size = int(response.headers['Content-Range'].split('/')[1])
+    response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-1'})
+    file_size = int(response.headers['Content-Range'].split('/')[1])
 
     chunk_iterator = ChunkIterator(file_size, chunk_size, presigned_url)
 
```
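The hand-rolled requests retry loop is replaced by the `HttpClient` wrapper, which takes the retry budget and timeout as arguments. A sketch of the same ranged-download pattern, using only the parameters visible in this diff (the URL is a placeholder):

```python
from biolib._internal.http_client import HttpClient

presigned_url = 'https://example.com/presigned-download-url'  # placeholder

# Probe with a tiny range request; S3-style endpoints reply with a
# Content-Range header of the form 'bytes 0-1/<total_size>'.
probe = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-1'})
file_size = int(probe.headers['Content-Range'].split('/')[1])

# Download the first chunk with the same retry budget _download_chunk uses.
chunk = HttpClient.request(
    url=presigned_url,
    headers={'range': f'bytes=0-{min(file_size, 50_000_000) - 1}'},
    timeout_in_seconds=300,
    retries=10,
).content
```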
biolib/utils/app_uri.py
CHANGED
```diff
@@ -12,17 +12,18 @@ class SemanticVersion(TypedDict):
 
 class AppUriParsed(TypedDict):
     account_handle_normalized: str
-    app_name_normalized: str
+    app_name_normalized: Optional[str]
+    app_name: Optional[str]
     resource_name_prefix: Optional[str]
     version: Optional[SemanticVersion]
 
 
-def normalize(string):
+def normalize(string: str) -> str:
     return string.replace('-', '_').lower()
 
 
 # Mainly copied from backend
-def parse_app_uri(uri: str) -> AppUriParsed:
+def parse_app_uri(uri: str, use_account_as_name_default: bool = True) -> AppUriParsed:
     uri_regex = r'^(@(?P<resource_name_prefix>[\w._-]+)/)?(?P<account_handle>[\w-]+)(/(?P<app_name>[\w-]+))?' \
                 r'(:(?P<version>(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)))?$'
 
@@ -36,12 +37,18 @@ def parse_app_uri(uri: str) -> AppUriParsed:
     app_name: Optional[str] = matches.group('app_name')
 
     # Default to account_handle if app_name is not supplied
-
+    if app_name:
+        app_name_normalized = normalize(app_name)
+    elif use_account_as_name_default:
+        app_name_normalized = account_handle_normalized
+    else:
+        app_name_normalized = None
 
     return AppUriParsed(
         resource_name_prefix=resource_name_prefix.lower() if resource_name_prefix is not None else 'biolib.com',
         account_handle_normalized=account_handle_normalized,
         app_name_normalized=app_name_normalized,
+        app_name=app_name if app_name is not None or not use_account_as_name_default else account_handle_normalized,
         version=None if not matches.group('version') else SemanticVersion(
             major=int(matches.group('major')),
             minor=int(matches.group('minor')),
```
biolib/utils/cache_state.py
CHANGED
```diff
@@ -10,7 +10,7 @@ from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger_no_user_data
 from biolib.typing_utils import Optional, Generic, TypeVar
 
-StateType = TypeVar('StateType')
+StateType = TypeVar('StateType')  # pylint: disable=invalid-name
 
 
 class CacheStateError(BioLibError):
@@ -37,7 +37,7 @@ class CacheState(abc.ABC, Generic[StateType]):
     def _state_lock_path(self) -> str:
         return f'{self._state_path}.lock'
 
-    def __init__(self):
+    def __init__(self) -> None:
         self._state: Optional[StateType] = None
 
     def __enter__(self) -> StateType:
```
biolib/utils/multipart_uploader.py
CHANGED

```diff
@@ -5,10 +5,9 @@ import os
 import time
 from urllib.parse import urlparse
 
-import requests
-
+import biolib.api
+from biolib._internal.http_client import HttpClient
 from biolib.biolib_api_client import BiolibApiClient
-from biolib.biolib_api_client.auth import BearerAuth
 from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.typing_utils import TypedDict, List, Iterator, Tuple, Optional, Dict
@@ -33,7 +32,7 @@ def get_chunk_iterator_from_file_object(file_object, chunk_size_in_bytes: int =
 class RequestOptions(TypedDict):
     headers: Optional[Dict[str, str]]
     requires_biolib_auth: bool
-    url: str
+    path: str
 
 
 class _PartMetadata(TypedDict):
@@ -67,20 +66,15 @@ class MultiPartUploader:
         logger_no_user_data.debug(f'Starting multipart upload of payload with size {payload_size_in_bytes} bytes')
 
         if self._start_multipart_upload_request:
-
-
-
-
-
-                    url=self._start_multipart_upload_request['url'],
-            )
-            if start_multipart_upload.ok:
-                logger_no_user_data.debug('Multipart upload started')
-            else:
-                logger_no_user_data.debug(
-                    f'Failed to start multipart upload got response status: {start_multipart_upload.status_code}'
+            try:
+                biolib.api.client.post(
+                    authenticate=self._start_multipart_upload_request['requires_biolib_auth'],
+                    headers=self._start_multipart_upload_request['headers'],
+                    path=self._start_multipart_upload_request['path'],
                 )
-
+            except BaseException as error:
+                logger_no_user_data.debug(f'Failed to start multipart upload got error: {error}')
+                raise error
 
         # if multiprocessing start method is spawn or we are running in a daemon process,
         # multiprocessing.Pool may fail when called from script
@@ -116,30 +110,12 @@ class MultiPartUploader:
             BiolibApiClient.refresh_auth_token()
 
         logger_no_user_data.debug(f'Uploaded {len(parts)} parts, now calling complete upload...')
-
-
-
-
-
-
-                    timeout=30,
-                    url=self._complete_upload_request['url'],
-                )
-                if complete_upload_response.ok:
-                    logger_no_user_data.debug('Multipart upload completed returning')
-                    return
-
-                logger_no_user_data.warning(
-                    f'Failed to complete multipart upload got response status {complete_upload_response.status_code}. '
-                    f'Retrying...'
-                )
-
-            except Exception as error:  # pylint: disable=broad-except
-                logger_no_user_data.warning('Encountered error when completing multipart upload. Retrying...')
-                logger.debug(f'Multipart complete error: {error}')
-                time.sleep(index * index + 2)
-
-        raise BioLibError('Max retries hit, when completing multipart upload')
+        biolib.api.client.post(
+            authenticate=requires_biolib_auth,
+            headers=self._complete_upload_request['headers'],
+            data={'parts': parts, 'size_bytes': self._bytes_uploaded},
+            path=self._complete_upload_request['path'],
+        )
 
     def _upload_chunk(self, _input: _UploadChunkInputType) -> _UploadChunkReturnType:
         part_number, chunk = _input
@@ -150,44 +126,42 @@ class MultiPartUploader:
             BiolibApiClient.refresh_auth_token()
 
         logger_no_user_data.debug(f'Uploading part number {part_number} with size {len(chunk)} bytes...')
+        presigned_upload_url = None
         try:
             logger_no_user_data.debug(f'Getting upload URL for chunk {part_number}...')
-            get_url_response =
-
+            get_url_response = biolib.api.client.get(
+                authenticate=requires_biolib_auth,
                 headers=self._get_presigned_upload_url_request['headers'],
                 params={'part_number': part_number},
-
-                url=self._get_presigned_upload_url_request['url'],
+                path=self._get_presigned_upload_url_request['path'],
             )
-            if not get_url_response.ok:
-                raise Exception(
-                    f'Failed to get upload URL for part {part_number} got response status code '
-                    f'{get_url_response.status_code}'
-                )
 
             presigned_upload_url = get_url_response.json()['presigned_upload_url']
 
-
-
-
-                parsed_url = urlparse(presigned_upload_url)
-                presigned_upload_url = \
-                    f'{app_caller_proxy_job_storage_base_url}{parsed_url.path}?{parsed_url.query}'
-
-            put_chunk_response = requests.put(url=presigned_upload_url, data=chunk, timeout=300)
+        except Exception as error:  # pylint: disable=broad-except
+            logger_no_user_data.warning(f'Error when getting url for part {part_number}. Retrying...')
+            logger.debug(f'Upload error: {error}')
 
-
-
-
-
-
-
+        if presigned_upload_url:
+            try:
+                app_caller_proxy_job_storage_base_url = os.getenv('BIOLIB_CLOUD_JOB_STORAGE_BASE_URL', '')
+                if app_caller_proxy_job_storage_base_url:
+                    # Done to hit App Caller Proxy when uploading result from inside an app
+                    parsed_url = urlparse(presigned_upload_url)
+                    presigned_upload_url = \
+                        f'{app_caller_proxy_job_storage_base_url}{parsed_url.path}?{parsed_url.query}'
+
+                put_chunk_response = HttpClient.request(
+                    url=presigned_upload_url,
+                    data=chunk,
+                    method='PUT',
+                    timeout_in_seconds=300,
                 )
-
+                return _PartMetadata(PartNumber=part_number, ETag=put_chunk_response.headers['ETag']), len(chunk)
 
-
-
+            except Exception as error:  # pylint: disable=broad-except
+                logger_no_user_data.warning(f'Encountered error when uploading part {part_number}. Retrying...')
+                logger.debug(f'Upload error: {error} ({presigned_upload_url})')
 
         time.sleep(index * index + 2)
 
```
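With `url: str` replaced by `path: str` in `RequestOptions`, callers now configure the uploader with API paths instead of absolute URLs, exactly as `biolib/lfs/utils.py` does earlier in this diff. A sketch of that wiring (the version UUID is a placeholder, a signed-in client is assumed, and `get_chunk_iterator_from_bytes` is assumed to accept a bytes payload with a default chunk size):

```python
from biolib.utils.multipart_uploader import MultiPartUploader, get_chunk_iterator_from_bytes

payload = b'example payload' * 1_000
version_uuid = '00000000-0000-0000-0000-000000000000'  # placeholder

uploader = MultiPartUploader(
    use_process_pool=False,
    get_presigned_upload_url_request=dict(
        headers=None,
        requires_biolib_auth=True,
        path=f'/lfs/versions/{version_uuid}/presigned_upload_url/',
    ),
    complete_upload_request=dict(
        headers=None,
        requires_biolib_auth=True,
        path=f'/lfs/versions/{version_uuid}/complete_upload/',
    ),
)
uploader.upload(
    payload_iterator=get_chunk_iterator_from_bytes(payload),
    payload_size_in_bytes=len(payload),
)
```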