pybiolib 1.2.1642__py3-none-any.whl → 1.2.1727__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pybiolib might be problematic.
- biolib/_data_record/data_record.py +79 -15
- biolib/_index/index.py +1 -1
- biolib/_internal/data_record/push_data.py +64 -15
- biolib/_internal/utils/__init__.py +15 -0
- biolib/_internal/utils/auth.py +46 -0
- biolib/_runtime/runtime.py +1 -1
- biolib/_shared/types/__init__.py +8 -3
- biolib/_shared/types/resource.py +21 -1
- biolib/_shared/types/resource_permission.py +1 -1
- biolib/_shared/types/resource_version.py +8 -2
- biolib/api/client.py +3 -47
- biolib/app/app.py +1 -10
- biolib/biolib_api_client/api_client.py +3 -47
- biolib/cli/data_record.py +65 -0
- biolib/cli/init.py +39 -1
- biolib/cli/run.py +8 -5
- biolib/compute_node/job_worker/job_worker.py +2 -2
- biolib/compute_node/remote_host_proxy.py +18 -16
- biolib/experiments/experiment.py +13 -0
- biolib/utils/multipart_uploader.py +24 -18
- pybiolib-1.2.1727.dist-info/METADATA +41 -0
- {pybiolib-1.2.1642.dist-info → pybiolib-1.2.1727.dist-info}/RECORD +50 -50
- {pybiolib-1.2.1642.dist-info → pybiolib-1.2.1727.dist-info}/WHEEL +1 -1
- pybiolib-1.2.1727.dist-info/entry_points.txt +2 -0
- biolib/_shared/types/resource_types.py +0 -18
- pybiolib-1.2.1642.dist-info/METADATA +0 -52
- pybiolib-1.2.1642.dist-info/entry_points.txt +0 -3
- {pybiolib-1.2.1642.dist-info → pybiolib-1.2.1727.dist-info}/licenses/LICENSE +0 -0
biolib/_data_record/data_record.py CHANGED

@@ -3,12 +3,13 @@ from collections import namedtuple
 from datetime import datetime
 from pathlib import Path
 from struct import Struct
-from typing import Callable, Dict, Iterable, List, Optional, Union, cast
+from typing import Callable, Dict, Iterable, Iterator, List, Optional, Union, cast
 
 from biolib import api
 from biolib._internal.data_record import get_data_record_state_from_uri
 from biolib._internal.data_record.data_record import validate_sqlite_v1
 from biolib._internal.data_record.push_data import (
+    _upload_from_iterator,
     push_data_path,
     validate_data_path_and_get_files_and_size_of_directory,
 )

@@ -19,7 +20,7 @@ from biolib._shared.types import ZipFileNodeDict
 from biolib._shared.utils import parse_resource_uri
 from biolib.api import client as api_client
 from biolib.biolib_api_client import BiolibApiClient
-from biolib.biolib_api_client.lfs_types import DataRecordInfo,
+from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersionInfo
 from biolib.biolib_binary_format import LazyLoadedFile
 from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
 from biolib.biolib_logging import logger

@@ -113,30 +114,29 @@ class DataRecord:
         else:
             raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")
 
-
-        data_record_version: DataRecordVersion = response.json()
-        resource_version_uuid = data_record_version['uuid']
-
-        push_data_path(
+        new_resource_version_uuid = push_data_path(
             data_path=data_path,
             data_size_in_bytes=data_size_in_bytes,
             files_to_zip=files_to_zip,
-
+            resource_uuid=self._state['resource_uuid'],
             chunk_size_in_mb=chunk_size_in_mb,
+            publish=True,
         )
 
-
-
-
-        )
-
-        logger.info(f"Successfully pushed a new Data Record version '{data_record_version['uri']}'")
-        self._state = get_data_record_state_from_uri(data_record_version['uri'])
+        updated_record = DataRecord._get_by_version_uuid(new_resource_version_uuid)
+        self._state = updated_record._state  # pylint: disable=protected-access
+        logger.info(f"Successfully pushed a new Data Record version '{self.uri}'")
 
     @staticmethod
     def get_by_uri(uri: str) -> 'DataRecord':
         return DataRecord(_internal_state=get_data_record_state_from_uri(uri))
 
+    @staticmethod
+    def _get_by_version_uuid(version_uuid: str) -> 'DataRecord':
+        response = api.client.get(path=f'/lfs/versions/{version_uuid}/')
+        version_info = response.json()
+        return DataRecord.get_by_uri(version_info['uri'])
+
     @staticmethod
     def create(destination: str, data_path: Optional[str] = None, record_type: Optional[str] = None) -> 'DataRecord':
         BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Data Record')

@@ -285,6 +285,70 @@ class DataRecord:
     def _get_detailed_dict(self) -> types.DataRecordDetailedDict:
         return cast(types.DataRecordDetailedDict, api_client.get(f'/resources/data-records/{self.uuid}/').json())
 
+    def _get_zip_size_bytes(self) -> int:
+        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
+            resource_version_uuid=self._state['resource_version_uuid'],
+        )
+        presigned_url = remote_storage_endpoint.get_remote_url()
+        response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-0'})
+        content_range = response.headers.get('Content-Range', '')
+        if not content_range or '/' not in content_range:
+            raise ValueError('Unable to determine zip size: Content-Range header missing or invalid')
+        total_size = int(content_range.split('/')[1])
+        return total_size
+
+    def _iter_zip_bytes(self, chunk_size_bytes: int) -> Iterator[bytes]:
+        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
+            resource_version_uuid=self._state['resource_version_uuid'],
+        )
+        presigned_url = remote_storage_endpoint.get_remote_url()
+        response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-0'})
+        content_range = response.headers.get('Content-Range', '')
+        if not content_range or '/' not in content_range:
+            raise ValueError('Unable to determine zip size: Content-Range header missing or invalid')
+        total_size = int(content_range.split('/')[1])
+
+        for start in range(0, total_size, chunk_size_bytes):
+            end = min(start + chunk_size_bytes - 1, total_size - 1)
+            presigned_url = remote_storage_endpoint.get_remote_url()
+            response = HttpClient.request(
+                url=presigned_url,
+                headers={'range': f'bytes={start}-{end}'},
+                timeout_in_seconds=300,
+            )
+            yield response.content
+
+    @staticmethod
+    def clone(
+        source: 'DataRecord',
+        destination: 'DataRecord',
+        on_progress: Optional[Callable[[int, int], None]] = None,
+    ) -> 'DataRecord':
+        BiolibApiClient.assert_is_signed_in(authenticated_action_description='clone a Data Record')
+
+        # pylint: disable=protected-access
+        total_size_in_bytes = source._get_zip_size_bytes()
+
+        if total_size_in_bytes == 0:
+            raise ValueError('Source data record has no data to clone')
+
+        min_chunk_size_bytes = 10_000_000
+        chunk_size_in_bytes = max(min_chunk_size_bytes, int(total_size_in_bytes / 9_000))
+
+        zip_iterator = source._iter_zip_bytes(chunk_size_bytes=chunk_size_in_bytes)
+
+        new_resource_version_uuid = _upload_from_iterator(
+            resource_uuid=destination._state['resource_uuid'],
+            payload_iterator=zip_iterator,
+            payload_size_in_bytes=total_size_in_bytes,
+            publish=True,
+            on_progress=on_progress,
+        )
+        # pylint: enable=protected-access
+
+        logger.info(f"Successfully cloned data to '{destination.uri}'")
+        return DataRecord._get_by_version_uuid(new_resource_version_uuid)
+
     def delete(self) -> None:
         """Delete the data record.
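The new clone support hinges on one trick: asking the object store for a single byte (`range: bytes=0-0`) and reading the total object size out of the `Content-Range` response header, then streaming the zip down in ranged chunks. A minimal, self-contained sketch of that size probe using only the standard library (the URL is a placeholder, not a pybiolib endpoint):

    import urllib.request

    def get_remote_size_bytes(url: str) -> int:
        # Request a single byte; a server that supports range requests replies
        # 206 with a Content-Range header of the form 'bytes 0-0/<total_size>'.
        request = urllib.request.Request(url, headers={'Range': 'bytes=0-0'})
        with urllib.request.urlopen(request) as response:
            content_range = response.headers.get('Content-Range', '')
            if '/' not in content_range:
                raise ValueError('Content-Range header missing or invalid')
            return int(content_range.split('/')[1])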
biolib/_index/index.py CHANGED

@@ -21,7 +21,7 @@ class Index:
 
     @property
     def id(self) -> str:
-        return f
+        return f'{self._state["group_uuid"]}.{self._state["resource_uuid"]}'.replace('-', '_')
 
     @staticmethod
     def get_by_uri(uri: str) -> 'Index':
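The new `Index.id` joins the group and resource UUIDs with a dot and replaces dashes with underscores. A quick illustration with made-up UUIDs:

    group_uuid = '1f7d41c2-ab12-4f0e-9d3a-1c2b3d4e5f60'
    resource_uuid = '9e8d7c6b-5a43-210f-edcb-a09876543210'
    index_id = f'{group_uuid}.{resource_uuid}'.replace('-', '_')
    # -> '1f7d41c2_ab12_4f0e_9d3a_1c2b3d4e5f60.9e8d7c6b_5a43_210f_edcb_a09876543210'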
biolib/_internal/data_record/push_data.py CHANGED

@@ -1,10 +1,58 @@
+from __future__ import annotations
+
 import os
+from typing import Callable, Iterator
 
-
+import biolib.api as api
 from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
 from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger
 from biolib.typing_utils import List, Optional, Tuple
+from biolib.utils import MultiPartUploader
+
+
+def _upload_from_iterator(
+    payload_iterator: Iterator[bytes],
+    payload_size_in_bytes: int,
+    resource_uuid: Optional[str] = None,
+    resource_version_uuid: Optional[str] = None,
+    use_process_pool: bool = False,
+    publish: bool = False,
+    on_progress: Optional[Callable[[int, int], None]] = None,
+) -> str:
+    if (resource_uuid is None) == (resource_version_uuid is None):
+        raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
+
+    if resource_version_uuid is None:
+        response = api.client.post(
+            path='/lfs/versions/',
+            data={'resource_uuid': resource_uuid},
+        )
+        resource_version_uuid = response.json()['uuid']
+
+    multipart_uploader = MultiPartUploader(
+        use_process_pool=use_process_pool,
+        get_presigned_upload_url_request={
+            'headers': None,
+            'requires_biolib_auth': True,
+            'path': f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
+        },
+        complete_upload_request={
+            'headers': None,
+            'requires_biolib_auth': True,
+            'path': f'/lfs/versions/{resource_version_uuid}/complete_upload/',
+        },
+        on_progress=on_progress,
+    )
+    multipart_uploader.upload(payload_iterator=payload_iterator, payload_size_in_bytes=payload_size_in_bytes)
+
+    if publish:
+        api.client.patch(
+            path=f'/resources/versions/{resource_version_uuid}/',
+            data={'state': 'published', 'set_as_active': True},
+        )
+
+    return resource_version_uuid
 
 
 def validate_data_path_and_get_files_and_size_of_directory(data_path: str) -> Tuple[List[str], int]:

@@ -28,9 +76,14 @@ def push_data_path(
     data_path: str,
     data_size_in_bytes: int,
     files_to_zip: List[str],
-
+    resource_uuid: Optional[str] = None,
+    resource_version_uuid: Optional[str] = None,
     chunk_size_in_mb: Optional[int] = None,
-
+    publish: bool = False,
+) -> str:
+    if (resource_uuid is None) == (resource_version_uuid is None):
+        raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
+
     original_working_dir = os.getcwd()
     os.chdir(data_path)
 

@@ -49,19 +102,15 @@ def push_data_path(
     logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
 
     iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
-
+
+    new_resource_version_uuid = _upload_from_iterator(
+        payload_iterator=iterable_zip_stream,
+        payload_size_in_bytes=data_size_in_bytes,
+        resource_uuid=resource_uuid,
+        resource_version_uuid=resource_version_uuid,
         use_process_pool=True,
-
-            headers=None,
-            requires_biolib_auth=True,
-            path=f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
-        ),
-        complete_upload_request=dict(
-            headers=None,
-            requires_biolib_auth=True,
-            path=f'/lfs/versions/{resource_version_uuid}/complete_upload/',
-        ),
+        publish=publish,
     )
 
-    multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
     os.chdir(original_working_dir)
+    return new_resource_version_uuid
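Both `_upload_from_iterator` and `push_data_path` use the same guard, `(resource_uuid is None) == (resource_version_uuid is None)`, which rejects the zero-argument and two-argument cases in a single comparison. A small standalone demonstration of the pattern (names are illustrative):

    def require_exactly_one(a, b):
        # True when both are None or both are set, i.e. not exactly one.
        if (a is None) == (b is None):
            raise ValueError('Must provide exactly one of a or b')

    require_exactly_one('x', None)       # ok
    # require_exactly_one(None, None)    # raises ValueError
    # require_exactly_one('x', 'y')      # raises ValueError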
biolib/_internal/utils/__init__.py CHANGED

@@ -1,3 +1,4 @@
+import base64
 import time
 import uuid
 from fnmatch import fnmatch

@@ -41,3 +42,17 @@ def open_browser_window_from_notebook(url_to_open: str) -> None:
     display(Javascript(f'window.open("{url_to_open}");'), display_id=display_id)
     time.sleep(1)
     update_display(Javascript(''), display_id=display_id)
+
+
+def base64_encode_string(input_str: str) -> str:
+    input_bytes = input_str.encode('utf-8')
+    base64_bytes = base64.b64encode(input_bytes)
+    base64_str = base64_bytes.decode('utf-8')
+    return base64_str
+
+
+def decode_base64_string(base64_str: str) -> str:
+    base64_bytes = base64_str.encode('utf-8')
+    input_bytes = base64.b64decode(base64_bytes)
+    input_str = input_bytes.decode('utf-8')
+    return input_str
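The two new helpers are straightforward inverses; a quick round-trip with the code as added in this version:

    from biolib._internal.utils import base64_encode_string, decode_base64_string

    encoded = base64_encode_string('hello, world')   # 'aGVsbG8sIHdvcmxk'
    assert decode_base64_string(encoded) == 'hello, world'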
biolib/_internal/utils/auth.py ADDED

@@ -0,0 +1,46 @@
+import base64
+import binascii
+import json
+from typing import Any, Dict
+
+
+class JwtDecodeError(Exception):
+    pass
+
+
+def decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
+    jwt_bytes = jwt.encode('utf-8')
+
+    try:
+        signing_input, _ = jwt_bytes.rsplit(b'.', 1)
+        header_segment, payload_segment = signing_input.split(b'.', 1)
+    except ValueError as error:
+        raise JwtDecodeError('Not enough segments') from error
+
+    try:
+        header_data = base64.urlsafe_b64decode(header_segment)
+    except (TypeError, binascii.Error) as error:
+        raise JwtDecodeError('Invalid header padding') from error
+
+    try:
+        header = json.loads(header_data)
+    except ValueError as error:
+        raise JwtDecodeError(f'Invalid header string: {error}') from error
+
+    if not isinstance(header, dict):
+        raise JwtDecodeError('Invalid header string: must be a json object')
+
+    try:
+        payload_data = base64.urlsafe_b64decode(payload_segment)
+    except (TypeError, binascii.Error) as error:
+        raise JwtDecodeError('Invalid payload padding') from error
+
+    try:
+        payload = json.loads(payload_data)
+    except ValueError as error:
+        raise JwtDecodeError(f'Invalid payload string: {error}') from error
+
+    if not isinstance(payload, dict):
+        raise JwtDecodeError('Invalid payload string: must be a json object')
+
+    return dict(header=header, payload=payload)
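This module consolidates the JWT decoding that was previously duplicated in both API clients (note the duplicates also checked `header` where they meant `payload`; the shared copy fixes that). A self-contained sketch that builds a throwaway token locally and decodes it with the new helper:

    import base64
    import json

    from biolib._internal.utils.auth import decode_jwt_without_checking_signature

    def _b64url(data: dict) -> bytes:
        return base64.urlsafe_b64encode(json.dumps(data).encode('utf-8'))

    # Assemble header.payload.signature; the signature is never verified here.
    token = b'.'.join([
        _b64url({'alg': 'HS256', 'typ': 'JWT'}),
        _b64url({'sub': 'demo', 'exp': 1700000000}),
        b'fake-signature',
    ]).decode('utf-8')

    decoded = decode_jwt_without_checking_signature(token)
    assert decoded['payload']['exp'] == 1700000000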
biolib/_runtime/runtime.py CHANGED

@@ -38,7 +38,7 @@ class Runtime:
         return job_requested_machine
 
     @staticmethod
-    def
+    def is_spot_machine_requested() -> bool:
         job_data = Runtime._get_job_data()
         return job_data.get('job_requested_machine_spot', False)
 
biolib/_shared/types/__init__.py CHANGED

@@ -19,11 +19,14 @@ from .experiment import (
 )
 from .file_node import FileNodeDict, FileZipMetadataDict, ZipFileNodeDict
 from .push import PushResponseDict
-from .resource import ResourceUriDict, SemanticVersionDict
+from .resource import ResourceDetailedDict, ResourceDict, ResourceTypeLiteral, ResourceUriDict, SemanticVersionDict
 from .resource_deploy_key import ResourceDeployKeyDict, ResourceDeployKeyWithSecretDict
 from .resource_permission import ResourcePermissionDetailedDict, ResourcePermissionDict
-from .
-
+from .resource_version import (
+    ResourceVersionAssetsDict,
+    ResourceVersionDetailedDict,
+    ResourceVersionDict,
+)
 from .result import ResultDetailedDict, ResultDict
 from .typing import Optional
 from .user import EnterpriseSettingsDict, UserDetailedDict, UserDict

@@ -50,9 +53,11 @@ __all__ = [
     'ResourceDeployKeyWithSecretDict',
     'ResourceDetailedDict',
     'ResourceDict',
+    'ResourceTypeLiteral',
     'ResourcePermissionDetailedDict',
     'ResourcePermissionDict',
     'ResourceUriDict',
+    'ResourceVersionAssetsDict',
     'ResourceVersionDetailedDict',
     'ResourceVersionDict',
     'ResultCounts',
biolib/_shared/types/resource.py CHANGED

@@ -1,4 +1,6 @@
-from .
+from .experiment import DeprecatedExperimentDict
+from .resource_version import ResourceVersionDetailedDict
+from .typing import Literal, NotRequired, Optional, TypedDict
 
 
 class SemanticVersionDict(TypedDict):

@@ -15,3 +17,21 @@ class ResourceUriDict(TypedDict):
     resource_prefix: Optional[str]
     version: Optional[SemanticVersionDict]
     tag: Optional[str]
+
+
+ResourceTypeLiteral = Literal['app', 'data-record', 'experiment', 'index']
+
+
+class ResourceDict(TypedDict):
+    uuid: str
+    uri: str
+    name: str
+    created_at: str
+    description: str
+    account_uuid: str
+
+
+class ResourceDetailedDict(ResourceDict):
+    type: ResourceTypeLiteral
+    version: NotRequired[ResourceVersionDetailedDict]
+    experiment: Optional[DeprecatedExperimentDict]
biolib/_shared/types/resource_version.py CHANGED

@@ -1,4 +1,9 @@
-from .typing import Literal, NotRequired, TypedDict
+from .typing import Literal, NotRequired, Optional, TypedDict
+
+
+class ResourceVersionAssetsDict(TypedDict):
+    download_url: str
+    size_bytes: int
 
 
 class ResourceVersionDict(TypedDict):

@@ -7,7 +12,8 @@ class ResourceVersionDict(TypedDict):
     state: Literal['published', 'unpublished']
     created_at: str
     git_branch_name: NotRequired[str]
+    git_commit_hash: NotRequired[str]
 
 
 class ResourceVersionDetailedDict(ResourceVersionDict):
-
+    assets: Optional[ResourceVersionAssetsDict]
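These are plain `TypedDict` shapes, so API responses can be annotated without any runtime cost. A short illustration using the new `ResourceDict` with made-up values (the field set is exactly what the diff above defines):

    from biolib._shared.types import ResourceDict

    record: ResourceDict = {
        'uuid': '1f7d41c2-ab12-4f0e-9d3a-1c2b3d4e5f60',  # made-up example UUID
        'uri': 'my-account/my-record',                    # made-up example URI
        'name': 'my-record',
        'created_at': '2024-01-01T00:00:00Z',
        'description': 'Example record',
        'account_uuid': '9e8d7c6b-5a43-210f-edcb-a09876543210',
    }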
biolib/api/client.py CHANGED

@@ -1,6 +1,3 @@
-import base64
-import binascii
-import json
 from datetime import datetime, timezone
 from json.decoder import JSONDecodeError
 from urllib.parse import urlencode, urljoin

@@ -8,7 +5,8 @@ from urllib.parse import urlencode, urljoin
 import importlib_metadata
 
 from biolib._internal.http_client import HttpClient, HttpResponse
-from biolib.
+from biolib._internal.utils.auth import decode_jwt_without_checking_signature
+from biolib._shared.types.typing import Dict, Optional, TypedDict, Union, cast
 from biolib.biolib_api_client import BiolibApiClient as DeprecatedApiClient
 from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger

@@ -33,10 +31,6 @@ class ApiClientInitDict(TypedDict):
     client_type: Optional[str]
 
 
-class JwtDecodeError(Exception):
-    pass
-
-
 class ApiClient(HttpClient):
     _biolib_package_version: str = _get_biolib_package_version()
 

@@ -147,7 +141,7 @@ class ApiClient(HttpClient):
 
     def _get_access_token(self) -> str:
         if self._access_token:
-            decoded_token =
+            decoded_token = decode_jwt_without_checking_signature(self._access_token)
             if datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60:  # 60 second buffer
                 # Token has not expired yet
                 return self._access_token

@@ -171,41 +165,3 @@ class ApiClient(HttpClient):
 
         self._access_token = cast(str, response_dict['access'])
         return self._access_token
-
-    @staticmethod
-    def _decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
-        jwt_bytes = jwt.encode('utf-8')
-
-        try:
-            signing_input, _ = jwt_bytes.rsplit(b'.', 1)
-            header_segment, payload_segment = signing_input.split(b'.', 1)
-        except ValueError as error:
-            raise JwtDecodeError('Not enough segments') from error
-
-        try:
-            header_data = base64.urlsafe_b64decode(header_segment)
-        except (TypeError, binascii.Error) as error:
-            raise JwtDecodeError('Invalid header padding') from error
-
-        try:
-            header = json.loads(header_data)
-        except ValueError as error:
-            raise JwtDecodeError(f'Invalid header string: {error}') from error
-
-        if not isinstance(header, dict):
-            raise JwtDecodeError('Invalid header string: must be a json object')
-
-        try:
-            payload_data = base64.urlsafe_b64decode(payload_segment)
-        except (TypeError, binascii.Error) as error:
-            raise JwtDecodeError('Invalid payload padding') from error
-
-        try:
-            payload = json.loads(payload_data)
-        except ValueError as error:
-            raise JwtDecodeError(f'Invalid payload string: {error}') from error
-
-        if not isinstance(header, dict):
-            raise JwtDecodeError('Invalid payload string: must be a json object')
-
-        return dict(header=header, payload=payload)
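The token refresh logic treats a token as expired 60 seconds early, so a request never leaves with a token about to lapse mid-flight. A standalone sketch of the same check (the decoded-token shape matches what `decode_jwt_without_checking_signature` returns):

    from datetime import datetime, timezone

    def token_is_still_valid(decoded_token: dict, buffer_seconds: int = 60) -> bool:
        # 'exp' is a unix timestamp; subtracting the buffer forces an early refresh.
        now = datetime.now(tz=timezone.utc).timestamp()
        return now < decoded_token['payload']['exp'] - buffer_seconds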
biolib/app/app.py CHANGED

@@ -113,16 +113,7 @@ class BioLibApp:
         module_input_serialized = self._get_serialized_module_input(args, stdin, files)
 
         if machine == 'local':
-
-                raise BioLibError('The argument "blocking" cannot be False when running locally')
-
-            if experiment_id:
-                logger.warning('The argument "experiment_id" is ignored when running locally')
-
-            if result_prefix:
-                logger.warning('The argument "result_prefix" is ignored when running locally')
-
-            return self._run_locally(module_input_serialized)
+            raise BioLibError('Running applications locally with machine="local" is no longer supported.')
 
         job = Result._start_job_in_cloud(  # pylint: disable=protected-access
             app_uri=self._app_uri,
biolib/biolib_api_client/api_client.py CHANGED

@@ -1,15 +1,13 @@
-import base64
-import binascii
-import json
 import os
 from datetime import datetime, timezone
 from json.decoder import JSONDecodeError
 
 from biolib._internal.http_client import HttpClient
+from biolib._internal.utils.auth import decode_jwt_without_checking_signature
 from biolib._runtime.runtime import Runtime
 from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger, logger_no_user_data
-from biolib.typing_utils import
+from biolib.typing_utils import Optional, TypedDict
 
 from .user_state import UserState

@@ -19,10 +17,6 @@ class UserTokens(TypedDict):
     refresh: str
 
 
-class JwtDecodeError(Exception):
-    pass
-
-
 class _ApiClient:
     def __init__(self, base_url: str, access_token: Optional[str] = None):
         self.base_url: str = base_url

@@ -60,7 +54,7 @@ class _ApiClient:
             return
 
         if self.access_token:
-            decoded_token =
+            decoded_token = decode_jwt_without_checking_signature(self.access_token)
             if datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60:  # 60 second buffer
                 # Token has not expired yet
                 return

@@ -132,44 +126,6 @@ class _ApiClient:
         self.access_token = json_response['access_token']
         self.refresh_token = json_response['refresh_token']
 
-    @staticmethod
-    def decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
-        jwt_bytes = jwt.encode('utf-8')
-
-        try:
-            signing_input, _ = jwt_bytes.rsplit(b'.', 1)
-            header_segment, payload_segment = signing_input.split(b'.', 1)
-        except ValueError as error:
-            raise JwtDecodeError('Not enough segments') from error
-
-        try:
-            header_data = base64.urlsafe_b64decode(header_segment)
-        except (TypeError, binascii.Error) as error:
-            raise JwtDecodeError('Invalid header padding') from error
-
-        try:
-            header = json.loads(header_data)
-        except ValueError as error:
-            raise JwtDecodeError(f'Invalid header string: {error}') from error
-
-        if not isinstance(header, dict):
-            raise JwtDecodeError('Invalid header string: must be a json object')
-
-        try:
-            payload_data = base64.urlsafe_b64decode(payload_segment)
-        except (TypeError, binascii.Error) as error:
-            raise JwtDecodeError('Invalid payload padding') from error
-
-        try:
-            payload = json.loads(payload_data)
-        except ValueError as error:
-            raise JwtDecodeError(f'Invalid payload string: {error}') from error
-
-        if not isinstance(header, dict):
-            raise JwtDecodeError('Invalid payload string: must be a json object')
-
-        return dict(header=header, payload=payload)
-
 
 class BiolibApiClient:
     api_client: Optional[_ApiClient] = None
biolib/cli/data_record.py CHANGED

@@ -1,9 +1,11 @@
 import json
 import logging
 import os
+import sys
 from typing import Dict, List
 
 import click
+import rich.progress
 
 from biolib._data_record.data_record import DataRecord
 from biolib.biolib_api_client import BiolibApiClient

@@ -97,3 +99,66 @@ def delete(uri: str) -> None:
 
     record.delete()
     print(f'Data record {record.uri} has been deleted.')
+
+
+def _clone_data_record_with_progress(
+    source_record: DataRecord,
+    dest_record: DataRecord,
+) -> None:
+    # pylint: disable=protected-access
+    total_size_in_bytes = source_record._get_zip_size_bytes()
+    # pylint: enable=protected-access
+
+    if total_size_in_bytes == 0:
+        logger.info('Source data record has no data to clone')
+        return
+
+    if sys.stdout.isatty():
+        with rich.progress.Progress(
+            rich.progress.TextColumn('[bold blue]{task.description}'),
+            rich.progress.BarColumn(),
+            rich.progress.TaskProgressColumn(),
+            rich.progress.TimeRemainingColumn(),
+            rich.progress.TransferSpeedColumn(),
+        ) as progress:
+            task_id = progress.add_task('Cloning data record', total=total_size_in_bytes)
+
+            def on_progress(bytes_uploaded: int, _total_bytes: int) -> None:
+                progress.update(task_id, completed=bytes_uploaded)
+
+            DataRecord.clone(source=source_record, destination=dest_record, on_progress=on_progress)
+    else:
+        logger.info(f'Cloning ~{round(total_size_in_bytes / 10**6)}mb of data')
+        DataRecord.clone(source=source_record, destination=dest_record)
+
+
+def _get_or_create_destination_record(destination_uri: str) -> Optional[DataRecord]:
+    try:
+        return DataRecord.get_by_uri(uri=destination_uri)
+    except Exception:
+        print(f'Destination data record "{destination_uri}" does not exist.')
+        confirmation = input('Would you like to create it? [y/N]: ')
+        if confirmation.lower() != 'y':
+            print('Clone cancelled.')
+            return None
+
+        return DataRecord.create(destination=destination_uri)
+
+
+@data_record.command(help='Clone a Data Record to another location')
+@click.argument('source_uri', required=True)
+@click.argument('destination_uri', required=True)
+def clone(source_uri: str, destination_uri: str) -> None:
+    BiolibApiClient.assert_is_signed_in(authenticated_action_description='clone a Data Record')
+
+    logger.info(f'Fetching source data record: {source_uri}')
+    source_record = DataRecord.get_by_uri(uri=source_uri)
+
+    logger.info(f'Checking destination data record: {destination_uri}')
+    dest_record = _get_or_create_destination_record(destination_uri)
+    if dest_record is None:
+        return
+
+    logger.info(f'Cloning from {source_record.uri} to {dest_record.uri}...')
+    _clone_data_record_with_progress(source_record=source_record, dest_record=dest_record)
+    logger.info('Clone completed successfully.')