pybiolib 1.2.1642__py3-none-any.whl → 1.2.1727__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pybiolib might be problematic. Click here for more details.

@@ -3,12 +3,13 @@ from collections import namedtuple
3
3
  from datetime import datetime
4
4
  from pathlib import Path
5
5
  from struct import Struct
6
- from typing import Callable, Dict, Iterable, List, Optional, Union, cast
6
+ from typing import Callable, Dict, Iterable, Iterator, List, Optional, Union, cast
7
7
 
8
8
  from biolib import api
9
9
  from biolib._internal.data_record import get_data_record_state_from_uri
10
10
  from biolib._internal.data_record.data_record import validate_sqlite_v1
11
11
  from biolib._internal.data_record.push_data import (
12
+ _upload_from_iterator,
12
13
  push_data_path,
13
14
  validate_data_path_and_get_files_and_size_of_directory,
14
15
  )
@@ -19,7 +20,7 @@ from biolib._shared.types import ZipFileNodeDict
19
20
  from biolib._shared.utils import parse_resource_uri
20
21
  from biolib.api import client as api_client
21
22
  from biolib.biolib_api_client import BiolibApiClient
22
- from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersion, DataRecordVersionInfo
23
+ from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersionInfo
23
24
  from biolib.biolib_binary_format import LazyLoadedFile
24
25
  from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
25
26
  from biolib.biolib_logging import logger
@@ -113,30 +114,29 @@ class DataRecord:
113
114
  else:
114
115
  raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")
115
116
 
116
- response = api.client.post(path='/lfs/versions/', data={'resource_uuid': self._state['resource_uuid']})
117
- data_record_version: DataRecordVersion = response.json()
118
- resource_version_uuid = data_record_version['uuid']
119
-
120
- push_data_path(
117
+ new_resource_version_uuid = push_data_path(
121
118
  data_path=data_path,
122
119
  data_size_in_bytes=data_size_in_bytes,
123
120
  files_to_zip=files_to_zip,
124
- resource_version_uuid=resource_version_uuid,
121
+ resource_uuid=self._state['resource_uuid'],
125
122
  chunk_size_in_mb=chunk_size_in_mb,
123
+ publish=True,
126
124
  )
127
125
 
128
- api.client.patch(
129
- path=f'/resources/versions/{resource_version_uuid}/',
130
- data={'state': 'published', 'set_as_active': True},
131
- )
132
-
133
- logger.info(f"Successfully pushed a new Data Record version '{data_record_version['uri']}'")
134
- self._state = get_data_record_state_from_uri(data_record_version['uri'])
126
+ updated_record = DataRecord._get_by_version_uuid(new_resource_version_uuid)
127
+ self._state = updated_record._state # pylint: disable=protected-access
128
+ logger.info(f"Successfully pushed a new Data Record version '{self.uri}'")
135
129
 
136
130
  @staticmethod
137
131
  def get_by_uri(uri: str) -> 'DataRecord':
138
132
  return DataRecord(_internal_state=get_data_record_state_from_uri(uri))
139
133
 
134
@staticmethod
def _get_by_version_uuid(version_uuid: str) -> 'DataRecord':
    """Return the Data Record that the given version UUID belongs to.

    Fetches ``/lfs/versions/<version_uuid>/`` and resolves the ``uri`` field of
    the JSON response through ``DataRecord.get_by_uri``.
    """
    version_uri = api.client.get(path=f'/lfs/versions/{version_uuid}/').json()['uri']
    return DataRecord.get_by_uri(version_uri)
139
+
140
140
  @staticmethod
141
141
  def create(destination: str, data_path: Optional[str] = None, record_type: Optional[str] = None) -> 'DataRecord':
142
142
  BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Data Record')
@@ -285,6 +285,70 @@ class DataRecord:
285
285
  def _get_detailed_dict(self) -> types.DataRecordDetailedDict:
286
286
  return cast(types.DataRecordDetailedDict, api_client.get(f'/resources/data-records/{self.uuid}/').json())
287
287
 
288
def _get_zip_size_bytes(self) -> int:
    """Return the total size in bytes of this version's remote zip.

    A one-byte ranged request (``bytes=0-0``) is sent to the presigned URL and
    the total is read from the part after ``/`` in the ``Content-Range``
    response header, so the payload itself is not downloaded.

    Raises:
        ValueError: If the header is missing, has no ``/`` separator, or its
            total is not a plain non-negative integer (for example ``*``,
            which HTTP uses for an unknown complete length).
    """
    remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
        resource_version_uuid=self._state['resource_version_uuid'],
    )
    presigned_url = remote_storage_endpoint.get_remote_url()
    response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-0'})

    content_range = response.headers.get('Content-Range', '')
    if not content_range or '/' not in content_range:
        raise ValueError('Unable to determine zip size: Content-Range header missing or invalid')

    # Validate before converting so that a value like '*' produces a clear
    # error instead of an opaque "invalid literal for int()" message.
    total_size_text = content_range.split('/')[1].strip()
    if not total_size_text.isdecimal():
        raise ValueError(f"Unable to determine zip size: unexpected Content-Range header '{content_range}'")

    return int(total_size_text)
299
+
300
+ def _iter_zip_bytes(self, chunk_size_bytes: int) -> Iterator[bytes]:
301
+ remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
302
+ resource_version_uuid=self._state['resource_version_uuid'],
303
+ )
304
+ presigned_url = remote_storage_endpoint.get_remote_url()
305
+ response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-0'})
306
+ content_range = response.headers.get('Content-Range', '')
307
+ if not content_range or '/' not in content_range:
308
+ raise ValueError('Unable to determine zip size: Content-Range header missing or invalid')
309
+ total_size = int(content_range.split('/')[1])
310
+
311
+ for start in range(0, total_size, chunk_size_bytes):
312
+ end = min(start + chunk_size_bytes - 1, total_size - 1)
313
+ presigned_url = remote_storage_endpoint.get_remote_url()
314
+ response = HttpClient.request(
315
+ url=presigned_url,
316
+ headers={'range': f'bytes={start}-{end}'},
317
+ timeout_in_seconds=300,
318
+ )
319
+ yield response.content
320
+
321
@staticmethod
def clone(
    source: 'DataRecord',
    destination: 'DataRecord',
    on_progress: Optional[Callable[[int, int], None]] = None,
) -> 'DataRecord':
    """Copy the zip of ``source`` into a new, published version of ``destination``.

    The source zip is streamed in chunks straight into a multipart upload.

    Args:
        source: Data Record whose current version is read.
        destination: Data Record that receives the new version.
        on_progress: Optional callback forwarded to the uploader.

    Returns:
        The Data Record resolved from the newly created version.

    Raises:
        ValueError: If the source zip is reported as 0 bytes.
    """
    BiolibApiClient.assert_is_signed_in(authenticated_action_description='clone a Data Record')

    # pylint: disable=protected-access
    source_size = source._get_zip_size_bytes()
    if source_size == 0:
        raise ValueError('Source data record has no data to clone')

    # Chunks are at least 10 MB and grow with the payload so that roughly
    # 9,000 chunks are needed at most (presumably to stay below a multipart
    # part-count limit - TODO confirm).
    chunk_size = max(10_000_000, int(source_size / 9_000))

    uploaded_version_uuid = _upload_from_iterator(
        resource_uuid=destination._state['resource_uuid'],
        payload_iterator=source._iter_zip_bytes(chunk_size_bytes=chunk_size),
        payload_size_in_bytes=source_size,
        publish=True,
        on_progress=on_progress,
    )
    # pylint: enable=protected-access

    logger.info(f"Successfully cloned data to '{destination.uri}'")
    return DataRecord._get_by_version_uuid(uploaded_version_uuid)
351
+
288
352
  def delete(self) -> None:
289
353
  """Delete the data record.
290
354
 
biolib/_index/index.py CHANGED
@@ -21,7 +21,7 @@ class Index:
21
21
 
22
22
  @property
23
23
  def id(self) -> str:
24
- return f"{self._state['group_uuid']}.{self._state['resource_uuid']}".replace("-", "_")
24
+ return f'{self._state["group_uuid"]}.{self._state["resource_uuid"]}'.replace('-', '_')
25
25
 
26
26
  @staticmethod
27
27
  def get_by_uri(uri: str) -> 'Index':
@@ -1,10 +1,58 @@
1
+ from __future__ import annotations
2
+
1
3
  import os
4
+ from typing import Callable, Iterator
2
5
 
3
- from biolib import utils
6
+ import biolib.api as api
4
7
  from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
5
8
  from biolib.biolib_errors import BioLibError
6
9
  from biolib.biolib_logging import logger
7
10
  from biolib.typing_utils import List, Optional, Tuple
11
+ from biolib.utils import MultiPartUploader
12
+
13
+
14
+ def _upload_from_iterator(
15
+ payload_iterator: Iterator[bytes],
16
+ payload_size_in_bytes: int,
17
+ resource_uuid: Optional[str] = None,
18
+ resource_version_uuid: Optional[str] = None,
19
+ use_process_pool: bool = False,
20
+ publish: bool = False,
21
+ on_progress: Optional[Callable[[int, int], None]] = None,
22
+ ) -> str:
23
+ if (resource_uuid is None) == (resource_version_uuid is None):
24
+ raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
25
+
26
+ if resource_version_uuid is None:
27
+ response = api.client.post(
28
+ path='/lfs/versions/',
29
+ data={'resource_uuid': resource_uuid},
30
+ )
31
+ resource_version_uuid = response.json()['uuid']
32
+
33
+ multipart_uploader = MultiPartUploader(
34
+ use_process_pool=use_process_pool,
35
+ get_presigned_upload_url_request={
36
+ 'headers': None,
37
+ 'requires_biolib_auth': True,
38
+ 'path': f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
39
+ },
40
+ complete_upload_request={
41
+ 'headers': None,
42
+ 'requires_biolib_auth': True,
43
+ 'path': f'/lfs/versions/{resource_version_uuid}/complete_upload/',
44
+ },
45
+ on_progress=on_progress,
46
+ )
47
+ multipart_uploader.upload(payload_iterator=payload_iterator, payload_size_in_bytes=payload_size_in_bytes)
48
+
49
+ if publish:
50
+ api.client.patch(
51
+ path=f'/resources/versions/{resource_version_uuid}/',
52
+ data={'state': 'published', 'set_as_active': True},
53
+ )
54
+
55
+ return resource_version_uuid
8
56
 
9
57
 
10
58
  def validate_data_path_and_get_files_and_size_of_directory(data_path: str) -> Tuple[List[str], int]:
@@ -28,9 +76,14 @@ def push_data_path(
28
76
  data_path: str,
29
77
  data_size_in_bytes: int,
30
78
  files_to_zip: List[str],
31
- resource_version_uuid: str,
79
+ resource_uuid: Optional[str] = None,
80
+ resource_version_uuid: Optional[str] = None,
32
81
  chunk_size_in_mb: Optional[int] = None,
33
- ) -> None:
82
+ publish: bool = False,
83
+ ) -> str:
84
+ if (resource_uuid is None) == (resource_version_uuid is None):
85
+ raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
86
+
34
87
  original_working_dir = os.getcwd()
35
88
  os.chdir(data_path)
36
89
 
@@ -49,19 +102,15 @@ def push_data_path(
49
102
  logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
50
103
 
51
104
  iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
52
- multipart_uploader = utils.MultiPartUploader(
105
+
106
+ new_resource_version_uuid = _upload_from_iterator(
107
+ payload_iterator=iterable_zip_stream,
108
+ payload_size_in_bytes=data_size_in_bytes,
109
+ resource_uuid=resource_uuid,
110
+ resource_version_uuid=resource_version_uuid,
53
111
  use_process_pool=True,
54
- get_presigned_upload_url_request=dict(
55
- headers=None,
56
- requires_biolib_auth=True,
57
- path=f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
58
- ),
59
- complete_upload_request=dict(
60
- headers=None,
61
- requires_biolib_auth=True,
62
- path=f'/lfs/versions/{resource_version_uuid}/complete_upload/',
63
- ),
112
+ publish=publish,
64
113
  )
65
114
 
66
- multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
67
115
  os.chdir(original_working_dir)
116
+ return new_resource_version_uuid
@@ -1,3 +1,4 @@
1
+ import base64
1
2
  import time
2
3
  import uuid
3
4
  from fnmatch import fnmatch
@@ -41,3 +42,17 @@ def open_browser_window_from_notebook(url_to_open: str) -> None:
41
42
  display(Javascript(f'window.open("{url_to_open}");'), display_id=display_id)
42
43
  time.sleep(1)
43
44
  update_display(Javascript(''), display_id=display_id)
45
+
46
+
47
def base64_encode_string(input_str: str) -> str:
    """Return the standard Base64 encoding of ``input_str`` (encoded as UTF-8) as text."""
    return base64.b64encode(input_str.encode('utf-8')).decode('utf-8')
52
+
53
+
54
def decode_base64_string(base64_str: str) -> str:
    """Decode standard Base64 text and return the result interpreted as UTF-8."""
    decoded_bytes = base64.b64decode(base64_str.encode('utf-8'))
    return decoded_bytes.decode('utf-8')
@@ -0,0 +1,46 @@
1
+ import base64
2
+ import binascii
3
+ import json
4
+ from typing import Any, Dict
5
+
6
+
7
class JwtDecodeError(Exception):
    """Raised when a JWT cannot be split or decoded into a header and payload."""
9
+
10
+
11
def decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
    """Decode the header and payload of a JWT without verifying its signature.

    The signature segment is discarded, so the returned data must not be
    treated as authenticated.

    Args:
        jwt: Token of the form ``<header>.<payload>.<signature>``.

    Returns:
        A dict with the keys ``header`` and ``payload``, each holding the
        decoded JSON object of that segment.

    Raises:
        JwtDecodeError: If the token has too few segments, a segment is not
            valid base64url, or a segment does not decode to a JSON object.
    """

    def _decode_segment(segment: bytes) -> bytes:
        # JWT segments are base64url without '=' padding, while Python's
        # decoder requires it; restore the padding before decoding.
        return base64.urlsafe_b64decode(segment + b'=' * (-len(segment) % 4))

    jwt_bytes = jwt.encode('utf-8')

    try:
        signing_input, _ = jwt_bytes.rsplit(b'.', 1)
        header_segment, payload_segment = signing_input.split(b'.', 1)
    except ValueError as error:
        raise JwtDecodeError('Not enough segments') from error

    try:
        header_data = _decode_segment(header_segment)
    except (TypeError, binascii.Error) as error:
        raise JwtDecodeError('Invalid header padding') from error

    try:
        header = json.loads(header_data)
    except ValueError as error:
        raise JwtDecodeError(f'Invalid header string: {error}') from error

    if not isinstance(header, dict):
        raise JwtDecodeError('Invalid header string: must be a json object')

    try:
        payload_data = _decode_segment(payload_segment)
    except (TypeError, binascii.Error) as error:
        raise JwtDecodeError('Invalid payload padding') from error

    try:
        payload = json.loads(payload_data)
    except ValueError as error:
        raise JwtDecodeError(f'Invalid payload string: {error}') from error

    if not isinstance(payload, dict):
        raise JwtDecodeError('Invalid payload string: must be a json object')

    return dict(header=header, payload=payload)
@@ -38,7 +38,7 @@ class Runtime:
38
38
  return job_requested_machine
39
39
 
40
40
  @staticmethod
41
- def get_job_requested_spot_machine() -> bool:
41
+ def is_spot_machine_requested() -> bool:
42
42
  job_data = Runtime._get_job_data()
43
43
  return job_data.get('job_requested_machine_spot', False)
44
44
 
@@ -19,11 +19,14 @@ from .experiment import (
19
19
  )
20
20
  from .file_node import FileNodeDict, FileZipMetadataDict, ZipFileNodeDict
21
21
  from .push import PushResponseDict
22
- from .resource import ResourceUriDict, SemanticVersionDict
22
+ from .resource import ResourceDetailedDict, ResourceDict, ResourceTypeLiteral, ResourceUriDict, SemanticVersionDict
23
23
  from .resource_deploy_key import ResourceDeployKeyDict, ResourceDeployKeyWithSecretDict
24
24
  from .resource_permission import ResourcePermissionDetailedDict, ResourcePermissionDict
25
- from .resource_types import ResourceDetailedDict, ResourceDict
26
- from .resource_version import ResourceVersionDetailedDict, ResourceVersionDict
25
+ from .resource_version import (
26
+ ResourceVersionAssetsDict,
27
+ ResourceVersionDetailedDict,
28
+ ResourceVersionDict,
29
+ )
27
30
  from .result import ResultDetailedDict, ResultDict
28
31
  from .typing import Optional
29
32
  from .user import EnterpriseSettingsDict, UserDetailedDict, UserDict
@@ -50,9 +53,11 @@ __all__ = [
50
53
  'ResourceDeployKeyWithSecretDict',
51
54
  'ResourceDetailedDict',
52
55
  'ResourceDict',
56
+ 'ResourceTypeLiteral',
53
57
  'ResourcePermissionDetailedDict',
54
58
  'ResourcePermissionDict',
55
59
  'ResourceUriDict',
60
+ 'ResourceVersionAssetsDict',
56
61
  'ResourceVersionDetailedDict',
57
62
  'ResourceVersionDict',
58
63
  'ResultCounts',
@@ -1,4 +1,6 @@
1
- from .typing import Optional, TypedDict
1
+ from .experiment import DeprecatedExperimentDict
2
+ from .resource_version import ResourceVersionDetailedDict
3
+ from .typing import Literal, NotRequired, Optional, TypedDict
2
4
 
3
5
 
4
6
  class SemanticVersionDict(TypedDict):
@@ -15,3 +17,21 @@ class ResourceUriDict(TypedDict):
15
17
  resource_prefix: Optional[str]
16
18
  version: Optional[SemanticVersionDict]
17
19
  tag: Optional[str]
20
+
21
+
22
+ ResourceTypeLiteral = Literal['app', 'data-record', 'experiment', 'index']
23
+
24
+
25
class ResourceDict(TypedDict):
    """Typed dictionary with the basic string fields of a resource."""

    # Identity of the resource.
    uuid: str
    uri: str
    name: str
    # Remaining descriptive fields, all typed as plain strings.
    created_at: str
    description: str
    account_uuid: str
32
+
33
+
34
class ResourceDetailedDict(ResourceDict):
    """``ResourceDict`` extended with the resource type and nested details."""

    type: ResourceTypeLiteral
    # The ``version`` key may be absent; ``experiment`` is always present but may be None.
    version: NotRequired[ResourceVersionDetailedDict]
    experiment: Optional[DeprecatedExperimentDict]
@@ -1,4 +1,4 @@
1
- from .resource_types import ResourceDict
1
+ from .resource import ResourceDict
2
2
  from .typing import TypedDict
3
3
 
4
4
 
@@ -1,4 +1,9 @@
1
- from .typing import Literal, NotRequired, TypedDict
1
+ from .typing import Literal, NotRequired, Optional, TypedDict
2
+
3
+
4
class ResourceVersionAssetsDict(TypedDict):
    """Typed dictionary describing the assets of a resource version."""

    download_url: str
    size_bytes: int
2
7
 
3
8
 
4
9
  class ResourceVersionDict(TypedDict):
@@ -7,7 +12,8 @@ class ResourceVersionDict(TypedDict):
7
12
  state: Literal['published', 'unpublished']
8
13
  created_at: str
9
14
  git_branch_name: NotRequired[str]
15
+ git_commit_hash: NotRequired[str]
10
16
 
11
17
 
12
18
  class ResourceVersionDetailedDict(ResourceVersionDict):
13
- pass
19
+ assets: Optional[ResourceVersionAssetsDict]
biolib/api/client.py CHANGED
@@ -1,6 +1,3 @@
1
- import base64
2
- import binascii
3
- import json
4
1
  from datetime import datetime, timezone
5
2
  from json.decoder import JSONDecodeError
6
3
  from urllib.parse import urlencode, urljoin
@@ -8,7 +5,8 @@ from urllib.parse import urlencode, urljoin
8
5
  import importlib_metadata
9
6
 
10
7
  from biolib._internal.http_client import HttpClient, HttpResponse
11
- from biolib._shared.types.typing import Any, Dict, Optional, TypedDict, Union, cast
8
+ from biolib._internal.utils.auth import decode_jwt_without_checking_signature
9
+ from biolib._shared.types.typing import Dict, Optional, TypedDict, Union, cast
12
10
  from biolib.biolib_api_client import BiolibApiClient as DeprecatedApiClient
13
11
  from biolib.biolib_errors import BioLibError
14
12
  from biolib.biolib_logging import logger
@@ -33,10 +31,6 @@ class ApiClientInitDict(TypedDict):
33
31
  client_type: Optional[str]
34
32
 
35
33
 
36
- class JwtDecodeError(Exception):
37
- pass
38
-
39
-
40
34
  class ApiClient(HttpClient):
41
35
  _biolib_package_version: str = _get_biolib_package_version()
42
36
 
@@ -147,7 +141,7 @@ class ApiClient(HttpClient):
147
141
 
148
142
  def _get_access_token(self) -> str:
149
143
  if self._access_token:
150
- decoded_token = self._decode_jwt_without_checking_signature(self._access_token)
144
+ decoded_token = decode_jwt_without_checking_signature(self._access_token)
151
145
  if datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60: # 60 second buffer
152
146
  # Token has not expired yet
153
147
  return self._access_token
@@ -171,41 +165,3 @@ class ApiClient(HttpClient):
171
165
 
172
166
  self._access_token = cast(str, response_dict['access'])
173
167
  return self._access_token
174
-
175
- @staticmethod
176
- def _decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
177
- jwt_bytes = jwt.encode('utf-8')
178
-
179
- try:
180
- signing_input, _ = jwt_bytes.rsplit(b'.', 1)
181
- header_segment, payload_segment = signing_input.split(b'.', 1)
182
- except ValueError as error:
183
- raise JwtDecodeError('Not enough segments') from error
184
-
185
- try:
186
- header_data = base64.urlsafe_b64decode(header_segment)
187
- except (TypeError, binascii.Error) as error:
188
- raise JwtDecodeError('Invalid header padding') from error
189
-
190
- try:
191
- header = json.loads(header_data)
192
- except ValueError as error:
193
- raise JwtDecodeError(f'Invalid header string: {error}') from error
194
-
195
- if not isinstance(header, dict):
196
- raise JwtDecodeError('Invalid header string: must be a json object')
197
-
198
- try:
199
- payload_data = base64.urlsafe_b64decode(payload_segment)
200
- except (TypeError, binascii.Error) as error:
201
- raise JwtDecodeError('Invalid payload padding') from error
202
-
203
- try:
204
- payload = json.loads(payload_data)
205
- except ValueError as error:
206
- raise JwtDecodeError(f'Invalid payload string: {error}') from error
207
-
208
- if not isinstance(header, dict):
209
- raise JwtDecodeError('Invalid payload string: must be a json object')
210
-
211
- return dict(header=header, payload=payload)
biolib/app/app.py CHANGED
@@ -113,16 +113,7 @@ class BioLibApp:
113
113
  module_input_serialized = self._get_serialized_module_input(args, stdin, files)
114
114
 
115
115
  if machine == 'local':
116
- if not blocking:
117
- raise BioLibError('The argument "blocking" cannot be False when running locally')
118
-
119
- if experiment_id:
120
- logger.warning('The argument "experiment_id" is ignored when running locally')
121
-
122
- if result_prefix:
123
- logger.warning('The argument "result_prefix" is ignored when running locally')
124
-
125
- return self._run_locally(module_input_serialized)
116
+ raise BioLibError('Running applications locally with machine="local" is no longer supported.')
126
117
 
127
118
  job = Result._start_job_in_cloud( # pylint: disable=protected-access
128
119
  app_uri=self._app_uri,
@@ -1,15 +1,13 @@
1
- import base64
2
- import binascii
3
- import json
4
1
  import os
5
2
  from datetime import datetime, timezone
6
3
  from json.decoder import JSONDecodeError
7
4
 
8
5
  from biolib._internal.http_client import HttpClient
6
+ from biolib._internal.utils.auth import decode_jwt_without_checking_signature
9
7
  from biolib._runtime.runtime import Runtime
10
8
  from biolib.biolib_errors import BioLibError
11
9
  from biolib.biolib_logging import logger, logger_no_user_data
12
- from biolib.typing_utils import Any, Dict, Optional, TypedDict
10
+ from biolib.typing_utils import Optional, TypedDict
13
11
 
14
12
  from .user_state import UserState
15
13
 
@@ -19,10 +17,6 @@ class UserTokens(TypedDict):
19
17
  refresh: str
20
18
 
21
19
 
22
- class JwtDecodeError(Exception):
23
- pass
24
-
25
-
26
20
  class _ApiClient:
27
21
  def __init__(self, base_url: str, access_token: Optional[str] = None):
28
22
  self.base_url: str = base_url
@@ -60,7 +54,7 @@ class _ApiClient:
60
54
  return
61
55
 
62
56
  if self.access_token:
63
- decoded_token = self.decode_jwt_without_checking_signature(self.access_token)
57
+ decoded_token = decode_jwt_without_checking_signature(self.access_token)
64
58
  if datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60: # 60 second buffer
65
59
  # Token has not expired yet
66
60
  return
@@ -132,44 +126,6 @@ class _ApiClient:
132
126
  self.access_token = json_response['access_token']
133
127
  self.refresh_token = json_response['refresh_token']
134
128
 
135
- @staticmethod
136
- def decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
137
- jwt_bytes = jwt.encode('utf-8')
138
-
139
- try:
140
- signing_input, _ = jwt_bytes.rsplit(b'.', 1)
141
- header_segment, payload_segment = signing_input.split(b'.', 1)
142
- except ValueError as error:
143
- raise JwtDecodeError('Not enough segments') from error
144
-
145
- try:
146
- header_data = base64.urlsafe_b64decode(header_segment)
147
- except (TypeError, binascii.Error) as error:
148
- raise JwtDecodeError('Invalid header padding') from error
149
-
150
- try:
151
- header = json.loads(header_data)
152
- except ValueError as error:
153
- raise JwtDecodeError(f'Invalid header string: {error}') from error
154
-
155
- if not isinstance(header, dict):
156
- raise JwtDecodeError('Invalid header string: must be a json object')
157
-
158
- try:
159
- payload_data = base64.urlsafe_b64decode(payload_segment)
160
- except (TypeError, binascii.Error) as error:
161
- raise JwtDecodeError('Invalid payload padding') from error
162
-
163
- try:
164
- payload = json.loads(payload_data)
165
- except ValueError as error:
166
- raise JwtDecodeError(f'Invalid payload string: {error}') from error
167
-
168
- if not isinstance(header, dict):
169
- raise JwtDecodeError('Invalid payload string: must be a json object')
170
-
171
- return dict(header=header, payload=payload)
172
-
173
129
 
174
130
  class BiolibApiClient:
175
131
  api_client: Optional[_ApiClient] = None
biolib/cli/data_record.py CHANGED
@@ -1,9 +1,11 @@
1
1
  import json
2
2
  import logging
3
3
  import os
4
+ import sys
4
5
  from typing import Dict, List, Optional
5
6
 
6
7
  import click
8
+ import rich.progress
7
9
 
8
10
  from biolib._data_record.data_record import DataRecord
9
11
  from biolib.biolib_api_client import BiolibApiClient
@@ -97,3 +99,66 @@ def delete(uri: str) -> None:
97
99
 
98
100
  record.delete()
99
101
  print(f'Data record {record.uri} has been deleted.')
102
+
103
+
104
def _clone_data_record_with_progress(
    source_record: DataRecord,
    dest_record: DataRecord,
) -> None:
    """Clone ``source_record`` into ``dest_record`` and report progress.

    When stdout is a terminal a rich progress bar is shown; otherwise only the
    approximate size is logged. Nothing is cloned when the source zip is
    reported as 0 bytes.
    """
    source_size = source_record._get_zip_size_bytes()  # pylint: disable=protected-access

    if source_size == 0:
        logger.info('Source data record has no data to clone')
        return

    if not sys.stdout.isatty():
        logger.info(f'Cloning ~{round(source_size / 10**6)}mb of data')
        DataRecord.clone(source=source_record, destination=dest_record)
        return

    progress_columns = (
        rich.progress.TextColumn('[bold blue]{task.description}'),
        rich.progress.BarColumn(),
        rich.progress.TaskProgressColumn(),
        rich.progress.TimeRemainingColumn(),
        rich.progress.TransferSpeedColumn(),
    )
    with rich.progress.Progress(*progress_columns) as progress_bar:
        clone_task = progress_bar.add_task('Cloning data record', total=source_size)

        def on_progress(bytes_uploaded: int, _total_bytes: int) -> None:
            # The bar total was fixed above, so only the completed count changes.
            progress_bar.update(clone_task, completed=bytes_uploaded)

        DataRecord.clone(source=source_record, destination=dest_record, on_progress=on_progress)
133
+
134
+
135
def _get_or_create_destination_record(destination_uri: str) -> 'Optional[DataRecord]':
    """Return the destination Data Record, offering to create it if lookup fails.

    The return annotation is quoted so that defining this function never
    evaluates ``Optional`` at import time.

    Args:
        destination_uri: URI of the Data Record to clone into.

    Returns:
        The existing or newly created Data Record, or ``None`` when the user
        declines to create it.
    """
    try:
        return DataRecord.get_by_uri(uri=destination_uri)
    except Exception:  # pylint: disable=broad-except
        # NOTE(review): every lookup failure is reported as "does not exist",
        # including errors unrelated to a missing record - confirm whether a
        # narrower exception type is available.
        print(f'Destination data record "{destination_uri}" does not exist.')

    confirmation = input('Would you like to create it? [y/N]: ')
    # Tolerate surrounding whitespace and a spelled-out "yes".
    if confirmation.strip().lower() not in ('y', 'yes'):
        print('Clone cancelled.')
        return None

    return DataRecord.create(destination=destination_uri)
146
+
147
+
148
@data_record.command(help='Clone a Data Record to another location')
@click.argument('source_uri', required=True)
@click.argument('destination_uri', required=True)
def clone(source_uri: str, destination_uri: str) -> None:
    # CLI entry point: resolve both records, then stream the source into the
    # destination. Stops without cloning when no destination record is returned.
    BiolibApiClient.assert_is_signed_in(authenticated_action_description='clone a Data Record')

    logger.info(f'Fetching source data record: {source_uri}')
    source_record = DataRecord.get_by_uri(uri=source_uri)

    logger.info(f'Checking destination data record: {destination_uri}')
    dest_record = _get_or_create_destination_record(destination_uri)

    if dest_record is not None:
        logger.info(f'Cloning from {source_record.uri} to {dest_record.uri}...')
        _clone_data_record_with_progress(source_record=source_record, dest_record=dest_record)
        logger.info('Clone completed successfully.')
+ logger.info('Clone completed successfully.')