pybiolib 1.1.1629__py3-none-any.whl → 1.1.1881__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (53)
  1. biolib/__init__.py +11 -3
  2. biolib/_internal/data_record/__init__.py +1 -0
  3. biolib/_internal/data_record/data_record.py +153 -0
  4. biolib/_internal/data_record/remote_storage_endpoint.py +27 -0
  5. biolib/_internal/http_client.py +45 -15
  6. biolib/_internal/push_application.py +22 -37
  7. biolib/_internal/runtime.py +73 -0
  8. biolib/_internal/utils/__init__.py +18 -0
  9. biolib/api/client.py +12 -6
  10. biolib/app/app.py +6 -1
  11. biolib/app/search_apps.py +8 -12
  12. biolib/biolib_api_client/api_client.py +14 -9
  13. biolib/biolib_api_client/app_types.py +1 -0
  14. biolib/biolib_api_client/auth.py +0 -12
  15. biolib/biolib_api_client/biolib_app_api.py +53 -27
  16. biolib/biolib_api_client/biolib_job_api.py +11 -40
  17. biolib/biolib_binary_format/utils.py +19 -2
  18. biolib/cli/__init__.py +9 -3
  19. biolib/cli/auth.py +58 -0
  20. biolib/cli/data_record.py +43 -0
  21. biolib/cli/download_container.py +3 -1
  22. biolib/cli/init.py +1 -0
  23. biolib/cli/lfs.py +39 -9
  24. biolib/cli/push.py +1 -1
  25. biolib/cli/run.py +3 -2
  26. biolib/cli/start.py +1 -0
  27. biolib/compute_node/cloud_utils/cloud_utils.py +38 -65
  28. biolib/compute_node/job_worker/cache_state.py +1 -1
  29. biolib/compute_node/job_worker/executors/docker_executor.py +10 -8
  30. biolib/compute_node/job_worker/job_storage.py +9 -13
  31. biolib/compute_node/job_worker/job_worker.py +10 -4
  32. biolib/compute_node/remote_host_proxy.py +48 -11
  33. biolib/compute_node/webserver/worker_thread.py +2 -2
  34. biolib/jobs/job.py +33 -32
  35. biolib/lfs/__init__.py +0 -2
  36. biolib/lfs/utils.py +23 -115
  37. biolib/runtime/__init__.py +13 -1
  38. biolib/sdk/__init__.py +17 -4
  39. biolib/user/sign_in.py +8 -12
  40. biolib/utils/__init__.py +17 -45
  41. biolib/utils/app_uri.py +11 -4
  42. biolib/utils/cache_state.py +2 -2
  43. biolib/utils/multipart_uploader.py +42 -68
  44. biolib/utils/seq_util.py +47 -9
  45. biolib/utils/zip/remote_zip.py +9 -17
  46. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/METADATA +1 -2
  47. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/RECORD +50 -46
  48. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/WHEEL +1 -1
  49. biolib/biolib_api_client/biolib_account_api.py +0 -21
  50. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -53
  51. biolib/runtime/results.py +0 -20
  52. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/LICENSE +0 -0
  53. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/entry_points.txt +0 -0
biolib/lfs/utils.py CHANGED
@@ -1,31 +1,15 @@
  import io
- import json
  import os
  import zipfile as zf
- from collections import namedtuple
  from pathlib import Path
- from struct import Struct

- import requests
-
- from biolib import utils
- from biolib.app import BioLibApp
- from biolib.biolib_api_client.biolib_account_api import BiolibAccountApi
- from biolib.biolib_api_client.biolib_large_file_system_api import BiolibLargeFileSystemApi
+ from biolib import utils, api
  from biolib.biolib_api_client import BiolibApiClient
+ from biolib.biolib_api_client.lfs_types import LargeFileSystem, LargeFileSystemVersion
  from biolib.biolib_logging import logger
  from biolib.biolib_errors import BioLibError
  from biolib.typing_utils import List, Tuple, Iterator, Optional
- from biolib.utils.zip.remote_zip import RemoteZip  # type:ignore
-
-
- def _get_lfs_info_from_uri(lfs_uri):
-     lfs_uri_parts = lfs_uri.split('/')
-     lfs_uri_parts = [uri_part for uri_part in lfs_uri_parts if '@' not in uri_part]  # Remove hostname
-     team_account_handle = lfs_uri_parts[0]
-     lfs_name = lfs_uri_parts[1]
-     account = BiolibAccountApi.fetch_by_handle(team_account_handle)
-     return account, lfs_name
+ from biolib.utils.app_uri import parse_app_uri


  def get_files_and_size_of_directory(directory: str) -> Tuple[List[str], int]:
@@ -99,14 +83,23 @@ def get_iterable_zip_stream(files: List[str], chunk_size: int) -> Iterator[bytes]:
          yield chunk


- def create_large_file_system(lfs_uri: str):
+ def create_large_file_system(lfs_uri: str) -> str:
      BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Large File System')
-     lfs_account, lfs_name = _get_lfs_info_from_uri(lfs_uri)
-     lfs_resource = BiolibLargeFileSystemApi.create(account_uuid=lfs_account['public_id'], name=lfs_name)
-     logger.info(f"Successfully created new Large File System '{lfs_resource['uri']}'")

+     uri_parsed = parse_app_uri(lfs_uri)
+     response = api.client.post(
+         path='/lfs/',
+         data={
+             'account_handle': uri_parsed['account_handle_normalized'],
+             'name': uri_parsed['app_name'],
+         },
+     )
+     lfs: LargeFileSystem = response.json()
+     logger.info(f"Successfully created new Large File System '{lfs['uri']}'")
+     return lfs['uri']

- def push_large_file_system(lfs_uri: str, input_dir: str, chunk_size_in_mb: Optional[int] = None) -> None:
+
+ def push_large_file_system(lfs_uri: str, input_dir: str, chunk_size_in_mb: Optional[int] = None) -> str:
      BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Large File System')

      if not os.path.isdir(input_dir):
@@ -115,8 +108,6 @@ def push_large_file_system(lfs_uri: str, input_dir: str, chunk_size_in_mb: Optional[int] = None) -> None:
      if os.path.realpath(input_dir) == '/':
          raise BioLibError('Pushing your root directory is not possible')

-     lfs_resource = BioLibApp(lfs_uri)
-
      original_working_dir = os.getcwd()
      os.chdir(input_dir)
      files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
@@ -138,108 +129,25 @@ def push_large_file_system(lfs_uri: str, input_dir: str, chunk_size_in_mb: Optional[int] = None) -> None:
      data_size_in_mb = round(data_size_in_bytes / 10 ** 6)
      print(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')

-     lfs_resource_version = BiolibLargeFileSystemApi.create_version(resource_uuid=lfs_resource.uuid)
-     lfs_resource_version_uuid = lfs_resource_version['uuid']
-
+     response = api.client.post(path='/lfs/versions/', data={'resource_uri': lfs_uri})
+     lfs_version: LargeFileSystemVersion = response.json()
      iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)

-     base_url = BiolibApiClient.get().base_url
      multipart_uploader = utils.MultiPartUploader(
          use_process_pool=True,
          get_presigned_upload_url_request=dict(
              headers=None,
              requires_biolib_auth=True,
-             url=f'{base_url}/api/lfs/versions/{lfs_resource_version_uuid}/presigned_upload_url/',
+             path=f"/lfs/versions/{lfs_version['uuid']}/presigned_upload_url/",
          ),
          complete_upload_request=dict(
              headers=None,
              requires_biolib_auth=True,
-             url=f'{base_url}/api/lfs/versions/{lfs_resource_version_uuid}/complete_upload/',
+             path=f"/lfs/versions/{lfs_version['uuid']}/complete_upload/",
          ),
      )

      multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
-     logger.info(f"Successfully pushed a new LFS version '{lfs_resource_version['uri']}'")
      os.chdir(original_working_dir)
-
-
- def describe_large_file_system(lfs_uri: str, output_as_json: bool = False) -> None:
-     BiolibApiClient.assert_is_signed_in(authenticated_action_description='describe a Large File System')
-     lfs_resource = BioLibApp(lfs_uri)
-     lfs_version = BiolibLargeFileSystemApi.fetch_version(lfs_version_uuid=lfs_resource.version['public_id'])
-
-     files = []
-     total_size = 0
-     with RemoteZip(url=lfs_version['presigned_download_url']) as remote_zip:
-         central_directory = remote_zip.get_central_directory()
-         for file in central_directory.values():
-             files.append(dict(path=file['filename'], size_bytes=file['file_size']))
-             total_size += file['file_size']
-
-     lfs_version_metadata = dict(files=files, **lfs_version)
-     lfs_version_metadata['size_bytes'] = total_size
-
-     if output_as_json:
-         print(json.dumps(lfs_version_metadata, indent=4))
-     else:
-         print(f"Large File System {lfs_version_metadata['uri']}\ntotal {lfs_version_metadata['size_bytes']} bytes\n")
-         print('size bytes path')
-         for file in files:
-             size_string = str(file['size_bytes'])
-             leading_space_string = ' ' * (10 - len(size_string))
-             print(f"{leading_space_string}{size_string} {file['path']}")
-
-
- def get_file_data_from_large_file_system(lfs_uri: str, file_path: str) -> bytes:
-     BiolibApiClient.assert_is_signed_in(authenticated_action_description='get file from a Large File System')
-     lfs_resource = BioLibApp(lfs_uri)
-     lfs_version = BiolibLargeFileSystemApi.fetch_version(lfs_version_uuid=lfs_resource.version['public_id'])
-     lfs_url = lfs_version['presigned_download_url']
-
-     with RemoteZip(lfs_url) as remote_zip:
-         central_directory = remote_zip.get_central_directory()
-         if file_path not in central_directory:
-             raise Exception('File not found in Large File System')
-
-         file_info = central_directory[file_path]
-
-     local_file_header_signature_bytes = b'\x50\x4b\x03\x04'
-     local_file_header_struct = Struct('<H2sHHHIIIHH')
-     LocalFileHeader = namedtuple('LocalFileHeader', (
-         'version',
-         'flags',
-         'compression_raw',
-         'mod_time',
-         'mod_date',
-         'crc_32_expected',
-         'compressed_size_raw',
-         'uncompressed_size_raw',
-         'file_name_len',
-         'extra_field_len',
-     ))
-
-     local_file_header_start = file_info['header_offset'] + len(local_file_header_signature_bytes)
-     local_file_header_end = local_file_header_start + local_file_header_struct.size
-
-     local_file_header_response = requests.get(
-         url=lfs_url,
-         stream=True,
-         headers={'range': f'bytes={local_file_header_start}-{local_file_header_end - 1}'},
-         timeout=300,
-     )
-     local_file_header_response.raise_for_status()
-     local_file_header_bytes: bytes = local_file_header_response.raw.data
-     local_file_header = LocalFileHeader._make(local_file_header_struct.unpack(local_file_header_bytes))
-
-     file_start = local_file_header_end + local_file_header.file_name_len + local_file_header.extra_field_len
-     file_end = file_start + file_info['file_size']
-
-     response = requests.get(
-         url=lfs_url,
-         stream=True,
-         headers={'range': f'bytes={file_start}-{file_end - 1}'},
-         timeout=300,  # timeout after 5 min
-     )
-     response.raise_for_status()
-     data: bytes = response.raw.data
-     return data
+     logger.info(f"Successfully pushed a new LFS version '{lfs_version['uri']}'")
+     return lfs_version['uri']
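Reviewer note: both LFS helpers now call the REST endpoints through api.client and return the resulting URI instead of None, so the two calls can be chained. A minimal usage sketch, assuming the functions are imported from this module; the account/dataset names and local path are made-up placeholders:

from biolib.lfs.utils import create_large_file_system, push_large_file_system

# Create the LFS resource, then push a first version of a local directory.
lfs_uri = create_large_file_system('my-account/my-dataset')  # placeholder URI
version_uri = push_large_file_system(lfs_uri, input_dir='./data')
print(version_uri)  # URI of the newly pushed LFS version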
biolib/runtime/__init__.py CHANGED
@@ -1 +1,13 @@
- from .results import set_main_result_prefix
+ import warnings
+ from biolib.sdk import Runtime as _Runtime
+
+
+ def set_main_result_prefix(result_prefix: str) -> None:
+     warnings.warn(
+         'The "biolib.runtime.set_main_result_prefix" function is deprecated. '
+         'It will be removed in future releases from mid 2024. '
+         'Please use "from biolib.sdk import Runtime" and then "Runtime.set_main_result_prefix" instead.',
+         DeprecationWarning,
+         stacklevel=2,
+     )
+     _Runtime.set_main_result_prefix(result_prefix)
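The old entry point keeps working through this shim but now warns. Migration sketch, following the warning text above ('my_prefix' is a placeholder value):

# Deprecated path, still functional but emits DeprecationWarning:
from biolib.runtime import set_main_result_prefix
set_main_result_prefix('my_prefix')

# Recommended replacement:
from biolib.sdk import Runtime
Runtime.set_main_result_prefix('my_prefix')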
biolib/sdk/__init__.py CHANGED
@@ -1,24 +1,33 @@
+ # Imports to hide and use as private internal utils
+ from biolib._internal.data_record import DataRecord as _DataRecord
  from biolib._internal.push_application import push_application as _push_application
  from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
-
  from biolib.app import BioLibApp as _BioLibApp
+ from biolib.typing_utils import Optional as _Optional
+
+ # Imports to expose as public API
+ from biolib._internal.runtime import Runtime
+

  def push_app_version(uri: str, path: str) -> _BioLibApp:
      push_data = _push_application(
          app_uri=uri,
          app_path=path,
          app_version_to_copy_images_from=None,
-         is_dev_version=True)
+         is_dev_version=True,
+     )
      uri = f'{push_data["app_uri"]}:{push_data["sematic_version"]}'
      return _BioLibApp(uri)

+
  def set_app_version_as_default(app_version: _BioLibApp) -> None:
      app_version_uuid = app_version.version['public_id']
      _set_app_version_as_active(app_version_uuid)

+
  def get_app_version_pytest_plugin(app_version: _BioLibApp):
      try:
-         import pytest # type: ignore # pylint: disable=import-outside-toplevel,import-error
+         import pytest  # type: ignore # pylint: disable=import-outside-toplevel,import-error
      except BaseException:
          raise Exception('Failed to import pytest; please make sure it is installed') from None

@@ -27,7 +36,11 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
              self.app_version_ref = app_version_ref

          @pytest.fixture(scope='session')
-         def app_version(self, request): # pylint: disable=unused-argument
+         def app_version(self, request):  # pylint: disable=unused-argument
              return self.app_version_ref

      return AppVersionFixturePlugin(app_version)
+
+
+ def create_data_record(destination: str, data_path: str, name: _Optional[str] = None) -> _DataRecord:
+     return _DataRecord.create(destination, data_path, name)
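The new public create_data_record helper forwards directly to DataRecord.create. A hedged usage sketch; the argument values are placeholders, and the exact semantics of destination follow DataRecord.create in biolib/_internal/data_record/data_record.py (added in this release):

from biolib.sdk import create_data_record

record = create_data_record(
    destination='my-account',  # placeholder destination
    data_path='./results',     # local data to store in the record
    name='experiment-42',      # optional, defaults to None
)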
biolib/user/sign_in.py CHANGED
@@ -1,19 +1,11 @@
  import time
- import uuid
+ import webbrowser

  from biolib.biolib_api_client import BiolibApiClient
  from biolib.biolib_api_client.auth import BiolibAuthChallengeApi
  from biolib.biolib_logging import logger_no_user_data
  from biolib.utils import IS_RUNNING_IN_NOTEBOOK
-
-
- def _open_browser_window(url_to_open: str) -> None:
-     from IPython.display import display, Javascript, update_display  # type:ignore # pylint: disable=import-error, import-outside-toplevel
-
-     display_id = str(uuid.uuid4())
-     display(Javascript(f'window.open("{url_to_open}");'), display_id=display_id)
-     time.sleep(1)
-     update_display(Javascript(''), display_id=display_id)
+ from biolib._internal.utils import open_browser_window_from_notebook


  def sign_out() -> None:
@@ -21,7 +13,7 @@ def sign_out() -> None:
      api_client.sign_out()


- def sign_in() -> None:
+ def sign_in(open_in_default_browser: bool = False) -> None:
      api_client = BiolibApiClient.get()
      if api_client.is_signed_in:
          logger_no_user_data.info('Already signed in')
@@ -37,7 +29,11 @@ def sign_in() -> None:
      if IS_RUNNING_IN_NOTEBOOK:
          print(f'Opening authorization page at: {frontend_sign_in_url}')
          print('If your browser does not open automatically, click on the link above.')
-         _open_browser_window(frontend_sign_in_url)
+         open_browser_window_from_notebook(frontend_sign_in_url)
+     elif open_in_default_browser:
+         print(f'Opening authorization page at: {frontend_sign_in_url}')
+         print('If your browser does not open automatically, click on the link above.')
+         webbrowser.open(frontend_sign_in_url)
      else:
          print('Please copy and paste the following link into your browser:')
          print(frontend_sign_in_url)
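sign_in gains an open_in_default_browser flag for non-notebook environments; the notebook branch still takes precedence. Usage sketch:

from biolib.user.sign_in import sign_in

# Outside a notebook, this opens the sign-in page via the stdlib webbrowser
# module instead of only printing the URL:
sign_in(open_in_default_browser=True)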
biolib/utils/__init__.py CHANGED
@@ -1,22 +1,19 @@
  import collections.abc
  import multiprocessing
  import os
- import time
  import socket
  import sys
- from urllib.parse import urlparse

- import requests
  from importlib_metadata import version, PackageNotFoundError

+ from biolib.typing_utils import Optional
  from biolib.utils.seq_util import SeqUtil, SeqUtilRecord
-
- # try fetching version, if it fails (usually when in dev), add default
- from biolib.biolib_errors import BioLibError
+ from biolib._internal.http_client import HttpClient
  from biolib.biolib_logging import logger_no_user_data, logger
  from biolib.typing_utils import Tuple, Iterator
  from .multipart_uploader import MultiPartUploader, get_chunk_iterator_from_bytes

+ # try fetching version, if it fails (usually when in dev), add default
  try:
      BIOLIB_PACKAGE_VERSION = version('pybiolib')
  except PackageNotFoundError:
@@ -25,7 +22,7 @@ except PackageNotFoundError:
  IS_DEV = os.getenv('BIOLIB_DEV', '').upper() == 'TRUE'


- def _get_base_url() -> str:
+ def load_base_url_from_env() -> str:
      base_url = os.getenv('BIOLIB_BASE_URL')
      if base_url:
          return base_url.lower().rstrip('/')
@@ -53,8 +50,8 @@
      return 'https://biolib.com'


- BIOLIB_BASE_URL = _get_base_url()
- BIOLIB_SITE_HOSTNAME = urlparse(BIOLIB_BASE_URL).hostname
+ BIOLIB_BASE_URL: Optional[str] = None
+ BIOLIB_SITE_HOSTNAME: Optional[str] = None

  BIOLIB_CLOUD_BASE_URL = os.getenv('BIOLIB_CLOUD_BASE_URL', '').lower()

@@ -66,8 +63,7 @@ BIOLIB_SECRETS_TMPFS_PATH = os.environ.get('BIOLIB_SECRETS_TMPFS_PATH')

  IS_RUNNING_IN_CLOUD = BIOLIB_CLOUD_ENVIRONMENT == 'non-enclave'

- BASE_URL_IS_PUBLIC_BIOLIB = BIOLIB_BASE_URL.endswith('biolib.com') or \
-     os.environ.get('BIOLIB_ENVIRONMENT_IS_PUBLIC_BIOLIB', '').upper() == 'TRUE'
+ BASE_URL_IS_PUBLIC_BIOLIB: Optional[bool] = None

  # sys.stdout is an instance of OutStream in Jupyter and Colab which does not have .buffer
  if not hasattr(sys.stdout, 'buffer'):
@@ -88,38 +84,17 @@ DownloadChunkInputTuple = Tuple[ByteRangeTuple, str]


  def _download_chunk(input_tuple: DownloadChunkInputTuple) -> bytes:
-     max_download_retries = 10
-
      byte_range, presigned_url = input_tuple
      start, end = byte_range

-     for retry_attempt in range(max_download_retries):
-         if retry_attempt > 0:
-             logger_no_user_data.debug(f'Attempt number {retry_attempt} for part {start}')
-         try:
-             response = requests.get(
-                 url=presigned_url,
-                 stream=True,
-                 headers={'range': f'bytes={start}-{end}'},
-                 timeout=300,  # timeout after 5 min
-             )
-             if response.ok:
-                 return_value: bytes = response.raw.data
-                 logger_no_user_data.debug(f'Returning raw data for part {start}')
-                 return return_value
-             else:
-                 logger_no_user_data.warning(
-                     f'Got not ok response when downloading part {start}:{end}. '
-                     f'Got response status {response.status_code} and content: {response.content.decode()} '
-                     f'Retrying...'
-                 )
-         except Exception:  # pylint: disable=broad-except
-             logger_no_user_data.warning(f'Encountered error when downloading part {start}:{end}. Retrying...')
-
-         time.sleep(5)
-
-     logger_no_user_data.debug(f'Max retries hit, when downloading part {start}:{end}. Exiting...')
-     raise BioLibError(f'Max retries hit, when downloading part {start}:{end}. Exiting...')
+     response = HttpClient.request(
+         url=presigned_url,
+         headers={'range': f'bytes={start}-{end}'},
+         timeout_in_seconds=300,  # timeout after 5 min
+         retries=10,
+     )
+     logger_no_user_data.debug(f'Returning raw data for part {start}')
+     return response.content


  class ChunkIterator(collections.abc.Iterator):
@@ -154,11 +129,8 @@ class ChunkIterator(collections.abc.Iterator):
  def download_presigned_s3_url(presigned_url: str, output_file_path: str) -> None:
      chunk_size = 50_000_000

-     with requests.get(presigned_url, stream=True, headers={'range': 'bytes=0-1'}) as response:
-         if not response.ok:
-             raise Exception(f'Got response status code {response.status_code} and content {response.content.decode()}')
-
-         file_size = int(response.headers['Content-Range'].split('/')[1])
+     response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-1'})
+     file_size = int(response.headers['Content-Range'].split('/')[1])

      chunk_iterator = ChunkIterator(file_size, chunk_size, presigned_url)

biolib/utils/app_uri.py CHANGED
@@ -12,17 +12,18 @@ class SemanticVersion(TypedDict):

  class AppUriParsed(TypedDict):
      account_handle_normalized: str
-     app_name_normalized: str
+     app_name_normalized: Optional[str]
+     app_name: Optional[str]
      resource_name_prefix: Optional[str]
      version: Optional[SemanticVersion]


- def normalize(string):
+ def normalize(string: str) -> str:
      return string.replace('-', '_').lower()


  # Mainly copied from backend
- def parse_app_uri(uri: str) -> AppUriParsed:
+ def parse_app_uri(uri: str, use_account_as_name_default: bool = True) -> AppUriParsed:
      uri_regex = r'^(@(?P<resource_name_prefix>[\w._-]+)/)?(?P<account_handle>[\w-]+)(/(?P<app_name>[\w-]+))?' \
                  r'(:(?P<version>(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)))?$'

@@ -36,12 +37,18 @@ def parse_app_uri(uri: str) -> AppUriParsed:
      app_name: Optional[str] = matches.group('app_name')

      # Default to account_handle if app_name is not supplied
-     app_name_normalized = normalize(app_name) if app_name is not None else account_handle_normalized
+     if app_name:
+         app_name_normalized = normalize(app_name)
+     elif use_account_as_name_default:
+         app_name_normalized = account_handle_normalized
+     else:
+         app_name_normalized = None

      return AppUriParsed(
          resource_name_prefix=resource_name_prefix.lower() if resource_name_prefix is not None else 'biolib.com',
          account_handle_normalized=account_handle_normalized,
          app_name_normalized=app_name_normalized,
+         app_name=app_name if app_name is not None or not use_account_as_name_default else account_handle_normalized,
          version=None if not matches.group('version') else SemanticVersion(
              major=int(matches.group('major')),
              minor=int(matches.group('minor')),
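With the new use_account_as_name_default flag, the fallback to the account handle becomes optional. The behavior below follows directly from the branches in this hunk ('my-account' is a placeholder handle):

from biolib.utils.app_uri import parse_app_uri

parsed = parse_app_uri('my-account')
assert parsed['app_name_normalized'] == 'my_account'  # default fallback
assert parsed['app_name'] == 'my_account'

parsed = parse_app_uri('my-account', use_account_as_name_default=False)
assert parsed['app_name_normalized'] is None  # no fallback
assert parsed['app_name'] is None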
biolib/utils/cache_state.py CHANGED
@@ -10,7 +10,7 @@ from biolib.biolib_errors import BioLibError
  from biolib.biolib_logging import logger_no_user_data
  from biolib.typing_utils import Optional, Generic, TypeVar

- StateType = TypeVar('StateType')
+ StateType = TypeVar('StateType')  # pylint: disable=invalid-name


  class CacheStateError(BioLibError):
@@ -37,7 +37,7 @@ class CacheState(abc.ABC, Generic[StateType]):
      def _state_lock_path(self) -> str:
          return f'{self._state_path}.lock'

-     def __init__(self):
+     def __init__(self) -> None:
          self._state: Optional[StateType] = None

      def __enter__(self) -> StateType:
biolib/utils/multipart_uploader.py CHANGED
@@ -5,10 +5,9 @@ import os
  import time
  from urllib.parse import urlparse

- import requests
-
+ import biolib.api
+ from biolib._internal.http_client import HttpClient
  from biolib.biolib_api_client import BiolibApiClient
- from biolib.biolib_api_client.auth import BearerAuth
  from biolib.biolib_errors import BioLibError
  from biolib.biolib_logging import logger, logger_no_user_data
  from biolib.typing_utils import TypedDict, List, Iterator, Tuple, Optional, Dict
@@ -33,7 +32,7 @@ def get_chunk_iterator_from_file_object(file_object, chunk_size_in_bytes: int =
  class RequestOptions(TypedDict):
      headers: Optional[Dict[str, str]]
      requires_biolib_auth: bool
-     url: str
+     path: str


  class _PartMetadata(TypedDict):
@@ -67,20 +66,15 @@ class MultiPartUploader:
          logger_no_user_data.debug(f'Starting multipart upload of payload with size {payload_size_in_bytes} bytes')

          if self._start_multipart_upload_request:
-             requires_biolib_auth = self._start_multipart_upload_request['requires_biolib_auth']
-             start_multipart_upload = requests.post(
-                 auth=BearerAuth(BiolibApiClient.get().access_token) if requires_biolib_auth else None,
-                 headers=self._start_multipart_upload_request['headers'],
-                 timeout=30,
-                 url=self._start_multipart_upload_request['url'],
-             )
-             if start_multipart_upload.ok:
-                 logger_no_user_data.debug('Multipart upload started')
-             else:
-                 logger_no_user_data.debug(
-                     f'Failed to start multipart upload got response status: {start_multipart_upload.status_code}'
+             try:
+                 biolib.api.client.post(
+                     authenticate=self._start_multipart_upload_request['requires_biolib_auth'],
+                     headers=self._start_multipart_upload_request['headers'],
+                     path=self._start_multipart_upload_request['path'],
                  )
-                 raise Exception('Failed to start multipart upload')
+             except BaseException as error:
+                 logger_no_user_data.debug(f'Failed to start multipart upload got error: {error}')
+                 raise error

          # if multiprocessing start method is spawn or we are running in a daemon process,
          # multiprocessing.Pool may fail when called from script
@@ -116,30 +110,12 @@ class MultiPartUploader:
              BiolibApiClient.refresh_auth_token()

          logger_no_user_data.debug(f'Uploaded {len(parts)} parts, now calling complete upload...')
-         for index in range(3):
-             try:
-                 complete_upload_response = requests.post(
-                     auth=BearerAuth(BiolibApiClient.get().access_token) if requires_biolib_auth else None,
-                     headers=self._complete_upload_request['headers'],
-                     json={'parts': parts, 'size_bytes': self._bytes_uploaded},
-                     timeout=30,
-                     url=self._complete_upload_request['url'],
-                 )
-                 if complete_upload_response.ok:
-                     logger_no_user_data.debug('Multipart upload completed returning')
-                     return
-
-                 logger_no_user_data.warning(
-                     f'Failed to complete multipart upload got response status {complete_upload_response.status_code}. '
-                     f'Retrying...'
-                 )
-
-             except Exception as error:  # pylint: disable=broad-except
-                 logger_no_user_data.warning('Encountered error when completing multipart upload. Retrying...')
-                 logger.debug(f'Multipart complete error: {error}')
-                 time.sleep(index * index + 2)
-
-         raise BioLibError('Max retries hit, when completing multipart upload')
+         biolib.api.client.post(
+             authenticate=requires_biolib_auth,
+             headers=self._complete_upload_request['headers'],
+             data={'parts': parts, 'size_bytes': self._bytes_uploaded},
+             path=self._complete_upload_request['path'],
+         )

      def _upload_chunk(self, _input: _UploadChunkInputType) -> _UploadChunkReturnType:
          part_number, chunk = _input
@@ -150,44 +126,42 @@ class MultiPartUploader:
              BiolibApiClient.refresh_auth_token()

          logger_no_user_data.debug(f'Uploading part number {part_number} with size {len(chunk)} bytes...')
+         presigned_upload_url = None
          try:
              logger_no_user_data.debug(f'Getting upload URL for chunk {part_number}...')
-             get_url_response = requests.get(
-                 auth=BearerAuth(BiolibApiClient.get().access_token) if requires_biolib_auth else None,
+             get_url_response = biolib.api.client.get(
+                 authenticate=requires_biolib_auth,
                  headers=self._get_presigned_upload_url_request['headers'],
                  params={'part_number': part_number},
-                 timeout=30,
-                 url=self._get_presigned_upload_url_request['url'],
+                 path=self._get_presigned_upload_url_request['path'],
              )
-             if not get_url_response.ok:
-                 raise Exception(
-                     f'Failed to get upload URL for part {part_number} got response status code '
-                     f'{get_url_response.status_code}'
-                 )

              presigned_upload_url = get_url_response.json()['presigned_upload_url']

-             app_caller_proxy_job_storage_base_url = os.getenv('BIOLIB_CLOUD_JOB_STORAGE_BASE_URL', '')
-             if app_caller_proxy_job_storage_base_url:
-                 # Done to hit App Caller Proxy when uploading result from inside an app
-                 parsed_url = urlparse(presigned_upload_url)
-                 presigned_upload_url = \
-                     f'{app_caller_proxy_job_storage_base_url}{parsed_url.path}?{parsed_url.query}'
-
-             put_chunk_response = requests.put(url=presigned_upload_url, data=chunk, timeout=300)
+         except Exception as error:  # pylint: disable=broad-except
+             logger_no_user_data.warning(f'Error when getting url for part {part_number}. Retrying...')
+             logger.debug(f'Upload error: {error}')

-             if put_chunk_response.ok:
-                 return _PartMetadata(PartNumber=part_number, ETag=put_chunk_response.headers['ETag']), len(chunk)
-             else:
-                 logger_no_user_data.warning(
-                     f'Got response with status {put_chunk_response.status_code} when uploading part {part_number}. '
-                     'Retrying...'
+         if presigned_upload_url:
+             try:
+                 app_caller_proxy_job_storage_base_url = os.getenv('BIOLIB_CLOUD_JOB_STORAGE_BASE_URL', '')
+                 if app_caller_proxy_job_storage_base_url:
+                     # Done to hit App Caller Proxy when uploading result from inside an app
+                     parsed_url = urlparse(presigned_upload_url)
+                     presigned_upload_url = \
+                         f'{app_caller_proxy_job_storage_base_url}{parsed_url.path}?{parsed_url.query}'
+
+                 put_chunk_response = HttpClient.request(
+                     url=presigned_upload_url,
+                     data=chunk,
+                     method='PUT',
+                     timeout_in_seconds=300,
                  )
-                 logger.debug(f'Response content: {put_chunk_response.content.decode()}')
+                 return _PartMetadata(PartNumber=part_number, ETag=put_chunk_response.headers['ETag']), len(chunk)

-         except Exception as error:  # pylint: disable=broad-except
-             logger_no_user_data.warning(f'Encountered error when uploading part {part_number}. Retrying...')
-             logger.debug(f'Upload error: {error}')
+             except Exception as error:  # pylint: disable=broad-except
+                 logger_no_user_data.warning(f'Encountered error when uploading part {part_number}. Retrying...')
+                 logger.debug(f'Upload error: {error} ({presigned_upload_url})')

          time.sleep(index * index + 2)
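Reviewer note: RequestOptions now carries a relative path instead of a full url, and biolib.api.client appears to handle base-URL resolution and authentication (the authenticate flag replaces the manual BearerAuth construction). A sketch of constructing the uploader against the new shape, mirroring the call in biolib/lfs/utils.py above; the version uuid is a placeholder:

from biolib import utils

uploader = utils.MultiPartUploader(
    use_process_pool=True,
    get_presigned_upload_url_request=dict(
        headers=None,
        requires_biolib_auth=True,
        path='/lfs/versions/<version-uuid>/presigned_upload_url/',
    ),
    complete_upload_request=dict(
        headers=None,
        requires_biolib_auth=True,
        path='/lfs/versions/<version-uuid>/complete_upload/',
    ),
)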