pybiolib 1.2.52.dev1__py3-none-any.whl → 1.2.63.dev1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -6,19 +6,21 @@ from pathlib import Path
6
6
  from struct import Struct
7
7
  from typing import Callable, Dict, List, Optional, Union, cast
8
8
 
9
- from biolib import api, utils
9
+ from biolib import api
10
10
  from biolib._internal import types
11
11
  from biolib._internal.data_record import get_data_record_state_from_uri
12
12
  from biolib._internal.data_record.data_record import validate_sqlite_v1
13
+ from biolib._internal.data_record.push_data import (
14
+ push_data_path,
15
+ validate_data_path_and_get_files_and_size_of_directory,
16
+ )
13
17
  from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
14
- from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
15
18
  from biolib._internal.http_client import HttpClient
16
19
  from biolib.api import client as api_client
17
20
  from biolib.biolib_api_client import BiolibApiClient
18
21
  from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersion, DataRecordVersionInfo
19
22
  from biolib.biolib_binary_format import LazyLoadedFile
20
23
  from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
21
- from biolib.biolib_errors import BioLibError
22
24
  from biolib.biolib_logging import logger
23
25
  from biolib.utils.app_uri import parse_app_uri
24
26
  from biolib.utils.zip.remote_zip import RemoteZip
@@ -85,18 +87,8 @@ class DataRecord:
85
87
  self.download_files(output_dir=output_dir, path_filter=path_filter)
86
88
 
87
89
  def update(self, data_path: str, chunk_size_in_mb: Optional[int] = None) -> None:
88
- assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
89
90
  BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Data Record')
90
-
91
- if os.path.realpath(data_path) == '/':
92
- raise BioLibError('Pushing your root directory is not possible')
93
-
94
- original_working_dir = os.getcwd()
95
- os.chdir(data_path)
96
- files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
97
-
98
- if data_size_in_bytes > 4_500_000_000_000:
99
- raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
91
+ files_to_zip, data_size_in_bytes = validate_data_path_and_get_files_and_size_of_directory(data_path)
100
92
 
101
93
  # validate data record
102
94
  detailed_dict: types.DataRecordDetailedDict = self._get_detailed_dict()
@@ -114,43 +106,20 @@ class DataRecord:
114
106
  else:
115
107
  raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")
116
108
 
117
- min_chunk_size_bytes = 10_000_000
118
- chunk_size_in_bytes: int
119
- if chunk_size_in_mb:
120
- chunk_size_in_bytes = chunk_size_in_mb * 1_000_000 # Convert megabytes to bytes
121
- if chunk_size_in_bytes < min_chunk_size_bytes:
122
- logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
123
- chunk_size_in_bytes = min_chunk_size_bytes
124
- else:
125
- # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
126
- chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
127
-
128
- data_size_in_mb = round(data_size_in_bytes / 10**6)
129
- logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
130
-
131
109
  response = api.client.post(path='/lfs/versions/', data={'resource_uuid': self._state['resource_uuid']})
132
110
  data_record_version: DataRecordVersion = response.json()
133
- iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
134
-
135
- multipart_uploader = utils.MultiPartUploader(
136
- use_process_pool=True,
137
- get_presigned_upload_url_request=dict(
138
- headers=None,
139
- requires_biolib_auth=True,
140
- path=f"/lfs/versions/{data_record_version['uuid']}/presigned_upload_url/",
141
- ),
142
- complete_upload_request=dict(
143
- headers=None,
144
- requires_biolib_auth=True,
145
- path=f"/lfs/versions/{data_record_version['uuid']}/complete_upload/",
146
- ),
111
+ resource_version_uuid = data_record_version['uuid']
112
+
113
+ push_data_path(
114
+ data_path=data_path,
115
+ data_size_in_bytes=data_size_in_bytes,
116
+ files_to_zip=files_to_zip,
117
+ resource_version_uuid=resource_version_uuid,
118
+ chunk_size_in_mb=chunk_size_in_mb,
147
119
  )
148
120
 
149
- multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
150
- os.chdir(original_working_dir)
151
-
152
121
  api.client.patch(
153
- path=f"/resources/versions/{data_record_version['uuid']}/",
122
+ path=f'/resources/versions/{resource_version_uuid}/',
154
123
  data={'state': 'published', 'set_as_active': True},
155
124
  )
156
125
 
@@ -0,0 +1,67 @@
1
+ import os
2
+
3
+ from biolib import utils
4
+ from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
5
+ from biolib._internal.types.typing import List, Optional, Tuple
6
+ from biolib.biolib_errors import BioLibError
7
+ from biolib.biolib_logging import logger
8
+
9
+
10
+ def validate_data_path_and_get_files_and_size_of_directory(data_path: str) -> Tuple[List[str], int]:
11
+ assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
12
+
13
+ if os.path.realpath(data_path) == '/':
14
+ raise BioLibError('Pushing your root directory is not possible')
15
+
16
+ original_working_dir = os.getcwd()
17
+ os.chdir(data_path)
18
+ files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
19
+ os.chdir(original_working_dir)
20
+
21
+ if data_size_in_bytes > 4_500_000_000_000:
22
+ raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
23
+
24
+ return files_to_zip, data_size_in_bytes
25
+
26
+
27
+ def push_data_path(
28
+ data_path: str,
29
+ data_size_in_bytes: int,
30
+ files_to_zip: List[str],
31
+ resource_version_uuid: str,
32
+ chunk_size_in_mb: Optional[int] = None,
33
+ ) -> None:
34
+ original_working_dir = os.getcwd()
35
+ os.chdir(data_path)
36
+
37
+ min_chunk_size_bytes = 10_000_000
38
+ chunk_size_in_bytes: int
39
+ if chunk_size_in_mb:
40
+ chunk_size_in_bytes = chunk_size_in_mb * 1_000_000 # Convert megabytes to bytes
41
+ if chunk_size_in_bytes < min_chunk_size_bytes:
42
+ logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
43
+ chunk_size_in_bytes = min_chunk_size_bytes
44
+ else:
45
+ # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
46
+ chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
47
+
48
+ data_size_in_mb = round(data_size_in_bytes / 10**6)
49
+ logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
50
+
51
+ iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
52
+ multipart_uploader = utils.MultiPartUploader(
53
+ use_process_pool=True,
54
+ get_presigned_upload_url_request=dict(
55
+ headers=None,
56
+ requires_biolib_auth=True,
57
+ path=f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
58
+ ),
59
+ complete_upload_request=dict(
60
+ headers=None,
61
+ requires_biolib_auth=True,
62
+ path=f'/lfs/versions/{resource_version_uuid}/complete_upload/',
63
+ ),
64
+ )
65
+
66
+ multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
67
+ os.chdir(original_working_dir)
@@ -6,6 +6,10 @@ import rich.progress
6
6
  import yaml
7
7
 
8
8
  from biolib import api, utils
9
+ from biolib._internal.data_record.push_data import (
10
+ push_data_path,
11
+ validate_data_path_and_get_files_and_size_of_directory,
12
+ )
9
13
  from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
10
14
  from biolib.biolib_api_client import BiolibApiClient
11
15
  from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
@@ -94,10 +98,25 @@ def push_application(
94
98
  zip_filters.add('.biolib/config.yml')
95
99
 
96
100
  input_files_maps_to_root = False
101
+ app_data_path: Optional[Path] = None
97
102
  try:
98
103
  with open(config_yml_path) as config_yml_file:
99
104
  config = yaml.safe_load(config_yml_file.read())
100
105
 
106
+ app_data = config.get('app_data')
107
+ if app_data:
108
+ if not isinstance(app_data, str):
109
+ raise BioLibError(
110
+ f'In .biolib/config.yml the value of "app_data" must be a string but got {type(app_data)}'
111
+ )
112
+
113
+ app_data_path = app_path_absolute.joinpath(app_data).resolve()
114
+ if not app_data_path.is_dir():
115
+ raise BioLibError(
116
+ 'In .biolib/config.yml the value of "app_data" must be a path to a directory '
117
+ 'in the application directory'
118
+ )
119
+
101
120
  license_file_relative_path = config.get('license_file', 'LICENSE')
102
121
  if app_path_absolute.joinpath(license_file_relative_path).is_file():
103
122
  zip_filters.add(license_file_relative_path)
@@ -118,11 +137,14 @@ def push_application(
118
137
  if module.get('source_files'):
119
138
  zip_filters.add('*')
120
139
 
121
- for mapping in module['input_files']:
140
+ for mapping in module.get('input_files', []):
122
141
  mapping_parts = mapping.split(' ')
123
142
  if len(mapping_parts) == 3 and mapping_parts[2] == '/':
124
143
  input_files_maps_to_root = True
125
144
 
145
+ except BioLibError as error:
146
+ raise error from None
147
+
126
148
  except Exception as error:
127
149
  raise BioLibError('Failed to parse the .biolib/config.yml file') from error
128
150
 
@@ -175,6 +197,17 @@ def push_application(
175
197
  else None,
176
198
  )
177
199
 
200
+ if app_data_path:
201
+ app_data_files_to_zip, app_data_size_in_bytes = validate_data_path_and_get_files_and_size_of_directory(
202
+ data_path=str(app_data_path),
203
+ )
204
+ push_data_path(
205
+ resource_version_uuid=new_app_version_json['public_id'],
206
+ data_path=str(app_data_path),
207
+ data_size_in_bytes=app_data_size_in_bytes,
208
+ files_to_zip=app_data_files_to_zip,
209
+ )
210
+
178
211
  # Don't push docker images if copying from another app version
179
212
  docker_tags = new_app_version_json.get('docker_tags', {})
180
213
  if not app_version_to_copy_images_from and docker_tags:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.2.52.dev1
3
+ Version: 1.2.63.dev1
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -1,10 +1,11 @@
1
1
  LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
2
2
  PYPI_README.md,sha256=_IH7pxFiqy2bIAmaVeA-iVTyUwWRjMIlfgtUbYTtmls,368
3
3
  biolib/__init__.py,sha256=q_YhAYw51Vq16IKtSk8_MJclDAa4CfCmPhvWDYmrSIg,4393
4
- biolib/_data_record/data_record.py,sha256=-_EAcyheLWIkx02vQYygzEjxArYqx9XuQQYeE5GtuzU,12672
4
+ biolib/_data_record/data_record.py,sha256=zVAhFU1RLI1-ptoQ_l639RNwrMANXV9j75yXHvB7dtA,10950
5
5
  biolib/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  biolib/_internal/data_record/__init__.py,sha256=fGdME6JGRU_2VxpJbYpGXYndjN-feUkmKY4fuMyq3cg,76
7
7
  biolib/_internal/data_record/data_record.py,sha256=g_-jdy5-Zem3dthwxJj2OuQqkDGTyc-iGqN1rtYYD1A,4418
8
+ biolib/_internal/data_record/push_data.py,sha256=-L3a_7zZzDCXabBu3O4lWPMAMeBbeRPTrBlEM-_5SCI,2693
8
9
  biolib/_internal/data_record/remote_storage_endpoint.py,sha256=eCptuZ4DMAPnaNCVDvpWXwXGI6Jac9U1N5dqU8Cj95Q,1732
9
10
  biolib/_internal/file_utils.py,sha256=4jT6j7bB21c0JNn5BfnyWQib_zt0CVtJ_TiOFOStRcE,2604
10
11
  biolib/_internal/fuse_mount/__init__.py,sha256=B_tM6RM2dBw-vbpoHJC4X3tOAaN1H2RDvqYJOw3xFwg,55
@@ -14,7 +15,7 @@ biolib/_internal/lfs/__init__.py,sha256=gSWo_xg61UniYgD7yNYxeT4I9uaXBCBSi3_nmZjn
14
15
  biolib/_internal/lfs/cache.py,sha256=pQS2np21rdJ6I3DpoOutnzPHpLOZgUIS8TMltUJk_k4,2226
15
16
  biolib/_internal/libs/__init__.py,sha256=Jdf4tNPqe_oIIf6zYml6TiqhL_02Vyqwge6IELrAFhw,98
16
17
  biolib/_internal/libs/fusepy/__init__.py,sha256=AWDzNFS-XV_5yKb0Qx7kggIhPzq1nj_BZS5y2Nso08k,41944
17
- biolib/_internal/push_application.py,sha256=b1WvlijJYOLp7f43AoAzjINKkALM9DVB8yfSetEP4Z4,10381
18
+ biolib/_internal/push_application.py,sha256=mKs3kIKW-ZYfz3Cy6LIyFBwsWkbcGZ9zgMk-xn5NDyg,11660
18
19
  biolib/_internal/runtime.py,sha256=BiHl4klUHr36MCpqKaUso4idHeBZfPAahLYRQrabFqA,486
19
20
  biolib/_internal/types/__init__.py,sha256=xLgOQJFh3GRtiqIJq7MaqHReZx4pp34_zcaFQ_JjuJ4,198
20
21
  biolib/_internal/types/app.py,sha256=Mz2QGD_jESX-K9JYnLWPo4YA__Q_1FQQTk9pvidCohU,118
@@ -118,8 +119,8 @@ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3
118
119
  biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
119
120
  biolib/utils/seq_util.py,sha256=ZQFcaE37B2dtucN2zDjOmdya_X0ITc1zBFZJNQY13XA,5183
120
121
  biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
121
- pybiolib-1.2.52.dev1.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
122
- pybiolib-1.2.52.dev1.dist-info/METADATA,sha256=8m8D3rQE-Z7a3nLw8V5ky0Dt4K0bHefBONkoUaiieoo,1511
123
- pybiolib-1.2.52.dev1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
124
- pybiolib-1.2.52.dev1.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
125
- pybiolib-1.2.52.dev1.dist-info/RECORD,,
122
+ pybiolib-1.2.63.dev1.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
123
+ pybiolib-1.2.63.dev1.dist-info/METADATA,sha256=t8YhPOU4slbJKvi3kieokCcqoKqgyqaYf5Tjwi7Vs3s,1511
124
+ pybiolib-1.2.63.dev1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
125
+ pybiolib-1.2.63.dev1.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
126
+ pybiolib-1.2.63.dev1.dist-info/RECORD,,