pybiolib-1.2.52.dev1-py3-none-any.whl → pybiolib-1.2.63.dev1-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- biolib/_data_record/data_record.py +15 -46
- biolib/_internal/data_record/push_data.py +67 -0
- biolib/_internal/push_application.py +34 -1
- {pybiolib-1.2.52.dev1.dist-info → pybiolib-1.2.63.dev1.dist-info}/METADATA +1 -1
- {pybiolib-1.2.52.dev1.dist-info → pybiolib-1.2.63.dev1.dist-info}/RECORD +8 -7
- {pybiolib-1.2.52.dev1.dist-info → pybiolib-1.2.63.dev1.dist-info}/LICENSE +0 -0
- {pybiolib-1.2.52.dev1.dist-info → pybiolib-1.2.63.dev1.dist-info}/WHEEL +0 -0
- {pybiolib-1.2.52.dev1.dist-info → pybiolib-1.2.63.dev1.dist-info}/entry_points.txt +0 -0
@@ -6,19 +6,21 @@ from pathlib import Path
|
|
6
6
|
from struct import Struct
|
7
7
|
from typing import Callable, Dict, List, Optional, Union, cast
|
8
8
|
|
9
|
-
from biolib import api
|
9
|
+
from biolib import api
|
10
10
|
from biolib._internal import types
|
11
11
|
from biolib._internal.data_record import get_data_record_state_from_uri
|
12
12
|
from biolib._internal.data_record.data_record import validate_sqlite_v1
|
13
|
+
from biolib._internal.data_record.push_data import (
|
14
|
+
push_data_path,
|
15
|
+
validate_data_path_and_get_files_and_size_of_directory,
|
16
|
+
)
|
13
17
|
from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
|
14
|
-
from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
|
15
18
|
from biolib._internal.http_client import HttpClient
|
16
19
|
from biolib.api import client as api_client
|
17
20
|
from biolib.biolib_api_client import BiolibApiClient
|
18
21
|
from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersion, DataRecordVersionInfo
|
19
22
|
from biolib.biolib_binary_format import LazyLoadedFile
|
20
23
|
from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
|
21
|
-
from biolib.biolib_errors import BioLibError
|
22
24
|
from biolib.biolib_logging import logger
|
23
25
|
from biolib.utils.app_uri import parse_app_uri
|
24
26
|
from biolib.utils.zip.remote_zip import RemoteZip
|
@@ -85,18 +87,8 @@ class DataRecord:
|
|
85
87
|
self.download_files(output_dir=output_dir, path_filter=path_filter)
|
86
88
|
|
87
89
|
def update(self, data_path: str, chunk_size_in_mb: Optional[int] = None) -> None:
|
88
|
-
assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
|
89
90
|
BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Data Record')
|
90
|
-
|
91
|
-
if os.path.realpath(data_path) == '/':
|
92
|
-
raise BioLibError('Pushing your root directory is not possible')
|
93
|
-
|
94
|
-
original_working_dir = os.getcwd()
|
95
|
-
os.chdir(data_path)
|
96
|
-
files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
|
97
|
-
|
98
|
-
if data_size_in_bytes > 4_500_000_000_000:
|
99
|
-
raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
|
91
|
+
files_to_zip, data_size_in_bytes = validate_data_path_and_get_files_and_size_of_directory(data_path)
|
100
92
|
|
101
93
|
# validate data record
|
102
94
|
detailed_dict: types.DataRecordDetailedDict = self._get_detailed_dict()
|
@@ -114,43 +106,20 @@ class DataRecord:
|
|
114
106
|
else:
|
115
107
|
raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")
|
116
108
|
|
117
|
-
min_chunk_size_bytes = 10_000_000
|
118
|
-
chunk_size_in_bytes: int
|
119
|
-
if chunk_size_in_mb:
|
120
|
-
chunk_size_in_bytes = chunk_size_in_mb * 1_000_000 # Convert megabytes to bytes
|
121
|
-
if chunk_size_in_bytes < min_chunk_size_bytes:
|
122
|
-
logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
|
123
|
-
chunk_size_in_bytes = min_chunk_size_bytes
|
124
|
-
else:
|
125
|
-
# Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
|
126
|
-
chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
|
127
|
-
|
128
|
-
data_size_in_mb = round(data_size_in_bytes / 10**6)
|
129
|
-
logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
|
130
|
-
|
131
109
|
response = api.client.post(path='/lfs/versions/', data={'resource_uuid': self._state['resource_uuid']})
|
132
110
|
data_record_version: DataRecordVersion = response.json()
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
),
|
142
|
-
complete_upload_request=dict(
|
143
|
-
headers=None,
|
144
|
-
requires_biolib_auth=True,
|
145
|
-
path=f"/lfs/versions/{data_record_version['uuid']}/complete_upload/",
|
146
|
-
),
|
111
|
+
resource_version_uuid = data_record_version['uuid']
|
112
|
+
|
113
|
+
push_data_path(
|
114
|
+
data_path=data_path,
|
115
|
+
data_size_in_bytes=data_size_in_bytes,
|
116
|
+
files_to_zip=files_to_zip,
|
117
|
+
resource_version_uuid=resource_version_uuid,
|
118
|
+
chunk_size_in_mb=chunk_size_in_mb,
|
147
119
|
)
|
148
120
|
|
149
|
-
multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
|
150
|
-
os.chdir(original_working_dir)
|
151
|
-
|
152
121
|
api.client.patch(
|
153
|
-
path=f
|
122
|
+
path=f'/resources/versions/{resource_version_uuid}/',
|
154
123
|
data={'state': 'published', 'set_as_active': True},
|
155
124
|
)
|
156
125
|
|
@@ -0,0 +1,67 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
from biolib import utils
|
4
|
+
from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
|
5
|
+
from biolib._internal.types.typing import List, Optional, Tuple
|
6
|
+
from biolib.biolib_errors import BioLibError
|
7
|
+
from biolib.biolib_logging import logger
|
8
|
+
|
9
|
+
|
10
|
+
def validate_data_path_and_get_files_and_size_of_directory(data_path: str) -> Tuple[List[str], int]:
|
11
|
+
assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
|
12
|
+
|
13
|
+
if os.path.realpath(data_path) == '/':
|
14
|
+
raise BioLibError('Pushing your root directory is not possible')
|
15
|
+
|
16
|
+
original_working_dir = os.getcwd()
|
17
|
+
os.chdir(data_path)
|
18
|
+
files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
|
19
|
+
os.chdir(original_working_dir)
|
20
|
+
|
21
|
+
if data_size_in_bytes > 4_500_000_000_000:
|
22
|
+
raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
|
23
|
+
|
24
|
+
return files_to_zip, data_size_in_bytes
|
25
|
+
|
26
|
+
|
27
|
+
def push_data_path(
|
28
|
+
data_path: str,
|
29
|
+
data_size_in_bytes: int,
|
30
|
+
files_to_zip: List[str],
|
31
|
+
resource_version_uuid: str,
|
32
|
+
chunk_size_in_mb: Optional[int] = None,
|
33
|
+
) -> None:
|
34
|
+
original_working_dir = os.getcwd()
|
35
|
+
os.chdir(data_path)
|
36
|
+
|
37
|
+
min_chunk_size_bytes = 10_000_000
|
38
|
+
chunk_size_in_bytes: int
|
39
|
+
if chunk_size_in_mb:
|
40
|
+
chunk_size_in_bytes = chunk_size_in_mb * 1_000_000 # Convert megabytes to bytes
|
41
|
+
if chunk_size_in_bytes < min_chunk_size_bytes:
|
42
|
+
logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
|
43
|
+
chunk_size_in_bytes = min_chunk_size_bytes
|
44
|
+
else:
|
45
|
+
# Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
|
46
|
+
chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
|
47
|
+
|
48
|
+
data_size_in_mb = round(data_size_in_bytes / 10**6)
|
49
|
+
logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
|
50
|
+
|
51
|
+
iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
|
52
|
+
multipart_uploader = utils.MultiPartUploader(
|
53
|
+
use_process_pool=True,
|
54
|
+
get_presigned_upload_url_request=dict(
|
55
|
+
headers=None,
|
56
|
+
requires_biolib_auth=True,
|
57
|
+
path=f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
|
58
|
+
),
|
59
|
+
complete_upload_request=dict(
|
60
|
+
headers=None,
|
61
|
+
requires_biolib_auth=True,
|
62
|
+
path=f'/lfs/versions/{resource_version_uuid}/complete_upload/',
|
63
|
+
),
|
64
|
+
)
|
65
|
+
|
66
|
+
multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
|
67
|
+
os.chdir(original_working_dir)
|
@@ -6,6 +6,10 @@ import rich.progress
|
|
6
6
|
import yaml
|
7
7
|
|
8
8
|
from biolib import api, utils
|
9
|
+
from biolib._internal.data_record.push_data import (
|
10
|
+
push_data_path,
|
11
|
+
validate_data_path_and_get_files_and_size_of_directory,
|
12
|
+
)
|
9
13
|
from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
|
10
14
|
from biolib.biolib_api_client import BiolibApiClient
|
11
15
|
from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
|
@@ -94,10 +98,25 @@ def push_application(
|
|
94
98
|
zip_filters.add('.biolib/config.yml')
|
95
99
|
|
96
100
|
input_files_maps_to_root = False
|
101
|
+
app_data_path: Optional[Path] = None
|
97
102
|
try:
|
98
103
|
with open(config_yml_path) as config_yml_file:
|
99
104
|
config = yaml.safe_load(config_yml_file.read())
|
100
105
|
|
106
|
+
app_data = config.get('app_data')
|
107
|
+
if app_data:
|
108
|
+
if not isinstance(app_data, str):
|
109
|
+
raise BioLibError(
|
110
|
+
f'In .biolib/config.yml the value of "app_data" must be a string but got {type(app_data)}'
|
111
|
+
)
|
112
|
+
|
113
|
+
app_data_path = app_path_absolute.joinpath(app_data).resolve()
|
114
|
+
if not app_data_path.is_dir():
|
115
|
+
raise BioLibError(
|
116
|
+
'In .biolib/config.yml the value of "app_data" must be a path to a directory '
|
117
|
+
'in the application directory'
|
118
|
+
)
|
119
|
+
|
101
120
|
license_file_relative_path = config.get('license_file', 'LICENSE')
|
102
121
|
if app_path_absolute.joinpath(license_file_relative_path).is_file():
|
103
122
|
zip_filters.add(license_file_relative_path)
|
@@ -118,11 +137,14 @@ def push_application(
|
|
118
137
|
if module.get('source_files'):
|
119
138
|
zip_filters.add('*')
|
120
139
|
|
121
|
-
for mapping in module
|
140
|
+
for mapping in module.get('input_files', []):
|
122
141
|
mapping_parts = mapping.split(' ')
|
123
142
|
if len(mapping_parts) == 3 and mapping_parts[2] == '/':
|
124
143
|
input_files_maps_to_root = True
|
125
144
|
|
145
|
+
except BioLibError as error:
|
146
|
+
raise error from None
|
147
|
+
|
126
148
|
except Exception as error:
|
127
149
|
raise BioLibError('Failed to parse the .biolib/config.yml file') from error
|
128
150
|
|
@@ -175,6 +197,17 @@ def push_application(
|
|
175
197
|
else None,
|
176
198
|
)
|
177
199
|
|
200
|
+
if app_data_path:
|
201
|
+
app_data_files_to_zip, app_data_size_in_bytes = validate_data_path_and_get_files_and_size_of_directory(
|
202
|
+
data_path=str(app_data_path),
|
203
|
+
)
|
204
|
+
push_data_path(
|
205
|
+
resource_version_uuid=new_app_version_json['public_id'],
|
206
|
+
data_path=str(app_data_path),
|
207
|
+
data_size_in_bytes=app_data_size_in_bytes,
|
208
|
+
files_to_zip=app_data_files_to_zip,
|
209
|
+
)
|
210
|
+
|
178
211
|
# Don't push docker images if copying from another app version
|
179
212
|
docker_tags = new_app_version_json.get('docker_tags', {})
|
180
213
|
if not app_version_to_copy_images_from and docker_tags:
|
@@ -1,10 +1,11 @@
|
|
1
1
|
LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
|
2
2
|
PYPI_README.md,sha256=_IH7pxFiqy2bIAmaVeA-iVTyUwWRjMIlfgtUbYTtmls,368
|
3
3
|
biolib/__init__.py,sha256=q_YhAYw51Vq16IKtSk8_MJclDAa4CfCmPhvWDYmrSIg,4393
|
4
|
-
biolib/_data_record/data_record.py,sha256
|
4
|
+
biolib/_data_record/data_record.py,sha256=zVAhFU1RLI1-ptoQ_l639RNwrMANXV9j75yXHvB7dtA,10950
|
5
5
|
biolib/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
biolib/_internal/data_record/__init__.py,sha256=fGdME6JGRU_2VxpJbYpGXYndjN-feUkmKY4fuMyq3cg,76
|
7
7
|
biolib/_internal/data_record/data_record.py,sha256=g_-jdy5-Zem3dthwxJj2OuQqkDGTyc-iGqN1rtYYD1A,4418
|
8
|
+
biolib/_internal/data_record/push_data.py,sha256=-L3a_7zZzDCXabBu3O4lWPMAMeBbeRPTrBlEM-_5SCI,2693
|
8
9
|
biolib/_internal/data_record/remote_storage_endpoint.py,sha256=eCptuZ4DMAPnaNCVDvpWXwXGI6Jac9U1N5dqU8Cj95Q,1732
|
9
10
|
biolib/_internal/file_utils.py,sha256=4jT6j7bB21c0JNn5BfnyWQib_zt0CVtJ_TiOFOStRcE,2604
|
10
11
|
biolib/_internal/fuse_mount/__init__.py,sha256=B_tM6RM2dBw-vbpoHJC4X3tOAaN1H2RDvqYJOw3xFwg,55
|
@@ -14,7 +15,7 @@ biolib/_internal/lfs/__init__.py,sha256=gSWo_xg61UniYgD7yNYxeT4I9uaXBCBSi3_nmZjn
|
|
14
15
|
biolib/_internal/lfs/cache.py,sha256=pQS2np21rdJ6I3DpoOutnzPHpLOZgUIS8TMltUJk_k4,2226
|
15
16
|
biolib/_internal/libs/__init__.py,sha256=Jdf4tNPqe_oIIf6zYml6TiqhL_02Vyqwge6IELrAFhw,98
|
16
17
|
biolib/_internal/libs/fusepy/__init__.py,sha256=AWDzNFS-XV_5yKb0Qx7kggIhPzq1nj_BZS5y2Nso08k,41944
|
17
|
-
biolib/_internal/push_application.py,sha256=
|
18
|
+
biolib/_internal/push_application.py,sha256=mKs3kIKW-ZYfz3Cy6LIyFBwsWkbcGZ9zgMk-xn5NDyg,11660
|
18
19
|
biolib/_internal/runtime.py,sha256=BiHl4klUHr36MCpqKaUso4idHeBZfPAahLYRQrabFqA,486
|
19
20
|
biolib/_internal/types/__init__.py,sha256=xLgOQJFh3GRtiqIJq7MaqHReZx4pp34_zcaFQ_JjuJ4,198
|
20
21
|
biolib/_internal/types/app.py,sha256=Mz2QGD_jESX-K9JYnLWPo4YA__Q_1FQQTk9pvidCohU,118
|
@@ -118,8 +119,8 @@ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3
|
|
118
119
|
biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
|
119
120
|
biolib/utils/seq_util.py,sha256=ZQFcaE37B2dtucN2zDjOmdya_X0ITc1zBFZJNQY13XA,5183
|
120
121
|
biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
|
121
|
-
pybiolib-1.2.
|
122
|
-
pybiolib-1.2.
|
123
|
-
pybiolib-1.2.
|
124
|
-
pybiolib-1.2.
|
125
|
-
pybiolib-1.2.
|
122
|
+
pybiolib-1.2.63.dev1.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
|
123
|
+
pybiolib-1.2.63.dev1.dist-info/METADATA,sha256=t8YhPOU4slbJKvi3kieokCcqoKqgyqaYf5Tjwi7Vs3s,1511
|
124
|
+
pybiolib-1.2.63.dev1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
125
|
+
pybiolib-1.2.63.dev1.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
|
126
|
+
pybiolib-1.2.63.dev1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|