pybiolib 1.1.2145__py3-none-any.whl → 1.1.2155__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
@@ -5,21 +5,24 @@ from fnmatch import fnmatch
5
5
  from struct import Struct
6
6
  from typing import Callable, Dict, List, Union, cast
7
7
 
8
- from biolib import api
8
+ from biolib import api, utils
9
9
  from biolib._internal import types
10
- from biolib._internal.data_record import get_data_record_state_from_uri, push_data_record_version
10
+ from biolib._internal.data_record import get_data_record_state_from_uri
11
11
  from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
12
+ from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
12
13
  from biolib._internal.http_client import HttpClient
13
14
  from biolib.api import client as api_client
14
15
  from biolib.biolib_api_client import BiolibApiClient
15
- from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersionInfo
16
+ from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersion, DataRecordVersionInfo
16
17
  from biolib.biolib_binary_format import LazyLoadedFile
17
18
  from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
19
+ from biolib.biolib_errors import BioLibError
18
20
  from biolib.biolib_logging import logger
19
21
  from biolib.typing_utils import Optional as _Optional
20
22
  from biolib.utils.app_uri import parse_app_uri
21
23
  from biolib.utils.zip.remote_zip import RemoteZip
22
24
 
25
+
23
26
  PathFilter = Union[str, Callable[[str], bool]]
24
27
 
25
28
 
@@ -83,10 +86,63 @@ class DataRecord:
83
86
 
84
87
  def update(self, data_path: str, chunk_size_in_mb: _Optional[int] = None) -> None:
85
88
  assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
86
- uri = push_data_record_version(
87
- data_record_uuid=self._state['resource_uuid'], input_dir=data_path, chunk_size_in_mb=chunk_size_in_mb
89
+ BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Data Record')
90
+
91
+ if os.path.realpath(data_path) == '/':
92
+ raise BioLibError('Pushing your root directory is not possible')
93
+
94
+ original_working_dir = os.getcwd()
95
+ os.chdir(data_path)
96
+ files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
97
+
98
+ if data_size_in_bytes > 4_500_000_000_000:
99
+ raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
100
+
101
+ # validate data record
102
+ detailed_dict: types.DataRecordDetailedDict = self._get_detailed_dict()
103
+ if detailed_dict['type']:
104
+ # only validate if data record has a type
105
+ data_record_type: types.DataRecordTypeDict = detailed_dict['type']
106
+ logger.info(f"Validating data record of type {data_record_type['name']}")
107
+ for rule in data_record_type['validation_rules']:
108
+ logger.info(f"Validating rule {rule['type']} for {rule['path']}...")
109
+
110
+ min_chunk_size_bytes = 10_000_000
111
+ chunk_size_in_bytes: int
112
+ if chunk_size_in_mb:
113
+ chunk_size_in_bytes = chunk_size_in_mb * 1_000_000 # Convert megabytes to bytes
114
+ if chunk_size_in_bytes < min_chunk_size_bytes:
115
+ logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
116
+ chunk_size_in_bytes = min_chunk_size_bytes
117
+ else:
118
+ # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
119
+ chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
120
+
121
+ data_size_in_mb = round(data_size_in_bytes / 10**6)
122
+ logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
123
+
124
+ response = api.client.post(path='/lfs/versions/', data={'resource_uuid': self._state['resource_uuid']})
125
+ data_record_version: DataRecordVersion = response.json()
126
+ iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
127
+
128
+ multipart_uploader = utils.MultiPartUploader(
129
+ use_process_pool=True,
130
+ get_presigned_upload_url_request=dict(
131
+ headers=None,
132
+ requires_biolib_auth=True,
133
+ path=f"/lfs/versions/{data_record_version['uuid']}/presigned_upload_url/",
134
+ ),
135
+ complete_upload_request=dict(
136
+ headers=None,
137
+ requires_biolib_auth=True,
138
+ path=f"/lfs/versions/{data_record_version['uuid']}/complete_upload/",
139
+ ),
88
140
  )
89
- self._state = get_data_record_state_from_uri(uri)
141
+
142
+ multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
143
+ os.chdir(original_working_dir)
144
+ logger.info(f"Successfully pushed a new Data Record version '{data_record_version['uri']}'")
145
+ self._state = get_data_record_state_from_uri(data_record_version['uri'])
90
146
 
91
147
  @staticmethod
92
148
  def get_by_uri(uri: str) -> 'DataRecord':
@@ -112,14 +168,15 @@ class DataRecord:
112
168
  'name': uri_parsed['app_name'],
113
169
  },
114
170
  )
115
- data_record: DataRecordInfo = response.json()
116
- logger.info(f"Successfully created new Data Record '{data_record['uri']}'")
171
+ data_record_info: DataRecordInfo = response.json()
172
+ logger.info(f"Successfully created new Data Record '{data_record_info['uri']}'")
117
173
 
118
174
  if data_path is not None:
119
- record_version_uri = push_data_record_version(data_record_uuid=data_record['uuid'], input_dir=data_path)
120
- return DataRecord.get_by_uri(uri=record_version_uri)
175
+ data_record = DataRecord.get_by_uri(uri=data_record_info['uri'])
176
+ data_record.update(data_path=data_path)
177
+ return data_record
121
178
  else:
122
- return DataRecord.get_by_uri(uri=data_record_uri)
179
+ return DataRecord.get_by_uri(uri=data_record_info['uri'])
123
180
 
124
181
  @staticmethod
125
182
  def fetch(uri: _Optional[str] = None, count: _Optional[int] = None) -> List['DataRecord']:
@@ -1 +1 @@
1
- from .data_record import get_data_record_state_from_uri, push_data_record_version
1
+ from .data_record import get_data_record_state_from_uri
@@ -1,67 +1,6 @@
1
- import os
2
- from typing import Optional
3
-
4
- from biolib import api, utils
5
- from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
6
1
  from biolib.api import client as api_client
7
- from biolib.biolib_api_client import AppGetResponse, BiolibApiClient
8
- from biolib.biolib_api_client.lfs_types import DataRecordVersion, DataRecordVersionInfo
9
- from biolib.biolib_errors import BioLibError
10
- from biolib.biolib_logging import logger
11
-
12
-
13
- def push_data_record_version(data_record_uuid: str, input_dir: str, chunk_size_in_mb: Optional[int] = None) -> str:
14
- BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Data Record')
15
-
16
- if not os.path.isdir(input_dir):
17
- raise BioLibError(f'Could not find folder at {input_dir}')
18
-
19
- if os.path.realpath(input_dir) == '/':
20
- raise BioLibError('Pushing your root directory is not possible')
21
-
22
- original_working_dir = os.getcwd()
23
- os.chdir(input_dir)
24
- files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
25
-
26
- if data_size_in_bytes > 4_500_000_000_000:
27
- raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
28
-
29
- min_chunk_size_bytes = 10_000_000
30
- chunk_size_in_bytes: int
31
- if chunk_size_in_mb:
32
- chunk_size_in_bytes = chunk_size_in_mb * 1_000_000 # Convert megabytes to bytes
33
- if chunk_size_in_bytes < min_chunk_size_bytes:
34
- logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
35
- chunk_size_in_bytes = min_chunk_size_bytes
36
- else:
37
- # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
38
- chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
39
-
40
- data_size_in_mb = round(data_size_in_bytes / 10**6)
41
- print(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
42
-
43
- response = api.client.post(path='/lfs/versions/', data={'resource_uuid': data_record_uuid})
44
- data_record_version: DataRecordVersion = response.json()
45
- iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
46
-
47
- multipart_uploader = utils.MultiPartUploader(
48
- use_process_pool=True,
49
- get_presigned_upload_url_request=dict(
50
- headers=None,
51
- requires_biolib_auth=True,
52
- path=f"/lfs/versions/{data_record_version['uuid']}/presigned_upload_url/",
53
- ),
54
- complete_upload_request=dict(
55
- headers=None,
56
- requires_biolib_auth=True,
57
- path=f"/lfs/versions/{data_record_version['uuid']}/complete_upload/",
58
- ),
59
- )
60
-
61
- multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
62
- os.chdir(original_working_dir)
63
- logger.info(f"Successfully pushed a new Data Record version '{data_record_version['uri']}'")
64
- return data_record_version['uri']
2
+ from biolib.biolib_api_client import AppGetResponse
3
+ from biolib.biolib_api_client.lfs_types import DataRecordVersionInfo
65
4
 
66
5
 
67
6
  def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.1.2145
3
+ Version: 1.1.2155
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -1,10 +1,10 @@
1
1
  LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
2
2
  README.md,sha256=_IH7pxFiqy2bIAmaVeA-iVTyUwWRjMIlfgtUbYTtmls,368
3
3
  biolib/__init__.py,sha256=_tThyzISH81yS9KXP_X3qEiKXmsIp5XOBcJIODfLVnc,4338
4
- biolib/_data_record/data_record.py,sha256=KTHrVy_wj6FU3ckD0_EsakyNJcniLIdT9c3ugujNRoI,9210
4
+ biolib/_data_record/data_record.py,sha256=Sud8yXz7yR6YW4V6OqE7nO6I4a0TdqijmMTZwwU59j8,12152
5
5
  biolib/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- biolib/_internal/data_record/__init__.py,sha256=0T0CV6PfKc8itjMu-48sCJjcZQEzXl1ZLBqG_LjJTqQ,82
7
- biolib/_internal/data_record/data_record.py,sha256=D0BaC8WhnkM564eKUI69hVHUkKY1In0cyfpjxYyWk18,3363
6
+ biolib/_internal/data_record/__init__.py,sha256=wLOy3Pb7dWYik5eQtQM00DH2AWC-M5RbTjwh9InPiqo,56
7
+ biolib/_internal/data_record/data_record.py,sha256=If4SQj-XwKSPzCpaWA01LEGKalZ6DEjD5PJZRtl3Mao,556
8
8
  biolib/_internal/data_record/remote_storage_endpoint.py,sha256=eCptuZ4DMAPnaNCVDvpWXwXGI6Jac9U1N5dqU8Cj95Q,1732
9
9
  biolib/_internal/file_utils.py,sha256=4jT6j7bB21c0JNn5BfnyWQib_zt0CVtJ_TiOFOStRcE,2604
10
10
  biolib/_internal/fuse_mount/__init__.py,sha256=B_tM6RM2dBw-vbpoHJC4X3tOAaN1H2RDvqYJOw3xFwg,55
@@ -116,8 +116,8 @@ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3
116
116
  biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
117
117
  biolib/utils/seq_util.py,sha256=jC5WhH63FTD7SLFJbxQGA2hOt9NTwq9zHl_BEec1Z0c,4907
118
118
  biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
119
- pybiolib-1.1.2145.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
120
- pybiolib-1.1.2145.dist-info/METADATA,sha256=rpVBXD5_Q8O8-SBz_G9jwhvCWRz_ww1DsPc0eApjBMc,1508
121
- pybiolib-1.1.2145.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
122
- pybiolib-1.1.2145.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
123
- pybiolib-1.1.2145.dist-info/RECORD,,
119
+ pybiolib-1.1.2155.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
120
+ pybiolib-1.1.2155.dist-info/METADATA,sha256=tu73TYRGUjbJ3MfO15gYq2UO81SYCy508-wwmoHN3WQ,1508
121
+ pybiolib-1.1.2155.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
122
+ pybiolib-1.1.2155.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
123
+ pybiolib-1.1.2155.dist-info/RECORD,,