pybiolib 1.1.2038__py3-none-any.whl → 1.1.2097__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/__init__.py +5 -1
- biolib/_data_record/data_record.py +208 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +67 -163
- biolib/_internal/data_record/remote_storage_endpoint.py +2 -2
- biolib/_internal/file_utils.py +77 -0
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/push_application.py +1 -1
- biolib/_internal/runtime.py +1 -78
- biolib/_runtime/runtime.py +79 -0
- biolib/biolib_api_client/api_client.py +1 -1
- biolib/biolib_api_client/app_types.py +1 -0
- biolib/biolib_api_client/lfs_types.py +8 -2
- biolib/cli/data_record.py +42 -6
- biolib/cli/lfs.py +10 -6
- biolib/runtime/__init__.py +1 -1
- biolib/sdk/__init__.py +9 -5
- {pybiolib-1.1.2038.dist-info → pybiolib-1.1.2097.dist-info}/METADATA +1 -1
- {pybiolib-1.1.2038.dist-info → pybiolib-1.1.2097.dist-info}/RECORD +23 -21
- biolib/lfs/__init__.py +0 -4
- biolib/lfs/utils.py +0 -153
- /biolib/{lfs → _internal/lfs}/cache.py +0 -0
- {pybiolib-1.1.2038.dist-info → pybiolib-1.1.2097.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.2038.dist-info → pybiolib-1.1.2097.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.2038.dist-info → pybiolib-1.1.2097.dist-info}/entry_points.txt +0 -0
biolib/__init__.py
CHANGED
@@ -13,7 +13,7 @@ from biolib.biolib_api_client import BiolibApiClient as _BioLibApiClient, App
 from biolib.jobs import Job as _Job
 from biolib import user as _user
 from biolib.typing_utils import List, Optional
-from biolib.
+from biolib._data_record.data_record import DataRecord as _DataRecord

 import biolib.api
 import biolib.app
@@ -45,6 +45,10 @@ def get_job(job_id: str) -> _Job:
     return _Job.create_from_uuid(uuid=job_id)


+def get_data_record(uri: str) -> _DataRecord:
+    return _DataRecord.get_by_uri(uri)
+
+
 def fetch_jobs(count: int = 25) -> List[_Job]:
     return _Job.fetch_jobs(count)

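The hunk above adds a top-level helper that resolves a Data Record by URI. A minimal usage sketch (the URI is a placeholder and assumes the caller has access to the record):

    import biolib

    record = biolib.get_data_record('account-handle/my-data-record')  # hypothetical URI
    print(record.uri)
    for file in record.list_files():
        print(file.path, file.length)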
biolib/_data_record/data_record.py
ADDED
@@ -0,0 +1,208 @@
+from biolib import api
+from biolib._internal.data_record import get_data_record_state_from_uri, push_data_record_version
+from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
+from biolib._internal.http_client import HttpClient
+from biolib.api import client as api_client
+from biolib.biolib_api_client import BiolibApiClient
+from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersionInfo
+from biolib.biolib_binary_format import LazyLoadedFile
+from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
+from biolib.biolib_logging import logger
+from biolib.typing_utils import Optional as _Optional
+from biolib.utils.app_uri import parse_app_uri
+from biolib.utils.zip.remote_zip import RemoteZip
+
+
+import os
+from collections import namedtuple
+from datetime import datetime
+from fnmatch import fnmatch
+from struct import Struct
+from typing import Callable, Dict, List, cast, Union
+
+PathFilter = Union[str, Callable[[str], bool]]
+
+
+class DataRecord:
+    def __init__(self, _internal_state: DataRecordVersionInfo):
+        self._state = _internal_state
+
+    def __repr__(self):
+        return f'DataRecord: {self._state["resource_uri"]}'
+
+    @property
+    def uri(self) -> str:
+        return self._state['resource_uri']
+
+    @property
+    def name(self) -> str:
+        uri_parsed = parse_app_uri(self._state['resource_uri'], use_account_as_name_default=False)
+        if not uri_parsed['app_name']:
+            raise ValueError('Expected parameter "resource_uri" to contain resource name')
+
+        return uri_parsed['app_name']
+
+    def list_files(self, path_filter: _Optional[PathFilter] = None) -> List[LazyLoadedFile]:
+        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
+            resource_version_uuid=self._state['resource_version_uuid'],
+        )
+        files: List[LazyLoadedFile] = []
+        with RemoteZip(url=remote_storage_endpoint.get_remote_url()) as remote_zip:
+            central_directory = remote_zip.get_central_directory()
+            for file_info in central_directory.values():
+                files.append(self._get_file(remote_storage_endpoint, file_info))
+
+        return self._get_filtered_files(files=files, path_filter=path_filter) if path_filter else files
+
+    def download_zip(self, output_path: str):
+        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
+            resource_version_uuid=self._state['resource_version_uuid'],
+        )
+        HttpClient.request(url=remote_storage_endpoint.get_remote_url(), response_path=output_path)
+
+    def download_files(self, output_dir: str, path_filter: _Optional[PathFilter] = None) -> None:
+        filtered_files = self.list_files(path_filter=path_filter)
+
+        if len(filtered_files) == 0:
+            logger.debug('No files to save')
+            return
+
+        for file in filtered_files:
+            file_path = os.path.join(output_dir, file.path)
+            os.makedirs(os.path.dirname(file_path), exist_ok=True)
+            with open(file_path, mode='wb') as file_handle:
+                for chunk in file.get_data_iterator():
+                    file_handle.write(chunk)
+
+    def save_files(self, output_dir: str, path_filter: _Optional[PathFilter] = None) -> None:
+        self.download_files(output_dir=output_dir, path_filter=path_filter)
+
+    def update(self, data_path: str, chunk_size_in_mb: _Optional[int] = None) -> None:
+        assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
+        uri = push_data_record_version(
+            data_record_uuid=self._state['resource_uuid'], input_dir=data_path, chunk_size_in_mb=chunk_size_in_mb
+        )
+        self._state = get_data_record_state_from_uri(uri)
+
+    @staticmethod
+    def get_by_uri(uri: str) -> 'DataRecord':
+        return DataRecord(_internal_state=get_data_record_state_from_uri(uri))
+
+    @staticmethod
+    def create(destination: str, data_path: _Optional[str] = None) -> 'DataRecord':
+        BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Data Record')
+        if data_path is not None:
+            assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
+        uri_parsed = parse_app_uri(destination, use_account_as_name_default=False)
+        if uri_parsed['app_name_normalized']:
+            data_record_uri = destination
+        else:
+            record_name = 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
+            data_record_uri = f'{destination}/{record_name}'
+
+        uri_parsed = parse_app_uri(data_record_uri)
+        response = api.client.post(
+            path='/lfs/',
+            data={
+                'account_handle': uri_parsed['account_handle_normalized'],
+                'name': uri_parsed['app_name'],
+            },
+        )
+        data_record: DataRecordInfo = response.json()
+        logger.info(f"Successfully created new Data Record '{data_record['uri']}'")
+
+        if data_path is not None:
+            record_version_uri = push_data_record_version(data_record_uuid=data_record['uuid'], input_dir=data_path)
+            return DataRecord.get_by_uri(uri=record_version_uri)
+        else:
+            return DataRecord.get_by_uri(uri=data_record_uri)
+
+    @staticmethod
+    def fetch(uri: _Optional[str] = None, count: _Optional[int] = None) -> List['DataRecord']:
+        max_page_size = 1_000
+        params: Dict[str, Union[str, int]] = {
+            'page_size': str(count or max_page_size),
+            'resource_type': 'data-record',
+        }
+        if uri:
+            uri_parsed = parse_app_uri(uri, use_account_as_name_default=False)
+            params['account_handle'] = uri_parsed['account_handle_normalized']
+            if uri_parsed['app_name_normalized']:
+                params['app_name'] = uri_parsed['app_name_normalized']
+
+        results = api_client.get(path='/apps/', params=params).json()['results']
+        if count is None and len(results) == max_page_size:
+            logger.warning(
+                f'Fetch results exceeded maximum count of {max_page_size}. Some data records might not be fetched.'
+            )
+
+        return [
+            DataRecord(
+                _internal_state={
+                    'resource_uri': result['resource_uri'],
+                    'resource_uuid': result['public_id'],
+                    'resource_version_uuid': result['active_version'],
+                }
+            )
+            for result in results
+        ]
+
+    @staticmethod
+    def _get_file(remote_storage_endpoint: DataRecordRemoteStorageEndpoint, file_info: Dict) -> LazyLoadedFile:
+        local_file_header_signature_bytes = b'\x50\x4b\x03\x04'
+        local_file_header_struct = Struct('<H2sHHHIIIHH')
+        LocalFileHeader = namedtuple(
+            'LocalFileHeader',
+            (
+                'version',
+                'flags',
+                'compression_raw',
+                'mod_time',
+                'mod_date',
+                'crc_32_expected',
+                'compressed_size_raw',
+                'uncompressed_size_raw',
+                'file_name_len',
+                'extra_field_len',
+            ),
+        )
+
+        local_file_header_start = file_info['header_offset'] + len(local_file_header_signature_bytes)
+        local_file_header_end = local_file_header_start + local_file_header_struct.size
+
+        def file_start_func() -> int:
+            local_file_header_response = HttpClient.request(
+                url=remote_storage_endpoint.get_remote_url(),
+                headers={'range': f'bytes={local_file_header_start}-{local_file_header_end - 1}'},
+                timeout_in_seconds=300,
+            )
+            local_file_header = LocalFileHeader._make(
+                local_file_header_struct.unpack(local_file_header_response.content)
+            )
+            file_start: int = (
+                local_file_header_end + local_file_header.file_name_len + local_file_header.extra_field_len
+            )
+            return file_start
+
+        return LazyLoadedFile(
+            buffer=RemoteIndexableBuffer(endpoint=remote_storage_endpoint),
+            length=file_info['file_size'],
+            path=file_info['filename'],
+            start=None,
+            start_func=file_start_func,
+        )
+
+    @staticmethod
+    def _get_filtered_files(files: List[LazyLoadedFile], path_filter: PathFilter) -> List[LazyLoadedFile]:
+        if not (isinstance(path_filter, str) or callable(path_filter)):
+            raise Exception('Expected path_filter to be a string or a function')
+
+        if callable(path_filter):
+            return list(filter(lambda x: path_filter(x.path), files))  # type: ignore
+
+        glob_filter = cast(str, path_filter)
+
+        def _filter_function(file: LazyLoadedFile) -> bool:
+            return fnmatch(file.path, glob_filter)
+
+        return list(filter(_filter_function, files))
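As a rough illustration of the DataRecord API added above (not part of the diff itself), the sketch below lists files with a glob filter, saves them locally, and pushes a new version from a local directory; the URI and paths are placeholders:

    from biolib._data_record.data_record import DataRecord

    record = DataRecord.get_by_uri('account-handle/my-data-record')   # hypothetical URI
    csv_files = record.list_files(path_filter='*.csv')                # glob filter on file paths
    record.save_files(output_dir='record-files', path_filter='*.csv')
    record.update(data_path='./new-version-of-data')                  # pushes a new Data Record version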
biolib/_internal/data_record/__init__.py
CHANGED
@@ -1 +1 @@
-from .data_record import
+from .data_record import get_data_record_state_from_uri, push_data_record_version
biolib/_internal/data_record/data_record.py
CHANGED
@@ -1,169 +1,73 @@
 import os
-from
-from datetime import datetime
-from fnmatch import fnmatch
-from struct import Struct
-from typing import Callable, Dict, List, Optional, Union, cast
+from typing import Optional

-from biolib import
-from biolib._internal.
-from biolib._internal.http_client import HttpClient
+from biolib import api, utils
+from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
 from biolib.api import client as api_client
-from biolib.biolib_api_client import AppGetResponse
-from biolib.
-from biolib.
+from biolib.biolib_api_client import AppGetResponse, BiolibApiClient
+from biolib.biolib_api_client.lfs_types import DataRecordVersion, DataRecordVersionInfo
+from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger
-from biolib.utils.app_uri import parse_app_uri
-from biolib.utils.zip.remote_zip import RemoteZip  # type: ignore

-PathFilter = Union[str, Callable[[str], bool]]

-    [removed lines 21-81 are not shown in the original diff rendering]
-    @staticmethod
-    def create(destination: str, data_path: str, name: Optional[str] = None) -> 'DataRecord':
-        assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
-        record_name = name if name else 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
-        record_uri = lfs.create_large_file_system(lfs_uri=f'{destination}/{record_name}')
-        record_version_uri = lfs.push_large_file_system(lfs_uri=record_uri, input_dir=data_path)
-        return DataRecord(uri=record_version_uri)
-
-    @staticmethod
-    def fetch(uri: Optional[str] = None, count: Optional[int] = None) -> List['DataRecord']:
-        max_page_size = 1_000
-        params: Dict[str, Union[str, int]] = {
-            'page_size': str(count or max_page_size),
-            'resource_type': 'data-record',
-        }
-        if uri:
-            uri_parsed = parse_app_uri(uri, use_account_as_name_default=False)
-            params['account_handle'] = uri_parsed['account_handle_normalized']
-            if uri_parsed['app_name_normalized']:
-                params['app_name'] = uri_parsed['app_name_normalized']
-
-        results = api_client.get(path='/apps/', params=params).json()['results']
-        if count is None and len(results) == max_page_size:
-            logger.warning(
-                f'Fetch results exceeded maximum count of {max_page_size}. Some data records might not be fetched.'
-            )
-
-        return [DataRecord(result['resource_uri']) for result in results]
-
-    @staticmethod
-    def _get_file(remote_storage_endpoint: DataRecordRemoteStorageEndpoint, file_info: Dict) -> LazyLoadedFile:
-        local_file_header_signature_bytes = b'\x50\x4b\x03\x04'
-        local_file_header_struct = Struct('<H2sHHHIIIHH')
-        LocalFileHeader = namedtuple(
-            'LocalFileHeader',
-            (
-                'version',
-                'flags',
-                'compression_raw',
-                'mod_time',
-                'mod_date',
-                'crc_32_expected',
-                'compressed_size_raw',
-                'uncompressed_size_raw',
-                'file_name_len',
-                'extra_field_len',
-            ),
-        )
-
-        local_file_header_start = file_info['header_offset'] + len(local_file_header_signature_bytes)
-        local_file_header_end = local_file_header_start + local_file_header_struct.size
-
-        def file_start_func() -> int:
-            local_file_header_response = HttpClient.request(
-                url=remote_storage_endpoint.get_remote_url(),
-                headers={'range': f'bytes={local_file_header_start}-{local_file_header_end - 1}'},
-                timeout_in_seconds=300,
-            )
-            local_file_header = LocalFileHeader._make(
-                local_file_header_struct.unpack(local_file_header_response.content)
-            )
-            file_start: int = (
-                local_file_header_end + local_file_header.file_name_len + local_file_header.extra_field_len
-            )
-            return file_start
-
-        return LazyLoadedFile(
-            buffer=RemoteIndexableBuffer(endpoint=remote_storage_endpoint),
-            length=file_info['file_size'],
-            path=file_info['filename'],
-            start=None,
-            start_func=file_start_func,
-        )
-
-    @staticmethod
-    def _get_filtered_files(files: List[LazyLoadedFile], path_filter: PathFilter) -> List[LazyLoadedFile]:
-        if not (isinstance(path_filter, str) or callable(path_filter)):
-            raise Exception('Expected path_filter to be a string or a function')
-
-        if callable(path_filter):
-            return list(filter(lambda x: path_filter(x.path), files))  # type: ignore
-
-        glob_filter = cast(str, path_filter)
-
-        def _filter_function(file: LazyLoadedFile) -> bool:
-            return fnmatch(file.path, glob_filter)
-
-        return list(filter(_filter_function, files))
+def push_data_record_version(data_record_uuid: str, input_dir: str, chunk_size_in_mb: Optional[int] = None) -> str:
+    BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Data Record')
+
+    if not os.path.isdir(input_dir):
+        raise BioLibError(f'Could not find folder at {input_dir}')
+
+    if os.path.realpath(input_dir) == '/':
+        raise BioLibError('Pushing your root directory is not possible')
+
+    original_working_dir = os.getcwd()
+    os.chdir(input_dir)
+    files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
+
+    if data_size_in_bytes > 4_500_000_000_000:
+        raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
+
+    min_chunk_size_bytes = 10_000_000
+    chunk_size_in_bytes: int
+    if chunk_size_in_mb:
+        chunk_size_in_bytes = chunk_size_in_mb * 1_000_000  # Convert megabytes to bytes
+        if chunk_size_in_bytes < min_chunk_size_bytes:
+            logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
+            chunk_size_in_bytes = min_chunk_size_bytes
+    else:
+        # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
+        chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
+
+    data_size_in_mb = round(data_size_in_bytes / 10**6)
+    print(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
+
+    response = api.client.post(path='/lfs/versions/', data={'resource_uuid': data_record_uuid})
+    data_record_version: DataRecordVersion = response.json()
+    iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
+
+    multipart_uploader = utils.MultiPartUploader(
+        use_process_pool=True,
+        get_presigned_upload_url_request=dict(
+            headers=None,
+            requires_biolib_auth=True,
+            path=f"/lfs/versions/{data_record_version['uuid']}/presigned_upload_url/",
+        ),
+        complete_upload_request=dict(
+            headers=None,
+            requires_biolib_auth=True,
+            path=f"/lfs/versions/{data_record_version['uuid']}/complete_upload/",
+        ),
+    )
+
+    multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
+    os.chdir(original_working_dir)
+    logger.info(f"Successfully pushed a new Data Record version '{data_record_version['uri']}'")
+    return data_record_version['uri']
+
+
+def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
+    app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': uri}).json()
+    return DataRecordVersionInfo(
+        resource_uri=app_response['app_version']['app_uri'],
+        resource_uuid=app_response['app']['public_id'],
+        resource_version_uuid=app_response['app_version']['public_id'],
+    )
biolib/_internal/data_record/remote_storage_endpoint.py
CHANGED
@@ -3,7 +3,7 @@ from datetime import datetime, timedelta
 from urllib.parse import urlparse

 from biolib.api import client as api_client
-from biolib.biolib_api_client.lfs_types import
+from biolib.biolib_api_client.lfs_types import DataRecordVersion
 from biolib.biolib_binary_format.utils import RemoteEndpoint
 from biolib.biolib_logging import logger
 from biolib.typing_utils import Optional
@@ -17,7 +17,7 @@ class DataRecordRemoteStorageEndpoint(RemoteEndpoint):

     def get_remote_url(self) -> str:
         if not self._presigned_url or not self._expires_at or datetime.utcnow() > self._expires_at:
-            lfs_version:
+            lfs_version: DataRecordVersion = api_client.get(
                 path=f'/lfs/versions/{self._resource_version_uuid}/',
             ).json()

biolib/_internal/file_utils.py
ADDED
@@ -0,0 +1,77 @@
+import io
+import os
+import zipfile as zf
+from pathlib import Path
+
+from biolib.typing_utils import Iterator, List, Tuple
+
+
+def get_files_and_size_of_directory(directory: str) -> Tuple[List[str], int]:
+    data_size = 0
+    file_list: List[str] = []
+
+    for path, _, files in os.walk(directory):
+        for file in files:
+            file_path = os.path.join(path, file)
+            if os.path.islink(file_path):
+                continue  # skip symlinks
+
+            relative_file_path = file_path[len(directory) + 1 :]  # +1 to remove starting slash
+            file_list.append(relative_file_path)
+            data_size += os.path.getsize(file_path)
+
+    return file_list, data_size
+
+
+def get_iterable_zip_stream(files: List[str], chunk_size: int) -> Iterator[bytes]:
+    class ChunkedIOBuffer(io.RawIOBase):
+        def __init__(self, chunk_size: int):
+            super().__init__()
+            self.chunk_size = chunk_size
+            self.tmp_data = bytearray()
+
+        def get_buffer_size(self):
+            return len(self.tmp_data)
+
+        def read_chunk(self):
+            chunk = bytes(self.tmp_data[: self.chunk_size])
+            self.tmp_data = self.tmp_data[self.chunk_size :]
+            return chunk
+
+        def write(self, data):
+            data_length = len(data)
+            self.tmp_data += data
+            return data_length
+
+    # create chunked buffer to hold data temporarily
+    io_buffer = ChunkedIOBuffer(chunk_size)
+
+    # create zip writer that will write to the io buffer
+    zip_writer = zf.ZipFile(io_buffer, mode='w')  # type: ignore
+
+    for file_path in files:
+        # generate zip info and prepare zip pointer for writing
+        z_info = zf.ZipInfo.from_file(file_path)
+        zip_pointer = zip_writer.open(z_info, mode='w')
+        if Path(file_path).is_file():
+            # read file chunk by chunk
+            with open(file_path, 'br') as file_pointer:
+                while True:
+                    chunk = file_pointer.read(chunk_size)
+                    if len(chunk) == 0:
+                        break
+                    # write the chunk to the zip
+                    zip_pointer.write(chunk)
+                    # if writing the chunk caused us to go over chunk_size, flush it
+                    if io_buffer.get_buffer_size() > chunk_size:
+                        yield io_buffer.read_chunk()
+
+        zip_pointer.close()
+
+    # flush any remaining data in the stream (e.g. zip file meta data)
+    zip_writer.close()
+    while True:
+        chunk = io_buffer.read_chunk()
+        if len(chunk) == 0:
+            break
+        yield chunk
biolib/_internal/lfs/__init__.py
ADDED
@@ -0,0 +1 @@
+from .cache import prune_lfs_cache
biolib/_internal/push_application.py
CHANGED
@@ -6,12 +6,12 @@ import rich.progress
 import yaml

 from biolib import api, utils
+from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
 from biolib.biolib_api_client import BiolibApiClient
 from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
 from biolib.biolib_docker_client import BiolibDockerClient
 from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger
-from biolib.lfs.utils import get_files_and_size_of_directory, get_iterable_zip_stream
 from biolib.typing_utils import Iterable, Optional, Set, TypedDict

 REGEX_MARKDOWN_INLINE_IMAGE = re.compile(r'!\[(?P<alt>.*)\]\((?P<src>.*)\)')
biolib/_internal/runtime.py
CHANGED
@@ -1,8 +1,4 @@
-import
-import re
-
-from biolib import api
-from biolib.typing_utils import Optional, TypedDict, cast
+from biolib.typing_utils import TypedDict


 class RuntimeJobDataDict(TypedDict):
@@ -21,76 +17,3 @@ class BioLibRuntimeNotRecognizedError(BioLibRuntimeError):
     def __init__(self, message='The runtime is not recognized as a BioLib app'):
         self.message = message
         super().__init__(self.message)
-
-
-class Runtime:
-    _job_data: Optional[RuntimeJobDataDict] = None
-
-    @staticmethod
-    def check_is_environment_biolib_app() -> bool:
-        return bool(Runtime._try_to_get_job_data())
-
-    @staticmethod
-    def get_job_id() -> str:
-        return Runtime._get_job_data()['job_uuid']
-
-    @staticmethod
-    def get_job_auth_token() -> str:
-        return Runtime._get_job_data()['job_auth_token']
-
-    @staticmethod
-    def get_job_requested_machine() -> str:
-        return Runtime._get_job_data()['job_requested_machine']
-
-    @staticmethod
-    def get_app_uri() -> str:
-        return Runtime._get_job_data()['app_uri']
-
-    @staticmethod
-    def get_secret(secret_name: str) -> bytes:
-        assert re.match(
-            '^[a-zA-Z0-9_-]*$', secret_name
-        ), 'Secret name can only contain alphanumeric characters and dashes or underscores '
-        try:
-            with open(f'/biolib/secrets/{secret_name}', 'rb') as file:
-                return file.read()
-        except BaseException as error:
-            raise BioLibRuntimeError(f'Unable to get system secret: {secret_name}') from error
-
-    @staticmethod
-    def set_main_result_prefix(result_prefix: str) -> None:
-        job_data = Runtime._get_job_data()
-        api.client.patch(
-            data={'result_name_prefix': result_prefix},
-            headers={'Job-Auth-Token': job_data['job_auth_token']},
-            path=f"/jobs/{job_data['job_uuid']}/main_result/",
-        )
-
-    @staticmethod
-    def create_result_note(note: str) -> None:
-        job_id = Runtime.get_job_id()
-        # Note: Authentication is added by app caller proxy in compute node
-        api.client.post(data={'note': note}, path=f'/jobs/{job_id}/notes/')
-
-    @staticmethod
-    def _try_to_get_job_data() -> Optional[RuntimeJobDataDict]:
-        if not Runtime._job_data:
-            try:
-                with open('/biolib/secrets/biolib_system_secret') as file:
-                    job_data: RuntimeJobDataDict = json.load(file)
-            except BaseException:
-                return None
-
-            if not job_data['version'].startswith('1.'):
-                raise BioLibRuntimeError(f"Unexpected system secret version {job_data['version']} expected 1.x.x")
-
-            Runtime._job_data = job_data
-
-        return cast(RuntimeJobDataDict, Runtime._job_data)
-
-    @staticmethod
-    def _get_job_data() -> RuntimeJobDataDict:
-        job_data = Runtime._try_to_get_job_data()
-        if not job_data:
-            raise BioLibRuntimeNotRecognizedError() from None
-        return job_data
biolib/_runtime/runtime.py
ADDED
@@ -0,0 +1,79 @@
+from biolib import api
+from biolib._internal.runtime import BioLibRuntimeError, BioLibRuntimeNotRecognizedError, RuntimeJobDataDict
+from biolib.typing_utils import cast, Optional as _Optional
+
+import json
+import re
+
+
+class Runtime:
+    _job_data: _Optional[RuntimeJobDataDict] = None
+
+    @staticmethod
+    def check_is_environment_biolib_app() -> bool:
+        return bool(Runtime._try_to_get_job_data())
+
+    @staticmethod
+    def get_job_id() -> str:
+        return Runtime._get_job_data()['job_uuid']
+
+    @staticmethod
+    def get_job_auth_token() -> str:
+        return Runtime._get_job_data()['job_auth_token']
+
+    @staticmethod
+    def get_job_requested_machine() -> str:
+        return Runtime._get_job_data()['job_requested_machine']
+
+    @staticmethod
+    def get_app_uri() -> str:
+        return Runtime._get_job_data()['app_uri']
+
+    @staticmethod
+    def get_secret(secret_name: str) -> bytes:
+        assert re.match(
+            '^[a-zA-Z0-9_-]*$', secret_name
+        ), 'Secret name can only contain alphanumeric characters and dashes or underscores '
+        try:
+            with open(f'/biolib/secrets/{secret_name}', 'rb') as file:
+                return file.read()
+        except BaseException as error:
+            raise BioLibRuntimeError(f'Unable to get system secret: {secret_name}') from error
+
+    @staticmethod
+    def set_main_result_prefix(result_prefix: str) -> None:
+        job_data = Runtime._get_job_data()
+        api.client.patch(
+            data={'result_name_prefix': result_prefix},
+            headers={'Job-Auth-Token': job_data['job_auth_token']},
+            path=f"/jobs/{job_data['job_uuid']}/main_result/",
+        )
+
+    @staticmethod
+    def create_result_note(note: str) -> None:
+        job_id = Runtime.get_job_id()
+        # Note: Authentication is added by app caller proxy in compute node
+        api.client.post(data={'note': note}, path=f'/jobs/{job_id}/notes/')
+
+    @staticmethod
+    def _try_to_get_job_data() -> _Optional[RuntimeJobDataDict]:
+        if not Runtime._job_data:
+            try:
+                with open('/biolib/secrets/biolib_system_secret') as file:
+                    job_data: RuntimeJobDataDict = json.load(file)
+            except BaseException:
+                return None
+
+            if not job_data['version'].startswith('1.'):
+                raise BioLibRuntimeError(f"Unexpected system secret version {job_data['version']} expected 1.x.x")
+
+            Runtime._job_data = job_data
+
+        return cast(RuntimeJobDataDict, Runtime._job_data)
+
+    @staticmethod
+    def _get_job_data() -> RuntimeJobDataDict:
+        job_data = Runtime._try_to_get_job_data()
+        if not job_data:
+            raise BioLibRuntimeNotRecognizedError() from None
+        return job_data
biolib/biolib_api_client/api_client.py
CHANGED
@@ -6,7 +6,7 @@ import os
 from datetime import datetime, timezone
 from json.decoder import JSONDecodeError

-from biolib.
+from biolib._runtime.runtime import Runtime
 from biolib._internal.http_client import HttpClient
 from biolib.typing_utils import Optional
 from biolib.biolib_errors import BioLibError
biolib/biolib_api_client/lfs_types.py
CHANGED
@@ -1,13 +1,19 @@
 from biolib.typing_utils import TypedDict


-class
+class DataRecordVersion(TypedDict):
     presigned_download_url: str
     size_bytes: int
     uri: str
     uuid: str


-class
+class DataRecordInfo(TypedDict):
     uri: str
     uuid: str
+
+
+class DataRecordVersionInfo(TypedDict):
+    resource_uri: str
+    resource_uuid: str
+    resource_version_uuid: str
biolib/cli/data_record.py
CHANGED
@@ -1,9 +1,11 @@
+import json
 import logging
 import os
+from typing import Dict, List

 import click

-from biolib.
+from biolib._data_record.data_record import DataRecord
 from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.typing_utils import Optional

@@ -15,11 +17,18 @@ def data_record() -> None:


 @data_record.command(help='Create a Data Record')
-@click.
+@click.argument('uri', required=True)
+@click.option('--data-path', required=True, type=click.Path(exists=True))
+def create(uri: str, data_path: str) -> None:
+    DataRecord.create(destination=uri, data_path=data_path)
+
+
+@data_record.command(help='Update a Data Record')
+@click.argument('uri', required=True)
 @click.option('--data-path', required=True, type=click.Path(exists=True))
-@click.option('--
-def
-    DataRecord.
+@click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
+def update(uri: str, data_path: str, chunk_size: Optional[int]) -> None:
+    DataRecord.get_by_uri(uri=uri).update(data_path=data_path, chunk_size_in_mb=chunk_size)


 @data_record.command(help='Download files from a Data Record')
@@ -27,7 +36,7 @@ def create(destination: str, data_path: str, name: Optional[str] = None) -> None
 @click.option('--file', required=False, type=str)
 @click.option('--path-filter', required=False, type=str, hide_input=True)
 def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
-    record = DataRecord(uri=uri)
+    record = DataRecord.get_by_uri(uri=uri)
     if file is not None:
         try:
             file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file][0]
@@ -41,3 +50,30 @@ def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
     else:
         assert not os.path.exists(record.name), f'Directory with name {record.name} already exists in current directory'
         record.save_files(output_dir=record.name, path_filter=path_filter)
+
+
+@data_record.command(help='Describe a Data Record')
+@click.argument('uri', required=True)
+@click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
+def describe(uri: str, output_as_json: bool) -> None:
+    record = DataRecord.get_by_uri(uri)
+    files_info: List[Dict] = []
+    total_size_in_bytes = 0
+    for file in record.list_files():
+        files_info.append({'path': file.path, 'size_bytes': file.length})
+        total_size_in_bytes += file.length
+
+    if output_as_json:
+        print(
+            json.dumps(
+                obj={'uri': record.uri, 'size_bytes': total_size_in_bytes, 'files': files_info},
+                indent=4,
+            )
+        )
+    else:
+        print(f'Data Record {record.uri}\ntotal {total_size_in_bytes} bytes\n')
+        print('size bytes path')
+        for file_info in files_info:
+            size_string = str(file_info['size_bytes'])
+            leading_space_string = ' ' * (10 - len(size_string))
+            print(f"{leading_space_string}{size_string} {file_info['path']}")
biolib/cli/lfs.py
CHANGED
@@ -7,9 +7,9 @@ from typing import Dict, List
 import click

 from biolib import biolib_errors
-from biolib.
+from biolib._data_record.data_record import DataRecord
+from biolib._internal.lfs import prune_lfs_cache
 from biolib.biolib_logging import logger, logger_no_user_data
-from biolib.lfs import create_large_file_system, prune_lfs_cache, push_large_file_system
 from biolib.typing_utils import Optional


@@ -21,9 +21,10 @@ def lfs() -> None:
 @lfs.command(help='Create a Large File System')
 @click.argument('uri', required=True)
 def create(uri: str) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record create" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
-
+    DataRecord.create(destination=uri)


 @lfs.command(help='Push a new version of a Large File System')
@@ -31,10 +32,11 @@ def create(uri: str) -> None:
 @click.option('--path', required=True, type=click.Path(exists=True))
 @click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
 def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record update" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
     try:
-
+        DataRecord.get_by_uri(uri=uri).update(data_path=path, chunk_size_in_mb=chunk_size)
     except biolib_errors.BioLibError as error:
         print(f'An error occurred:\n{error.message}', file=sys.stderr)
         exit(1)
@@ -44,10 +46,11 @@ def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
 @click.argument('uri', required=True)
 @click.option('--file-path', required=True, type=str)
 def download_file(uri: str, file_path: str) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record download" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
     try:
-        record = DataRecord(uri=uri)
+        record = DataRecord.get_by_uri(uri=uri)
         try:
             file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file_path][0]
         except IndexError:
@@ -66,7 +69,8 @@ def download_file(uri: str, file_path: str) -> None:
 @click.argument('uri', required=True)
 @click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
 def describe(uri: str, output_as_json: bool) -> None:
-
+    logger.warning('This is command deprecated, please use "biolib data-record describe" instead.')
+    data_record = DataRecord.get_by_uri(uri)
     files_info: List[Dict] = []
     total_size_in_bytes = 0
     for file in data_record.list_files():
biolib/runtime/__init__.py
CHANGED
biolib/sdk/__init__.py
CHANGED
@@ -1,12 +1,12 @@
 # Imports to hide and use as private internal utils
+from biolib._data_record.data_record import DataRecord as _DataRecord
 from biolib._internal.push_application import push_application as _push_application
 from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
 from biolib.app import BioLibApp as _BioLibApp
 from biolib.typing_utils import Optional as _Optional

-#
-from biolib.
-from biolib._internal.runtime import Runtime
+# Classes to expose as public API
+from biolib._runtime.runtime import Runtime


 def push_app_version(uri: str, path: str) -> _BioLibApp:
@@ -42,5 +42,9 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
     return AppVersionFixturePlugin(app_version)


-def create_data_record(destination: str, data_path: str, name: _Optional[str] = None) ->
-
+def create_data_record(destination: str, data_path: str, name: _Optional[str] = None) -> _DataRecord:
+    if name:
+        destination_with_name = f"{destination}/{name}"
+    else:
+        destination_with_name = destination
+    return _DataRecord.create(destination_with_name, data_path)
{pybiolib-1.1.2038.dist-info → pybiolib-1.1.2097.dist-info}/RECORD
CHANGED
@@ -1,32 +1,37 @@
 LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
 README.md,sha256=_IH7pxFiqy2bIAmaVeA-iVTyUwWRjMIlfgtUbYTtmls,368
-biolib/__init__.py,sha256=
+biolib/__init__.py,sha256=yX8w8bDiY7CIxfKHFRF0U1hhwgCCIXtVr18Td5iNLp8,4135
+biolib/_data_record/data_record.py,sha256=jUeCQjnVQLNLmlXO3rREEUnjXjOYuaQjBO7R66P6wFU,8909
 biolib/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-biolib/_internal/data_record/__init__.py,sha256=
-biolib/_internal/data_record/data_record.py,sha256=
-biolib/_internal/data_record/remote_storage_endpoint.py,sha256=
+biolib/_internal/data_record/__init__.py,sha256=0T0CV6PfKc8itjMu-48sCJjcZQEzXl1ZLBqG_LjJTqQ,82
+biolib/_internal/data_record/data_record.py,sha256=D0BaC8WhnkM564eKUI69hVHUkKY1In0cyfpjxYyWk18,3363
+biolib/_internal/data_record/remote_storage_endpoint.py,sha256=eCptuZ4DMAPnaNCVDvpWXwXGI6Jac9U1N5dqU8Cj95Q,1732
+biolib/_internal/file_utils.py,sha256=4jT6j7bB21c0JNn5BfnyWQib_zt0CVtJ_TiOFOStRcE,2604
 biolib/_internal/fuse_mount/__init__.py,sha256=B_tM6RM2dBw-vbpoHJC4X3tOAaN1H2RDvqYJOw3xFwg,55
 biolib/_internal/fuse_mount/experiment_fuse_mount.py,sha256=08aUdEq_bvqLBft_gSLjOClKDy5sBnMts1RfJf7AP_U,7012
 biolib/_internal/http_client.py,sha256=DdooXei93JKGYGV4aQmzue_oFzvHkozg2UCxgk9dfDM,5081
+biolib/_internal/lfs/__init__.py,sha256=gSWo_xg61UniYgD7yNYxeT4I9uaXBCBSi3_nmZjnPpE,35
+biolib/_internal/lfs/cache.py,sha256=pQS2np21rdJ6I3DpoOutnzPHpLOZgUIS8TMltUJk_k4,2226
 biolib/_internal/libs/__init__.py,sha256=Jdf4tNPqe_oIIf6zYml6TiqhL_02Vyqwge6IELrAFhw,98
 biolib/_internal/libs/fusepy/__init__.py,sha256=AWDzNFS-XV_5yKb0Qx7kggIhPzq1nj_BZS5y2Nso08k,41944
-biolib/_internal/push_application.py,sha256=
-biolib/_internal/runtime.py,sha256=
+biolib/_internal/push_application.py,sha256=8P7eXvySn7CRp5XBDkO3xjTGixS8g7-jD-_iwzM_XDI,10020
+biolib/_internal/runtime.py,sha256=9pZ3s3L7LGxdqOgnHh1KK3Jjyn_9MjhQmKHI-6hMT3U,448
 biolib/_internal/utils/__init__.py,sha256=p5vsIFyu-zYqBgdSMfwW9NC_jk7rXvvCbV4Bzd3As7c,630
+biolib/_runtime/runtime.py,sha256=zy9HrE4X5hBqm8doUHkckyflquSBDSXV3REhT2MQGas,2767
 biolib/api/__init__.py,sha256=mQ4u8FijqyLzjYMezMUUbbBGNB3iFmkNdjXnWPZ7Jlw,138
 biolib/api/client.py,sha256=9MD1qI52BnRC_QSydFGjyFquwFw0R9dkDfUrjUouuHQ,3490
 biolib/app/__init__.py,sha256=cdPtcfb_U-bxb9iSL4fCEq2rpD9OjkyY4W-Zw60B0LI,37
 biolib/app/app.py,sha256=8AvPYL1W2wxQ7t7BB2KeVU2WPrm3UL6vVuHPGs8g9L0,8388
 biolib/app/search_apps.py,sha256=K4a41f5XIWth2BWI7OffASgIsD0ko8elCax8YL2igaY,1470
 biolib/biolib_api_client/__init__.py,sha256=E5EMa19wJoblwSdQPYrxc_BtIeRsAuO0L_jQweWw-Yk,182
-biolib/biolib_api_client/api_client.py,sha256=
-biolib/biolib_api_client/app_types.py,sha256=
+biolib/biolib_api_client/api_client.py,sha256=krlSRmmAwtdMMyN1XzQhh1gihB1ERSIVslWQ-dqI1yU,7188
+biolib/biolib_api_client/app_types.py,sha256=FxSr4UqfnMhLe34p8bm02wsC3g1Jz8iaing5tRKDOQI,2442
 biolib/biolib_api_client/auth.py,sha256=kjm0ZHnH3I8so3su2sZbBxNHYp-ZUdrZ5lwQ0K36RSw,949
 biolib/biolib_api_client/biolib_app_api.py,sha256=DndlVxrNTes6DOaWyMINLGZQCRMWVvR7gwt5HVlyf5Y,4240
 biolib/biolib_api_client/biolib_job_api.py,sha256=IpFahcRzm7GNy8DJ-XHYe-x7r4Voba8o22IXw5puHn8,6782
 biolib/biolib_api_client/common_types.py,sha256=RH-1KNHqUF-EkTpfPOSTt5Mq1GPdfju_cqXDesscO1I,123
 biolib/biolib_api_client/job_types.py,sha256=Dl4NhU2xpgpXV-7YIoDf6WL63SLR5bni55OX8x5539M,1300
-biolib/biolib_api_client/lfs_types.py,sha256=
+biolib/biolib_api_client/lfs_types.py,sha256=joZWP6-sa5_Ug_6xIp5fHAgEo_bqLE3rbleQocZtDcg,339
 biolib/biolib_api_client/user_state.py,sha256=XcgWV-MgVk88mIlMmnu8yHxMu8OCaw8o0tk7TVo5Hcg,637
 biolib/biolib_binary_format/__init__.py,sha256=HMl5SdX_VUWE4OQzi4Jf_yFvC7b0bSPOGPHYi9dWM2Q,185
 biolib/biolib_binary_format/base_bbf_package.py,sha256=vxRV4iKy0dXeDOlFWnMFI0hGnDBYDH5Cgh5gAfuObt8,959
@@ -46,10 +51,10 @@ biolib/biolib_errors.py,sha256=5m4lK2l39DafpoXBImEBD4EPH3ayXBX0JgtPzmGClow,689
 biolib/biolib_logging.py,sha256=J3E5H_LL5k6ZUim2C8gqN7E6lCBZMTpO4tnMpOPwG9U,2854
 biolib/cli/__init__.py,sha256=0v3c_J-U0k46c5ZWeQjLG_kTaKDJm81LBxQpDO2B_aI,1286
 biolib/cli/auth.py,sha256=rpWGmXs6Fz6CGrO9K8ibPRszOdXG78Vig_boKaVCD9A,2082
-biolib/cli/data_record.py,sha256=
+biolib/cli/data_record.py,sha256=oDy8U6mv-h-hbeMihXRzVEvM-WrGQq6oBiBl3xDRaXs,3220
 biolib/cli/download_container.py,sha256=HIZVHOPmslGE5M2Dsp9r2cCkAEJx__vcsDz5Wt5LRos,483
 biolib/cli/init.py,sha256=wQOfii_au-d30Hp7DdH-WVw-WVraKvA_zY4za1w7DE8,821
-biolib/cli/lfs.py,sha256=
+biolib/cli/lfs.py,sha256=z2qHUwink85mv9yDgifbVKkVwuyknGhMDTfly_gLKJM,4151
 biolib/cli/push.py,sha256=TFi7O9tJ3zFe0VmtVTV3Vh9_xIMHnrc41xxcaBKU46g,813
 biolib/cli/run.py,sha256=BbvXLQ-XibjQ71Y2d4URMH_8dflNVwM0i3TIWhw_u_c,1634
 biolib/cli/runtime.py,sha256=Xv-nrma5xX8NidWcvbUKcUvuN5TCarZa4A8mPVmF-z0,361
@@ -92,11 +97,8 @@ biolib/jobs/__init__.py,sha256=aIb2H2DHjQbM2Bs-dysFijhwFcL58Blp0Co0gimED3w,32
 biolib/jobs/job.py,sha256=npnARoP408SXD2UqyzFRJYdEJsP_gHoBh2xQkNegYqg,18884
 biolib/jobs/job_result.py,sha256=rALHiKYNaC9lHi_JJqBob1RubzNLwG9Z386kwRJjd2M,5885
 biolib/jobs/types.py,sha256=qhadtH2KDC2WUOOqPiwke0YgtQY4FtuB71Stekq1k48,970
-biolib/
-biolib/
-biolib/lfs/utils.py,sha256=HSs7F2wXklYhhv5tabfaeC5noXJyxRjbGD5IhWOVdxs,5918
-biolib/runtime/__init__.py,sha256=x1Ivydtu9TFTaX-Cofg_kGA-TI0zLon-ccrFiiVgBok,492
-biolib/sdk/__init__.py,sha256=wkQs7ltIpYK9Xw0-FLLacblemmlNGz8J2UmlM0noGSs,1749
+biolib/runtime/__init__.py,sha256=Fg2ZIAmUegurLKagpBNfRgLcOwR2VZSmXQpb-ryRwI0,505
+biolib/sdk/__init__.py,sha256=qJ_V_Edxolzi4VBQCrvem5lYIkJ0FVH3VZepSDuXjTc,1895
 biolib/tables.py,sha256=acH7VjwAbadLo8P84FSnKEZxCTVsF5rEg9VPuxElNs8,872
 biolib/templates/__init__.py,sha256=Yx62sSyDCDesRQDQgmbDsLpfgEh93fWE8r9u4g2azXk,36
 biolib/templates/example_app.py,sha256=EB3E3RT4SeO_ii5nVQqJpi5KDGNE_huF1ub-e5ZFveE,715
@@ -109,8 +111,8 @@ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3
 biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
 biolib/utils/seq_util.py,sha256=jC5WhH63FTD7SLFJbxQGA2hOt9NTwq9zHl_BEec1Z0c,4907
 biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
-pybiolib-1.1.
-pybiolib-1.1.
-pybiolib-1.1.
-pybiolib-1.1.
-pybiolib-1.1.
+pybiolib-1.1.2097.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
+pybiolib-1.1.2097.dist-info/METADATA,sha256=-4wEBR8SXfG_VDlLRZR7UgrlKee5VydzL-L6wMKP17Y,1508
+pybiolib-1.1.2097.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+pybiolib-1.1.2097.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
+pybiolib-1.1.2097.dist-info/RECORD,,
biolib/lfs/__init__.py
DELETED
biolib/lfs/utils.py
DELETED
@@ -1,153 +0,0 @@
-import io
-import os
-import zipfile as zf
-from pathlib import Path
-
-from biolib import utils, api
-from biolib.biolib_api_client import BiolibApiClient
-from biolib.biolib_api_client.lfs_types import LargeFileSystem, LargeFileSystemVersion
-from biolib.biolib_logging import logger
-from biolib.biolib_errors import BioLibError
-from biolib.typing_utils import List, Tuple, Iterator, Optional
-from biolib.utils.app_uri import parse_app_uri
-
-
-def get_files_and_size_of_directory(directory: str) -> Tuple[List[str], int]:
-    data_size = 0
-    file_list: List[str] = []
-
-    for path, _, files in os.walk(directory):
-        for file in files:
-            file_path = os.path.join(path, file)
-            if os.path.islink(file_path):
-                continue  # skip symlinks
-
-            relative_file_path = file_path[len(directory) + 1:]  # +1 to remove starting slash
-            file_list.append(relative_file_path)
-            data_size += os.path.getsize(file_path)
-
-    return file_list, data_size
-
-
-def get_iterable_zip_stream(files: List[str], chunk_size: int) -> Iterator[bytes]:
-    class ChunkedIOBuffer(io.RawIOBase):
-        def __init__(self, chunk_size: int):
-            super().__init__()
-            self.chunk_size = chunk_size
-            self.tmp_data = bytearray()
-
-        def get_buffer_size(self):
-            return len(self.tmp_data)
-
-        def read_chunk(self):
-            chunk = bytes(self.tmp_data[:self.chunk_size])
-            self.tmp_data = self.tmp_data[self.chunk_size:]
-            return chunk
-
-        def write(self, data):
-            data_length = len(data)
-            self.tmp_data += data
-            return data_length
-
-    # create chunked buffer to hold data temporarily
-    io_buffer = ChunkedIOBuffer(chunk_size)
-
-    # create zip writer that will write to the io buffer
-    zip_writer = zf.ZipFile(io_buffer, mode='w')  # type: ignore
-
-    for file_path in files:
-        # generate zip info and prepare zip pointer for writing
-        z_info = zf.ZipInfo.from_file(file_path)
-        zip_pointer = zip_writer.open(z_info, mode='w')
-        if Path(file_path).is_file():
-            # read file chunk by chunk
-            with open(file_path, 'br') as file_pointer:
-                while True:
-                    chunk = file_pointer.read(chunk_size)
-                    if len(chunk) == 0:
-                        break
-                    # write the chunk to the zip
-                    zip_pointer.write(chunk)
-                    # if writing the chunk caused us to go over chunk_size, flush it
-                    if io_buffer.get_buffer_size() > chunk_size:
-                        yield io_buffer.read_chunk()
-
-        zip_pointer.close()
-
-    # flush any remaining data in the stream (e.g. zip file meta data)
-    zip_writer.close()
-    while True:
-        chunk = io_buffer.read_chunk()
-        if len(chunk) == 0:
-            break
-        yield chunk
-
-
-def create_large_file_system(lfs_uri: str) -> str:
-    BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Large File System')
-
-    uri_parsed = parse_app_uri(lfs_uri)
-    response = api.client.post(
-        path='/lfs/',
-        data={
-            'account_handle': uri_parsed['account_handle_normalized'],
-            'name': uri_parsed['app_name'],
-        },
-    )
-    lfs: LargeFileSystem = response.json()
-    logger.info(f"Successfully created new Large File System '{lfs['uri']}'")
-    return lfs['uri']
-
-
-def push_large_file_system(lfs_uri: str, input_dir: str, chunk_size_in_mb: Optional[int] = None) -> str:
-    BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Large File System')
-
-    if not os.path.isdir(input_dir):
-        raise BioLibError(f'Could not find folder at {input_dir}')
-
-    if os.path.realpath(input_dir) == '/':
-        raise BioLibError('Pushing your root directory is not possible')
-
-    original_working_dir = os.getcwd()
-    os.chdir(input_dir)
-    files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
-
-    if data_size_in_bytes > 4_500_000_000_000:
-        raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
-
-    min_chunk_size_bytes = 10_000_000
-    chunk_size_in_bytes: int
-    if chunk_size_in_mb:
-        chunk_size_in_bytes = chunk_size_in_mb * 1_000_000  # Convert megabytes to bytes
-        if chunk_size_in_bytes < min_chunk_size_bytes:
-            logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
-            chunk_size_in_bytes = min_chunk_size_bytes
-    else:
-        # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
-        chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
-
-    data_size_in_mb = round(data_size_in_bytes / 10 ** 6)
-    print(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
-
-    response = api.client.post(path='/lfs/versions/', data={'resource_uri': lfs_uri})
-    lfs_version: LargeFileSystemVersion = response.json()
-    iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
-
-    multipart_uploader = utils.MultiPartUploader(
-        use_process_pool=True,
-        get_presigned_upload_url_request=dict(
-            headers=None,
-            requires_biolib_auth=True,
-            path=f"/lfs/versions/{lfs_version['uuid']}/presigned_upload_url/",
-        ),
-        complete_upload_request=dict(
-            headers=None,
-            requires_biolib_auth=True,
-            path=f"/lfs/versions/{lfs_version['uuid']}/complete_upload/",
-        ),
-    )
-
-    multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
-    os.chdir(original_working_dir)
-    logger.info(f"Successfully pushed a new LFS version '{lfs_version['uri']}'")
-    return lfs_version['uri']
Files without changes: biolib/{lfs → _internal/lfs}/cache.py (moved), and the dist-info LICENSE, WHEEL, and entry_points.txt files (renamed with the new version).