pybiolib 1.1.1747__py3-none-any.whl → 1.1.2193__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. biolib/__init__.py +18 -5
  2. biolib/_data_record/data_record.py +278 -0
  3. biolib/_internal/data_record/__init__.py +1 -0
  4. biolib/_internal/data_record/data_record.py +97 -0
  5. biolib/_internal/data_record/remote_storage_endpoint.py +38 -0
  6. biolib/_internal/file_utils.py +77 -0
  7. biolib/_internal/fuse_mount/__init__.py +1 -0
  8. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  9. biolib/_internal/http_client.py +42 -23
  10. biolib/_internal/lfs/__init__.py +1 -0
  11. biolib/_internal/libs/__init__.py +1 -0
  12. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  13. biolib/_internal/push_application.py +22 -37
  14. biolib/_internal/runtime.py +19 -0
  15. biolib/_internal/types/__init__.py +4 -0
  16. biolib/_internal/types/app.py +9 -0
  17. biolib/_internal/types/data_record.py +40 -0
  18. biolib/_internal/types/experiment.py +10 -0
  19. biolib/_internal/types/resource.py +14 -0
  20. biolib/_internal/types/typing.py +7 -0
  21. biolib/_internal/utils/__init__.py +18 -0
  22. biolib/_runtime/runtime.py +80 -0
  23. biolib/api/__init__.py +1 -0
  24. biolib/api/client.py +39 -17
  25. biolib/app/app.py +40 -72
  26. biolib/app/search_apps.py +8 -12
  27. biolib/biolib_api_client/api_client.py +22 -10
  28. biolib/biolib_api_client/app_types.py +2 -1
  29. biolib/biolib_api_client/biolib_app_api.py +1 -1
  30. biolib/biolib_api_client/biolib_job_api.py +6 -0
  31. biolib/biolib_api_client/job_types.py +4 -4
  32. biolib/biolib_api_client/lfs_types.py +8 -2
  33. biolib/biolib_binary_format/remote_endpoints.py +12 -10
  34. biolib/biolib_binary_format/utils.py +41 -4
  35. biolib/cli/__init__.py +6 -2
  36. biolib/cli/auth.py +58 -0
  37. biolib/cli/data_record.py +80 -0
  38. biolib/cli/download_container.py +3 -1
  39. biolib/cli/init.py +1 -0
  40. biolib/cli/lfs.py +45 -11
  41. biolib/cli/push.py +1 -1
  42. biolib/cli/run.py +3 -2
  43. biolib/cli/start.py +1 -0
  44. biolib/compute_node/cloud_utils/cloud_utils.py +15 -18
  45. biolib/compute_node/job_worker/cache_state.py +1 -1
  46. biolib/compute_node/job_worker/executors/docker_executor.py +134 -114
  47. biolib/compute_node/job_worker/job_storage.py +3 -4
  48. biolib/compute_node/job_worker/job_worker.py +31 -15
  49. biolib/compute_node/remote_host_proxy.py +75 -70
  50. biolib/compute_node/webserver/webserver_types.py +0 -1
  51. biolib/experiments/experiment.py +75 -44
  52. biolib/jobs/job.py +125 -47
  53. biolib/jobs/job_result.py +46 -21
  54. biolib/jobs/types.py +1 -1
  55. biolib/runtime/__init__.py +14 -1
  56. biolib/sdk/__init__.py +29 -5
  57. biolib/typing_utils.py +2 -7
  58. biolib/user/sign_in.py +10 -14
  59. biolib/utils/__init__.py +1 -1
  60. biolib/utils/app_uri.py +11 -4
  61. biolib/utils/cache_state.py +2 -2
  62. biolib/utils/seq_util.py +38 -30
  63. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/METADATA +1 -1
  64. pybiolib-1.1.2193.dist-info/RECORD +123 -0
  65. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/WHEEL +1 -1
  66. biolib/biolib_api_client/biolib_account_api.py +0 -8
  67. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -34
  68. biolib/experiments/types.py +0 -9
  69. biolib/lfs/__init__.py +0 -6
  70. biolib/lfs/utils.py +0 -237
  71. biolib/runtime/results.py +0 -20
  72. pybiolib-1.1.1747.dist-info/RECORD +0 -108
  73. /biolib/{lfs → _internal/lfs}/cache.py +0 -0
  74. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/LICENSE +0 -0
  75. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/entry_points.txt +0 -0
biolib/app/search_apps.py CHANGED
@@ -7,41 +7,37 @@ from biolib.typing_utils import Optional, List
 def search_apps(
     search_query: Optional[str] = None,
     team: Optional[str] = None,
-    count: int = 100
-) -> List[str]:
-
+    count: int = 100,
+) -> List[str]:
     query_exceeded_page_size = False
     params = {
         'page_size': count,
     }
     if team:
-        if not team.startswith("@"):
-            team = "@biolib.com/" + team
+        if not team.startswith('@'):
+            team = '@biolib.com/' + team
         params['account_handle'] = team
 
     if search_query:
         params['search'] = search_query
 
-    apps_json = api.client.get(
-        path='/apps/',
-        params=params
-    ).json()
+    apps_json = api.client.get(path='/apps/', params=params).json()
     if apps_json['count'] > count:
         query_exceeded_page_size = True
 
     apps = [app['resource_uri'] for app in apps_json['results']]
 
-    if not utils.BASE_URL_IS_PUBLIC_BIOLIB and (not team or team.lower().startswith("@biolib.com")):
+    if not utils.BASE_URL_IS_PUBLIC_BIOLIB and (not team or team.lower().startswith('@biolib.com')):
         # Also get federated apps if running on enterprise deployment
         public_biolib_apps_json = api.client.get(
             authenticate=False,
             path='https://biolib.com/api/apps/',
-            params=params
+            params=params,
         ).json()
         if public_biolib_apps_json['count'] > count:
             query_exceeded_page_size = True
 
-        apps.extend([f'@biolib.com/{app["resource_uri"]}' for app in public_biolib_apps_json['results']])
+        apps.extend([f"@biolib.com/{app['resource_uri']}" for app in public_biolib_apps_json['results']])
 
     if query_exceeded_page_size:
         print(f'Search results exceeded {count}, use the argument "count" to increase the amount of results returned')
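For reference, a minimal usage sketch of the reworked search_apps helper; the query below is hypothetical and only parameters visible in this diff are used.

    from biolib.app.search_apps import search_apps

    # Hypothetical query; returns a list of app resource URIs.
    # Raise `count` if the "Search results exceeded" warning is printed.
    app_uris = search_apps(search_query='alignment', count=250)
    print(app_uris)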
biolib/biolib_api_client/api_client.py CHANGED
@@ -6,6 +6,7 @@ import os
 from datetime import datetime, timezone
 from json.decoder import JSONDecodeError
 
+from biolib._runtime.runtime import Runtime
 from biolib._internal.http_client import HttpClient
 from biolib.typing_utils import Optional
 from biolib.biolib_errors import BioLibError
@@ -61,16 +62,18 @@ class _ApiClient:
             return
 
         if self.access_token:
-            decoded_token = self._decode_jwt_without_checking_signature(self.access_token)
+            decoded_token = self.decode_jwt_without_checking_signature(self.access_token)
             if datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60:  # 60 second buffer
                 # Token has not expired yet
                 return
 
         # TODO: Implement nicer error handling
         try:
-            response = HttpClient.request(method='POST',
-                                          url=f'{self.base_url}/api/user/token/refresh/',
-                                          data={'refresh': self.refresh_token})
+            response = HttpClient.request(
+                method='POST',
+                url=f'{self.base_url}/api/user/token/refresh/',
+                data={'refresh': self.refresh_token},
+            )
         except Exception as exception:
             logger.error('Sign in with refresh token failed')
             raise exception
@@ -111,9 +114,11 @@ class _ApiClient:
     def sign_in_with_api_token(self, api_token: str) -> None:
         logger_no_user_data.debug('ApiClient: Signing in with BIOLIB_TOKEN...')
         try:
-            response = HttpClient.request(method='POST',
-                                          url=f'{self.base_url}/api/user/api_tokens/exchange/',
-                                          data={'token': api_token})
+            response = HttpClient.request(
+                method='POST',
+                url=f'{self.base_url}/api/user/api_tokens/exchange/',
+                data={'token': api_token},
+            )
         except Exception as exception:
             logger.error('Sign in with API token failed')
             raise exception
@@ -127,7 +132,7 @@ class _ApiClient:
         self.refresh_token = json_response['refresh_token']
 
     @staticmethod
-    def _decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
+    def decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
         jwt_bytes = jwt.encode('utf-8')
 
         try:
@@ -187,9 +192,16 @@ class BiolibApiClient:
         api_client.refresh_access_token()
 
     @staticmethod
-    def assert_is_signed_in(authenticated_action_description: str) -> None:
+    def is_reauthentication_needed() -> bool:
         api_client = BiolibApiClient.get()
-        if not api_client.is_signed_in:
+        if not api_client.is_signed_in and not Runtime.check_is_environment_biolib_app():
+            return True
+        else:
+            return False
+
+    @staticmethod
+    def assert_is_signed_in(authenticated_action_description: str) -> None:
+        if BiolibApiClient.is_reauthentication_needed():
             raise BioLibError(
                 f'You must be signed in to {authenticated_action_description}. '
                 f'Please set the environment variable "BIOLIB_TOKEN"'
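A minimal sketch of the new re-authentication check, assuming the imports shown in this file; the behaviour inside a BioLib app environment follows the Runtime check added above.

    from biolib.biolib_api_client.api_client import BiolibApiClient

    # False when a token is present or when running inside a BioLib app environment.
    if BiolibApiClient.is_reauthentication_needed():
        print('Set BIOLIB_TOKEN or run "biolib login" before continuing.')
    else:
        BiolibApiClient.assert_is_signed_in('push an application')  # raises only when re-authentication is needed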
biolib/biolib_api_client/app_types.py CHANGED
@@ -1,7 +1,7 @@
 from enum import Enum
 
-from biolib.typing_utils import TypedDict, List, Optional, Dict, Literal
 from biolib.biolib_api_client.common_types import SemanticVersion
+from biolib.typing_utils import Dict, List, Literal, Optional, TypedDict
 
 
 class AppVersionSlim(SemanticVersion):
@@ -16,6 +16,7 @@ class AppVersion(AppVersionSlim):
     source_code_license: str
     stdout_render_type: Literal['text', 'markdown']
     main_output_file: Optional[str]
+    app_uri: str
 
 
 class App(TypedDict):
biolib/biolib_api_client/biolib_job_api.py CHANGED
@@ -35,7 +35,7 @@ def encode_multipart(data, files):
         line_array.append('')
 
     data_encoded = b'\r\n'.join([line.encode() if isinstance(line, str) else line for line in line_array])
-    return 'multipart/form-data; boundary={}'.format(boundary), data_encoded
+    return f'multipart/form-data; boundary={boundary}', data_encoded
 
 
 def _get_git_branch_name() -> str:
@@ -46,6 +46,7 @@ class BiolibJobApi:
         experiment_uuid: Optional[str] = None,
         timeout: Optional[int] = None,
         notify: bool = False,
+        requested_machine_count: Optional[int] = None,
     ):
         data = {
             'app_version_id': app_version_id,
@@ -73,6 +74,9 @@ class BiolibJobApi:
                 'requested_machine': machine
             })
 
+        if requested_machine_count:
+            data.update({'requested_machine_count': requested_machine_count})
+
         if experiment_uuid:
             data['experiment_uuid'] = experiment_uuid
 
@@ -156,6 +160,7 @@ class BiolibJobApi:
         caller_job_uuid: Optional[str] = None,
         requested_timeout_seconds: Optional[int] = None,
         notify: bool = False,
+        requested_machine_count: Optional[int] = None,
     ) -> Dict:
         job_dict: Dict = biolib.api.client.post(
             path='/jobs/create_job_with_data/',
@@ -171,6 +176,7 @@ class BiolibJobApi:
                 'client-version': BIOLIB_PACKAGE_VERSION,
                 'experiment-uuid': experiment_uuid,
                 'requested-machine': requested_machine,
+                'requested-machine-count': str(requested_machine_count) if requested_machine_count else None,
                 'result-name-prefix': result_name_prefix,
                 'requested-timeout-seconds': str(requested_timeout_seconds) if requested_timeout_seconds else None,
                 'notify': 'true' if notify else 'false',
biolib/biolib_api_client/job_types.py CHANGED
@@ -1,9 +1,8 @@
 from enum import Enum
 
-from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo
-from biolib.typing_utils import TypedDict, Optional, List
-
 from biolib.biolib_api_client.app_types import AppVersionOnJob, RemoteHost
+from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo
+from biolib.typing_utils import List, Optional, TypedDict
 
 
 class JobState(Enum):
@@ -15,6 +14,7 @@ class JobState(Enum):
 
 
 class _Job(TypedDict):
+    app_uri: str
     app_version: AppVersionOnJob
     arguments_override_command: bool
     auth_token: str
@@ -22,10 +22,10 @@ class _Job(TypedDict):
     created_at: str
     federated_job_uuid: Optional[str]
     public_id: str
-    uuid: str
     remote_hosts_with_warning: List[RemoteHost]
     state: str
     user_id: Optional[str]
+    uuid: str
 
 
 # type optional keys with total=False
biolib/biolib_api_client/lfs_types.py CHANGED
@@ -1,13 +1,19 @@
 from biolib.typing_utils import TypedDict
 
 
-class LargeFileSystemVersion(TypedDict):
+class DataRecordVersion(TypedDict):
     presigned_download_url: str
     size_bytes: int
     uri: str
     uuid: str
 
 
-class LargeFileSystem(TypedDict):
+class DataRecordInfo(TypedDict):
     uri: str
     uuid: str
+
+
+class DataRecordVersionInfo(TypedDict):
+    resource_uri: str
+    resource_uuid: str
+    resource_version_uuid: str
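The renamed TypedDicts are plain dictionaries; a hypothetical instance for illustration (all values below are placeholders, not data from the package).

    from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersionInfo

    record: DataRecordInfo = {
        'uri': 'biolib.com/my-account/my-record',  # placeholder URI
        'uuid': '11111111-2222-3333-4444-555555555555',  # placeholder UUID
    }
    version: DataRecordVersionInfo = {
        'resource_uri': record['uri'],
        'resource_uuid': record['uuid'],
        'resource_version_uuid': '66666666-7777-8888-9999-000000000000',  # placeholder UUID
    }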
biolib/biolib_binary_format/remote_endpoints.py CHANGED
@@ -1,25 +1,27 @@
 from datetime import datetime, timedelta
-# from urllib.parse import urlparse, parse_qs
-
-from biolib.biolib_logging import logger
 
 from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
 from biolib.biolib_binary_format.utils import RemoteEndpoint
 
+# from urllib.parse import urlparse, parse_qs
+from biolib.biolib_logging import logger
+from biolib.typing_utils import Literal
+
 
-class RemoteJobStorageResultEndpoint(RemoteEndpoint):
-    def __init__(self, job_id: str, job_auth_token: str):
-        self._job_id = job_id
-        self._job_auth_token = job_auth_token
+class RemoteJobStorageEndpoint(RemoteEndpoint):
+    def __init__(self, job_uuid: str, job_auth_token: str, storage_type: Literal['input', 'output']):
         self._expires_at = None
+        self._job_auth_token = job_auth_token
+        self._job_uuid = job_uuid
         self._presigned_url = None
+        self._storage_type: Literal['input', 'output'] = storage_type
 
     def get_remote_url(self):
         if not self._presigned_url or datetime.utcnow() > self._expires_at:
             self._presigned_url = BiolibJobApi.get_job_storage_download_url(
                 job_auth_token=self._job_auth_token,
-                job_uuid=self._job_id,
-                storage_type='results'
+                job_uuid=self._job_uuid,
+                storage_type='results' if self._storage_type == 'output' else 'input',
             )
             self._expires_at = datetime.utcnow() + timedelta(minutes=8)
             # TODO: Use expires at from url
@@ -27,6 +29,6 @@ class RemoteJobStorageResultEndpoint(RemoteEndpoint):
             # query_params = parse_qs(parsed_url.query)
             # time_at_generation = datetime.datetime.strptime(query_params['X-Amz-Date'][0], '%Y%m%dT%H%M%SZ')
            # self._expires_at = time_at_generation + timedelta(seconds=int(query_params['X-Amz-Expires'][0]))
-            logger.debug(f'Job "{self._job_id}" fetched presigned URL with expiry at {self._expires_at.isoformat()}')
+            logger.debug(f'Job "{self._job_uuid}" fetched presigned URL with expiry at {self._expires_at.isoformat()}')
 
         return self._presigned_url
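A sketch of the renamed endpoint class based only on the constructor shown above; the job UUID and auth token are placeholders.

    from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint

    endpoint = RemoteJobStorageEndpoint(
        job_uuid='00000000-0000-0000-0000-000000000000',  # placeholder job UUID
        job_auth_token='placeholder-job-auth-token',
        storage_type='output',  # 'output' maps to the 'results' storage type server-side
    )
    presigned_url = endpoint.get_remote_url()  # cached and refreshed shortly before expiry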
biolib/biolib_binary_format/utils.py CHANGED
@@ -1,6 +1,8 @@
 from abc import ABC, abstractmethod
 import io
-
+import math
+from typing import Optional, Callable
+from biolib.typing_utils import Iterator
 from biolib._internal.http_client import HttpClient
 
 
@@ -106,10 +108,18 @@ class InMemoryIndexableBuffer(IndexableBuffer):
 
 class LazyLoadedFile:
 
-    def __init__(self, path: str, buffer: IndexableBuffer, start: int, length: int):
+    def __init__(
+        self,
+        path: str,
+        buffer: IndexableBuffer,
+        start: Optional[int],
+        length: int,
+        start_func: Optional[Callable[[], int]] = None,
+    ):
         self._path = path
         self._buffer = buffer
         self._start = start
+        self._start_func = start_func
         self._length = length
 
     def __repr__(self) -> str:
@@ -119,8 +129,16 @@ class LazyLoadedFile:
     def path(self) -> str:
         return self._path
 
+    @property
+    def name(self) -> str:
+        return self._path.split('/')[-1]
+
     @property
     def start(self) -> int:
+        if self._start is None:
+            assert self._start_func is not None, 'No start function or start value'
+            self._start = self._start_func()
+
         return self._start
 
     @property
@@ -130,5 +148,24 @@
     def get_file_handle(self) -> io.BufferedIOBase:
         return io.BytesIO(self.get_data())
 
-    def get_data(self) -> bytes:
-        return self._buffer.get_data(start=self._start, length=self._length)
+    def get_data(self, start=0, length=None) -> bytes:
+        start_offset = start + self.start
+        # make sure length doesn't go outside file boundaries
+        length_to_end_of_file = max(self._length - start, 0)
+        if length is None:
+            length_to_request = length_to_end_of_file
+        else:
+            length_to_request = min(length, length_to_end_of_file)
+        return self._buffer.get_data(start=start_offset, length=length_to_request)
+
+    def get_data_iterator(self) -> Iterator[bytes]:
+        if self._length == 0:
+            yield b''
+        else:
+            chunk_size = 10_000_000
+            chunks_to_yield = math.ceil(self._length / chunk_size)
+            for chunk_idx in range(chunks_to_yield - 1):
+                yield self._buffer.get_data(start=self.start+chunk_idx*chunk_size, length=chunk_size)
+            data_already_yielded = (chunks_to_yield - 1)*chunk_size
+            yield self._buffer.get_data(start=self.start+data_already_yielded,
+                                        length=self._length - data_already_yielded)
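A sketch of the new ranged get_data and the streaming get_data_iterator, assuming the files come from DataRecord.list_files() as used by the data-record CLI later in this diff; the URI is a placeholder.

    from biolib._data_record.data_record import DataRecord

    record = DataRecord.get_by_uri(uri='biolib.com/my-account/my-record')  # placeholder URI
    for lazy_file in record.list_files():
        header = lazy_file.get_data(start=0, length=1024)  # ranged read, clamped to the file size
        print(lazy_file.path, len(header))
        with open(lazy_file.name, 'wb') as local_file:
            for chunk in lazy_file.get_data_iterator():  # streams the file in 10 MB chunks
                local_file.write(chunk)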
biolib/cli/__init__.py CHANGED
@@ -5,7 +5,7 @@ import click
 
 from biolib import utils
 from biolib.biolib_logging import logger, logger_no_user_data
-from biolib.cli import init, lfs, push, run, start, runtime, download_container
+from biolib.cli import auth, data_record, download_container, init, lfs, push, run, runtime, start
 
 
 @click.version_option(version=utils.BIOLIB_PACKAGE_VERSION, prog_name='pybiolib')
@@ -20,13 +20,17 @@ def cli() -> None:
     logger_no_user_data.configure(default_log_level=logging.WARNING)
 
 
+cli.add_command(auth.login)
+cli.add_command(auth.logout)
+cli.add_command(auth.whoami)
+cli.add_command(download_container.download_container)
 cli.add_command(init.init)
 cli.add_command(lfs.lfs)
 cli.add_command(push.push)
 cli.add_command(run.run)
 cli.add_command(runtime.runtime)
 cli.add_command(start.start)
-cli.add_command(download_container.download_container)
+cli.add_command(data_record.data_record)
 
 # allow this script to be called without poetry in dev e.g. by an IDE debugger
 if utils.IS_DEV and __name__ == '__main__':
biolib/cli/auth.py ADDED
@@ -0,0 +1,58 @@
+import logging
+import sys
+
+import click
+
+from biolib import api, biolib_errors
+from biolib.biolib_api_client.api_client import BiolibApiClient
+from biolib.biolib_logging import logger, logger_no_user_data
+from biolib.user import sign_in, sign_out
+
+
+@click.command(help='Login your to BioLib account with web browser')
+@click.option(
+    '-w',
+    is_flag=True,
+    default=False,
+    required=False,
+    type=bool,
+    help='Automatically open the login page in the default web browser',
+)
+def login(w: bool) -> None:  # pylint: disable=invalid-name
+    logger.configure(default_log_level=logging.INFO)
+    logger_no_user_data.configure(default_log_level=logging.INFO)
+    sign_in(open_in_default_browser=w)
+
+
+@click.command(help='Logout of your BioLib account')
+def logout() -> None:
+    logger.configure(default_log_level=logging.INFO)
+    logger_no_user_data.configure(default_log_level=logging.INFO)
+    sign_out()
+
+
+@click.command(help='Prints out the full name of the user logged in')
+def whoami() -> None:
+    client = BiolibApiClient.get()
+    if client.is_signed_in:
+        user_uuid = None
+        if client.access_token is None:
+            print('Unable to fetch user credentials. Please try logging out and logging in again.')
+            exit(1)
+        try:
+            user_uuid = client.decode_jwt_without_checking_signature(jwt=client.access_token)['payload']['public_id']
+        except biolib_errors.BioLibError as error:
+            print(
+                f'Unable to reference user public_id in access token:\n {error.message}',
+                file=sys.stderr,
+            )
+            exit(1)
+        response = api.client.get(path=f'/user/{user_uuid}/')
+        user_dict = response.json()
+        email = user_dict['email']
+        intrinsic_account = [account for account in user_dict['accounts'] if account['role'] == 'intrinsic'][0]
+        display_name = intrinsic_account['display_name']
+        print(f'Name: {display_name}\nEmail: {email}\nLogged into: {client.base_url}')
+    else:
+        print('Not logged in', file=sys.stderr)
+        exit(1)
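The new auth commands wrap the existing sign-in helpers; a minimal Python equivalent, assuming only the sign_in and sign_out functions imported above.

    from biolib.user import sign_in, sign_out

    sign_in(open_in_default_browser=True)  # what `biolib login -w` runs
    sign_out()                             # what `biolib logout` runs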
biolib/cli/data_record.py ADDED
@@ -0,0 +1,80 @@
+import json
+import logging
+import os
+from typing import Dict, List
+
+import click
+
+from biolib._data_record.data_record import DataRecord
+from biolib.biolib_logging import logger, logger_no_user_data
+from biolib.typing_utils import Optional
+
+
+@click.group(help='Data Records')
+def data_record() -> None:
+    logger.configure(default_log_level=logging.INFO)
+    logger_no_user_data.configure(default_log_level=logging.INFO)
+
+
+@data_record.command(help='Create a Data Record')
+@click.argument('uri', required=True)
+@click.option('--data-path', required=True, type=click.Path(exists=True))
+@click.option('--record-type', required=False, type=str, default=None)
+def create(uri: str, data_path: str, record_type: Optional[str]) -> None:
+    DataRecord.create(destination=uri, data_path=data_path, record_type=record_type)
+
+
+@data_record.command(help='Update a Data Record')
+@click.argument('uri', required=True)
+@click.option('--data-path', required=True, type=click.Path(exists=True))
+@click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
+def update(uri: str, data_path: str, chunk_size: Optional[int]) -> None:
+    DataRecord.get_by_uri(uri=uri).update(data_path=data_path, chunk_size_in_mb=chunk_size)
+
+
+@data_record.command(help='Download files from a Data Record')
+@click.argument('uri', required=True)
+@click.option('--file', required=False, type=str)
+@click.option('--path-filter', required=False, type=str, hide_input=True)
+def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
+    record = DataRecord.get_by_uri(uri=uri)
+    if file is not None:
+        try:
+            file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file][0]
+        except IndexError:
+            raise Exception('File not found in data record') from None
+
+        assert not os.path.exists(file_obj.name), 'File already exists in current directory'
+        with open(file_obj.name, 'wb') as file_handle:
+            file_handle.write(file_obj.get_data())
+
+    else:
+        assert not os.path.exists(record.name), f'Directory with name {record.name} already exists in current directory'
+        record.save_files(output_dir=record.name, path_filter=path_filter)
+
+
+@data_record.command(help='Describe a Data Record')
+@click.argument('uri', required=True)
+@click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
+def describe(uri: str, output_as_json: bool) -> None:
+    record = DataRecord.get_by_uri(uri)
+    files_info: List[Dict] = []
+    total_size_in_bytes = 0
+    for file in record.list_files():
+        files_info.append({'path': file.path, 'size_bytes': file.length})
+        total_size_in_bytes += file.length
+
+    if output_as_json:
+        print(
+            json.dumps(
+                obj={'uri': record.uri, 'size_bytes': total_size_in_bytes, 'files': files_info},
+                indent=4,
+            )
+        )
+    else:
+        print(f'Data Record {record.uri}\ntotal {total_size_in_bytes} bytes\n')
+        print('size bytes path')
+        for file_info in files_info:
+            size_string = str(file_info['size_bytes'])
+            leading_space_string = ' ' * (10 - len(size_string))
+            print(f"{leading_space_string}{size_string} {file_info['path']}")
biolib/cli/download_container.py CHANGED
@@ -1,10 +1,12 @@
 import logging
+
 import click
+
 from biolib.biolib_download_container import download_container_from_uri
 from biolib.biolib_logging import logger, logger_no_user_data
 
 
-@click.command(help='Push an application to BioLib', name='download-container')
+@click.command(help='Pull an application from BioLib', name='download-container', hidden=True)
 @click.argument('uri')
 def download_container(uri: str) -> None:
     logger.configure(default_log_level=logging.INFO)
biolib/cli/init.py CHANGED
@@ -2,6 +2,7 @@ import os
 import sys
 
 import click
+
 from biolib import templates
 
 
biolib/cli/lfs.py CHANGED
@@ -1,12 +1,15 @@
+import json
 import logging
+import os
 import sys
+from typing import Dict, List
 
 import click
 
-import biolib.lfs
 from biolib import biolib_errors
-from biolib.biolib_logging import logger_no_user_data, logger
-from biolib.lfs import push_large_file_system, create_large_file_system, describe_large_file_system, prune_lfs_cache
+from biolib._data_record.data_record import DataRecord
+from biolib._internal.lfs import prune_lfs_cache
+from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.typing_utils import Optional
 
 
@@ -18,9 +21,10 @@ def lfs() -> None:
 @lfs.command(help='Create a Large File System')
 @click.argument('uri', required=True)
 def create(uri: str) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record create" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
-    create_large_file_system(lfs_uri=uri)
+    DataRecord.create(destination=uri)
 
 
 @lfs.command(help='Push a new version of a Large File System')
@@ -28,10 +32,11 @@ def create(uri: str) -> None:
 @click.option('--path', required=True, type=click.Path(exists=True))
 @click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
 def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record update" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
     try:
-        push_large_file_system(lfs_uri=uri, input_dir=path, chunk_size_in_mb=chunk_size)
+        DataRecord.get_by_uri(uri=uri).update(data_path=path, chunk_size_in_mb=chunk_size)
     except biolib_errors.BioLibError as error:
         print(f'An error occurred:\n{error.message}', file=sys.stderr)
         exit(1)
@@ -41,12 +46,20 @@ def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
 @click.argument('uri', required=True)
 @click.option('--file-path', required=True, type=str)
 def download_file(uri: str, file_path: str) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record download" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
     try:
-        data = biolib.lfs.get_file_data_from_large_file_system(lfs_uri=uri, file_path=file_path)
-        with open(file_path, mode='wb') as file:
-            file.write(data)
+        record = DataRecord.get_by_uri(uri=uri)
+        try:
+            file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file_path][0]
+        except IndexError:
+            raise Exception('File not found in data record') from None
+
+        assert not os.path.exists(file_obj.name), 'File already exists in current directory'
+        with open(file_obj.name, 'wb') as file_handle:
+            file_handle.write(file_obj.get_data())
+
     except biolib_errors.BioLibError as error:
         print(f'An error occurred:\n{error.message}', file=sys.stderr)
         exit(1)
@@ -54,9 +67,30 @@ def download_file(uri: str, file_path: str) -> None:
 
 @lfs.command(help='Describe a Large File System')
 @click.argument('uri', required=True)
-@click.option('--json', is_flag=True, default=False, required=False, help='Format output as JSON')
-def describe(uri: str, json: bool) -> None:
-    describe_large_file_system(lfs_uri=uri, output_as_json=json)
+@click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
+def describe(uri: str, output_as_json: bool) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record describe" instead.')
+    data_record = DataRecord.get_by_uri(uri)
+    files_info: List[Dict] = []
+    total_size_in_bytes = 0
+    for file in data_record.list_files():
+        files_info.append({'path': file.path, 'size_bytes': file.length})
+        total_size_in_bytes += file.length
+
+    if output_as_json:
+        print(
+            json.dumps(
+                obj={'uri': data_record.uri, 'size_bytes': total_size_in_bytes, 'files': files_info},
+                indent=4,
+            )
+        )
+    else:
+        print(f'Large File System {data_record.uri}\ntotal {total_size_in_bytes} bytes\n')
+        print('size bytes path')
+        for file_info in files_info:
+            size_string = str(file_info['size_bytes'])
+            leading_space_string = ' ' * (10 - len(size_string))
+            print(f"{leading_space_string}{size_string} {file_info['path']}")
 
 
 @lfs.command(help='Prune LFS cache', hidden=True)
biolib/cli/push.py CHANGED
@@ -3,8 +3,8 @@ from typing import Optional
 
 import click
 
-from biolib.biolib_logging import logger, logger_no_user_data
 from biolib._internal.push_application import push_application
+from biolib.biolib_logging import logger, logger_no_user_data
 
 
 @click.command(help='Push an application to BioLib')