PyPI - pybiolib - Versions diffs - 1.1.1881__py3-none-any.whl → 1.1.2193__py3-none-any.whl - Mend

pybiolib 1.1.1881__py3-none-any.whl → 1.1.2193__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

biolib/__init__.py +11 -4
biolib/_data_record/data_record.py +278 -0
biolib/_internal/data_record/__init__.py +1 -1
biolib/_internal/data_record/data_record.py +95 -151
biolib/_internal/data_record/remote_storage_endpoint.py +18 -7
biolib/_internal/file_utils.py +77 -0
biolib/_internal/fuse_mount/__init__.py +1 -0
biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
biolib/_internal/http_client.py +29 -9
biolib/_internal/lfs/__init__.py +1 -0
biolib/_internal/libs/__init__.py +1 -0
biolib/_internal/libs/fusepy/__init__.py +1257 -0
biolib/_internal/push_application.py +1 -1
biolib/_internal/runtime.py +2 -56
biolib/_internal/types/__init__.py +4 -0
biolib/_internal/types/app.py +9 -0
biolib/_internal/types/data_record.py +40 -0
biolib/_internal/types/experiment.py +10 -0
biolib/_internal/types/resource.py +14 -0
biolib/_internal/types/typing.py +7 -0
biolib/_runtime/runtime.py +80 -0
biolib/api/__init__.py +1 -0
biolib/api/client.py +39 -17
biolib/app/app.py +34 -71
biolib/biolib_api_client/api_client.py +9 -2
biolib/biolib_api_client/app_types.py +2 -2
biolib/biolib_api_client/biolib_job_api.py +6 -0
biolib/biolib_api_client/job_types.py +4 -4
biolib/biolib_api_client/lfs_types.py +8 -2
biolib/biolib_binary_format/remote_endpoints.py +12 -10
biolib/biolib_binary_format/utils.py +23 -3
biolib/cli/auth.py +1 -1
biolib/cli/data_record.py +43 -6
biolib/cli/lfs.py +10 -6
biolib/compute_node/cloud_utils/cloud_utils.py +13 -16
biolib/compute_node/job_worker/executors/docker_executor.py +126 -108
biolib/compute_node/job_worker/job_storage.py +3 -4
biolib/compute_node/job_worker/job_worker.py +25 -15
biolib/compute_node/remote_host_proxy.py +61 -84
biolib/compute_node/webserver/webserver_types.py +0 -1
biolib/experiments/experiment.py +75 -44
biolib/jobs/job.py +98 -19
biolib/jobs/job_result.py +46 -21
biolib/jobs/types.py +1 -1
biolib/runtime/__init__.py +2 -1
biolib/sdk/__init__.py +18 -7
biolib/typing_utils.py +2 -7
biolib/user/sign_in.py +2 -2
biolib/utils/seq_util.py +38 -35
{pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/METADATA +1 -1
{pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/RECORD +55 -44
biolib/experiments/types.py +0 -9
biolib/lfs/__init__.py +0 -4
biolib/lfs/utils.py +0 -153
/biolib/{lfs → _internal/lfs}/cache.py +0 -0
{pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/LICENSE +0 -0
{pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/WHEEL +0 -0
{pybiolib-1.1.1881.dist-info → pybiolib-1.1.2193.dist-info}/entry_points.txt +0 -0

biolib/biolib_api_client/lfs_types.py CHANGED Viewed

@@ -1,13 +1,19 @@
 from biolib.typing_utils import TypedDict
-class LargeFileSystemVersion(TypedDict):
+class DataRecordVersion(TypedDict):
     presigned_download_url: str
     size_bytes: int
     uri: str
     uuid: str
-class LargeFileSystem(TypedDict):
+class DataRecordInfo(TypedDict):
     uri: str
     uuid: str
+class DataRecordVersionInfo(TypedDict):
+    resource_uri: str
+    resource_uuid: str
+    resource_version_uuid: str

biolib/biolib_binary_format/remote_endpoints.py CHANGED Viewed

@@ -1,25 +1,27 @@
 from datetime import datetime, timedelta
-# from urllib.parse import urlparse, parse_qs
-from biolib.biolib_logging import logger
 from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
 from biolib.biolib_binary_format.utils import RemoteEndpoint
+# from urllib.parse import urlparse, parse_qs
+from biolib.biolib_logging import logger
+from biolib.typing_utils import Literal
-class RemoteJobStorageResultEndpoint(RemoteEndpoint):
-    def __init__(self, job_id: str, job_auth_token: str):
-        self._job_id = job_id
-        self._job_auth_token = job_auth_token
+class RemoteJobStorageEndpoint(RemoteEndpoint):
+    def __init__(self, job_uuid: str, job_auth_token: str, storage_type: Literal['input', 'output']):
         self._expires_at = None
+        self._job_auth_token = job_auth_token
+        self._job_uuid = job_uuid
         self._presigned_url = None
+        self._storage_type: Literal['input', 'output'] = storage_type
     def get_remote_url(self):
         if not self._presigned_url or datetime.utcnow() > self._expires_at:
             self._presigned_url = BiolibJobApi.get_job_storage_download_url(
                 job_auth_token=self._job_auth_token,
-                job_uuid=self._job_id,
-                storage_type='results'
+                job_uuid=self._job_uuid,
+                storage_type='results' if self._storage_type == 'output' else 'input',
             )
             self._expires_at = datetime.utcnow() + timedelta(minutes=8)
             # TODO: Use expires at from url
@@ -27,6 +29,6 @@ class RemoteJobStorageResultEndpoint(RemoteEndpoint):
             # query_params = parse_qs(parsed_url.query)
             # time_at_generation = datetime.datetime.strptime(query_params['X-Amz-Date'][0], '%Y%m%dT%H%M%SZ')
             # self._expires_at = time_at_generation + timedelta(seconds=int(query_params['X-Amz-Expires'][0]))
-            logger.debug(f'Job "{self._job_id}" fetched presigned URL with expiry at {self._expires_at.isoformat()}')
+            logger.debug(f'Job "{self._job_uuid}" fetched presigned URL with expiry at {self._expires_at.isoformat()}')
         return self._presigned_url

biolib/biolib_binary_format/utils.py CHANGED Viewed

@@ -1,7 +1,8 @@
 from abc import ABC, abstractmethod
 import io
+import math
 from typing import Optional, Callable
+from biolib.typing_utils import Iterator
 from biolib._internal.http_client import HttpClient
@@ -147,5 +148,24 @@ class LazyLoadedFile:
     def get_file_handle(self) -> io.BufferedIOBase:
         return io.BytesIO(self.get_data())
-    def get_data(self) -> bytes:
-        return self._buffer.get_data(start=self.start, length=self._length)
+    def get_data(self, start=0, length=None) -> bytes:
+        start_offset = start + self.start
+        # make sure length doesn't go outside file boundaries
+        length_to_end_of_file = max(self._length - start, 0)
+        if length is None:
+            length_to_request = length_to_end_of_file
+        else:
+            length_to_request = min(length, length_to_end_of_file)
+        return self._buffer.get_data(start=start_offset, length=length_to_request)
+    def get_data_iterator(self) -> Iterator[bytes]:
+        if self._length == 0:
+            yield b''
+        else:
+            chunk_size = 10_000_000
+            chunks_to_yield = math.ceil(self._length / chunk_size)
+            for chunk_idx in range(chunks_to_yield - 1):
+                yield self._buffer.get_data(start=self.start+chunk_idx*chunk_size, length=chunk_size)
+            data_already_yielded = (chunks_to_yield - 1)*chunk_size
+            yield self._buffer.get_data(start=self.start+data_already_yielded,
+                                        length=self._length - data_already_yielded)

biolib/cli/auth.py CHANGED Viewed

@@ -52,7 +52,7 @@ def whoami() -> None:
         email = user_dict['email']
         intrinsic_account = [account for account in user_dict['accounts'] if account['role'] == 'intrinsic'][0]
         display_name = intrinsic_account['display_name']
-        print(f'Name: {display_name}\nEmail: {email}')
+        print(f'Name: {display_name}\nEmail: {email}\nLogged into: {client.base_url}')
     else:
         print('Not logged in', file=sys.stderr)
         exit(1)

biolib/cli/data_record.py CHANGED Viewed

@@ -1,9 +1,11 @@
+import json
 import logging
 import os
+from typing import Dict, List
 import click
-from biolib._internal.data_record import DataRecord
+from biolib._data_record.data_record import DataRecord
 from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.typing_utils import Optional
@@ -15,11 +17,19 @@ def data_record() -> None:
 @data_record.command(help='Create a Data Record')
-@click.option('--destination', type=str, required=True)
+@click.argument('uri', required=True)
+@click.option('--data-path', required=True, type=click.Path(exists=True))
+@click.option('--record-type', required=False, type=str, default=None)
+def create(uri: str, data_path: str, record_type: Optional[str]) -> None:
+    DataRecord.create(destination=uri, data_path=data_path, record_type=record_type)
+@data_record.command(help='Update a Data Record')
+@click.argument('uri', required=True)
 @click.option('--data-path', required=True, type=click.Path(exists=True))
-@click.option('--name', type=str, required=False)
-def create(destination: str, data_path: str, name: Optional[str] = None) -> None:
-    DataRecord.create(destination, data_path, name)
+@click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
+def update(uri: str, data_path: str, chunk_size: Optional[int]) -> None:
+    DataRecord.get_by_uri(uri=uri).update(data_path=data_path, chunk_size_in_mb=chunk_size)
 @data_record.command(help='Download files from a Data Record')
@@ -27,7 +37,7 @@ def create(destination: str, data_path: str, name: Optional[str] = None) -> None
 @click.option('--file', required=False, type=str)
 @click.option('--path-filter', required=False, type=str, hide_input=True)
 def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
-    record = DataRecord(uri=uri)
+    record = DataRecord.get_by_uri(uri=uri)
     if file is not None:
         try:
             file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file][0]
@@ -41,3 +51,30 @@ def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
     else:
         assert not os.path.exists(record.name), f'Directory with name {record.name} already exists in current directory'
         record.save_files(output_dir=record.name, path_filter=path_filter)
+@data_record.command(help='Describe a Data Record')
+@click.argument('uri', required=True)
+@click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
+def describe(uri: str, output_as_json: bool) -> None:
+    record = DataRecord.get_by_uri(uri)
+    files_info: List[Dict] = []
+    total_size_in_bytes = 0
+    for file in record.list_files():
+        files_info.append({'path': file.path, 'size_bytes': file.length})
+        total_size_in_bytes += file.length
+    if output_as_json:
+        print(
+            json.dumps(
+                obj={'uri': record.uri, 'size_bytes': total_size_in_bytes, 'files': files_info},
+                indent=4,
+            )
+        )
+    else:
+        print(f'Data Record {record.uri}\ntotal {total_size_in_bytes} bytes\n')
+        print('size bytes    path')
+        for file_info in files_info:
+            size_string = str(file_info['size_bytes'])
+            leading_space_string = ' ' * (10 - len(size_string))
+            print(f"{leading_space_string}{size_string}    {file_info['path']}")

biolib/cli/lfs.py CHANGED Viewed

@@ -7,9 +7,9 @@ from typing import Dict, List
 import click
 from biolib import biolib_errors
-from biolib._internal.data_record import DataRecord
+from biolib._data_record.data_record import DataRecord
+from biolib._internal.lfs import prune_lfs_cache
 from biolib.biolib_logging import logger, logger_no_user_data
-from biolib.lfs import create_large_file_system, prune_lfs_cache, push_large_file_system
 from biolib.typing_utils import Optional
@@ -21,9 +21,10 @@ def lfs() -> None:
 @lfs.command(help='Create a Large File System')
 @click.argument('uri', required=True)
 def create(uri: str) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record create" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
-    create_large_file_system(lfs_uri=uri)
+    DataRecord.create(destination=uri)
 @lfs.command(help='Push a new version of a Large File System')
@@ -31,10 +32,11 @@ def create(uri: str) -> None:
 @click.option('--path', required=True, type=click.Path(exists=True))
 @click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
 def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record update" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
     try:
-        push_large_file_system(lfs_uri=uri, input_dir=path, chunk_size_in_mb=chunk_size)
+        DataRecord.get_by_uri(uri=uri).update(data_path=path, chunk_size_in_mb=chunk_size)
     except biolib_errors.BioLibError as error:
         print(f'An error occurred:\n{error.message}', file=sys.stderr)
         exit(1)
@@ -44,10 +46,11 @@ def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
 @click.argument('uri', required=True)
 @click.option('--file-path', required=True, type=str)
 def download_file(uri: str, file_path: str) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record download" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
     try:
-        record = DataRecord(uri=uri)
+        record = DataRecord.get_by_uri(uri=uri)
         try:
             file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file_path][0]
         except IndexError:
@@ -66,7 +69,8 @@ def download_file(uri: str, file_path: str) -> None:
 @click.argument('uri', required=True)
 @click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
 def describe(uri: str, output_as_json: bool) -> None:
-    data_record = DataRecord(uri)
+    logger.warning('This is command deprecated, please use "biolib data-record describe" instead.')
+    data_record = DataRecord.get_by_uri(uri)
     files_info: List[Dict] = []
     total_size_in_bytes = 0
     for file in data_record.list_files():

biolib/compute_node/cloud_utils/cloud_utils.py CHANGED Viewed

@@ -7,11 +7,11 @@ import time
 from datetime import datetime
 from socket import gethostbyname, gethostname
-from biolib import utils, api
-from biolib.biolib_logging import logger_no_user_data
-from biolib.typing_utils import Optional, List, Dict, cast
+from biolib import api, utils
 from biolib.biolib_api_client import BiolibApiClient
-from biolib.compute_node.webserver.webserver_types import WebserverConfig, ComputeNodeInfo, ShutdownTimes
+from biolib.biolib_logging import logger_no_user_data
+from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo, ShutdownTimes, WebserverConfig
+from biolib.typing_utils import Dict, List, Optional, cast
 def trust_ceritificates(certs_data: List[str]) -> None:
@@ -54,15 +54,12 @@ class CloudUtils:
                 pybiolib_version=utils.BIOLIB_PACKAGE_VERSION,
             ),
             base_url=CloudUtils._get_environment_variable_or_fail('BIOLIB_BASE_URL'),
-            s3_general_storage_bucket_name=CloudUtils._get_environment_variable_or_fail(
-                'BIOLIB_S3_GENERAL_STORAGE_BUCKET_NAME',
-            ),
             is_dev=os.environ.get('BIOLIB_DEV') == 'TRUE',
             shutdown_times=ShutdownTimes(
                 auto_shutdown_time_in_seconds=CloudUtils._get_environment_variable_as_int(
                     'BIOLIB_CLOUD_AUTO_SHUTDOWN_TIME_IN_SECONDS'
                 ),
-            )
+            ),
         )
         return CloudUtils._webserver_config
@@ -84,7 +81,7 @@ class CloudUtils:
             except BaseException as error_object:
                 logger_no_user_data.error(f'Failed to deregister got error: {error_object}')
         else:
-            logger_no_user_data.error("Not deregistering as environment is not cloud")
+            logger_no_user_data.error('Not deregistering as environment is not cloud')
     @staticmethod
     def shutdown() -> None:
@@ -98,7 +95,7 @@ class CloudUtils:
             except Exception as error:  # pylint: disable=broad-except
                 logger_no_user_data.error(f'Failed to shutdown got error: {error}')
         else:
-            logger_no_user_data.error("Not running shutdown as environment is not cloud")
+            logger_no_user_data.error('Not running shutdown as environment is not cloud')
     @staticmethod
     def deregister_and_shutdown() -> None:
@@ -131,7 +128,7 @@ class CloudUtils:
                     'auth_token': config['compute_node_info']['auth_token'],
                     'cloud_job_id': cloud_job_id,
                     'system_exception_code': system_exception_code,
-                    'exit_code': exit_code
+                    'exit_code': exit_code,
                 },
             )
         except BaseException as error:
@@ -152,14 +149,14 @@ class CloudUtils:
                 data=cast(Dict[str, str], compute_node_info),
             )
             if response.status_code != 201:
-                raise Exception("Non 201 error code")
+                raise Exception('Non 201 error code')
             else:
-                logger_no_user_data.info("Compute node registered!")
+                logger_no_user_data.info('Compute node registered!')
                 response_data = response.json()
-                logger_no_user_data.info(f"Got data on register: {json.dumps(response_data)}")
+                logger_no_user_data.info(f'Got data on register: {json.dumps(response_data)}')
                 certs = []
-                for federation in response_data["federation"]:
-                    for cert_b64 in federation["certs_b64"]:
+                for federation in response_data['federation']:
+                    for cert_b64 in federation['certs_b64']:
                         certs.append(base64.b64decode(cert_b64).decode())
                 trust_ceritificates(certs)

pybiolib 1.1.1881__py3-none-any.whl → 1.1.2193__py3-none-any.whl

pybiolib 1.1.1881__py3-none-any.whl → 1.1.2193__py3-none-any.whl