pybiolib 1.2.883__py3-none-any.whl → 1.2.1890__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (124)
  1. biolib/__init__.py +33 -10
  2. biolib/_data_record/data_record.py +220 -126
  3. biolib/_index/index.py +55 -0
  4. biolib/_index/query_result.py +103 -0
  5. biolib/_internal/add_copilot_prompts.py +24 -11
  6. biolib/_internal/add_gui_files.py +81 -0
  7. biolib/_internal/data_record/__init__.py +1 -1
  8. biolib/_internal/data_record/data_record.py +1 -18
  9. biolib/_internal/data_record/push_data.py +65 -16
  10. biolib/_internal/data_record/remote_storage_endpoint.py +18 -13
  11. biolib/_internal/file_utils.py +48 -0
  12. biolib/_internal/lfs/cache.py +4 -2
  13. biolib/_internal/push_application.py +95 -24
  14. biolib/_internal/runtime.py +2 -0
  15. biolib/_internal/string_utils.py +13 -0
  16. biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-general.instructions.md +5 -0
  17. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  18. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  19. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  20. biolib/_internal/templates/{init_template → github_workflow_template}/.github/workflows/biolib.yml +7 -2
  21. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  22. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  23. biolib/_internal/templates/gui_template/App.tsx +53 -0
  24. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  25. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  26. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  27. biolib/_internal/templates/gui_template/index.css +5 -0
  28. biolib/_internal/templates/gui_template/index.html +13 -0
  29. biolib/_internal/templates/gui_template/index.tsx +10 -0
  30. biolib/_internal/templates/gui_template/package.json +27 -0
  31. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  32. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  33. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  34. biolib/_internal/templates/init_template/.biolib/config.yml +1 -0
  35. biolib/_internal/templates/init_template/Dockerfile +5 -1
  36. biolib/_internal/templates/init_template/run.py +6 -15
  37. biolib/_internal/templates/init_template/run.sh +1 -0
  38. biolib/_internal/templates/templates.py +21 -1
  39. biolib/_internal/utils/__init__.py +47 -0
  40. biolib/_internal/utils/auth.py +46 -0
  41. biolib/_internal/utils/job_url.py +33 -0
  42. biolib/_internal/utils/multinode.py +12 -14
  43. biolib/_runtime/runtime.py +15 -2
  44. biolib/_session/session.py +7 -5
  45. biolib/_shared/__init__.py +0 -0
  46. biolib/_shared/types/__init__.py +74 -0
  47. biolib/_shared/types/account.py +12 -0
  48. biolib/_shared/types/account_member.py +8 -0
  49. biolib/{_internal → _shared}/types/experiment.py +1 -0
  50. biolib/_shared/types/resource.py +37 -0
  51. biolib/_shared/types/resource_deploy_key.py +11 -0
  52. biolib/{_internal → _shared}/types/resource_version.py +8 -2
  53. biolib/_shared/types/user.py +19 -0
  54. biolib/_shared/utils/__init__.py +7 -0
  55. biolib/_shared/utils/resource_uri.py +75 -0
  56. biolib/api/client.py +5 -48
  57. biolib/app/app.py +97 -55
  58. biolib/biolib_api_client/api_client.py +3 -47
  59. biolib/biolib_api_client/app_types.py +1 -1
  60. biolib/biolib_api_client/biolib_app_api.py +31 -6
  61. biolib/biolib_api_client/biolib_job_api.py +1 -1
  62. biolib/biolib_api_client/user_state.py +34 -2
  63. biolib/biolib_binary_format/module_input.py +8 -0
  64. biolib/biolib_binary_format/remote_endpoints.py +3 -3
  65. biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
  66. biolib/biolib_logging.py +1 -1
  67. biolib/cli/__init__.py +2 -2
  68. biolib/cli/auth.py +4 -16
  69. biolib/cli/data_record.py +82 -0
  70. biolib/cli/index.py +32 -0
  71. biolib/cli/init.py +393 -71
  72. biolib/cli/lfs.py +1 -1
  73. biolib/cli/run.py +9 -6
  74. biolib/cli/start.py +14 -1
  75. biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
  76. biolib/compute_node/job_worker/executors/docker_types.py +1 -1
  77. biolib/compute_node/job_worker/executors/types.py +6 -5
  78. biolib/compute_node/job_worker/job_storage.py +2 -1
  79. biolib/compute_node/job_worker/job_worker.py +155 -90
  80. biolib/compute_node/job_worker/large_file_system.py +2 -6
  81. biolib/compute_node/job_worker/network_alloc.py +99 -0
  82. biolib/compute_node/job_worker/network_buffer.py +240 -0
  83. biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
  84. biolib/compute_node/remote_host_proxy.py +163 -79
  85. biolib/compute_node/utils.py +2 -0
  86. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  87. biolib/compute_node/webserver/proxy_utils.py +28 -0
  88. biolib/compute_node/webserver/webserver.py +64 -19
  89. biolib/experiments/experiment.py +111 -16
  90. biolib/jobs/job.py +128 -31
  91. biolib/jobs/job_result.py +74 -34
  92. biolib/jobs/types.py +1 -0
  93. biolib/sdk/__init__.py +28 -3
  94. biolib/typing_utils.py +1 -1
  95. biolib/utils/cache_state.py +8 -5
  96. biolib/utils/multipart_uploader.py +24 -18
  97. biolib/utils/seq_util.py +1 -1
  98. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  99. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  100. {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  101. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  102. biolib/_internal/llm_instructions/.github/instructions/style-react-ts.instructions.md +0 -22
  103. biolib/_internal/templates/init_template/.gitignore +0 -2
  104. biolib/_internal/types/__init__.py +0 -6
  105. biolib/_internal/types/resource.py +0 -18
  106. biolib/biolib_download_container.py +0 -38
  107. biolib/cli/download_container.py +0 -14
  108. biolib/utils/app_uri.py +0 -57
  109. pybiolib-1.2.883.dist-info/METADATA +0 -50
  110. pybiolib-1.2.883.dist-info/RECORD +0 -148
  111. pybiolib-1.2.883.dist-info/entry_points.txt +0 -3
  112. /biolib/{_internal/llm_instructions → _index}/__init__.py +0 -0
  113. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/general-app-knowledge.instructions.md +0 -0
  114. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-python.instructions.md +0 -0
  115. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_app_inputs.prompt.md +0 -0
  116. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_run_apps.prompt.md +0 -0
  117. /biolib/{_internal → _shared}/types/app.py +0 -0
  118. /biolib/{_internal → _shared}/types/data_record.py +0 -0
  119. /biolib/{_internal → _shared}/types/file_node.py +0 -0
  120. /biolib/{_internal → _shared}/types/push.py +0 -0
  121. /biolib/{_internal → _shared}/types/resource_permission.py +0 -0
  122. /biolib/{_internal → _shared}/types/result.py +0 -0
  123. /biolib/{_internal → _shared}/types/typing.py +0 -0
  124. {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info/licenses}/LICENSE +0 -0
biolib/app/app.py CHANGED
@@ -1,11 +1,16 @@
+ import copy
  import io
  import json
  import os
+ import posixpath
  import random
  import string
  from pathlib import Path
 
  from biolib import utils
+ from biolib._internal.file_utils import path_to_renamed_path
+ from biolib._runtime.runtime import Runtime
+ from biolib._shared.utils import parse_resource_uri
  from biolib.api.client import ApiClient
  from biolib.biolib_api_client import JobState
  from biolib.biolib_api_client.app_types import App, AppVersion
@@ -18,13 +23,24 @@ from biolib.compute_node.job_worker.job_worker import JobWorker
  from biolib.experiments.experiment import Experiment
  from biolib.jobs.job import Result
  from biolib.typing_utils import Dict, Optional
- from biolib.utils.app_uri import parse_app_uri
- from biolib._runtime.runtime import Runtime
+
+
+ class JsonStringIO(io.StringIO):
+ pass
 
 
  class BioLibApp:
- def __init__(self, uri: str, _api_client: Optional[ApiClient] = None, suppress_version_warning: bool = False):
+ def __init__(
+ self,
+ uri: str,
+ _api_client: Optional[ApiClient] = None,
+ suppress_version_warning: bool = False,
+ _experiment: Optional[str] = None,
+ ):
  self._api_client: Optional[ApiClient] = _api_client
+ self._experiment = _experiment
+ self._input_uri = uri
+ self._parsed_input_uri = parse_resource_uri(uri)
 
  app_response = BiolibAppApi.get_by_uri(uri=uri, api_client=self._api_client)
  self._app: App = app_response['app']
@@ -32,16 +48,19 @@ class BioLibApp:
  self._app_version: AppVersion = app_response['app_version']
 
  if not suppress_version_warning:
- parsed_uri = parse_app_uri(uri)
- if parsed_uri['version'] is None:
+ if self._parsed_input_uri['version'] is None:
  if Runtime.check_is_environment_biolib_app():
  logger.warning(
  f"No version specified in URI '{uri}'. This will use the default version, "
- f"which may change behaviour over time. Consider locking down the exact version, "
+ f'which may change behaviour over time. Consider locking down the exact version, '
  f"e.g. '{uri}:1.2.3'"
  )
 
- logger.info(f'Loaded project {self._app_uri}')
+ if self._parsed_input_uri['tag']:
+ semantic_version = f"{self._app_version['major']}.{self._app_version['minor']}.{self._app_version['patch']}"
+ logger.info(f'Loaded {self._input_uri} (resolved to {semantic_version})')
+ else:
+ logger.info(f'Loaded {self._app_uri}')
 
  def __str__(self) -> str:
  return self._app_uri
@@ -70,7 +89,7 @@ class BioLibApp:
  result_prefix: Optional[str] = None,
  timeout: Optional[int] = None,
  notify: bool = False,
- machine_count: Optional[int] = None,
+ max_workers: Optional[int] = None,
  experiment: Optional[str] = None,
  temporary_client_secrets: Optional[Dict[str, str]] = None,
  check: bool = False,
@@ -83,22 +102,18 @@ class BioLibApp:
  raise ValueError('The argument "check" cannot be True when blocking is False')
 
  if not experiment_id:
- experiment_instance = Experiment(experiment) if experiment else Experiment.get_experiment_in_context()
+ experiment_to_use = experiment if experiment is not None else self._experiment
+ experiment_instance: Optional[Experiment]
+ if experiment_to_use:
+ experiment_instance = Experiment(experiment_to_use, _api_client=self._api_client)
+ else:
+ experiment_instance = Experiment.get_experiment_in_context()
  experiment_id = experiment_instance.uuid if experiment_instance else None
 
  module_input_serialized = self._get_serialized_module_input(args, stdin, files)
 
  if machine == 'local':
- if not blocking:
- raise BioLibError('The argument "blocking" cannot be False when running locally')
-
- if experiment_id:
- logger.warning('The argument "experiment_id" is ignored when running locally')
-
- if result_prefix:
- logger.warning('The argument "result_prefix" is ignored when running locally')
-
- return self._run_locally(module_input_serialized)
+ raise BioLibError('Running applications locally with machine="local" is no longer supported.')
 
  job = Result._start_job_in_cloud( # pylint: disable=protected-access
  app_uri=self._app_uri,
@@ -110,11 +125,12 @@ class BioLibApp:
  override_command=override_command,
  result_prefix=result_prefix,
  timeout=timeout,
- requested_machine_count=machine_count,
+ requested_machine_count=max_workers,
  temporary_client_secrets=temporary_client_secrets,
  api_client=self._api_client,
  )
- logger.info(f'View the result in your browser at: {utils.BIOLIB_BASE_URL}/results/{job.id}/')
+ if utils.IS_RUNNING_IN_NOTEBOOK:
+ logger.info(f'View the result in your browser at: {utils.BIOLIB_BASE_URL}/results/{job.id}/')
  if blocking:
  # TODO: Deprecate utils.STREAM_STDOUT and always stream logs by simply calling job.stream_logs()
  if utils.IS_RUNNING_IN_NOTEBOOK:
@@ -151,6 +167,8 @@ Example: "app.cli('--help')"
  def _get_serialized_module_input(args=None, stdin=None, files=None) -> bytes:
  if args is None:
  args = []
+ else:
+ args = copy.copy(args)
 
  if stdin is None:
  stdin = b''
@@ -168,21 +186,72 @@ Example: "app.cli('--help')"
  files = []
 
  files_dict = {}
+ if isinstance(files, list):
+ for file_path in files:
+ path = Path(file_path)
+ if path.is_dir():
+ renamed_dir = path_to_renamed_path(file_path)
+ for filename in path.rglob('*'):
+ if filename.is_dir():
+ continue
+ with open(filename, 'rb') as f:
+ relative_to_dir = filename.resolve().relative_to(path.resolve())
+ files_dict[posixpath.join(renamed_dir, relative_to_dir.as_posix())] = f.read()
+ else:
+ with open(path, 'rb') as f:
+ files_dict[path_to_renamed_path(str(path))] = f.read()
+ elif isinstance(files, dict):
+ files_dict = {}
+ for key, value in files.items():
+ if '//' in key:
+ raise BioLibError(f"File path '{key}' contains double slashes which are not allowed")
+ if not key.startswith('/'):
+ key = '/' + key
+ files_dict[key] = value
+ else:
+ raise Exception('The given files input must be list or dict or None')
+
  for idx, arg in enumerate(args):
  if isinstance(arg, str):
  if os.path.isfile(arg) or os.path.isdir(arg):
- files.append(arg)
- args[idx] = Path(arg).name
+ if os.path.isfile(arg):
+ with open(arg, 'rb') as f:
+ files_dict[path_to_renamed_path(arg)] = f.read()
+ elif os.path.isdir(arg):
+ path = Path(arg)
+ renamed_dir = path_to_renamed_path(arg)
+ for filename in path.rglob('*'):
+ if filename.is_dir():
+ continue
+ with open(filename, 'rb') as f:
+ relative_to_dir = filename.resolve().relative_to(path.resolve())
+ files_dict[posixpath.join(renamed_dir, relative_to_dir.as_posix())] = f.read()
+ args[idx] = path_to_renamed_path(arg, prefix_with_slash=False)
 
  # support --myarg=file.txt
  elif os.path.isfile(arg.split('=')[-1]) or os.path.isdir(arg.split('=')[-1]):
- files.append(arg.split('=')[-1])
- args[idx] = arg.split('=')[0] + '=' + Path(arg.split('=')[-1]).name
+ file_path = arg.split('=')[-1]
+ if os.path.isfile(file_path):
+ with open(file_path, 'rb') as f:
+ files_dict[path_to_renamed_path(file_path)] = f.read()
+ elif os.path.isdir(file_path):
+ path = Path(file_path)
+ renamed_dir = path_to_renamed_path(file_path)
+ for filename in path.rglob('*'):
+ if filename.is_dir():
+ continue
+ with open(filename, 'rb') as f:
+ relative_to_dir = filename.resolve().relative_to(path.resolve())
+ files_dict[posixpath.join(renamed_dir, relative_to_dir.as_posix())] = f.read()
+ args[idx] = arg.split('=')[0] + '=' + path_to_renamed_path(file_path, prefix_with_slash=False)
  else:
  pass # a normal string arg was given
  else:
  tmp_filename = f'input_{"".join(random.choices(string.ascii_letters + string.digits, k=7))}'
- if isinstance(arg, io.StringIO):
+ if isinstance(arg, JsonStringIO):
+ file_data = arg.getvalue().encode()
+ tmp_filename += '.json'
+ elif isinstance(arg, io.StringIO):
  file_data = arg.getvalue().encode()
  elif isinstance(arg, io.BytesIO):
  file_data = arg.getvalue()
@@ -191,33 +260,6 @@ Example: "app.cli('--help')"
  files_dict[f'/{tmp_filename}'] = file_data
  args[idx] = tmp_filename
 
- if isinstance(files, list):
- for file in files:
- path = Path(file).absolute()
-
- # Recursively add data from files if dir
- if path.is_dir():
- for filename in path.rglob('*'):
- if filename.is_dir():
- continue
- file = open(filename, 'rb')
- relative_path = '/' + path.name + '/' + '/'.join(filename.relative_to(path).parts)
- files_dict[relative_path] = file.read()
- file.close()
-
- # Add file data
- else:
- file = open(path, 'rb')
- path_short = '/' + path.name
-
- files_dict[path_short] = file.read()
- file.close()
-
- elif isinstance(files, dict):
- files_dict.update(files)
- else:
- raise Exception('The given files input must be list or dict or None')
-
  module_input_serialized: bytes = ModuleInput().serialize(
  stdin=stdin,
  arguments=args,
@@ -228,7 +270,7 @@ Example: "app.cli('--help')"
  def _run_locally(self, module_input_serialized: bytes) -> Result:
  job_dict = BiolibJobApi.create(
  app_version_id=self._app_version['public_id'],
- app_resource_name_prefix=parse_app_uri(self._app_uri)['resource_name_prefix'],
+ app_resource_name_prefix=parse_resource_uri(self._app_uri)['resource_prefix'],
  )
  job = Result(job_dict)
 
@@ -253,7 +295,7 @@ Example: "app.cli('--help')"
  continue
 
  if isinstance(value, dict):
- value = JsonStringIO(json.dumps(value))
  elif isinstance(value, (int, float)): # Cast numeric values to strings
  value = str(value)
 
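Taken together, the biolib/app/app.py changes above rename the machine_count argument to max_workers, drop support for machine='local', and let files be passed either as a list of local paths or as a dict mapping destination paths to bytes (dict keys get a leading '/' and paths containing '//' are rejected). A minimal usage sketch, assuming biolib.load still returns a BioLibApp and that the parameters shown in the signature hunk above belong to the cli entry point as in earlier releases; the app URI, file names, and values are illustrative only:

import biolib

app = biolib.load('author_name/my_app:1.2.3')  # hypothetical app URI

# 'files' given as a dict of destination path -> bytes; a list of local paths also works.
job = app.cli(
    args=['--input', 'input.fasta'],
    files={'input.fasta': b'>seq1\nACGT\n'},
    max_workers=2,  # renamed from machine_count in this version
)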
@@ -1,15 +1,13 @@
- import base64
- import binascii
- import json
  import os
  from datetime import datetime, timezone
  from json.decoder import JSONDecodeError
 
  from biolib._internal.http_client import HttpClient
+ from biolib._internal.utils.auth import decode_jwt_without_checking_signature
  from biolib._runtime.runtime import Runtime
  from biolib.biolib_errors import BioLibError
  from biolib.biolib_logging import logger, logger_no_user_data
- from biolib.typing_utils import Any, Dict, Optional, TypedDict
+ from biolib.typing_utils import Optional, TypedDict
 
  from .user_state import UserState
 
@@ -19,10 +17,6 @@ class UserTokens(TypedDict):
  refresh: str
 
 
- class JwtDecodeError(Exception):
- pass
-
-
  class _ApiClient:
  def __init__(self, base_url: str, access_token: Optional[str] = None):
  self.base_url: str = base_url
@@ -60,7 +54,7 @@ class _ApiClient:
  return
 
  if self.access_token:
- decoded_token = self.decode_jwt_without_checking_signature(self.access_token)
+ decoded_token = decode_jwt_without_checking_signature(self.access_token)
  if datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60: # 60 second buffer
  # Token has not expired yet
  return
@@ -132,44 +126,6 @@ class _ApiClient:
  self.access_token = json_response['access_token']
  self.refresh_token = json_response['refresh_token']
 
- @staticmethod
- def decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
- jwt_bytes = jwt.encode('utf-8')
-
- try:
- signing_input, _ = jwt_bytes.rsplit(b'.', 1)
- header_segment, payload_segment = signing_input.split(b'.', 1)
- except ValueError as error:
- raise JwtDecodeError('Not enough segments') from error
-
- try:
- header_data = base64.urlsafe_b64decode(header_segment)
- except (TypeError, binascii.Error) as error:
- raise JwtDecodeError('Invalid header padding') from error
-
- try:
- header = json.loads(header_data)
- except ValueError as error:
- raise JwtDecodeError(f'Invalid header string: {error}') from error
-
- if not isinstance(header, dict):
- raise JwtDecodeError('Invalid header string: must be a json object')
-
- try:
- payload_data = base64.urlsafe_b64decode(payload_segment)
- except (TypeError, binascii.Error) as error:
- raise JwtDecodeError('Invalid payload padding') from error
-
- try:
- payload = json.loads(payload_data)
- except ValueError as error:
- raise JwtDecodeError(f'Invalid payload string: {error}') from error
-
- if not isinstance(header, dict):
- raise JwtDecodeError('Invalid payload string: must be a json object')
-
- return dict(header=header, payload=payload)
-
 
  class BiolibApiClient:
  api_client: Optional[_ApiClient] = None
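The JWT helper removed above now lives in biolib/_internal/utils/auth.py (file 40 in the list) and is imported at the top of this module. A minimal sketch of the expiry check it enables, mirroring the call site above; the wrapper function name is_access_token_fresh is hypothetical:

from datetime import datetime, timezone

from biolib._internal.utils.auth import decode_jwt_without_checking_signature


def is_access_token_fresh(access_token: str) -> bool:
    # The helper returns {'header': ..., 'payload': ...}; 'exp' is a unix timestamp.
    decoded_token = decode_jwt_without_checking_signature(access_token)
    # Same 60 second buffer as used by _ApiClient above.
    return datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60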
@@ -78,6 +78,7 @@ class _Module(TypedDict):
  large_file_systems: List[LargeFileSystemMapping]
  name: str
  output_files_mappings: List[FilesMapping]
+ ports: List[int]
  source_files_mappings: List[FilesMapping]
  working_directory: str
 
@@ -90,7 +91,6 @@ class Module(_Module, total=False):
  class _AppVersionOnJob(TypedDict):
  created_at: str
  client_side_executable_zip: Optional[str]
- consumes_stdin: bool
  is_runnable_by_user: bool
  public_id: str
  remote_hosts: List[RemoteHost]
@@ -57,6 +57,22 @@ def _get_git_branch_name() -> str:
  return ''
 
 
+ def _get_git_commit_hash() -> str:
+ try:
+ github_actions_commit_hash = os.getenv('GITHUB_SHA')
+ if github_actions_commit_hash:
+ return github_actions_commit_hash
+
+ gitlab_ci_commit_hash = os.getenv('CI_COMMIT_SHA')
+ if gitlab_ci_commit_hash:
+ return gitlab_ci_commit_hash
+
+ result = subprocess.run(['git', 'rev-parse', 'HEAD'], check=True, stdout=subprocess.PIPE, text=True)
+ return result.stdout.strip()
+ except BaseException:
+ return ''
+
+
  def _get_git_repository_url() -> str:
  try:
  result = subprocess.run(['git', 'remote', 'get-url', 'origin'], check=True, stdout=subprocess.PIPE, text=True)
@@ -64,16 +80,15 @@ def _get_git_repository_url() -> str:
  except BaseException:
  return ''
 
- def _get_app_uri_from_str(input_str: str) -> str:
+
+ def _get_resource_uri_from_str(input_str: str) -> str:
  parsed_base_url = urllib.parse.urlparse(load_base_url_from_env())
  parsed_uri = urllib.parse.urlparse(input_str)
  if parsed_uri.netloc != '' and parsed_base_url.netloc != parsed_uri.netloc:
- raise biolib_errors.ValidationError(
- f'Invalid URI. The hostname "{parsed_base_url.netloc}" is not recognized.'
- )
+ raise biolib_errors.ValidationError(f'Invalid URI. The hostname "{parsed_base_url.netloc}" is not recognized.')
  elif parsed_uri.netloc != '' and parsed_uri.path[1] != '@':
  uri = f'@{parsed_uri.netloc}{parsed_uri.path}'
- elif parsed_uri.netloc == '' and parsed_uri.path.startswith (parsed_base_url.netloc):
+ elif parsed_uri.netloc == '' and parsed_uri.path.startswith(parsed_base_url.netloc):
  uri = f'@{parsed_uri.path}'
  else:
  uri = parsed_uri.path
@@ -86,7 +101,7 @@ def _get_app_uri_from_str(input_str: str) -> str:
  class BiolibAppApi:
  @staticmethod
  def get_by_uri(uri: str, api_client: Optional[ApiClient] = None) -> AppGetResponse:
- uri = _get_app_uri_from_str(uri)
+ uri = _get_resource_uri_from_str(uri)
  api = api_client or biolib.api.client
  try:
  response = api.get(path='/app/', params={'uri': uri})
@@ -99,6 +114,15 @@ class BiolibAppApi:
 
  raise error
 
+ @staticmethod
+ def create_app(uri: str):
+ uri = _get_resource_uri_from_str(uri)
+ try:
+ response = biolib.api.client.post(path='/resources/apps/', data={'uri': uri})
+ return response.json()
+ except HttpError as error:
+ raise error
+
  @staticmethod
  def push_app_version(
  app_id,
@@ -116,6 +140,7 @@ class BiolibAppApi:
  'state': 'published',
  'app_version_id_to_copy_images_from': app_version_id_to_copy_images_from,
  'git_branch_name': _get_git_branch_name(),
+ 'git_commit_hash': _get_git_commit_hash(),
  'git_repository_url': _get_git_repository_url(),
  }
  if semantic_version:
@@ -25,7 +25,7 @@ def _get_user_info() -> Optional[str]:
  if utils.BASE_URL_IS_PUBLIC_BIOLIB:
  return None
 
- enterprise_agent_info_opt_env_vars = ['DOMINO_STARTING_USERNAME', 'USER']
+ enterprise_agent_info_opt_env_vars = ['BIOLIB_OPT_USER', 'DOMINO_STARTING_USERNAME', 'USER']
 
  for env_var in enterprise_agent_info_opt_env_vars:
  env_var_value = os.getenv(env_var)
@@ -1,6 +1,6 @@
+ from biolib.biolib_logging import logger_no_user_data
+ from biolib.typing_utils import Optional, TypedDict
  from biolib.utils.cache_state import CacheState
- from biolib.typing_utils import TypedDict, Optional
-
 
  # TODO: Save job keys in the user state instead of a separate state file
  # UuidStr = str
@@ -15,6 +15,9 @@ class UserStateType(TypedDict):
 
 
  class UserState(CacheState[UserStateType]):
+ def __init__(self) -> None:
+ super().__init__(fail_fast_on_lock_acquire=True)
+ self._is_in_memory_only: bool = False
 
  @property
  def _state_path(self) -> str:
@@ -22,3 +25,32 @@ class UserState(CacheState[UserStateType]):
 
  def _get_default_state(self) -> UserStateType:
  return UserStateType(refresh_token=None)
+
+ def __enter__(self) -> UserStateType:
+ if self._is_in_memory_only:
+ if self._state is None:
+ self._state = self._get_default_state()
+ return self._state
+ try:
+ return super().__enter__()
+ except Exception as error:
+ logger_no_user_data.warning(
+ f'UserState: Could not access state file, continuing with in-memory state only. '
+ f'Login state will not persist across Python processes. Error: {error}'
+ )
+ self._is_in_memory_only = True
+ if self._state is None:
+ self._state = self._get_default_state()
+ return self._state
+
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+ if self._is_in_memory_only:
+ return
+ try:
+ super().__exit__(exc_type, exc_val, exc_tb)
+ except Exception as error:
+ logger_no_user_data.warning(
+ f'UserState: Could not write state file. '
+ f'Login state will not persist across Python processes. Error: {error}'
+ )
+ self._is_in_memory_only = True
@@ -1,4 +1,5 @@
  from biolib.biolib_binary_format.base_bbf_package import BioLibBinaryFormatBasePackage
+ from biolib.biolib_logging import logger
  from biolib.typing_utils import TypedDict, Dict, List
 
 
@@ -14,6 +15,10 @@ class ModuleInput(BioLibBinaryFormatBasePackage):
  self.package_type = 1
 
  def serialize(self, stdin, arguments, files) -> bytes:
+ for path in files.keys():
+ if '//' in path:
+ raise ValueError(f"File path '{path}' contains double slashes which are not allowed")
+
  bbf_data = bytearray()
  bbf_data.extend(self.version.to_bytes(1, 'big'))
  bbf_data.extend(self.package_type.to_bytes(1, 'big'))
@@ -67,6 +72,9 @@ class ModuleInput(BioLibBinaryFormatBasePackage):
  data_len = self.get_data(8, output_type='int')
  path = self.get_data(path_len, output_type='str')
  data = self.get_data(data_len)
+ if '//' in path:
+ # TODO: Raise ValueError here once backwards compatibility period is over
+ logger.warning(f"File path '{path}' contains double slashes which are not allowed")
  files[path] = bytes(data)
 
  return ModuleInputDict(stdin=stdin, arguments=arguments, files=files)
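The check added above rejects double slashes in file paths at serialize time, while deserialize only warns during the backwards-compatibility period. A short sketch of the serialize call as it is used from biolib/app/app.py; the arguments and file contents are made up:

from biolib.biolib_binary_format.module_input import ModuleInput

module_input_serialized = ModuleInput().serialize(
    stdin=b'',
    arguments=['--input', 'input.txt'],
    files={'/input.txt': b'hello world\n'},
)
# A key such as '/data//input.txt' would now raise ValueError at serialize time.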
@@ -1,4 +1,4 @@
- from datetime import datetime, timedelta
+ from datetime import datetime, timedelta, timezone
 
  from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
  from biolib.biolib_binary_format.utils import RemoteEndpoint
@@ -17,13 +17,13 @@ class RemoteJobStorageEndpoint(RemoteEndpoint):
  self._storage_type: Literal['input', 'output'] = storage_type
 
  def get_remote_url(self):
- if not self._presigned_url or datetime.utcnow() > self._expires_at:
+ if not self._presigned_url or not self._expires_at or datetime.now(timezone.utc) > self._expires_at:
  self._presigned_url = BiolibJobApi.get_job_storage_download_url(
  job_auth_token=self._job_auth_token,
  job_uuid=self._job_uuid,
  storage_type='results' if self._storage_type == 'output' else 'input',
  )
- self._expires_at = datetime.utcnow() + timedelta(minutes=8)
+ self._expires_at = datetime.now(timezone.utc) + timedelta(minutes=8)
  # TODO: Use expires at from url
  # parsed_url = urlparse(self._presigned_url)
  # query_params = parse_qs(parsed_url.query)
@@ -1,45 +1,59 @@
  from biolib.biolib_binary_format.utils import IndexableBuffer
+ from biolib.biolib_logging import logger
  from biolib.typing_utils import Iterable
 
 
  class StreamSeeker:
  def __init__(
- self,
- upstream_buffer: IndexableBuffer,
- files_data_start: int,
- files_data_end: int,
- download_chunk_size_in_bytes: int,
+ self,
+ upstream_buffer: IndexableBuffer,
+ files_data_start: int,
+ files_data_end: int,
+ max_chunk_size: int,
  ):
  self._upstream_buffer = upstream_buffer
  self._files_data_end = files_data_end
- self._download_chunk_size_in_bytes = download_chunk_size_in_bytes
+ self._max_chunk_size = max_chunk_size
 
  self._buffer_start = files_data_start
  self._buffer = bytearray()
 
- def seek_and_read(self, file_start: int, file_length: int) -> Iterable[bytes]:
+ def seek_and_read(self, file_start: int, file_length: int, read_ahead_bytes: int = 0) -> Iterable[bytes]:
  assert file_start >= self._buffer_start
- self._buffer = self._buffer[file_start - self._buffer_start:] # Returns empty array if "out of bounds"
+ self._buffer = self._buffer[file_start - self._buffer_start :]
  self._buffer_start = file_start
 
  while True:
  file_byte_count_remaining = file_length - (self._buffer_start - file_start)
- if file_byte_count_remaining == 0:
+ if file_byte_count_remaining <= 0:
  return
 
- start_of_fetch = self._buffer_start + len(self._buffer)
- byte_count_left_in_stream = self._files_data_end - start_of_fetch
-
- if byte_count_left_in_stream != 0:
- # Only fetch if there is still data left upstream
- if self._download_chunk_size_in_bytes > len(self._buffer):
- # Only fetch if size of buffer is below chunk size
- self._buffer.extend(self._upstream_buffer.get_data(
- start=start_of_fetch,
- length=min(byte_count_left_in_stream, self._download_chunk_size_in_bytes),
- ))
-
- bytes_to_yield = self._buffer[:file_byte_count_remaining] # Returns empty array if "out of bounds"
- yield bytes_to_yield
- self._buffer = self._buffer[file_byte_count_remaining:] # Returns empty array if "out of bounds"
- self._buffer_start += len(bytes_to_yield)
+ if len(self._buffer) > 0:
+ take = min(file_byte_count_remaining, len(self._buffer))
+ chunk = self._buffer[:take]
+ if chunk:
+ yield chunk
+ self._buffer = self._buffer[take:]
+ self._buffer_start += take
+ else:
+ start_of_fetch = self._buffer_start + len(self._buffer)
+ bytes_left_in_stream = self._files_data_end - start_of_fetch
+ if bytes_left_in_stream <= 0:
+ logger.error(
+ 'StreamSeeker: no bytes left upstream (start_of_fetch=%d, files_data_end=%d)',
+ start_of_fetch,
+ self._files_data_end,
+ )
+ return
+
+ fetch_size = min(self._max_chunk_size, file_byte_count_remaining + read_ahead_bytes)
+ if fetch_size > bytes_left_in_stream:
+ logger.error(
+ 'StreamSeeker: fetch_size (%d) > bytes_left_in_stream (%d); clamping',
+ fetch_size,
+ bytes_left_in_stream,
+ )
+ fetch_size = bytes_left_in_stream
+
+ fetched_data = self._upstream_buffer.get_data(start=start_of_fetch, length=fetch_size)
+ self._buffer.extend(fetched_data)
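For the reworked StreamSeeker above, the sketch below exercises the new read_ahead_bytes parameter using a duck-typed in-memory object standing in for IndexableBuffer (only the get_data(start, length) call used above is implemented); the payload and sizes are illustrative, not taken from the library:

from biolib.biolib_binary_format.remote_stream_seeker import StreamSeeker


class InMemoryBuffer:
    """Minimal stand-in exposing the get_data(start, length) call StreamSeeker relies on."""

    def __init__(self, data: bytes) -> None:
        self._data = data

    def get_data(self, start: int, length: int) -> bytes:
        return self._data[start:start + length]


payload = b'A' * 100 + b'B' * 50  # two "files" packed back to back
seeker = StreamSeeker(
    upstream_buffer=InMemoryBuffer(payload),
    files_data_start=0,
    files_data_end=len(payload),
    max_chunk_size=32,
)

# Stream the first 100-byte file in chunks of at most 32 bytes, prefetching a
# little extra so the start of the next file is already buffered.
first_file = b''.join(seeker.seek_and_read(file_start=0, file_length=100, read_ahead_bytes=16))
assert first_file == b'A' * 100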
biolib/biolib_logging.py CHANGED
@@ -62,7 +62,7 @@ def _get_no_user_data_logger() -> _BioLibLogger:
 
  # TODO: Simplify by refactoring to env BIOLIB_ENVIRONMENT_IS_CLOUD: boolean
  if os.getenv('BIOLIB_CLOUD_ENVIRONMENT', '').lower() == 'non-enclave':
- handler = logging.FileHandler(filename='/tmp/biolib_no_user_data.log')
+ handler = logging.FileHandler(filename='/biolib/logs/biolib_no_user_data.log')
  formatter = logging.Formatter(_DEFAULT_LOGGER_FORMAT)
  handler.setFormatter(formatter)
  _logger_no_user_data.addHandler(handler)
biolib/cli/__init__.py CHANGED
@@ -5,7 +5,7 @@ import click
 
  from biolib import utils
  from biolib.biolib_logging import logger, logger_no_user_data
- from biolib.cli import auth, data_record, download_container, init, lfs, push, run, runtime, sdk, start
+ from biolib.cli import auth, data_record, index, init, lfs, push, run, runtime, sdk, start
 
 
  @click.version_option(version=utils.BIOLIB_PACKAGE_VERSION, prog_name='pybiolib')
@@ -23,7 +23,6 @@ def cli() -> None:
  cli.add_command(auth.login)
  cli.add_command(auth.logout)
  cli.add_command(auth.whoami)
- cli.add_command(download_container.download_container)
  cli.add_command(init.init)
  cli.add_command(lfs.lfs)
  cli.add_command(push.push)
@@ -31,6 +30,7 @@ cli.add_command(run.run)
  cli.add_command(runtime.runtime)
  cli.add_command(start.start)
  cli.add_command(data_record.data_record)
+ cli.add_command(index.index)
  cli.add_command(sdk.sdk)
 
  # allow this script to be called without poetry in dev e.g. by an IDE debugger