pybiolib 1.2.883__py3-none-any.whl → 1.2.1890__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/__init__.py +33 -10
- biolib/_data_record/data_record.py +220 -126
- biolib/_index/index.py +55 -0
- biolib/_index/query_result.py +103 -0
- biolib/_internal/add_copilot_prompts.py +24 -11
- biolib/_internal/add_gui_files.py +81 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +1 -18
- biolib/_internal/data_record/push_data.py +65 -16
- biolib/_internal/data_record/remote_storage_endpoint.py +18 -13
- biolib/_internal/file_utils.py +48 -0
- biolib/_internal/lfs/cache.py +4 -2
- biolib/_internal/push_application.py +95 -24
- biolib/_internal/runtime.py +2 -0
- biolib/_internal/string_utils.py +13 -0
- biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-general.instructions.md +5 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
- biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
- biolib/_internal/templates/{init_template → github_workflow_template}/.github/workflows/biolib.yml +7 -2
- biolib/_internal/templates/gitignore_template/.gitignore +10 -0
- biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
- biolib/_internal/templates/gui_template/App.tsx +53 -0
- biolib/_internal/templates/gui_template/Dockerfile +27 -0
- biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
- biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
- biolib/_internal/templates/gui_template/index.css +5 -0
- biolib/_internal/templates/gui_template/index.html +13 -0
- biolib/_internal/templates/gui_template/index.tsx +10 -0
- biolib/_internal/templates/gui_template/package.json +27 -0
- biolib/_internal/templates/gui_template/tsconfig.json +24 -0
- biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
- biolib/_internal/templates/gui_template/vite.config.mts +10 -0
- biolib/_internal/templates/init_template/.biolib/config.yml +1 -0
- biolib/_internal/templates/init_template/Dockerfile +5 -1
- biolib/_internal/templates/init_template/run.py +6 -15
- biolib/_internal/templates/init_template/run.sh +1 -0
- biolib/_internal/templates/templates.py +21 -1
- biolib/_internal/utils/__init__.py +47 -0
- biolib/_internal/utils/auth.py +46 -0
- biolib/_internal/utils/job_url.py +33 -0
- biolib/_internal/utils/multinode.py +12 -14
- biolib/_runtime/runtime.py +15 -2
- biolib/_session/session.py +7 -5
- biolib/_shared/__init__.py +0 -0
- biolib/_shared/types/__init__.py +74 -0
- biolib/_shared/types/account.py +12 -0
- biolib/_shared/types/account_member.py +8 -0
- biolib/{_internal → _shared}/types/experiment.py +1 -0
- biolib/_shared/types/resource.py +37 -0
- biolib/_shared/types/resource_deploy_key.py +11 -0
- biolib/{_internal → _shared}/types/resource_version.py +8 -2
- biolib/_shared/types/user.py +19 -0
- biolib/_shared/utils/__init__.py +7 -0
- biolib/_shared/utils/resource_uri.py +75 -0
- biolib/api/client.py +5 -48
- biolib/app/app.py +97 -55
- biolib/biolib_api_client/api_client.py +3 -47
- biolib/biolib_api_client/app_types.py +1 -1
- biolib/biolib_api_client/biolib_app_api.py +31 -6
- biolib/biolib_api_client/biolib_job_api.py +1 -1
- biolib/biolib_api_client/user_state.py +34 -2
- biolib/biolib_binary_format/module_input.py +8 -0
- biolib/biolib_binary_format/remote_endpoints.py +3 -3
- biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
- biolib/biolib_logging.py +1 -1
- biolib/cli/__init__.py +2 -2
- biolib/cli/auth.py +4 -16
- biolib/cli/data_record.py +82 -0
- biolib/cli/index.py +32 -0
- biolib/cli/init.py +393 -71
- biolib/cli/lfs.py +1 -1
- biolib/cli/run.py +9 -6
- biolib/cli/start.py +14 -1
- biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
- biolib/compute_node/job_worker/executors/docker_types.py +1 -1
- biolib/compute_node/job_worker/executors/types.py +6 -5
- biolib/compute_node/job_worker/job_storage.py +2 -1
- biolib/compute_node/job_worker/job_worker.py +155 -90
- biolib/compute_node/job_worker/large_file_system.py +2 -6
- biolib/compute_node/job_worker/network_alloc.py +99 -0
- biolib/compute_node/job_worker/network_buffer.py +240 -0
- biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
- biolib/compute_node/remote_host_proxy.py +163 -79
- biolib/compute_node/utils.py +2 -0
- biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
- biolib/compute_node/webserver/proxy_utils.py +28 -0
- biolib/compute_node/webserver/webserver.py +64 -19
- biolib/experiments/experiment.py +111 -16
- biolib/jobs/job.py +128 -31
- biolib/jobs/job_result.py +74 -34
- biolib/jobs/types.py +1 -0
- biolib/sdk/__init__.py +28 -3
- biolib/typing_utils.py +1 -1
- biolib/utils/cache_state.py +8 -5
- biolib/utils/multipart_uploader.py +24 -18
- biolib/utils/seq_util.py +1 -1
- pybiolib-1.2.1890.dist-info/METADATA +41 -0
- pybiolib-1.2.1890.dist-info/RECORD +177 -0
- {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
- pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
- biolib/_internal/llm_instructions/.github/instructions/style-react-ts.instructions.md +0 -22
- biolib/_internal/templates/init_template/.gitignore +0 -2
- biolib/_internal/types/__init__.py +0 -6
- biolib/_internal/types/resource.py +0 -18
- biolib/biolib_download_container.py +0 -38
- biolib/cli/download_container.py +0 -14
- biolib/utils/app_uri.py +0 -57
- pybiolib-1.2.883.dist-info/METADATA +0 -50
- pybiolib-1.2.883.dist-info/RECORD +0 -148
- pybiolib-1.2.883.dist-info/entry_points.txt +0 -3
- /biolib/{_internal/llm_instructions → _index}/__init__.py +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/general-app-knowledge.instructions.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-python.instructions.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_app_inputs.prompt.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_run_apps.prompt.md +0 -0
- /biolib/{_internal → _shared}/types/app.py +0 -0
- /biolib/{_internal → _shared}/types/data_record.py +0 -0
- /biolib/{_internal → _shared}/types/file_node.py +0 -0
- /biolib/{_internal → _shared}/types/push.py +0 -0
- /biolib/{_internal → _shared}/types/resource_permission.py +0 -0
- /biolib/{_internal → _shared}/types/result.py +0 -0
- /biolib/{_internal → _shared}/types/typing.py +0 -0
- {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info/licenses}/LICENSE +0 -0
biolib/__init__.py
CHANGED
@@ -1,3 +1,4 @@
+# ruff: noqa: I001
 # Imports to hide
 import os
 from urllib.parse import urlparse as _urlparse
@@ -15,6 +16,7 @@ from biolib.jobs.job import Result as _Result
 from biolib import user as _user
 from biolib.typing_utils import List, Optional, cast as _cast
 from biolib._data_record.data_record import DataRecord as _DataRecord
+from biolib._internal.utils.job_url import parse_result_id_or_url as _parse_result_id_or_url

 import biolib.api
 import biolib.app
@@ -22,7 +24,6 @@ import biolib.cli
 import biolib.sdk
 import biolib.utils

-
 # ------------------------------------ Function definitions for public Python API ------------------------------------


@@ -83,43 +84,65 @@ def search(


 def get_job(job_id: str, job_token: Optional[str] = None) -> _Result:
-    r"""Get a job by its ID.
+    r"""Get a job by its ID or full URL.

     Args:
-        job_id (str): The UUID of the job to retrieve
+        job_id (str): The UUID of the job to retrieve, or a full URL to the job.
+            Can be either:
+            - Job UUID (e.g., 'abc123')
+            - Full URL (e.g., 'https://biolib.com/result/abc123/?token=xyz789')
+            - Full URL with token parameter (e.g., 'biolib.com/result/abc123/token=xyz789')
         job_token (str, optional): Authentication token for accessing the job.
             Only needed for jobs that aren't owned by the current user.
+            If the URL contains a token, this parameter is ignored.

     Returns:
         Job: The job object

     Example::

+        >>> # Get by UUID
         >>> job = biolib.get_job('abc123')
-        >>> #
+        >>> # Get with explicit token
         >>> job = biolib.get_job('abc123', job_token='xyz789')
+        >>> # Get by full URL with token
+        >>> job = biolib.get_job('https://biolib.com/result/abc123/?token=xyz789')
+        >>> # Get by URL with inline token format
+        >>> job = biolib.get_job('biolib.com/result/abc123/token=xyz789')
     """
-
+    uuid, token = _parse_result_id_or_url(job_id, job_token)
+    return _Result.create_from_uuid(uuid=uuid, auth_token=token)


 def get_result(result_id: str, result_token: Optional[str] = None) -> _Result:
-    r"""Get a result by its ID.
+    r"""Get a result by its ID or full URL.

     Args:
-        result_id (str): The UUID of the result to retrieve
+        result_id (str): The UUID of the result to retrieve, or a full URL to the result.
+            Can be either:
+            - Result UUID (e.g., 'abc123')
+            - Full URL (e.g., 'https://biolib.com/result/abc123/?token=xyz789')
+            - Full URL with token parameter (e.g., 'biolib.com/result/abc123/token=xyz789')
         result_token (str, optional): Authentication token for accessing the result.
-            Only needed for
+            Only needed for results that aren't owned by the current user.
+            If the URL contains a token, this parameter is ignored.

     Returns:
         Result: The result object

     Example::

+        >>> # Get by UUID
         >>> result = biolib.get_result('abc123')
-        >>> #
+        >>> # Get with explicit token
         >>> result = biolib.get_result('abc123', result_token='xyz789')
+        >>> # Get by full URL with token
+        >>> result = biolib.get_result('https://biolib.com/result/abc123/?token=xyz789')
+        >>> # Get by URL with inline token format
+        >>> result = biolib.get_result('biolib.com/result/abc123/token=xyz789')
     """
-
+    uuid, token = _parse_result_id_or_url(result_id, result_token)
+    return _Result.create_from_uuid(uuid=uuid, auth_token=token)


 def get_data_record(uri: str) -> _DataRecord:
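The new _parse_result_id_or_url helper lets get_job and get_result accept a shared result URL as well as a bare UUID, with a token embedded in the URL taking precedence over the explicit token argument. A minimal usage sketch based on the docstring examples above (the UUID and token values are placeholders, not real identifiers):

    import biolib

    # Bare UUID plus an explicit token passed separately
    job = biolib.get_job('abc123', job_token='xyz789')

    # Shared result URL; the token embedded in the URL is used and job_token would be ignored
    job = biolib.get_job('https://biolib.com/result/abc123/?token=xyz789')

    # Same pattern for results, including the inline token format
    result = biolib.get_result('biolib.com/result/abc123/token=xyz789')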
biolib/_data_record/data_record.py
CHANGED
@@ -3,52 +3,157 @@ from collections import namedtuple
 from datetime import datetime
 from pathlib import Path
 from struct import Struct
-from typing import Callable, Dict, Iterable, List, Optional, Union, cast
+from typing import Callable, Dict, Iterable, Iterator, List, Optional, Union, cast

 from biolib import api
-from biolib._internal import types
-from biolib._internal.data_record import get_data_record_state_from_uri
 from biolib._internal.data_record.data_record import validate_sqlite_v1
 from biolib._internal.data_record.push_data import (
+    _upload_from_iterator,
     push_data_path,
     validate_data_path_and_get_files_and_size_of_directory,
 )
 from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
 from biolib._internal.http_client import HttpClient
-from biolib.
+from biolib._shared import types
+from biolib._shared.types import ResourceDetailedDict, ResourceVersionDetailedDict, ZipFileNodeDict
+from biolib._shared.utils import parse_resource_uri
 from biolib.api import client as api_client
 from biolib.biolib_api_client import BiolibApiClient
-from biolib.biolib_api_client.
+from biolib.biolib_api_client.biolib_app_api import _get_resource_uri_from_str
+from biolib.biolib_api_client.lfs_types import DataRecordInfo
 from biolib.biolib_binary_format import LazyLoadedFile
 from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
 from biolib.biolib_logging import logger
-from biolib.utils.app_uri import parse_app_uri

 PathFilter = Union[str, List[str], Callable[[str], bool]]


 class DataRecord:
-    def __init__(self, _internal_state:
+    def __init__(self, _internal_state: ResourceDetailedDict):
         self._state = _internal_state

     def __repr__(self):
-        return f'DataRecord: {self._state["
+        return f'DataRecord: {self._state["uri"]}'

     @property
     def uri(self) -> str:
-        return self._state['
+        return self._state['uri']

     @property
     def uuid(self) -> str:
-        return self._state['
+        return self._state['uuid']

     @property
     def name(self) -> str:
-        uri_parsed =
-        if not uri_parsed['
+        uri_parsed = parse_resource_uri(self._state['uri'], use_account_as_name_default=False)
+        if not uri_parsed['resource_name']:
             raise ValueError('Expected parameter "resource_uri" to contain resource name')

-        return uri_parsed['
+        return uri_parsed['resource_name']
+
+    @staticmethod
+    def get_by_uri(uri: str) -> 'DataRecord':
+        normalized_uri = _get_resource_uri_from_str(uri)
+        resource_dict: ResourceDetailedDict = api_client.get(path='/resource/', params={'uri': normalized_uri}).json()
+        if resource_dict['type'] != 'data-record':
+            raise Exception(f'Resource "{resource_dict["uri"]}" is not a Data Record')
+
+        return DataRecord(_internal_state=resource_dict)
+
+    @staticmethod
+    def create(destination: str, data_path: Optional[str] = None, record_type: Optional[str] = None) -> 'DataRecord':
+        BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Data Record')
+        if data_path is not None:
+            assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
+        uri_parsed = parse_resource_uri(destination, use_account_as_name_default=False)
+        if uri_parsed['resource_name_normalized']:
+            data_record_uri = destination
+        else:
+            record_name = 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
+            data_record_uri = f'{destination}/{record_name}'
+
+        response = api.client.post(
+            path='/resources/data-records/',
+            data={
+                'uri': data_record_uri,
+                'type': record_type,
+            },
+        )
+        data_record_info: DataRecordInfo = response.json()
+        logger.info(f"Successfully created new Data Record '{data_record_info['uri']}'")
+
+        data_record = DataRecord.get_by_uri(uri=data_record_info['uri'])
+        if data_path is not None:
+            data_record.update(data_path=data_path)
+
+        return data_record
+
+    @staticmethod
+    def fetch(uri: Optional[str] = None, count: Optional[int] = None) -> List['DataRecord']:
+        # TODO: Simplify when backend exposes /api/resources/ instead of /api/apps/
+        max_page_size = 1_000
+        params: Dict[str, Union[str, int]] = {
+            'page_size': str(count or max_page_size),
+            'resource_type': 'data-record',
+        }
+        if uri:
+            uri_parsed = parse_resource_uri(uri, use_account_as_name_default=False)
+            params['account_handle'] = uri_parsed['account_handle_normalized']
+            if uri_parsed['resource_name_normalized']:
+                params['app_name'] = uri_parsed['resource_name_normalized']
+
+        results = api_client.get(path='/apps/', params=params).json()['results']
+        if count is None and len(results) == max_page_size:
+            logger.warning(
+                f'Fetch results exceeded maximum count of {max_page_size}. Some data records might not be fetched.'
+            )
+
+        return [
+            DataRecord(
+                _internal_state=ResourceDetailedDict(
+                    uri=result['resource_uri'],
+                    uuid=result['public_id'],
+                    name=result['name'],
+                    created_at=result['created_at'],
+                    type=result['type'],
+                    description=result['description'],
+                    account_uuid=result['account_id'],
+                    experiment=None,
+                )
+            )
+            for result in results
+        ]
+
+    @staticmethod
+    def clone(
+        source: 'DataRecord',
+        destination: 'DataRecord',
+        on_progress: Optional[Callable[[int, int], None]] = None,
+    ) -> 'DataRecord':
+        BiolibApiClient.assert_is_signed_in(authenticated_action_description='clone a Data Record')
+
+        # pylint: disable=protected-access
+        total_size_in_bytes = source._get_zip_size_bytes()
+
+        if total_size_in_bytes == 0:
+            raise ValueError('Source data record has no data to clone')
+
+        min_chunk_size_bytes = 10_000_000
+        chunk_size_in_bytes = max(min_chunk_size_bytes, int(total_size_in_bytes / 9_000))
+
+        zip_iterator = source._iter_zip_bytes(chunk_size_bytes=chunk_size_in_bytes)
+
+        new_resource_version_uuid = _upload_from_iterator(
+            resource_uuid=destination._state['uuid'],
+            payload_iterator=zip_iterator,
+            payload_size_in_bytes=total_size_in_bytes,
+            publish=True,
+            on_progress=on_progress,
+        )
+        # pylint: enable=protected-access
+
+        logger.info(f"Successfully cloned data to '{destination.uri}'")
+        return DataRecord._get_by_version_uuid(new_resource_version_uuid)

     def list_files(
         self,
@@ -71,9 +176,7 @@ class DataRecord:
         return files

     def download_zip(self, output_path: str):
-        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
-            resource_version_uuid=self._state['resource_version_uuid'],
-        )
+        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(uri=self.uri)
         HttpClient.request(url=remote_storage_endpoint.get_remote_url(), response_path=output_path)

     def download_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
@@ -113,126 +216,37 @@ class DataRecord:
         else:
             raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")

-
-        data_record_version: DataRecordVersion = response.json()
-        resource_version_uuid = data_record_version['uuid']
-
-        push_data_path(
+        new_resource_version_uuid = push_data_path(
             data_path=data_path,
             data_size_in_bytes=data_size_in_bytes,
             files_to_zip=files_to_zip,
-
+            resource_uuid=self._state['uuid'],
             chunk_size_in_mb=chunk_size_in_mb,
+            publish=True,
         )

-
-
-
-        )
+        updated_record = DataRecord._get_by_version_uuid(new_resource_version_uuid)
+        self._state = updated_record._state  # pylint: disable=protected-access
+        logger.info(f"Successfully pushed a new Data Record version '{self.uri}'")

-
-
+    def delete(self) -> None:
+        """Delete the data record.

-
-
-
+        Example::
+            >>> record = DataRecord.get_by_uri("account/data-record")
+            >>> record.delete()
+        """
+        try:
+            api_client.delete(path=f'/apps/{self.uuid}/')
+            logger.info(f'Data record {self.uri} deleted')
+        except Exception as error:
+            raise Exception(f'Failed to delete data record {self.uri} due to: {error}') from error

     @staticmethod
-    def
-
-
-
-        uri_parsed = parse_app_uri(destination, use_account_as_name_default=False)
-        if uri_parsed['app_name_normalized']:
-            data_record_uri = destination
-        else:
-            record_name = 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
-            data_record_uri = f'{destination}/{record_name}'
-
-        response = api.client.post(
-            path='/resources/data-records/',
-            data={
-                'uri': data_record_uri,
-                'type': record_type,
-            },
-        )
-        data_record_info: DataRecordInfo = response.json()
-        logger.info(f"Successfully created new Data Record '{data_record_info['uri']}'")
-
-        data_record = DataRecord.get_by_uri(uri=data_record_info['uri'])
-        if data_path is not None:
-            data_record.update(data_path=data_path)
-
-        return data_record
-
-    @staticmethod
-    def fetch(uri: Optional[str] = None, count: Optional[int] = None) -> List['DataRecord']:
-        max_page_size = 1_000
-        params: Dict[str, Union[str, int]] = {
-            'page_size': str(count or max_page_size),
-            'resource_type': 'data-record',
-        }
-        if uri:
-            uri_parsed = parse_app_uri(uri, use_account_as_name_default=False)
-            params['account_handle'] = uri_parsed['account_handle_normalized']
-            if uri_parsed['app_name_normalized']:
-                params['app_name'] = uri_parsed['app_name_normalized']
-
-        results = api_client.get(path='/apps/', params=params).json()['results']
-        if count is None and len(results) == max_page_size:
-            logger.warning(
-                f'Fetch results exceeded maximum count of {max_page_size}. Some data records might not be fetched.'
-            )
-
-        return [
-            DataRecord(
-                _internal_state={
-                    'resource_uri': result['resource_uri'],
-                    'resource_uuid': result['public_id'],
-                    'resource_version_uuid': result['active_version'],
-                }
-            )
-            for result in results
-        ]
-
-    def _fetch_files(
-        self,
-        max_count: Optional[int],
-        path_filter: Optional[PathFilter] = None,
-    ) -> Iterable[LazyLoadedFile]:
-        if path_filter and not (isinstance(path_filter, (str, list)) or callable(path_filter)):
-            raise Exception('Expected path_filter to be a string, a list of strings or a function')
-
-        path_filters = (
-            [path_filter] if isinstance(path_filter, str) else path_filter if isinstance(path_filter, list) else []
-        )
-
-        resource_version_uuid = self._state['resource_version_uuid']
-        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(resource_version_uuid)
-
-        page: Optional[int] = 1
-        yielded_files: int = 0
-        while page:
-            response = api.client.post(
-                path=f'/proxy/files/data-record-versions/{resource_version_uuid}/query/',
-                data=dict(page=page, page_size=1_000, path_filters=path_filters),
-            ).json()
-
-            for file_node_dict in cast(List[ZipFileNodeDict], response['results']):
-                if file_node_dict['is_dir']:
-                    continue
-
-                if callable(path_filter) and not path_filter(file_node_dict['dir_path'] + file_node_dict['name']):
-                    continue
-
-                yield self._get_file(remote_storage_endpoint, file_node_dict)
-                yielded_files += 1
-
-                if max_count is not None and yielded_files >= max_count:
-                    page = None
-                    break
-
-            page = page + 1 if page is not None and response['page_count'] > page else None
+    def _get_by_version_uuid(version_uuid: str) -> 'DataRecord':
+        response = api.client.get(path=f'/lfs/versions/{version_uuid}/')
+        version_info = response.json()
+        return DataRecord.get_by_uri(version_info['uri'])

     @staticmethod
     def _get_file(
@@ -282,5 +296,85 @@ class DataRecord:
             start_func=file_start_func,
         )

+    def _get_version(self) -> ResourceVersionDetailedDict:
+        if 'version' not in self._state:
+            # Version might be missing in state if initialized from the fetch method (list of data records)
+            self._state = self.get_by_uri(self.uri)._state
+
+        version = self._state.get('version')
+        if version is None:
+            raise Exception(f'Data Record "{self._state["uri"]}" has no active version')
+
+        return version
+
+    def _fetch_files(
+        self,
+        max_count: Optional[int],
+        path_filter: Optional[PathFilter] = None,
+    ) -> Iterable[LazyLoadedFile]:
+        if path_filter and not (isinstance(path_filter, (str, list)) or callable(path_filter)):
+            raise Exception('Expected path_filter to be a string, a list of strings or a function')
+
+        path_filters = (
+            [path_filter] if isinstance(path_filter, str) else path_filter if isinstance(path_filter, list) else []
+        )
+
+        version = self._get_version()
+        resource_version_uuid = version['uuid']
+        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(uri=self.uri)
+
+        page: Optional[int] = 1
+        yielded_files: int = 0
+        while page:
+            response = api.client.post(
+                path=f'/proxy/files/data-record-versions/{resource_version_uuid}/query/',
+                data=dict(page=page, page_size=1_000, path_filters=path_filters),
+            ).json()
+
+            for file_node_dict in cast(List[ZipFileNodeDict], response['results']):
+                if file_node_dict['is_dir']:
+                    continue
+
+                if callable(path_filter) and not path_filter(file_node_dict['dir_path'] + file_node_dict['name']):
+                    continue
+
+                yield self._get_file(remote_storage_endpoint, file_node_dict)
+                yielded_files += 1
+
+                if max_count is not None and yielded_files >= max_count:
+                    page = None
+                    break
+
+            page = page + 1 if page is not None and response['page_count'] > page else None
+
     def _get_detailed_dict(self) -> types.DataRecordDetailedDict:
         return cast(types.DataRecordDetailedDict, api_client.get(f'/resources/data-records/{self.uuid}/').json())
+
+    def _get_zip_size_bytes(self) -> int:
+        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(uri=self.uri)
+        presigned_url = remote_storage_endpoint.get_remote_url()
+        response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-0'})
+        content_range = response.headers.get('Content-Range', '')
+        if not content_range or '/' not in content_range:
+            raise ValueError('Unable to determine zip size: Content-Range header missing or invalid')
+        total_size = int(content_range.split('/')[1])
+        return total_size
+
+    def _iter_zip_bytes(self, chunk_size_bytes: int) -> Iterator[bytes]:
+        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(uri=self.uri)
+        presigned_url = remote_storage_endpoint.get_remote_url()
+        response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-0'})
+        content_range = response.headers.get('Content-Range', '')
+        if not content_range or '/' not in content_range:
+            raise ValueError('Unable to determine zip size: Content-Range header missing or invalid')
+        total_size = int(content_range.split('/')[1])
+
+        for start in range(0, total_size, chunk_size_bytes):
+            end = min(start + chunk_size_bytes - 1, total_size - 1)
+            presigned_url = remote_storage_endpoint.get_remote_url()
+            response = HttpClient.request(
+                url=presigned_url,
+                headers={'range': f'bytes={start}-{end}'},
+                timeout_in_seconds=300,
+            )
+            yield response.content
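Taken together, these hunks move DataRecord onto the shared resource types and add get_by_uri, create, fetch, clone and delete as public entry points; clone streams the source zip in ranged chunks sized at max(10 MB, total_size / 9000) and re-uploads it via _upload_from_iterator with publish=True. A short usage sketch grounded in the signatures above (all URIs are placeholders; the two-int on_progress callback is assumed to report bytes done and total bytes):

    from biolib._data_record.data_record import DataRecord

    # Placeholders: replace 'account/source-record' and 'account/clone-target' with real resource URIs.
    source = DataRecord.get_by_uri('account/source-record')
    destination = DataRecord.create(destination='account/clone-target')

    # Streams the source zip in ranged chunks and publishes a new version on the destination resource.
    cloned = DataRecord.clone(
        source=source,
        destination=destination,
        on_progress=lambda done, total: print(f'{done}/{total} bytes'),  # argument order assumed
    )

    cloned.delete()  # removes the record created for this example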
biolib/_index/index.py
ADDED
@@ -0,0 +1,55 @@
+import json
+from typing import Any, Dict
+
+from biolib import api
+from biolib._shared.types import ResourceDetailedDict
+from biolib.biolib_api_client import BiolibApiClient
+from biolib.biolib_api_client.biolib_app_api import _get_resource_uri_from_str
+from biolib.biolib_logging import logger
+
+
+class Index:
+    def __init__(self, _internal_state: ResourceDetailedDict):
+        self._state = _internal_state
+
+    def __repr__(self) -> str:
+        return f'Index: {self._state["uri"]}'
+
+    @property
+    def uri(self) -> str:
+        return self._state['uri']
+
+    @property
+    def id(self) -> str:
+        return f'{self._state["account_uuid"]}.{self._state["uuid"]}'.replace('-', '_')
+
+    @staticmethod
+    def get_by_uri(uri: str) -> 'Index':
+        normalized_uri = _get_resource_uri_from_str(uri)
+        response: ResourceDetailedDict = api.client.get(path='/resource/', params={'uri': normalized_uri}).json()
+        if response['type'] != 'index':
+            raise Exception(f'Resource "{response["uri"]}" is not an Index')
+        return Index(_internal_state=response)
+
+    @staticmethod
+    def create(uri: str, config: Dict[str, Any]) -> str:
+        BiolibApiClient.assert_is_signed_in(authenticated_action_description='create an Index')
+
+        response = api.client.post(
+            path='/resources/indexes/',
+            data={
+                'uri': uri,
+                'index_config': config,
+            },
+        )
+        result = response.json()
+        created_uri: str = result['uri']
+        logger.info(f"Successfully created Index '{created_uri}'")
+        return created_uri
+
+    @staticmethod
+    def create_from_config_file(uri: str, config_path: str) -> str:
+        with open(config_path) as config_file:
+            index_config = json.load(config_file)
+
+        return Index.create(uri=uri, config=index_config)
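Index is a thin wrapper around the new /resources/indexes/ endpoint: create posts a URI plus an index_config dictionary, and get_by_uri resolves a resource URI and checks that its type is 'index'. A usage sketch with placeholder names (the expected index_config schema is not shown in this diff):

    from biolib._index.index import Index

    # 'myaccount/my-index' and 'index_config.json' are placeholders.
    created_uri = Index.create_from_config_file(uri='myaccount/my-index', config_path='index_config.json')

    index = Index.get_by_uri(created_uri)
    print(index.id)  # '<account_uuid>.<uuid>' with dashes replaced by underscores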
biolib/_index/query_result.py
ADDED
@@ -0,0 +1,103 @@
+import json
+from typing import Any, Dict, Iterator, List, Optional, Union
+
+from biolib import api
+from biolib._internal.http_client import HttpResponse
+from biolib._internal.utils import base64_encode_string
+from biolib._internal.utils.auth import decode_jwt_without_checking_signature
+from biolib._runtime.runtime import Runtime
+from biolib.biolib_api_client import BiolibApiClient
+from biolib.biolib_errors import BioLibError
+
+
+def _get_index_basic_auth_header() -> Optional[str]:
+    if Runtime.check_is_environment_biolib_app():
+        return None
+
+    deprecated_api_client = BiolibApiClient.get()
+    deprecated_api_client.refresh_access_token()
+    access_token = deprecated_api_client.access_token
+    if not access_token:
+        return None
+
+    decoded_token = decode_jwt_without_checking_signature(access_token)
+    user_uuid: Optional[str] = decoded_token['payload'].get('public_id')
+    if not user_uuid:
+        return None
+
+    normalized_user_uuid = user_uuid.replace('-', '_')
+    credentials = f'biolib_user|{normalized_user_uuid}:{access_token}'
+    return f'Basic {base64_encode_string(credentials)}'
+
+
+class IndexQueryResult:
+    """Result wrapper for index query responses."""
+
+    def __init__(self, response: HttpResponse, data_format: str):
+        self._response = response
+        self._data_format = data_format
+        self._json_data: Optional[Dict[str, Any]] = None
+        if data_format == 'json':
+            content = self._response.content
+            if content:
+                self._json_data = json.loads(content.decode('utf-8'))
+
+    def iter_rows(self) -> Iterator[Dict[str, Any]]:
+        """Return an iterator over the rows in the query result.
+
+        Returns:
+            Iterator[Dict[str, Any]]: An iterator yielding each row as a dictionary.
+        """
+        if self._json_data is None:
+            raise BioLibError('iter_rows() is only available when data_format is "json"')
+        return iter(self._json_data['data'])
+
+
+def query_index(
+    query: str,
+    data: Optional[Union[List[Dict[str, Any]], bytes]] = None,
+    data_format: str = 'json',
+) -> IndexQueryResult:
+    """Query the BioLib index with a SQL-like query.
+
+    Args:
+        query: The SQL query string to execute.
+        data: Optional input data. If data_format is "json", this should be a list of
+            dictionaries that will be JSON encoded. Otherwise, pass raw bytes.
+        data_format: The format for the query. Defaults to "json".
+
+    Returns:
+        IndexQueryResult: A result object wrapping the query response.
+
+    Raises:
+        BioLibError: If the query fails or returns a non-successful HTTP status code.
+    """
+    data_format = data_format.lower()
+
+    params: Dict[str, Union[str, int]] = {'default_format': data_format.upper()}
+    if data is not None:
+        params['query'] = query
+
+    if data is not None:
+        if data_format == 'json':
+            body: bytes = '\n'.join(json.dumps(item, ensure_ascii=False) for item in data).encode('utf-8')
+        else:
+            body = data  # type: ignore[assignment]
+    else:
+        body = query.encode('utf-8')
+
+    response = api.client.post(
+        path='proxy/index',
+        data=body,
+        params=params,
+        headers={
+            'Content-Type': 'text/plain; charset=utf-8',
+            'Authorization': _get_index_basic_auth_header(),
+        },
+        authenticate=False,
+    )
+
+    if response.status_code < 200 or response.status_code >= 300:
+        raise BioLibError(f'Index query failed with status code {response.status_code}: {response.text}')
+
+    return IndexQueryResult(response, data_format)
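query_index sends the request to the proxy/index endpoint with a basic-auth header derived from the current access token (skipped when running inside a BioLib app): without input data the SQL string itself is the request body, while with data the query moves to a URL parameter and JSON rows are encoded as newline-delimited JSON. A usage sketch with placeholder SQL (whether a given statement is valid for a particular index depends on the backend, which this diff does not show):

    from biolib._index.query_result import query_index

    # Plain query: the SQL string is sent as the request body.
    result = query_index(query='SELECT 1')
    for row in result.iter_rows():
        print(row)

    # Query with input rows: rows are sent as newline-delimited JSON and the query is passed as a URL parameter.
    result = query_index(query='SELECT count(*) FROM input', data=[{'value': 1}, {'value': 2}])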