pybiolib 1.1.1730__tar.gz → 1.1.1990__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/PKG-INFO +1 -2
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/__init__.py +8 -2
- pybiolib-1.1.1990/biolib/_internal/data_record/__init__.py +1 -0
- pybiolib-1.1.1990/biolib/_internal/data_record/data_record.py +166 -0
- pybiolib-1.1.1990/biolib/_internal/data_record/remote_storage_endpoint.py +27 -0
- pybiolib-1.1.1990/biolib/_internal/fuse_mount/__init__.py +1 -0
- pybiolib-1.1.1990/biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/_internal/http_client.py +42 -23
- pybiolib-1.1.1990/biolib/_internal/libs/__init__.py +1 -0
- pybiolib-1.1.1990/biolib/_internal/libs/fusepy/__init__.py +1257 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/_internal/push_application.py +22 -37
- pybiolib-1.1.1990/biolib/_internal/runtime.py +96 -0
- pybiolib-1.1.1990/biolib/_internal/utils/__init__.py +18 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/app/app.py +38 -72
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/app/search_apps.py +8 -12
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/api_client.py +14 -9
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/app_types.py +2 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/auth.py +0 -12
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/biolib_app_api.py +5 -8
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/job_types.py +2 -1
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/remote_endpoints.py +12 -10
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/utils.py +41 -4
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/__init__.py +6 -2
- pybiolib-1.1.1990/biolib/cli/auth.py +58 -0
- pybiolib-1.1.1990/biolib/cli/data_record.py +43 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/download_container.py +3 -1
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/init.py +1 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/lfs.py +39 -9
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/run.py +3 -2
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/start.py +1 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/cloud_utils/cloud_utils.py +4 -3
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/cache_state.py +1 -1
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/docker_executor.py +126 -112
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_storage.py +3 -4
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_worker.py +34 -15
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/remote_host_proxy.py +67 -29
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/worker_thread.py +2 -2
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/experiments/experiment.py +29 -30
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/jobs/job.py +119 -43
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/jobs/job_result.py +16 -16
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/jobs/types.py +1 -1
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/lfs/__init__.py +0 -2
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/lfs/utils.py +23 -107
- pybiolib-1.1.1990/biolib/runtime/__init__.py +13 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/sdk/__init__.py +17 -4
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/user/sign_in.py +8 -12
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/__init__.py +1 -1
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/app_uri.py +11 -4
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/cache_state.py +2 -2
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/multipart_uploader.py +23 -16
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/seq_util.py +29 -13
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/zip/remote_zip.py +9 -17
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/pyproject.toml +14 -12
- pybiolib-1.1.1730/biolib/biolib_api_client/biolib_account_api.py +0 -8
- pybiolib-1.1.1730/biolib/biolib_api_client/biolib_large_file_system_api.py +0 -34
- pybiolib-1.1.1730/biolib/runtime/__init__.py +0 -1
- pybiolib-1.1.1730/biolib/runtime/results.py +0 -20
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/LICENSE +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/README.md +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/_internal/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/api/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/api/client.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/app/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/biolib_job_api.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/common_types.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/lfs_types.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/user_state.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/base_bbf_package.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/file_in_container.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/module_input.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/module_output_v2.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/remote_stream_seeker.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/saved_job.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/stdout_and_stderr.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/system_exception.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/system_status_update.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_docker_client/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_download_container.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_errors.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_logging.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/push.py +1 -1
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/runtime.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/.gitignore +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/cloud_utils/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/cache_types.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/docker_image_cache.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/docker_types.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/tars/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/types.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/large_file_system.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/mappings.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/utilization_reporter_thread.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/utils.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/socker_listener_thread.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/socket_sender_thread.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/utils.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/gunicorn_flask_application.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/webserver.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/webserver_types.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/webserver_utils.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/experiments/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/experiments/types.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/jobs/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/lfs/cache.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/tables.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/templates/__init__.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/templates/example_app.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/typing_utils.py +0 -0
- {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/user/__init__.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: pybiolib
|
3
|
-
Version: 1.1.1730
|
3
|
+
Version: 1.1.1990
|
4
4
|
Summary: BioLib Python Client
|
5
5
|
Home-page: https://github.com/biolib
|
6
6
|
License: MIT
|
@@ -25,7 +25,6 @@ Requires-Dist: flask (>=2.0.1) ; extra == "compute-node"
|
|
25
25
|
Requires-Dist: gunicorn (>=20.1.0) ; extra == "compute-node"
|
26
26
|
Requires-Dist: importlib-metadata (>=1.6.1)
|
27
27
|
Requires-Dist: pyyaml (>=5.3.1)
|
28
|
-
Requires-Dist: requests (>=2.25.1)
|
29
28
|
Requires-Dist: rich (>=12.4.4)
|
30
29
|
Requires-Dist: typing_extensions (>=3.10.0) ; python_version < "3.8"
|
31
30
|
Description-Content-Type: text/markdown
|
@@ -13,10 +13,12 @@ from biolib.biolib_api_client import BiolibApiClient as _BioLibApiClient, App
|
|
13
13
|
from biolib.jobs import Job as _Job
|
14
14
|
from biolib import user as _user
|
15
15
|
from biolib.typing_utils import List, Optional
|
16
|
+
from biolib._internal.data_record import DataRecord as _DataRecord
|
16
17
|
|
17
18
|
import biolib.api
|
18
19
|
import biolib.app
|
19
20
|
import biolib.cli
|
21
|
+
import biolib.sdk
|
20
22
|
import biolib.utils
|
21
23
|
|
22
24
|
|
@@ -33,8 +35,8 @@ def load(uri: str) -> _BioLibApp:
|
|
33
35
|
def search(
|
34
36
|
search_query: Optional[str] = None,
|
35
37
|
team: Optional[str] = None,
|
36
|
-
count: int = 100
|
37
|
-
|
38
|
+
count: int = 100,
|
39
|
+
) -> List[str]:
|
38
40
|
apps: List[str] = search_apps(search_query, team, count)
|
39
41
|
return apps
|
40
42
|
|
@@ -47,6 +49,10 @@ def fetch_jobs(count: int = 25) -> List[_Job]:
|
|
47
49
|
return _Job.fetch_jobs(count)
|
48
50
|
|
49
51
|
|
52
|
+
def fetch_data_records(uri: Optional[str] = None, count: Optional[int] = None) -> List[_DataRecord]:
|
53
|
+
return _DataRecord.fetch(uri, count)
|
54
|
+
|
55
|
+
|
50
56
|
def get_experiment(name: str) -> Experiment:
|
51
57
|
return Experiment(name)
|
52
58
|
|
@@ -0,0 +1 @@
|
|
1
|
+
from .data_record import DataRecord
|
@@ -0,0 +1,166 @@
|
|
1
|
+
import os
|
2
|
+
from collections import namedtuple
|
3
|
+
from datetime import datetime
|
4
|
+
from fnmatch import fnmatch
|
5
|
+
from struct import Struct
|
6
|
+
from typing import Callable, Dict, List, Optional, Union, cast
|
7
|
+
|
8
|
+
from biolib import lfs
|
9
|
+
from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
|
10
|
+
from biolib._internal.http_client import HttpClient
|
11
|
+
from biolib.api import client as api_client
|
12
|
+
from biolib.biolib_api_client import AppGetResponse
|
13
|
+
from biolib.biolib_binary_format import LazyLoadedFile
|
14
|
+
from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
|
15
|
+
from biolib.biolib_logging import logger
|
16
|
+
from biolib.utils.app_uri import parse_app_uri
|
17
|
+
from biolib.utils.zip.remote_zip import RemoteZip # type: ignore
|
18
|
+
|
19
|
+
PathFilter = Union[str, Callable[[str], bool]]
|
20
|
+
|
21
|
+
|
22
|
+
class DataRecord:
|
23
|
+
def __init__(self, uri: str):
|
24
|
+
self._uri = uri
|
25
|
+
uri_parsed = parse_app_uri(uri, use_account_as_name_default=False)
|
26
|
+
if not uri_parsed['app_name']:
|
27
|
+
raise ValueError('Expected parameter "uri" to contain resource name')
|
28
|
+
|
29
|
+
self._name = uri_parsed['app_name']
|
30
|
+
|
31
|
+
def __repr__(self):
|
32
|
+
return f'DataRecord: {self._uri}'
|
33
|
+
|
34
|
+
@property
|
35
|
+
def uri(self) -> str:
|
36
|
+
return self._uri
|
37
|
+
|
38
|
+
@property
|
39
|
+
def name(self) -> str:
|
40
|
+
return self._name
|
41
|
+
|
42
|
+
def list_files(self, path_filter: Optional[PathFilter] = None) -> List[LazyLoadedFile]:
|
43
|
+
app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': self._uri}).json()
|
44
|
+
remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
|
45
|
+
resource_version_uuid=app_response['app_version']['public_id'],
|
46
|
+
)
|
47
|
+
files: List[LazyLoadedFile] = []
|
48
|
+
with RemoteZip(url=remote_storage_endpoint.get_remote_url()) as remote_zip:
|
49
|
+
central_directory = remote_zip.get_central_directory()
|
50
|
+
for file_info in central_directory.values():
|
51
|
+
files.append(self._get_file(remote_storage_endpoint, file_info))
|
52
|
+
|
53
|
+
return self._get_filtered_files(files=files, path_filter=path_filter) if path_filter else files
|
54
|
+
|
55
|
+
def download_zip(self, output_path: str):
|
56
|
+
app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': self._uri}).json()
|
57
|
+
remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
|
58
|
+
resource_version_uuid=app_response['app_version']['public_id'],
|
59
|
+
)
|
60
|
+
HttpClient.request(url=remote_storage_endpoint.get_remote_url(), response_path=output_path)
|
61
|
+
|
62
|
+
def download_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
|
63
|
+
filtered_files = self.list_files(path_filter=path_filter)
|
64
|
+
|
65
|
+
if len(filtered_files) == 0:
|
66
|
+
logger.debug('No files to save')
|
67
|
+
return
|
68
|
+
|
69
|
+
for file in filtered_files:
|
70
|
+
file_path = os.path.join(output_dir, file.path)
|
71
|
+
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
72
|
+
with open(file_path, mode='wb') as file_handle:
|
73
|
+
for chunk in file.get_data_iterator():
|
74
|
+
file_handle.write(chunk)
|
75
|
+
|
76
|
+
def save_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
|
77
|
+
self.download_files(output_dir=output_dir, path_filter=path_filter)
|
78
|
+
|
79
|
+
@staticmethod
|
80
|
+
def create(destination: str, data_path: str, name: Optional[str] = None) -> 'DataRecord':
|
81
|
+
assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
|
82
|
+
record_name = name if name else 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
|
83
|
+
record_uri = lfs.create_large_file_system(lfs_uri=f'{destination}/{record_name}')
|
84
|
+
record_version_uri = lfs.push_large_file_system(lfs_uri=record_uri, input_dir=data_path)
|
85
|
+
return DataRecord(uri=record_version_uri)
|
86
|
+
|
87
|
+
@staticmethod
|
88
|
+
def fetch(uri: Optional[str] = None, count: Optional[int] = None) -> List['DataRecord']:
|
89
|
+
max_page_size = 1_000
|
90
|
+
params: Dict[str, Union[str, int]] = {
|
91
|
+
'page_size': str(count or max_page_size),
|
92
|
+
'resource_type': 'data-record',
|
93
|
+
}
|
94
|
+
if uri:
|
95
|
+
uri_parsed = parse_app_uri(uri, use_account_as_name_default=False)
|
96
|
+
params['account_handle'] = uri_parsed['account_handle_normalized']
|
97
|
+
if uri_parsed['app_name_normalized']:
|
98
|
+
params['app_name'] = uri_parsed['app_name_normalized']
|
99
|
+
|
100
|
+
results = api_client.get(path='/apps/', params=params).json()['results']
|
101
|
+
if count is None and len(results) == max_page_size:
|
102
|
+
logger.warning(
|
103
|
+
f'Fetch results exceeded maximum count of {max_page_size}. Some data records might not be fetched.'
|
104
|
+
)
|
105
|
+
|
106
|
+
return [DataRecord(result['resource_uri']) for result in results]
|
107
|
+
|
108
|
+
@staticmethod
|
109
|
+
def _get_file(remote_storage_endpoint: DataRecordRemoteStorageEndpoint, file_info: Dict) -> LazyLoadedFile:
|
110
|
+
local_file_header_signature_bytes = b'\x50\x4b\x03\x04'
|
111
|
+
local_file_header_struct = Struct('<H2sHHHIIIHH')
|
112
|
+
LocalFileHeader = namedtuple(
|
113
|
+
'LocalFileHeader',
|
114
|
+
(
|
115
|
+
'version',
|
116
|
+
'flags',
|
117
|
+
'compression_raw',
|
118
|
+
'mod_time',
|
119
|
+
'mod_date',
|
120
|
+
'crc_32_expected',
|
121
|
+
'compressed_size_raw',
|
122
|
+
'uncompressed_size_raw',
|
123
|
+
'file_name_len',
|
124
|
+
'extra_field_len',
|
125
|
+
),
|
126
|
+
)
|
127
|
+
|
128
|
+
local_file_header_start = file_info['header_offset'] + len(local_file_header_signature_bytes)
|
129
|
+
local_file_header_end = local_file_header_start + local_file_header_struct.size
|
130
|
+
|
131
|
+
def file_start_func() -> int:
|
132
|
+
local_file_header_response = HttpClient.request(
|
133
|
+
url=remote_storage_endpoint.get_remote_url(),
|
134
|
+
headers={'range': f'bytes={local_file_header_start}-{local_file_header_end - 1}'},
|
135
|
+
timeout_in_seconds=300,
|
136
|
+
)
|
137
|
+
local_file_header = LocalFileHeader._make(
|
138
|
+
local_file_header_struct.unpack(local_file_header_response.content)
|
139
|
+
)
|
140
|
+
file_start: int = (
|
141
|
+
local_file_header_end + local_file_header.file_name_len + local_file_header.extra_field_len
|
142
|
+
)
|
143
|
+
return file_start
|
144
|
+
|
145
|
+
return LazyLoadedFile(
|
146
|
+
buffer=RemoteIndexableBuffer(endpoint=remote_storage_endpoint),
|
147
|
+
length=file_info['file_size'],
|
148
|
+
path=file_info['filename'],
|
149
|
+
start=None,
|
150
|
+
start_func=file_start_func,
|
151
|
+
)
|
152
|
+
|
153
|
+
@staticmethod
|
154
|
+
def _get_filtered_files(files: List[LazyLoadedFile], path_filter: PathFilter) -> List[LazyLoadedFile]:
|
155
|
+
if not (isinstance(path_filter, str) or callable(path_filter)):
|
156
|
+
raise Exception('Expected path_filter to be a string or a function')
|
157
|
+
|
158
|
+
if callable(path_filter):
|
159
|
+
return list(filter(lambda x: path_filter(x.path), files)) # type: ignore
|
160
|
+
|
161
|
+
glob_filter = cast(str, path_filter)
|
162
|
+
|
163
|
+
def _filter_function(file: LazyLoadedFile) -> bool:
|
164
|
+
return fnmatch(file.path, glob_filter)
|
165
|
+
|
166
|
+
return list(filter(_filter_function, files))
|
@@ -0,0 +1,27 @@
|
|
1
|
+
from datetime import datetime, timedelta
|
2
|
+
|
3
|
+
from biolib.api import client as api_client
|
4
|
+
from biolib.biolib_api_client.lfs_types import LargeFileSystemVersion
|
5
|
+
from biolib.biolib_binary_format.utils import RemoteEndpoint
|
6
|
+
from biolib.biolib_logging import logger
|
7
|
+
|
8
|
+
|
9
|
+
class DataRecordRemoteStorageEndpoint(RemoteEndpoint):
|
10
|
+
def __init__(self, resource_version_uuid: str):
|
11
|
+
self._resource_version_uuid: str = resource_version_uuid
|
12
|
+
self._expires_at = None
|
13
|
+
self._presigned_url = None
|
14
|
+
|
15
|
+
def get_remote_url(self):
|
16
|
+
if not self._presigned_url or datetime.utcnow() > self._expires_at:
|
17
|
+
lfs_version: LargeFileSystemVersion = api_client.get(
|
18
|
+
path=f'/lfs/versions/{self._resource_version_uuid}/',
|
19
|
+
).json()
|
20
|
+
self._presigned_url = lfs_version['presigned_download_url']
|
21
|
+
self._expires_at = datetime.utcnow() + timedelta(minutes=8)
|
22
|
+
logger.debug(
|
23
|
+
f'DataRecord "{self._resource_version_uuid}" fetched presigned URL '
|
24
|
+
f'with expiry at {self._expires_at.isoformat()}'
|
25
|
+
)
|
26
|
+
|
27
|
+
return self._presigned_url
|
@@ -0,0 +1 @@
|
|
1
|
+
from .experiment_fuse_mount import ExperimentFuseMount
|
@@ -0,0 +1,209 @@
|
|
1
|
+
import errno
|
2
|
+
import os
|
3
|
+
import stat
|
4
|
+
from datetime import datetime, timezone
|
5
|
+
from time import time
|
6
|
+
|
7
|
+
from biolib._internal.libs.fusepy import FUSE, FuseOSError, Operations
|
8
|
+
from biolib.biolib_errors import BioLibError
|
9
|
+
from biolib.jobs import Job
|
10
|
+
from biolib.typing_utils import Dict, List, Optional, Tuple, TypedDict
|
11
|
+
|
12
|
+
|
13
|
+
class _AttributeDict(TypedDict):
|
14
|
+
st_atime: int
|
15
|
+
st_ctime: int
|
16
|
+
st_gid: int
|
17
|
+
st_mode: int
|
18
|
+
st_mtime: int
|
19
|
+
st_nlink: int
|
20
|
+
st_size: int
|
21
|
+
st_uid: int
|
22
|
+
|
23
|
+
|
24
|
+
_SUCCESS_CODE = 0
|
25
|
+
|
26
|
+
|
27
|
+
class ExperimentFuseMount(Operations):
|
28
|
+
def __init__(self, experiment):
|
29
|
+
self._experiment = experiment
|
30
|
+
self._job_names_map: Optional[Dict[str, Job]] = None
|
31
|
+
self._jobs_last_fetched_at: float = 0.0
|
32
|
+
self._mounted_at_epoch_seconds: int = int(time())
|
33
|
+
|
34
|
+
@staticmethod
|
35
|
+
def mount_experiment(experiment, mount_path: str) -> None:
|
36
|
+
FUSE(
|
37
|
+
operations=ExperimentFuseMount(experiment),
|
38
|
+
mountpoint=mount_path,
|
39
|
+
nothreads=True,
|
40
|
+
foreground=True,
|
41
|
+
allow_other=False,
|
42
|
+
)
|
43
|
+
|
44
|
+
def getattr(self, path: str, fh=None) -> _AttributeDict:
|
45
|
+
if path == '/':
|
46
|
+
return self._get_directory_attributes(timestamp_epoch_seconds=self._mounted_at_epoch_seconds)
|
47
|
+
|
48
|
+
job, path_in_job = self._parse_path(path)
|
49
|
+
job_finished_at_epoch_seconds: int = int(
|
50
|
+
datetime.fromisoformat(job.to_dict()['finished_at'].rstrip('Z')).replace(tzinfo=timezone.utc).timestamp()
|
51
|
+
)
|
52
|
+
|
53
|
+
if path_in_job == '/':
|
54
|
+
return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)
|
55
|
+
|
56
|
+
try:
|
57
|
+
file = job.get_output_file(path_in_job)
|
58
|
+
return self._get_file_attributes(
|
59
|
+
timestamp_epoch_seconds=job_finished_at_epoch_seconds,
|
60
|
+
size_in_bytes=file.length,
|
61
|
+
)
|
62
|
+
except BioLibError:
|
63
|
+
# file not found
|
64
|
+
pass
|
65
|
+
|
66
|
+
file_paths_in_job = [file.path for file in job.list_output_files()]
|
67
|
+
|
68
|
+
for file_path_in_job in file_paths_in_job:
|
69
|
+
if file_path_in_job.startswith(path_in_job):
|
70
|
+
return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)
|
71
|
+
|
72
|
+
raise FuseOSError(errno.ENOENT) from None # No such file or directory
|
73
|
+
|
74
|
+
def readdir(self, path: str, fh: int) -> List[str]:
|
75
|
+
directory_entries = ['.', '..']
|
76
|
+
|
77
|
+
if path == '/':
|
78
|
+
directory_entries.extend(self._get_job_names_map(refresh_jobs=True).keys())
|
79
|
+
else:
|
80
|
+
job, path_in_job = self._parse_path(path)
|
81
|
+
dir_path_in_job = '/' if path_in_job == '/' else path_in_job + '/'
|
82
|
+
depth = dir_path_in_job.count('/')
|
83
|
+
directory_entries.extend(
|
84
|
+
set(
|
85
|
+
[
|
86
|
+
file.path.split('/')[depth]
|
87
|
+
for file in job.list_output_files()
|
88
|
+
if file.path.startswith(dir_path_in_job)
|
89
|
+
]
|
90
|
+
)
|
91
|
+
)
|
92
|
+
|
93
|
+
return directory_entries
|
94
|
+
|
95
|
+
def open(self, path: str, flags: int) -> int:
|
96
|
+
job, path_in_job = self._parse_path(path)
|
97
|
+
try:
|
98
|
+
job.get_output_file(path_in_job)
|
99
|
+
except BioLibError:
|
100
|
+
# file not found
|
101
|
+
raise FuseOSError(errno.ENOENT) from None
|
102
|
+
|
103
|
+
return 1234 # dummy file handle
|
104
|
+
|
105
|
+
def read(self, path: str, size: int, offset: int, fh: int) -> bytes:
|
106
|
+
job, path_in_job = self._parse_path(path)
|
107
|
+
try:
|
108
|
+
file = job.get_output_file(path_in_job)
|
109
|
+
except BioLibError:
|
110
|
+
raise FuseOSError(errno.ENOENT) from None # No such file or directory
|
111
|
+
|
112
|
+
return file.get_data(start=offset, length=size)
|
113
|
+
|
114
|
+
def release(self, path: str, fh: int) -> int:
|
115
|
+
return _SUCCESS_CODE
|
116
|
+
|
117
|
+
def releasedir(self, path: str, fh: int) -> int:
|
118
|
+
return _SUCCESS_CODE
|
119
|
+
|
120
|
+
def flush(self, path: str, fh: int) -> int:
|
121
|
+
return _SUCCESS_CODE
|
122
|
+
|
123
|
+
@staticmethod
|
124
|
+
def _get_directory_attributes(timestamp_epoch_seconds: int) -> _AttributeDict:
|
125
|
+
return _AttributeDict(
|
126
|
+
st_atime=timestamp_epoch_seconds,
|
127
|
+
st_ctime=timestamp_epoch_seconds,
|
128
|
+
st_gid=os.getgid(),
|
129
|
+
st_mode=stat.S_IFDIR | 0o555, # Directory that is readable and executable by owner, group, and others.
|
130
|
+
st_mtime=timestamp_epoch_seconds,
|
131
|
+
st_nlink=1,
|
132
|
+
st_size=1,
|
133
|
+
st_uid=os.getuid(),
|
134
|
+
)
|
135
|
+
|
136
|
+
@staticmethod
|
137
|
+
def _get_file_attributes(timestamp_epoch_seconds: int, size_in_bytes: int) -> _AttributeDict:
|
138
|
+
return _AttributeDict(
|
139
|
+
st_atime=timestamp_epoch_seconds,
|
140
|
+
st_ctime=timestamp_epoch_seconds,
|
141
|
+
st_gid=os.getgid(),
|
142
|
+
st_mode=stat.S_IFREG | 0o444, # Regular file with read permissions for owner, group, and others.
|
143
|
+
st_mtime=timestamp_epoch_seconds,
|
144
|
+
st_nlink=1,
|
145
|
+
st_size=size_in_bytes,
|
146
|
+
st_uid=os.getuid(),
|
147
|
+
)
|
148
|
+
|
149
|
+
def _get_job_names_map(self, refresh_jobs=False) -> Dict[str, Job]:
|
150
|
+
current_time = time()
|
151
|
+
if not self._job_names_map or (current_time - self._jobs_last_fetched_at > 1 and refresh_jobs):
|
152
|
+
self._jobs_last_fetched_at = current_time
|
153
|
+
self._job_names_map = {job.get_name(): job for job in self._experiment.get_jobs(status='completed')}
|
154
|
+
|
155
|
+
return self._job_names_map
|
156
|
+
|
157
|
+
def _parse_path(self, path: str) -> Tuple[Job, str]:
|
158
|
+
path_splitted = path.split('/')
|
159
|
+
job_name = path_splitted[1]
|
160
|
+
path_in_job = '/' + '/'.join(path_splitted[2:])
|
161
|
+
job = self._get_job_names_map().get(job_name)
|
162
|
+
if not job:
|
163
|
+
raise FuseOSError(errno.ENOENT) # No such file or directory
|
164
|
+
|
165
|
+
return job, path_in_job
|
166
|
+
|
167
|
+
# ----------------------------------- File system methods not implemented below -----------------------------------
|
168
|
+
|
169
|
+
def chmod(self, path, mode):
|
170
|
+
raise FuseOSError(errno.EACCES)
|
171
|
+
|
172
|
+
def chown(self, path, uid, gid):
|
173
|
+
raise FuseOSError(errno.EACCES)
|
174
|
+
|
175
|
+
def mknod(self, path, mode, dev):
|
176
|
+
raise FuseOSError(errno.EACCES)
|
177
|
+
|
178
|
+
def rmdir(self, path):
|
179
|
+
raise FuseOSError(errno.EACCES)
|
180
|
+
|
181
|
+
def mkdir(self, path, mode):
|
182
|
+
raise FuseOSError(errno.EACCES)
|
183
|
+
|
184
|
+
def unlink(self, path):
|
185
|
+
raise FuseOSError(errno.EACCES)
|
186
|
+
|
187
|
+
def symlink(self, target, source):
|
188
|
+
raise FuseOSError(errno.EACCES)
|
189
|
+
|
190
|
+
def rename(self, old, new):
|
191
|
+
raise FuseOSError(errno.EACCES)
|
192
|
+
|
193
|
+
def link(self, target, source):
|
194
|
+
raise FuseOSError(errno.EACCES)
|
195
|
+
|
196
|
+
def utimens(self, path, times=None):
|
197
|
+
raise FuseOSError(errno.EACCES)
|
198
|
+
|
199
|
+
def create(self, path, mode, fi=None):
|
200
|
+
raise FuseOSError(errno.EACCES)
|
201
|
+
|
202
|
+
def write(self, path, data, offset, fh):
|
203
|
+
raise FuseOSError(errno.EACCES)
|
204
|
+
|
205
|
+
def truncate(self, path, length, fh=None):
|
206
|
+
raise FuseOSError(errno.EACCES)
|
207
|
+
|
208
|
+
def fsync(self, path, datasync, fh):
|
209
|
+
raise FuseOSError(errno.EACCES)
|
@@ -1,25 +1,25 @@
|
|
1
1
|
import json
|
2
2
|
import platform
|
3
|
-
import
|
3
|
+
import shutil
|
4
4
|
import socket
|
5
5
|
import ssl
|
6
6
|
import subprocess
|
7
|
-
import
|
7
|
+
import time
|
8
8
|
import urllib.error
|
9
9
|
import urllib.parse
|
10
|
+
import urllib.request
|
10
11
|
|
11
12
|
from biolib.biolib_logging import logger_no_user_data
|
12
|
-
from biolib.typing_utils import Dict, Optional, Union,
|
13
|
+
from biolib.typing_utils import Dict, Literal, Optional, Union, cast
|
14
|
+
|
15
|
+
_HttpMethod = Literal['GET', 'POST', 'PATCH', 'PUT']
|
13
16
|
|
14
17
|
|
15
18
|
def _create_ssl_context():
|
16
19
|
context = ssl.create_default_context()
|
17
20
|
try:
|
18
21
|
if platform.system() == 'Darwin':
|
19
|
-
certificates = subprocess.check_output(
|
20
|
-
"security find-certificate -a -p",
|
21
|
-
shell=True
|
22
|
-
).decode('utf-8')
|
22
|
+
certificates = subprocess.check_output('security find-certificate -a -p', shell=True).decode('utf-8')
|
23
23
|
context.load_verify_locations(cadata=certificates)
|
24
24
|
except BaseException:
|
25
25
|
pass
|
@@ -33,7 +33,7 @@ class HttpError(urllib.error.HTTPError):
|
|
33
33
|
code=http_error.code,
|
34
34
|
msg=http_error.msg, # type: ignore
|
35
35
|
hdrs=http_error.hdrs, # type: ignore
|
36
|
-
fp=http_error.fp
|
36
|
+
fp=http_error.fp,
|
37
37
|
)
|
38
38
|
|
39
39
|
def __str__(self):
|
@@ -42,15 +42,24 @@ class HttpError(urllib.error.HTTPError):
|
|
42
42
|
|
43
43
|
|
44
44
|
class HttpResponse:
|
45
|
-
def __init__(self, response):
|
45
|
+
def __init__(self, response, response_path) -> None:
|
46
46
|
self.headers: Dict[str, str] = dict(response.headers)
|
47
47
|
self.status_code: int = int(response.status)
|
48
|
-
self.
|
48
|
+
self.response_path = response_path
|
49
|
+
if self.response_path:
|
50
|
+
with open(self.response_path, 'wb') as out_file:
|
51
|
+
shutil.copyfileobj(response, out_file)
|
52
|
+
else:
|
53
|
+
self.content: bytes = response.read()
|
49
54
|
self.url: str = response.geturl()
|
50
55
|
|
51
56
|
@property
|
52
57
|
def text(self) -> str:
|
53
|
-
|
58
|
+
if self.response_path:
|
59
|
+
with open(self.response_path, 'rb') as fp:
|
60
|
+
return cast(str, fp.read().decode('utf-8'))
|
61
|
+
else:
|
62
|
+
return cast(str, self.content.decode('utf-8'))
|
54
63
|
|
55
64
|
def json(self):
|
56
65
|
return json.loads(self.text)
|
@@ -61,12 +70,13 @@ class HttpClient:
|
|
61
70
|
|
62
71
|
@staticmethod
|
63
72
|
def request(
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
73
|
+
url: str,
|
74
|
+
method: Optional[_HttpMethod] = None,
|
75
|
+
data: Optional[Union[Dict, bytes]] = None,
|
76
|
+
headers: Optional[Dict[str, str]] = None,
|
77
|
+
retries: int = 5,
|
78
|
+
timeout_in_seconds: Optional[int] = None,
|
79
|
+
response_path: Optional[str] = None,
|
70
80
|
) -> HttpResponse:
|
71
81
|
if not HttpClient.ssl_context:
|
72
82
|
HttpClient.ssl_context = _create_ssl_context()
|
@@ -84,7 +94,7 @@ class HttpClient:
|
|
84
94
|
if timeout_in_seconds is None:
|
85
95
|
timeout_in_seconds = 60 if isinstance(data, dict) else 180 # TODO: Calculate timeout based on data size
|
86
96
|
|
87
|
-
last_error: Optional[
|
97
|
+
last_error: Optional[Exception] = None
|
88
98
|
for retry_count in range(retries + 1):
|
89
99
|
if retry_count > 0:
|
90
100
|
time.sleep(5 * retry_count)
|
@@ -95,23 +105,32 @@ class HttpClient:
|
|
95
105
|
context=HttpClient.ssl_context,
|
96
106
|
timeout=timeout_in_seconds,
|
97
107
|
) as response:
|
98
|
-
return HttpResponse(response)
|
108
|
+
return HttpResponse(response, response_path)
|
99
109
|
|
100
110
|
except urllib.error.HTTPError as error:
|
101
|
-
if error.code ==
|
102
|
-
logger_no_user_data.
|
111
|
+
if error.code == 429:
|
112
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with status 429 for "{url}"')
|
113
|
+
last_error = error
|
114
|
+
elif error.code == 502:
|
115
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with status 502 for "{url}"')
|
103
116
|
last_error = error
|
104
117
|
elif error.code == 503:
|
105
|
-
logger_no_user_data.
|
118
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with status 503 for "{url}"')
|
119
|
+
last_error = error
|
120
|
+
elif error.code == 504:
|
121
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with status 504 for "{url}"')
|
106
122
|
last_error = error
|
107
123
|
else:
|
108
124
|
raise HttpError(error) from None
|
109
125
|
|
110
126
|
except urllib.error.URLError as error:
|
111
127
|
if isinstance(error.reason, socket.timeout):
|
112
|
-
logger_no_user_data.
|
128
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
|
113
129
|
last_error = error
|
114
130
|
else:
|
115
131
|
raise error
|
132
|
+
except socket.timeout as error:
|
133
|
+
logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
|
134
|
+
last_error = error
|
116
135
|
|
117
136
|
raise last_error or Exception(f'HTTP {method} request failed after {retries} retries for "{url}"')
|
@@ -0,0 +1 @@
|
|
1
|
+
# Note: this directory is purely for libraries to be directly included instead of as dependencies
|