pybiolib 0.2.951__py3-none-any.whl → 1.2.1890__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/__init__.py +357 -11
- biolib/_data_record/data_record.py +380 -0
- biolib/_index/__init__.py +0 -0
- biolib/_index/index.py +55 -0
- biolib/_index/query_result.py +103 -0
- biolib/_internal/__init__.py +0 -0
- biolib/_internal/add_copilot_prompts.py +58 -0
- biolib/_internal/add_gui_files.py +81 -0
- biolib/_internal/data_record/__init__.py +1 -0
- biolib/_internal/data_record/data_record.py +85 -0
- biolib/_internal/data_record/push_data.py +116 -0
- biolib/_internal/data_record/remote_storage_endpoint.py +43 -0
- biolib/_internal/errors.py +5 -0
- biolib/_internal/file_utils.py +125 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +159 -0
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/lfs/cache.py +51 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +488 -0
- biolib/_internal/runtime.py +22 -0
- biolib/_internal/string_utils.py +13 -0
- biolib/_internal/templates/__init__.py +1 -0
- biolib/_internal/templates/copilot_template/.github/instructions/general-app-knowledge.instructions.md +10 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-general.instructions.md +20 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-python.instructions.md +16 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_app_inputs.prompt.md +11 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_run_apps.prompt.md +12 -0
- biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
- biolib/_internal/templates/github_workflow_template/.github/workflows/biolib.yml +21 -0
- biolib/_internal/templates/gitignore_template/.gitignore +10 -0
- biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
- biolib/_internal/templates/gui_template/App.tsx +53 -0
- biolib/_internal/templates/gui_template/Dockerfile +27 -0
- biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
- biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
- biolib/_internal/templates/gui_template/index.css +5 -0
- biolib/_internal/templates/gui_template/index.html +13 -0
- biolib/_internal/templates/gui_template/index.tsx +10 -0
- biolib/_internal/templates/gui_template/package.json +27 -0
- biolib/_internal/templates/gui_template/tsconfig.json +24 -0
- biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
- biolib/_internal/templates/gui_template/vite.config.mts +10 -0
- biolib/_internal/templates/init_template/.biolib/config.yml +19 -0
- biolib/_internal/templates/init_template/Dockerfile +14 -0
- biolib/_internal/templates/init_template/requirements.txt +1 -0
- biolib/_internal/templates/init_template/run.py +12 -0
- biolib/_internal/templates/init_template/run.sh +4 -0
- biolib/_internal/templates/templates.py +25 -0
- biolib/_internal/tree_utils.py +106 -0
- biolib/_internal/utils/__init__.py +65 -0
- biolib/_internal/utils/auth.py +46 -0
- biolib/_internal/utils/job_url.py +33 -0
- biolib/_internal/utils/multinode.py +263 -0
- biolib/_runtime/runtime.py +157 -0
- biolib/_session/session.py +44 -0
- biolib/_shared/__init__.py +0 -0
- biolib/_shared/types/__init__.py +74 -0
- biolib/_shared/types/account.py +12 -0
- biolib/_shared/types/account_member.py +8 -0
- biolib/_shared/types/app.py +9 -0
- biolib/_shared/types/data_record.py +40 -0
- biolib/_shared/types/experiment.py +32 -0
- biolib/_shared/types/file_node.py +17 -0
- biolib/_shared/types/push.py +6 -0
- biolib/_shared/types/resource.py +37 -0
- biolib/_shared/types/resource_deploy_key.py +11 -0
- biolib/_shared/types/resource_permission.py +14 -0
- biolib/_shared/types/resource_version.py +19 -0
- biolib/_shared/types/result.py +14 -0
- biolib/_shared/types/typing.py +10 -0
- biolib/_shared/types/user.py +19 -0
- biolib/_shared/utils/__init__.py +7 -0
- biolib/_shared/utils/resource_uri.py +75 -0
- biolib/api/__init__.py +6 -0
- biolib/api/client.py +168 -0
- biolib/app/app.py +252 -49
- biolib/app/search_apps.py +45 -0
- biolib/biolib_api_client/api_client.py +126 -31
- biolib/biolib_api_client/app_types.py +24 -4
- biolib/biolib_api_client/auth.py +31 -8
- biolib/biolib_api_client/biolib_app_api.py +147 -52
- biolib/biolib_api_client/biolib_job_api.py +161 -141
- biolib/biolib_api_client/job_types.py +21 -5
- biolib/biolib_api_client/lfs_types.py +7 -23
- biolib/biolib_api_client/user_state.py +56 -0
- biolib/biolib_binary_format/__init__.py +1 -4
- biolib/biolib_binary_format/file_in_container.py +105 -0
- biolib/biolib_binary_format/module_input.py +24 -7
- biolib/biolib_binary_format/module_output_v2.py +149 -0
- biolib/biolib_binary_format/remote_endpoints.py +34 -0
- biolib/biolib_binary_format/remote_stream_seeker.py +59 -0
- biolib/biolib_binary_format/saved_job.py +3 -2
- biolib/biolib_binary_format/{attestation_document.py → stdout_and_stderr.py} +8 -8
- biolib/biolib_binary_format/system_status_update.py +3 -2
- biolib/biolib_binary_format/utils.py +175 -0
- biolib/biolib_docker_client/__init__.py +11 -2
- biolib/biolib_errors.py +36 -0
- biolib/biolib_logging.py +27 -10
- biolib/cli/__init__.py +38 -0
- biolib/cli/auth.py +46 -0
- biolib/cli/data_record.py +164 -0
- biolib/cli/index.py +32 -0
- biolib/cli/init.py +421 -0
- biolib/cli/lfs.py +101 -0
- biolib/cli/push.py +50 -0
- biolib/cli/run.py +63 -0
- biolib/cli/runtime.py +14 -0
- biolib/cli/sdk.py +16 -0
- biolib/cli/start.py +56 -0
- biolib/compute_node/cloud_utils/cloud_utils.py +110 -161
- biolib/compute_node/job_worker/cache_state.py +66 -88
- biolib/compute_node/job_worker/cache_types.py +1 -6
- biolib/compute_node/job_worker/docker_image_cache.py +112 -37
- biolib/compute_node/job_worker/executors/__init__.py +0 -3
- biolib/compute_node/job_worker/executors/docker_executor.py +532 -199
- biolib/compute_node/job_worker/executors/docker_types.py +9 -1
- biolib/compute_node/job_worker/executors/types.py +19 -9
- biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +30 -0
- biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +3 -5
- biolib/compute_node/job_worker/job_storage.py +108 -0
- biolib/compute_node/job_worker/job_worker.py +397 -212
- biolib/compute_node/job_worker/large_file_system.py +87 -38
- biolib/compute_node/job_worker/network_alloc.py +99 -0
- biolib/compute_node/job_worker/network_buffer.py +240 -0
- biolib/compute_node/job_worker/utilization_reporter_thread.py +197 -0
- biolib/compute_node/job_worker/utils.py +9 -24
- biolib/compute_node/remote_host_proxy.py +400 -98
- biolib/compute_node/utils.py +31 -9
- biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
- biolib/compute_node/webserver/proxy_utils.py +28 -0
- biolib/compute_node/webserver/webserver.py +130 -44
- biolib/compute_node/webserver/webserver_types.py +2 -6
- biolib/compute_node/webserver/webserver_utils.py +77 -12
- biolib/compute_node/webserver/worker_thread.py +183 -42
- biolib/experiments/__init__.py +0 -0
- biolib/experiments/experiment.py +356 -0
- biolib/jobs/__init__.py +1 -0
- biolib/jobs/job.py +741 -0
- biolib/jobs/job_result.py +185 -0
- biolib/jobs/types.py +50 -0
- biolib/py.typed +0 -0
- biolib/runtime/__init__.py +14 -0
- biolib/sdk/__init__.py +91 -0
- biolib/tables.py +34 -0
- biolib/typing_utils.py +2 -7
- biolib/user/__init__.py +1 -0
- biolib/user/sign_in.py +54 -0
- biolib/utils/__init__.py +162 -0
- biolib/utils/cache_state.py +94 -0
- biolib/utils/multipart_uploader.py +194 -0
- biolib/utils/seq_util.py +150 -0
- biolib/utils/zip/remote_zip.py +640 -0
- pybiolib-1.2.1890.dist-info/METADATA +41 -0
- pybiolib-1.2.1890.dist-info/RECORD +177 -0
- {pybiolib-0.2.951.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
- pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
- README.md +0 -17
- biolib/app/app_result.py +0 -68
- biolib/app/utils.py +0 -62
- biolib/biolib-js/0-biolib.worker.js +0 -1
- biolib/biolib-js/1-biolib.worker.js +0 -1
- biolib/biolib-js/2-biolib.worker.js +0 -1
- biolib/biolib-js/3-biolib.worker.js +0 -1
- biolib/biolib-js/4-biolib.worker.js +0 -1
- biolib/biolib-js/5-biolib.worker.js +0 -1
- biolib/biolib-js/6-biolib.worker.js +0 -1
- biolib/biolib-js/index.html +0 -10
- biolib/biolib-js/main-biolib.js +0 -1
- biolib/biolib_api_client/biolib_account_api.py +0 -21
- biolib/biolib_api_client/biolib_large_file_system_api.py +0 -108
- biolib/biolib_binary_format/aes_encrypted_package.py +0 -42
- biolib/biolib_binary_format/module_output.py +0 -58
- biolib/biolib_binary_format/rsa_encrypted_aes_package.py +0 -57
- biolib/biolib_push.py +0 -114
- biolib/cli.py +0 -203
- biolib/cli_utils.py +0 -273
- biolib/compute_node/cloud_utils/enclave_parent_types.py +0 -7
- biolib/compute_node/enclave/__init__.py +0 -2
- biolib/compute_node/enclave/enclave_remote_hosts.py +0 -53
- biolib/compute_node/enclave/nitro_secure_module_utils.py +0 -64
- biolib/compute_node/job_worker/executors/base_executor.py +0 -18
- biolib/compute_node/job_worker/executors/pyppeteer_executor.py +0 -173
- biolib/compute_node/job_worker/executors/remote/__init__.py +0 -1
- biolib/compute_node/job_worker/executors/remote/nitro_enclave_utils.py +0 -81
- biolib/compute_node/job_worker/executors/remote/remote_executor.py +0 -51
- biolib/lfs.py +0 -196
- biolib/pyppeteer/.circleci/config.yml +0 -100
- biolib/pyppeteer/.coveragerc +0 -3
- biolib/pyppeteer/.gitignore +0 -89
- biolib/pyppeteer/.pre-commit-config.yaml +0 -28
- biolib/pyppeteer/CHANGES.md +0 -253
- biolib/pyppeteer/CONTRIBUTING.md +0 -26
- biolib/pyppeteer/LICENSE +0 -12
- biolib/pyppeteer/README.md +0 -137
- biolib/pyppeteer/docs/Makefile +0 -177
- biolib/pyppeteer/docs/_static/custom.css +0 -28
- biolib/pyppeteer/docs/_templates/layout.html +0 -10
- biolib/pyppeteer/docs/changes.md +0 -1
- biolib/pyppeteer/docs/conf.py +0 -299
- biolib/pyppeteer/docs/index.md +0 -21
- biolib/pyppeteer/docs/make.bat +0 -242
- biolib/pyppeteer/docs/reference.md +0 -211
- biolib/pyppeteer/docs/server.py +0 -60
- biolib/pyppeteer/poetry.lock +0 -1699
- biolib/pyppeteer/pyppeteer/__init__.py +0 -135
- biolib/pyppeteer/pyppeteer/accessibility.py +0 -286
- biolib/pyppeteer/pyppeteer/browser.py +0 -401
- biolib/pyppeteer/pyppeteer/browser_fetcher.py +0 -194
- biolib/pyppeteer/pyppeteer/command.py +0 -22
- biolib/pyppeteer/pyppeteer/connection/__init__.py +0 -242
- biolib/pyppeteer/pyppeteer/connection/cdpsession.py +0 -101
- biolib/pyppeteer/pyppeteer/coverage.py +0 -346
- biolib/pyppeteer/pyppeteer/device_descriptors.py +0 -787
- biolib/pyppeteer/pyppeteer/dialog.py +0 -79
- biolib/pyppeteer/pyppeteer/domworld.py +0 -597
- biolib/pyppeteer/pyppeteer/emulation_manager.py +0 -53
- biolib/pyppeteer/pyppeteer/errors.py +0 -48
- biolib/pyppeteer/pyppeteer/events.py +0 -63
- biolib/pyppeteer/pyppeteer/execution_context.py +0 -156
- biolib/pyppeteer/pyppeteer/frame/__init__.py +0 -299
- biolib/pyppeteer/pyppeteer/frame/frame_manager.py +0 -306
- biolib/pyppeteer/pyppeteer/helpers.py +0 -245
- biolib/pyppeteer/pyppeteer/input.py +0 -371
- biolib/pyppeteer/pyppeteer/jshandle.py +0 -598
- biolib/pyppeteer/pyppeteer/launcher.py +0 -683
- biolib/pyppeteer/pyppeteer/lifecycle_watcher.py +0 -169
- biolib/pyppeteer/pyppeteer/models/__init__.py +0 -103
- biolib/pyppeteer/pyppeteer/models/_protocol.py +0 -12460
- biolib/pyppeteer/pyppeteer/multimap.py +0 -82
- biolib/pyppeteer/pyppeteer/network_manager.py +0 -678
- biolib/pyppeteer/pyppeteer/options.py +0 -8
- biolib/pyppeteer/pyppeteer/page.py +0 -1728
- biolib/pyppeteer/pyppeteer/pipe_transport.py +0 -59
- biolib/pyppeteer/pyppeteer/target.py +0 -147
- biolib/pyppeteer/pyppeteer/task_queue.py +0 -24
- biolib/pyppeteer/pyppeteer/timeout_settings.py +0 -36
- biolib/pyppeteer/pyppeteer/tracing.py +0 -93
- biolib/pyppeteer/pyppeteer/us_keyboard_layout.py +0 -305
- biolib/pyppeteer/pyppeteer/util.py +0 -18
- biolib/pyppeteer/pyppeteer/websocket_transport.py +0 -47
- biolib/pyppeteer/pyppeteer/worker.py +0 -101
- biolib/pyppeteer/pyproject.toml +0 -97
- biolib/pyppeteer/spell.txt +0 -137
- biolib/pyppeteer/tox.ini +0 -72
- biolib/pyppeteer/utils/generate_protocol_types.py +0 -603
- biolib/start_cli.py +0 -7
- biolib/utils.py +0 -47
- biolib/validators/validate_app_version.py +0 -183
- biolib/validators/validate_argument.py +0 -134
- biolib/validators/validate_module.py +0 -323
- biolib/validators/validate_zip_file.py +0 -40
- biolib/validators/validator_utils.py +0 -103
- pybiolib-0.2.951.dist-info/LICENSE +0 -21
- pybiolib-0.2.951.dist-info/METADATA +0 -61
- pybiolib-0.2.951.dist-info/RECORD +0 -153
- pybiolib-0.2.951.dist-info/entry_points.txt +0 -3
- /LICENSE → /pybiolib-1.2.1890.dist-info/licenses/LICENSE +0 -0
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import shutil
|
|
3
|
-
import subprocess
|
|
4
3
|
import time
|
|
5
4
|
import zipfile
|
|
6
5
|
|
|
7
6
|
import docker.types # type: ignore
|
|
8
7
|
|
|
8
|
+
from biolib import utils
|
|
9
9
|
from biolib.biolib_errors import BioLibError
|
|
10
|
+
from biolib.biolib_logging import logger_no_user_data
|
|
10
11
|
from biolib.compute_node.job_worker.cache_state import LfsCacheState
|
|
11
12
|
from biolib.compute_node.job_worker.cache_types import LargeFileSystemCache, StoragePartition
|
|
12
13
|
from biolib.typing_utils import TypedDict, Optional, Callable
|
|
13
14
|
|
|
14
15
|
from biolib.biolib_api_client import LargeFileSystemMapping
|
|
16
|
+
from biolib.utils import download_presigned_s3_url
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
class StatusUpdate(TypedDict):
|
|
@@ -42,22 +44,25 @@ class LargeFileSystem:
|
|
|
42
44
|
lfs_mapping: LargeFileSystemMapping,
|
|
43
45
|
send_status_update: Callable[[StatusUpdate], None],
|
|
44
46
|
):
|
|
47
|
+
if not utils.IS_RUNNING_IN_CLOUD:
|
|
48
|
+
raise LargeFileSystemError('Large File System is currently not supported in local compute environments')
|
|
49
|
+
|
|
50
|
+
if lfs_mapping['size_bytes'] is None:
|
|
51
|
+
raise LargeFileSystemError('Error: You attempted to attach an LFS without a known size.')
|
|
52
|
+
|
|
45
53
|
self._job_id: str = job_id
|
|
46
54
|
self._lfs_mapping: LargeFileSystemMapping = lfs_mapping
|
|
47
55
|
self._path_on_disk: Optional[str] = None
|
|
56
|
+
self._path_on_disk_for_write: Optional[str] = None
|
|
48
57
|
self._send_status_update: Callable[[StatusUpdate], None] = send_status_update
|
|
49
58
|
|
|
50
|
-
@property
|
|
51
|
-
def _is_initialized(self) -> bool:
|
|
52
|
-
return self._path_on_disk is not None
|
|
53
|
-
|
|
54
59
|
@property
|
|
55
60
|
def uuid(self) -> str:
|
|
56
61
|
return self._lfs_mapping['uuid']
|
|
57
62
|
|
|
58
63
|
@property
|
|
59
64
|
def docker_mount(self) -> docker.types.Mount:
|
|
60
|
-
if not self.
|
|
65
|
+
if not self._path_on_disk:
|
|
61
66
|
raise LargeFileSystemError('LargeFileSystem not initialized')
|
|
62
67
|
|
|
63
68
|
return docker.types.Mount(
|
|
@@ -68,19 +73,32 @@ class LargeFileSystem:
|
|
|
68
73
|
)
|
|
69
74
|
|
|
70
75
|
def initialize(self) -> None:
|
|
71
|
-
if self.
|
|
76
|
+
if self._path_on_disk:
|
|
77
|
+
logger_no_user_data.debug(f'LFS {self.uuid} is already initialized')
|
|
72
78
|
return
|
|
73
79
|
|
|
74
80
|
lfs_size_bytes = self._lfs_mapping['size_bytes']
|
|
81
|
+
logger_no_user_data.debug(f'Initializing LFS {self.uuid} of size {lfs_size_bytes} bytes...')
|
|
82
|
+
|
|
83
|
+
readonly_cache_state = LfsCacheState().get_read_only_dict_without_lock()
|
|
84
|
+
if readonly_cache_state:
|
|
85
|
+
readonly_lfs: Optional[LargeFileSystemCache] = readonly_cache_state['large_file_systems'].get(self.uuid)
|
|
86
|
+
if readonly_lfs and readonly_lfs['state'] == 'ready':
|
|
87
|
+
logger_no_user_data.debug(f'LFS {self.uuid} found to be ready in cache')
|
|
88
|
+
storage_partition = readonly_cache_state['storage_partitions'][readonly_lfs['storage_partition_uuid']]
|
|
89
|
+
self._path_on_disk = f"{storage_partition['path']}/lfs/{self.uuid}/data"
|
|
90
|
+
self._path_on_disk_for_write = f'{LfsCacheState().storage_path_for_write}/lfs/{self.uuid}/data'
|
|
91
|
+
return
|
|
75
92
|
|
|
76
93
|
lfs_is_already_downloading = False
|
|
77
|
-
|
|
78
94
|
with LfsCacheState() as cache_state:
|
|
79
|
-
lfs_cache = cache_state['large_file_systems'].get(self.uuid)
|
|
95
|
+
lfs_cache: Optional[LargeFileSystemCache] = cache_state['large_file_systems'].get(self.uuid)
|
|
80
96
|
|
|
81
97
|
if lfs_cache is None:
|
|
82
|
-
|
|
98
|
+
logger_no_user_data.debug(f'LFS {self.uuid} was not found in cache')
|
|
83
99
|
storage_partition_to_use: Optional[StoragePartition] = None
|
|
100
|
+
logger_no_user_data.debug(f"Storage partitions to check: {cache_state['storage_partitions'].values()}")
|
|
101
|
+
|
|
84
102
|
for storage_partition in cache_state['storage_partitions'].values():
|
|
85
103
|
free_space_bytes = storage_partition['total_size_bytes'] - storage_partition['allocated_size_bytes']
|
|
86
104
|
if lfs_size_bytes < free_space_bytes:
|
|
@@ -93,7 +111,6 @@ class LargeFileSystem:
|
|
|
93
111
|
storage_partition_to_use['allocated_size_bytes'] += lfs_size_bytes
|
|
94
112
|
|
|
95
113
|
cache_state['large_file_systems'][self.uuid] = LargeFileSystemCache(
|
|
96
|
-
active_jobs=[self._job_id],
|
|
97
114
|
last_used_at=LfsCacheState.get_timestamp_now(),
|
|
98
115
|
size_bytes=lfs_size_bytes,
|
|
99
116
|
state='downloading',
|
|
@@ -102,11 +119,15 @@ class LargeFileSystem:
|
|
|
102
119
|
)
|
|
103
120
|
|
|
104
121
|
self._path_on_disk = f"{storage_partition_to_use['path']}/lfs/{self.uuid}/data"
|
|
122
|
+
self._path_on_disk_for_write = f'{LfsCacheState().storage_path_for_write}/lfs/{self.uuid}/data'
|
|
123
|
+
logger_no_user_data.debug(f'Using path {self._path_on_disk} for LFS')
|
|
105
124
|
|
|
106
125
|
else:
|
|
107
|
-
lfs_cache['
|
|
126
|
+
logger_no_user_data.debug(f"LFS {self.uuid} found in cache with state {lfs_cache['state']}")
|
|
127
|
+
lfs_cache['last_used_at'] = LfsCacheState.get_timestamp_now()
|
|
108
128
|
storage_partition = cache_state['storage_partitions'][lfs_cache['storage_partition_uuid']]
|
|
109
129
|
self._path_on_disk = f"{storage_partition['path']}/lfs/{self.uuid}/data"
|
|
130
|
+
self._path_on_disk_for_write = f'{LfsCacheState().storage_path_for_write}/lfs/{self.uuid}/data'
|
|
110
131
|
|
|
111
132
|
if lfs_cache['state'] == 'ready':
|
|
112
133
|
return
|
|
@@ -126,24 +147,36 @@ class LargeFileSystem:
|
|
|
126
147
|
progress=30,
|
|
127
148
|
log_message=f'Downloading Large File System "{self.uuid}"...',
|
|
128
149
|
))
|
|
129
|
-
|
|
150
|
+
|
|
151
|
+
try:
|
|
152
|
+
self._download_and_unzip()
|
|
153
|
+
except Exception as error:
|
|
154
|
+
logger_no_user_data.error(
|
|
155
|
+
f'Failed to download LFS {self.uuid} got error: {error}. Cleaning up LFS cache state...'
|
|
156
|
+
)
|
|
157
|
+
self._remove_from_state()
|
|
158
|
+
raise error
|
|
159
|
+
|
|
130
160
|
self._send_status_update(StatusUpdate(
|
|
131
161
|
progress=33,
|
|
132
162
|
log_message=f'Large File System "{self.uuid}" downloaded.',
|
|
133
163
|
))
|
|
134
164
|
with LfsCacheState() as cache_state:
|
|
135
165
|
cache_state['large_file_systems'][self.uuid]['state'] = 'ready'
|
|
166
|
+
logger_no_user_data.debug(f'LFS cache state: {cache_state}')
|
|
136
167
|
|
|
137
|
-
|
|
138
|
-
if not self._is_initialized:
|
|
139
|
-
return
|
|
168
|
+
logger_no_user_data.debug(f'LFS {self.uuid} is initialized')
|
|
140
169
|
|
|
170
|
+
def _remove_from_state(self) -> None:
|
|
141
171
|
with LfsCacheState() as cache_state:
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
172
|
+
lfs = cache_state['large_file_systems'][self.uuid]
|
|
173
|
+
storage_partition = cache_state['storage_partitions'][lfs['storage_partition_uuid']]
|
|
174
|
+
storage_partition['allocated_size_bytes'] -= lfs['size_bytes']
|
|
175
|
+
|
|
176
|
+
cache_state['large_file_systems'].pop(self.uuid)
|
|
177
|
+
logger_no_user_data.debug(f'LFS cache state: {cache_state}')
|
|
145
178
|
|
|
146
|
-
|
|
179
|
+
logger_no_user_data.debug('Cleaned up LFS cache state')
|
|
147
180
|
|
|
148
181
|
def _wait_for_lfs_to_be_ready(self) -> None:
|
|
149
182
|
# Timeout after 15 min
|
|
@@ -156,34 +189,50 @@ class LargeFileSystem:
|
|
|
156
189
|
raise LargeFileSystemError(f'Waiting for Large File System "{self.uuid}" downloading timed out')
|
|
157
190
|
|
|
158
191
|
def _download_and_unzip(self) -> None:
|
|
159
|
-
|
|
160
|
-
|
|
192
|
+
logger_no_user_data.debug(f'Starting download and unzip of LFS {self.uuid}')
|
|
161
193
|
lfs_size_bytes = self._lfs_mapping['size_bytes']
|
|
162
|
-
|
|
163
194
|
tmp_storage_dir: Optional[str] = None
|
|
164
|
-
|
|
195
|
+
|
|
196
|
+
for path in LfsCacheState().tmp_storage_paths:
|
|
165
197
|
disk_usage = shutil.disk_usage(path)
|
|
198
|
+
logger_no_user_data.debug(f'Path {path} has disk usage: {disk_usage}')
|
|
166
199
|
if lfs_size_bytes < disk_usage.free:
|
|
167
200
|
tmp_storage_dir = path
|
|
168
201
|
|
|
169
202
|
if tmp_storage_dir is None:
|
|
170
203
|
raise LargeFileSystemError('No temporary storage available for downloading Large File System')
|
|
171
204
|
|
|
172
|
-
s3_data_zip_uri = f's3://{s3_lfs_bucket_name}/lfs/versions/{self.uuid}/data.zip'
|
|
173
205
|
tmp_data_zip_path = f'{tmp_storage_dir}/lfs-{self.uuid}-data.zip'
|
|
206
|
+
logger_no_user_data.debug(f'Downloading LFS zip to path {tmp_data_zip_path}...')
|
|
174
207
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
)
|
|
180
|
-
|
|
181
|
-
if download_result.returncode != 0:
|
|
182
|
-
raise LargeFileSystemError(
|
|
183
|
-
f'Failed to download Large File System data.zip: {download_result.stderr.decode()}'
|
|
208
|
+
try:
|
|
209
|
+
download_presigned_s3_url(
|
|
210
|
+
presigned_url=self._lfs_mapping['presigned_download_url'],
|
|
211
|
+
output_file_path=tmp_data_zip_path,
|
|
184
212
|
)
|
|
213
|
+
except Exception as error:
|
|
214
|
+
logger_no_user_data.error(
|
|
215
|
+
f'Failed to download Large File System data.zip got error: {error}. Removing tmp_data_zip_path...'
|
|
216
|
+
)
|
|
217
|
+
if os.path.exists(tmp_data_zip_path):
|
|
218
|
+
os.remove(tmp_data_zip_path)
|
|
219
|
+
logger_no_user_data.debug(f'Removed {tmp_data_zip_path}')
|
|
220
|
+
|
|
221
|
+
raise LargeFileSystemError(f'Failed to download Large File System data.zip got error: {error}') from error
|
|
222
|
+
|
|
223
|
+
try:
|
|
224
|
+
logger_no_user_data.debug(f'Extracting {tmp_data_zip_path} to {self._path_on_disk_for_write} ...')
|
|
225
|
+
with zipfile.ZipFile(tmp_data_zip_path, 'r') as zip_ref:
|
|
226
|
+
zip_ref.extractall(self._path_on_disk_for_write)
|
|
227
|
+
except Exception as error:
|
|
228
|
+
logger_no_user_data.error(
|
|
229
|
+
f'Failed to unzip {tmp_data_zip_path} got error: {error}. '
|
|
230
|
+
f'Removing {self._path_on_disk_for_write}...'
|
|
231
|
+
)
|
|
232
|
+
if self._path_on_disk_for_write is not None and os.path.exists(self._path_on_disk_for_write):
|
|
233
|
+
shutil.rmtree(self._path_on_disk_for_write)
|
|
185
234
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
235
|
+
raise error
|
|
236
|
+
finally:
|
|
237
|
+
os.remove(tmp_data_zip_path)
|
|
238
|
+
logger_no_user_data.debug(f'Removed {tmp_data_zip_path}')
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import ipaddress
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import Dict, Optional, cast
|
|
5
|
+
|
|
6
|
+
from docker.errors import APIError
|
|
7
|
+
from docker.models.networks import Network
|
|
8
|
+
from docker.types import IPAMConfig, IPAMPool
|
|
9
|
+
|
|
10
|
+
from biolib.biolib_errors import BioLibError
|
|
11
|
+
from biolib.biolib_logging import logger_no_user_data
|
|
12
|
+
from biolib.compute_node.remote_host_proxy import get_static_ip_from_network
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _iter_network_subnets(existing_network):
|
|
16
|
+
ipam_config = existing_network.attrs.get('IPAM', {}).get('Config', [])
|
|
17
|
+
for cfg in ipam_config:
|
|
18
|
+
subnet_str = cfg.get('Subnet')
|
|
19
|
+
if not subnet_str:
|
|
20
|
+
continue
|
|
21
|
+
try:
|
|
22
|
+
yield ipaddress.ip_network(subnet_str, strict=False)
|
|
23
|
+
except ValueError:
|
|
24
|
+
continue
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _find_overlap(candidate_network, existing_networks):
|
|
28
|
+
for existing in existing_networks:
|
|
29
|
+
for subnet in _iter_network_subnets(existing):
|
|
30
|
+
if candidate_network.overlaps(subnet):
|
|
31
|
+
return existing, str(subnet)
|
|
32
|
+
return None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _allocate_network_with_retries(
|
|
36
|
+
name_prefix: str,
|
|
37
|
+
docker_client,
|
|
38
|
+
internal: bool = True,
|
|
39
|
+
driver: str = 'bridge',
|
|
40
|
+
max_attempts: int = 10,
|
|
41
|
+
labels: Optional[Dict[str, str]] = None,
|
|
42
|
+
) -> Network:
|
|
43
|
+
base_network = ipaddress.ip_network('172.28.0.0/16', strict=False)
|
|
44
|
+
|
|
45
|
+
suffix = uuid.uuid4().hex
|
|
46
|
+
full_name = f'{name_prefix}{suffix}'
|
|
47
|
+
name_hash = int(hashlib.sha256(full_name.encode()).hexdigest(), 16)
|
|
48
|
+
starting_offset = name_hash % 256
|
|
49
|
+
|
|
50
|
+
for attempt in range(max_attempts):
|
|
51
|
+
offset = (starting_offset + attempt) % 256
|
|
52
|
+
|
|
53
|
+
if base_network.prefixlen == 16:
|
|
54
|
+
third_octet = offset
|
|
55
|
+
candidate_subnet = f'{base_network.network_address.exploded.rsplit(".", 2)[0]}.{third_octet}.0/24'
|
|
56
|
+
else:
|
|
57
|
+
candidate_subnet = f'{base_network.network_address.exploded.rsplit(".", 1)[0]}.{offset}.0/24'
|
|
58
|
+
|
|
59
|
+
candidate_network = ipaddress.ip_network(candidate_subnet, strict=False)
|
|
60
|
+
|
|
61
|
+
existing_networks = docker_client.networks.list()
|
|
62
|
+
overlap = _find_overlap(candidate_network, existing_networks)
|
|
63
|
+
if overlap:
|
|
64
|
+
existing_network, existing_subnet = overlap
|
|
65
|
+
logger_no_user_data.debug(
|
|
66
|
+
f'Subnet {candidate_subnet} conflicts with existing network '
|
|
67
|
+
f'{existing_network.name} ({existing_subnet}), trying next candidate'
|
|
68
|
+
)
|
|
69
|
+
continue
|
|
70
|
+
|
|
71
|
+
ipam_pool = IPAMPool(subnet=candidate_subnet)
|
|
72
|
+
computed_ipam_config = IPAMConfig(pool_configs=[ipam_pool])
|
|
73
|
+
|
|
74
|
+
try:
|
|
75
|
+
network = cast(
|
|
76
|
+
Network,
|
|
77
|
+
docker_client.networks.create(
|
|
78
|
+
name=full_name,
|
|
79
|
+
internal=internal,
|
|
80
|
+
driver=driver,
|
|
81
|
+
ipam=computed_ipam_config,
|
|
82
|
+
labels=labels or {},
|
|
83
|
+
),
|
|
84
|
+
)
|
|
85
|
+
static_ip = get_static_ip_from_network(network, offset=2)
|
|
86
|
+
logger_no_user_data.debug(
|
|
87
|
+
f'Created network {full_name} with subnet {candidate_subnet} and static IP {static_ip}'
|
|
88
|
+
)
|
|
89
|
+
return network
|
|
90
|
+
except APIError as api_error:
|
|
91
|
+
logger_no_user_data.debug(
|
|
92
|
+
f'Network creation failed with Docker API error for subnet {candidate_subnet}: {api_error}, '
|
|
93
|
+
f'trying next candidate (attempt {attempt + 1}/{max_attempts})'
|
|
94
|
+
)
|
|
95
|
+
continue
|
|
96
|
+
|
|
97
|
+
raise BioLibError(
|
|
98
|
+
f'Failed to allocate and create network {full_name} after {max_attempts} attempts. ' f'Base CIDR: 172.28.0.0/16'
|
|
99
|
+
)
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import socket
|
|
5
|
+
import time
|
|
6
|
+
from typing import List, Optional
|
|
7
|
+
|
|
8
|
+
from docker.errors import NotFound
|
|
9
|
+
from docker.models.networks import Network
|
|
10
|
+
|
|
11
|
+
from biolib import utils
|
|
12
|
+
from biolib.biolib_docker_client import BiolibDockerClient
|
|
13
|
+
from biolib.biolib_logging import logger_no_user_data
|
|
14
|
+
from biolib.compute_node.job_worker.network_alloc import _allocate_network_with_retries
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class NetworkBuffer:
|
|
18
|
+
BUFFER_SIZE = 25
|
|
19
|
+
NETWORK_NAME_PREFIX = 'biolib-remote-host-network-'
|
|
20
|
+
NETWORK_LABEL = 'biolib-role=remote-host-network'
|
|
21
|
+
|
|
22
|
+
_BIOLIB_DIR = '/biolib' if utils.IS_RUNNING_IN_CLOUD else '/tmp/biolib'
|
|
23
|
+
_NETWORKS_FILE = os.path.join(_BIOLIB_DIR, 'remote-host-networks.json')
|
|
24
|
+
_LOCK_FILE = os.path.join(_BIOLIB_DIR, 'remote-host-networks.lock')
|
|
25
|
+
_LOCK_TIMEOUT_SECONDS = 60
|
|
26
|
+
_STALE_LOCK_THRESHOLD_SECONDS = 600
|
|
27
|
+
|
|
28
|
+
_instance: Optional['NetworkBuffer'] = None
|
|
29
|
+
|
|
30
|
+
def __init__(self):
|
|
31
|
+
os.makedirs(self._BIOLIB_DIR, exist_ok=True)
|
|
32
|
+
self._docker = BiolibDockerClient.get_docker_client()
|
|
33
|
+
|
|
34
|
+
@classmethod
|
|
35
|
+
def get_instance(cls) -> 'NetworkBuffer':
|
|
36
|
+
if cls._instance is None:
|
|
37
|
+
cls._instance = cls()
|
|
38
|
+
return cls._instance
|
|
39
|
+
|
|
40
|
+
def _acquire_lock(self) -> None:
|
|
41
|
+
start_time = time.time()
|
|
42
|
+
retry_count = 0
|
|
43
|
+
|
|
44
|
+
while time.time() - start_time < self._LOCK_TIMEOUT_SECONDS:
|
|
45
|
+
try:
|
|
46
|
+
with open(self._LOCK_FILE, 'x') as lock_file:
|
|
47
|
+
lock_info = {
|
|
48
|
+
'pid': os.getpid(),
|
|
49
|
+
'hostname': socket.gethostname(),
|
|
50
|
+
'started_at': time.time(),
|
|
51
|
+
}
|
|
52
|
+
json.dump(lock_info, lock_file)
|
|
53
|
+
return
|
|
54
|
+
except FileExistsError:
|
|
55
|
+
if retry_count == 0:
|
|
56
|
+
self._check_and_remove_stale_lock()
|
|
57
|
+
|
|
58
|
+
time.sleep(0.5)
|
|
59
|
+
retry_count += 1
|
|
60
|
+
|
|
61
|
+
raise RuntimeError(
|
|
62
|
+
f'Failed to acquire network buffer lock after {self._LOCK_TIMEOUT_SECONDS}s: {self._LOCK_FILE}'
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
def _check_and_remove_stale_lock(self) -> None:
|
|
66
|
+
try:
|
|
67
|
+
if not os.path.exists(self._LOCK_FILE):
|
|
68
|
+
return
|
|
69
|
+
|
|
70
|
+
lock_mtime = os.path.getmtime(self._LOCK_FILE)
|
|
71
|
+
lock_age = time.time() - lock_mtime
|
|
72
|
+
|
|
73
|
+
if lock_age > self._STALE_LOCK_THRESHOLD_SECONDS:
|
|
74
|
+
try:
|
|
75
|
+
with open(self._LOCK_FILE) as f:
|
|
76
|
+
lock_info = json.load(f)
|
|
77
|
+
lock_pid = lock_info.get('pid')
|
|
78
|
+
|
|
79
|
+
if lock_pid:
|
|
80
|
+
try:
|
|
81
|
+
os.kill(lock_pid, 0)
|
|
82
|
+
logger_no_user_data.warning(
|
|
83
|
+
f'Lock file is old ({lock_age:.0f}s) but process {lock_pid} is still alive'
|
|
84
|
+
)
|
|
85
|
+
return
|
|
86
|
+
except (OSError, ProcessLookupError):
|
|
87
|
+
pass
|
|
88
|
+
|
|
89
|
+
except (json.JSONDecodeError, KeyError, ValueError):
|
|
90
|
+
pass
|
|
91
|
+
|
|
92
|
+
logger_no_user_data.warning(
|
|
93
|
+
f'Removing stale lock file (age: {lock_age:.0f}s, threshold: {self._STALE_LOCK_THRESHOLD_SECONDS}s)'
|
|
94
|
+
)
|
|
95
|
+
os.remove(self._LOCK_FILE)
|
|
96
|
+
|
|
97
|
+
except Exception as error:
|
|
98
|
+
logger_no_user_data.debug(f'Error checking stale lock: {error}')
|
|
99
|
+
|
|
100
|
+
def _release_lock(self) -> None:
|
|
101
|
+
with contextlib.suppress(FileNotFoundError):
|
|
102
|
+
os.remove(self._LOCK_FILE)
|
|
103
|
+
|
|
104
|
+
def _read_available_networks(self) -> List[str]:
|
|
105
|
+
if not os.path.exists(self._NETWORKS_FILE):
|
|
106
|
+
return []
|
|
107
|
+
|
|
108
|
+
try:
|
|
109
|
+
with open(self._NETWORKS_FILE) as f:
|
|
110
|
+
network_ids = json.load(f)
|
|
111
|
+
if not isinstance(network_ids, list):
|
|
112
|
+
logger_no_user_data.error(
|
|
113
|
+
f'Invalid network buffer file format (expected list, got {type(network_ids).__name__})'
|
|
114
|
+
)
|
|
115
|
+
self._backup_corrupted_file()
|
|
116
|
+
return []
|
|
117
|
+
return network_ids
|
|
118
|
+
except json.JSONDecodeError as error:
|
|
119
|
+
logger_no_user_data.error(f'Corrupted network buffer file: {error}')
|
|
120
|
+
self._backup_corrupted_file()
|
|
121
|
+
return []
|
|
122
|
+
except Exception as error:
|
|
123
|
+
logger_no_user_data.error(f'Failed to read network buffer file: {error}')
|
|
124
|
+
return []
|
|
125
|
+
|
|
126
|
+
def _write_available_networks(self, network_ids: List[str]) -> None:
|
|
127
|
+
temp_file = f'{self._NETWORKS_FILE}.tmp'
|
|
128
|
+
try:
|
|
129
|
+
with open(temp_file, 'w') as f:
|
|
130
|
+
json.dump(network_ids, f, indent=2)
|
|
131
|
+
f.flush()
|
|
132
|
+
os.fsync(f.fileno())
|
|
133
|
+
|
|
134
|
+
os.replace(temp_file, self._NETWORKS_FILE)
|
|
135
|
+
except Exception as error:
|
|
136
|
+
logger_no_user_data.error(f'Failed to write network buffer file: {error}')
|
|
137
|
+
with contextlib.suppress(FileNotFoundError):
|
|
138
|
+
os.remove(temp_file)
|
|
139
|
+
raise
|
|
140
|
+
|
|
141
|
+
def _backup_corrupted_file(self) -> None:
    """Move the corrupted networks file aside to a timestamped `.corrupt-*` path.

    Best-effort: any failure is logged and swallowed so callers can continue
    with an empty buffer.
    """
    try:
        timestamp = int(time.time())
        backup_path = f'{self._NETWORKS_FILE}.corrupt-{timestamp}'
        os.rename(self._NETWORKS_FILE, backup_path)
        logger_no_user_data.error(f'Backed up corrupted file to {backup_path}')
    except Exception as error:
        logger_no_user_data.error(f'Failed to backup corrupted file: {error}')
|
150
|
+
def allocate_networks(self, job_id: str, count: int) -> List[Network]:
    """Allocate `count` Docker networks for `job_id`, preferring buffered ones.

    Pops pre-created network IDs from the shared buffer file (under the
    inter-process lock) and resolves them via the Docker API; IDs that no
    longer exist are skipped. When the buffer is exhausted, or the lock
    cannot be acquired, networks are created on the fly instead.

    Returns the list of allocated Network objects (length == count).
    """
    # Track whether WE acquired the lock: the original code released the lock
    # in `finally` even when `_acquire_lock` raised, which would delete a lock
    # file owned by another process and break mutual exclusion.
    lock_acquired = False
    try:
        self._acquire_lock()
        lock_acquired = True

        available_ids = self._read_available_networks()
        allocated: List[Network] = []

        for _ in range(count):
            network = None

            # Drain buffered IDs until one resolves; stale entries are dropped.
            while available_ids and network is None:
                net_id = available_ids.pop(0)
                try:
                    network = self._docker.networks.get(net_id)
                    logger_no_user_data.debug(
                        f'Allocated network {network.id} ({network.name}) from buffer for job {job_id}'
                    )
                except NotFound:
                    logger_no_user_data.warning(
                        f'Network {net_id} in buffer file no longer exists in Docker, skipping'
                    )
                    network = None

            if network is None:
                logger_no_user_data.debug(f'Buffer exhausted, creating network on-the-fly for job {job_id}')
                network = self._create_network()

            allocated.append(network)

        # Persist the remaining buffer so other processes see the consumed IDs.
        self._write_available_networks(available_ids)
        return allocated

    except RuntimeError as error:
        # Lock contention (or a post-acquire RuntimeError): fall back to
        # direct creation so the job is not blocked on the buffer.
        logger_no_user_data.warning(f'Lock acquisition failed: {error}. Creating networks on-the-fly.')
        return [self._create_network() for _ in range(count)]

    finally:
        if lock_acquired:
            self._release_lock()
|
193
|
+
def fill_buffer(self) -> int:
    """Top up the shared network buffer to `BUFFER_SIZE` pre-created networks.

    Holds the inter-process lock while reading, extending, and rewriting the
    buffer file. Individual network-creation failures are logged and skipped.

    Returns the number of networks actually created (0 if already full).
    Raises whatever `_acquire_lock` raises when the lock cannot be taken.
    """
    # Only release the lock if we acquired it: the original code released in
    # `finally` unconditionally, deleting a lock file owned by another
    # process whenever `_acquire_lock` itself failed.
    lock_acquired = False
    try:
        self._acquire_lock()
        lock_acquired = True

        available_ids = self._read_available_networks()
        current_count = len(available_ids)
        needed = self.BUFFER_SIZE - current_count

        if needed <= 0:
            logger_no_user_data.debug(
                f'Buffer already has {current_count} available networks (target: {self.BUFFER_SIZE})'
            )
            return 0

        logger_no_user_data.debug(
            f'Filling buffer: current={current_count}, target={self.BUFFER_SIZE}, creating={needed}'
        )

        created_count = 0
        for _ in range(needed):
            try:
                network = self._create_network()
                if network.id:
                    available_ids.append(network.id)
                    created_count += 1
                    logger_no_user_data.debug(f'Created buffer network {network.id} ({created_count}/{needed})')
                else:
                    logger_no_user_data.error('Created network has no ID, skipping')
            except Exception as error:
                # Best-effort: one failed creation should not abort the fill.
                logger_no_user_data.error(f'Failed to create buffer network: {error}')
                continue

        self._write_available_networks(available_ids)
        logger_no_user_data.debug(f'Buffer fill complete: created {created_count} networks')
        return created_count

    finally:
        if lock_acquired:
            self._release_lock()
|
232
|
+
def _create_network(self) -> Network:
    """Create one internal bridge network via the module-level retry helper."""
    return _allocate_network_with_retries(
        name_prefix=self.NETWORK_NAME_PREFIX,
        docker_client=self._docker,
        internal=True,
        driver='bridge',
        labels={'biolib-role': 'remote-host-network'},
    )