pybiolib 0.2.951__py3-none-any.whl → 1.2.1890__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between these versions as published in their public registry.
- biolib/__init__.py +357 -11
- biolib/_data_record/data_record.py +380 -0
- biolib/_index/__init__.py +0 -0
- biolib/_index/index.py +55 -0
- biolib/_index/query_result.py +103 -0
- biolib/_internal/__init__.py +0 -0
- biolib/_internal/add_copilot_prompts.py +58 -0
- biolib/_internal/add_gui_files.py +81 -0
- biolib/_internal/data_record/__init__.py +1 -0
- biolib/_internal/data_record/data_record.py +85 -0
- biolib/_internal/data_record/push_data.py +116 -0
- biolib/_internal/data_record/remote_storage_endpoint.py +43 -0
- biolib/_internal/errors.py +5 -0
- biolib/_internal/file_utils.py +125 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +159 -0
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/lfs/cache.py +51 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +488 -0
- biolib/_internal/runtime.py +22 -0
- biolib/_internal/string_utils.py +13 -0
- biolib/_internal/templates/__init__.py +1 -0
- biolib/_internal/templates/copilot_template/.github/instructions/general-app-knowledge.instructions.md +10 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-general.instructions.md +20 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-python.instructions.md +16 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_app_inputs.prompt.md +11 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_run_apps.prompt.md +12 -0
- biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
- biolib/_internal/templates/github_workflow_template/.github/workflows/biolib.yml +21 -0
- biolib/_internal/templates/gitignore_template/.gitignore +10 -0
- biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
- biolib/_internal/templates/gui_template/App.tsx +53 -0
- biolib/_internal/templates/gui_template/Dockerfile +27 -0
- biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
- biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
- biolib/_internal/templates/gui_template/index.css +5 -0
- biolib/_internal/templates/gui_template/index.html +13 -0
- biolib/_internal/templates/gui_template/index.tsx +10 -0
- biolib/_internal/templates/gui_template/package.json +27 -0
- biolib/_internal/templates/gui_template/tsconfig.json +24 -0
- biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
- biolib/_internal/templates/gui_template/vite.config.mts +10 -0
- biolib/_internal/templates/init_template/.biolib/config.yml +19 -0
- biolib/_internal/templates/init_template/Dockerfile +14 -0
- biolib/_internal/templates/init_template/requirements.txt +1 -0
- biolib/_internal/templates/init_template/run.py +12 -0
- biolib/_internal/templates/init_template/run.sh +4 -0
- biolib/_internal/templates/templates.py +25 -0
- biolib/_internal/tree_utils.py +106 -0
- biolib/_internal/utils/__init__.py +65 -0
- biolib/_internal/utils/auth.py +46 -0
- biolib/_internal/utils/job_url.py +33 -0
- biolib/_internal/utils/multinode.py +263 -0
- biolib/_runtime/runtime.py +157 -0
- biolib/_session/session.py +44 -0
- biolib/_shared/__init__.py +0 -0
- biolib/_shared/types/__init__.py +74 -0
- biolib/_shared/types/account.py +12 -0
- biolib/_shared/types/account_member.py +8 -0
- biolib/_shared/types/app.py +9 -0
- biolib/_shared/types/data_record.py +40 -0
- biolib/_shared/types/experiment.py +32 -0
- biolib/_shared/types/file_node.py +17 -0
- biolib/_shared/types/push.py +6 -0
- biolib/_shared/types/resource.py +37 -0
- biolib/_shared/types/resource_deploy_key.py +11 -0
- biolib/_shared/types/resource_permission.py +14 -0
- biolib/_shared/types/resource_version.py +19 -0
- biolib/_shared/types/result.py +14 -0
- biolib/_shared/types/typing.py +10 -0
- biolib/_shared/types/user.py +19 -0
- biolib/_shared/utils/__init__.py +7 -0
- biolib/_shared/utils/resource_uri.py +75 -0
- biolib/api/__init__.py +6 -0
- biolib/api/client.py +168 -0
- biolib/app/app.py +252 -49
- biolib/app/search_apps.py +45 -0
- biolib/biolib_api_client/api_client.py +126 -31
- biolib/biolib_api_client/app_types.py +24 -4
- biolib/biolib_api_client/auth.py +31 -8
- biolib/biolib_api_client/biolib_app_api.py +147 -52
- biolib/biolib_api_client/biolib_job_api.py +161 -141
- biolib/biolib_api_client/job_types.py +21 -5
- biolib/biolib_api_client/lfs_types.py +7 -23
- biolib/biolib_api_client/user_state.py +56 -0
- biolib/biolib_binary_format/__init__.py +1 -4
- biolib/biolib_binary_format/file_in_container.py +105 -0
- biolib/biolib_binary_format/module_input.py +24 -7
- biolib/biolib_binary_format/module_output_v2.py +149 -0
- biolib/biolib_binary_format/remote_endpoints.py +34 -0
- biolib/biolib_binary_format/remote_stream_seeker.py +59 -0
- biolib/biolib_binary_format/saved_job.py +3 -2
- biolib/biolib_binary_format/{attestation_document.py → stdout_and_stderr.py} +8 -8
- biolib/biolib_binary_format/system_status_update.py +3 -2
- biolib/biolib_binary_format/utils.py +175 -0
- biolib/biolib_docker_client/__init__.py +11 -2
- biolib/biolib_errors.py +36 -0
- biolib/biolib_logging.py +27 -10
- biolib/cli/__init__.py +38 -0
- biolib/cli/auth.py +46 -0
- biolib/cli/data_record.py +164 -0
- biolib/cli/index.py +32 -0
- biolib/cli/init.py +421 -0
- biolib/cli/lfs.py +101 -0
- biolib/cli/push.py +50 -0
- biolib/cli/run.py +63 -0
- biolib/cli/runtime.py +14 -0
- biolib/cli/sdk.py +16 -0
- biolib/cli/start.py +56 -0
- biolib/compute_node/cloud_utils/cloud_utils.py +110 -161
- biolib/compute_node/job_worker/cache_state.py +66 -88
- biolib/compute_node/job_worker/cache_types.py +1 -6
- biolib/compute_node/job_worker/docker_image_cache.py +112 -37
- biolib/compute_node/job_worker/executors/__init__.py +0 -3
- biolib/compute_node/job_worker/executors/docker_executor.py +532 -199
- biolib/compute_node/job_worker/executors/docker_types.py +9 -1
- biolib/compute_node/job_worker/executors/types.py +19 -9
- biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +30 -0
- biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +3 -5
- biolib/compute_node/job_worker/job_storage.py +108 -0
- biolib/compute_node/job_worker/job_worker.py +397 -212
- biolib/compute_node/job_worker/large_file_system.py +87 -38
- biolib/compute_node/job_worker/network_alloc.py +99 -0
- biolib/compute_node/job_worker/network_buffer.py +240 -0
- biolib/compute_node/job_worker/utilization_reporter_thread.py +197 -0
- biolib/compute_node/job_worker/utils.py +9 -24
- biolib/compute_node/remote_host_proxy.py +400 -98
- biolib/compute_node/utils.py +31 -9
- biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
- biolib/compute_node/webserver/proxy_utils.py +28 -0
- biolib/compute_node/webserver/webserver.py +130 -44
- biolib/compute_node/webserver/webserver_types.py +2 -6
- biolib/compute_node/webserver/webserver_utils.py +77 -12
- biolib/compute_node/webserver/worker_thread.py +183 -42
- biolib/experiments/__init__.py +0 -0
- biolib/experiments/experiment.py +356 -0
- biolib/jobs/__init__.py +1 -0
- biolib/jobs/job.py +741 -0
- biolib/jobs/job_result.py +185 -0
- biolib/jobs/types.py +50 -0
- biolib/py.typed +0 -0
- biolib/runtime/__init__.py +14 -0
- biolib/sdk/__init__.py +91 -0
- biolib/tables.py +34 -0
- biolib/typing_utils.py +2 -7
- biolib/user/__init__.py +1 -0
- biolib/user/sign_in.py +54 -0
- biolib/utils/__init__.py +162 -0
- biolib/utils/cache_state.py +94 -0
- biolib/utils/multipart_uploader.py +194 -0
- biolib/utils/seq_util.py +150 -0
- biolib/utils/zip/remote_zip.py +640 -0
- pybiolib-1.2.1890.dist-info/METADATA +41 -0
- pybiolib-1.2.1890.dist-info/RECORD +177 -0
- {pybiolib-0.2.951.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
- pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
- README.md +0 -17
- biolib/app/app_result.py +0 -68
- biolib/app/utils.py +0 -62
- biolib/biolib-js/0-biolib.worker.js +0 -1
- biolib/biolib-js/1-biolib.worker.js +0 -1
- biolib/biolib-js/2-biolib.worker.js +0 -1
- biolib/biolib-js/3-biolib.worker.js +0 -1
- biolib/biolib-js/4-biolib.worker.js +0 -1
- biolib/biolib-js/5-biolib.worker.js +0 -1
- biolib/biolib-js/6-biolib.worker.js +0 -1
- biolib/biolib-js/index.html +0 -10
- biolib/biolib-js/main-biolib.js +0 -1
- biolib/biolib_api_client/biolib_account_api.py +0 -21
- biolib/biolib_api_client/biolib_large_file_system_api.py +0 -108
- biolib/biolib_binary_format/aes_encrypted_package.py +0 -42
- biolib/biolib_binary_format/module_output.py +0 -58
- biolib/biolib_binary_format/rsa_encrypted_aes_package.py +0 -57
- biolib/biolib_push.py +0 -114
- biolib/cli.py +0 -203
- biolib/cli_utils.py +0 -273
- biolib/compute_node/cloud_utils/enclave_parent_types.py +0 -7
- biolib/compute_node/enclave/__init__.py +0 -2
- biolib/compute_node/enclave/enclave_remote_hosts.py +0 -53
- biolib/compute_node/enclave/nitro_secure_module_utils.py +0 -64
- biolib/compute_node/job_worker/executors/base_executor.py +0 -18
- biolib/compute_node/job_worker/executors/pyppeteer_executor.py +0 -173
- biolib/compute_node/job_worker/executors/remote/__init__.py +0 -1
- biolib/compute_node/job_worker/executors/remote/nitro_enclave_utils.py +0 -81
- biolib/compute_node/job_worker/executors/remote/remote_executor.py +0 -51
- biolib/lfs.py +0 -196
- biolib/pyppeteer/.circleci/config.yml +0 -100
- biolib/pyppeteer/.coveragerc +0 -3
- biolib/pyppeteer/.gitignore +0 -89
- biolib/pyppeteer/.pre-commit-config.yaml +0 -28
- biolib/pyppeteer/CHANGES.md +0 -253
- biolib/pyppeteer/CONTRIBUTING.md +0 -26
- biolib/pyppeteer/LICENSE +0 -12
- biolib/pyppeteer/README.md +0 -137
- biolib/pyppeteer/docs/Makefile +0 -177
- biolib/pyppeteer/docs/_static/custom.css +0 -28
- biolib/pyppeteer/docs/_templates/layout.html +0 -10
- biolib/pyppeteer/docs/changes.md +0 -1
- biolib/pyppeteer/docs/conf.py +0 -299
- biolib/pyppeteer/docs/index.md +0 -21
- biolib/pyppeteer/docs/make.bat +0 -242
- biolib/pyppeteer/docs/reference.md +0 -211
- biolib/pyppeteer/docs/server.py +0 -60
- biolib/pyppeteer/poetry.lock +0 -1699
- biolib/pyppeteer/pyppeteer/__init__.py +0 -135
- biolib/pyppeteer/pyppeteer/accessibility.py +0 -286
- biolib/pyppeteer/pyppeteer/browser.py +0 -401
- biolib/pyppeteer/pyppeteer/browser_fetcher.py +0 -194
- biolib/pyppeteer/pyppeteer/command.py +0 -22
- biolib/pyppeteer/pyppeteer/connection/__init__.py +0 -242
- biolib/pyppeteer/pyppeteer/connection/cdpsession.py +0 -101
- biolib/pyppeteer/pyppeteer/coverage.py +0 -346
- biolib/pyppeteer/pyppeteer/device_descriptors.py +0 -787
- biolib/pyppeteer/pyppeteer/dialog.py +0 -79
- biolib/pyppeteer/pyppeteer/domworld.py +0 -597
- biolib/pyppeteer/pyppeteer/emulation_manager.py +0 -53
- biolib/pyppeteer/pyppeteer/errors.py +0 -48
- biolib/pyppeteer/pyppeteer/events.py +0 -63
- biolib/pyppeteer/pyppeteer/execution_context.py +0 -156
- biolib/pyppeteer/pyppeteer/frame/__init__.py +0 -299
- biolib/pyppeteer/pyppeteer/frame/frame_manager.py +0 -306
- biolib/pyppeteer/pyppeteer/helpers.py +0 -245
- biolib/pyppeteer/pyppeteer/input.py +0 -371
- biolib/pyppeteer/pyppeteer/jshandle.py +0 -598
- biolib/pyppeteer/pyppeteer/launcher.py +0 -683
- biolib/pyppeteer/pyppeteer/lifecycle_watcher.py +0 -169
- biolib/pyppeteer/pyppeteer/models/__init__.py +0 -103
- biolib/pyppeteer/pyppeteer/models/_protocol.py +0 -12460
- biolib/pyppeteer/pyppeteer/multimap.py +0 -82
- biolib/pyppeteer/pyppeteer/network_manager.py +0 -678
- biolib/pyppeteer/pyppeteer/options.py +0 -8
- biolib/pyppeteer/pyppeteer/page.py +0 -1728
- biolib/pyppeteer/pyppeteer/pipe_transport.py +0 -59
- biolib/pyppeteer/pyppeteer/target.py +0 -147
- biolib/pyppeteer/pyppeteer/task_queue.py +0 -24
- biolib/pyppeteer/pyppeteer/timeout_settings.py +0 -36
- biolib/pyppeteer/pyppeteer/tracing.py +0 -93
- biolib/pyppeteer/pyppeteer/us_keyboard_layout.py +0 -305
- biolib/pyppeteer/pyppeteer/util.py +0 -18
- biolib/pyppeteer/pyppeteer/websocket_transport.py +0 -47
- biolib/pyppeteer/pyppeteer/worker.py +0 -101
- biolib/pyppeteer/pyproject.toml +0 -97
- biolib/pyppeteer/spell.txt +0 -137
- biolib/pyppeteer/tox.ini +0 -72
- biolib/pyppeteer/utils/generate_protocol_types.py +0 -603
- biolib/start_cli.py +0 -7
- biolib/utils.py +0 -47
- biolib/validators/validate_app_version.py +0 -183
- biolib/validators/validate_argument.py +0 -134
- biolib/validators/validate_module.py +0 -323
- biolib/validators/validate_zip_file.py +0 -40
- biolib/validators/validator_utils.py +0 -103
- pybiolib-0.2.951.dist-info/LICENSE +0 -21
- pybiolib-0.2.951.dist-info/METADATA +0 -61
- pybiolib-0.2.951.dist-info/RECORD +0 -153
- pybiolib-0.2.951.dist-info/entry_points.txt +0 -3
- /LICENSE → /pybiolib-1.2.1890.dist-info/licenses/LICENSE +0 -0
--- a/biolib/compute_node/job_worker/job_worker.py
+++ b/biolib/compute_node/job_worker/job_worker.py
@@ -1,54 +1,63 @@
+import hashlib
 import io
 import json
-import logging
-import socket
-import shlex
-import zipfile
-from time import time
-from queue import Queue
 import multiprocessing
 import os
+import shlex
 import signal
+import socket
+import sys
+import tempfile
+import zipfile
+from queue import Queue
+from time import time
 from types import FrameType
 
-import requests
-from Crypto.Cipher import AES
-from Crypto.Random import get_random_bytes
 from docker.models.networks import Network  # type: ignore
+from docker.types import IPAMConfig, IPAMPool  # type: ignore
 
-from biolib.compute_node.job_worker.large_file_system import LargeFileSystem
-from biolib.biolib_errors import DockerContainerNotFoundDuringExecutionException
-from biolib.compute_node.job_worker.job_max_runtime_timer_thread import JobMaxRuntimeTimerThread
-from biolib.compute_node.remote_host_proxy import RemoteHostProxy
-from biolib.typing_utils import Optional, List, Dict
 from biolib import utils
-from biolib.
+from biolib._internal.http_client import HttpClient
+from biolib.biolib_api_client import (
+    AppVersionOnJob,
+    BiolibApiClient,
+    CreatedJobDict,
+    JobWrapper,
+    Module,
+    ModuleEnvironment,
+)
 from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
+from biolib.biolib_binary_format import (
+    InMemoryIndexableBuffer,
+    ModuleInput,
+    ModuleOutputV2,
+    SavedJob,
+    SystemException,
+    SystemStatusUpdate,
+)
+from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
 from biolib.biolib_docker_client import BiolibDockerClient
-from biolib.
-from biolib.
-from biolib.compute_node.job_worker.executors
-from biolib.compute_node.job_worker.executors.types import LocalExecutorOptions, StatusUpdate
-from biolib.compute_node.
-from biolib.compute_node.
+from biolib.biolib_errors import BioLibError, DockerContainerNotFoundDuringExecutionException, StorageDownloadFailed
+from biolib.biolib_logging import logger, logger_no_user_data
+from biolib.compute_node.job_worker.executors import DockerExecutor
+from biolib.compute_node.job_worker.executors.types import LocalExecutorOptions, StatusUpdate
+from biolib.compute_node.job_worker.job_legacy_input_wait_timeout_thread import JobLegacyInputWaitTimeout
+from biolib.compute_node.job_worker.job_max_runtime_timer_thread import JobMaxRuntimeTimerThread
+from biolib.compute_node.job_worker.job_storage import JobStorage
+from biolib.compute_node.job_worker.large_file_system import LargeFileSystem
 from biolib.compute_node.job_worker.mappings import Mappings, path_without_first_folder
+from biolib.compute_node.job_worker.network_buffer import NetworkBuffer
 from biolib.compute_node.job_worker.utils import ComputeProcessException, log_disk_and_memory_usage_info
-from biolib.compute_node.
-from biolib.compute_node.
-from biolib.
-
-
-try:
-    from biolib.compute_node.enclave.nitro_secure_module_utils import NitroSecureModuleUtils
-except ImportError:
-    pass
+from biolib.compute_node.remote_host_proxy import RemoteHostMapping, RemoteHostProxy, get_static_ip_from_network
+from biolib.compute_node.socker_listener_thread import SocketListenerThread
+from biolib.compute_node.socket_sender_thread import SocketSenderThread
+from biolib.compute_node.utils import SystemExceptionCodeMap, SystemExceptionCodes, get_package_type
+from biolib.typing_utils import Dict, List, Optional
 
-DEFAULT_BUFFER_SIZE = 1024
 SOCKET_HOST = '127.0.0.1'
 
 
 class JobWorkerProcess(multiprocessing.Process):
-
     # note: this method is run in the parent process
     def __init__(self, socket_port: int, log_level: int):
         super().__init__()
@@ -57,56 +66,71 @@ class JobWorkerProcess(multiprocessing.Process):
 
     # note: this method is run in the newly started process once called with .start()
     def run(self) -> None:
-        _JobWorker(self._socket_port, self._log_level).run_handle_message_loop()
+        JobWorker(self._socket_port, self._log_level).run_handle_message_loop()
 
 
-class _JobWorker:
+class JobWorker:
     _STOP_HANDLE_MESSAGE_LOOP = b'STOP_HANDLE_MESSAGE_LOOP'
 
-    def __init__(self, socket_port: int, log_level: int):
+    def __init__(self, socket_port: Optional[int] = None, log_level: Optional[int] = None):
         try:
-
+            if log_level:
+                logger.setLevel(log_level)
 
             # handle interrupt from keyboard (CTRL + C)
             signal.signal(signal.SIGINT, self._handle_exit_gracefully)
             # handle termination signal from parent
             signal.signal(signal.SIGTERM, self._handle_exit_gracefully)
 
+            try:
+                docker_client = BiolibDockerClient.get_docker_client()
+                networks = docker_client.networks.list()
+                logger_no_user_data.debug(f'Docker networks at JobWorker init: {[net.name for net in networks]}')
+            except Exception as error:
+                logger_no_user_data.debug(f'Failed to list docker networks at init: {error}')
+
             self._socket_port = socket_port
             self._received_messages_queue: Queue = Queue()
             self._messages_to_send_queue: Queue = Queue()
+            self._legacy_input_wait_timeout_thread: Optional[JobLegacyInputWaitTimeout] = None
 
             self._app_version_id_to_runtime_zip: Dict[str, bytes] = {}
-            self._jobs: Dict[str,
+            self._jobs: Dict[str, CreatedJobDict] = {}
             self._root_job_wrapper: Optional[JobWrapper] = None
 
             self._remote_host_proxies: List[RemoteHostProxy] = []
             self._internal_network: Optional[Network] = None
-            self.
-            self._executors: List[BaseExecutor] = []
+            self._executors: List[DockerExecutor] = []
             self.is_cleaning_up: bool = False
+            self._network_buffer = NetworkBuffer.get_instance()
+
+            self.job_temporary_dir: Optional[str] = None
 
-            if utils.BIOLIB_IS_RUNNING_IN_ENCLAVE:
-                self._nsm_util = NitroSecureModuleUtils()
-                self._aes_key_buffer = b''
-                logger.setLevel(logging.DEBUG)
         except Exception as exception:
             raise ComputeProcessException(
                 exception,
                 SystemExceptionCodes.FAILED_TO_INIT_COMPUTE_PROCESS_VARIABLES.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception
 
-
-
-
-
+        if socket_port:
+            self._connect_to_parent()
+
+    def _handle_exit_gracefully(
+        self,
+        signum: int,
+        frame: Optional[FrameType],  # pylint: disable=unused-argument
+    ) -> None:
+        job_id = self._root_job_wrapper['job']['public_id'] if self._root_job_wrapper else None
+        logger_no_user_data.debug(
+            f'_JobWorker ({job_id}) got exit signal {signal.Signals(signum).name}'  # pylint: disable=no-member
+        )
         self._received_messages_queue.put(self._STOP_HANDLE_MESSAGE_LOOP)
         self._cleanup()
 
     def run_handle_message_loop(self):
-
+        logger_no_user_data.debug(f'Started JobWorkerProcess {os.getpid()}')
         while True:
             try:
                 package = self._received_messages_queue.get()
@@ -114,30 +138,57 @@ class _JobWorker:
                     break
 
                 package_type = get_package_type(package)
-                if package_type == 'RsaEncryptedAesPackage':
-                    encrypted_aes_key, iv, _, encrypted_data = RsaEncryptedAesPackage(package).deserialize()
-                    self._aes_key_buffer = self._nsm_util.decrypt(encrypted_aes_key)
-                    aes_key = AES.new(self._aes_key_buffer, AES.MODE_GCM, iv)
-
-                    package = aes_key.decrypt(encrypted_data)
-                    package_type = get_package_type(package)
-
                 if package_type == 'SavedJob':
                     self._handle_save_job_wrapper(package)
                     if utils.IS_RUNNING_IN_CLOUD:
-
-
-
+                        job = self._root_job_wrapper['job']
+                        job_uuid = job['public_id']
+                        max_runtime_in_seconds = self._root_job_wrapper['cloud_job']['max_runtime_in_seconds']
+                        logger_no_user_data.debug(
+                            f'Job "{job_uuid}" will have max run time set to {max_runtime_in_seconds} seconds'
+                        )
+                        JobMaxRuntimeTimerThread(
+                            job_worker=self,
+                            max_runtime_in_seconds=max_runtime_in_seconds,
+                        ).start()
+
+                        try:
+                            module_input_path = os.path.join(self.job_temporary_dir, JobStorage.module_input_file_name)
+                            JobStorage.download_module_input(job=job, path=module_input_path)
+                        except StorageDownloadFailed:
+                            # Expect module input to be handled in a separate ModuleInput package
+                            self._legacy_input_wait_timeout_thread = JobLegacyInputWaitTimeout(
+                                input_max_wait_in_seconds=120,
+                                job_uuid=job_uuid,
+                                send_system_exception=self.send_system_exception,
+                            )
+                            self._legacy_input_wait_timeout_thread.start()
+                            continue
+                        except Exception as error:
+                            raise error
+
+                        try:
+                            self._run_root_job(module_input_path)
+
+                        # This error occurs when trying to access the container after the job worker has cleaned it up.
+                        # In that case stop the computation.
+                        except DockerContainerNotFoundDuringExecutionException as err:
+                            if self.is_cleaning_up:
+                                break
+                            else:
+                                raise err
 
                 elif package_type == 'ModuleInput':
                     if not self._root_job_wrapper:
                         raise Exception('No job saved yet')
 
+                    if self._legacy_input_wait_timeout_thread:
+                        self._legacy_input_wait_timeout_thread.stop()
+
                     try:
-
-
-
-                        )
+                        module_input_path = os.path.join(self.job_temporary_dir, JobStorage.module_input_file_name)
+                        open(module_input_path, 'wb').write(package)
+                        self._run_root_job(module_input_path)
 
                     # This error occurs when trying to access the container after the job worker has cleaned it up.
                     # In that case stop the computation.
@@ -147,15 +198,8 @@ class _JobWorker:
                         else:
                             raise err
 
-                    if utils.BIOLIB_IS_RUNNING_IN_ENCLAVE:
-                        module_output_to_send = self._wrap_in_aes_encrypted_package(module_output_serialized)
-                    else:
-                        module_output_to_send = module_output_serialized
-
-                    self._messages_to_send_queue.put(module_output_to_send)
-
                 else:
-
+                    logger_no_user_data.error('Package type from parent was not recognized')
 
                 self._received_messages_queue.task_done()
             except ComputeProcessException:
@@ -163,116 +207,211 @@ class _JobWorker:
 
             except Exception as exception:
                 raise ComputeProcessException(
-                    exception,
-                    SystemExceptionCodes.UNKOWN_COMPUTE_PROCESS_ERROR.value,
-                    self.send_system_exception
+                    exception, SystemExceptionCodes.UNKNOWN_COMPUTE_PROCESS_ERROR.value, self.send_system_exception
                 ) from exception
 
     def _cleanup(self) -> None:
         self.is_cleaning_up = True
 
+        logger_no_user_data.debug('Cleaning up executers...')
+        for executor in self._executors:
+            executor.cleanup()
+
         proxy_count = len(self._remote_host_proxies)
+        cleaned_networks = set()
+
         if proxy_count > 0:
+            logger_no_user_data.debug('Cleaning up proxies...')
             proxy_cleanup_start_time = time()
 
             for proxy in self._remote_host_proxies:
                 try:
                     proxy.terminate()
                 except Exception as exception:  # pylint: disable=broad-except
-
+                    logger_no_user_data.error('Failed to clean up remote host proxy')
+                    logger.error(exception)
+
+                for network in proxy.get_remote_host_networks():
+                    try:
+                        self._cleanup_network(network)
+                        cleaned_networks.add(network.id)
+                    except Exception as exception:  # pylint: disable=broad-except
+                        logger_no_user_data.error(f'Failed to clean up network {network.name}')
+                        logger.error(exception)
 
             self._remote_host_proxies = []
-
+            logger_no_user_data.debug(f'Cleaned up {proxy_count} proxies in {time() - proxy_cleanup_start_time}')
 
-
+        logger_no_user_data.debug('Cleaning up networks...')
+        if self._internal_network and self._internal_network.id not in cleaned_networks:
+            self._cleanup_network(self._internal_network)
         self._internal_network = None
-        self._cleanup_network(self._public_network)
-        self._public_network = None
 
-
-
+        try:
+            logger_no_user_data.debug('Refilling network buffer...')
+            created = self._network_buffer.fill_buffer()
+            logger_no_user_data.debug(f'Refilled buffer with {created} new networks')
+        except Exception as exception:  # pylint: disable=broad-except
+            logger_no_user_data.error('Failed to refill network buffer')
+            logger.error(exception)
+
+        logger_no_user_data.debug('Cleaned up networks...')
 
     @staticmethod
     def _cleanup_network(network: Optional[Network]) -> None:
         if network:
             network_cleanup_start_time = time()
-            network_name = network
+            network_name = network.name
             try:
                 network.remove()
             except Exception as exception:  # pylint: disable=broad-except
-
-
+                logger_no_user_data.error(f'Failed to clean up {network_name}')
+                logger.error(exception)
+
+            logger_no_user_data.debug(f'Removed network {network_name} in {time() - network_cleanup_start_time}')
 
     def _handle_save_job_wrapper(self, package: bytes):
         job_wrapper_json_string = SavedJob(package).deserialize()
         job_wrapper: JobWrapper = json.loads(job_wrapper_json_string)
-        BiolibApiClient.initialize(
-            base_url=job_wrapper['BASE_URL'],
-            access_token=job_wrapper['access_token']
-        )
+        BiolibApiClient.initialize(base_url=job_wrapper['BASE_URL'], access_token=job_wrapper['access_token'])
         self._root_job_wrapper = job_wrapper
+        if not utils.IS_RUNNING_IN_CLOUD:
+            job_wrapper['cloud_job'] = None
+
+        self.job_temporary_dir = job_wrapper['job_temporary_dir']
+
         job = job_wrapper['job']
         self._jobs[job['public_id']] = job
 
+        app_version = job['app_version']
+        modules = app_version.get('modules', [])
+        for module in modules:
+            module_ports = module.get('ports', [])
+            if module_ports:
+                logger_no_user_data.debug(
+                    f"Job '{job['public_id']}' module '{module['name']}' has ports: {module_ports}"
+                )
+
         if job['app_version'].get('modules') is not None and BiolibDockerClient.is_docker_running():
-            self.
+            self._start_network_and_remote_host_proxies(job)
 
         # TODO: start downloading runtime zip already at this point
 
-    def
+    def _start_network_and_remote_host_proxies(self, job: CreatedJobDict) -> None:
+        app_version = job['app_version']
         job_id = job['public_id']
-        remote_hosts =
+        remote_hosts = app_version['remote_hosts']
+        docker_client = BiolibDockerClient.get_docker_client()
+        try:
+            name_hash = int(hashlib.sha256(job_id.encode()).hexdigest(), 16)
+            third_octet = name_hash % 256
+            internal_subnet = f'172.29.{third_octet}.0/24'
+
+            ipam_pool = IPAMPool(subnet=internal_subnet)
+            ipam_config = IPAMConfig(pool_configs=[ipam_pool])
+
+            self._internal_network = docker_client.networks.create(
+                name=f'biolib-sandboxed-network-{job_id}',
+                internal=True,
+                driver='bridge',
+                ipam=ipam_config,
+            )
+            logger_no_user_data.debug(f'Created internal network for job {job_id} with subnet {internal_subnet}')
+        except Exception as exception:
+            raise ComputeProcessException(
+                exception,
+                SystemExceptionCodes.FAILED_TO_CREATE_DOCKER_NETWORKS.value,
+                self.send_system_exception,
+                may_contain_user_data=False,
+            ) from exception
 
         if len(remote_hosts) > 0:
-
-
-            try:
-                self._internal_network = docker_client.networks.create(
-                    name=f'biolib-sandboxed-network-{job_id}',
-                    internal=True,
-                    driver='bridge',
-                )
-                self._public_network = docker_client.networks.create(
-                    name=f'biolib-proxy-network-{job_id}',
-                    internal=False,
-                    driver='bridge',
-                )
-            except Exception as exception:
-                raise ComputeProcessException(
-                    exception,
-                    SystemExceptionCodes.FAILED_TO_CREATE_DOCKER_NETWORKS.value,
-                    self.send_system_exception,
-                    may_contain_user_data=False
-                ) from exception
-            logger.debug(f'Starting remote host proxies for job: {job_id}')
+            logger_no_user_data.debug(f'Job "{job_id}" starting proxy for remote hosts: {remote_hosts}')
+            created_networks: List[Network] = []
             try:
+                hostname_to_ports: Dict[str, List[int]] = {}
                 for remote_host in remote_hosts:
+                    if ':' in remote_host['hostname']:
+                        hostname, port_str = remote_host['hostname'].split(':')
+                        port = int(port_str)
+                    else:
+                        port = 443
+                        hostname = remote_host['hostname']
+
+                    if hostname in hostname_to_ports:
+                        hostname_to_ports[hostname].append(port)
+                    else:
+                        hostname_to_ports[hostname] = [port]
+
+                remote_host_mappings: List[RemoteHostMapping] = []
+                networks = self._network_buffer.allocate_networks(job_id, len(hostname_to_ports))
+                created_networks.extend(networks)
+
+                for (hostname, ports), network in zip(hostname_to_ports.items(), networks):
+                    static_ip = get_static_ip_from_network(network, offset=2)
+
+                    mapping = RemoteHostMapping(
+                        hostname=hostname,
+                        ports=ports,
+                        network=network,
+                        static_ip=static_ip,
+                    )
+                    remote_host_mappings.append(mapping)
+
+                if remote_host_mappings:
                     remote_host_proxy = RemoteHostProxy(
-
-
-
-                        job_id,
+                        remote_host_mappings=remote_host_mappings,
+                        job=job,
+                        app_caller_network=None,
                     )
                     remote_host_proxy.start()
                     self._remote_host_proxies.append(remote_host_proxy)
+                    num_hosts = len(remote_host_mappings)
+                    logger_no_user_data.debug(f'Started single proxy container for {num_hosts} remote hosts')
 
             except Exception as exception:
+                for network in created_networks:
+                    self._cleanup_network(network)
+
                 raise ComputeProcessException(
                     exception,
                     SystemExceptionCodes.FAILED_TO_START_REMOTE_HOST_PROXIES.value,
                     self.send_system_exception,
-                    may_contain_user_data=False
+                    may_contain_user_data=False,
                 ) from exception
 
-
+        if utils.IS_RUNNING_IN_CLOUD:
+            try:
+                app_caller_proxy = RemoteHostProxy(
+                    remote_host_mappings=[],
+                    job=job,
+                    app_caller_network=self._internal_network,
+                )
+                app_caller_proxy.start()
+                self._remote_host_proxies.append(app_caller_proxy)
+                logger_no_user_data.debug('Started app caller proxy')
+            except Exception as exception:
+                raise ComputeProcessException(
+                    exception,
+                    SystemExceptionCodes.FAILED_TO_START_REMOTE_HOST_PROXIES.value,
+                    self.send_system_exception,
+                    may_contain_user_data=False,
+                ) from exception
 
-    def _run_app_version(
-
+    def _run_app_version(
+        self,
+        app_version_id: str,
+        module_input_path: str,
+        caller_job: CreatedJobDict,
+        main_module_output_path: str,
+    ) -> None:
+        job: CreatedJobDict = BiolibJobApi.create(app_version_id, caller_job=caller_job['public_id'])
         self._jobs[job['public_id']] = job
-
+        self._run_job(job, module_input_path, main_module_output_path)
 
-    def _run_job(self, job:
-
+    def _run_job(self, job: CreatedJobDict, module_input_path: str, main_module_output_path: str) -> None:
+        job_uuid = job['public_id']
+        logger_no_user_data.info(f'Job "{job_uuid}" running...')
         if self._root_job_wrapper is None:
             raise Exception('root_job_wrapper was None')
 
@@ -281,89 +420,125 @@ class _JobWorker:
             root_job = self._jobs[root_job['caller_job']]
 
         root_job_id = root_job['public_id']
+        if job.get('arguments_override_command') and not job['app_version']['app']['allow_client_side_execution']:
+            raise ComputeProcessException(
+                Exception('Command override not allowed'),
+                SystemExceptionCodes.COMMAND_OVERRIDE_NOT_ALLOWED.value,
+                self.send_system_exception,
+            )
 
         modules = job['app_version'].get('modules')
-
-
-
-
-
-                job=job,
-                root_job_id=root_job_id,
-            ),
-            module_input_serialized,
+        if not modules:
+            raise ComputeProcessException(
+                Exception('No modules found on job'),
+                SystemExceptionCodes.NO_MODULES_FOUND_ON_JOB.value,
+                self.send_system_exception,
             )
 
         main_module = self._get_module_from_name(modules, module_name='main')
 
-
-
+        source_files_are_mapped = False
         lfs_dict: Dict[str, LargeFileSystem] = {}
         for module in modules:
+            if len(module['source_files_mappings']) > 0:
+                source_files_are_mapped = True
+
            for lfs_mapping in module['large_file_systems']:
+                logger_no_user_data.debug(f'Job "{job_uuid}" creating LFS for module "{module["name"]}"...')
                 lfs = LargeFileSystem(
                     job_id=job['public_id'],
                     lfs_mapping=lfs_mapping,
                     send_status_update=self._send_status_update,
                 )
+                logger_no_user_data.debug(f'Job "{job_uuid}" created object for LFS "{lfs.uuid}"')
+
                 lfs.initialize()
                 lfs_dict[lfs.uuid] = lfs
 
-
+        runtime_zip_bytes: Optional[bytes] = None
+        if source_files_are_mapped:
+            runtime_zip_bytes = self._get_runtime_zip_as_bytes(root_job_id=root_job_id, app_version=job['app_version'])
+
+        self._run_module(
             LocalExecutorOptions(
                 access_token=self._root_job_wrapper['access_token'],
                 biolib_base_url=self._root_job_wrapper['BASE_URL'],
                 compute_node_info=self._root_job_wrapper.get('compute_node_info'),
                 internal_network=self._internal_network,
                 job=job,
+                cloud_job=self._root_job_wrapper['cloud_job'],
                 large_file_systems=lfs_dict,
                 module=main_module,
+                module_input_path=module_input_path,
+                module_output_path=main_module_output_path,
                 remote_host_proxies=self._remote_host_proxies,
                 root_job_id=root_job_id,
-                runtime_zip_bytes=
+                runtime_zip_bytes=runtime_zip_bytes,
                 send_status_update=self._send_status_update,
                 send_system_exception=self.send_system_exception,
-
-
+                send_stdout_and_stderr=self.send_stdout_and_stderr,
+            )
         )
 
-        for lfs in lfs_dict.values():
-            lfs.detach()
-
         if utils.IS_RUNNING_IN_CLOUD:
             # Log memory and disk after pulling and executing module
             log_disk_and_memory_usage_info()
 
-
-
-
+    def _run_module(
+        self,
+        options: LocalExecutorOptions,
+    ) -> None:
         module = options['module']
-
+        job_id = options['job']['public_id']
+        module_output_path = options['module_output_path']
+        module_input_path = options['module_input_path']
+        logger_no_user_data.debug(f'Job "{job_id}" running module "{module["name"]}"...')
 
+        executor_instance: DockerExecutor
         if module['environment'] == ModuleEnvironment.BIOLIB_APP.value:
+            if not self.job_temporary_dir:
+                raise BioLibError('Undefined job_temporary_dir')
+            logger_no_user_data.debug(f'Job "{job_id}" starting child job...')
+            with open(module_input_path, 'rb') as fp:
+                module_input_serialized = fp.read()
             module_input = ModuleInput(module_input_serialized).deserialize()
             module_input_with_runtime_zip = self._add_runtime_zip_and_command_to_module_input(options, module_input)
             module_input_with_runtime_zip_serialized = ModuleInput().serialize(
                 stdin=module_input_with_runtime_zip['stdin'],
                 arguments=module_input_with_runtime_zip['arguments'],
-                files=module_input_with_runtime_zip['files']
+                files=module_input_with_runtime_zip['files'],
+            )
+            module_input_path_new = os.path.join(self.job_temporary_dir, 'runtime.' + JobStorage.module_input_file_name)
+            open(module_input_path_new, 'wb').write(module_input_with_runtime_zip_serialized)
+            return self._run_app_version(
+                module['image_uri'],
+                module_input_path_new,
+                options['job'],
+                module_output_path,
             )
-            return self._run_app_version(module['image_uri'], module_input_with_runtime_zip_serialized, options['job'])
-
-        elif module['environment'] == ModuleEnvironment.BIOLIB_ECR.value:
-            executor_instance = DockerExecutor(options)
 
-        elif module['environment'] == ModuleEnvironment.
-
+        elif module['environment'] == ModuleEnvironment.BIOLIB_ECR.value and BiolibDockerClient.is_docker_running():
+            try:
+                executor_instance = DockerExecutor(options)
+            except Exception as exception:
+                raise ComputeProcessException(
+                    exception,
+                    SystemExceptionCodes.FAILED_TO_INITIALIZE_DOCKER_EXECUTOR.value,
+                    self.send_system_exception,
+                    may_contain_user_data=False,
+                ) from exception
         else:
-
+            err_string = f'Job "{job_id}" hit unsupported module environment "{module["environment"]}"'
+            logger_no_user_data.error(err_string)
+            raise Exception(err_string)
+
         self._executors.append(executor_instance)
 
         if utils.IS_RUNNING_IN_CLOUD:
             # Log memory and disk before pulling and executing module
             log_disk_and_memory_usage_info()
 
-
+        executor_instance.execute_module()
 
     def _connect_to_parent(self):
         try:
@@ -375,7 +550,7 @@ class _JobWorker:
                 exception,
                 SystemExceptionCodes.FAILED_TO_CONNECT_TO_WORKER_THREAD_SOCKET.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception
 
         try:
@@ -386,24 +561,9 @@ class _JobWorker:
                 exception,
                 SystemExceptionCodes.FAILED_TO_START_SENDER_THREAD_OR_RECEIVER_THREAD.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception
 
-        try:
-            if utils.BIOLIB_IS_RUNNING_IN_ENCLAVE:
-                attestation_document = self._nsm_util.get_attestation_doc()
-            else:
-                attestation_document = b'Running locally'
-        except Exception as exception:
-            raise ComputeProcessException(
-                exception,
-                SystemExceptionCodes.FAILED_TO_GET_ATTESTATION_DOCUMENT.value,
-                self.send_system_exception,
-                may_contain_user_data=False
-            ) from exception
-
-        self._messages_to_send_queue.put(AttestationDocument().serialize(attestation_document))
-
     # TODO: move this mapping logic to the ModuleInput class
     def _add_runtime_zip_and_command_to_module_input(self, options: LocalExecutorOptions, module_input):
         module = options['module']
@@ -429,7 +589,7 @@ class _JobWorker:
                     exception,
                     SystemExceptionCodes.FAILED_TO_CREATE_NEW_JOB.value,
                     self.send_system_exception,
-                    may_contain_user_data=False
+                    may_contain_user_data=False,
                 ) from exception
 
         return module_input
@@ -444,26 +604,22 @@ class _JobWorker:
         runtime_zip_bytes: Optional[bytes] = self._app_version_id_to_runtime_zip.get(app_version['public_id'])
 
         if runtime_zip_bytes is None:
-
-                with open(runtime_zip_url, mode='rb') as runtime_zip_file:
-                    runtime_zip_bytes = runtime_zip_file.read()
-
-            else:
-                self._send_status_update(StatusUpdate(progress=25, log_message='Downloading Source Files...'))
+            self._send_status_update(StatusUpdate(progress=25, log_message='Downloading Source Files...'))
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+            start_time = time()
+            logger_no_user_data.debug(f'Job "{root_job_id}" downloading runtime zip...')
+            try:
+                runtime_zip_bytes = HttpClient.request(url=runtime_zip_url).content
+            except Exception as exception:
+                raise ComputeProcessException(
+                    exception,
+                    SystemExceptionCodes.FAILED_TO_DOWNLOAD_RUNTIME_ZIP.value,
+                    self.send_system_exception,
+                    may_contain_user_data=False,
+                ) from exception
+            finally:
+                download_time = time() - start_time
+                logger_no_user_data.debug(f'Job "{root_job_id}" download of runtime zip took: {download_time}s')
 
         self._app_version_id_to_runtime_zip[app_version['public_id']] = runtime_zip_bytes
 
@@ -476,31 +632,24 @@ class _JobWorker:
                 return module
         raise Exception(f'Could not find module with name {module_name}')
 
-    def
-
-
-        encrypted_package, tag = aes_key.encrypt_and_digest(package)
-        aes_encrypted_package = AesEncryptedPackage().serialize(iv, tag, encrypted_package)
-        return aes_encrypted_package
-
-    def send_system_exception(self, biolib_exception_code: SystemExceptionCodes) -> None:
-        if utils.BIOLIB_IS_RUNNING_IN_ENCLAVE:
-            CloudUtils.log(
-                log_message=str(biolib_exception_code),  # TODO: Switch back to logging full error message
-                level=logging.ERROR
-            )
+    def send_system_exception(self, biolib_exception_code: int) -> None:
+        system_exception_string = SystemExceptionCodeMap.get(biolib_exception_code)
+        logger_no_user_data.error(f'Hit system exception: {system_exception_string} ({biolib_exception_code})')
 
         system_exception_package = SystemException().serialize(biolib_exception_code)
         self._messages_to_send_queue.put(system_exception_package)
 
+    def send_stdout_and_stderr(self, stdout_and_stderr_bytes: bytes) -> None:
+        if utils.IS_RUNNING_IN_CLOUD:
+            stdout_and_stderr_package = StdoutAndStderr().serialize(stdout_and_stderr_bytes=stdout_and_stderr_bytes)
+            self._messages_to_send_queue.put(stdout_and_stderr_package)
+        else:
+            sys.stdout.write(stdout_and_stderr_bytes.decode())
+            if not utils.IS_RUNNING_IN_NOTEBOOK:  # for some reason flushing in jupyter notebooks breaks \r handling
+                sys.stdout.flush()
+
     def _send_status_update(self, status_update: StatusUpdate) -> None:
         try:
-            if utils.BIOLIB_IS_RUNNING_IN_ENCLAVE:
-                CloudUtils.log(
-                    log_message=status_update['log_message'],
-                    level=logging.INFO
-                )
-
             status_update_package = SystemStatusUpdate().serialize(
                 status_update['progress'],
                 status_update['log_message'],
@@ -512,5 +661,41 @@ class _JobWorker:
                 exception,
                 SystemExceptionCodes.FAILED_TO_SEND_STATUS_UPDATE.value,
                 self.send_system_exception,
-                may_contain_user_data=False
+                may_contain_user_data=False,
             ) from exception
+
+    def _run_root_job(self, module_input_path: str) -> str:
+        # Make typechecker happy
+        if not self._root_job_wrapper or not self.job_temporary_dir:
+            raise BioLibError('Undefined job_wrapper or job_temporary_dir')
+
+        main_module_output_path = os.path.join(self.job_temporary_dir, JobStorage.module_output_file_name)
+        self._run_job(
+            job=self._root_job_wrapper['job'],
+            module_input_path=module_input_path,
+            main_module_output_path=main_module_output_path,
+        )
+        self._send_status_update(StatusUpdate(progress=94, log_message='Computation finished'))
+        return main_module_output_path
+
+    def run_job_locally(self, job_dict: CreatedJobDict, module_input_serialized: bytes) -> ModuleOutputV2:
+        try:
+            with tempfile.TemporaryDirectory() as job_temporary_dir:
+                self.job_temporary_dir = job_temporary_dir
+                self._root_job_wrapper = JobWrapper(
+                    access_token=BiolibApiClient.get().access_token or '',
+                    BASE_URL=BiolibApiClient.get().base_url,
+                    cloud_job=None,
+                    compute_node_info=None,
+                    job=job_dict,
+                    job_temporary_dir=job_temporary_dir,
+                )
+                self._start_network_and_remote_host_proxies(job_dict)
+                module_input_path = os.path.join(self.job_temporary_dir, JobStorage.module_input_file_name)
+                open(module_input_path, 'wb').write(module_input_serialized)
+                module_output_path = self._run_root_job(module_input_path)
+                with open(module_output_path, mode='rb') as module_output_file:
+                    module_output_serialized = module_output_file.read()
+                return ModuleOutputV2(InMemoryIndexableBuffer(module_output_serialized))
+        finally:
+            self._cleanup()