pybiolib 0.2.951__py3-none-any.whl → 1.2.1890__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/__init__.py +357 -11
- biolib/_data_record/data_record.py +380 -0
- biolib/_index/__init__.py +0 -0
- biolib/_index/index.py +55 -0
- biolib/_index/query_result.py +103 -0
- biolib/_internal/__init__.py +0 -0
- biolib/_internal/add_copilot_prompts.py +58 -0
- biolib/_internal/add_gui_files.py +81 -0
- biolib/_internal/data_record/__init__.py +1 -0
- biolib/_internal/data_record/data_record.py +85 -0
- biolib/_internal/data_record/push_data.py +116 -0
- biolib/_internal/data_record/remote_storage_endpoint.py +43 -0
- biolib/_internal/errors.py +5 -0
- biolib/_internal/file_utils.py +125 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +159 -0
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/lfs/cache.py +51 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +488 -0
- biolib/_internal/runtime.py +22 -0
- biolib/_internal/string_utils.py +13 -0
- biolib/_internal/templates/__init__.py +1 -0
- biolib/_internal/templates/copilot_template/.github/instructions/general-app-knowledge.instructions.md +10 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-general.instructions.md +20 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-python.instructions.md +16 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_app_inputs.prompt.md +11 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_run_apps.prompt.md +12 -0
- biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
- biolib/_internal/templates/github_workflow_template/.github/workflows/biolib.yml +21 -0
- biolib/_internal/templates/gitignore_template/.gitignore +10 -0
- biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
- biolib/_internal/templates/gui_template/App.tsx +53 -0
- biolib/_internal/templates/gui_template/Dockerfile +27 -0
- biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
- biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
- biolib/_internal/templates/gui_template/index.css +5 -0
- biolib/_internal/templates/gui_template/index.html +13 -0
- biolib/_internal/templates/gui_template/index.tsx +10 -0
- biolib/_internal/templates/gui_template/package.json +27 -0
- biolib/_internal/templates/gui_template/tsconfig.json +24 -0
- biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
- biolib/_internal/templates/gui_template/vite.config.mts +10 -0
- biolib/_internal/templates/init_template/.biolib/config.yml +19 -0
- biolib/_internal/templates/init_template/Dockerfile +14 -0
- biolib/_internal/templates/init_template/requirements.txt +1 -0
- biolib/_internal/templates/init_template/run.py +12 -0
- biolib/_internal/templates/init_template/run.sh +4 -0
- biolib/_internal/templates/templates.py +25 -0
- biolib/_internal/tree_utils.py +106 -0
- biolib/_internal/utils/__init__.py +65 -0
- biolib/_internal/utils/auth.py +46 -0
- biolib/_internal/utils/job_url.py +33 -0
- biolib/_internal/utils/multinode.py +263 -0
- biolib/_runtime/runtime.py +157 -0
- biolib/_session/session.py +44 -0
- biolib/_shared/__init__.py +0 -0
- biolib/_shared/types/__init__.py +74 -0
- biolib/_shared/types/account.py +12 -0
- biolib/_shared/types/account_member.py +8 -0
- biolib/_shared/types/app.py +9 -0
- biolib/_shared/types/data_record.py +40 -0
- biolib/_shared/types/experiment.py +32 -0
- biolib/_shared/types/file_node.py +17 -0
- biolib/_shared/types/push.py +6 -0
- biolib/_shared/types/resource.py +37 -0
- biolib/_shared/types/resource_deploy_key.py +11 -0
- biolib/_shared/types/resource_permission.py +14 -0
- biolib/_shared/types/resource_version.py +19 -0
- biolib/_shared/types/result.py +14 -0
- biolib/_shared/types/typing.py +10 -0
- biolib/_shared/types/user.py +19 -0
- biolib/_shared/utils/__init__.py +7 -0
- biolib/_shared/utils/resource_uri.py +75 -0
- biolib/api/__init__.py +6 -0
- biolib/api/client.py +168 -0
- biolib/app/app.py +252 -49
- biolib/app/search_apps.py +45 -0
- biolib/biolib_api_client/api_client.py +126 -31
- biolib/biolib_api_client/app_types.py +24 -4
- biolib/biolib_api_client/auth.py +31 -8
- biolib/biolib_api_client/biolib_app_api.py +147 -52
- biolib/biolib_api_client/biolib_job_api.py +161 -141
- biolib/biolib_api_client/job_types.py +21 -5
- biolib/biolib_api_client/lfs_types.py +7 -23
- biolib/biolib_api_client/user_state.py +56 -0
- biolib/biolib_binary_format/__init__.py +1 -4
- biolib/biolib_binary_format/file_in_container.py +105 -0
- biolib/biolib_binary_format/module_input.py +24 -7
- biolib/biolib_binary_format/module_output_v2.py +149 -0
- biolib/biolib_binary_format/remote_endpoints.py +34 -0
- biolib/biolib_binary_format/remote_stream_seeker.py +59 -0
- biolib/biolib_binary_format/saved_job.py +3 -2
- biolib/biolib_binary_format/{attestation_document.py → stdout_and_stderr.py} +8 -8
- biolib/biolib_binary_format/system_status_update.py +3 -2
- biolib/biolib_binary_format/utils.py +175 -0
- biolib/biolib_docker_client/__init__.py +11 -2
- biolib/biolib_errors.py +36 -0
- biolib/biolib_logging.py +27 -10
- biolib/cli/__init__.py +38 -0
- biolib/cli/auth.py +46 -0
- biolib/cli/data_record.py +164 -0
- biolib/cli/index.py +32 -0
- biolib/cli/init.py +421 -0
- biolib/cli/lfs.py +101 -0
- biolib/cli/push.py +50 -0
- biolib/cli/run.py +63 -0
- biolib/cli/runtime.py +14 -0
- biolib/cli/sdk.py +16 -0
- biolib/cli/start.py +56 -0
- biolib/compute_node/cloud_utils/cloud_utils.py +110 -161
- biolib/compute_node/job_worker/cache_state.py +66 -88
- biolib/compute_node/job_worker/cache_types.py +1 -6
- biolib/compute_node/job_worker/docker_image_cache.py +112 -37
- biolib/compute_node/job_worker/executors/__init__.py +0 -3
- biolib/compute_node/job_worker/executors/docker_executor.py +532 -199
- biolib/compute_node/job_worker/executors/docker_types.py +9 -1
- biolib/compute_node/job_worker/executors/types.py +19 -9
- biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +30 -0
- biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +3 -5
- biolib/compute_node/job_worker/job_storage.py +108 -0
- biolib/compute_node/job_worker/job_worker.py +397 -212
- biolib/compute_node/job_worker/large_file_system.py +87 -38
- biolib/compute_node/job_worker/network_alloc.py +99 -0
- biolib/compute_node/job_worker/network_buffer.py +240 -0
- biolib/compute_node/job_worker/utilization_reporter_thread.py +197 -0
- biolib/compute_node/job_worker/utils.py +9 -24
- biolib/compute_node/remote_host_proxy.py +400 -98
- biolib/compute_node/utils.py +31 -9
- biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
- biolib/compute_node/webserver/proxy_utils.py +28 -0
- biolib/compute_node/webserver/webserver.py +130 -44
- biolib/compute_node/webserver/webserver_types.py +2 -6
- biolib/compute_node/webserver/webserver_utils.py +77 -12
- biolib/compute_node/webserver/worker_thread.py +183 -42
- biolib/experiments/__init__.py +0 -0
- biolib/experiments/experiment.py +356 -0
- biolib/jobs/__init__.py +1 -0
- biolib/jobs/job.py +741 -0
- biolib/jobs/job_result.py +185 -0
- biolib/jobs/types.py +50 -0
- biolib/py.typed +0 -0
- biolib/runtime/__init__.py +14 -0
- biolib/sdk/__init__.py +91 -0
- biolib/tables.py +34 -0
- biolib/typing_utils.py +2 -7
- biolib/user/__init__.py +1 -0
- biolib/user/sign_in.py +54 -0
- biolib/utils/__init__.py +162 -0
- biolib/utils/cache_state.py +94 -0
- biolib/utils/multipart_uploader.py +194 -0
- biolib/utils/seq_util.py +150 -0
- biolib/utils/zip/remote_zip.py +640 -0
- pybiolib-1.2.1890.dist-info/METADATA +41 -0
- pybiolib-1.2.1890.dist-info/RECORD +177 -0
- {pybiolib-0.2.951.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
- pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
- README.md +0 -17
- biolib/app/app_result.py +0 -68
- biolib/app/utils.py +0 -62
- biolib/biolib-js/0-biolib.worker.js +0 -1
- biolib/biolib-js/1-biolib.worker.js +0 -1
- biolib/biolib-js/2-biolib.worker.js +0 -1
- biolib/biolib-js/3-biolib.worker.js +0 -1
- biolib/biolib-js/4-biolib.worker.js +0 -1
- biolib/biolib-js/5-biolib.worker.js +0 -1
- biolib/biolib-js/6-biolib.worker.js +0 -1
- biolib/biolib-js/index.html +0 -10
- biolib/biolib-js/main-biolib.js +0 -1
- biolib/biolib_api_client/biolib_account_api.py +0 -21
- biolib/biolib_api_client/biolib_large_file_system_api.py +0 -108
- biolib/biolib_binary_format/aes_encrypted_package.py +0 -42
- biolib/biolib_binary_format/module_output.py +0 -58
- biolib/biolib_binary_format/rsa_encrypted_aes_package.py +0 -57
- biolib/biolib_push.py +0 -114
- biolib/cli.py +0 -203
- biolib/cli_utils.py +0 -273
- biolib/compute_node/cloud_utils/enclave_parent_types.py +0 -7
- biolib/compute_node/enclave/__init__.py +0 -2
- biolib/compute_node/enclave/enclave_remote_hosts.py +0 -53
- biolib/compute_node/enclave/nitro_secure_module_utils.py +0 -64
- biolib/compute_node/job_worker/executors/base_executor.py +0 -18
- biolib/compute_node/job_worker/executors/pyppeteer_executor.py +0 -173
- biolib/compute_node/job_worker/executors/remote/__init__.py +0 -1
- biolib/compute_node/job_worker/executors/remote/nitro_enclave_utils.py +0 -81
- biolib/compute_node/job_worker/executors/remote/remote_executor.py +0 -51
- biolib/lfs.py +0 -196
- biolib/pyppeteer/.circleci/config.yml +0 -100
- biolib/pyppeteer/.coveragerc +0 -3
- biolib/pyppeteer/.gitignore +0 -89
- biolib/pyppeteer/.pre-commit-config.yaml +0 -28
- biolib/pyppeteer/CHANGES.md +0 -253
- biolib/pyppeteer/CONTRIBUTING.md +0 -26
- biolib/pyppeteer/LICENSE +0 -12
- biolib/pyppeteer/README.md +0 -137
- biolib/pyppeteer/docs/Makefile +0 -177
- biolib/pyppeteer/docs/_static/custom.css +0 -28
- biolib/pyppeteer/docs/_templates/layout.html +0 -10
- biolib/pyppeteer/docs/changes.md +0 -1
- biolib/pyppeteer/docs/conf.py +0 -299
- biolib/pyppeteer/docs/index.md +0 -21
- biolib/pyppeteer/docs/make.bat +0 -242
- biolib/pyppeteer/docs/reference.md +0 -211
- biolib/pyppeteer/docs/server.py +0 -60
- biolib/pyppeteer/poetry.lock +0 -1699
- biolib/pyppeteer/pyppeteer/__init__.py +0 -135
- biolib/pyppeteer/pyppeteer/accessibility.py +0 -286
- biolib/pyppeteer/pyppeteer/browser.py +0 -401
- biolib/pyppeteer/pyppeteer/browser_fetcher.py +0 -194
- biolib/pyppeteer/pyppeteer/command.py +0 -22
- biolib/pyppeteer/pyppeteer/connection/__init__.py +0 -242
- biolib/pyppeteer/pyppeteer/connection/cdpsession.py +0 -101
- biolib/pyppeteer/pyppeteer/coverage.py +0 -346
- biolib/pyppeteer/pyppeteer/device_descriptors.py +0 -787
- biolib/pyppeteer/pyppeteer/dialog.py +0 -79
- biolib/pyppeteer/pyppeteer/domworld.py +0 -597
- biolib/pyppeteer/pyppeteer/emulation_manager.py +0 -53
- biolib/pyppeteer/pyppeteer/errors.py +0 -48
- biolib/pyppeteer/pyppeteer/events.py +0 -63
- biolib/pyppeteer/pyppeteer/execution_context.py +0 -156
- biolib/pyppeteer/pyppeteer/frame/__init__.py +0 -299
- biolib/pyppeteer/pyppeteer/frame/frame_manager.py +0 -306
- biolib/pyppeteer/pyppeteer/helpers.py +0 -245
- biolib/pyppeteer/pyppeteer/input.py +0 -371
- biolib/pyppeteer/pyppeteer/jshandle.py +0 -598
- biolib/pyppeteer/pyppeteer/launcher.py +0 -683
- biolib/pyppeteer/pyppeteer/lifecycle_watcher.py +0 -169
- biolib/pyppeteer/pyppeteer/models/__init__.py +0 -103
- biolib/pyppeteer/pyppeteer/models/_protocol.py +0 -12460
- biolib/pyppeteer/pyppeteer/multimap.py +0 -82
- biolib/pyppeteer/pyppeteer/network_manager.py +0 -678
- biolib/pyppeteer/pyppeteer/options.py +0 -8
- biolib/pyppeteer/pyppeteer/page.py +0 -1728
- biolib/pyppeteer/pyppeteer/pipe_transport.py +0 -59
- biolib/pyppeteer/pyppeteer/target.py +0 -147
- biolib/pyppeteer/pyppeteer/task_queue.py +0 -24
- biolib/pyppeteer/pyppeteer/timeout_settings.py +0 -36
- biolib/pyppeteer/pyppeteer/tracing.py +0 -93
- biolib/pyppeteer/pyppeteer/us_keyboard_layout.py +0 -305
- biolib/pyppeteer/pyppeteer/util.py +0 -18
- biolib/pyppeteer/pyppeteer/websocket_transport.py +0 -47
- biolib/pyppeteer/pyppeteer/worker.py +0 -101
- biolib/pyppeteer/pyproject.toml +0 -97
- biolib/pyppeteer/spell.txt +0 -137
- biolib/pyppeteer/tox.ini +0 -72
- biolib/pyppeteer/utils/generate_protocol_types.py +0 -603
- biolib/start_cli.py +0 -7
- biolib/utils.py +0 -47
- biolib/validators/validate_app_version.py +0 -183
- biolib/validators/validate_argument.py +0 -134
- biolib/validators/validate_module.py +0 -323
- biolib/validators/validate_zip_file.py +0 -40
- biolib/validators/validator_utils.py +0 -103
- pybiolib-0.2.951.dist-info/LICENSE +0 -21
- pybiolib-0.2.951.dist-info/METADATA +0 -61
- pybiolib-0.2.951.dist-info/RECORD +0 -153
- pybiolib-0.2.951.dist-info/entry_points.txt +0 -3
- /LICENSE → /pybiolib-1.2.1890.dist-info/licenses/LICENSE +0 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from biolib._internal.utils import PathFilter, filter_lazy_loaded_files
|
|
5
|
+
from biolib.biolib_binary_format import ModuleOutputV2
|
|
6
|
+
from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
|
|
7
|
+
from biolib.biolib_binary_format.remote_stream_seeker import StreamSeeker
|
|
8
|
+
from biolib.biolib_binary_format.utils import LazyLoadedFile, RemoteIndexableBuffer
|
|
9
|
+
from biolib.biolib_errors import BioLibError
|
|
10
|
+
from biolib.biolib_logging import logger
|
|
11
|
+
from biolib.typing_utils import Dict, List, Optional
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class JobResult:
    """Accessor for the stdout, stderr, exit code and output files of a job.

    The module output is either supplied to the constructor or created on first use
    from the job's remote ``'output'`` storage endpoint (see ``_get_module_output``).
    """

    def __init__(
        self,
        job_uuid: str,
        job_auth_token: str,
        module_output: Optional[ModuleOutputV2] = None,
    ):
        self._job_uuid: str = job_uuid
        self._job_auth_token: str = job_auth_token

        # Left as None until first needed; see _get_module_output
        self._module_output: Optional[ModuleOutputV2] = module_output

    def get_stdout(self) -> bytes:
        """Return the stdout reported by the module output."""
        return self._get_module_output().get_stdout()

    def get_stderr(self) -> bytes:
        """Return the stderr reported by the module output."""
        return self._get_module_output().get_stderr()

    def get_exit_code(self) -> int:
        """Return the exit code reported by the module output."""
        return self._get_module_output().get_exit_code()

    def save_files(
        self,
        output_dir: str,
        path_filter: Optional[PathFilter] = None,
        skip_file_if_exists: bool = False,
        overwrite: bool = False,
        flat: bool = False,
    ) -> None:
        """Download the job's output files into ``output_dir``.

        Args:
            output_dir: Directory the files are written below; created as needed.
            path_filter: When given, only files selected by ``filter_lazy_loaded_files`` are saved.
            skip_file_if_exists: Skip (and log) files whose destination already exists.
            overwrite: When a destination exists and is not skipped, move the existing file aside to
                ``<name>.biolib-renamed.<timestamp>`` and write the new one. Without it an error is raised.
            flat: Save every file directly in ``output_dir`` using only its basename.

        Raises:
            BioLibError: If ``flat`` is set and two files share a basename, if a file path contains a
                ``..`` component (it could otherwise be written outside ``output_dir``), or if a
                destination exists and neither ``skip_file_if_exists`` nor ``overwrite`` is set.
        """
        module_output = self._get_module_output()
        output_files = module_output.get_files()
        filtered_output_files = filter_lazy_loaded_files(output_files, path_filter) if path_filter else output_files

        if len(filtered_output_files) == 0:
            logger.debug('No output files to save')
            return

        if flat:
            self._raise_if_duplicate_basenames(filtered_output_files)

        # Resolve (and validate) every destination before downloading anything, so that an unsafe
        # path is rejected before any file has been written.
        destination_paths = [
            self._get_destination_path(output_dir, file.path, flat) for file in filtered_output_files
        ]
        read_ahead_bytes_per_file = self._get_read_ahead_bytes_per_file(filtered_output_files)
        total_files_data_to_download_in_bytes = sum(file.length for file in filtered_output_files)

        # Assume files are in order
        first_file = filtered_output_files[0]
        last_file = filtered_output_files[-1]
        stream_seeker = StreamSeeker(
            files_data_start=first_file.start,
            files_data_end=last_file.start + last_file.length,
            max_chunk_size=min(total_files_data_to_download_in_bytes, 10_000_000),
            upstream_buffer=module_output.buffer,
        )

        logger.info(f'Saving {len(filtered_output_files)} files to {output_dir}...')
        for file, destination_file_path, read_ahead_bytes in zip(
            filtered_output_files, destination_paths, read_ahead_bytes_per_file
        ):
            if destination_file_path.exists():
                if skip_file_if_exists:
                    logger.info(f'Skipping {destination_file_path} as a file with that name already exists locally.')
                    continue
                if not overwrite:
                    raise BioLibError(f'File {destination_file_path} already exists. Set overwrite=True to overwrite.')
                # Keep the previous file around under a timestamped name instead of deleting it
                destination_file_path.rename(
                    f'{destination_file_path}.biolib-renamed.{time.strftime("%Y%m%d%H%M%S")}'
                )

            destination_file_path.parent.mkdir(parents=True, exist_ok=True)

            # write content to temporary (partial) file
            partial_path = destination_file_path.with_suffix(
                destination_file_path.suffix + f'.{self._job_uuid}.partial_biolib_download'
            )
            file_start = file.start
            data_to_download = file.length
            if partial_path.exists():
                # Resume: skip the bytes that an earlier attempt already wrote to the partial file
                data_already_downloaded = partial_path.stat().st_size
                file_start += data_already_downloaded
                data_to_download -= data_already_downloaded

            with open(partial_path, mode='ab') as partial_file:
                for chunk in stream_seeker.seek_and_read(
                    file_start=file_start, file_length=data_to_download, read_ahead_bytes=read_ahead_bytes
                ):
                    partial_file.write(chunk)

            # rename partial file to actual file name
            partial_path.rename(destination_file_path)

    @staticmethod
    def _raise_if_duplicate_basenames(files: List[LazyLoadedFile]) -> None:
        """Raise BioLibError listing (up to three) basenames shared by more than one file path."""
        basename_to_paths: Dict[str, List[str]] = {}
        for file in files:
            basename_to_paths.setdefault(Path(file.path).name, []).append(file.path)

        duplicates = {basename: paths for basename, paths in basename_to_paths.items() if len(paths) > 1}
        if not duplicates:
            return

        max_shown = 3
        error_parts = [
            f' {basename}: ({", ".join(duplicates[basename])})' for basename in sorted(duplicates)[:max_shown]
        ]
        error_message = 'Cannot save files in flat mode: duplicate filenames detected:\n' + '\n'.join(error_parts)

        if len(duplicates) > max_shown:
            remaining = len(duplicates) - max_shown
            error_message += f'\n (and {remaining} more)'

        raise BioLibError(error_message)

    @staticmethod
    def _get_destination_path(output_dir: str, file_path: str, flat: bool) -> Path:
        """Return where ``file_path`` is saved below ``output_dir``, rejecting ``..`` components."""
        # In flat mode only the basename is used; otherwise the leading slash of file_path is removed
        relative_path = Path(Path(file_path).name) if flat else Path(file_path.lstrip('/'))
        if '..' in relative_path.parts:
            raise BioLibError(f'Refusing to save file "{file_path}": the path must not contain ".." components.')
        return Path(output_dir) / relative_path

    @staticmethod
    def _get_read_ahead_bytes_per_file(files: List[LazyLoadedFile], major_gap_threshold: int = 50_000) -> List[int]:
        """Return, per file, the byte count from the file's end to the end of its run of nearby files.

        Walking backwards, a run ends at a file when the gap to the next file's start is at least
        ``major_gap_threshold`` bytes (or when it is the last file).
        """
        file_count = len(files)
        run_end = [0] * file_count
        for i in range(file_count - 1, -1, -1):
            end_i = files[i].start + files[i].length
            is_last_file = i == file_count - 1
            if is_last_file or files[i + 1].start - end_i >= major_gap_threshold:
                run_end[i] = end_i
            else:
                run_end[i] = run_end[i + 1]

        return [max(0, run_end[i] - (file.start + file.length)) for i, file in enumerate(files)]

    def get_output_file(self, filename: PathFilter) -> LazyLoadedFile:
        """Return the single output file selected by ``filename``.

        Raises:
            BioLibError: If no file, or more than one file, matches ``filename``.
        """
        files = self._get_module_output().get_files()
        filtered_files = filter_lazy_loaded_files(files, path_filter=filename)
        if not filtered_files:
            raise BioLibError(f'File {filename} not found in results.')

        if len(filtered_files) != 1:
            raise BioLibError(f'Found multiple results for filename {filename}.')

        return filtered_files[0]

    def list_output_files(self, path_filter: Optional[PathFilter] = None) -> List[LazyLoadedFile]:
        """Return the output files, narrowed by ``path_filter`` when one is given."""
        files = self._get_module_output().get_files()
        if not path_filter:
            return files

        return filter_lazy_loaded_files(files, path_filter)

    def _get_module_output(self) -> ModuleOutputV2:
        """Return the module output, creating it from the job's remote output storage on first use."""
        if self._module_output is None:
            remote_job_storage_endpoint = RemoteJobStorageEndpoint(
                job_auth_token=self._job_auth_token,
                job_uuid=self._job_uuid,
                storage_type='output',
            )
            buffer = RemoteIndexableBuffer(endpoint=remote_job_storage_endpoint)
            self._module_output = ModuleOutputV2(buffer)

        return self._module_output
|
biolib/jobs/types.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from biolib.typing_utils import List, Literal, Optional, TypedDict
|
|
2
|
+
|
|
3
|
+
# The set of state strings allowed for the "state" key of JobDict
JobState = Literal['in_progress', 'completed', 'failed', 'cancelled']
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class _BaseCloudJobDict(TypedDict):
    """Keys shared by the cloud job dictionaries that subclass this type in this module."""

    # presumably timestamps serialized as strings; format not visible here - TODO confirm
    created_at: str
    finished_at: Optional[str]
    uuid: str
    error_code: int
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class CloudJobDict(_BaseCloudJobDict):
    """Cloud job dictionary in which ``started_at`` and ``compute_node_url`` may be None."""

    started_at: Optional[str]
    compute_node_url: Optional[str]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class CloudJobStartedDict(_BaseCloudJobDict):
    """Cloud job dictionary in which ``started_at`` and ``compute_node_url`` are always strings.

    NOTE(review): presumably describes a cloud job that has already started - confirm against callers.
    """

    started_at: str
    compute_node_url: str
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Result(TypedDict):
    """Dictionary with a single ``name`` key; used as the type of ``JobDict.main_result``."""

    name: str
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class JobDict(TypedDict):
    """Dictionary describing a job; the element type of ``JobsPaginatedResponse.results``."""

    app_uri: str
    arguments_override_command: bool
    auth_token: str
    created_at: str
    ended_at: Optional[str]
    requested_machine: str
    runtime_seconds: int
    main_result: Result
    started_at: str
    state: JobState
    uuid: str
    # None when the job has no associated cloud job dictionary
    cloud_job: Optional[CloudJobDict]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class BasePaginatedResponse(TypedDict):
    """Pagination keys shared by paginated response dictionaries."""

    current_page_number: int
    object_count: int
    page_count: int
    page_size: int
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class JobsPaginatedResponse(BasePaginatedResponse):
    """Paginated response whose ``results`` key holds a list of ``JobDict`` entries."""

    results: List[JobDict]
|
biolib/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
from biolib._runtime.runtime import Runtime as _Runtime
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def set_main_result_prefix(result_prefix: str) -> None:
    """Deprecated shim: emit a DeprecationWarning, then forward to ``Runtime.set_main_result_prefix``."""
    deprecation_message = (
        'The "biolib.runtime.set_main_result_prefix" function is deprecated. '
        'It will be removed in future releases from mid 2024. '
        'Please use "from biolib.sdk import Runtime" and then "Runtime.set_main_result_prefix" instead.'
    )
    # stacklevel=2 attributes the warning to the caller of this function
    warnings.warn(deprecation_message, category=DeprecationWarning, stacklevel=2)
    _Runtime.set_main_result_prefix(result_prefix)
|
biolib/sdk/__init__.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Optional, Union
|
|
2
|
+
|
|
3
|
+
# Imports to hide and use as private internal utils
|
|
4
|
+
from biolib._data_record.data_record import DataRecord as _DataRecord
|
|
5
|
+
from biolib._index.index import Index as _Index
|
|
6
|
+
from biolib._index.query_result import IndexQueryResult
|
|
7
|
+
from biolib._index.query_result import query_index as _query_index
|
|
8
|
+
from biolib._internal.push_application import push_application as _push_application
|
|
9
|
+
from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
|
|
10
|
+
from biolib._runtime.runtime import Runtime as _Runtime
|
|
11
|
+
from biolib._session.session import Session as _Session
|
|
12
|
+
from biolib.app import BioLibApp as _BioLibApp
|
|
13
|
+
|
|
14
|
+
# Classes to expose as public API
|
|
15
|
+
Runtime = _Runtime
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_session(
    refresh_token: str,
    base_url: Optional[str] = None,
    client_type: Optional[str] = None,
    experiment: Optional[str] = None,
) -> _Session:
    """Return the result of ``Session.get_session`` called with the given arguments as keywords."""
    session_kwargs = {
        'refresh_token': refresh_token,
        'base_url': base_url,
        'client_type': client_type,
        'experiment': experiment,
    }
    return _Session.get_session(**session_kwargs)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def push_app_version(uri: str, path: str) -> _BioLibApp:
    """Push the application at ``path`` to ``uri`` and return a ``BioLibApp`` for the pushed version.

    The push is made without copying images from another version and without marking the new
    version as active or published.

    Raises:
        Exception: If the push returns no data.
    """
    pushed = _push_application(
        app_uri=uri,
        app_path=path,
        app_version_to_copy_images_from=None,
        set_as_active=False,
        set_as_published=False,
    )
    if pushed:
        # Address the exact version that was just pushed: "<app_uri>:<version>"
        return _BioLibApp(f'{pushed["app_uri"]}:{pushed["sematic_version"]}')

    raise Exception('Failed to push application; please check the logs for more details')
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def set_app_version_as_default(app_version: _BioLibApp) -> None:
    """Mark ``app_version`` as active, identified by the ``'public_id'`` of its ``version``."""
    _set_app_version_as_active(app_version.version['public_id'])
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_app_version_pytest_plugin(app_version: _BioLibApp):
    """Return a pytest plugin object exposing ``app_version`` as a session-scoped fixture.

    The returned object defines a fixture named ``app_version`` that yields the value passed in here.

    Raises:
        Exception: If pytest cannot be imported.
    """
    try:
        import pytest  # type: ignore # pylint: disable=import-outside-toplevel,import-error
    except ImportError:
        # Only a failed import is reported as such; KeyboardInterrupt/SystemExit are no longer
        # swallowed and turned into a misleading "failed to import" error.
        raise Exception('Failed to import pytest; please make sure it is installed') from None

    class AppVersionFixturePlugin:
        def __init__(self, app_version_ref):
            self.app_version_ref = app_version_ref

        @pytest.fixture(scope='session')
        def app_version(self, request):  # pylint: disable=unused-argument
            return self.app_version_ref

    return AppVersionFixturePlugin(app_version)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def create_data_record(
    destination: str,
    data_path: str,
    name: Optional[str] = None,
    record_type: Optional[str] = None,
) -> _DataRecord:
    """Create a data record via ``DataRecord.create``.

    When ``name`` is given (and non-empty) it is appended to ``destination`` as ``<destination>/<name>``;
    otherwise ``destination`` is used unchanged.
    """
    full_destination = destination
    if name:
        full_destination = f'{destination}/{name}'

    return _DataRecord.create(destination=full_destination, data_path=data_path, record_type=record_type)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def get_index(uri: str) -> _Index:
    """Return the index obtained from ``Index.get_by_uri`` for ``uri``."""
    index = _Index.get_by_uri(uri)
    return index
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def query_index(
    query: str,
    data: Optional[Union[List[Dict[str, Any]], bytes]] = None,
    data_format: str = 'json',
) -> IndexQueryResult:
    """Run ``query`` through the internal ``query_index`` helper, forwarding ``data`` and ``data_format``."""
    query_result = _query_index(
        query=query,
        data=data,
        data_format=data_format,
    )
    return query_result
|
biolib/tables.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from collections import OrderedDict
|
|
2
|
+
|
|
3
|
+
from rich.console import Console
|
|
4
|
+
from rich.table import Column, Table
|
|
5
|
+
|
|
6
|
+
from biolib.typing_utils import Any, List
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BioLibTable:
    """Builds a rich ``Table`` from a list of rows and prints it to the console.

    ``columns_to_row_map`` maps each column header to a dict with a ``'params'`` entry (keyword
    arguments for the rich ``Column``) and a ``'key'`` entry (a dot-separated key path into a row).
    """

    def __init__(self, columns_to_row_map: OrderedDict, rows: List[Any], title):
        self.title = title
        self.rows = rows
        self.columns_to_row_map = columns_to_row_map
        # Built last because it reads the three attributes above
        self.table = self._create_table()

    def _create_table(self) -> Table:
        """Create the table with one column per map entry and one row per element of ``self.rows``."""
        table = Table(
            *[Column(header=header, **meta['params']) for header, meta in self.columns_to_row_map.items()],
            title=self.title,
        )
        for row in self.rows:
            cells: List[str] = []
            for meta in self.columns_to_row_map.values():
                first_key, *nested_keys = meta['key'].split('.')
                current = row[first_key]
                for nested_key in nested_keys:
                    # A nested key is only followed when the current value is truthy and contains it;
                    # otherwise the current value is kept and the next key is tried against it.
                    if current and nested_key in current:
                        current = current[nested_key]
                cells.append(str(current))
            table.add_row(*cells)
        return table

    def print_table(self):
        """Print the table using a new rich ``Console``."""
        Console().print(self.table)
|
biolib/typing_utils.py
CHANGED
|
@@ -1,7 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
# import and expose everything from the typing module
|
|
4
|
-
from typing import * # pylint: disable=wildcard-import, unused-wildcard-import
|
|
5
|
-
|
|
6
|
-
if sys.version_info < (3, 8):
|
|
7
|
-
from typing_extensions import TypedDict, Literal
|
|
1
|
+
# TODO: Deprecate and later remove this file
|
|
2
|
+
from biolib._shared.types.typing import * # pylint: disable=wildcard-import, unused-wildcard-import
|
biolib/user/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .sign_in import sign_in, sign_out
|
biolib/user/sign_in.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import webbrowser
|
|
3
|
+
|
|
4
|
+
from biolib._internal.utils import open_browser_window_from_notebook
|
|
5
|
+
from biolib.biolib_api_client import BiolibApiClient
|
|
6
|
+
from biolib.biolib_api_client.auth import BiolibAuthChallengeApi
|
|
7
|
+
from biolib.biolib_logging import logger_no_user_data
|
|
8
|
+
from biolib.utils import IS_RUNNING_IN_NOTEBOOK
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def sign_out() -> None:
    """Sign out through the API client, obtained without attempting a sign-in first."""
    BiolibApiClient.get(attempt_sign_in=False).sign_out()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def sign_in(open_in_default_browser: bool = False) -> None:
    """Sign in through a browser-based auth challenge.

    Does nothing (apart from logging) when re-authentication is not needed. Otherwise an auth
    challenge is created, its sign-in URL is shown or opened, and the challenge status is polled
    every 3 seconds, at most 100 times, until it is no longer ``'awaiting'``.

    Args:
        open_in_default_browser: Outside a notebook, open the URL with ``webbrowser`` instead of
            only printing it. In a notebook the URL is always opened from the notebook.
    """
    if not BiolibApiClient.is_reauthentication_needed():
        logger_no_user_data.info('Already signed in')
        return

    api_client = BiolibApiClient.get()
    auth_challenge_token = BiolibAuthChallengeApi.create_auth_challenge()['token']

    client_type = 'notebook' if IS_RUNNING_IN_NOTEBOOK else 'cli'
    frontend_sign_in_url = f'{api_client.base_url}/sign-in/request/{client_type}/?token={auth_challenge_token}'

    if IS_RUNNING_IN_NOTEBOOK or open_in_default_browser:
        print(f'Opening authorization page at: {frontend_sign_in_url}')
        print('If your browser does not open automatically, click on the link above.')
        # The notebook takes precedence over open_in_default_browser
        if IS_RUNNING_IN_NOTEBOOK:
            open_browser_window_from_notebook(frontend_sign_in_url)
        else:
            webbrowser.open(frontend_sign_in_url)
    else:
        print('Please copy and paste the following link into your browser:')
        print(frontend_sign_in_url)

    for _ in range(100):
        time.sleep(3)
        auth_challenge_status = BiolibAuthChallengeApi.get_auth_challenge_status(token=auth_challenge_token)
        if auth_challenge_status['state'] != 'awaiting':
            break

    user_tokens = auth_challenge_status.get('user_tokens')
    if not user_tokens:
        print(f"Sign in failed. Got state: {auth_challenge_status['state']}\nPlease try again")
        return

    api_client.set_user_tokens(user_tokens)
    print('Successfully signed in!')
|
biolib/utils/__init__.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import collections.abc
|
|
2
|
+
import multiprocessing
|
|
3
|
+
import os
|
|
4
|
+
import socket
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
from importlib_metadata import version, PackageNotFoundError
|
|
8
|
+
|
|
9
|
+
from biolib.typing_utils import Optional
|
|
10
|
+
from biolib.utils.seq_util import SeqUtil, SeqUtilRecord
|
|
11
|
+
from biolib._internal.http_client import HttpClient
|
|
12
|
+
from biolib.biolib_logging import logger_no_user_data, logger
|
|
13
|
+
from biolib.typing_utils import Tuple, Iterator
|
|
14
|
+
from .multipart_uploader import MultiPartUploader, get_chunk_iterator_from_bytes
|
|
15
|
+
|
|
16
|
+
# Look up the installed version of the 'pybiolib' distribution. When the
# package metadata cannot be found (usually when in dev) use a placeholder.
try:
    BIOLIB_PACKAGE_VERSION = version('pybiolib')
except PackageNotFoundError:
    BIOLIB_PACKAGE_VERSION = '0.0.0'

# True only when the BIOLIB_DEV environment variable equals "true",
# compared case-insensitively.
IS_DEV = os.environ.get('BIOLIB_DEV', '').upper() == 'TRUE'
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def load_base_url_from_env() -> str:
    """Determine the BioLib base URL for this process.

    Resolution order:

    1. The ``BIOLIB_BASE_URL`` environment variable, lower-cased and with
       trailing slashes removed.
    2. The first ``biolib.<domain>`` host that resolves on port 443, where
       ``<domain>`` is taken from the first ``search`` line found in
       ``/etc/resolv.conf``.
    3. The default ``https://biolib.com``.

    Step 2 is best effort: any ordinary error (missing file, failed DNS
    lookup, ...) is ignored and resolution falls through to the next
    candidate.

    Returns:
        The base URL as a lower-case string without a trailing slash.
    """
    base_url = os.getenv('BIOLIB_BASE_URL')
    if base_url:
        return base_url.lower().rstrip('/')

    # Only ``Exception`` is caught below (not ``BaseException``) so that
    # KeyboardInterrupt and SystemExit still propagate while the lookup stays
    # best effort for ordinary failures.
    try:
        search_list = []
        with open('/etc/resolv.conf') as file:
            for line in file:
                line_trimmed = line.strip()
                if line_trimmed.startswith('search'):
                    search_list = line_trimmed.split()[1:]
                    logger.debug(f'Found search list: {search_list} when resolving base url.')
                    break

        for search_host in search_list:
            host_to_try = f'biolib.{search_host}'
            try:
                if len(socket.getaddrinfo(host_to_try, 443)) > 0:
                    return f'https://{host_to_try}'.lower()
            except Exception:  # pylint: disable=broad-except
                # This candidate did not resolve; try the next search domain.
                pass
    except Exception:  # pylint: disable=broad-except
        # resolv.conf is missing or unreadable; use the default below.
        pass

    return 'https://biolib.com'
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Base URL and site hostname are declared here with no value.
BIOLIB_BASE_URL: Optional[str] = None
BIOLIB_SITE_HOSTNAME: Optional[str] = None

# Lower-cased BIOLIB_CLOUD_BASE_URL environment variable ('' when unset).
BIOLIB_CLOUD_BASE_URL = os.environ.get('BIOLIB_CLOUD_BASE_URL', '').lower()

# Parent of the directory that contains this file.
BIOLIB_PACKAGE_ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Lower-cased BIOLIB_CLOUD_ENVIRONMENT environment variable ('' when unset).
BIOLIB_CLOUD_ENVIRONMENT = os.environ.get('BIOLIB_CLOUD_ENVIRONMENT', '').lower()

# Value of BIOLIB_SECRETS_TMPFS_PATH, or None when the variable is not set.
BIOLIB_SECRETS_TMPFS_PATH = os.getenv('BIOLIB_SECRETS_TMPFS_PATH')

# 'non-enclave' is the only environment value treated as running in the cloud.
IS_RUNNING_IN_CLOUD = BIOLIB_CLOUD_ENVIRONMENT == 'non-enclave'

BASE_URL_IS_PUBLIC_BIOLIB: Optional[bool] = None
|
|
67
|
+
|
|
68
|
+
# In Jupyter and Colab sys.stdout is an OutStream instance, which has no
# .buffer attribute; the absence of that attribute marks a notebook session.
IS_RUNNING_IN_NOTEBOOK = not hasattr(sys.stdout, 'buffer')

STREAM_STDOUT = False
|
|
75
|
+
|
|
76
|
+
# Warn when BIOLIB_CLOUD_ENVIRONMENT is set but does not hold the value that
# marks this process as running in the cloud.
if BIOLIB_CLOUD_ENVIRONMENT and not IS_RUNNING_IN_CLOUD:
    # The two literals are adjacent on purpose so Python joins them into one
    # message; a comma between them would pass a tuple to the logger instead.
    logger_no_user_data.warning(
        'BIOLIB_CLOUD_ENVIRONMENT defined but does not specify the cloud environment correctly. '
        'The compute node will not act as a cloud compute node'
    )
|
|
81
|
+
|
|
82
|
+
# (start, end) byte offsets of one chunk; _download_chunk sends them as an
# HTTP ``range: bytes=start-end`` header.
ByteRangeTuple = Tuple[int, int]
# One unit of download work: a byte range paired with the presigned URL to
# fetch it from.
DownloadChunkInputTuple = Tuple[ByteRangeTuple, str]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _download_chunk(input_tuple: DownloadChunkInputTuple) -> bytes:
    """Fetch a single byte range of a file and return the raw response body.

    ``input_tuple`` is ``((start, end), presigned_url)``; the range is sent
    as an HTTP ``range: bytes=start-end`` header. Any failure is logged and
    then re-raised to the caller.
    """
    (first_byte, last_byte), url = input_tuple
    range_header = {'range': f'bytes={first_byte}-{last_byte}'}

    try:
        chunk_response = HttpClient.request(
            url=url,
            headers=range_header,
            timeout_in_seconds=300,  # give up on a single attempt after 5 min
            retries=20,
            retry_on_http_500=True,
        )
    except Exception as exception:
        logger_no_user_data.exception("Hit error downloading chunk")
        logger_no_user_data.error(exception)
        raise exception

    logger_no_user_data.debug(f'Returning raw data for part {first_byte}')
    return chunk_response.content
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class ChunkIterator(collections.abc.Iterator):
    """Iterator of download work items that bounds how many are in flight.

    Each item is ``((start, end), presigned_url)``. A permit is taken from a
    bounded semaphore before an item is handed out and the consumer gives it
    back by calling :meth:`chunk_completed`, so at most 20 items are
    outstanding at any time.
    """

    def __init__(self, file_size: int, chunk_size: int, presigned_url: str):
        self._semaphore = multiprocessing.BoundedSemaphore(20)  # support 20 chunks to be processed at once
        self._iterator = self._get_chunk_input_iterator(file_size, chunk_size, presigned_url)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next work item, waiting up to 30 minutes for a permit.

        Raises:
            StopIteration: when every chunk has already been handed out.
            Exception: when no permit became available within the timeout.
        """
        if not self._semaphore.acquire(timeout=1800):
            raise Exception('Did not receive work within 30 min.')

        try:
            return next(self._iterator)
        except StopIteration:
            # No item was handed out for this permit, so chunk_completed()
            # will never be called for it; return it here instead of leaking.
            self._semaphore.release()
            raise

    def chunk_completed(self) -> None:
        """Give back one permit after the consumer has finished a chunk."""
        self._semaphore.release()

    @staticmethod
    def _get_chunk_input_iterator(
        file_size: int,
        chunk_size: int,
        presigned_url: str,
    ) -> Iterator[DownloadChunkInputTuple]:
        """Yield ``((start, end), presigned_url)`` for consecutive chunks.

        Ranges start at multiples of ``chunk_size`` below ``file_size``. The
        end offset is not clamped, so the last range may extend past
        ``file_size - 1``.
        """
        for index in range(0, file_size, chunk_size):
            byte_range: ByteRangeTuple = (index, index + chunk_size - 1)
            yield byte_range, presigned_url
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def download_presigned_s3_url(presigned_url: str, output_file_path: str) -> None:
    """Download the object behind ``presigned_url`` into ``output_file_path``.

    The total size is read from the ``Content-Range`` header of a small
    ranged request. The object is then fetched in ranges of 50,000,000 bytes
    by a pool of worker processes and the parts are written in order.

    The output file is opened in append mode, so any existing content is
    kept and the downloaded bytes are added after it.

    Args:
        presigned_url: URL that accepts HTTP range requests for the object.
        output_file_path: Path of the file the data is appended to.
    """
    chunk_size = 50_000_000

    response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-1'})
    # Content-Range has the form "bytes 0-1/<total size>".
    file_size = int(response.headers['Content-Range'].split('/')[1])

    chunk_iterator = ChunkIterator(file_size, chunk_size, presigned_url)

    bytes_written = 0
    # Use up to 16 workers while leaving one core free, but never fewer than
    # one: on a single-core machine cpu_count() - 1 is 0 and Pool rejects it.
    worker_count = max(1, min(16, multiprocessing.cpu_count() - 1))
    process_pool = multiprocessing.Pool(processes=worker_count)
    try:
        with open(output_file_path, 'ab') as output_file:
            for index, data in enumerate(process_pool.imap(_download_chunk, chunk_iterator)):
                logger_no_user_data.debug(f'Writing part {index} to file...')
                output_file.write(data)

                # Counts a full chunk even for a shorter final part, hence
                # the cap at 100 percent.
                bytes_written += chunk_size
                approx_progress_percent = min(bytes_written / file_size * 100, 100)
                logger_no_user_data.debug(
                    f'Wrote part {index} of {file_size} to file, '
                    f'the approximate progress is {round(approx_progress_percent, 2)}%'
                )
                # Free a slot so the iterator can hand out another chunk.
                chunk_iterator.chunk_completed()
    finally:
        logger_no_user_data.debug('Closing process poll...')
        process_pool.close()
        logger_no_user_data.debug('Process poll closed.')
|