pybiolib 1.2.883__py3-none-any.whl → 1.2.1890__py3-none-any.whl
This diff shows the changes between two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- biolib/__init__.py +33 -10
- biolib/_data_record/data_record.py +220 -126
- biolib/_index/index.py +55 -0
- biolib/_index/query_result.py +103 -0
- biolib/_internal/add_copilot_prompts.py +24 -11
- biolib/_internal/add_gui_files.py +81 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +1 -18
- biolib/_internal/data_record/push_data.py +65 -16
- biolib/_internal/data_record/remote_storage_endpoint.py +18 -13
- biolib/_internal/file_utils.py +48 -0
- biolib/_internal/lfs/cache.py +4 -2
- biolib/_internal/push_application.py +95 -24
- biolib/_internal/runtime.py +2 -0
- biolib/_internal/string_utils.py +13 -0
- biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-general.instructions.md +5 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
- biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
- biolib/_internal/templates/{init_template → github_workflow_template}/.github/workflows/biolib.yml +7 -2
- biolib/_internal/templates/gitignore_template/.gitignore +10 -0
- biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
- biolib/_internal/templates/gui_template/App.tsx +53 -0
- biolib/_internal/templates/gui_template/Dockerfile +27 -0
- biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
- biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
- biolib/_internal/templates/gui_template/index.css +5 -0
- biolib/_internal/templates/gui_template/index.html +13 -0
- biolib/_internal/templates/gui_template/index.tsx +10 -0
- biolib/_internal/templates/gui_template/package.json +27 -0
- biolib/_internal/templates/gui_template/tsconfig.json +24 -0
- biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
- biolib/_internal/templates/gui_template/vite.config.mts +10 -0
- biolib/_internal/templates/init_template/.biolib/config.yml +1 -0
- biolib/_internal/templates/init_template/Dockerfile +5 -1
- biolib/_internal/templates/init_template/run.py +6 -15
- biolib/_internal/templates/init_template/run.sh +1 -0
- biolib/_internal/templates/templates.py +21 -1
- biolib/_internal/utils/__init__.py +47 -0
- biolib/_internal/utils/auth.py +46 -0
- biolib/_internal/utils/job_url.py +33 -0
- biolib/_internal/utils/multinode.py +12 -14
- biolib/_runtime/runtime.py +15 -2
- biolib/_session/session.py +7 -5
- biolib/_shared/__init__.py +0 -0
- biolib/_shared/types/__init__.py +74 -0
- biolib/_shared/types/account.py +12 -0
- biolib/_shared/types/account_member.py +8 -0
- biolib/{_internal → _shared}/types/experiment.py +1 -0
- biolib/_shared/types/resource.py +37 -0
- biolib/_shared/types/resource_deploy_key.py +11 -0
- biolib/{_internal → _shared}/types/resource_version.py +8 -2
- biolib/_shared/types/user.py +19 -0
- biolib/_shared/utils/__init__.py +7 -0
- biolib/_shared/utils/resource_uri.py +75 -0
- biolib/api/client.py +5 -48
- biolib/app/app.py +97 -55
- biolib/biolib_api_client/api_client.py +3 -47
- biolib/biolib_api_client/app_types.py +1 -1
- biolib/biolib_api_client/biolib_app_api.py +31 -6
- biolib/biolib_api_client/biolib_job_api.py +1 -1
- biolib/biolib_api_client/user_state.py +34 -2
- biolib/biolib_binary_format/module_input.py +8 -0
- biolib/biolib_binary_format/remote_endpoints.py +3 -3
- biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
- biolib/biolib_logging.py +1 -1
- biolib/cli/__init__.py +2 -2
- biolib/cli/auth.py +4 -16
- biolib/cli/data_record.py +82 -0
- biolib/cli/index.py +32 -0
- biolib/cli/init.py +393 -71
- biolib/cli/lfs.py +1 -1
- biolib/cli/run.py +9 -6
- biolib/cli/start.py +14 -1
- biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
- biolib/compute_node/job_worker/executors/docker_types.py +1 -1
- biolib/compute_node/job_worker/executors/types.py +6 -5
- biolib/compute_node/job_worker/job_storage.py +2 -1
- biolib/compute_node/job_worker/job_worker.py +155 -90
- biolib/compute_node/job_worker/large_file_system.py +2 -6
- biolib/compute_node/job_worker/network_alloc.py +99 -0
- biolib/compute_node/job_worker/network_buffer.py +240 -0
- biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
- biolib/compute_node/remote_host_proxy.py +163 -79
- biolib/compute_node/utils.py +2 -0
- biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
- biolib/compute_node/webserver/proxy_utils.py +28 -0
- biolib/compute_node/webserver/webserver.py +64 -19
- biolib/experiments/experiment.py +111 -16
- biolib/jobs/job.py +128 -31
- biolib/jobs/job_result.py +74 -34
- biolib/jobs/types.py +1 -0
- biolib/sdk/__init__.py +28 -3
- biolib/typing_utils.py +1 -1
- biolib/utils/cache_state.py +8 -5
- biolib/utils/multipart_uploader.py +24 -18
- biolib/utils/seq_util.py +1 -1
- pybiolib-1.2.1890.dist-info/METADATA +41 -0
- pybiolib-1.2.1890.dist-info/RECORD +177 -0
- {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
- pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
- biolib/_internal/llm_instructions/.github/instructions/style-react-ts.instructions.md +0 -22
- biolib/_internal/templates/init_template/.gitignore +0 -2
- biolib/_internal/types/__init__.py +0 -6
- biolib/_internal/types/resource.py +0 -18
- biolib/biolib_download_container.py +0 -38
- biolib/cli/download_container.py +0 -14
- biolib/utils/app_uri.py +0 -57
- pybiolib-1.2.883.dist-info/METADATA +0 -50
- pybiolib-1.2.883.dist-info/RECORD +0 -148
- pybiolib-1.2.883.dist-info/entry_points.txt +0 -3
- /biolib/{_internal/llm_instructions → _index}/__init__.py +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/general-app-knowledge.instructions.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-python.instructions.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_app_inputs.prompt.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_run_apps.prompt.md +0 -0
- /biolib/{_internal → _shared}/types/app.py +0 -0
- /biolib/{_internal → _shared}/types/data_record.py +0 -0
- /biolib/{_internal → _shared}/types/file_node.py +0 -0
- /biolib/{_internal → _shared}/types/push.py +0 -0
- /biolib/{_internal → _shared}/types/resource_permission.py +0 -0
- /biolib/{_internal → _shared}/types/result.py +0 -0
- /biolib/{_internal → _shared}/types/typing.py +0 -0
- {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info/licenses}/LICENSE +0 -0
biolib/_internal/add_copilot_prompts.py
CHANGED

@@ -2,10 +2,10 @@ import os
 import shutil
 import sys
 
-from biolib._internal import
+from biolib._internal.templates import templates
 
 
-def add_copilot_prompts(force: bool,
+def add_copilot_prompts(force: bool, silent: bool = False) -> None:
     current_working_directory = os.getcwd()
     config_file_path = f'{current_working_directory}/.biolib/config.yml'
     if not os.path.exists(config_file_path):
@@ -14,32 +14,45 @@ Error: Current directory has not been initialized as a BioLib application.
 Please run the \"biolib init\" command first"""
         print(err_string, file=sys.stderr)
         exit(1)
-    source_path = os.path.join(
+    source_path = os.path.join(templates.copilot_template(), '.github')
     destination_path = os.path.join(current_working_directory, '.github')
 
     conflicting_files = []
+    files_to_overwrite = set()
 
     for root, _, filenames in os.walk(source_path):
         relative_dir = os.path.relpath(root, source_path)
         destination_dir = os.path.join(destination_path, relative_dir)
         for filename in filenames:
-            if 'style' in filename and not style:
-                continue
             source_file = os.path.join(root, filename)
             destination_file = os.path.join(destination_dir, filename)
             if os.path.exists(destination_file) and not force:
                 with open(source_file, 'rb') as fsrc, open(destination_file, 'rb') as fdest:
                     if fsrc.read() != fdest.read():
                         conflicting_files.append(os.path.relpath(destination_file, current_working_directory))
-            else:
-                os.makedirs(destination_dir, exist_ok=True)
-                shutil.copy2(source_file, destination_file)
 
     if conflicting_files:
-        print('The following files
+        print('The following files already exist and would be overwritten:')
         for conflicting_file in conflicting_files:
-            print(f'  {conflicting_file}'
-
+            print(f'  {conflicting_file}')
+        print()
+
+        for conflicting_file in conflicting_files:
+            choice = input(f'Overwrite {conflicting_file}? [y/N]: ').lower().strip()
+            if choice in ['y', 'yes']:
+                files_to_overwrite.add(conflicting_file)
+
+    for root, _, filenames in os.walk(source_path):
+        relative_dir = os.path.relpath(root, source_path)
+        destination_dir = os.path.join(destination_path, relative_dir)
+        for filename in filenames:
+            source_file = os.path.join(root, filename)
+            destination_file = os.path.join(destination_dir, filename)
+            relative_file_path = os.path.relpath(destination_file, current_working_directory)
+
+            if not os.path.exists(destination_file) or force or relative_file_path in files_to_overwrite:
+                os.makedirs(destination_dir, exist_ok=True)
+                shutil.copy2(source_file, destination_file)
 
     if not silent:
         print(f'Prompt and instruction files added to {destination_path}/')
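Note: the rewrite above splits copying into two passes. The first walk only records byte-level conflicts, the user is then prompted per file, and a second walk performs every copy. This avoids the old behaviour where non-conflicting files were copied even when the run later stopped on conflicts. A minimal sketch of the same pattern, assuming hypothetical (source, destination) Path pairs rather than the biolib template walk:

from pathlib import Path
import shutil


def copy_with_confirmation(pairs, force=False):
    # Pass 1: find existing destinations whose bytes differ from their source.
    conflicts = [dst for src, dst in pairs if dst.exists() and not force and src.read_bytes() != dst.read_bytes()]
    approved = {dst for dst in conflicts if input(f'Overwrite {dst}? [y/N]: ').lower().strip() in ('y', 'yes')}
    # Pass 2: copy everything that is new, forced, or explicitly approved.
    for src, dst in pairs:
        if not dst.exists() or force or dst in approved:
            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(src, dst)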
biolib/_internal/add_gui_files.py
ADDED

@@ -0,0 +1,81 @@
+import os
+import shutil
+
+from biolib._internal.templates import templates
+
+
+def add_gui_files(force=False, silent=False) -> None:
+    cwd = os.getcwd()
+    template_dir = templates.gui_template()
+
+    root_files = ['package.json', 'Dockerfile', 'vite.config.mts', '.yarnrc.yml']
+
+    conflicting_files = []
+    files_to_overwrite = set()
+
+    for root, _, filenames in os.walk(template_dir):
+        relative_dir = os.path.relpath(root, template_dir)
+
+        for filename in filenames:
+            if filename in root_files:
+                destination_dir = cwd
+            else:
+                if relative_dir == '.':
+                    destination_dir = os.path.join(cwd, 'gui')
+                else:
+                    destination_dir = os.path.join(cwd, 'gui', relative_dir)
+
+            source_file = os.path.join(root, filename)
+            destination_file = os.path.join(destination_dir, filename)
+
+            if filename == 'Dockerfile':
+                continue
+
+            if os.path.exists(destination_file) and not force:
+                with open(source_file, 'rb') as fsrc, open(destination_file, 'rb') as fdest:
+                    if fsrc.read() != fdest.read():
+                        conflicting_files.append(os.path.relpath(destination_file, cwd))
+
+    if conflicting_files:
+        print('The following files already exist and would be overwritten:')
+        for conflicting_file in conflicting_files:
+            print(f'  {conflicting_file}')
+        print()
+
+        for conflicting_file in conflicting_files:
+            choice = input(f'Overwrite {conflicting_file}? [y/N]: ').lower().strip()
+            if choice in ['y', 'yes']:
+                files_to_overwrite.add(conflicting_file)
+
+    for root, _, filenames in os.walk(template_dir):
+        relative_dir = os.path.relpath(root, template_dir)
+
+        for filename in filenames:
+            if filename in root_files:
+                destination_dir = cwd
+            else:
+                if relative_dir == '.':
+                    destination_dir = os.path.join(cwd, 'gui')
+                else:
+                    destination_dir = os.path.join(cwd, 'gui', relative_dir)
+
+            source_file = os.path.join(root, filename)
+            destination_file = os.path.join(destination_dir, filename)
+            relative_file_path = os.path.relpath(destination_file, cwd)
+
+            should_force = force or filename == 'Dockerfile'
+            if not os.path.exists(destination_file) or should_force or relative_file_path in files_to_overwrite:
+                os.makedirs(destination_dir, exist_ok=True)
+                shutil.copy2(source_file, destination_file)
+
+    gitignore_path = os.path.join(cwd, '.gitignore')
+    with open(gitignore_path, 'a') as gitignore_file:
+        gitignore_file.write('\n# gui\n')
+        gitignore_file.write('.yarn\n')
+        gitignore_file.write('dist\n')
+        gitignore_file.write('yarn.lock\n')
+        gitignore_file.write('tsconfig.tsbuildinfo\n')
+        gitignore_file.write('node_modules\n')
+
+    if not silent:
+        print('gui files added to project root and gui/ subdirectory')
biolib/_internal/data_record/__init__.py
CHANGED

@@ -1 +1 @@
-from .data_record import
+from .data_record import validate_sqlite_v1
biolib/_internal/data_record/data_record.py
CHANGED

@@ -1,11 +1,7 @@
 import sqlite3
 from pathlib import Path
 
-from biolib.
-from biolib.api import client as api_client
-from biolib.biolib_api_client import AppGetResponse
-from biolib.biolib_api_client.biolib_app_api import _get_app_uri_from_str
-from biolib.biolib_api_client.lfs_types import DataRecordVersionInfo
+from biolib._shared.types import SqliteV1DatabaseSchema
 
 
 def get_actual_schema(db_path):
@@ -82,19 +78,6 @@ def verify_schema(specification: SqliteV1DatabaseSchema, actual_schema: SqliteV1
     )
 
 
-def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
-    normalized_uri = _get_app_uri_from_str(uri)
-    app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': normalized_uri}).json()
-    resource_uri = app_response['app_version']['app_uri']
-    if app_response['app']['type'] != 'data-record':
-        raise Exception(f'Resource "{resource_uri}" is not a Data Record')
-    return DataRecordVersionInfo(
-        resource_uri=app_response['app_version']['app_uri'],
-        resource_uuid=app_response['app']['public_id'],
-        resource_version_uuid=app_response['app_version']['public_id'],
-    )
-
-
 def validate_sqlite_v1(schema: SqliteV1DatabaseSchema, sqlite_file: Path):
     actual_schema = get_actual_schema(sqlite_file)
     print(schema)
biolib/_internal/data_record/push_data.py
CHANGED

@@ -1,10 +1,58 @@
+from __future__ import annotations
+
 import os
+from typing import Callable, Iterator
 
-
+import biolib.api as api
 from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
-from biolib._internal.types.typing import List, Optional, Tuple
 from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger
+from biolib.typing_utils import List, Optional, Tuple
+from biolib.utils import MultiPartUploader
+
+
+def _upload_from_iterator(
+    payload_iterator: Iterator[bytes],
+    payload_size_in_bytes: int,
+    resource_uuid: Optional[str] = None,
+    resource_version_uuid: Optional[str] = None,
+    use_process_pool: bool = False,
+    publish: bool = False,
+    on_progress: Optional[Callable[[int, int], None]] = None,
+) -> str:
+    if (resource_uuid is None) == (resource_version_uuid is None):
+        raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
+
+    if resource_version_uuid is None:
+        response = api.client.post(
+            path='/lfs/versions/',
+            data={'resource_uuid': resource_uuid},
+        )
+        resource_version_uuid = response.json()['uuid']
+
+    multipart_uploader = MultiPartUploader(
+        use_process_pool=use_process_pool,
+        get_presigned_upload_url_request={
+            'headers': None,
+            'requires_biolib_auth': True,
+            'path': f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
+        },
+        complete_upload_request={
+            'headers': None,
+            'requires_biolib_auth': True,
+            'path': f'/lfs/versions/{resource_version_uuid}/complete_upload/',
+        },
+        on_progress=on_progress,
+    )
+    multipart_uploader.upload(payload_iterator=payload_iterator, payload_size_in_bytes=payload_size_in_bytes)
+
+    if publish:
+        api.client.patch(
+            path=f'/resources/versions/{resource_version_uuid}/',
+            data={'state': 'published', 'set_as_active': True},
+        )
+
+    return resource_version_uuid
 
 
 def validate_data_path_and_get_files_and_size_of_directory(data_path: str) -> Tuple[List[str], int]:
@@ -28,9 +76,14 @@ def push_data_path(
     data_path: str,
     data_size_in_bytes: int,
     files_to_zip: List[str],
-
+    resource_uuid: Optional[str] = None,
+    resource_version_uuid: Optional[str] = None,
     chunk_size_in_mb: Optional[int] = None,
-
+    publish: bool = False,
+) -> str:
+    if (resource_uuid is None) == (resource_version_uuid is None):
+        raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
+
     original_working_dir = os.getcwd()
     os.chdir(data_path)
 
@@ -49,19 +102,15 @@ def push_data_path(
     logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
 
     iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
-
+
+    new_resource_version_uuid = _upload_from_iterator(
+        payload_iterator=iterable_zip_stream,
+        payload_size_in_bytes=data_size_in_bytes,
+        resource_uuid=resource_uuid,
+        resource_version_uuid=resource_version_uuid,
         use_process_pool=True,
-
-        headers=None,
-        requires_biolib_auth=True,
-        path=f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
-    ),
-    complete_upload_request=dict(
-        headers=None,
-        requires_biolib_auth=True,
-        path=f'/lfs/versions/{resource_version_uuid}/complete_upload/',
-    ),
+        publish=publish,
     )
 
-    multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
     os.chdir(original_working_dir)
+    return new_resource_version_uuid
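Note: the `(resource_uuid is None) == (resource_version_uuid is None)` guard in both functions above is an exclusive-or check on the two optional arguments: the comparison is True when both are None or when both are set, which are exactly the two invalid combinations. The same idiom in isolation (make_version is a hypothetical name, not part of the package):

from typing import Optional


def make_version(resource_uuid: Optional[str] = None, resource_version_uuid: Optional[str] = None) -> str:
    # True when both are None or both are given (the two invalid cases).
    if (resource_uuid is None) == (resource_version_uuid is None):
        raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
    return resource_version_uuid or f'new-version-for-{resource_uuid}'


make_version(resource_uuid='abc123')          # ok: a new version is created first
make_version(resource_version_uuid='def456')  # ok: uploads into the existing version
# make_version() and make_version('a', 'b') both raise ValueError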
biolib/_internal/data_record/remote_storage_endpoint.py
CHANGED

@@ -1,38 +1,43 @@
 import os
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from urllib.parse import urlparse
 
+from biolib._shared.types import ResourceDetailedDict
 from biolib.api import client as api_client
-from biolib.biolib_api_client.lfs_types import DataRecordVersion
 from biolib.biolib_binary_format.utils import RemoteEndpoint
 from biolib.biolib_logging import logger
 from biolib.typing_utils import Optional
 
 
 class DataRecordRemoteStorageEndpoint(RemoteEndpoint):
-    def __init__(self,
-        self.
+    def __init__(self, uri: str):
+        self._uri: str = uri
         self._expires_at: Optional[datetime] = None
         self._presigned_url: Optional[str] = None
 
     def get_remote_url(self) -> str:
-        if not self._presigned_url or not self._expires_at or datetime.
-
-            path=
+        if not self._presigned_url or not self._expires_at or datetime.now(timezone.utc) > self._expires_at:
+            resource_response: ResourceDetailedDict = api_client.get(
+                path='/resource/',
+                params={'uri': self._uri},
             ).json()
 
+            version = resource_response.get('version')
+            assets = version.get('assets') if version else None
+            if not assets:
+                raise Exception(f'Resource "{self._uri}" has no downloadable assets')
+
+            download_url = assets['download_url']
             app_caller_proxy_job_storage_base_url = os.getenv('BIOLIB_CLOUD_JOB_STORAGE_BASE_URL', '')
             if app_caller_proxy_job_storage_base_url:
-
-                parsed_url = urlparse(lfs_version['presigned_download_url'])
+                parsed_url = urlparse(download_url)
                 self._presigned_url = f'{app_caller_proxy_job_storage_base_url}{parsed_url.path}?{parsed_url.query}'
             else:
-                self._presigned_url =
+                self._presigned_url = download_url
 
-            self._expires_at = datetime.
+            self._expires_at = datetime.now(timezone.utc) + timedelta(minutes=8)
             logger.debug(
-                f'DataRecord "{self.
-                f'with expiry at {self._expires_at.isoformat()}'
+                f'DataRecord "{self._uri}" fetched presigned URL ' f'with expiry at {self._expires_at.isoformat()}'
            )
 
         return self._presigned_url
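Note: the move from naive datetimes to `datetime.now(timezone.utc)` above is not cosmetic. Python raises on ordered comparisons between naive and aware datetimes, so mixing the two styles in the expiry check would crash at runtime. A standalone illustration:

from datetime import datetime, timedelta, timezone

expires_at = datetime.now(timezone.utc) + timedelta(minutes=8)  # timezone-aware
print(datetime.now(timezone.utc) > expires_at)  # False until the presigned URL expires

naive = datetime.utcnow()  # naive: tzinfo is None
try:
    print(naive > expires_at)
except TypeError as error:
    print(error)  # can't compare offset-naive and offset-aware datetimes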
biolib/_internal/file_utils.py
CHANGED

@@ -1,5 +1,7 @@
+import hashlib
 import io
 import os
+import posixpath
 import zipfile as zf
 from pathlib import Path
 
@@ -75,3 +77,49 @@ def get_iterable_zip_stream(files: List[str], chunk_size: int) -> Iterator[bytes
         if len(chunk) == 0:
             break
         yield chunk
+
+
+def path_to_renamed_path(path_str: str, prefix_with_slash: bool = True) -> str:
+    """
+    Normalize file paths consistently:
+    - If path contains '..' (relative path going up), convert to absolute path
+    - If relative path not containing '..', keep as is, but prepend / if prefix_with_slash=True
+    - If absolute path that is subpath of current directory, convert to relative path
+    - If absolute path not subpath of current directory, hash the folder path and keep filename
+    """
+    path = Path(path_str)
+    current_dir = Path.cwd()
+
+    if '..' in path.parts:
+        resolved_path = path.resolve()
+        try:
+            relative_path = resolved_path.relative_to(current_dir)
+            result = str(relative_path)
+        except ValueError:
+            folder_path = str(resolved_path.parent)
+            filename = resolved_path.name
+            folder_hash = hashlib.md5(folder_path.encode()).hexdigest()[:6]
+            result = f'/{folder_hash}/{filename}'
+    elif path.is_absolute():
+        try:
+            resolved_path = path.resolve()
+            relative_path = resolved_path.relative_to(current_dir)
+            result = str(relative_path)
+        except ValueError:
+            folder_path = str(path.parent)
+            filename = path.name
+            folder_hash = hashlib.md5(folder_path.encode()).hexdigest()[:6]
+            result = f'/{folder_hash}/{filename}'
+    else:
+        result = path_str
+
+    if prefix_with_slash:
+        if not result.startswith('/'):
+            result = '/' + result
+        # Normalize to handle cases like '/./mydir' -> '/mydir' and remove trailing slashes.
+        # Required because downstream Mappings class does exact string-prefix matching.
+        return posixpath.normpath(result)
+    else:
+        if result.startswith('/'):
+            result = result[1:]
+        return posixpath.normpath(result)
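Note: the fourth docstring rule (absolute paths outside the working directory) is the only lossy case: the directory is replaced by a short MD5 prefix so unrelated host paths cannot collide while the filename stays readable. Reproducing that rule by hand, assuming the path lies outside the current directory (the printed hash value is illustrative):

import hashlib
from pathlib import Path

outside = Path('/etc/hosts')
folder_hash = hashlib.md5(str(outside.parent).encode()).hexdigest()[:6]
print(f'/{folder_hash}/{outside.name}')  # '/<6 hex chars>/hosts'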
biolib/_internal/lfs/cache.py
CHANGED

@@ -1,6 +1,6 @@
 import os
 import subprocess
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 
 from biolib.biolib_logging import logger_no_user_data
 from biolib.compute_node.job_worker.cache_state import LfsCacheState
@@ -9,7 +9,7 @@ from biolib.compute_node.job_worker.cache_state import LfsCacheState
 def prune_lfs_cache(dry_run: bool) -> None:
     logger_no_user_data.info(f'Pruning LFS cache (dry run = {dry_run})...')
 
-    current_time = datetime.
+    current_time = datetime.now(timezone.utc)
     paths_to_delete = set()
 
     with LfsCacheState() as state:
@@ -24,6 +24,8 @@ def prune_lfs_cache(dry_run: bool) -> None:
         lfs_uuids_to_keep_in_state = set()
         for lfs_uuid, lfs in state['large_file_systems'].items():
             last_used_at = datetime.fromisoformat(lfs['last_used_at'])
+            if last_used_at.tzinfo is None:
+                last_used_at = last_used_at.replace(tzinfo=timezone.utc)
             lfs_time_to_live_in_days = 60 if lfs['state'] == 'ready' else 7
 
             if last_used_at < current_time - timedelta(days=lfs_time_to_live_in_days):
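Note: the two added lines handle cache-state entries whose `last_used_at` strings were presumably written by older clients without a UTC offset; `datetime.fromisoformat` parses those to naive values, which could not be compared against the now timezone-aware `current_time`. Sketch:

from datetime import datetime, timezone

last_used_at = datetime.fromisoformat('2024-05-01T12:00:00')  # no offset in the string, so naive
if last_used_at.tzinfo is None:
    last_used_at = last_used_at.replace(tzinfo=timezone.utc)  # reinterpret as UTC
print(last_used_at.isoformat())  # 2024-05-01T12:00:00+00:00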
biolib/_internal/push_application.py
CHANGED

@@ -1,3 +1,4 @@
+import json
 import os
 import re
 import sys
@@ -14,14 +15,14 @@ from biolib._internal.data_record.push_data import (
 )
 from biolib._internal.errors import AuthenticationError
 from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
-from biolib.
+from biolib._shared.types import PushResponseDict
+from biolib._shared.utils import parse_resource_uri
 from biolib.biolib_api_client import BiolibApiClient
 from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
 from biolib.biolib_docker_client import BiolibDockerClient
 from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger
-from biolib.typing_utils import Iterable, Optional, Set, TypedDict
-from biolib.utils.app_uri import parse_app_uri
+from biolib.typing_utils import Dict, Iterable, Optional, Set, TypedDict, Union
 
 REGEX_MARKDOWN_INLINE_IMAGE = re.compile(r'!\[(?P<alt>.*)\]\((?P<src>.*)\)')
 
@@ -108,8 +109,10 @@ def _process_docker_status_updates_with_progress_bar(status_updates: Iterable[Do
 
 
 def _process_docker_status_updates_with_logging(status_updates: Iterable[DockerStatusUpdate], action: str) -> None:
-    layer_progress = {}
-    layer_status = {}
+    layer_progress: Dict[str, float] = {}
+    layer_status: Dict[str, str] = {}
+    layer_details: Dict[str, Dict[str, int]] = {}
+    layer_bytes_at_last_log: Dict[str, int] = {}
     last_log_time = time.time()
 
     logger.info(f'{action} Docker image...')
@@ -127,6 +130,7 @@
                 percentage = (current / total * 100) if total > 0 else 0
                 layer_progress[layer_id] = percentage
                 layer_status[layer_id] = f'{action.lower()}'
+                layer_details[layer_id] = {'current': current, 'total': total}
             elif update.get('status') == 'Layer already exists':
                 layer_progress[layer_id] = 100
                 layer_status[layer_id] = 'already exists'
@@ -145,14 +149,33 @@
             logger.info(f'{action} Docker image - {status}')
 
         if current_time - last_log_time >= 10.0:
-            _log_progress_summary(
+            _log_progress_summary(
+                action,
+                layer_progress,
+                layer_status,
+                layer_details,
+                layer_bytes_at_last_log,
+                current_time - last_log_time,
+            )
+            layer_bytes_at_last_log = {lid: details['current'] for lid, details in layer_details.items()}
             last_log_time = current_time
 
-    _log_progress_summary(
+    _log_progress_summary(
+        action, layer_progress, layer_status, layer_details, layer_bytes_at_last_log, time.time() - last_log_time
+    )
+    if action == 'Pushing':
+        logger.info('Pushing final image manifest...')
     logger.info(f'{action} Docker image completed')
 
 
-def _log_progress_summary(
+def _log_progress_summary(
+    action: str,
+    layer_progress: Dict[str, float],
+    layer_status: Dict[str, str],
+    layer_details: Dict[str, Dict[str, int]],
+    layer_bytes_at_last_log: Dict[str, int],
+    time_delta: float,
+) -> None:
     if not layer_progress and not layer_status:
         return
 
@@ -171,7 +194,36 @@ def _log_progress_summary(action: str, layer_progress: dict, layer_status: dict)
         if status in ['preparing', 'waiting', 'pushing', 'uploading'] and layer_progress.get(layer_id, 0) < 100
     ]
 
-    if active_layers:
+    if active_layers and layer_details:
+        total_bytes_transferred = 0
+        layer_info_parts = []
+
+        for layer_id in active_layers[:5]:
+            if layer_id in layer_details:
+                details = layer_details[layer_id]
+                current = details['current']
+                total = details['total']
+                percentage = layer_progress.get(layer_id, 0)
+
+                bytes_since_last = current - layer_bytes_at_last_log.get(layer_id, 0)
+                total_bytes_transferred += bytes_since_last
+
+                current_mb = current / (1024 * 1024)
+                total_mb = total / (1024 * 1024)
+                layer_info_parts.append(f'{layer_id}: {current_mb:.1f}/{total_mb:.1f} MB ({percentage:.1f}%)')
+
+        speed_info = ''
+        if time_delta > 0 and total_bytes_transferred > 0:
+            speed_mbps = (total_bytes_transferred / (1024 * 1024)) / time_delta
+            speed_info = f' @ {speed_mbps:.2f} MB/s'
+
+        more_layers_info = ''
+        if len(active_layers) > 5:
+            more_layers_info = f' (+ {len(active_layers) - 5} more)'
+
+        if layer_info_parts:
+            logger.info(f'Active layers: {", ".join(layer_info_parts)}{speed_info}{more_layers_info}')
+    elif active_layers:
         logger.info(f'Active layers: {", ".join(active_layers[:5])}{"..." if len(active_layers) > 5 else ""}')
 
 
@@ -193,10 +245,12 @@ def push_application(
     set_as_published: bool,
     dry_run: bool = False,
 ) -> Optional[PushResponseDict]:
-
-
+    app_uri = app_uri.rstrip('/')
+    parsed_uri = parse_resource_uri(app_uri)
+    resource_name = parsed_uri['resource_name']
 
-
+    app_uri_prefix = f"@{parsed_uri['resource_prefix']}/" if parsed_uri['resource_prefix'] is not None else ''
+    app_uri_to_fetch = f"{app_uri_prefix}{parsed_uri['account_handle_normalized']}/{resource_name}"
 
     version = parsed_uri['version']
     semantic_version = f"{version['major']}.{version['minor']}.{version['patch']}" if version else None
@@ -205,14 +259,14 @@ def push_application(
 
     api_client = BiolibApiClient.get()
     if not api_client.is_signed_in:
-
-        if
+        github_repository = os.getenv('GITHUB_REPOSITORY')
+        if github_repository and not api_client.resource_deploy_key:
+            github_secrets_url = f'https://github.com/{github_repository}/settings/secrets/actions/new'
             raise AuthenticationError(
                 'You must be authenticated to push an application.\n'
                 'Please set the environment variable "BIOLIB_TOKEN=[your_deploy_token]"\n'
                 f'You can get a deploy key at: {api_client.base_url}/{app_uri_to_fetch}/settings/keys/\n'
-                'Then add it to your GitHub repository at: '
-                'Settings -> Secrets and variables -> Actions -> Repository secrets'
+                f'Then add it to your GitHub repository at: {github_secrets_url}'
             )
         else:
             raise AuthenticationError(
@@ -233,19 +287,34 @@ def push_application(
     app_data_path: Optional[Path] = None
     try:
         with open(config_yml_path) as config_yml_file:
-
+            try:
+                config = json.loads(json.dumps(yaml.safe_load(config_yml_file.read())))
+            except (TypeError, ValueError) as e:
+                raise BioLibError(
+                    f'The .biolib/config.yml file contains data types that are not supported '
+                    f'(must be JSON-serializable). Please ensure only standard JSON types '
+                    f'(str, int, float, bool, list, dict, null) are used. Original error: {e}'
+                ) from e
+
+        if 'assets' in config and 'app_data' not in config:
+            config['app_data'] = config.pop('assets')
+        elif 'assets' in config and 'app_data' in config:
+            raise BioLibError(
+                'In .biolib/config.yml you cannot specify both "app_data" and "assets" fields. Please use only one.'
+            )
 
         app_data = config.get('app_data')
         if app_data:
+            field_name = 'app_data' if 'app_data' in config else 'assets'
             if not isinstance(app_data, str):
                 raise BioLibError(
-                    f'In .biolib/config.yml the value of "
+                    f'In .biolib/config.yml the value of "{field_name}" must be a string but got {type(app_data)}'
                 )
 
             app_data_path = app_path_absolute.joinpath(app_data).resolve()
             if not app_data_path.is_dir():
                 raise BioLibError(
-                    'In .biolib/config.yml the value of "
+                    f'In .biolib/config.yml the value of "{field_name}" must be a path to a directory '
                     'in the application directory'
                 )
 
@@ -316,10 +385,6 @@ def push_application(
     app_response = BiolibAppApi.get_by_uri(app_uri_to_fetch)
     app = app_response['app']
 
-    if app_data and not app['allow_client_side_execution']:
-        raise BioLibError(
-            'To push a version with app_data the app must be set to "Allow Client-Side Source Code Access"'
-        )
     if dry_run:
         logger.info('Successfully completed dry-run. No new version was pushed.')
         return None
@@ -405,9 +470,15 @@ def push_application(
     logger.info(f'Successfully pushed {docker_image_name}')
 
     app_version_uuid = new_app_version_json['public_id']
+    complete_push_data: Dict[str, Union[bool, str]] = {
+        'set_as_active': set_as_active,
+        'set_as_published': set_as_published,
+    }
+    if parsed_uri['tag']:
+        complete_push_data['tag'] = parsed_uri['tag']
     api.client.post(
         path=f'/app-versions/{app_version_uuid}/complete-push/',
-        data=
+        data=complete_push_data,
     )
 
     sematic_version = f"{new_app_version_json['major']}.{new_app_version_json['minor']}.{new_app_version_json['patch']}"
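Note: the `json.loads(json.dumps(yaml.safe_load(...)))` round-trip above is a compact way to reject YAML-only types (dates, binary tags, etc.) before the config is shipped to the API: anything `json.dumps` cannot serialize raises TypeError, which the new except branch turns into a readable BioLibError. A standalone illustration (requires PyYAML):

import json

import yaml

config = yaml.safe_load('modules:\n  main:\n    image: local-docker://app\n')
print(json.loads(json.dumps(config)))  # plain dicts and strings round-trip unchanged

bad = yaml.safe_load('released: 2024-05-01\n')  # YAML parses this value as a datetime.date
try:
    json.dumps(bad)
except TypeError as error:
    print(error)  # Object of type date is not JSON serializable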
biolib/_internal/runtime.py
CHANGED

@@ -4,10 +4,12 @@ from biolib.typing_utils import TypedDict
 class RuntimeJobDataDict(TypedDict):
     version: str
     job_requested_machine: str
+    job_requested_machine_spot: bool
     job_uuid: str
     job_auth_token: str
     app_uri: str
     is_environment_biolib_cloud: bool
+    job_reserved_machines: int
 
 
 class BioLibRuntimeError(Exception):
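Note: TypedDict keys are required unless declared otherwise, so the two added fields mean every construction site of RuntimeJobDataDict must now supply them; a dict built without the new keys fails static type checking. The values below are hypothetical, for illustration only:

from biolib._internal.runtime import RuntimeJobDataDict

job_data: RuntimeJobDataDict = {
    'version': '1.0',
    'job_requested_machine': 'cpu.large',
    'job_requested_machine_spot': False,
    'job_uuid': 'a1b2c3d4',
    'job_auth_token': 'token',
    'app_uri': 'biolib/example-app',
    'is_environment_biolib_cloud': True,
    'job_reserved_machines': 1,
}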