pybiolib 1.2.883__py3-none-any.whl → 1.2.1890__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. biolib/__init__.py +33 -10
  2. biolib/_data_record/data_record.py +220 -126
  3. biolib/_index/index.py +55 -0
  4. biolib/_index/query_result.py +103 -0
  5. biolib/_internal/add_copilot_prompts.py +24 -11
  6. biolib/_internal/add_gui_files.py +81 -0
  7. biolib/_internal/data_record/__init__.py +1 -1
  8. biolib/_internal/data_record/data_record.py +1 -18
  9. biolib/_internal/data_record/push_data.py +65 -16
  10. biolib/_internal/data_record/remote_storage_endpoint.py +18 -13
  11. biolib/_internal/file_utils.py +48 -0
  12. biolib/_internal/lfs/cache.py +4 -2
  13. biolib/_internal/push_application.py +95 -24
  14. biolib/_internal/runtime.py +2 -0
  15. biolib/_internal/string_utils.py +13 -0
  16. biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-general.instructions.md +5 -0
  17. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  18. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  19. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  20. biolib/_internal/templates/{init_template → github_workflow_template}/.github/workflows/biolib.yml +7 -2
  21. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  22. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  23. biolib/_internal/templates/gui_template/App.tsx +53 -0
  24. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  25. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  26. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  27. biolib/_internal/templates/gui_template/index.css +5 -0
  28. biolib/_internal/templates/gui_template/index.html +13 -0
  29. biolib/_internal/templates/gui_template/index.tsx +10 -0
  30. biolib/_internal/templates/gui_template/package.json +27 -0
  31. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  32. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  33. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  34. biolib/_internal/templates/init_template/.biolib/config.yml +1 -0
  35. biolib/_internal/templates/init_template/Dockerfile +5 -1
  36. biolib/_internal/templates/init_template/run.py +6 -15
  37. biolib/_internal/templates/init_template/run.sh +1 -0
  38. biolib/_internal/templates/templates.py +21 -1
  39. biolib/_internal/utils/__init__.py +47 -0
  40. biolib/_internal/utils/auth.py +46 -0
  41. biolib/_internal/utils/job_url.py +33 -0
  42. biolib/_internal/utils/multinode.py +12 -14
  43. biolib/_runtime/runtime.py +15 -2
  44. biolib/_session/session.py +7 -5
  45. biolib/_shared/__init__.py +0 -0
  46. biolib/_shared/types/__init__.py +74 -0
  47. biolib/_shared/types/account.py +12 -0
  48. biolib/_shared/types/account_member.py +8 -0
  49. biolib/{_internal → _shared}/types/experiment.py +1 -0
  50. biolib/_shared/types/resource.py +37 -0
  51. biolib/_shared/types/resource_deploy_key.py +11 -0
  52. biolib/{_internal → _shared}/types/resource_version.py +8 -2
  53. biolib/_shared/types/user.py +19 -0
  54. biolib/_shared/utils/__init__.py +7 -0
  55. biolib/_shared/utils/resource_uri.py +75 -0
  56. biolib/api/client.py +5 -48
  57. biolib/app/app.py +97 -55
  58. biolib/biolib_api_client/api_client.py +3 -47
  59. biolib/biolib_api_client/app_types.py +1 -1
  60. biolib/biolib_api_client/biolib_app_api.py +31 -6
  61. biolib/biolib_api_client/biolib_job_api.py +1 -1
  62. biolib/biolib_api_client/user_state.py +34 -2
  63. biolib/biolib_binary_format/module_input.py +8 -0
  64. biolib/biolib_binary_format/remote_endpoints.py +3 -3
  65. biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
  66. biolib/biolib_logging.py +1 -1
  67. biolib/cli/__init__.py +2 -2
  68. biolib/cli/auth.py +4 -16
  69. biolib/cli/data_record.py +82 -0
  70. biolib/cli/index.py +32 -0
  71. biolib/cli/init.py +393 -71
  72. biolib/cli/lfs.py +1 -1
  73. biolib/cli/run.py +9 -6
  74. biolib/cli/start.py +14 -1
  75. biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
  76. biolib/compute_node/job_worker/executors/docker_types.py +1 -1
  77. biolib/compute_node/job_worker/executors/types.py +6 -5
  78. biolib/compute_node/job_worker/job_storage.py +2 -1
  79. biolib/compute_node/job_worker/job_worker.py +155 -90
  80. biolib/compute_node/job_worker/large_file_system.py +2 -6
  81. biolib/compute_node/job_worker/network_alloc.py +99 -0
  82. biolib/compute_node/job_worker/network_buffer.py +240 -0
  83. biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
  84. biolib/compute_node/remote_host_proxy.py +163 -79
  85. biolib/compute_node/utils.py +2 -0
  86. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  87. biolib/compute_node/webserver/proxy_utils.py +28 -0
  88. biolib/compute_node/webserver/webserver.py +64 -19
  89. biolib/experiments/experiment.py +111 -16
  90. biolib/jobs/job.py +128 -31
  91. biolib/jobs/job_result.py +74 -34
  92. biolib/jobs/types.py +1 -0
  93. biolib/sdk/__init__.py +28 -3
  94. biolib/typing_utils.py +1 -1
  95. biolib/utils/cache_state.py +8 -5
  96. biolib/utils/multipart_uploader.py +24 -18
  97. biolib/utils/seq_util.py +1 -1
  98. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  99. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  100. {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  101. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  102. biolib/_internal/llm_instructions/.github/instructions/style-react-ts.instructions.md +0 -22
  103. biolib/_internal/templates/init_template/.gitignore +0 -2
  104. biolib/_internal/types/__init__.py +0 -6
  105. biolib/_internal/types/resource.py +0 -18
  106. biolib/biolib_download_container.py +0 -38
  107. biolib/cli/download_container.py +0 -14
  108. biolib/utils/app_uri.py +0 -57
  109. pybiolib-1.2.883.dist-info/METADATA +0 -50
  110. pybiolib-1.2.883.dist-info/RECORD +0 -148
  111. pybiolib-1.2.883.dist-info/entry_points.txt +0 -3
  112. /biolib/{_internal/llm_instructions → _index}/__init__.py +0 -0
  113. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/general-app-knowledge.instructions.md +0 -0
  114. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-python.instructions.md +0 -0
  115. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_app_inputs.prompt.md +0 -0
  116. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_run_apps.prompt.md +0 -0
  117. /biolib/{_internal → _shared}/types/app.py +0 -0
  118. /biolib/{_internal → _shared}/types/data_record.py +0 -0
  119. /biolib/{_internal → _shared}/types/file_node.py +0 -0
  120. /biolib/{_internal → _shared}/types/push.py +0 -0
  121. /biolib/{_internal → _shared}/types/resource_permission.py +0 -0
  122. /biolib/{_internal → _shared}/types/result.py +0 -0
  123. /biolib/{_internal → _shared}/types/typing.py +0 -0
  124. {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info/licenses}/LICENSE +0 -0
@@ -2,10 +2,10 @@ import os
2
2
  import shutil
3
3
  import sys
4
4
 
5
- from biolib._internal import llm_instructions
5
+ from biolib._internal.templates import templates
6
6
 
7
7
 
8
- def add_copilot_prompts(force: bool, style: bool = True, silent: bool = False) -> None:
8
+ def add_copilot_prompts(force: bool, silent: bool = False) -> None:
9
9
  current_working_directory = os.getcwd()
10
10
  config_file_path = f'{current_working_directory}/.biolib/config.yml'
11
11
  if not os.path.exists(config_file_path):
@@ -14,32 +14,45 @@ Error: Current directory has not been initialized as a BioLib application.
14
14
  Please run the \"biolib init\" command first"""
15
15
  print(err_string, file=sys.stderr)
16
16
  exit(1)
17
- source_path = os.path.join(os.path.dirname(llm_instructions.__file__), '.github')
17
+ source_path = os.path.join(templates.copilot_template(), '.github')
18
18
  destination_path = os.path.join(current_working_directory, '.github')
19
19
 
20
20
  conflicting_files = []
21
+ files_to_overwrite = set()
21
22
 
22
23
  for root, _, filenames in os.walk(source_path):
23
24
  relative_dir = os.path.relpath(root, source_path)
24
25
  destination_dir = os.path.join(destination_path, relative_dir)
25
26
  for filename in filenames:
26
- if 'style' in filename and not style:
27
- continue
28
27
  source_file = os.path.join(root, filename)
29
28
  destination_file = os.path.join(destination_dir, filename)
30
29
  if os.path.exists(destination_file) and not force:
31
30
  with open(source_file, 'rb') as fsrc, open(destination_file, 'rb') as fdest:
32
31
  if fsrc.read() != fdest.read():
33
32
  conflicting_files.append(os.path.relpath(destination_file, current_working_directory))
34
- else:
35
- os.makedirs(destination_dir, exist_ok=True)
36
- shutil.copy2(source_file, destination_file)
37
33
 
38
34
  if conflicting_files:
39
- print('The following files were not overwritten. Use --force to override them:', file=sys.stderr)
35
+ print('The following files already exist and would be overwritten:')
40
36
  for conflicting_file in conflicting_files:
41
- print(f' {conflicting_file}', file=sys.stderr)
42
- exit(1)
37
+ print(f' {conflicting_file}')
38
+ print()
39
+
40
+ for conflicting_file in conflicting_files:
41
+ choice = input(f'Overwrite {conflicting_file}? [y/N]: ').lower().strip()
42
+ if choice in ['y', 'yes']:
43
+ files_to_overwrite.add(conflicting_file)
44
+
45
+ for root, _, filenames in os.walk(source_path):
46
+ relative_dir = os.path.relpath(root, source_path)
47
+ destination_dir = os.path.join(destination_path, relative_dir)
48
+ for filename in filenames:
49
+ source_file = os.path.join(root, filename)
50
+ destination_file = os.path.join(destination_dir, filename)
51
+ relative_file_path = os.path.relpath(destination_file, current_working_directory)
52
+
53
+ if not os.path.exists(destination_file) or force or relative_file_path in files_to_overwrite:
54
+ os.makedirs(destination_dir, exist_ok=True)
55
+ shutil.copy2(source_file, destination_file)
43
56
 
44
57
  if not silent:
45
58
  print(f'Prompt and instruction files added to {destination_path}/')
@@ -0,0 +1,81 @@
1
+ import os
2
+ import shutil
3
+
4
+ from biolib._internal.templates import templates
5
+
6
+
7
def add_gui_files(force=False, silent=False) -> None:
    """Copy the bundled GUI template into the current working directory.

    Files named in ``root_files`` are written to the project root; every other
    template file is written below ``gui/``, keeping its sub-directory.

    Existing files whose content differs from the template are listed and the
    user is asked, per file, whether to overwrite them. ``Dockerfile`` is the
    exception: it is never reported as a conflict and is always replaced.

    Finally the GUI build artefacts are added to ``.gitignore``. Only entries
    that are not already listed are appended, so running this function several
    times does not duplicate the ignore section.

    Args:
        force: Overwrite every existing destination file without prompting.
        silent: Suppress the final confirmation message.
    """
    cwd = os.getcwd()
    template_dir = templates.gui_template()

    root_files = ['package.json', 'Dockerfile', 'vite.config.mts', '.yarnrc.yml']

    # Walk the template once and remember where each file has to go, so that
    # conflict detection and copying cannot disagree about the destinations.
    planned_copies = []
    for root, _, filenames in os.walk(template_dir):
        relative_dir = os.path.relpath(root, template_dir)
        for filename in filenames:
            destination_dir = _gui_destination_dir(cwd, relative_dir, filename, root_files)
            planned_copies.append(
                (filename, os.path.join(root, filename), os.path.join(destination_dir, filename))
            )

    conflicting_files = []
    if not force:
        for filename, source_file, destination_file in planned_copies:
            # The Dockerfile is always replaced below, so it is never a conflict.
            if filename == 'Dockerfile' or not os.path.exists(destination_file):
                continue
            with open(source_file, 'rb') as fsrc, open(destination_file, 'rb') as fdest:
                if fsrc.read() != fdest.read():
                    conflicting_files.append(os.path.relpath(destination_file, cwd))

    files_to_overwrite = set()
    if conflicting_files:
        print('The following files already exist and would be overwritten:')
        for conflicting_file in conflicting_files:
            print(f'  {conflicting_file}')
        print()

        for conflicting_file in conflicting_files:
            choice = input(f'Overwrite {conflicting_file}? [y/N]: ').lower().strip()
            if choice in ['y', 'yes']:
                files_to_overwrite.add(conflicting_file)

    for filename, source_file, destination_file in planned_copies:
        should_force = force or filename == 'Dockerfile'
        relative_file_path = os.path.relpath(destination_file, cwd)
        if not os.path.exists(destination_file) or should_force or relative_file_path in files_to_overwrite:
            os.makedirs(os.path.dirname(destination_file), exist_ok=True)
            shutil.copy2(source_file, destination_file)

    _append_missing_gitignore_entries(
        os.path.join(cwd, '.gitignore'),
        ['.yarn', 'dist', 'yarn.lock', 'tsconfig.tsbuildinfo', 'node_modules'],
    )

    if not silent:
        print('gui files added to project root and gui/ subdirectory')


def _gui_destination_dir(cwd, relative_dir, filename, root_files):
    """Return the directory a GUI template file is copied to."""
    if filename in root_files:
        return cwd
    if relative_dir == '.':
        return os.path.join(cwd, 'gui')
    return os.path.join(cwd, 'gui', relative_dir)


def _append_missing_gitignore_entries(gitignore_path, entries, header='# gui'):
    """Append the entries not yet listed in the gitignore file and return them."""
    try:
        with open(gitignore_path) as gitignore_file:
            already_listed = {line.strip() for line in gitignore_file}
    except FileNotFoundError:
        already_listed = set()

    missing_entries = [entry for entry in entries if entry not in already_listed]
    if missing_entries:
        with open(gitignore_path, 'a') as gitignore_file:
            # Leading newline keeps the section separate even when the existing
            # file does not end with a line break.
            gitignore_file.write(f'\n{header}\n')
            gitignore_file.writelines(f'{entry}\n' for entry in missing_entries)
    return missing_entries
@@ -1 +1 @@
1
- from .data_record import get_data_record_state_from_uri, validate_sqlite_v1
1
+ from .data_record import validate_sqlite_v1
@@ -1,11 +1,7 @@
1
1
  import sqlite3
2
2
  from pathlib import Path
3
3
 
4
- from biolib._internal.types.data_record import SqliteV1DatabaseSchema
5
- from biolib.api import client as api_client
6
- from biolib.biolib_api_client import AppGetResponse
7
- from biolib.biolib_api_client.biolib_app_api import _get_app_uri_from_str
8
- from biolib.biolib_api_client.lfs_types import DataRecordVersionInfo
4
+ from biolib._shared.types import SqliteV1DatabaseSchema
9
5
 
10
6
 
11
7
  def get_actual_schema(db_path):
@@ -82,19 +78,6 @@ def verify_schema(specification: SqliteV1DatabaseSchema, actual_schema: SqliteV1
82
78
  )
83
79
 
84
80
 
85
- def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
86
- normalized_uri = _get_app_uri_from_str(uri)
87
- app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': normalized_uri}).json()
88
- resource_uri = app_response['app_version']['app_uri']
89
- if app_response['app']['type'] != 'data-record':
90
- raise Exception(f'Resource "{resource_uri}" is not a Data Record')
91
- return DataRecordVersionInfo(
92
- resource_uri=app_response['app_version']['app_uri'],
93
- resource_uuid=app_response['app']['public_id'],
94
- resource_version_uuid=app_response['app_version']['public_id'],
95
- )
96
-
97
-
98
81
  def validate_sqlite_v1(schema: SqliteV1DatabaseSchema, sqlite_file: Path):
99
82
  actual_schema = get_actual_schema(sqlite_file)
100
83
  print(schema)
@@ -1,10 +1,58 @@
1
+ from __future__ import annotations
2
+
1
3
  import os
4
+ from typing import Callable, Iterator
2
5
 
3
- from biolib import utils
6
+ import biolib.api as api
4
7
  from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
5
- from biolib._internal.types.typing import List, Optional, Tuple
6
8
  from biolib.biolib_errors import BioLibError
7
9
  from biolib.biolib_logging import logger
10
+ from biolib.typing_utils import List, Optional, Tuple
11
+ from biolib.utils import MultiPartUploader
12
+
13
+
14
def _upload_from_iterator(
    payload_iterator: Iterator[bytes],
    payload_size_in_bytes: int,
    resource_uuid: Optional[str] = None,
    resource_version_uuid: Optional[str] = None,
    use_process_pool: bool = False,
    publish: bool = False,
    on_progress: Optional[Callable[[int, int], None]] = None,
) -> str:
    """Upload a byte stream as the content of a resource version.

    Exactly one of ``resource_uuid`` and ``resource_version_uuid`` must be
    given. With ``resource_uuid`` a new version is first created through
    ``POST /lfs/versions/``; with ``resource_version_uuid`` the upload goes to
    that version.

    Args:
        payload_iterator: Chunks of the payload, passed on to the uploader.
        payload_size_in_bytes: Payload size, passed on to the uploader.
        resource_uuid: Resource to create a new version for.
        resource_version_uuid: Version to upload into.
        use_process_pool: Forwarded to ``MultiPartUploader``.
        publish: When true, the version is patched to state ``published`` and
            set as active once the upload has finished.
        on_progress: Optional callback forwarded to ``MultiPartUploader``.

    Returns:
        The UUID of the version that received the upload.

    Raises:
        ValueError: If none or both of the two UUID arguments are supplied.
    """
    version_was_given = resource_version_uuid is not None
    if version_was_given == (resource_uuid is not None):
        raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')

    if version_was_given:
        version_uuid = resource_version_uuid
    else:
        create_version_response = api.client.post(
            path='/lfs/versions/',
            data={'resource_uuid': resource_uuid},
        )
        version_uuid = create_version_response.json()['uuid']

    def authenticated_request(action: str) -> dict:
        # Both upload endpoints share the same shape and differ only in the action.
        return {
            'headers': None,
            'requires_biolib_auth': True,
            'path': f'/lfs/versions/{version_uuid}/{action}/',
        }

    uploader = MultiPartUploader(
        use_process_pool=use_process_pool,
        get_presigned_upload_url_request=authenticated_request('presigned_upload_url'),
        complete_upload_request=authenticated_request('complete_upload'),
        on_progress=on_progress,
    )
    uploader.upload(payload_iterator=payload_iterator, payload_size_in_bytes=payload_size_in_bytes)

    if not publish:
        return version_uuid

    api.client.patch(
        path=f'/resources/versions/{version_uuid}/',
        data={'state': 'published', 'set_as_active': True},
    )
    return version_uuid
8
56
 
9
57
 
10
58
  def validate_data_path_and_get_files_and_size_of_directory(data_path: str) -> Tuple[List[str], int]:
@@ -28,9 +76,14 @@ def push_data_path(
28
76
  data_path: str,
29
77
  data_size_in_bytes: int,
30
78
  files_to_zip: List[str],
31
- resource_version_uuid: str,
79
+ resource_uuid: Optional[str] = None,
80
+ resource_version_uuid: Optional[str] = None,
32
81
  chunk_size_in_mb: Optional[int] = None,
33
- ) -> None:
82
+ publish: bool = False,
83
+ ) -> str:
84
+ if (resource_uuid is None) == (resource_version_uuid is None):
85
+ raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
86
+
34
87
  original_working_dir = os.getcwd()
35
88
  os.chdir(data_path)
36
89
 
@@ -49,19 +102,15 @@ def push_data_path(
49
102
  logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
50
103
 
51
104
  iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
52
- multipart_uploader = utils.MultiPartUploader(
105
+
106
+ new_resource_version_uuid = _upload_from_iterator(
107
+ payload_iterator=iterable_zip_stream,
108
+ payload_size_in_bytes=data_size_in_bytes,
109
+ resource_uuid=resource_uuid,
110
+ resource_version_uuid=resource_version_uuid,
53
111
  use_process_pool=True,
54
- get_presigned_upload_url_request=dict(
55
- headers=None,
56
- requires_biolib_auth=True,
57
- path=f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
58
- ),
59
- complete_upload_request=dict(
60
- headers=None,
61
- requires_biolib_auth=True,
62
- path=f'/lfs/versions/{resource_version_uuid}/complete_upload/',
63
- ),
112
+ publish=publish,
64
113
  )
65
114
 
66
- multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
67
115
  os.chdir(original_working_dir)
116
+ return new_resource_version_uuid
@@ -1,38 +1,43 @@
1
1
  import os
2
- from datetime import datetime, timedelta
2
+ from datetime import datetime, timedelta, timezone
3
3
  from urllib.parse import urlparse
4
4
 
5
+ from biolib._shared.types import ResourceDetailedDict
5
6
  from biolib.api import client as api_client
6
- from biolib.biolib_api_client.lfs_types import DataRecordVersion
7
7
  from biolib.biolib_binary_format.utils import RemoteEndpoint
8
8
  from biolib.biolib_logging import logger
9
9
  from biolib.typing_utils import Optional
10
10
 
11
11
 
12
12
  class DataRecordRemoteStorageEndpoint(RemoteEndpoint):
13
- def __init__(self, resource_version_uuid: str):
14
- self._resource_version_uuid: str = resource_version_uuid
13
+ def __init__(self, uri: str):
14
+ self._uri: str = uri
15
15
  self._expires_at: Optional[datetime] = None
16
16
  self._presigned_url: Optional[str] = None
17
17
 
18
18
  def get_remote_url(self) -> str:
19
- if not self._presigned_url or not self._expires_at or datetime.utcnow() > self._expires_at:
20
- lfs_version: DataRecordVersion = api_client.get(
21
- path=f'/lfs/versions/{self._resource_version_uuid}/',
19
+ if not self._presigned_url or not self._expires_at or datetime.now(timezone.utc) > self._expires_at:
20
+ resource_response: ResourceDetailedDict = api_client.get(
21
+ path='/resource/',
22
+ params={'uri': self._uri},
22
23
  ).json()
23
24
 
25
+ version = resource_response.get('version')
26
+ assets = version.get('assets') if version else None
27
+ if not assets:
28
+ raise Exception(f'Resource "{self._uri}" has no downloadable assets')
29
+
30
+ download_url = assets['download_url']
24
31
  app_caller_proxy_job_storage_base_url = os.getenv('BIOLIB_CLOUD_JOB_STORAGE_BASE_URL', '')
25
32
  if app_caller_proxy_job_storage_base_url:
26
- # Done to hit App Caller Proxy when downloading from inside an app
27
- parsed_url = urlparse(lfs_version['presigned_download_url'])
33
+ parsed_url = urlparse(download_url)
28
34
  self._presigned_url = f'{app_caller_proxy_job_storage_base_url}{parsed_url.path}?{parsed_url.query}'
29
35
  else:
30
- self._presigned_url = lfs_version['presigned_download_url']
36
+ self._presigned_url = download_url
31
37
 
32
- self._expires_at = datetime.utcnow() + timedelta(minutes=8)
38
+ self._expires_at = datetime.now(timezone.utc) + timedelta(minutes=8)
33
39
  logger.debug(
34
- f'DataRecord "{self._resource_version_uuid}" fetched presigned URL '
35
- f'with expiry at {self._expires_at.isoformat()}'
40
+ f'DataRecord "{self._uri}" fetched presigned URL ' f'with expiry at {self._expires_at.isoformat()}'
36
41
  )
37
42
 
38
43
  return self._presigned_url
@@ -1,5 +1,7 @@
1
+ import hashlib
1
2
  import io
2
3
  import os
4
+ import posixpath
3
5
  import zipfile as zf
4
6
  from pathlib import Path
5
7
 
@@ -75,3 +77,49 @@ def get_iterable_zip_stream(files: List[str], chunk_size: int) -> Iterator[bytes
75
77
  if len(chunk) == 0:
76
78
  break
77
79
  yield chunk
80
+
81
+
82
def path_to_renamed_path(path_str: str, prefix_with_slash: bool = True) -> str:
    """Map a user-supplied file path to a normalized POSIX-style name.

    Rules, in order:
    - A path containing ``..`` is resolved first. If the resolved path lies
      below the current directory it becomes relative to it; otherwise its
      resolved folder is replaced by a short hash and the filename is kept.
    - An absolute path below the current directory becomes relative to it;
      any other absolute path has its folder replaced by a short hash.
    - Any other relative path is kept as given.

    The result is passed through ``posixpath.normpath`` and starts with ``/``
    exactly when ``prefix_with_slash`` is true.
    """
    working_dir = Path.cwd()
    given_path = Path(path_str)

    def hashed_name(location: Path) -> str:
        # Six hex characters of the folder's MD5 stand in for the folder itself.
        folder_digest = hashlib.md5(str(location.parent).encode()).hexdigest()[:6]
        return f'/{folder_digest}/{location.name}'

    if '..' in given_path.parts:
        absolute_path = given_path.resolve()
        try:
            renamed = str(absolute_path.relative_to(working_dir))
        except ValueError:
            renamed = hashed_name(absolute_path)
    elif given_path.is_absolute():
        try:
            renamed = str(given_path.resolve().relative_to(working_dir))
        except ValueError:
            # Here the folder of the path as given (not resolved) is hashed.
            renamed = hashed_name(given_path)
    else:
        renamed = path_str

    has_leading_slash = renamed.startswith('/')
    if prefix_with_slash and not has_leading_slash:
        renamed = '/' + renamed
    elif not prefix_with_slash and has_leading_slash:
        renamed = renamed[1:]

    # Normalizing turns e.g. '/./mydir' into '/mydir' and drops trailing slashes;
    # the downstream Mappings class matches on exact string prefixes.
    return posixpath.normpath(renamed)
@@ -1,6 +1,6 @@
1
1
  import os
2
2
  import subprocess
3
- from datetime import datetime, timedelta
3
+ from datetime import datetime, timedelta, timezone
4
4
 
5
5
  from biolib.biolib_logging import logger_no_user_data
6
6
  from biolib.compute_node.job_worker.cache_state import LfsCacheState
@@ -9,7 +9,7 @@ from biolib.compute_node.job_worker.cache_state import LfsCacheState
9
9
  def prune_lfs_cache(dry_run: bool) -> None:
10
10
  logger_no_user_data.info(f'Pruning LFS cache (dry run = {dry_run})...')
11
11
 
12
- current_time = datetime.utcnow()
12
+ current_time = datetime.now(timezone.utc)
13
13
  paths_to_delete = set()
14
14
 
15
15
  with LfsCacheState() as state:
@@ -24,6 +24,8 @@ def prune_lfs_cache(dry_run: bool) -> None:
24
24
  lfs_uuids_to_keep_in_state = set()
25
25
  for lfs_uuid, lfs in state['large_file_systems'].items():
26
26
  last_used_at = datetime.fromisoformat(lfs['last_used_at'])
27
+ if last_used_at.tzinfo is None:
28
+ last_used_at = last_used_at.replace(tzinfo=timezone.utc)
27
29
  lfs_time_to_live_in_days = 60 if lfs['state'] == 'ready' else 7
28
30
 
29
31
  if last_used_at < current_time - timedelta(days=lfs_time_to_live_in_days):
@@ -1,3 +1,4 @@
1
+ import json
1
2
  import os
2
3
  import re
3
4
  import sys
@@ -14,14 +15,14 @@ from biolib._internal.data_record.push_data import (
14
15
  )
15
16
  from biolib._internal.errors import AuthenticationError
16
17
  from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
17
- from biolib._internal.types.push import PushResponseDict
18
+ from biolib._shared.types import PushResponseDict
19
+ from biolib._shared.utils import parse_resource_uri
18
20
  from biolib.biolib_api_client import BiolibApiClient
19
21
  from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
20
22
  from biolib.biolib_docker_client import BiolibDockerClient
21
23
  from biolib.biolib_errors import BioLibError
22
24
  from biolib.biolib_logging import logger
23
- from biolib.typing_utils import Iterable, Optional, Set, TypedDict
24
- from biolib.utils.app_uri import parse_app_uri
25
+ from biolib.typing_utils import Dict, Iterable, Optional, Set, TypedDict, Union
25
26
 
26
27
  REGEX_MARKDOWN_INLINE_IMAGE = re.compile(r'!\[(?P<alt>.*)\]\((?P<src>.*)\)')
27
28
 
@@ -108,8 +109,10 @@ def _process_docker_status_updates_with_progress_bar(status_updates: Iterable[Do
108
109
 
109
110
 
110
111
  def _process_docker_status_updates_with_logging(status_updates: Iterable[DockerStatusUpdate], action: str) -> None:
111
- layer_progress = {}
112
- layer_status = {}
112
+ layer_progress: Dict[str, float] = {}
113
+ layer_status: Dict[str, str] = {}
114
+ layer_details: Dict[str, Dict[str, int]] = {}
115
+ layer_bytes_at_last_log: Dict[str, int] = {}
113
116
  last_log_time = time.time()
114
117
 
115
118
  logger.info(f'{action} Docker image...')
@@ -127,6 +130,7 @@ def _process_docker_status_updates_with_logging(status_updates: Iterable[DockerS
127
130
  percentage = (current / total * 100) if total > 0 else 0
128
131
  layer_progress[layer_id] = percentage
129
132
  layer_status[layer_id] = f'{action.lower()}'
133
+ layer_details[layer_id] = {'current': current, 'total': total}
130
134
  elif update.get('status') == 'Layer already exists':
131
135
  layer_progress[layer_id] = 100
132
136
  layer_status[layer_id] = 'already exists'
@@ -145,14 +149,33 @@ def _process_docker_status_updates_with_logging(status_updates: Iterable[DockerS
145
149
  logger.info(f'{action} Docker image - {status}')
146
150
 
147
151
  if current_time - last_log_time >= 10.0:
148
- _log_progress_summary(action, layer_progress, layer_status)
152
+ _log_progress_summary(
153
+ action,
154
+ layer_progress,
155
+ layer_status,
156
+ layer_details,
157
+ layer_bytes_at_last_log,
158
+ current_time - last_log_time,
159
+ )
160
+ layer_bytes_at_last_log = {lid: details['current'] for lid, details in layer_details.items()}
149
161
  last_log_time = current_time
150
162
 
151
- _log_progress_summary(action, layer_progress, layer_status)
163
+ _log_progress_summary(
164
+ action, layer_progress, layer_status, layer_details, layer_bytes_at_last_log, time.time() - last_log_time
165
+ )
166
+ if action == 'Pushing':
167
+ logger.info('Pushing final image manifest...')
152
168
  logger.info(f'{action} Docker image completed')
153
169
 
154
170
 
155
- def _log_progress_summary(action: str, layer_progress: dict, layer_status: dict) -> None:
171
+ def _log_progress_summary(
172
+ action: str,
173
+ layer_progress: Dict[str, float],
174
+ layer_status: Dict[str, str],
175
+ layer_details: Dict[str, Dict[str, int]],
176
+ layer_bytes_at_last_log: Dict[str, int],
177
+ time_delta: float,
178
+ ) -> None:
156
179
  if not layer_progress and not layer_status:
157
180
  return
158
181
 
@@ -171,7 +194,36 @@ def _log_progress_summary(action: str, layer_progress: dict, layer_status: dict)
171
194
  if status in ['preparing', 'waiting', 'pushing', 'uploading'] and layer_progress.get(layer_id, 0) < 100
172
195
  ]
173
196
 
174
- if active_layers:
197
+ if active_layers and layer_details:
198
+ total_bytes_transferred = 0
199
+ layer_info_parts = []
200
+
201
+ for layer_id in active_layers[:5]:
202
+ if layer_id in layer_details:
203
+ details = layer_details[layer_id]
204
+ current = details['current']
205
+ total = details['total']
206
+ percentage = layer_progress.get(layer_id, 0)
207
+
208
+ bytes_since_last = current - layer_bytes_at_last_log.get(layer_id, 0)
209
+ total_bytes_transferred += bytes_since_last
210
+
211
+ current_mb = current / (1024 * 1024)
212
+ total_mb = total / (1024 * 1024)
213
+ layer_info_parts.append(f'{layer_id}: {current_mb:.1f}/{total_mb:.1f} MB ({percentage:.1f}%)')
214
+
215
+ speed_info = ''
216
+ if time_delta > 0 and total_bytes_transferred > 0:
217
+ speed_mbps = (total_bytes_transferred / (1024 * 1024)) / time_delta
218
+ speed_info = f' @ {speed_mbps:.2f} MB/s'
219
+
220
+ more_layers_info = ''
221
+ if len(active_layers) > 5:
222
+ more_layers_info = f' (+ {len(active_layers) - 5} more)'
223
+
224
+ if layer_info_parts:
225
+ logger.info(f'Active layers: {", ".join(layer_info_parts)}{speed_info}{more_layers_info}')
226
+ elif active_layers:
175
227
  logger.info(f'Active layers: {", ".join(active_layers[:5])}{"..." if len(active_layers) > 5 else ""}')
176
228
 
177
229
 
@@ -193,10 +245,12 @@ def push_application(
193
245
  set_as_published: bool,
194
246
  dry_run: bool = False,
195
247
  ) -> Optional[PushResponseDict]:
196
- parsed_uri = parse_app_uri(app_uri)
197
- app_name = parsed_uri['app_name']
248
+ app_uri = app_uri.rstrip('/')
249
+ parsed_uri = parse_resource_uri(app_uri)
250
+ resource_name = parsed_uri['resource_name']
198
251
 
199
- app_uri_to_fetch = f"@{parsed_uri['resource_name_prefix']}/{parsed_uri['account_handle_normalized']}/{app_name}"
252
+ app_uri_prefix = f"@{parsed_uri['resource_prefix']}/" if parsed_uri['resource_prefix'] is not None else ''
253
+ app_uri_to_fetch = f"{app_uri_prefix}{parsed_uri['account_handle_normalized']}/{resource_name}"
200
254
 
201
255
  version = parsed_uri['version']
202
256
  semantic_version = f"{version['major']}.{version['minor']}.{version['patch']}" if version else None
@@ -205,14 +259,14 @@ def push_application(
205
259
 
206
260
  api_client = BiolibApiClient.get()
207
261
  if not api_client.is_signed_in:
208
- github_ref = os.getenv('GITHUB_REF')
209
- if github_ref and not api_client.resource_deploy_key:
262
+ github_repository = os.getenv('GITHUB_REPOSITORY')
263
+ if github_repository and not api_client.resource_deploy_key:
264
+ github_secrets_url = f'https://github.com/{github_repository}/settings/secrets/actions/new'
210
265
  raise AuthenticationError(
211
266
  'You must be authenticated to push an application.\n'
212
267
  'Please set the environment variable "BIOLIB_TOKEN=[your_deploy_token]"\n'
213
268
  f'You can get a deploy key at: {api_client.base_url}/{app_uri_to_fetch}/settings/keys/\n'
214
- 'Then add it to your GitHub repository at: '
215
- 'Settings -> Secrets and variables -> Actions -> Repository secrets'
269
+ f'Then add it to your GitHub repository at: {github_secrets_url}'
216
270
  )
217
271
  else:
218
272
  raise AuthenticationError(
@@ -233,19 +287,34 @@ def push_application(
233
287
  app_data_path: Optional[Path] = None
234
288
  try:
235
289
  with open(config_yml_path) as config_yml_file:
236
- config = yaml.safe_load(config_yml_file.read())
290
+ try:
291
+ config = json.loads(json.dumps(yaml.safe_load(config_yml_file.read())))
292
+ except (TypeError, ValueError) as e:
293
+ raise BioLibError(
294
+ f'The .biolib/config.yml file contains data types that are not supported '
295
+ f'(must be JSON-serializable). Please ensure only standard JSON types '
296
+ f'(str, int, float, bool, list, dict, null) are used. Original error: {e}'
297
+ ) from e
298
+
299
+ if 'assets' in config and 'app_data' not in config:
300
+ config['app_data'] = config.pop('assets')
301
+ elif 'assets' in config and 'app_data' in config:
302
+ raise BioLibError(
303
+ 'In .biolib/config.yml you cannot specify both "app_data" and "assets" fields. Please use only one.'
304
+ )
237
305
 
238
306
  app_data = config.get('app_data')
239
307
  if app_data:
308
+ field_name = 'app_data' if 'app_data' in config else 'assets'
240
309
  if not isinstance(app_data, str):
241
310
  raise BioLibError(
242
- f'In .biolib/config.yml the value of "app_data" must be a string but got {type(app_data)}'
311
+ f'In .biolib/config.yml the value of "{field_name}" must be a string but got {type(app_data)}'
243
312
  )
244
313
 
245
314
  app_data_path = app_path_absolute.joinpath(app_data).resolve()
246
315
  if not app_data_path.is_dir():
247
316
  raise BioLibError(
248
- 'In .biolib/config.yml the value of "app_data" must be a path to a directory '
317
+ f'In .biolib/config.yml the value of "{field_name}" must be a path to a directory '
249
318
  'in the application directory'
250
319
  )
251
320
 
@@ -316,10 +385,6 @@ def push_application(
316
385
  app_response = BiolibAppApi.get_by_uri(app_uri_to_fetch)
317
386
  app = app_response['app']
318
387
 
319
- if app_data and not app['allow_client_side_execution']:
320
- raise BioLibError(
321
- 'To push a version with app_data the app must be set to "Allow Client-Side Source Code Access"'
322
- )
323
388
  if dry_run:
324
389
  logger.info('Successfully completed dry-run. No new version was pushed.')
325
390
  return None
@@ -405,9 +470,15 @@ def push_application(
405
470
  logger.info(f'Successfully pushed {docker_image_name}')
406
471
 
407
472
  app_version_uuid = new_app_version_json['public_id']
473
+ complete_push_data: Dict[str, Union[bool, str]] = {
474
+ 'set_as_active': set_as_active,
475
+ 'set_as_published': set_as_published,
476
+ }
477
+ if parsed_uri['tag']:
478
+ complete_push_data['tag'] = parsed_uri['tag']
408
479
  api.client.post(
409
480
  path=f'/app-versions/{app_version_uuid}/complete-push/',
410
- data={'set_as_active': set_as_active, 'set_as_published': set_as_published},
481
+ data=complete_push_data,
411
482
  )
412
483
 
413
484
  sematic_version = f"{new_app_version_json['major']}.{new_app_version_json['minor']}.{new_app_version_json['patch']}"
@@ -4,10 +4,12 @@ from biolib.typing_utils import TypedDict
4
4
  class RuntimeJobDataDict(TypedDict):
5
5
  version: str
6
6
  job_requested_machine: str
7
+ job_requested_machine_spot: bool
7
8
  job_uuid: str
8
9
  job_auth_token: str
9
10
  app_uri: str
10
11
  is_environment_biolib_cloud: bool
12
+ job_reserved_machines: int
11
13
 
12
14
 
13
15
  class BioLibRuntimeError(Exception):