pybiolib 1.2.1056__py3-none-any.whl → 1.2.1727__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pybiolib might be problematic.
Files changed (93)
  1. biolib/__init__.py +33 -10
  2. biolib/_data_record/data_record.py +103 -26
  3. biolib/_index/__init__.py +0 -0
  4. biolib/_index/index.py +51 -0
  5. biolib/_index/types.py +7 -0
  6. biolib/_internal/data_record/data_record.py +1 -1
  7. biolib/_internal/data_record/push_data.py +65 -16
  8. biolib/_internal/data_record/remote_storage_endpoint.py +3 -3
  9. biolib/_internal/file_utils.py +7 -4
  10. biolib/_internal/index/__init__.py +1 -0
  11. biolib/_internal/index/index.py +18 -0
  12. biolib/_internal/lfs/cache.py +4 -2
  13. biolib/_internal/push_application.py +89 -23
  14. biolib/_internal/runtime.py +2 -0
  15. biolib/_internal/templates/gui_template/App.tsx +38 -2
  16. biolib/_internal/templates/gui_template/Dockerfile +2 -0
  17. biolib/_internal/templates/gui_template/biolib-sdk.ts +37 -0
  18. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  19. biolib/_internal/templates/gui_template/package.json +1 -0
  20. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +49 -0
  21. biolib/_internal/templates/gui_template/vite.config.mts +2 -1
  22. biolib/_internal/templates/init_template/.github/workflows/biolib.yml +6 -1
  23. biolib/_internal/templates/init_template/Dockerfile +2 -0
  24. biolib/_internal/utils/__init__.py +40 -0
  25. biolib/_internal/utils/auth.py +46 -0
  26. biolib/_internal/utils/job_url.py +33 -0
  27. biolib/_runtime/runtime.py +9 -0
  28. biolib/_session/session.py +7 -5
  29. biolib/_shared/__init__.py +0 -0
  30. biolib/_shared/types/__init__.py +74 -0
  31. biolib/_shared/types/resource.py +37 -0
  32. biolib/_shared/types/resource_deploy_key.py +11 -0
  33. biolib/{_internal → _shared}/types/resource_version.py +8 -2
  34. biolib/_shared/utils/__init__.py +7 -0
  35. biolib/_shared/utils/resource_uri.py +75 -0
  36. biolib/api/client.py +3 -47
  37. biolib/app/app.py +57 -33
  38. biolib/biolib_api_client/api_client.py +3 -47
  39. biolib/biolib_api_client/app_types.py +1 -6
  40. biolib/biolib_api_client/biolib_app_api.py +17 -0
  41. biolib/biolib_binary_format/module_input.py +8 -0
  42. biolib/biolib_binary_format/remote_endpoints.py +3 -3
  43. biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
  44. biolib/cli/__init__.py +2 -1
  45. biolib/cli/data_record.py +82 -0
  46. biolib/cli/index.py +32 -0
  47. biolib/cli/init.py +39 -1
  48. biolib/cli/lfs.py +1 -1
  49. biolib/cli/run.py +8 -5
  50. biolib/cli/start.py +14 -1
  51. biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
  52. biolib/compute_node/job_worker/executors/docker_types.py +1 -1
  53. biolib/compute_node/job_worker/executors/types.py +6 -5
  54. biolib/compute_node/job_worker/job_worker.py +149 -93
  55. biolib/compute_node/job_worker/large_file_system.py +2 -6
  56. biolib/compute_node/job_worker/network_alloc.py +99 -0
  57. biolib/compute_node/job_worker/network_buffer.py +240 -0
  58. biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
  59. biolib/compute_node/remote_host_proxy.py +139 -79
  60. biolib/compute_node/utils.py +2 -0
  61. biolib/compute_node/webserver/compute_node_results_proxy.py +188 -0
  62. biolib/compute_node/webserver/proxy_utils.py +28 -0
  63. biolib/compute_node/webserver/webserver.py +64 -19
  64. biolib/experiments/experiment.py +111 -16
  65. biolib/jobs/job.py +119 -29
  66. biolib/jobs/job_result.py +70 -33
  67. biolib/jobs/types.py +1 -0
  68. biolib/sdk/__init__.py +17 -2
  69. biolib/typing_utils.py +1 -1
  70. biolib/utils/cache_state.py +2 -2
  71. biolib/utils/multipart_uploader.py +24 -18
  72. biolib/utils/seq_util.py +1 -1
  73. pybiolib-1.2.1727.dist-info/METADATA +41 -0
  74. {pybiolib-1.2.1056.dist-info → pybiolib-1.2.1727.dist-info}/RECORD +103 -85
  75. {pybiolib-1.2.1056.dist-info → pybiolib-1.2.1727.dist-info}/WHEEL +1 -1
  76. pybiolib-1.2.1727.dist-info/entry_points.txt +2 -0
  77. biolib/_internal/types/__init__.py +0 -6
  78. biolib/_internal/types/resource.py +0 -18
  79. biolib/utils/app_uri.py +0 -57
  80. pybiolib-1.2.1056.dist-info/METADATA +0 -50
  81. pybiolib-1.2.1056.dist-info/entry_points.txt +0 -3
  82. /biolib/{_internal → _shared}/types/account.py +0 -0
  83. /biolib/{_internal → _shared}/types/account_member.py +0 -0
  84. /biolib/{_internal → _shared}/types/app.py +0 -0
  85. /biolib/{_internal → _shared}/types/data_record.py +0 -0
  86. /biolib/{_internal → _shared}/types/experiment.py +0 -0
  87. /biolib/{_internal → _shared}/types/file_node.py +0 -0
  88. /biolib/{_internal → _shared}/types/push.py +0 -0
  89. /biolib/{_internal → _shared}/types/resource_permission.py +0 -0
  90. /biolib/{_internal → _shared}/types/result.py +0 -0
  91. /biolib/{_internal → _shared}/types/typing.py +0 -0
  92. /biolib/{_internal → _shared}/types/user.py +0 -0
  93. {pybiolib-1.2.1056.dist-info → pybiolib-1.2.1727.dist-info/licenses}/LICENSE +0 -0
biolib/__init__.py CHANGED
@@ -1,3 +1,4 @@
+# ruff: noqa: I001
 # Imports to hide
 import os
 from urllib.parse import urlparse as _urlparse
@@ -15,6 +16,7 @@ from biolib.jobs.job import Result as _Result
 from biolib import user as _user
 from biolib.typing_utils import List, Optional, cast as _cast
 from biolib._data_record.data_record import DataRecord as _DataRecord
+from biolib._internal.utils.job_url import parse_result_id_or_url as _parse_result_id_or_url

 import biolib.api
 import biolib.app
@@ -22,7 +24,6 @@ import biolib.cli
 import biolib.sdk
 import biolib.utils

-
 # ------------------------------------ Function definitions for public Python API ------------------------------------


@@ -83,43 +84,65 @@ def search(


 def get_job(job_id: str, job_token: Optional[str] = None) -> _Result:
-    r"""Get a job by its ID.
+    r"""Get a job by its ID or full URL.

     Args:
-        job_id (str): The UUID of the job to retrieve
+        job_id (str): The UUID of the job to retrieve, or a full URL to the job.
+            Can be either:
+            - Job UUID (e.g., 'abc123')
+            - Full URL (e.g., 'https://biolib.com/result/abc123/?token=xyz789')
+            - Full URL with token parameter (e.g., 'biolib.com/result/abc123/token=xyz789')
         job_token (str, optional): Authentication token for accessing the job.
             Only needed for jobs that aren't owned by the current user.
+            If the URL contains a token, this parameter is ignored.

     Returns:
         Job: The job object

     Example::

+        >>> # Get by UUID
         >>> job = biolib.get_job('abc123')
-        >>> # Access shared job
+        >>> # Get with explicit token
         >>> job = biolib.get_job('abc123', job_token='xyz789')
+        >>> # Get by full URL with token
+        >>> job = biolib.get_job('https://biolib.com/result/abc123/?token=xyz789')
+        >>> # Get by URL with inline token format
+        >>> job = biolib.get_job('biolib.com/result/abc123/token=xyz789')
     """
-    return _Result.create_from_uuid(uuid=job_id, auth_token=job_token)
+    uuid, token = _parse_result_id_or_url(job_id, job_token)
+    return _Result.create_from_uuid(uuid=uuid, auth_token=token)


 def get_result(result_id: str, result_token: Optional[str] = None) -> _Result:
-    r"""Get a result by its ID.
+    r"""Get a result by its ID or full URL.

     Args:
-        result_id (str): The UUID of the result to retrieve
+        result_id (str): The UUID of the result to retrieve, or a full URL to the result.
+            Can be either:
+            - Result UUID (e.g., 'abc123')
+            - Full URL (e.g., 'https://biolib.com/result/abc123/?token=xyz789')
+            - Full URL with token parameter (e.g., 'biolib.com/result/abc123/token=xyz789')
         result_token (str, optional): Authentication token for accessing the result.
-            Only needed for result that aren't owned by the current user.
+            Only needed for results that aren't owned by the current user.
+            If the URL contains a token, this parameter is ignored.

     Returns:
         Result: The result object

     Example::

+        >>> # Get by UUID
         >>> result = biolib.get_result('abc123')
-        >>> # Access shared result
+        >>> # Get with explicit token
         >>> result = biolib.get_result('abc123', result_token='xyz789')
+        >>> # Get by full URL with token
+        >>> result = biolib.get_result('https://biolib.com/result/abc123/?token=xyz789')
+        >>> # Get by URL with inline token format
+        >>> result = biolib.get_result('biolib.com/result/abc123/token=xyz789')
     """
-    return _Result.create_from_uuid(uuid=result_id, auth_token=result_token)
+    uuid, token = _parse_result_id_or_url(result_id, result_token)
+    return _Result.create_from_uuid(uuid=uuid, auth_token=token)


 def get_data_record(uri: str) -> _DataRecord:
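Both functions now delegate ID/URL handling to parse_result_id_or_url from biolib/_internal/utils/job_url.py (+33 lines above), whose body is not included in this diff. A minimal sketch of what such a parser can look like, using only the standard library; names and behavior here are illustrative, not the actual implementation:

from typing import Optional, Tuple
from urllib.parse import parse_qs, urlparse


def parse_result_id_or_url(id_or_url: str, token: Optional[str] = None) -> Tuple[str, Optional[str]]:
    if '/' not in id_or_url:
        return id_or_url, token  # plain UUID: keep the explicitly passed token
    # Accept URLs with or without a scheme, e.g. 'biolib.com/result/abc123/?token=xyz789'
    parsed = urlparse(id_or_url if '://' in id_or_url else f'https://{id_or_url}')
    path_parts = [part for part in parsed.path.split('/') if part]
    uuid = path_parts[path_parts.index('result') + 1]  # expects a /result/<uuid>/ path
    url_token = parse_qs(parsed.query).get('token', [None])[0]
    # Per the docstring above, a token embedded in the URL wins over the parameter.
    # (The inline 'token=...' path-segment variant is omitted in this sketch.)
    return uuid, url_token or token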
biolib/_data_record/data_record.py CHANGED
@@ -3,26 +3,27 @@ from collections import namedtuple
 from datetime import datetime
 from pathlib import Path
 from struct import Struct
-from typing import Callable, Dict, Iterable, List, Optional, Union, cast
+from typing import Callable, Dict, Iterable, Iterator, List, Optional, Union, cast

 from biolib import api
-from biolib._internal import types
 from biolib._internal.data_record import get_data_record_state_from_uri
 from biolib._internal.data_record.data_record import validate_sqlite_v1
 from biolib._internal.data_record.push_data import (
+    _upload_from_iterator,
     push_data_path,
     validate_data_path_and_get_files_and_size_of_directory,
 )
 from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
 from biolib._internal.http_client import HttpClient
-from biolib._internal.types.file_node import ZipFileNodeDict
+from biolib._shared import types
+from biolib._shared.types import ZipFileNodeDict
+from biolib._shared.utils import parse_resource_uri
 from biolib.api import client as api_client
 from biolib.biolib_api_client import BiolibApiClient
-from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersion, DataRecordVersionInfo
+from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersionInfo
 from biolib.biolib_binary_format import LazyLoadedFile
 from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
 from biolib.biolib_logging import logger
-from biolib.utils.app_uri import parse_app_uri

 PathFilter = Union[str, List[str], Callable[[str], bool]]

@@ -44,11 +45,11 @@ class DataRecord:

     @property
     def name(self) -> str:
-        uri_parsed = parse_app_uri(self._state['resource_uri'], use_account_as_name_default=False)
-        if not uri_parsed['app_name']:
+        uri_parsed = parse_resource_uri(self._state['resource_uri'], use_account_as_name_default=False)
+        if not uri_parsed['resource_name']:
             raise ValueError('Expected parameter "resource_uri" to contain resource name')

-        return uri_parsed['app_name']
+        return uri_parsed['resource_name']

     def list_files(
         self,
@@ -113,37 +114,36 @@
         else:
             raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")

-        response = api.client.post(path='/lfs/versions/', data={'resource_uuid': self._state['resource_uuid']})
-        data_record_version: DataRecordVersion = response.json()
-        resource_version_uuid = data_record_version['uuid']
-
-        push_data_path(
+        new_resource_version_uuid = push_data_path(
             data_path=data_path,
             data_size_in_bytes=data_size_in_bytes,
             files_to_zip=files_to_zip,
-            resource_version_uuid=resource_version_uuid,
+            resource_uuid=self._state['resource_uuid'],
             chunk_size_in_mb=chunk_size_in_mb,
+            publish=True,
         )

-        api.client.patch(
-            path=f'/resources/versions/{resource_version_uuid}/',
-            data={'state': 'published', 'set_as_active': True},
-        )
-
-        logger.info(f"Successfully pushed a new Data Record version '{data_record_version['uri']}'")
-        self._state = get_data_record_state_from_uri(data_record_version['uri'])
+        updated_record = DataRecord._get_by_version_uuid(new_resource_version_uuid)
+        self._state = updated_record._state  # pylint: disable=protected-access
+        logger.info(f"Successfully pushed a new Data Record version '{self.uri}'")

     @staticmethod
     def get_by_uri(uri: str) -> 'DataRecord':
         return DataRecord(_internal_state=get_data_record_state_from_uri(uri))

+    @staticmethod
+    def _get_by_version_uuid(version_uuid: str) -> 'DataRecord':
+        response = api.client.get(path=f'/lfs/versions/{version_uuid}/')
+        version_info = response.json()
+        return DataRecord.get_by_uri(version_info['uri'])
+
     @staticmethod
     def create(destination: str, data_path: Optional[str] = None, record_type: Optional[str] = None) -> 'DataRecord':
         BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Data Record')
         if data_path is not None:
             assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
-        uri_parsed = parse_app_uri(destination, use_account_as_name_default=False)
-        if uri_parsed['app_name_normalized']:
+        uri_parsed = parse_resource_uri(destination, use_account_as_name_default=False)
+        if uri_parsed['resource_name_normalized']:
             data_record_uri = destination
         else:
             record_name = 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
@@ -173,10 +173,10 @@
             'resource_type': 'data-record',
         }
         if uri:
-            uri_parsed = parse_app_uri(uri, use_account_as_name_default=False)
+            uri_parsed = parse_resource_uri(uri, use_account_as_name_default=False)
             params['account_handle'] = uri_parsed['account_handle_normalized']
-            if uri_parsed['app_name_normalized']:
-                params['app_name'] = uri_parsed['app_name_normalized']
+            if uri_parsed['resource_name_normalized']:
+                params['app_name'] = uri_parsed['resource_name_normalized']

         results = api_client.get(path='/apps/', params=params).json()['results']
         if count is None and len(results) == max_page_size:
@@ -284,3 +284,80 @@

     def _get_detailed_dict(self) -> types.DataRecordDetailedDict:
         return cast(types.DataRecordDetailedDict, api_client.get(f'/resources/data-records/{self.uuid}/').json())
+
+    def _get_zip_size_bytes(self) -> int:
+        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
+            resource_version_uuid=self._state['resource_version_uuid'],
+        )
+        presigned_url = remote_storage_endpoint.get_remote_url()
+        response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-0'})
+        content_range = response.headers.get('Content-Range', '')
+        if not content_range or '/' not in content_range:
+            raise ValueError('Unable to determine zip size: Content-Range header missing or invalid')
+        total_size = int(content_range.split('/')[1])
+        return total_size
+
+    def _iter_zip_bytes(self, chunk_size_bytes: int) -> Iterator[bytes]:
+        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
+            resource_version_uuid=self._state['resource_version_uuid'],
+        )
+        presigned_url = remote_storage_endpoint.get_remote_url()
+        response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-0'})
+        content_range = response.headers.get('Content-Range', '')
+        if not content_range or '/' not in content_range:
+            raise ValueError('Unable to determine zip size: Content-Range header missing or invalid')
+        total_size = int(content_range.split('/')[1])
+
+        for start in range(0, total_size, chunk_size_bytes):
+            end = min(start + chunk_size_bytes - 1, total_size - 1)
+            presigned_url = remote_storage_endpoint.get_remote_url()
+            response = HttpClient.request(
+                url=presigned_url,
+                headers={'range': f'bytes={start}-{end}'},
+                timeout_in_seconds=300,
+            )
+            yield response.content
+
+    @staticmethod
+    def clone(
+        source: 'DataRecord',
+        destination: 'DataRecord',
+        on_progress: Optional[Callable[[int, int], None]] = None,
+    ) -> 'DataRecord':
+        BiolibApiClient.assert_is_signed_in(authenticated_action_description='clone a Data Record')
+
+        # pylint: disable=protected-access
+        total_size_in_bytes = source._get_zip_size_bytes()
+
+        if total_size_in_bytes == 0:
+            raise ValueError('Source data record has no data to clone')
+
+        min_chunk_size_bytes = 10_000_000
+        chunk_size_in_bytes = max(min_chunk_size_bytes, int(total_size_in_bytes / 9_000))
+
+        zip_iterator = source._iter_zip_bytes(chunk_size_bytes=chunk_size_in_bytes)
+
+        new_resource_version_uuid = _upload_from_iterator(
+            resource_uuid=destination._state['resource_uuid'],
+            payload_iterator=zip_iterator,
+            payload_size_in_bytes=total_size_in_bytes,
+            publish=True,
+            on_progress=on_progress,
+        )
+        # pylint: enable=protected-access
+
+        logger.info(f"Successfully cloned data to '{destination.uri}'")
+        return DataRecord._get_by_version_uuid(new_resource_version_uuid)
+
+    def delete(self) -> None:
+        """Delete the data record.
+
+        Example::
+            >>> record = DataRecord.get_by_uri("account/data-record")
+            >>> record.delete()
+        """
+        try:
+            api_client.delete(path=f'/apps/{self.uuid}/')
+            logger.info(f'Data record {self.uri} deleted')
+        except Exception as error:
+            raise Exception(f'Failed to delete data record {self.uri} due to: {error}') from error
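push now creates, uploads, and publishes the version in one call (push_data_path with publish=True), and the new clone streams the source record's zip through ranged GETs and re-uploads it without touching local disk. The chunk size max(10_000_000, total_size / 9_000) caps the part count at roughly 9,000, which stays under the 10,000-part limit of S3-style multipart uploads, assuming such a backend. A usage sketch with placeholder URIs; the (bytes_done, bytes_total) order of the on_progress arguments is inferred from the Callable[[int, int], None] annotation, not documented in this diff:

from biolib._data_record.data_record import DataRecord

source = DataRecord.get_by_uri('my-account/source-record')       # placeholder URI
destination = DataRecord.create(destination='my-account/clone')  # placeholder URI


def print_progress(bytes_done: int, bytes_total: int) -> None:
    # Argument order assumed from the type annotation
    print(f'cloned {bytes_done} of {bytes_total} bytes')


cloned = DataRecord.clone(source=source, destination=destination, on_progress=print_progress)
print(cloned.uri)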
biolib/_index/__init__.py ADDED
File without changes
biolib/_index/index.py ADDED
@@ -0,0 +1,51 @@
+import json
+from typing import Any, Dict
+
+from biolib import api
+from biolib._index.types import IndexInfo
+from biolib._internal.index import get_index_from_uri
+from biolib.biolib_api_client import BiolibApiClient
+from biolib.biolib_logging import logger
+
+
+class Index:
+    def __init__(self, _internal_state: IndexInfo):
+        self._state = _internal_state
+
+    def __repr__(self) -> str:
+        return f'Index: {self._state["resource_uri"]}'
+
+    @property
+    def uri(self) -> str:
+        return self._state['resource_uri']
+
+    @property
+    def id(self) -> str:
+        return f'{self._state["group_uuid"]}.{self._state["resource_uuid"]}'.replace('-', '_')
+
+    @staticmethod
+    def get_by_uri(uri: str) -> 'Index':
+        return Index(_internal_state=get_index_from_uri(uri))
+
+    @staticmethod
+    def create(uri: str, config: Dict[str, Any]) -> str:
+        BiolibApiClient.assert_is_signed_in(authenticated_action_description='create an Index')
+
+        response = api.client.post(
+            path='/resources/indexes/',
+            data={
+                'uri': uri,
+                'index_config': config,
+            },
+        )
+        result = response.json()
+        created_uri: str = result['uri']
+        logger.info(f"Successfully created Index '{created_uri}'")
+        return created_uri
+
+    @staticmethod
+    def create_from_config_file(uri: str, config_path: str) -> str:
+        with open(config_path) as config_file:
+            index_config = json.load(config_file)
+
+        return Index.create(uri=uri, config=index_config)
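A usage sketch for the new Index API. The URI is a placeholder and the expected shape of index_config is not specified anywhere in this diff:

import json

from biolib._index.index import Index

config = {'columns': [{'name': 'sequence', 'type': 'text'}]}  # hypothetical config shape
with open('index-config.json', 'w') as config_file:
    json.dump(config, config_file)

created_uri = Index.create_from_config_file(uri='my-account/my-index', config_path='index-config.json')
index = Index.get_by_uri(created_uri)
print(index.id)  # '<group_uuid>.<resource_uuid>' with dashes replaced by underscores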
biolib/_index/types.py ADDED
@@ -0,0 +1,7 @@
+from typing import TypedDict
+
+
+class IndexInfo(TypedDict):
+    resource_uri: str
+    resource_uuid: str
+    group_uuid: str
biolib/_internal/data_record/data_record.py CHANGED
@@ -1,7 +1,7 @@
 import sqlite3
 from pathlib import Path

-from biolib._internal.types.data_record import SqliteV1DatabaseSchema
+from biolib._shared.types import SqliteV1DatabaseSchema
 from biolib.api import client as api_client
 from biolib.biolib_api_client import AppGetResponse
 from biolib.biolib_api_client.biolib_app_api import _get_app_uri_from_str
biolib/_internal/data_record/push_data.py CHANGED
@@ -1,10 +1,58 @@
+from __future__ import annotations
+
 import os
+from typing import Callable, Iterator

-from biolib import utils
+import biolib.api as api
 from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
-from biolib._internal.types.typing import List, Optional, Tuple
 from biolib.biolib_errors import BioLibError
 from biolib.biolib_logging import logger
+from biolib.typing_utils import List, Optional, Tuple
+from biolib.utils import MultiPartUploader
+
+
+def _upload_from_iterator(
+    payload_iterator: Iterator[bytes],
+    payload_size_in_bytes: int,
+    resource_uuid: Optional[str] = None,
+    resource_version_uuid: Optional[str] = None,
+    use_process_pool: bool = False,
+    publish: bool = False,
+    on_progress: Optional[Callable[[int, int], None]] = None,
+) -> str:
+    if (resource_uuid is None) == (resource_version_uuid is None):
+        raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
+
+    if resource_version_uuid is None:
+        response = api.client.post(
+            path='/lfs/versions/',
+            data={'resource_uuid': resource_uuid},
+        )
+        resource_version_uuid = response.json()['uuid']
+
+    multipart_uploader = MultiPartUploader(
+        use_process_pool=use_process_pool,
+        get_presigned_upload_url_request={
+            'headers': None,
+            'requires_biolib_auth': True,
+            'path': f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
+        },
+        complete_upload_request={
+            'headers': None,
+            'requires_biolib_auth': True,
+            'path': f'/lfs/versions/{resource_version_uuid}/complete_upload/',
+        },
+        on_progress=on_progress,
+    )
+    multipart_uploader.upload(payload_iterator=payload_iterator, payload_size_in_bytes=payload_size_in_bytes)
+
+    if publish:
+        api.client.patch(
+            path=f'/resources/versions/{resource_version_uuid}/',
+            data={'state': 'published', 'set_as_active': True},
+        )
+
+    return resource_version_uuid


 def validate_data_path_and_get_files_and_size_of_directory(data_path: str) -> Tuple[List[str], int]:
@@ -28,9 +76,14 @@ def push_data_path(
     data_path: str,
     data_size_in_bytes: int,
     files_to_zip: List[str],
-    resource_version_uuid: str,
+    resource_uuid: Optional[str] = None,
+    resource_version_uuid: Optional[str] = None,
     chunk_size_in_mb: Optional[int] = None,
-) -> None:
+    publish: bool = False,
+) -> str:
+    if (resource_uuid is None) == (resource_version_uuid is None):
+        raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
+
     original_working_dir = os.getcwd()
     os.chdir(data_path)
@@ -49,19 +102,15 @@
     logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')

     iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
-    multipart_uploader = utils.MultiPartUploader(
+
+    new_resource_version_uuid = _upload_from_iterator(
+        payload_iterator=iterable_zip_stream,
+        payload_size_in_bytes=data_size_in_bytes,
+        resource_uuid=resource_uuid,
+        resource_version_uuid=resource_version_uuid,
         use_process_pool=True,
-        get_presigned_upload_url_request=dict(
-            headers=None,
-            requires_biolib_auth=True,
-            path=f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
-        ),
-        complete_upload_request=dict(
-            headers=None,
-            requires_biolib_auth=True,
-            path=f'/lfs/versions/{resource_version_uuid}/complete_upload/',
-        ),
+        publish=publish,
     )

-    multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
     os.chdir(original_working_dir)
+    return new_resource_version_uuid
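Both _upload_from_iterator and push_data_path guard their mutually exclusive arguments with (resource_uuid is None) == (resource_version_uuid is None), which is true exactly when both or neither are set. A standalone illustration of the pattern:

from typing import Optional


def check_exactly_one(resource_uuid: Optional[str], resource_version_uuid: Optional[str]) -> None:
    # Equality of the two None-checks holds only in the two invalid cases:
    # both arguments missing, or both arguments provided.
    if (resource_uuid is None) == (resource_version_uuid is None):
        raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')


check_exactly_one('resource-123', None)  # ok
check_exactly_one(None, 'version-456')   # ok
# check_exactly_one(None, None)                     # raises ValueError
# check_exactly_one('resource-123', 'version-456')  # raises ValueError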
biolib/_internal/data_record/remote_storage_endpoint.py CHANGED
@@ -1,5 +1,5 @@
 import os
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from urllib.parse import urlparse

 from biolib.api import client as api_client
@@ -16,7 +16,7 @@ class DataRecordRemoteStorageEndpoint(RemoteEndpoint):
         self._presigned_url: Optional[str] = None

     def get_remote_url(self) -> str:
-        if not self._presigned_url or not self._expires_at or datetime.utcnow() > self._expires_at:
+        if not self._presigned_url or not self._expires_at or datetime.now(timezone.utc) > self._expires_at:
             lfs_version: DataRecordVersion = api_client.get(
                 path=f'/lfs/versions/{self._resource_version_uuid}/',
             ).json()
@@ -29,7 +29,7 @@ class DataRecordRemoteStorageEndpoint(RemoteEndpoint):
         else:
             self._presigned_url = lfs_version['presigned_download_url']

-        self._expires_at = datetime.utcnow() + timedelta(minutes=8)
+        self._expires_at = datetime.now(timezone.utc) + timedelta(minutes=8)
         logger.debug(
             f'DataRecord "{self._resource_version_uuid}" fetched presigned URL '
             f'with expiry at {self._expires_at.isoformat()}'
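This file (and biolib/_internal/lfs/cache.py below) migrates from the naive datetime.utcnow(), deprecated since Python 3.12, to the timezone-aware datetime.now(timezone.utc). The two must be migrated together because comparing naive and aware datetimes raises TypeError:

from datetime import datetime, timedelta, timezone

naive = datetime.utcnow()           # tzinfo is None; deprecated since Python 3.12
aware = datetime.now(timezone.utc)  # timezone-aware replacement

print(aware > aware - timedelta(minutes=8))  # fine: both operands are aware
try:
    print(naive > aware)
except TypeError as error:
    print(error)  # can't compare offset-naive and offset-aware datetimes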
biolib/_internal/file_utils.py CHANGED
@@ -1,6 +1,7 @@
 import hashlib
 import io
 import os
+import posixpath
 import zipfile as zf
 from pathlib import Path

@@ -114,9 +115,11 @@ def path_to_renamed_path(path_str: str, prefix_with_slash: bool = True) -> str:

     if prefix_with_slash:
         if not result.startswith('/'):
-            return '/' + result
-        return result
+            result = '/' + result
+        # Normalize to handle cases like '/./mydir' -> '/mydir' and remove trailing slashes.
+        # Required because downstream Mappings class does exact string-prefix matching.
+        return posixpath.normpath(result)
     else:
         if result.startswith('/'):
-            return result[1:]
-        return result
+            result = result[1:]
+        return posixpath.normpath(result)
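Behavior of posixpath.normpath for the cases the new comment calls out, plus one edge worth knowing:

import posixpath

print(posixpath.normpath('/./mydir'))     # '/mydir'     -- './' segment collapsed
print(posixpath.normpath('/mydir/sub/'))  # '/mydir/sub' -- trailing slash removed
print(posixpath.normpath('a/./b/../c'))   # 'a/c'
print(posixpath.normpath(''))             # '.'          -- empty input changes shape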
biolib/_internal/index/__init__.py ADDED
@@ -0,0 +1 @@
+from .index import get_index_from_uri
biolib/_internal/index/index.py ADDED
@@ -0,0 +1,18 @@
+from typing import Any, Dict
+
+from biolib._index.types import IndexInfo
+from biolib.api import client as api_client
+from biolib.biolib_api_client.biolib_app_api import _get_app_uri_from_str
+
+
+def get_index_from_uri(uri: str) -> IndexInfo:
+    normalized_uri = _get_app_uri_from_str(uri)
+    app_response: Dict[str, Any] = api_client.get(path='/app/', params={'uri': normalized_uri}).json()
+    resource_uri = app_response['app_version']['app_uri']
+    if app_response['app']['type'] != 'index':
+        raise Exception(f'Resource "{resource_uri}" is not an Index')
+    return IndexInfo(
+        resource_uri=app_response['app_version']['app_uri'],
+        resource_uuid=app_response['app']['public_id'],
+        group_uuid=app_response['app']['group_uuid'],
+    )
biolib/_internal/lfs/cache.py CHANGED
@@ -1,6 +1,6 @@
 import os
 import subprocess
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone

 from biolib.biolib_logging import logger_no_user_data
 from biolib.compute_node.job_worker.cache_state import LfsCacheState
@@ -9,7 +9,7 @@ from biolib.compute_node.job_worker.cache_state import LfsCacheState
 def prune_lfs_cache(dry_run: bool) -> None:
     logger_no_user_data.info(f'Pruning LFS cache (dry run = {dry_run})...')

-    current_time = datetime.utcnow()
+    current_time = datetime.now(timezone.utc)
     paths_to_delete = set()

     with LfsCacheState() as state:
@@ -24,6 +24,8 @@ def prune_lfs_cache(dry_run: bool) -> None:
         lfs_uuids_to_keep_in_state = set()
         for lfs_uuid, lfs in state['large_file_systems'].items():
             last_used_at = datetime.fromisoformat(lfs['last_used_at'])
+            if last_used_at.tzinfo is None:
+                last_used_at = last_used_at.replace(tzinfo=timezone.utc)
             lfs_time_to_live_in_days = 60 if lfs['state'] == 'ready' else 7

             if last_used_at < current_time - timedelta(days=lfs_time_to_live_in_days):
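The tzinfo backfill is needed because datetime.fromisoformat only returns an aware value when the stored string carries an offset; timestamps written by the old utcnow() code are naive and would raise TypeError when compared against the new aware current_time. Demonstration:

from datetime import datetime, timezone

aware = datetime.fromisoformat('2024-05-01T12:00:00+00:00')  # carries an offset
naive = datetime.fromisoformat('2024-05-01T12:00:00')        # no offset: naive

assert aware.tzinfo is not None and naive.tzinfo is None

# The backfill from the diff: treat stored naive timestamps as UTC
if naive.tzinfo is None:
    naive = naive.replace(tzinfo=timezone.utc)
assert naive == aware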