pybiolib 1.1.2038__tar.gz → 1.1.2097__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/PKG-INFO +1 -1
  2. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/__init__.py +5 -1
  3. {pybiolib-1.1.2038/biolib/_internal/data_record → pybiolib-1.1.2097/biolib/_data_record}/data_record.py +72 -33
  4. pybiolib-1.1.2097/biolib/_internal/data_record/__init__.py +1 -0
  5. pybiolib-1.1.2097/biolib/_internal/data_record/data_record.py +73 -0
  6. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/_internal/data_record/remote_storage_endpoint.py +2 -2
  7. pybiolib-1.1.2097/biolib/_internal/file_utils.py +77 -0
  8. pybiolib-1.1.2097/biolib/_internal/lfs/__init__.py +1 -0
  9. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/_internal/push_application.py +1 -1
  10. pybiolib-1.1.2097/biolib/_internal/runtime.py +19 -0
  11. {pybiolib-1.1.2038/biolib/_internal → pybiolib-1.1.2097/biolib/_runtime}/runtime.py +6 -23
  12. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_api_client/api_client.py +1 -1
  13. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_api_client/app_types.py +1 -0
  14. pybiolib-1.1.2097/biolib/biolib_api_client/lfs_types.py +19 -0
  15. pybiolib-1.1.2097/biolib/cli/data_record.py +79 -0
  16. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/cli/lfs.py +10 -6
  17. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/runtime/__init__.py +1 -1
  18. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/sdk/__init__.py +9 -5
  19. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/pyproject.toml +1 -1
  20. pybiolib-1.1.2038/biolib/_internal/data_record/__init__.py +0 -1
  21. pybiolib-1.1.2038/biolib/biolib_api_client/lfs_types.py +0 -13
  22. pybiolib-1.1.2038/biolib/cli/data_record.py +0 -43
  23. pybiolib-1.1.2038/biolib/lfs/__init__.py +0 -4
  24. pybiolib-1.1.2038/biolib/lfs/utils.py +0 -153
  25. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/LICENSE +0 -0
  26. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/README.md +0 -0
  27. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/_internal/__init__.py +0 -0
  28. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/_internal/fuse_mount/__init__.py +0 -0
  29. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/_internal/fuse_mount/experiment_fuse_mount.py +0 -0
  30. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/_internal/http_client.py +0 -0
  31. {pybiolib-1.1.2038/biolib → pybiolib-1.1.2097/biolib/_internal}/lfs/cache.py +0 -0
  32. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/_internal/libs/__init__.py +0 -0
  33. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/_internal/libs/fusepy/__init__.py +0 -0
  34. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/_internal/utils/__init__.py +0 -0
  35. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/api/__init__.py +0 -0
  36. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/api/client.py +0 -0
  37. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/app/__init__.py +0 -0
  38. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/app/app.py +0 -0
  39. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/app/search_apps.py +0 -0
  40. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_api_client/__init__.py +0 -0
  41. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_api_client/auth.py +0 -0
  42. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_api_client/biolib_app_api.py +0 -0
  43. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_api_client/biolib_job_api.py +0 -0
  44. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_api_client/common_types.py +0 -0
  45. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_api_client/job_types.py +0 -0
  46. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_api_client/user_state.py +0 -0
  47. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/__init__.py +0 -0
  48. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/base_bbf_package.py +0 -0
  49. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/file_in_container.py +0 -0
  50. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/module_input.py +0 -0
  51. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/module_output_v2.py +0 -0
  52. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/remote_endpoints.py +0 -0
  53. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/remote_stream_seeker.py +0 -0
  54. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/saved_job.py +0 -0
  55. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/stdout_and_stderr.py +0 -0
  56. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/system_exception.py +0 -0
  57. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/system_status_update.py +0 -0
  58. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_binary_format/utils.py +0 -0
  59. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_docker_client/__init__.py +0 -0
  60. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_download_container.py +0 -0
  61. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_errors.py +0 -0
  62. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/biolib_logging.py +0 -0
  63. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/cli/__init__.py +0 -0
  64. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/cli/auth.py +0 -0
  65. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/cli/download_container.py +0 -0
  66. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/cli/init.py +0 -0
  67. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/cli/push.py +0 -0
  68. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/cli/run.py +0 -0
  69. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/cli/runtime.py +0 -0
  70. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/cli/start.py +0 -0
  71. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/.gitignore +0 -0
  72. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/__init__.py +0 -0
  73. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/cloud_utils/__init__.py +0 -0
  74. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/cloud_utils/cloud_utils.py +0 -0
  75. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/__init__.py +0 -0
  76. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/cache_state.py +0 -0
  77. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/cache_types.py +0 -0
  78. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/docker_image_cache.py +0 -0
  79. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/executors/__init__.py +0 -0
  80. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/executors/docker_executor.py +0 -0
  81. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/executors/docker_types.py +0 -0
  82. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/executors/tars/__init__.py +0 -0
  83. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/executors/types.py +0 -0
  84. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +0 -0
  85. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +0 -0
  86. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/job_storage.py +0 -0
  87. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/job_worker.py +0 -0
  88. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/large_file_system.py +0 -0
  89. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/mappings.py +0 -0
  90. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/utilization_reporter_thread.py +0 -0
  91. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/job_worker/utils.py +0 -0
  92. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/remote_host_proxy.py +0 -0
  93. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/socker_listener_thread.py +0 -0
  94. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/socket_sender_thread.py +0 -0
  95. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/utils.py +0 -0
  96. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/webserver/__init__.py +0 -0
  97. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/webserver/gunicorn_flask_application.py +0 -0
  98. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/webserver/webserver.py +0 -0
  99. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/webserver/webserver_types.py +0 -0
  100. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/webserver/webserver_utils.py +0 -0
  101. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/compute_node/webserver/worker_thread.py +0 -0
  102. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/experiments/__init__.py +0 -0
  103. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/experiments/experiment.py +0 -0
  104. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/experiments/types.py +0 -0
  105. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/jobs/__init__.py +0 -0
  106. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/jobs/job.py +0 -0
  107. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/jobs/job_result.py +0 -0
  108. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/jobs/types.py +0 -0
  109. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/tables.py +0 -0
  110. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/templates/__init__.py +0 -0
  111. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/templates/example_app.py +0 -0
  112. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/typing_utils.py +0 -0
  113. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/user/__init__.py +0 -0
  114. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/user/sign_in.py +0 -0
  115. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/utils/__init__.py +0 -0
  116. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/utils/app_uri.py +0 -0
  117. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/utils/cache_state.py +0 -0
  118. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/utils/multipart_uploader.py +0 -0
  119. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/utils/seq_util.py +0 -0
  120. {pybiolib-1.1.2038 → pybiolib-1.1.2097}/biolib/utils/zip/remote_zip.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.1.2038
3
+ Version: 1.1.2097
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -13,7 +13,7 @@ from biolib.biolib_api_client import BiolibApiClient as _BioLibApiClient, App
13
13
  from biolib.jobs import Job as _Job
14
14
  from biolib import user as _user
15
15
  from biolib.typing_utils import List, Optional
16
- from biolib._internal.data_record import DataRecord as _DataRecord
16
+ from biolib._data_record.data_record import DataRecord as _DataRecord
17
17
 
18
18
  import biolib.api
19
19
  import biolib.app
@@ -45,6 +45,10 @@ def get_job(job_id: str) -> _Job:
45
45
  return _Job.create_from_uuid(uuid=job_id)
46
46
 
47
47
 
48
+ def get_data_record(uri: str) -> _DataRecord:
49
+ return _DataRecord.get_by_uri(uri)
50
+
51
+
48
52
  def fetch_jobs(count: int = 25) -> List[_Job]:
49
53
  return _Job.fetch_jobs(count)
50
54
 
@@ -1,47 +1,50 @@
1
- import os
2
- from collections import namedtuple
3
- from datetime import datetime
4
- from fnmatch import fnmatch
5
- from struct import Struct
6
- from typing import Callable, Dict, List, Optional, Union, cast
7
-
8
- from biolib import lfs
1
+ from biolib import api
2
+ from biolib._internal.data_record import get_data_record_state_from_uri, push_data_record_version
9
3
  from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
10
4
  from biolib._internal.http_client import HttpClient
11
5
  from biolib.api import client as api_client
12
- from biolib.biolib_api_client import AppGetResponse
6
+ from biolib.biolib_api_client import BiolibApiClient
7
+ from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersionInfo
13
8
  from biolib.biolib_binary_format import LazyLoadedFile
14
9
  from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
15
10
  from biolib.biolib_logging import logger
11
+ from biolib.typing_utils import Optional as _Optional
16
12
  from biolib.utils.app_uri import parse_app_uri
17
- from biolib.utils.zip.remote_zip import RemoteZip # type: ignore
13
+ from biolib.utils.zip.remote_zip import RemoteZip
14
+
15
+
16
+ import os
17
+ from collections import namedtuple
18
+ from datetime import datetime
19
+ from fnmatch import fnmatch
20
+ from struct import Struct
21
+ from typing import Callable, Dict, List, cast, Union
18
22
 
19
23
  PathFilter = Union[str, Callable[[str], bool]]
20
24
 
21
25
 
22
26
  class DataRecord:
23
- def __init__(self, uri: str):
24
- self._uri = uri
27
+ def __init__(self, _internal_state: DataRecordVersionInfo):
28
+ self._state = _internal_state
25
29
 
26
30
  def __repr__(self):
27
- return f'DataRecord: {self._uri}'
31
+ return f'DataRecord: {self._state["resource_uri"]}'
28
32
 
29
33
  @property
30
34
  def uri(self) -> str:
31
- return self._uri
35
+ return self._state['resource_uri']
32
36
 
33
37
  @property
34
38
  def name(self) -> str:
35
- uri_parsed = parse_app_uri(self.uri, use_account_as_name_default=False)
39
+ uri_parsed = parse_app_uri(self._state['resource_uri'], use_account_as_name_default=False)
36
40
  if not uri_parsed['app_name']:
37
- raise ValueError('Expected parameter "uri" to contain resource name')
41
+ raise ValueError('Expected parameter "resource_uri" to contain resource name')
38
42
 
39
43
  return uri_parsed['app_name']
40
44
 
41
- def list_files(self, path_filter: Optional[PathFilter] = None) -> List[LazyLoadedFile]:
42
- app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': self._uri}).json()
45
+ def list_files(self, path_filter: _Optional[PathFilter] = None) -> List[LazyLoadedFile]:
43
46
  remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
44
- resource_version_uuid=app_response['app_version']['public_id'],
47
+ resource_version_uuid=self._state['resource_version_uuid'],
45
48
  )
46
49
  files: List[LazyLoadedFile] = []
47
50
  with RemoteZip(url=remote_storage_endpoint.get_remote_url()) as remote_zip:
@@ -52,13 +55,12 @@ class DataRecord:
52
55
  return self._get_filtered_files(files=files, path_filter=path_filter) if path_filter else files
53
56
 
54
57
  def download_zip(self, output_path: str):
55
- app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': self._uri}).json()
56
58
  remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
57
- resource_version_uuid=app_response['app_version']['public_id'],
59
+ resource_version_uuid=self._state['resource_version_uuid'],
58
60
  )
59
61
  HttpClient.request(url=remote_storage_endpoint.get_remote_url(), response_path=output_path)
60
62
 
61
- def download_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
63
+ def download_files(self, output_dir: str, path_filter: _Optional[PathFilter] = None) -> None:
62
64
  filtered_files = self.list_files(path_filter=path_filter)
63
65
 
64
66
  if len(filtered_files) == 0:
@@ -72,23 +74,51 @@ class DataRecord:
72
74
  for chunk in file.get_data_iterator():
73
75
  file_handle.write(chunk)
74
76
 
75
- def save_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
77
+ def save_files(self, output_dir: str, path_filter: _Optional[PathFilter] = None) -> None:
76
78
  self.download_files(output_dir=output_dir, path_filter=path_filter)
77
79
 
78
- def update(self, data_path: str) -> None:
80
+ def update(self, data_path: str, chunk_size_in_mb: _Optional[int] = None) -> None:
79
81
  assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
80
- self._uri = lfs.push_large_file_system(lfs_uri=self._uri, input_dir=data_path)
82
+ uri = push_data_record_version(
83
+ data_record_uuid=self._state['resource_uuid'], input_dir=data_path, chunk_size_in_mb=chunk_size_in_mb
84
+ )
85
+ self._state = get_data_record_state_from_uri(uri)
81
86
 
82
87
  @staticmethod
83
- def create(destination: str, data_path: str, name: Optional[str] = None) -> 'DataRecord':
84
- assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
85
- record_name = name if name else 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
86
- record_uri = lfs.create_large_file_system(lfs_uri=f'{destination}/{record_name}')
87
- record_version_uri = lfs.push_large_file_system(lfs_uri=record_uri, input_dir=data_path)
88
- return DataRecord(uri=record_version_uri)
88
+ def get_by_uri(uri: str) -> 'DataRecord':
89
+ return DataRecord(_internal_state=get_data_record_state_from_uri(uri))
89
90
 
90
91
  @staticmethod
91
- def fetch(uri: Optional[str] = None, count: Optional[int] = None) -> List['DataRecord']:
92
+ def create(destination: str, data_path: _Optional[str] = None) -> 'DataRecord':
93
+ BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Data Record')
94
+ if data_path is not None:
95
+ assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
96
+ uri_parsed = parse_app_uri(destination, use_account_as_name_default=False)
97
+ if uri_parsed['app_name_normalized']:
98
+ data_record_uri = destination
99
+ else:
100
+ record_name = 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
101
+ data_record_uri = f'{destination}/{record_name}'
102
+
103
+ uri_parsed = parse_app_uri(data_record_uri)
104
+ response = api.client.post(
105
+ path='/lfs/',
106
+ data={
107
+ 'account_handle': uri_parsed['account_handle_normalized'],
108
+ 'name': uri_parsed['app_name'],
109
+ },
110
+ )
111
+ data_record: DataRecordInfo = response.json()
112
+ logger.info(f"Successfully created new Data Record '{data_record['uri']}'")
113
+
114
+ if data_path is not None:
115
+ record_version_uri = push_data_record_version(data_record_uuid=data_record['uuid'], input_dir=data_path)
116
+ return DataRecord.get_by_uri(uri=record_version_uri)
117
+ else:
118
+ return DataRecord.get_by_uri(uri=data_record_uri)
119
+
120
+ @staticmethod
121
+ def fetch(uri: _Optional[str] = None, count: _Optional[int] = None) -> List['DataRecord']:
92
122
  max_page_size = 1_000
93
123
  params: Dict[str, Union[str, int]] = {
94
124
  'page_size': str(count or max_page_size),
@@ -106,7 +136,16 @@ class DataRecord:
106
136
  f'Fetch results exceeded maximum count of {max_page_size}. Some data records might not be fetched.'
107
137
  )
108
138
 
109
- return [DataRecord(result['resource_uri']) for result in results]
139
+ return [
140
+ DataRecord(
141
+ _internal_state={
142
+ 'resource_uri': result['resource_uri'],
143
+ 'resource_uuid': result['public_id'],
144
+ 'resource_version_uuid': result['active_version'],
145
+ }
146
+ )
147
+ for result in results
148
+ ]
110
149
 
111
150
  @staticmethod
112
151
  def _get_file(remote_storage_endpoint: DataRecordRemoteStorageEndpoint, file_info: Dict) -> LazyLoadedFile:
@@ -0,0 +1 @@
1
+ from .data_record import get_data_record_state_from_uri, push_data_record_version
@@ -0,0 +1,73 @@
1
+ import os
2
+ from typing import Optional
3
+
4
+ from biolib import api, utils
5
+ from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
6
+ from biolib.api import client as api_client
7
+ from biolib.biolib_api_client import AppGetResponse, BiolibApiClient
8
+ from biolib.biolib_api_client.lfs_types import DataRecordVersion, DataRecordVersionInfo
9
+ from biolib.biolib_errors import BioLibError
10
+ from biolib.biolib_logging import logger
11
+
12
+
13
+ def push_data_record_version(data_record_uuid: str, input_dir: str, chunk_size_in_mb: Optional[int] = None) -> str:
14
+ BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Data Record')
15
+
16
+ if not os.path.isdir(input_dir):
17
+ raise BioLibError(f'Could not find folder at {input_dir}')
18
+
19
+ if os.path.realpath(input_dir) == '/':
20
+ raise BioLibError('Pushing your root directory is not possible')
21
+
22
+ original_working_dir = os.getcwd()
23
+ os.chdir(input_dir)
24
+ files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
25
+
26
+ if data_size_in_bytes > 4_500_000_000_000:
27
+ raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')
28
+
29
+ min_chunk_size_bytes = 10_000_000
30
+ chunk_size_in_bytes: int
31
+ if chunk_size_in_mb:
32
+ chunk_size_in_bytes = chunk_size_in_mb * 1_000_000 # Convert megabytes to bytes
33
+ if chunk_size_in_bytes < min_chunk_size_bytes:
34
+ logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
35
+ chunk_size_in_bytes = min_chunk_size_bytes
36
+ else:
37
+ # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
38
+ chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))
39
+
40
+ data_size_in_mb = round(data_size_in_bytes / 10**6)
41
+ print(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')
42
+
43
+ response = api.client.post(path='/lfs/versions/', data={'resource_uuid': data_record_uuid})
44
+ data_record_version: DataRecordVersion = response.json()
45
+ iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)
46
+
47
+ multipart_uploader = utils.MultiPartUploader(
48
+ use_process_pool=True,
49
+ get_presigned_upload_url_request=dict(
50
+ headers=None,
51
+ requires_biolib_auth=True,
52
+ path=f"/lfs/versions/{data_record_version['uuid']}/presigned_upload_url/",
53
+ ),
54
+ complete_upload_request=dict(
55
+ headers=None,
56
+ requires_biolib_auth=True,
57
+ path=f"/lfs/versions/{data_record_version['uuid']}/complete_upload/",
58
+ ),
59
+ )
60
+
61
+ multipart_uploader.upload(payload_iterator=iterable_zip_stream, payload_size_in_bytes=data_size_in_bytes)
62
+ os.chdir(original_working_dir)
63
+ logger.info(f"Successfully pushed a new Data Record version '{data_record_version['uri']}'")
64
+ return data_record_version['uri']
65
+
66
+
67
+ def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
68
+ app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': uri}).json()
69
+ return DataRecordVersionInfo(
70
+ resource_uri=app_response['app_version']['app_uri'],
71
+ resource_uuid=app_response['app']['public_id'],
72
+ resource_version_uuid=app_response['app_version']['public_id'],
73
+ )
@@ -3,7 +3,7 @@ from datetime import datetime, timedelta
3
3
  from urllib.parse import urlparse
4
4
 
5
5
  from biolib.api import client as api_client
6
- from biolib.biolib_api_client.lfs_types import LargeFileSystemVersion
6
+ from biolib.biolib_api_client.lfs_types import DataRecordVersion
7
7
  from biolib.biolib_binary_format.utils import RemoteEndpoint
8
8
  from biolib.biolib_logging import logger
9
9
  from biolib.typing_utils import Optional
@@ -17,7 +17,7 @@ class DataRecordRemoteStorageEndpoint(RemoteEndpoint):
17
17
 
18
18
  def get_remote_url(self) -> str:
19
19
  if not self._presigned_url or not self._expires_at or datetime.utcnow() > self._expires_at:
20
- lfs_version: LargeFileSystemVersion = api_client.get(
20
+ lfs_version: DataRecordVersion = api_client.get(
21
21
  path=f'/lfs/versions/{self._resource_version_uuid}/',
22
22
  ).json()
23
23
 
@@ -0,0 +1,77 @@
1
+ import io
2
+ import os
3
+ import zipfile as zf
4
+ from pathlib import Path
5
+
6
+ from biolib.typing_utils import Iterator, List, Tuple
7
+
8
+
9
+ def get_files_and_size_of_directory(directory: str) -> Tuple[List[str], int]:
10
+ data_size = 0
11
+ file_list: List[str] = []
12
+
13
+ for path, _, files in os.walk(directory):
14
+ for file in files:
15
+ file_path = os.path.join(path, file)
16
+ if os.path.islink(file_path):
17
+ continue # skip symlinks
18
+
19
+ relative_file_path = file_path[len(directory) + 1 :] # +1 to remove starting slash
20
+ file_list.append(relative_file_path)
21
+ data_size += os.path.getsize(file_path)
22
+
23
+ return file_list, data_size
24
+
25
+
26
+ def get_iterable_zip_stream(files: List[str], chunk_size: int) -> Iterator[bytes]:
27
+ class ChunkedIOBuffer(io.RawIOBase):
28
+ def __init__(self, chunk_size: int):
29
+ super().__init__()
30
+ self.chunk_size = chunk_size
31
+ self.tmp_data = bytearray()
32
+
33
+ def get_buffer_size(self):
34
+ return len(self.tmp_data)
35
+
36
+ def read_chunk(self):
37
+ chunk = bytes(self.tmp_data[: self.chunk_size])
38
+ self.tmp_data = self.tmp_data[self.chunk_size :]
39
+ return chunk
40
+
41
+ def write(self, data):
42
+ data_length = len(data)
43
+ self.tmp_data += data
44
+ return data_length
45
+
46
+ # create chunked buffer to hold data temporarily
47
+ io_buffer = ChunkedIOBuffer(chunk_size)
48
+
49
+ # create zip writer that will write to the io buffer
50
+ zip_writer = zf.ZipFile(io_buffer, mode='w') # type: ignore
51
+
52
+ for file_path in files:
53
+ # generate zip info and prepare zip pointer for writing
54
+ z_info = zf.ZipInfo.from_file(file_path)
55
+ zip_pointer = zip_writer.open(z_info, mode='w')
56
+ if Path(file_path).is_file():
57
+ # read file chunk by chunk
58
+ with open(file_path, 'br') as file_pointer:
59
+ while True:
60
+ chunk = file_pointer.read(chunk_size)
61
+ if len(chunk) == 0:
62
+ break
63
+ # write the chunk to the zip
64
+ zip_pointer.write(chunk)
65
+ # if writing the chunk caused us to go over chunk_size, flush it
66
+ if io_buffer.get_buffer_size() > chunk_size:
67
+ yield io_buffer.read_chunk()
68
+
69
+ zip_pointer.close()
70
+
71
+ # flush any remaining data in the stream (e.g. zip file meta data)
72
+ zip_writer.close()
73
+ while True:
74
+ chunk = io_buffer.read_chunk()
75
+ if len(chunk) == 0:
76
+ break
77
+ yield chunk
@@ -0,0 +1 @@
1
+ from .cache import prune_lfs_cache
@@ -6,12 +6,12 @@ import rich.progress
6
6
  import yaml
7
7
 
8
8
  from biolib import api, utils
9
+ from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
9
10
  from biolib.biolib_api_client import BiolibApiClient
10
11
  from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
11
12
  from biolib.biolib_docker_client import BiolibDockerClient
12
13
  from biolib.biolib_errors import BioLibError
13
14
  from biolib.biolib_logging import logger
14
- from biolib.lfs.utils import get_files_and_size_of_directory, get_iterable_zip_stream
15
15
  from biolib.typing_utils import Iterable, Optional, Set, TypedDict
16
16
 
17
17
  REGEX_MARKDOWN_INLINE_IMAGE = re.compile(r'!\[(?P<alt>.*)\]\((?P<src>.*)\)')
@@ -0,0 +1,19 @@
1
+ from biolib.typing_utils import TypedDict
2
+
3
+
4
+ class RuntimeJobDataDict(TypedDict):
5
+ version: str
6
+ job_requested_machine: str
7
+ job_uuid: str
8
+ job_auth_token: str
9
+ app_uri: str
10
+
11
+
12
+ class BioLibRuntimeError(Exception):
13
+ pass
14
+
15
+
16
+ class BioLibRuntimeNotRecognizedError(BioLibRuntimeError):
17
+ def __init__(self, message='The runtime is not recognized as a BioLib app'):
18
+ self.message = message
19
+ super().__init__(self.message)
@@ -1,30 +1,13 @@
1
- import json
2
- import re
3
-
4
1
  from biolib import api
5
- from biolib.typing_utils import Optional, TypedDict, cast
6
-
7
-
8
- class RuntimeJobDataDict(TypedDict):
9
- version: str
10
- job_requested_machine: str
11
- job_uuid: str
12
- job_auth_token: str
13
- app_uri: str
2
+ from biolib._internal.runtime import BioLibRuntimeError, BioLibRuntimeNotRecognizedError, RuntimeJobDataDict
3
+ from biolib.typing_utils import cast, Optional as _Optional
14
4
 
15
-
16
- class BioLibRuntimeError(Exception):
17
- pass
18
-
19
-
20
- class BioLibRuntimeNotRecognizedError(BioLibRuntimeError):
21
- def __init__(self, message='The runtime is not recognized as a BioLib app'):
22
- self.message = message
23
- super().__init__(self.message)
5
+ import json
6
+ import re
24
7
 
25
8
 
26
9
  class Runtime:
27
- _job_data: Optional[RuntimeJobDataDict] = None
10
+ _job_data: _Optional[RuntimeJobDataDict] = None
28
11
 
29
12
  @staticmethod
30
13
  def check_is_environment_biolib_app() -> bool:
@@ -73,7 +56,7 @@ class Runtime:
73
56
  api.client.post(data={'note': note}, path=f'/jobs/{job_id}/notes/')
74
57
 
75
58
  @staticmethod
76
- def _try_to_get_job_data() -> Optional[RuntimeJobDataDict]:
59
+ def _try_to_get_job_data() -> _Optional[RuntimeJobDataDict]:
77
60
  if not Runtime._job_data:
78
61
  try:
79
62
  with open('/biolib/secrets/biolib_system_secret') as file:
@@ -6,7 +6,7 @@ import os
6
6
  from datetime import datetime, timezone
7
7
  from json.decoder import JSONDecodeError
8
8
 
9
- from biolib._internal.runtime import Runtime
9
+ from biolib._runtime.runtime import Runtime
10
10
  from biolib._internal.http_client import HttpClient
11
11
  from biolib.typing_utils import Optional
12
12
  from biolib.biolib_errors import BioLibError
@@ -16,6 +16,7 @@ class AppVersion(AppVersionSlim):
16
16
  source_code_license: str
17
17
  stdout_render_type: Literal['text', 'markdown']
18
18
  main_output_file: Optional[str]
19
+ app_uri: str
19
20
 
20
21
 
21
22
  class App(TypedDict):
@@ -0,0 +1,19 @@
1
+ from biolib.typing_utils import TypedDict
2
+
3
+
4
+ class DataRecordVersion(TypedDict):
5
+ presigned_download_url: str
6
+ size_bytes: int
7
+ uri: str
8
+ uuid: str
9
+
10
+
11
+ class DataRecordInfo(TypedDict):
12
+ uri: str
13
+ uuid: str
14
+
15
+
16
+ class DataRecordVersionInfo(TypedDict):
17
+ resource_uri: str
18
+ resource_uuid: str
19
+ resource_version_uuid: str
@@ -0,0 +1,79 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ from typing import Dict, List
5
+
6
+ import click
7
+
8
+ from biolib._data_record.data_record import DataRecord
9
+ from biolib.biolib_logging import logger, logger_no_user_data
10
+ from biolib.typing_utils import Optional
11
+
12
+
13
+ @click.group(help='Data Records')
14
+ def data_record() -> None:
15
+ logger.configure(default_log_level=logging.INFO)
16
+ logger_no_user_data.configure(default_log_level=logging.INFO)
17
+
18
+
19
+ @data_record.command(help='Create a Data Record')
20
+ @click.argument('uri', required=True)
21
+ @click.option('--data-path', required=True, type=click.Path(exists=True))
22
+ def create(uri: str, data_path: str) -> None:
23
+ DataRecord.create(destination=uri, data_path=data_path)
24
+
25
+
26
+ @data_record.command(help='Update a Data Record')
27
+ @click.argument('uri', required=True)
28
+ @click.option('--data-path', required=True, type=click.Path(exists=True))
29
+ @click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
30
+ def update(uri: str, data_path: str, chunk_size: Optional[int]) -> None:
31
+ DataRecord.get_by_uri(uri=uri).update(data_path=data_path, chunk_size_in_mb=chunk_size)
32
+
33
+
34
+ @data_record.command(help='Download files from a Data Record')
35
+ @click.argument('uri', required=True)
36
+ @click.option('--file', required=False, type=str)
37
+ @click.option('--path-filter', required=False, type=str, hide_input=True)
38
+ def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
39
+ record = DataRecord.get_by_uri(uri=uri)
40
+ if file is not None:
41
+ try:
42
+ file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file][0]
43
+ except IndexError:
44
+ raise Exception('File not found in data record') from None
45
+
46
+ assert not os.path.exists(file_obj.name), 'File already exists in current directory'
47
+ with open(file_obj.name, 'wb') as file_handle:
48
+ file_handle.write(file_obj.get_data())
49
+
50
+ else:
51
+ assert not os.path.exists(record.name), f'Directory with name {record.name} already exists in current directory'
52
+ record.save_files(output_dir=record.name, path_filter=path_filter)
53
+
54
+
55
+ @data_record.command(help='Describe a Data Record')
56
+ @click.argument('uri', required=True)
57
+ @click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
58
+ def describe(uri: str, output_as_json: bool) -> None:
59
+ record = DataRecord.get_by_uri(uri)
60
+ files_info: List[Dict] = []
61
+ total_size_in_bytes = 0
62
+ for file in record.list_files():
63
+ files_info.append({'path': file.path, 'size_bytes': file.length})
64
+ total_size_in_bytes += file.length
65
+
66
+ if output_as_json:
67
+ print(
68
+ json.dumps(
69
+ obj={'uri': record.uri, 'size_bytes': total_size_in_bytes, 'files': files_info},
70
+ indent=4,
71
+ )
72
+ )
73
+ else:
74
+ print(f'Data Record {record.uri}\ntotal {total_size_in_bytes} bytes\n')
75
+ print('size bytes path')
76
+ for file_info in files_info:
77
+ size_string = str(file_info['size_bytes'])
78
+ leading_space_string = ' ' * (10 - len(size_string))
79
+ print(f"{leading_space_string}{size_string} {file_info['path']}")
@@ -7,9 +7,9 @@ from typing import Dict, List
7
7
  import click
8
8
 
9
9
  from biolib import biolib_errors
10
- from biolib._internal.data_record import DataRecord
10
+ from biolib._data_record.data_record import DataRecord
11
+ from biolib._internal.lfs import prune_lfs_cache
11
12
  from biolib.biolib_logging import logger, logger_no_user_data
12
- from biolib.lfs import create_large_file_system, prune_lfs_cache, push_large_file_system
13
13
  from biolib.typing_utils import Optional
14
14
 
15
15
 
@@ -21,9 +21,10 @@ def lfs() -> None:
21
21
@lfs.command(help='Create a Large File System')
@click.argument('uri', required=True)
def create(uri: str) -> None:
    """Create a Large File System at URI.

    Deprecated alias kept for backwards compatibility; delegates to
    DataRecord.create. Use "biolib data-record create" instead.
    """
    # Fixed grammar of the user-facing notice ("This is command deprecated").
    logger.warning('This command is deprecated, please use "biolib data-record create" instead.')
    logger.configure(default_log_level=logging.INFO)
    logger_no_user_data.configure(default_log_level=logging.INFO)
    DataRecord.create(destination=uri)
27
28
 
28
29
 
29
30
  @lfs.command(help='Push a new version of a Large File System')
@@ -31,10 +32,11 @@ def create(uri: str) -> None:
31
32
  @click.option('--path', required=True, type=click.Path(exists=True))
32
33
  @click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
33
34
  def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
35
+ logger.warning('This is command deprecated, please use "biolib data-record update" instead.')
34
36
  logger.configure(default_log_level=logging.INFO)
35
37
  logger_no_user_data.configure(default_log_level=logging.INFO)
36
38
  try:
37
- push_large_file_system(lfs_uri=uri, input_dir=path, chunk_size_in_mb=chunk_size)
39
+ DataRecord.get_by_uri(uri=uri).update(data_path=path, chunk_size_in_mb=chunk_size)
38
40
  except biolib_errors.BioLibError as error:
39
41
  print(f'An error occurred:\n{error.message}', file=sys.stderr)
40
42
  exit(1)
@@ -44,10 +46,11 @@ def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
44
46
  @click.argument('uri', required=True)
45
47
  @click.option('--file-path', required=True, type=str)
46
48
  def download_file(uri: str, file_path: str) -> None:
49
+ logger.warning('This is command deprecated, please use "biolib data-record download" instead.')
47
50
  logger.configure(default_log_level=logging.INFO)
48
51
  logger_no_user_data.configure(default_log_level=logging.INFO)
49
52
  try:
50
- record = DataRecord(uri=uri)
53
+ record = DataRecord.get_by_uri(uri=uri)
51
54
  try:
52
55
  file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file_path][0]
53
56
  except IndexError:
@@ -66,7 +69,8 @@ def download_file(uri: str, file_path: str) -> None:
66
69
  @click.argument('uri', required=True)
67
70
  @click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
68
71
  def describe(uri: str, output_as_json: bool) -> None:
69
- data_record = DataRecord(uri)
72
+ logger.warning('This is command deprecated, please use "biolib data-record describe" instead.')
73
+ data_record = DataRecord.get_by_uri(uri)
70
74
  files_info: List[Dict] = []
71
75
  total_size_in_bytes = 0
72
76
  for file in data_record.list_files():
@@ -1,5 +1,5 @@
1
1
  import warnings
2
- from biolib.sdk import Runtime as _Runtime
2
+ from biolib._runtime.runtime import Runtime as _Runtime
3
3
 
4
4
 
5
5
  def set_main_result_prefix(result_prefix: str) -> None:
@@ -1,12 +1,12 @@
1
1
  # Imports to hide and use as private internal utils
2
+ from biolib._data_record.data_record import DataRecord as _DataRecord
2
3
  from biolib._internal.push_application import push_application as _push_application
3
4
  from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
4
5
  from biolib.app import BioLibApp as _BioLibApp
5
6
  from biolib.typing_utils import Optional as _Optional
6
7
 
7
- # Imports to expose as public API
8
- from biolib._internal.data_record import DataRecord
9
- from biolib._internal.runtime import Runtime
8
+ # Classes to expose as public API
9
+ from biolib._runtime.runtime import Runtime
10
10
 
11
11
 
12
12
  def push_app_version(uri: str, path: str) -> _BioLibApp:
@@ -42,5 +42,9 @@ def get_app_version_pytest_plugin(app_version: _BioLibApp):
42
42
  return AppVersionFixturePlugin(app_version)
43
43
 
44
44
 
45
- def create_data_record(destination: str, data_path: str, name: _Optional[str] = None) -> DataRecord:
46
- return DataRecord.create(destination, data_path, name)
45
def create_data_record(destination: str, data_path: str, name: _Optional[str] = None) -> _DataRecord:
    """Create a Data Record from the files at data_path.

    When name is given, it is appended to destination as a path segment to
    form the record URI; otherwise destination is used as-is.
    """
    target_uri = f'{destination}/{name}' if name else destination
    return _DataRecord.create(target_uri, data_path)