pybiolib 0.2.951__py3-none-any.whl → 1.2.1890__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262) hide show
  1. biolib/__init__.py +357 -11
  2. biolib/_data_record/data_record.py +380 -0
  3. biolib/_index/__init__.py +0 -0
  4. biolib/_index/index.py +55 -0
  5. biolib/_index/query_result.py +103 -0
  6. biolib/_internal/__init__.py +0 -0
  7. biolib/_internal/add_copilot_prompts.py +58 -0
  8. biolib/_internal/add_gui_files.py +81 -0
  9. biolib/_internal/data_record/__init__.py +1 -0
  10. biolib/_internal/data_record/data_record.py +85 -0
  11. biolib/_internal/data_record/push_data.py +116 -0
  12. biolib/_internal/data_record/remote_storage_endpoint.py +43 -0
  13. biolib/_internal/errors.py +5 -0
  14. biolib/_internal/file_utils.py +125 -0
  15. biolib/_internal/fuse_mount/__init__.py +1 -0
  16. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  17. biolib/_internal/http_client.py +159 -0
  18. biolib/_internal/lfs/__init__.py +1 -0
  19. biolib/_internal/lfs/cache.py +51 -0
  20. biolib/_internal/libs/__init__.py +1 -0
  21. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  22. biolib/_internal/push_application.py +488 -0
  23. biolib/_internal/runtime.py +22 -0
  24. biolib/_internal/string_utils.py +13 -0
  25. biolib/_internal/templates/__init__.py +1 -0
  26. biolib/_internal/templates/copilot_template/.github/instructions/general-app-knowledge.instructions.md +10 -0
  27. biolib/_internal/templates/copilot_template/.github/instructions/style-general.instructions.md +20 -0
  28. biolib/_internal/templates/copilot_template/.github/instructions/style-python.instructions.md +16 -0
  29. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  30. biolib/_internal/templates/copilot_template/.github/prompts/biolib_app_inputs.prompt.md +11 -0
  31. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  32. biolib/_internal/templates/copilot_template/.github/prompts/biolib_run_apps.prompt.md +12 -0
  33. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  34. biolib/_internal/templates/github_workflow_template/.github/workflows/biolib.yml +21 -0
  35. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  36. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  37. biolib/_internal/templates/gui_template/App.tsx +53 -0
  38. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  39. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  40. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  41. biolib/_internal/templates/gui_template/index.css +5 -0
  42. biolib/_internal/templates/gui_template/index.html +13 -0
  43. biolib/_internal/templates/gui_template/index.tsx +10 -0
  44. biolib/_internal/templates/gui_template/package.json +27 -0
  45. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  46. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  47. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  48. biolib/_internal/templates/init_template/.biolib/config.yml +19 -0
  49. biolib/_internal/templates/init_template/Dockerfile +14 -0
  50. biolib/_internal/templates/init_template/requirements.txt +1 -0
  51. biolib/_internal/templates/init_template/run.py +12 -0
  52. biolib/_internal/templates/init_template/run.sh +4 -0
  53. biolib/_internal/templates/templates.py +25 -0
  54. biolib/_internal/tree_utils.py +106 -0
  55. biolib/_internal/utils/__init__.py +65 -0
  56. biolib/_internal/utils/auth.py +46 -0
  57. biolib/_internal/utils/job_url.py +33 -0
  58. biolib/_internal/utils/multinode.py +263 -0
  59. biolib/_runtime/runtime.py +157 -0
  60. biolib/_session/session.py +44 -0
  61. biolib/_shared/__init__.py +0 -0
  62. biolib/_shared/types/__init__.py +74 -0
  63. biolib/_shared/types/account.py +12 -0
  64. biolib/_shared/types/account_member.py +8 -0
  65. biolib/_shared/types/app.py +9 -0
  66. biolib/_shared/types/data_record.py +40 -0
  67. biolib/_shared/types/experiment.py +32 -0
  68. biolib/_shared/types/file_node.py +17 -0
  69. biolib/_shared/types/push.py +6 -0
  70. biolib/_shared/types/resource.py +37 -0
  71. biolib/_shared/types/resource_deploy_key.py +11 -0
  72. biolib/_shared/types/resource_permission.py +14 -0
  73. biolib/_shared/types/resource_version.py +19 -0
  74. biolib/_shared/types/result.py +14 -0
  75. biolib/_shared/types/typing.py +10 -0
  76. biolib/_shared/types/user.py +19 -0
  77. biolib/_shared/utils/__init__.py +7 -0
  78. biolib/_shared/utils/resource_uri.py +75 -0
  79. biolib/api/__init__.py +6 -0
  80. biolib/api/client.py +168 -0
  81. biolib/app/app.py +252 -49
  82. biolib/app/search_apps.py +45 -0
  83. biolib/biolib_api_client/api_client.py +126 -31
  84. biolib/biolib_api_client/app_types.py +24 -4
  85. biolib/biolib_api_client/auth.py +31 -8
  86. biolib/biolib_api_client/biolib_app_api.py +147 -52
  87. biolib/biolib_api_client/biolib_job_api.py +161 -141
  88. biolib/biolib_api_client/job_types.py +21 -5
  89. biolib/biolib_api_client/lfs_types.py +7 -23
  90. biolib/biolib_api_client/user_state.py +56 -0
  91. biolib/biolib_binary_format/__init__.py +1 -4
  92. biolib/biolib_binary_format/file_in_container.py +105 -0
  93. biolib/biolib_binary_format/module_input.py +24 -7
  94. biolib/biolib_binary_format/module_output_v2.py +149 -0
  95. biolib/biolib_binary_format/remote_endpoints.py +34 -0
  96. biolib/biolib_binary_format/remote_stream_seeker.py +59 -0
  97. biolib/biolib_binary_format/saved_job.py +3 -2
  98. biolib/biolib_binary_format/{attestation_document.py → stdout_and_stderr.py} +8 -8
  99. biolib/biolib_binary_format/system_status_update.py +3 -2
  100. biolib/biolib_binary_format/utils.py +175 -0
  101. biolib/biolib_docker_client/__init__.py +11 -2
  102. biolib/biolib_errors.py +36 -0
  103. biolib/biolib_logging.py +27 -10
  104. biolib/cli/__init__.py +38 -0
  105. biolib/cli/auth.py +46 -0
  106. biolib/cli/data_record.py +164 -0
  107. biolib/cli/index.py +32 -0
  108. biolib/cli/init.py +421 -0
  109. biolib/cli/lfs.py +101 -0
  110. biolib/cli/push.py +50 -0
  111. biolib/cli/run.py +63 -0
  112. biolib/cli/runtime.py +14 -0
  113. biolib/cli/sdk.py +16 -0
  114. biolib/cli/start.py +56 -0
  115. biolib/compute_node/cloud_utils/cloud_utils.py +110 -161
  116. biolib/compute_node/job_worker/cache_state.py +66 -88
  117. biolib/compute_node/job_worker/cache_types.py +1 -6
  118. biolib/compute_node/job_worker/docker_image_cache.py +112 -37
  119. biolib/compute_node/job_worker/executors/__init__.py +0 -3
  120. biolib/compute_node/job_worker/executors/docker_executor.py +532 -199
  121. biolib/compute_node/job_worker/executors/docker_types.py +9 -1
  122. biolib/compute_node/job_worker/executors/types.py +19 -9
  123. biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +30 -0
  124. biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +3 -5
  125. biolib/compute_node/job_worker/job_storage.py +108 -0
  126. biolib/compute_node/job_worker/job_worker.py +397 -212
  127. biolib/compute_node/job_worker/large_file_system.py +87 -38
  128. biolib/compute_node/job_worker/network_alloc.py +99 -0
  129. biolib/compute_node/job_worker/network_buffer.py +240 -0
  130. biolib/compute_node/job_worker/utilization_reporter_thread.py +197 -0
  131. biolib/compute_node/job_worker/utils.py +9 -24
  132. biolib/compute_node/remote_host_proxy.py +400 -98
  133. biolib/compute_node/utils.py +31 -9
  134. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  135. biolib/compute_node/webserver/proxy_utils.py +28 -0
  136. biolib/compute_node/webserver/webserver.py +130 -44
  137. biolib/compute_node/webserver/webserver_types.py +2 -6
  138. biolib/compute_node/webserver/webserver_utils.py +77 -12
  139. biolib/compute_node/webserver/worker_thread.py +183 -42
  140. biolib/experiments/__init__.py +0 -0
  141. biolib/experiments/experiment.py +356 -0
  142. biolib/jobs/__init__.py +1 -0
  143. biolib/jobs/job.py +741 -0
  144. biolib/jobs/job_result.py +185 -0
  145. biolib/jobs/types.py +50 -0
  146. biolib/py.typed +0 -0
  147. biolib/runtime/__init__.py +14 -0
  148. biolib/sdk/__init__.py +91 -0
  149. biolib/tables.py +34 -0
  150. biolib/typing_utils.py +2 -7
  151. biolib/user/__init__.py +1 -0
  152. biolib/user/sign_in.py +54 -0
  153. biolib/utils/__init__.py +162 -0
  154. biolib/utils/cache_state.py +94 -0
  155. biolib/utils/multipart_uploader.py +194 -0
  156. biolib/utils/seq_util.py +150 -0
  157. biolib/utils/zip/remote_zip.py +640 -0
  158. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  159. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  160. {pybiolib-0.2.951.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  161. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  162. README.md +0 -17
  163. biolib/app/app_result.py +0 -68
  164. biolib/app/utils.py +0 -62
  165. biolib/biolib-js/0-biolib.worker.js +0 -1
  166. biolib/biolib-js/1-biolib.worker.js +0 -1
  167. biolib/biolib-js/2-biolib.worker.js +0 -1
  168. biolib/biolib-js/3-biolib.worker.js +0 -1
  169. biolib/biolib-js/4-biolib.worker.js +0 -1
  170. biolib/biolib-js/5-biolib.worker.js +0 -1
  171. biolib/biolib-js/6-biolib.worker.js +0 -1
  172. biolib/biolib-js/index.html +0 -10
  173. biolib/biolib-js/main-biolib.js +0 -1
  174. biolib/biolib_api_client/biolib_account_api.py +0 -21
  175. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -108
  176. biolib/biolib_binary_format/aes_encrypted_package.py +0 -42
  177. biolib/biolib_binary_format/module_output.py +0 -58
  178. biolib/biolib_binary_format/rsa_encrypted_aes_package.py +0 -57
  179. biolib/biolib_push.py +0 -114
  180. biolib/cli.py +0 -203
  181. biolib/cli_utils.py +0 -273
  182. biolib/compute_node/cloud_utils/enclave_parent_types.py +0 -7
  183. biolib/compute_node/enclave/__init__.py +0 -2
  184. biolib/compute_node/enclave/enclave_remote_hosts.py +0 -53
  185. biolib/compute_node/enclave/nitro_secure_module_utils.py +0 -64
  186. biolib/compute_node/job_worker/executors/base_executor.py +0 -18
  187. biolib/compute_node/job_worker/executors/pyppeteer_executor.py +0 -173
  188. biolib/compute_node/job_worker/executors/remote/__init__.py +0 -1
  189. biolib/compute_node/job_worker/executors/remote/nitro_enclave_utils.py +0 -81
  190. biolib/compute_node/job_worker/executors/remote/remote_executor.py +0 -51
  191. biolib/lfs.py +0 -196
  192. biolib/pyppeteer/.circleci/config.yml +0 -100
  193. biolib/pyppeteer/.coveragerc +0 -3
  194. biolib/pyppeteer/.gitignore +0 -89
  195. biolib/pyppeteer/.pre-commit-config.yaml +0 -28
  196. biolib/pyppeteer/CHANGES.md +0 -253
  197. biolib/pyppeteer/CONTRIBUTING.md +0 -26
  198. biolib/pyppeteer/LICENSE +0 -12
  199. biolib/pyppeteer/README.md +0 -137
  200. biolib/pyppeteer/docs/Makefile +0 -177
  201. biolib/pyppeteer/docs/_static/custom.css +0 -28
  202. biolib/pyppeteer/docs/_templates/layout.html +0 -10
  203. biolib/pyppeteer/docs/changes.md +0 -1
  204. biolib/pyppeteer/docs/conf.py +0 -299
  205. biolib/pyppeteer/docs/index.md +0 -21
  206. biolib/pyppeteer/docs/make.bat +0 -242
  207. biolib/pyppeteer/docs/reference.md +0 -211
  208. biolib/pyppeteer/docs/server.py +0 -60
  209. biolib/pyppeteer/poetry.lock +0 -1699
  210. biolib/pyppeteer/pyppeteer/__init__.py +0 -135
  211. biolib/pyppeteer/pyppeteer/accessibility.py +0 -286
  212. biolib/pyppeteer/pyppeteer/browser.py +0 -401
  213. biolib/pyppeteer/pyppeteer/browser_fetcher.py +0 -194
  214. biolib/pyppeteer/pyppeteer/command.py +0 -22
  215. biolib/pyppeteer/pyppeteer/connection/__init__.py +0 -242
  216. biolib/pyppeteer/pyppeteer/connection/cdpsession.py +0 -101
  217. biolib/pyppeteer/pyppeteer/coverage.py +0 -346
  218. biolib/pyppeteer/pyppeteer/device_descriptors.py +0 -787
  219. biolib/pyppeteer/pyppeteer/dialog.py +0 -79
  220. biolib/pyppeteer/pyppeteer/domworld.py +0 -597
  221. biolib/pyppeteer/pyppeteer/emulation_manager.py +0 -53
  222. biolib/pyppeteer/pyppeteer/errors.py +0 -48
  223. biolib/pyppeteer/pyppeteer/events.py +0 -63
  224. biolib/pyppeteer/pyppeteer/execution_context.py +0 -156
  225. biolib/pyppeteer/pyppeteer/frame/__init__.py +0 -299
  226. biolib/pyppeteer/pyppeteer/frame/frame_manager.py +0 -306
  227. biolib/pyppeteer/pyppeteer/helpers.py +0 -245
  228. biolib/pyppeteer/pyppeteer/input.py +0 -371
  229. biolib/pyppeteer/pyppeteer/jshandle.py +0 -598
  230. biolib/pyppeteer/pyppeteer/launcher.py +0 -683
  231. biolib/pyppeteer/pyppeteer/lifecycle_watcher.py +0 -169
  232. biolib/pyppeteer/pyppeteer/models/__init__.py +0 -103
  233. biolib/pyppeteer/pyppeteer/models/_protocol.py +0 -12460
  234. biolib/pyppeteer/pyppeteer/multimap.py +0 -82
  235. biolib/pyppeteer/pyppeteer/network_manager.py +0 -678
  236. biolib/pyppeteer/pyppeteer/options.py +0 -8
  237. biolib/pyppeteer/pyppeteer/page.py +0 -1728
  238. biolib/pyppeteer/pyppeteer/pipe_transport.py +0 -59
  239. biolib/pyppeteer/pyppeteer/target.py +0 -147
  240. biolib/pyppeteer/pyppeteer/task_queue.py +0 -24
  241. biolib/pyppeteer/pyppeteer/timeout_settings.py +0 -36
  242. biolib/pyppeteer/pyppeteer/tracing.py +0 -93
  243. biolib/pyppeteer/pyppeteer/us_keyboard_layout.py +0 -305
  244. biolib/pyppeteer/pyppeteer/util.py +0 -18
  245. biolib/pyppeteer/pyppeteer/websocket_transport.py +0 -47
  246. biolib/pyppeteer/pyppeteer/worker.py +0 -101
  247. biolib/pyppeteer/pyproject.toml +0 -97
  248. biolib/pyppeteer/spell.txt +0 -137
  249. biolib/pyppeteer/tox.ini +0 -72
  250. biolib/pyppeteer/utils/generate_protocol_types.py +0 -603
  251. biolib/start_cli.py +0 -7
  252. biolib/utils.py +0 -47
  253. biolib/validators/validate_app_version.py +0 -183
  254. biolib/validators/validate_argument.py +0 -134
  255. biolib/validators/validate_module.py +0 -323
  256. biolib/validators/validate_zip_file.py +0 -40
  257. biolib/validators/validator_utils.py +0 -103
  258. pybiolib-0.2.951.dist-info/LICENSE +0 -21
  259. pybiolib-0.2.951.dist-info/METADATA +0 -61
  260. pybiolib-0.2.951.dist-info/RECORD +0 -153
  261. pybiolib-0.2.951.dist-info/entry_points.txt +0 -3
  262. /LICENSE → /pybiolib-1.2.1890.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,380 @@
1
+ import os
2
+ from collections import namedtuple
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ from struct import Struct
6
+ from typing import Callable, Dict, Iterable, Iterator, List, Optional, Union, cast
7
+
8
+ from biolib import api
9
+ from biolib._internal.data_record.data_record import validate_sqlite_v1
10
+ from biolib._internal.data_record.push_data import (
11
+ _upload_from_iterator,
12
+ push_data_path,
13
+ validate_data_path_and_get_files_and_size_of_directory,
14
+ )
15
+ from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
16
+ from biolib._internal.http_client import HttpClient
17
+ from biolib._shared import types
18
+ from biolib._shared.types import ResourceDetailedDict, ResourceVersionDetailedDict, ZipFileNodeDict
19
+ from biolib._shared.utils import parse_resource_uri
20
+ from biolib.api import client as api_client
21
+ from biolib.biolib_api_client import BiolibApiClient
22
+ from biolib.biolib_api_client.biolib_app_api import _get_resource_uri_from_str
23
+ from biolib.biolib_api_client.lfs_types import DataRecordInfo
24
+ from biolib.biolib_binary_format import LazyLoadedFile
25
+ from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
26
+ from biolib.biolib_logging import logger
27
+
28
+ PathFilter = Union[str, List[str], Callable[[str], bool]]
29
+
30
+
31
class DataRecord:
    """Client-side handle for a Data Record resource.

    Wraps the resource dict returned by the API (``_state``) and offers helpers to create,
    list, download, update, clone and delete Data Records.
    """

    # Layout of a ZIP local file header, used by ``_get_file`` to locate where a file's data begins.
    # The 4-byte signature is followed by the fixed-size fields described by the struct below.
    # Built once here instead of once per file, since ``_get_file`` is called for every listed file.
    _LOCAL_FILE_HEADER_SIGNATURE = b'\x50\x4b\x03\x04'
    _LOCAL_FILE_HEADER_STRUCT = Struct('<H2sHHHIIIHH')
    _LocalFileHeader = namedtuple(
        'LocalFileHeader',
        (
            'version',
            'flags',
            'compression_raw',
            'mod_time',
            'mod_date',
            'crc_32_expected',
            'compressed_size_raw',
            'uncompressed_size_raw',
            'file_name_len',
            'extra_field_len',
        ),
    )

    def __init__(self, _internal_state: ResourceDetailedDict):
        # Resource dict as returned by the API; replaced wholesale when the record is re-fetched or updated.
        self._state = _internal_state

    def __repr__(self):
        return f'DataRecord: {self._state["uri"]}'

    @property
    def uri(self) -> str:
        """The resource URI of this Data Record."""
        return self._state['uri']

    @property
    def uuid(self) -> str:
        """The UUID of this Data Record."""
        return self._state['uuid']

    @property
    def name(self) -> str:
        """The resource name parsed from this record's URI.

        Raises:
            ValueError: If the URI does not contain a resource name.
        """
        uri_parsed = parse_resource_uri(self._state['uri'], use_account_as_name_default=False)
        if not uri_parsed['resource_name']:
            raise ValueError('Expected parameter "resource_uri" to contain resource name')

        return uri_parsed['resource_name']

    @staticmethod
    def get_by_uri(uri: str) -> 'DataRecord':
        """Fetch the resource with the given URI and wrap it in a DataRecord.

        Raises:
            Exception: If the resource exists but its type is not ``data-record``.
        """
        normalized_uri = _get_resource_uri_from_str(uri)
        resource_dict: ResourceDetailedDict = api_client.get(path='/resource/', params={'uri': normalized_uri}).json()
        if resource_dict['type'] != 'data-record':
            raise Exception(f'Resource "{resource_dict["uri"]}" is not a Data Record')

        return DataRecord(_internal_state=resource_dict)

    @staticmethod
    def create(destination: str, data_path: Optional[str] = None, record_type: Optional[str] = None) -> 'DataRecord':
        """Create a new Data Record and optionally push an initial version.

        Args:
            destination: Resource URI to create. If it contains no resource name, a name of the
                form ``data-record-<local ISO timestamp>`` is appended to it.
            data_path: Optional local directory whose content is pushed via ``update``.
            record_type: Optional value sent as ``type`` in the create request.

        Returns:
            The created DataRecord, re-fetched by URI.

        Raises:
            AssertionError: If ``data_path`` is given but is not a directory.
        """
        BiolibApiClient.assert_is_signed_in(authenticated_action_description='create a Data Record')
        if data_path is not None and not os.path.isdir(data_path):
            # Raised explicitly (rather than via an ``assert`` statement) so that the check is not
            # stripped under ``python -O``; the exception type is unchanged for existing callers.
            raise AssertionError(f'The path "{data_path}" is not a directory.')

        uri_parsed = parse_resource_uri(destination, use_account_as_name_default=False)
        if uri_parsed['resource_name_normalized']:
            data_record_uri = destination
        else:
            # Drop the fractional seconds and replace ':' with '-' in the timestamp
            record_name = 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
            data_record_uri = f'{destination}/{record_name}'

        response = api.client.post(
            path='/resources/data-records/',
            data={
                'uri': data_record_uri,
                'type': record_type,
            },
        )
        data_record_info: DataRecordInfo = response.json()
        logger.info(f"Successfully created new Data Record '{data_record_info['uri']}'")

        data_record = DataRecord.get_by_uri(uri=data_record_info['uri'])
        if data_path is not None:
            data_record.update(data_path=data_path)

        return data_record

    @staticmethod
    def fetch(uri: Optional[str] = None, count: Optional[int] = None) -> List['DataRecord']:
        """List Data Records, optionally restricted by account and resource name.

        Args:
            uri: Optional URI; its account handle (and resource name, if present) are used as filters.
            count: Optional page size. When omitted, a page size of 1000 is requested and a warning
                is logged if exactly that many results come back.

        Returns:
            DataRecord objects built from the list response. Their state carries no ``version``
            key; ``_get_version`` re-fetches the record when it is needed.
        """
        # TODO: Simplify when backend exposes /api/resources/ instead of /api/apps/
        max_page_size = 1_000
        params: Dict[str, Union[str, int]] = {
            'page_size': str(count or max_page_size),
            'resource_type': 'data-record',
        }
        if uri:
            uri_parsed = parse_resource_uri(uri, use_account_as_name_default=False)
            params['account_handle'] = uri_parsed['account_handle_normalized']
            if uri_parsed['resource_name_normalized']:
                params['app_name'] = uri_parsed['resource_name_normalized']

        results = api_client.get(path='/apps/', params=params).json()['results']
        if count is None and len(results) == max_page_size:
            logger.warning(
                f'Fetch results exceeded maximum count of {max_page_size}. Some data records might not be fetched.'
            )

        return [
            DataRecord(
                _internal_state=ResourceDetailedDict(
                    uri=result['resource_uri'],
                    uuid=result['public_id'],
                    name=result['name'],
                    created_at=result['created_at'],
                    type=result['type'],
                    description=result['description'],
                    account_uuid=result['account_id'],
                    experiment=None,
                )
            )
            for result in results
        ]

    @staticmethod
    def clone(
        source: 'DataRecord',
        destination: 'DataRecord',
        on_progress: Optional[Callable[[int, int], None]] = None,
    ) -> 'DataRecord':
        """Copy the zip of ``source`` into a new published version of ``destination``.

        Args:
            source: Record whose zip is streamed in ranged chunks.
            destination: Record that receives the new version.
            on_progress: Optional callback forwarded to ``_upload_from_iterator``.
                NOTE(review): the meaning of its two int arguments is defined by the uploader - confirm there.

        Returns:
            A DataRecord looked up from the newly created version.

        Raises:
            ValueError: If the source zip has a size of 0 bytes.
        """
        BiolibApiClient.assert_is_signed_in(authenticated_action_description='clone a Data Record')

        # pylint: disable=protected-access
        total_size_in_bytes = source._get_zip_size_bytes()

        if total_size_in_bytes == 0:
            raise ValueError('Source data record has no data to clone')

        # At least 10 MB per chunk, growing with the zip so that at most ~9000 chunks are produced.
        # NOTE(review): presumably this keeps the upload below a multipart part-count limit - confirm.
        min_chunk_size_bytes = 10_000_000
        chunk_size_in_bytes = max(min_chunk_size_bytes, int(total_size_in_bytes / 9_000))

        zip_iterator = source._iter_zip_bytes(chunk_size_bytes=chunk_size_in_bytes)

        new_resource_version_uuid = _upload_from_iterator(
            resource_uuid=destination._state['uuid'],
            payload_iterator=zip_iterator,
            payload_size_in_bytes=total_size_in_bytes,
            publish=True,
            on_progress=on_progress,
        )
        # pylint: enable=protected-access

        logger.info(f"Successfully cloned data to '{destination.uri}'")
        return DataRecord._get_by_version_uuid(new_resource_version_uuid)

    def list_files(
        self,
        path_filter: Optional[PathFilter] = None,
        max_count: Optional[int] = 100_000,
    ) -> List[LazyLoadedFile]:
        """Return the files of the active version as lazily loaded file objects.

        Args:
            path_filter: Optional string, list of strings or predicate used to select files.
            max_count: Upper bound on the number of files; ``None`` disables the bound.

        Raises:
            Exception: If more than ``max_count`` files match.
        """
        # Ask for one file more than allowed so that exceeding the limit can be detected
        files = list(
            self._fetch_files(
                path_filter=path_filter,
                max_count=max_count + 1 if max_count is not None else None,
            )
        )

        if max_count is not None and len(files) > max_count:
            raise Exception(
                f'list_files returned more than {max_count} files. '
                f'Please set the keyword argument "max_count" to a higher number.'
            )

        return files

    def download_zip(self, output_path: str):
        """Download the record's zip from remote storage to ``output_path``."""
        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(uri=self.uri)
        HttpClient.request(url=remote_storage_endpoint.get_remote_url(), response_path=output_path)

    def download_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
        """Download the (optionally filtered) files of the record into ``output_dir``.

        File paths come from the remote file listing, so each one is resolved against
        ``output_dir`` and rejected if it would end up outside of that directory.

        Raises:
            ValueError: If a listed file path resolves to a location outside ``output_dir``.
        """
        filtered_files = self.list_files(path_filter=path_filter)

        if not filtered_files:
            logger.debug('No files to save')
            return

        # An absolute root guarantees a non-empty dirname below, also when output_dir is ''
        output_root = os.path.abspath(output_dir)
        for file in filtered_files:
            # Leading slashes are stripped because os.path.join would otherwise discard output_root
            file_path = os.path.abspath(os.path.join(output_root, file.path.lstrip('/')))
            if os.path.commonpath([output_root, file_path]) != output_root:
                raise ValueError(f'Refusing to write file "{file.path}" outside of output directory "{output_dir}"')

            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, mode='wb') as file_handle:
                for chunk in file.get_data_iterator():
                    file_handle.write(chunk)

    def save_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
        """Alias of ``download_files``."""
        self.download_files(output_dir=output_dir, path_filter=path_filter)

    def update(self, data_path: str, chunk_size_in_mb: Optional[int] = None) -> None:
        """Push the content of ``data_path`` as a new published version of this record.

        If the record has a type, its validation rules are run first; only rules of type
        ``sqlite-v1`` are supported. On success the internal state is refreshed from the new version.

        Raises:
            Exception: If a validation rule fails or has an unknown type.
        """
        BiolibApiClient.assert_is_signed_in(authenticated_action_description='push data to a Data Record')
        files_to_zip, data_size_in_bytes = validate_data_path_and_get_files_and_size_of_directory(data_path)

        # validate data record
        detailed_dict: types.DataRecordDetailedDict = self._get_detailed_dict()
        if detailed_dict['type']:
            # only validate if data record has a type
            data_record_type: types.DataRecordTypeDict = detailed_dict['type']
            logger.info(f"Validating data record of type {data_record_type['name']}")
            for rule in data_record_type['validation_rules']:
                logger.info(f"Validating rule {rule['type']} for {rule['path']}...")
                if rule['type'] == 'sqlite-v1':
                    try:
                        # NOTE(review): rule['path'] is not joined with data_path, so a relative path is
                        # resolved against the current working directory - confirm this is intended.
                        validate_sqlite_v1(schema=rule['rule'], sqlite_file=Path(rule['path']))
                    except Exception as error:
                        raise Exception('Data Record Validation failed') from error
                else:
                    raise Exception(f"Error processing data record validation: unknown rule type {rule['type']}")

        new_resource_version_uuid = push_data_path(
            data_path=data_path,
            data_size_in_bytes=data_size_in_bytes,
            files_to_zip=files_to_zip,
            resource_uuid=self._state['uuid'],
            chunk_size_in_mb=chunk_size_in_mb,
            publish=True,
        )

        updated_record = DataRecord._get_by_version_uuid(new_resource_version_uuid)
        self._state = updated_record._state  # pylint: disable=protected-access
        logger.info(f"Successfully pushed a new Data Record version '{self.uri}'")

    def delete(self) -> None:
        """Delete the data record.

        Example::
            >>> record = DataRecord.get_by_uri("account/data-record")
            >>> record.delete()
        """
        try:
            api_client.delete(path=f'/apps/{self.uuid}/')
            logger.info(f'Data record {self.uri} deleted')
        except Exception as error:
            raise Exception(f'Failed to delete data record {self.uri} due to: {error}') from error

    @staticmethod
    def _get_by_version_uuid(version_uuid: str) -> 'DataRecord':
        """Look up the version with the given UUID and return the DataRecord for its URI."""
        response = api.client.get(path=f'/lfs/versions/{version_uuid}/')
        version_info = response.json()
        return DataRecord.get_by_uri(version_info['uri'])

    @staticmethod
    def _get_file(
        remote_storage_endpoint: DataRecordRemoteStorageEndpoint,
        file_node_dict: ZipFileNodeDict,
    ) -> LazyLoadedFile:
        """Build a LazyLoadedFile for one file node of the remote zip.

        The offset of the file data is not known up front. It is computed on demand by
        ``start_func``, which downloads the fixed-size part of the local file header and skips
        the variable-length file name and extra field that follow it.
        """
        # Byte range of the fixed-size header fields, i.e. everything right after the signature
        local_file_header_start = file_node_dict['zip_meta']['header_start'] + len(
            DataRecord._LOCAL_FILE_HEADER_SIGNATURE
        )
        local_file_header_end = local_file_header_start + DataRecord._LOCAL_FILE_HEADER_STRUCT.size

        def file_start_func() -> int:
            local_file_header_response = HttpClient.request(
                url=remote_storage_endpoint.get_remote_url(),
                # HTTP byte ranges are inclusive, hence the "- 1"
                headers={'range': f'bytes={local_file_header_start}-{local_file_header_end - 1}'},
                timeout_in_seconds=300,
            )
            local_file_header = DataRecord._LocalFileHeader._make(
                DataRecord._LOCAL_FILE_HEADER_STRUCT.unpack(local_file_header_response.content)
            )
            file_start: int = (
                local_file_header_end + local_file_header.file_name_len + local_file_header.extra_field_len
            )
            return file_start

        return LazyLoadedFile(
            buffer=RemoteIndexableBuffer(endpoint=remote_storage_endpoint),
            length=file_node_dict['zip_meta']['size_on_disk'],
            path=file_node_dict['dir_path'] + file_node_dict['name'],
            start=None,
            start_func=file_start_func,
        )

    def _get_version(self) -> ResourceVersionDetailedDict:
        """Return the active version dict, re-fetching the record if the state has no ``version`` key.

        Raises:
            Exception: If the record has no active version.
        """
        if 'version' not in self._state:
            # Version might be missing in state if initialized from the fetch method (list of data records)
            self._state = self.get_by_uri(self.uri)._state

        version = self._state.get('version')
        if version is None:
            raise Exception(f'Data Record "{self._state["uri"]}" has no active version')

        return version

    def _fetch_files(
        self,
        max_count: Optional[int],
        path_filter: Optional[PathFilter] = None,
    ) -> Iterable[LazyLoadedFile]:
        """Yield the files of the active version, page by page.

        String and list filters are sent with the query request; a callable filter is applied
        locally to ``dir_path + name`` of each file node. Directory nodes are skipped.

        Args:
            max_count: Stop after this many files have been yielded; ``None`` means no limit.
            path_filter: Optional string, list of strings or predicate.

        Raises:
            Exception: If ``path_filter`` is truthy but of an unsupported type.
        """
        if path_filter and not (isinstance(path_filter, (str, list)) or callable(path_filter)):
            raise Exception('Expected path_filter to be a string, a list of strings or a function')

        path_filters = (
            [path_filter] if isinstance(path_filter, str) else path_filter if isinstance(path_filter, list) else []
        )

        version = self._get_version()
        resource_version_uuid = version['uuid']
        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(uri=self.uri)

        page: Optional[int] = 1
        yielded_files: int = 0
        while page:
            response = api.client.post(
                path=f'/proxy/files/data-record-versions/{resource_version_uuid}/query/',
                data=dict(page=page, page_size=1_000, path_filters=path_filters),
            ).json()

            for file_node_dict in cast(List[ZipFileNodeDict], response['results']):
                if file_node_dict['is_dir']:
                    continue

                if callable(path_filter) and not path_filter(file_node_dict['dir_path'] + file_node_dict['name']):
                    continue

                yield self._get_file(remote_storage_endpoint, file_node_dict)
                yielded_files += 1

                if max_count is not None and yielded_files >= max_count:
                    # Setting page to None also makes the paging expression below end the outer loop
                    page = None
                    break

            page = page + 1 if page is not None and response['page_count'] > page else None

    def _get_detailed_dict(self) -> types.DataRecordDetailedDict:
        """Fetch the detailed data record dict for this record."""
        return cast(types.DataRecordDetailedDict, api_client.get(f'/resources/data-records/{self.uuid}/').json())

    @staticmethod
    def _request_zip_size_bytes(remote_storage_endpoint: DataRecordRemoteStorageEndpoint) -> int:
        """Return the total size of the remote zip, read from the Content-Range of a 1-byte range request.

        Raises:
            ValueError: If the Content-Range header is missing, malformed or carries no numeric total.
        """
        response = HttpClient.request(url=remote_storage_endpoint.get_remote_url(), headers={'range': 'bytes=0-0'})
        content_range = response.headers.get('Content-Range', '')
        if not content_range or '/' not in content_range:
            raise ValueError('Unable to determine zip size: Content-Range header missing or invalid')

        total_size_raw = content_range.split('/')[1].strip()
        try:
            return int(total_size_raw)
        except ValueError as error:
            # E.g. "bytes 0-0/*", where the server does not report the complete length
            raise ValueError(
                f'Unable to determine zip size: Content-Range header "{content_range}" has no numeric total size'
            ) from error

    def _get_zip_size_bytes(self) -> int:
        """Return the size in bytes of this record's zip in remote storage."""
        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(uri=self.uri)
        return DataRecord._request_zip_size_bytes(remote_storage_endpoint)

    def _iter_zip_bytes(self, chunk_size_bytes: int) -> Iterator[bytes]:
        """Yield the record's zip as consecutive chunks of at most ``chunk_size_bytes`` bytes.

        Raises:
            ValueError: If ``chunk_size_bytes`` is not positive, or the zip size cannot be determined.
        """
        if chunk_size_bytes <= 0:
            # A negative step would make range() below yield nothing and silently produce no data
            raise ValueError('chunk_size_bytes must be a positive integer')

        remote_storage_endpoint = DataRecordRemoteStorageEndpoint(uri=self.uri)
        total_size = DataRecord._request_zip_size_bytes(remote_storage_endpoint)

        for start in range(0, total_size, chunk_size_bytes):
            # HTTP byte ranges are inclusive; the last chunk is clamped to the end of the zip
            end = min(start + chunk_size_bytes - 1, total_size - 1)
            # The URL is requested from the endpoint again for every chunk.
            # NOTE(review): presumably so that an expired presigned URL gets renewed - confirm in the endpoint.
            presigned_url = remote_storage_endpoint.get_remote_url()
            response = HttpClient.request(
                url=presigned_url,
                headers={'range': f'bytes={start}-{end}'},
                timeout_in_seconds=300,
            )
            yield response.content
File without changes
biolib/_index/index.py ADDED
@@ -0,0 +1,55 @@
1
+ import json
2
+ from typing import Any, Dict
3
+
4
+ from biolib import api
5
+ from biolib._shared.types import ResourceDetailedDict
6
+ from biolib.biolib_api_client import BiolibApiClient
7
+ from biolib.biolib_api_client.biolib_app_api import _get_resource_uri_from_str
8
+ from biolib.biolib_logging import logger
9
+
10
+
11
class Index:
    """Client-side handle for an Index resource, wrapping the resource dict returned by the API."""

    def __init__(self, _internal_state: ResourceDetailedDict):
        # Resource dict as returned by the API
        self._state = _internal_state

    def __repr__(self) -> str:
        return f'Index: {self._state["uri"]}'

    @property
    def uri(self) -> str:
        """The resource URI of this Index."""
        return self._state['uri']

    @property
    def id(self) -> str:
        """Identifier of the form ``<account_uuid>.<uuid>`` with every ``-`` replaced by ``_``."""
        return f'{self._state["account_uuid"]}.{self._state["uuid"]}'.replace('-', '_')

    @staticmethod
    def get_by_uri(uri: str) -> 'Index':
        """Fetch the resource with the given URI and wrap it in an Index.

        Raises:
            Exception: If the resource exists but its type is not ``index``.
        """
        normalized_uri = _get_resource_uri_from_str(uri)
        response: ResourceDetailedDict = api.client.get(path='/resource/', params={'uri': normalized_uri}).json()
        if response['type'] != 'index':
            raise Exception(f'Resource "{response["uri"]}" is not an Index')
        return Index(_internal_state=response)

    @staticmethod
    def create(uri: str, config: Dict[str, Any]) -> str:
        """Create an Index at ``uri`` with the given configuration.

        Args:
            uri: Resource URI of the Index to create.
            config: Index configuration, sent as ``index_config`` in the create request.

        Returns:
            The URI of the created Index as reported by the API.
        """
        BiolibApiClient.assert_is_signed_in(authenticated_action_description='create an Index')

        response = api.client.post(
            path='/resources/indexes/',
            data={
                'uri': uri,
                'index_config': config,
            },
        )
        result = response.json()
        created_uri: str = result['uri']
        logger.info(f"Successfully created Index '{created_uri}'")
        return created_uri

    @staticmethod
    def create_from_config_file(uri: str, config_path: str) -> str:
        """Create an Index at ``uri`` using the JSON configuration stored at ``config_path``.

        Returns:
            The URI of the created Index as reported by the API.
        """
        # JSON files are UTF-8 by specification; do not depend on the platform's default encoding
        with open(config_path, encoding='utf-8') as config_file:
            index_config = json.load(config_file)

        return Index.create(uri=uri, config=index_config)
@@ -0,0 +1,103 @@
1
+ import json
2
+ from typing import Any, Dict, Iterator, List, Optional, Union
3
+
4
+ from biolib import api
5
+ from biolib._internal.http_client import HttpResponse
6
+ from biolib._internal.utils import base64_encode_string
7
+ from biolib._internal.utils.auth import decode_jwt_without_checking_signature
8
+ from biolib._runtime.runtime import Runtime
9
+ from biolib.biolib_api_client import BiolibApiClient
10
+ from biolib.biolib_errors import BioLibError
11
+
12
+
13
def _get_index_basic_auth_header() -> Optional[str]:
    """Build the ``Basic`` Authorization header value used for index queries.

    Returns:
        Optional[str]: ``'Basic <encoded credentials>'``, or ``None`` when
        ``Runtime.check_is_environment_biolib_app()`` is true, when no access
        token is available, or when the token payload has no ``public_id``.
    """
    if Runtime.check_is_environment_biolib_app():
        return None

    client = BiolibApiClient.get()
    client.refresh_access_token()
    token = client.access_token
    if not token:
        return None

    # The token is only decoded to read the payload; its signature is not verified here.
    payload = decode_jwt_without_checking_signature(token)['payload']
    public_id: Optional[str] = payload.get('public_id')
    if not public_id:
        return None

    username = 'biolib_user|' + public_id.replace('-', '_')
    encoded_credentials = base64_encode_string(f'{username}:{token}')
    return 'Basic ' + encoded_credentials
31
+
32
+
33
class IndexQueryResult:
    """Result wrapper for index query responses."""

    def __init__(self, response: 'HttpResponse', data_format: str):
        """Store the response and eagerly parse its body when the format is JSON.

        Args:
            response: HTTP response of the index query; its ``content`` is read here.
            data_format: Format the query was issued with; only ``'json'`` is parsed.
        """
        self._response = response
        self._data_format = data_format
        # Parsed body; stays None for non-JSON formats and for empty response bodies.
        self._json_data: Optional[Dict[str, Any]] = None
        if data_format == 'json':
            content = self._response.content
            if content:
                self._json_data = json.loads(content.decode('utf-8'))

    def iter_rows(self) -> Iterator[Dict[str, Any]]:
        """Return an iterator over the rows in the query result.

        Returns:
            Iterator[Dict[str, Any]]: An iterator yielding each row as a dictionary.
            It is empty when the response body was empty.

        Raises:
            BioLibError: If the result was not requested with data_format "json".
        """
        if self._data_format != 'json':
            raise BioLibError('iter_rows() is only available when data_format is "json"')
        if self._json_data is None:
            # A JSON query with an empty body has no rows; do not report it as a format error.
            return iter(())
        return iter(self._json_data['data'])
54
+
55
+
56
def query_index(
    query: str,
    data: Optional[Union[List[Dict[str, Any]], bytes]] = None,
    data_format: str = 'json',
) -> IndexQueryResult:
    """Query the BioLib index with a SQL-like query.

    Args:
        query: The SQL query string to execute.
        data: Optional input data. If data_format is "json", this should be a list of
            dictionaries that will be JSON encoded, one object per line. Raw bytes are
            sent unchanged for every format.
        data_format: The format for the query. Defaults to "json".

    Returns:
        IndexQueryResult: A result object wrapping the query response.

    Raises:
        BioLibError: If the query fails or returns a non-successful HTTP status code.
    """
    data_format = data_format.lower()
    params: Dict[str, Union[str, int]] = {'default_format': data_format.upper()}

    if data is None:
        # Without input data the query itself is the request body.
        body = query.encode('utf-8')
    else:
        # Input data occupies the body, so the query is sent as a URL parameter instead.
        params['query'] = query
        if data_format == 'json' and not isinstance(data, (bytes, bytearray)):
            body = '\n'.join(json.dumps(item, ensure_ascii=False) for item in data).encode('utf-8')
        else:
            # Already-encoded payloads are forwarded untouched; iterating bytes would yield integers.
            body = data  # type: ignore[assignment]

    headers: Dict[str, str] = {'Content-Type': 'text/plain; charset=utf-8'}
    authorization = _get_index_basic_auth_header()
    if authorization is not None:
        # Only send the header when there is a value, rather than passing None to the client.
        headers['Authorization'] = authorization

    response = api.client.post(
        path='proxy/index',
        data=body,
        params=params,
        headers=headers,
        authenticate=False,
    )

    if not 200 <= response.status_code < 300:
        raise BioLibError(f'Index query failed with status code {response.status_code}: {response.text}')

    return IndexQueryResult(response, data_format)
File without changes
@@ -0,0 +1,58 @@
1
+ import os
2
+ import shutil
3
+ import sys
4
+
5
+ from biolib._internal.templates import templates
6
+
7
+
8
def add_copilot_prompts(force: bool, silent: bool = False) -> None:
    """Copy the Copilot template's ``.github`` directory into the current project.

    The current working directory must contain ``.biolib/config.yml``; otherwise an
    error is printed to stderr and the process exits with status 1.

    Args:
        force: Overwrite existing destination files without asking.
        silent: Suppress the final confirmation message.

    Existing files whose content differs from the template are listed, and the user
    is asked per file whether to overwrite it (unless ``force`` is set). Existing
    files identical to the template are left alone.
    """
    current_working_directory = os.getcwd()
    config_file_path = os.path.join(current_working_directory, '.biolib', 'config.yml')
    if not os.path.exists(config_file_path):
        err_string = """
Error: Current directory has not been initialized as a BioLib application.
Please run the \"biolib init\" command first"""
        print(err_string, file=sys.stderr)
        # sys.exit is always available; the bare exit() helper is only injected by the site module.
        sys.exit(1)

    source_path = os.path.join(templates.copilot_template(), '.github')
    destination_path = os.path.join(current_working_directory, '.github')

    # Walk the template once and record (source, destination, destination relative to cwd).
    planned_copies = []
    for root, _, filenames in os.walk(source_path):
        destination_dir = os.path.join(destination_path, os.path.relpath(root, source_path))
        for filename in filenames:
            destination_file = os.path.join(destination_dir, filename)
            planned_copies.append(
                (
                    os.path.join(root, filename),
                    destination_file,
                    os.path.relpath(destination_file, current_working_directory),
                )
            )

    # A conflict is an existing destination file whose bytes differ from the template.
    conflicting_files = []
    if not force:
        for source_file, destination_file, relative_file_path in planned_copies:
            if not os.path.exists(destination_file):
                continue
            with open(source_file, 'rb') as fsrc, open(destination_file, 'rb') as fdest:
                if fsrc.read() != fdest.read():
                    conflicting_files.append(relative_file_path)

    files_to_overwrite = set()
    if conflicting_files:
        print('The following files already exist and would be overwritten:')
        for conflicting_file in conflicting_files:
            print(f'  {conflicting_file}')
        print()

        for conflicting_file in conflicting_files:
            choice = input(f'Overwrite {conflicting_file}? [y/N]: ').lower().strip()
            if choice in ['y', 'yes']:
                files_to_overwrite.add(conflicting_file)

    for source_file, destination_file, relative_file_path in planned_copies:
        if force or not os.path.exists(destination_file) or relative_file_path in files_to_overwrite:
            os.makedirs(os.path.dirname(destination_file), exist_ok=True)
            shutil.copy2(source_file, destination_file)

    if not silent:
        print(f'Prompt and instruction files added to {destination_path}/')
@@ -0,0 +1,81 @@
1
+ import os
2
+ import shutil
3
+
4
+ from biolib._internal.templates import templates
5
+
6
+
7
def add_gui_files(force: bool = False, silent: bool = False) -> None:
    """Copy the GUI template into the current project and update ``.gitignore``.

    Files named in ``root_files`` are placed in the current working directory; all
    other template files go under ``gui/``, keeping their relative directory.

    Args:
        force: Overwrite existing destination files without asking.
        silent: Suppress the final confirmation message.

    Existing files whose content differs from the template are listed, and the user
    is asked per file whether to overwrite it (unless ``force`` is set). The
    ``Dockerfile`` is excluded from that check and is always overwritten.
    """
    cwd = os.getcwd()
    template_dir = templates.gui_template()

    root_files = ['package.json', 'Dockerfile', 'vite.config.mts', '.yarnrc.yml']

    # Walk the template once and record (filename, source, destination, destination relative to cwd).
    planned_copies = []
    for root, _, filenames in os.walk(template_dir):
        relative_dir = os.path.relpath(root, template_dir)
        for filename in filenames:
            if filename in root_files:
                destination_dir = cwd
            elif relative_dir == '.':
                destination_dir = os.path.join(cwd, 'gui')
            else:
                destination_dir = os.path.join(cwd, 'gui', relative_dir)
            destination_file = os.path.join(destination_dir, filename)
            planned_copies.append(
                (
                    filename,
                    os.path.join(root, filename),
                    destination_file,
                    os.path.relpath(destination_file, cwd),
                )
            )

    # A conflict is an existing destination file whose bytes differ from the template.
    conflicting_files = []
    if not force:
        for filename, source_file, destination_file, relative_file_path in planned_copies:
            # The Dockerfile is never reported as a conflict because it is always overwritten below.
            if filename == 'Dockerfile' or not os.path.exists(destination_file):
                continue
            with open(source_file, 'rb') as fsrc, open(destination_file, 'rb') as fdest:
                if fsrc.read() != fdest.read():
                    conflicting_files.append(relative_file_path)

    files_to_overwrite = set()
    if conflicting_files:
        print('The following files already exist and would be overwritten:')
        for conflicting_file in conflicting_files:
            print(f'  {conflicting_file}')
        print()

        for conflicting_file in conflicting_files:
            choice = input(f'Overwrite {conflicting_file}? [y/N]: ').lower().strip()
            if choice in ['y', 'yes']:
                files_to_overwrite.add(conflicting_file)

    for filename, source_file, destination_file, relative_file_path in planned_copies:
        should_force = force or filename == 'Dockerfile'
        if should_force or not os.path.exists(destination_file) or relative_file_path in files_to_overwrite:
            os.makedirs(os.path.dirname(destination_file), exist_ok=True)
            shutil.copy2(source_file, destination_file)

    # Append only the ignore entries that are not already present, so repeated runs
    # do not keep growing .gitignore with duplicate blocks.
    gitignore_path = os.path.join(cwd, '.gitignore')
    gitignore_entries = ['.yarn', 'dist', 'yarn.lock', 'tsconfig.tsbuildinfo', 'node_modules']
    existing_entries = set()
    if os.path.exists(gitignore_path):
        with open(gitignore_path, encoding='utf-8', errors='replace') as gitignore_file:
            existing_entries = {line.strip() for line in gitignore_file}
    missing_entries = [entry for entry in gitignore_entries if entry not in existing_entries]
    if missing_entries:
        with open(gitignore_path, 'a', encoding='utf-8') as gitignore_file:
            gitignore_file.write('\n# gui\n')
            gitignore_file.writelines(f'{entry}\n' for entry in missing_entries)

    if not silent:
        print('gui files added to project root and gui/ subdirectory')
@@ -0,0 +1 @@
1
+ from .data_record import validate_sqlite_v1