pybiolib 0.2.951__py3-none-any.whl → 1.2.1890__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262) hide show
  1. biolib/__init__.py +357 -11
  2. biolib/_data_record/data_record.py +380 -0
  3. biolib/_index/__init__.py +0 -0
  4. biolib/_index/index.py +55 -0
  5. biolib/_index/query_result.py +103 -0
  6. biolib/_internal/__init__.py +0 -0
  7. biolib/_internal/add_copilot_prompts.py +58 -0
  8. biolib/_internal/add_gui_files.py +81 -0
  9. biolib/_internal/data_record/__init__.py +1 -0
  10. biolib/_internal/data_record/data_record.py +85 -0
  11. biolib/_internal/data_record/push_data.py +116 -0
  12. biolib/_internal/data_record/remote_storage_endpoint.py +43 -0
  13. biolib/_internal/errors.py +5 -0
  14. biolib/_internal/file_utils.py +125 -0
  15. biolib/_internal/fuse_mount/__init__.py +1 -0
  16. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  17. biolib/_internal/http_client.py +159 -0
  18. biolib/_internal/lfs/__init__.py +1 -0
  19. biolib/_internal/lfs/cache.py +51 -0
  20. biolib/_internal/libs/__init__.py +1 -0
  21. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  22. biolib/_internal/push_application.py +488 -0
  23. biolib/_internal/runtime.py +22 -0
  24. biolib/_internal/string_utils.py +13 -0
  25. biolib/_internal/templates/__init__.py +1 -0
  26. biolib/_internal/templates/copilot_template/.github/instructions/general-app-knowledge.instructions.md +10 -0
  27. biolib/_internal/templates/copilot_template/.github/instructions/style-general.instructions.md +20 -0
  28. biolib/_internal/templates/copilot_template/.github/instructions/style-python.instructions.md +16 -0
  29. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  30. biolib/_internal/templates/copilot_template/.github/prompts/biolib_app_inputs.prompt.md +11 -0
  31. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  32. biolib/_internal/templates/copilot_template/.github/prompts/biolib_run_apps.prompt.md +12 -0
  33. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  34. biolib/_internal/templates/github_workflow_template/.github/workflows/biolib.yml +21 -0
  35. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  36. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  37. biolib/_internal/templates/gui_template/App.tsx +53 -0
  38. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  39. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  40. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  41. biolib/_internal/templates/gui_template/index.css +5 -0
  42. biolib/_internal/templates/gui_template/index.html +13 -0
  43. biolib/_internal/templates/gui_template/index.tsx +10 -0
  44. biolib/_internal/templates/gui_template/package.json +27 -0
  45. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  46. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  47. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  48. biolib/_internal/templates/init_template/.biolib/config.yml +19 -0
  49. biolib/_internal/templates/init_template/Dockerfile +14 -0
  50. biolib/_internal/templates/init_template/requirements.txt +1 -0
  51. biolib/_internal/templates/init_template/run.py +12 -0
  52. biolib/_internal/templates/init_template/run.sh +4 -0
  53. biolib/_internal/templates/templates.py +25 -0
  54. biolib/_internal/tree_utils.py +106 -0
  55. biolib/_internal/utils/__init__.py +65 -0
  56. biolib/_internal/utils/auth.py +46 -0
  57. biolib/_internal/utils/job_url.py +33 -0
  58. biolib/_internal/utils/multinode.py +263 -0
  59. biolib/_runtime/runtime.py +157 -0
  60. biolib/_session/session.py +44 -0
  61. biolib/_shared/__init__.py +0 -0
  62. biolib/_shared/types/__init__.py +74 -0
  63. biolib/_shared/types/account.py +12 -0
  64. biolib/_shared/types/account_member.py +8 -0
  65. biolib/_shared/types/app.py +9 -0
  66. biolib/_shared/types/data_record.py +40 -0
  67. biolib/_shared/types/experiment.py +32 -0
  68. biolib/_shared/types/file_node.py +17 -0
  69. biolib/_shared/types/push.py +6 -0
  70. biolib/_shared/types/resource.py +37 -0
  71. biolib/_shared/types/resource_deploy_key.py +11 -0
  72. biolib/_shared/types/resource_permission.py +14 -0
  73. biolib/_shared/types/resource_version.py +19 -0
  74. biolib/_shared/types/result.py +14 -0
  75. biolib/_shared/types/typing.py +10 -0
  76. biolib/_shared/types/user.py +19 -0
  77. biolib/_shared/utils/__init__.py +7 -0
  78. biolib/_shared/utils/resource_uri.py +75 -0
  79. biolib/api/__init__.py +6 -0
  80. biolib/api/client.py +168 -0
  81. biolib/app/app.py +252 -49
  82. biolib/app/search_apps.py +45 -0
  83. biolib/biolib_api_client/api_client.py +126 -31
  84. biolib/biolib_api_client/app_types.py +24 -4
  85. biolib/biolib_api_client/auth.py +31 -8
  86. biolib/biolib_api_client/biolib_app_api.py +147 -52
  87. biolib/biolib_api_client/biolib_job_api.py +161 -141
  88. biolib/biolib_api_client/job_types.py +21 -5
  89. biolib/biolib_api_client/lfs_types.py +7 -23
  90. biolib/biolib_api_client/user_state.py +56 -0
  91. biolib/biolib_binary_format/__init__.py +1 -4
  92. biolib/biolib_binary_format/file_in_container.py +105 -0
  93. biolib/biolib_binary_format/module_input.py +24 -7
  94. biolib/biolib_binary_format/module_output_v2.py +149 -0
  95. biolib/biolib_binary_format/remote_endpoints.py +34 -0
  96. biolib/biolib_binary_format/remote_stream_seeker.py +59 -0
  97. biolib/biolib_binary_format/saved_job.py +3 -2
  98. biolib/biolib_binary_format/{attestation_document.py → stdout_and_stderr.py} +8 -8
  99. biolib/biolib_binary_format/system_status_update.py +3 -2
  100. biolib/biolib_binary_format/utils.py +175 -0
  101. biolib/biolib_docker_client/__init__.py +11 -2
  102. biolib/biolib_errors.py +36 -0
  103. biolib/biolib_logging.py +27 -10
  104. biolib/cli/__init__.py +38 -0
  105. biolib/cli/auth.py +46 -0
  106. biolib/cli/data_record.py +164 -0
  107. biolib/cli/index.py +32 -0
  108. biolib/cli/init.py +421 -0
  109. biolib/cli/lfs.py +101 -0
  110. biolib/cli/push.py +50 -0
  111. biolib/cli/run.py +63 -0
  112. biolib/cli/runtime.py +14 -0
  113. biolib/cli/sdk.py +16 -0
  114. biolib/cli/start.py +56 -0
  115. biolib/compute_node/cloud_utils/cloud_utils.py +110 -161
  116. biolib/compute_node/job_worker/cache_state.py +66 -88
  117. biolib/compute_node/job_worker/cache_types.py +1 -6
  118. biolib/compute_node/job_worker/docker_image_cache.py +112 -37
  119. biolib/compute_node/job_worker/executors/__init__.py +0 -3
  120. biolib/compute_node/job_worker/executors/docker_executor.py +532 -199
  121. biolib/compute_node/job_worker/executors/docker_types.py +9 -1
  122. biolib/compute_node/job_worker/executors/types.py +19 -9
  123. biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +30 -0
  124. biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +3 -5
  125. biolib/compute_node/job_worker/job_storage.py +108 -0
  126. biolib/compute_node/job_worker/job_worker.py +397 -212
  127. biolib/compute_node/job_worker/large_file_system.py +87 -38
  128. biolib/compute_node/job_worker/network_alloc.py +99 -0
  129. biolib/compute_node/job_worker/network_buffer.py +240 -0
  130. biolib/compute_node/job_worker/utilization_reporter_thread.py +197 -0
  131. biolib/compute_node/job_worker/utils.py +9 -24
  132. biolib/compute_node/remote_host_proxy.py +400 -98
  133. biolib/compute_node/utils.py +31 -9
  134. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  135. biolib/compute_node/webserver/proxy_utils.py +28 -0
  136. biolib/compute_node/webserver/webserver.py +130 -44
  137. biolib/compute_node/webserver/webserver_types.py +2 -6
  138. biolib/compute_node/webserver/webserver_utils.py +77 -12
  139. biolib/compute_node/webserver/worker_thread.py +183 -42
  140. biolib/experiments/__init__.py +0 -0
  141. biolib/experiments/experiment.py +356 -0
  142. biolib/jobs/__init__.py +1 -0
  143. biolib/jobs/job.py +741 -0
  144. biolib/jobs/job_result.py +185 -0
  145. biolib/jobs/types.py +50 -0
  146. biolib/py.typed +0 -0
  147. biolib/runtime/__init__.py +14 -0
  148. biolib/sdk/__init__.py +91 -0
  149. biolib/tables.py +34 -0
  150. biolib/typing_utils.py +2 -7
  151. biolib/user/__init__.py +1 -0
  152. biolib/user/sign_in.py +54 -0
  153. biolib/utils/__init__.py +162 -0
  154. biolib/utils/cache_state.py +94 -0
  155. biolib/utils/multipart_uploader.py +194 -0
  156. biolib/utils/seq_util.py +150 -0
  157. biolib/utils/zip/remote_zip.py +640 -0
  158. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  159. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  160. {pybiolib-0.2.951.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  161. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  162. README.md +0 -17
  163. biolib/app/app_result.py +0 -68
  164. biolib/app/utils.py +0 -62
  165. biolib/biolib-js/0-biolib.worker.js +0 -1
  166. biolib/biolib-js/1-biolib.worker.js +0 -1
  167. biolib/biolib-js/2-biolib.worker.js +0 -1
  168. biolib/biolib-js/3-biolib.worker.js +0 -1
  169. biolib/biolib-js/4-biolib.worker.js +0 -1
  170. biolib/biolib-js/5-biolib.worker.js +0 -1
  171. biolib/biolib-js/6-biolib.worker.js +0 -1
  172. biolib/biolib-js/index.html +0 -10
  173. biolib/biolib-js/main-biolib.js +0 -1
  174. biolib/biolib_api_client/biolib_account_api.py +0 -21
  175. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -108
  176. biolib/biolib_binary_format/aes_encrypted_package.py +0 -42
  177. biolib/biolib_binary_format/module_output.py +0 -58
  178. biolib/biolib_binary_format/rsa_encrypted_aes_package.py +0 -57
  179. biolib/biolib_push.py +0 -114
  180. biolib/cli.py +0 -203
  181. biolib/cli_utils.py +0 -273
  182. biolib/compute_node/cloud_utils/enclave_parent_types.py +0 -7
  183. biolib/compute_node/enclave/__init__.py +0 -2
  184. biolib/compute_node/enclave/enclave_remote_hosts.py +0 -53
  185. biolib/compute_node/enclave/nitro_secure_module_utils.py +0 -64
  186. biolib/compute_node/job_worker/executors/base_executor.py +0 -18
  187. biolib/compute_node/job_worker/executors/pyppeteer_executor.py +0 -173
  188. biolib/compute_node/job_worker/executors/remote/__init__.py +0 -1
  189. biolib/compute_node/job_worker/executors/remote/nitro_enclave_utils.py +0 -81
  190. biolib/compute_node/job_worker/executors/remote/remote_executor.py +0 -51
  191. biolib/lfs.py +0 -196
  192. biolib/pyppeteer/.circleci/config.yml +0 -100
  193. biolib/pyppeteer/.coveragerc +0 -3
  194. biolib/pyppeteer/.gitignore +0 -89
  195. biolib/pyppeteer/.pre-commit-config.yaml +0 -28
  196. biolib/pyppeteer/CHANGES.md +0 -253
  197. biolib/pyppeteer/CONTRIBUTING.md +0 -26
  198. biolib/pyppeteer/LICENSE +0 -12
  199. biolib/pyppeteer/README.md +0 -137
  200. biolib/pyppeteer/docs/Makefile +0 -177
  201. biolib/pyppeteer/docs/_static/custom.css +0 -28
  202. biolib/pyppeteer/docs/_templates/layout.html +0 -10
  203. biolib/pyppeteer/docs/changes.md +0 -1
  204. biolib/pyppeteer/docs/conf.py +0 -299
  205. biolib/pyppeteer/docs/index.md +0 -21
  206. biolib/pyppeteer/docs/make.bat +0 -242
  207. biolib/pyppeteer/docs/reference.md +0 -211
  208. biolib/pyppeteer/docs/server.py +0 -60
  209. biolib/pyppeteer/poetry.lock +0 -1699
  210. biolib/pyppeteer/pyppeteer/__init__.py +0 -135
  211. biolib/pyppeteer/pyppeteer/accessibility.py +0 -286
  212. biolib/pyppeteer/pyppeteer/browser.py +0 -401
  213. biolib/pyppeteer/pyppeteer/browser_fetcher.py +0 -194
  214. biolib/pyppeteer/pyppeteer/command.py +0 -22
  215. biolib/pyppeteer/pyppeteer/connection/__init__.py +0 -242
  216. biolib/pyppeteer/pyppeteer/connection/cdpsession.py +0 -101
  217. biolib/pyppeteer/pyppeteer/coverage.py +0 -346
  218. biolib/pyppeteer/pyppeteer/device_descriptors.py +0 -787
  219. biolib/pyppeteer/pyppeteer/dialog.py +0 -79
  220. biolib/pyppeteer/pyppeteer/domworld.py +0 -597
  221. biolib/pyppeteer/pyppeteer/emulation_manager.py +0 -53
  222. biolib/pyppeteer/pyppeteer/errors.py +0 -48
  223. biolib/pyppeteer/pyppeteer/events.py +0 -63
  224. biolib/pyppeteer/pyppeteer/execution_context.py +0 -156
  225. biolib/pyppeteer/pyppeteer/frame/__init__.py +0 -299
  226. biolib/pyppeteer/pyppeteer/frame/frame_manager.py +0 -306
  227. biolib/pyppeteer/pyppeteer/helpers.py +0 -245
  228. biolib/pyppeteer/pyppeteer/input.py +0 -371
  229. biolib/pyppeteer/pyppeteer/jshandle.py +0 -598
  230. biolib/pyppeteer/pyppeteer/launcher.py +0 -683
  231. biolib/pyppeteer/pyppeteer/lifecycle_watcher.py +0 -169
  232. biolib/pyppeteer/pyppeteer/models/__init__.py +0 -103
  233. biolib/pyppeteer/pyppeteer/models/_protocol.py +0 -12460
  234. biolib/pyppeteer/pyppeteer/multimap.py +0 -82
  235. biolib/pyppeteer/pyppeteer/network_manager.py +0 -678
  236. biolib/pyppeteer/pyppeteer/options.py +0 -8
  237. biolib/pyppeteer/pyppeteer/page.py +0 -1728
  238. biolib/pyppeteer/pyppeteer/pipe_transport.py +0 -59
  239. biolib/pyppeteer/pyppeteer/target.py +0 -147
  240. biolib/pyppeteer/pyppeteer/task_queue.py +0 -24
  241. biolib/pyppeteer/pyppeteer/timeout_settings.py +0 -36
  242. biolib/pyppeteer/pyppeteer/tracing.py +0 -93
  243. biolib/pyppeteer/pyppeteer/us_keyboard_layout.py +0 -305
  244. biolib/pyppeteer/pyppeteer/util.py +0 -18
  245. biolib/pyppeteer/pyppeteer/websocket_transport.py +0 -47
  246. biolib/pyppeteer/pyppeteer/worker.py +0 -101
  247. biolib/pyppeteer/pyproject.toml +0 -97
  248. biolib/pyppeteer/spell.txt +0 -137
  249. biolib/pyppeteer/tox.ini +0 -72
  250. biolib/pyppeteer/utils/generate_protocol_types.py +0 -603
  251. biolib/start_cli.py +0 -7
  252. biolib/utils.py +0 -47
  253. biolib/validators/validate_app_version.py +0 -183
  254. biolib/validators/validate_argument.py +0 -134
  255. biolib/validators/validate_module.py +0 -323
  256. biolib/validators/validate_zip_file.py +0 -40
  257. biolib/validators/validator_utils.py +0 -103
  258. pybiolib-0.2.951.dist-info/LICENSE +0 -21
  259. pybiolib-0.2.951.dist-info/METADATA +0 -61
  260. pybiolib-0.2.951.dist-info/RECORD +0 -153
  261. pybiolib-0.2.951.dist-info/entry_points.txt +0 -3
  262. /LICENSE → /pybiolib-1.2.1890.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,85 @@
1
+ import sqlite3
2
+ from pathlib import Path
3
+
4
+ from biolib._shared.types import SqliteV1DatabaseSchema
5
+
6
+
7
def get_actual_schema(db_path):
    """Read the table, column and foreign-key layout of a SQLite database file.

    Args:
        db_path: ``pathlib.Path`` of the SQLite file to inspect.

    Returns:
        A dict of the form ``{'tables': {table_name: {'columns': {column_name: {...}}}}}``.
        Every column entry has ``type`` (the declared type string) and ``nullable`` (bool).
        Columns that are the source of a foreign key additionally get ``foreign_key`` with
        the referenced ``table`` and ``column``.

    Raises:
        Exception: If ``db_path`` does not exist.
    """
    if not db_path.exists():
        raise Exception(f'File {db_path} not found.')

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [row[0] for row in cursor.fetchall()]

        actual_schema: SqliteV1DatabaseSchema = {'tables': {}}
        for table_name in table_names:
            # PRAGMA arguments cannot be bound as parameters, so quote the identifier instead.
            # Without quoting, table names containing spaces or reserved words break the query.
            quoted_table_name = '"' + table_name.replace('"', '""') + '"'

            # table_info rows are (cid, name, type, notnull, dflt_value, pk)
            cursor.execute(f'PRAGMA table_info({quoted_table_name});')
            columns = {
                column[1]: {'type': column[2], 'nullable': not bool(column[3])} for column in cursor.fetchall()
            }

            # foreign_key_list rows are (id, seq, table, from, to, on_update, on_delete, match)
            cursor.execute(f'PRAGMA foreign_key_list({quoted_table_name});')
            for fk in cursor.fetchall():
                columns[fk[3]]['foreign_key'] = {'table': fk[2], 'column': fk[4]}

            actual_schema['tables'][table_name] = {'columns': columns}

        return actual_schema
    finally:
        # Always release the file handle, also when one of the queries above fails.
        conn.close()
35
+
36
+
37
def verify_schema(specification: SqliteV1DatabaseSchema, actual_schema: SqliteV1DatabaseSchema):
    """Check that ``actual_schema`` satisfies ``specification``.

    For every table in the specification, all column checks (presence, declared type,
    nullability) run first, followed by the foreign-key checks. Tables and columns that
    exist in ``actual_schema`` but not in the specification are ignored.

    Args:
        specification: The expected schema.
        actual_schema: The schema read from the database, e.g. by ``get_actual_schema``.

    Raises:
        Exception: On the first mismatch found, with a message naming the table/column.
    """
    for table_name, table_spec in specification['tables'].items():
        if table_name not in actual_schema['tables']:
            raise Exception(f"Error: Table '{table_name}' is missing.")

        actual_columns = actual_schema['tables'][table_name]['columns']

        for column_name, column_spec in table_spec['columns'].items():
            if column_name not in actual_columns:
                raise Exception(f"Error: Column '{column_name}' in table '{table_name}' is missing.")

            actual_column = actual_columns[column_name]
            if actual_column['type'] != column_spec['type']:
                # Both parts must be f-strings, otherwise the placeholders are printed literally.
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' "
                    f"has type '{actual_column['type']}' but expected '{column_spec['type']}'."
                )

            # A column is nullable in the specification unless it explicitly says otherwise.
            if not actual_column['nullable'] and column_spec.get('nullable', True):
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' is "
                    'not nullable but should be nullable according to the specification.'
                )

        for column_name, column_spec in table_spec['columns'].items():
            foreign_key_spec = column_spec.get('foreign_key')
            if not foreign_key_spec:
                continue

            fk = actual_columns[column_name].get('foreign_key')
            if not fk:
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' does "
                    'not have a foreign key constraint.'
                )

            # Fail only when the constraint points somewhere other than the specification says.
            if fk['table'] != foreign_key_spec['table'] or fk['column'] != foreign_key_spec['column']:
                raise Exception(
                    f"Error: Column '{column_name}' in table '{table_name}' does "
                    'not have the correct foreign key constraint.'
                )
79
+
80
+
81
def validate_sqlite_v1(schema: SqliteV1DatabaseSchema, sqlite_file: Path):
    """Validate the SQLite file at ``sqlite_file`` against the expected ``schema``.

    The expected schema and the schema read from the file are both printed to stdout
    (one per line) before the comparison runs. Any mismatch surfaces as the exception
    raised by ``verify_schema``.
    """
    schema_on_disk = get_actual_schema(sqlite_file)
    print(schema, schema_on_disk, sep='\n')
    verify_schema(specification=schema, actual_schema=schema_on_disk)
@@ -0,0 +1,116 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Callable, Iterator
5
+
6
+ import biolib.api as api
7
+ from biolib._internal.file_utils import get_files_and_size_of_directory, get_iterable_zip_stream
8
+ from biolib.biolib_errors import BioLibError
9
+ from biolib.biolib_logging import logger
10
+ from biolib.typing_utils import List, Optional, Tuple
11
+ from biolib.utils import MultiPartUploader
12
+
13
+
14
+ def _upload_from_iterator(
15
+ payload_iterator: Iterator[bytes],
16
+ payload_size_in_bytes: int,
17
+ resource_uuid: Optional[str] = None,
18
+ resource_version_uuid: Optional[str] = None,
19
+ use_process_pool: bool = False,
20
+ publish: bool = False,
21
+ on_progress: Optional[Callable[[int, int], None]] = None,
22
+ ) -> str:
23
+ if (resource_uuid is None) == (resource_version_uuid is None):
24
+ raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')
25
+
26
+ if resource_version_uuid is None:
27
+ response = api.client.post(
28
+ path='/lfs/versions/',
29
+ data={'resource_uuid': resource_uuid},
30
+ )
31
+ resource_version_uuid = response.json()['uuid']
32
+
33
+ multipart_uploader = MultiPartUploader(
34
+ use_process_pool=use_process_pool,
35
+ get_presigned_upload_url_request={
36
+ 'headers': None,
37
+ 'requires_biolib_auth': True,
38
+ 'path': f'/lfs/versions/{resource_version_uuid}/presigned_upload_url/',
39
+ },
40
+ complete_upload_request={
41
+ 'headers': None,
42
+ 'requires_biolib_auth': True,
43
+ 'path': f'/lfs/versions/{resource_version_uuid}/complete_upload/',
44
+ },
45
+ on_progress=on_progress,
46
+ )
47
+ multipart_uploader.upload(payload_iterator=payload_iterator, payload_size_in_bytes=payload_size_in_bytes)
48
+
49
+ if publish:
50
+ api.client.patch(
51
+ path=f'/resources/versions/{resource_version_uuid}/',
52
+ data={'state': 'published', 'set_as_active': True},
53
+ )
54
+
55
+ return resource_version_uuid
56
+
57
+
58
def validate_data_path_and_get_files_and_size_of_directory(data_path: str) -> Tuple[List[str], int]:
    """Validate a directory to push and collect its files and total size.

    Args:
        data_path: Path of the directory to push.

    Returns:
        A tuple of (file paths relative to ``data_path``, total size in bytes), as produced
        by ``get_files_and_size_of_directory``.

    Raises:
        AssertionError: If ``data_path`` is not a directory.
        BioLibError: If ``data_path`` resolves to the root directory, or the directory is
            larger than 4.5 TB.
    """
    # NOTE(review): an assert is skipped under `python -O`; kept as-is so the exception type
    # seen by callers does not change.
    assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'

    if os.path.realpath(data_path) == '/':
        raise BioLibError('Pushing your root directory is not possible')

    original_working_dir = os.getcwd()
    os.chdir(data_path)
    try:
        files_to_zip, data_size_in_bytes = get_files_and_size_of_directory(directory=os.getcwd())
    finally:
        # Restore the caller's working directory even when scanning the directory fails.
        os.chdir(original_working_dir)

    if data_size_in_bytes > 4_500_000_000_000:
        raise BioLibError('Attempted to push directory with a size larger than the limit of 4.5 TB')

    return files_to_zip, data_size_in_bytes
73
+
74
+
75
def push_data_path(
    data_path: str,
    data_size_in_bytes: int,
    files_to_zip: List[str],
    resource_uuid: Optional[str] = None,
    resource_version_uuid: Optional[str] = None,
    chunk_size_in_mb: Optional[int] = None,
    publish: bool = False,
) -> str:
    """Zip the given files from ``data_path`` on the fly and upload them as a resource version.

    Args:
        data_path: Directory the entries of ``files_to_zip`` are relative to.
        data_size_in_bytes: Total size of the files, used for chunk sizing and logging.
        files_to_zip: File paths relative to ``data_path``.
        resource_uuid: Resource to create a new version for.
        resource_version_uuid: Existing version to upload into.
        chunk_size_in_mb: Optional chunk size in megabytes; values below 10 MB are raised to 10 MB.
        publish: Forwarded to ``_upload_from_iterator``.

    Returns:
        The UUID of the resource version that received the upload.

    Raises:
        ValueError: If both or neither of the two UUID arguments are provided.
    """
    if (resource_uuid is None) == (resource_version_uuid is None):
        raise ValueError('Must provide exactly one of resource_uuid or resource_version_uuid')

    original_working_dir = os.getcwd()
    # The zip stream opens the relative paths in files_to_zip lazily, so the process must stay
    # inside data_path until the upload has consumed the whole stream.
    os.chdir(data_path)
    try:
        min_chunk_size_bytes = 10_000_000
        chunk_size_in_bytes: int
        if chunk_size_in_mb:
            chunk_size_in_bytes = chunk_size_in_mb * 1_000_000  # Convert megabytes to bytes
            if chunk_size_in_bytes < min_chunk_size_bytes:
                logger.warning('Specified chunk size is too small, using minimum of 10 MB instead.')
                chunk_size_in_bytes = min_chunk_size_bytes
        else:
            # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
            chunk_size_in_bytes = max(min_chunk_size_bytes, int(data_size_in_bytes / 9_000))

        data_size_in_mb = round(data_size_in_bytes / 10**6)
        logger.info(f'Zipping {len(files_to_zip)} files, in total ~{data_size_in_mb}mb of data')

        iterable_zip_stream = get_iterable_zip_stream(files=files_to_zip, chunk_size=chunk_size_in_bytes)

        return _upload_from_iterator(
            payload_iterator=iterable_zip_stream,
            payload_size_in_bytes=data_size_in_bytes,
            resource_uuid=resource_uuid,
            resource_version_uuid=resource_version_uuid,
            use_process_pool=True,
            publish=publish,
        )
    finally:
        # Restore the caller's working directory even when zipping or uploading fails.
        os.chdir(original_working_dir)
@@ -0,0 +1,43 @@
1
+ import os
2
+ from datetime import datetime, timedelta, timezone
3
+ from urllib.parse import urlparse
4
+
5
+ from biolib._shared.types import ResourceDetailedDict
6
+ from biolib.api import client as api_client
7
+ from biolib.biolib_binary_format.utils import RemoteEndpoint
8
+ from biolib.biolib_logging import logger
9
+ from biolib.typing_utils import Optional
10
+
11
+
12
class DataRecordRemoteStorageEndpoint(RemoteEndpoint):
    """Remote endpoint that resolves a data record URI to a cached download URL.

    The URL is looked up through the ``/resource/`` API endpoint and reused until the
    locally tracked expiry (8 minutes after the lookup) has passed.
    """

    def __init__(self, uri: str):
        self._uri: str = uri
        self._expires_at: Optional[datetime] = None
        self._presigned_url: Optional[str] = None

    def get_remote_url(self) -> str:
        """Return the download URL, fetching a fresh one when none is cached or it has expired.

        When the ``BIOLIB_CLOUD_JOB_STORAGE_BASE_URL`` environment variable is non-empty, the
        scheme and host of the download URL are replaced by that base URL while path and query
        are kept.

        Raises:
            Exception: If the resource response carries no version assets.
        """
        cached_url = self._presigned_url
        cached_expiry = self._expires_at
        if cached_url and cached_expiry and datetime.now(timezone.utc) <= cached_expiry:
            return cached_url

        resource_response: ResourceDetailedDict = api_client.get(path='/resource/', params={'uri': self._uri}).json()

        version = resource_response.get('version')
        assets = None
        if version:
            assets = version.get('assets')
        if not assets:
            raise Exception(f'Resource "{self._uri}" has no downloadable assets')

        download_url = assets['download_url']
        proxy_base_url = os.getenv('BIOLIB_CLOUD_JOB_STORAGE_BASE_URL', '')
        if not proxy_base_url:
            self._presigned_url = download_url
        else:
            url_parts = urlparse(download_url)
            self._presigned_url = f'{proxy_base_url}{url_parts.path}?{url_parts.query}'

        self._expires_at = datetime.now(timezone.utc) + timedelta(minutes=8)
        expiry_text = self._expires_at.isoformat()
        logger.debug(f'DataRecord "{self._uri}" fetched presigned URL with expiry at {expiry_text}')

        return self._presigned_url
@@ -0,0 +1,5 @@
1
+ from biolib.biolib_errors import BioLibError
2
+
3
+
4
# Subclasses BioLibError, so handlers that catch BioLibError also catch this error.
class AuthenticationError(BioLibError):
    """Raised when authentication is required but user is not signed in."""
@@ -0,0 +1,125 @@
1
+ import hashlib
2
+ import io
3
+ import os
4
+ import posixpath
5
+ import zipfile as zf
6
+ from pathlib import Path
7
+
8
+ from biolib.typing_utils import Iterator, List, Tuple
9
+
10
+
11
def get_files_and_size_of_directory(directory: str) -> Tuple[List[str], int]:
    """Collect all regular (non-symlink) files below ``directory`` and their combined size.

    Args:
        directory: Directory to walk recursively.

    Returns:
        A tuple of (file paths relative to ``directory``, sum of the file sizes in bytes).
        Symlinked files are skipped and do not count towards the size.
    """
    data_size = 0
    file_list: List[str] = []

    for path, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(path, file)
            if os.path.islink(file_path):
                continue  # skip symlinks

            # relpath instead of slicing off len(directory) + 1 characters: slicing removes one
            # character too many when `directory` ends with a separator (e.g. 'data/' or '/').
            file_list.append(os.path.relpath(file_path, directory))
            data_size += os.path.getsize(file_path)

    return file_list, data_size
26
+
27
+
28
def get_iterable_zip_stream(files: List[str], chunk_size: int) -> Iterator[bytes]:
    """Stream a zip archive of ``files`` as a sequence of byte chunks.

    The archive is written into an in-memory buffer that is drained while the files are being
    read, so the complete archive does not have to exist in memory at once.

    Args:
        files: Paths of the files to add; each path is opened as given.
        chunk_size: Read size per file read and maximum size of every yielded chunk.

    Yields:
        Consecutive pieces of the zip archive, each at most ``chunk_size`` bytes long.
    """

    class _PendingBytes(io.RawIOBase):
        """Write-only sink that accumulates bytes until they are popped off in chunks."""

        def __init__(self, max_chunk: int):
            super().__init__()
            self.max_chunk = max_chunk
            self.pending = bytearray()

        def pending_size(self):
            return len(self.pending)

        def pop_chunk(self):
            head = bytes(self.pending[: self.max_chunk])
            del self.pending[: self.max_chunk]
            return head

        def write(self, data):
            written = len(data)
            self.pending += data
            return written

    # sink that temporarily holds whatever the zip writer has produced
    sink = _PendingBytes(chunk_size)
    archive = zf.ZipFile(sink, mode='w')  # type: ignore

    for member_path in files:
        # the archive entry is opened before the is_file check, exactly one entry per listed path
        member_info = zf.ZipInfo.from_file(member_path)
        member_writer = archive.open(member_info, mode='w')
        if Path(member_path).is_file():
            with open(member_path, 'br') as source:
                # read() returns b'' at end of file, which ends the iteration
                for block in iter(lambda: source.read(chunk_size), b''):
                    member_writer.write(block)
                    # hand out one chunk as soon as more than chunk_size bytes are pending
                    if sink.pending_size() > chunk_size:
                        yield sink.pop_chunk()

        member_writer.close()

    # closing the archive appends the remaining zip metadata to the sink
    archive.close()
    remainder = sink.pop_chunk()
    while len(remainder) != 0:
        yield remainder
        remainder = sink.pop_chunk()
80
+
81
+
82
def path_to_renamed_path(path_str: str, prefix_with_slash: bool = True) -> str:
    """
    Map a user supplied file path to a normalized path:
    - Paths containing '..' are resolved; if the result lies below the current directory it
      becomes relative to it, otherwise it becomes '/<hash of resolved folder>/<filename>'
    - Absolute paths below the current directory become relative to it
    - Other absolute paths become '/<hash of folder>/<filename>'
    - Remaining relative paths are kept as they are
    The result is normalized with posixpath.normpath and starts with '/' only when
    prefix_with_slash is True.
    """

    def _hashed_folder_path(location: Path) -> str:
        # the first 6 hex digits of the folder's md5 stand in for the folder itself
        folder_digest = hashlib.md5(str(location.parent).encode()).hexdigest()[:6]
        return f'/{folder_digest}/{location.name}'

    candidate = Path(path_str)
    working_dir = Path.cwd()

    if '..' in candidate.parts:
        resolved = candidate.resolve()
        try:
            renamed = str(resolved.relative_to(working_dir))
        except ValueError:
            renamed = _hashed_folder_path(resolved)
    elif candidate.is_absolute():
        try:
            renamed = str(candidate.resolve().relative_to(working_dir))
        except ValueError:
            # here the hash is taken over the path as given, not over the resolved path
            renamed = _hashed_folder_path(candidate)
    else:
        renamed = path_str

    has_leading_slash = renamed.startswith('/')
    if prefix_with_slash and not has_leading_slash:
        renamed = '/' + renamed
    elif not prefix_with_slash and has_leading_slash:
        renamed = renamed[1:]

    # Normalize to handle cases like '/./mydir' -> '/mydir' and remove trailing slashes.
    # Required because downstream Mappings class does exact string-prefix matching.
    return posixpath.normpath(renamed)
@@ -0,0 +1 @@
1
+ from .experiment_fuse_mount import ExperimentFuseMount
@@ -0,0 +1,209 @@
1
+ import errno
2
+ import os
3
+ import stat
4
+ from datetime import datetime, timezone
5
+ from time import time
6
+
7
+ from biolib._internal.libs.fusepy import FUSE, FuseOSError, Operations
8
+ from biolib.biolib_errors import BioLibError
9
+ from biolib.jobs import Job
10
+ from biolib.typing_utils import Dict, List, Optional, Tuple, TypedDict
11
+
12
+
13
class _AttributeDict(TypedDict):
    # Typed shape of the attribute dicts built by ExperimentFuseMount._get_directory_attributes
    # and ExperimentFuseMount._get_file_attributes and returned from ExperimentFuseMount.getattr.
    # The three timestamps are filled with whole epoch seconds, st_uid/st_gid with the ids of the
    # current process, and st_mode with a file-type flag combined with permission bits.
    st_atime: int
    st_ctime: int
    st_gid: int
    st_mode: int
    st_mtime: int
    st_nlink: int
    st_size: int
    st_uid: int
22
+
23
+
24
# Return value of the no-op release, releasedir and flush handlers of ExperimentFuseMount.
_SUCCESS_CODE = 0
25
+
26
+
27
class ExperimentFuseMount(Operations):
    """Read-only FUSE file system exposing the output files of an experiment's completed jobs.

    The mount root lists one directory per completed job (named after the job); below it the
    job's output files are presented as a directory tree. Every mutating operation is rejected
    with ``EACCES``.
    """

    def __init__(self, experiment):
        self._experiment = experiment
        # Lazily populated map of job name -> Job, see _get_job_names_map
        self._job_names_map: Optional[Dict[str, Job]] = None
        self._jobs_last_fetched_at: float = 0.0
        # Used as the timestamp of the mount root directory
        self._mounted_at_epoch_seconds: int = int(time())

    @staticmethod
    def mount_experiment(experiment, mount_path: str) -> None:
        """Mount ``experiment`` at ``mount_path`` (single-threaded, in the foreground)."""
        FUSE(
            operations=ExperimentFuseMount(experiment),
            mountpoint=mount_path,
            nothreads=True,
            foreground=True,
            allow_other=False,
        )

    def getattr(self, path: str, fh=None) -> _AttributeDict:
        """Return the attributes of the mount root, a job directory, an output file or a sub directory.

        Raises:
            FuseOSError: With ``ENOENT`` if ``path`` matches no job, file or directory.
        """
        if path == '/':
            return self._get_directory_attributes(timestamp_epoch_seconds=self._mounted_at_epoch_seconds)

        job, path_in_job = self._parse_path(path)
        # NOTE(review): presumably 'finished_at' is an ISO timestamp in UTC with a trailing 'Z' - confirm
        job_finished_at_epoch_seconds: int = int(
            datetime.fromisoformat(job.to_dict()['finished_at'].rstrip('Z')).replace(tzinfo=timezone.utc).timestamp()
        )

        if path_in_job == '/':
            return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)

        try:
            file = job.get_output_file(path_in_job)
            return self._get_file_attributes(
                timestamp_epoch_seconds=job_finished_at_epoch_seconds,
                size_in_bytes=file.length,
            )
        except BioLibError:
            # file not found
            pass

        # Not a file: it is a directory only if some output file lives below it. The trailing
        # slash matters, otherwise '/res' would be reported as a directory because of
        # '/results.txt'. readdir matches on the same slash-terminated prefix.
        directory_prefix = path_in_job + '/'
        if any(file.path.startswith(directory_prefix) for file in job.list_output_files()):
            return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)

        raise FuseOSError(errno.ENOENT) from None  # No such file or directory

    def readdir(self, path: str, fh: int) -> List[str]:
        """List the entries of the mount root (job names) or of a directory inside a job."""
        directory_entries = ['.', '..']

        if path == '/':
            directory_entries.extend(self._get_job_names_map(refresh_jobs=True).keys())
        else:
            job, path_in_job = self._parse_path(path)
            dir_path_in_job = '/' if path_in_job == '/' else path_in_job + '/'
            # Number of slashes in the directory prefix == index of the child name in a split path
            depth = dir_path_in_job.count('/')
            directory_entries.extend(
                {
                    file.path.split('/')[depth]
                    for file in job.list_output_files()
                    if file.path.startswith(dir_path_in_job)
                }
            )

        return directory_entries

    def open(self, path: str, flags: int) -> int:
        """Check that ``path`` is an existing output file and return a dummy file handle."""
        job, path_in_job = self._parse_path(path)
        try:
            job.get_output_file(path_in_job)
        except BioLibError:
            # file not found
            raise FuseOSError(errno.ENOENT) from None

        return 1234  # dummy file handle

    def read(self, path: str, size: int, offset: int, fh: int) -> bytes:
        """Return ``size`` bytes starting at ``offset`` of the output file at ``path``."""
        job, path_in_job = self._parse_path(path)
        try:
            file = job.get_output_file(path_in_job)
        except BioLibError:
            raise FuseOSError(errno.ENOENT) from None  # No such file or directory

        return file.get_data(start=offset, length=size)

    def release(self, path: str, fh: int) -> int:
        return _SUCCESS_CODE

    def releasedir(self, path: str, fh: int) -> int:
        return _SUCCESS_CODE

    def flush(self, path: str, fh: int) -> int:
        return _SUCCESS_CODE

    @staticmethod
    def _get_directory_attributes(timestamp_epoch_seconds: int) -> _AttributeDict:
        """Build the attributes of a read-only directory owned by the current process user."""
        return _AttributeDict(
            st_atime=timestamp_epoch_seconds,
            st_ctime=timestamp_epoch_seconds,
            st_gid=os.getgid(),
            st_mode=stat.S_IFDIR | 0o555,  # Directory that is readable and executable by owner, group, and others.
            st_mtime=timestamp_epoch_seconds,
            st_nlink=1,
            st_size=1,
            st_uid=os.getuid(),
        )

    @staticmethod
    def _get_file_attributes(timestamp_epoch_seconds: int, size_in_bytes: int) -> _AttributeDict:
        """Build the attributes of a read-only regular file owned by the current process user."""
        return _AttributeDict(
            st_atime=timestamp_epoch_seconds,
            st_ctime=timestamp_epoch_seconds,
            st_gid=os.getgid(),
            st_mode=stat.S_IFREG | 0o444,  # Regular file with read permissions for owner, group, and others.
            st_mtime=timestamp_epoch_seconds,
            st_nlink=1,
            st_size=size_in_bytes,
            st_uid=os.getuid(),
        )

    def _get_job_names_map(self, refresh_jobs=False) -> Dict[str, Job]:
        """Return the job name -> Job map of the experiment's completed jobs.

        The jobs are fetched when the map is missing or empty, or when ``refresh_jobs`` is set
        and the last fetch is more than one second old.
        """
        current_time = time()
        if not self._job_names_map or (current_time - self._jobs_last_fetched_at > 1 and refresh_jobs):
            self._jobs_last_fetched_at = current_time
            self._job_names_map = {job.get_name(): job for job in self._experiment.get_jobs(status='completed')}

        return self._job_names_map

    def _parse_path(self, path: str) -> Tuple[Job, str]:
        """Split a mount path into the job it belongs to and the '/'-prefixed path inside that job.

        Raises:
            FuseOSError: With ``ENOENT`` if the first path component is not a known job name.
        """
        path_splitted = path.split('/')
        job_name = path_splitted[1]
        path_in_job = '/' + '/'.join(path_splitted[2:])
        job = self._get_job_names_map().get(job_name)
        if not job:
            raise FuseOSError(errno.ENOENT)  # No such file or directory

        return job, path_in_job

    # ----------------------------------- File system methods not implemented below -----------------------------------

    def chmod(self, path, mode):
        raise FuseOSError(errno.EACCES)

    def chown(self, path, uid, gid):
        raise FuseOSError(errno.EACCES)

    def mknod(self, path, mode, dev):
        raise FuseOSError(errno.EACCES)

    def rmdir(self, path):
        raise FuseOSError(errno.EACCES)

    def mkdir(self, path, mode):
        raise FuseOSError(errno.EACCES)

    def unlink(self, path):
        raise FuseOSError(errno.EACCES)

    def symlink(self, target, source):
        raise FuseOSError(errno.EACCES)

    def rename(self, old, new):
        raise FuseOSError(errno.EACCES)

    def link(self, target, source):
        raise FuseOSError(errno.EACCES)

    def utimens(self, path, times=None):
        raise FuseOSError(errno.EACCES)

    def create(self, path, mode, fi=None):
        raise FuseOSError(errno.EACCES)

    def write(self, path, data, offset, fh):
        raise FuseOSError(errno.EACCES)

    def truncate(self, path, length, fh=None):
        raise FuseOSError(errno.EACCES)

    def fsync(self, path, datasync, fh):
        raise FuseOSError(errno.EACCES)