pybiolib 0.2.951__py3-none-any.whl → 1.2.1890__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262) hide show
  1. biolib/__init__.py +357 -11
  2. biolib/_data_record/data_record.py +380 -0
  3. biolib/_index/__init__.py +0 -0
  4. biolib/_index/index.py +55 -0
  5. biolib/_index/query_result.py +103 -0
  6. biolib/_internal/__init__.py +0 -0
  7. biolib/_internal/add_copilot_prompts.py +58 -0
  8. biolib/_internal/add_gui_files.py +81 -0
  9. biolib/_internal/data_record/__init__.py +1 -0
  10. biolib/_internal/data_record/data_record.py +85 -0
  11. biolib/_internal/data_record/push_data.py +116 -0
  12. biolib/_internal/data_record/remote_storage_endpoint.py +43 -0
  13. biolib/_internal/errors.py +5 -0
  14. biolib/_internal/file_utils.py +125 -0
  15. biolib/_internal/fuse_mount/__init__.py +1 -0
  16. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  17. biolib/_internal/http_client.py +159 -0
  18. biolib/_internal/lfs/__init__.py +1 -0
  19. biolib/_internal/lfs/cache.py +51 -0
  20. biolib/_internal/libs/__init__.py +1 -0
  21. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  22. biolib/_internal/push_application.py +488 -0
  23. biolib/_internal/runtime.py +22 -0
  24. biolib/_internal/string_utils.py +13 -0
  25. biolib/_internal/templates/__init__.py +1 -0
  26. biolib/_internal/templates/copilot_template/.github/instructions/general-app-knowledge.instructions.md +10 -0
  27. biolib/_internal/templates/copilot_template/.github/instructions/style-general.instructions.md +20 -0
  28. biolib/_internal/templates/copilot_template/.github/instructions/style-python.instructions.md +16 -0
  29. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  30. biolib/_internal/templates/copilot_template/.github/prompts/biolib_app_inputs.prompt.md +11 -0
  31. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  32. biolib/_internal/templates/copilot_template/.github/prompts/biolib_run_apps.prompt.md +12 -0
  33. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  34. biolib/_internal/templates/github_workflow_template/.github/workflows/biolib.yml +21 -0
  35. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  36. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  37. biolib/_internal/templates/gui_template/App.tsx +53 -0
  38. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  39. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  40. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  41. biolib/_internal/templates/gui_template/index.css +5 -0
  42. biolib/_internal/templates/gui_template/index.html +13 -0
  43. biolib/_internal/templates/gui_template/index.tsx +10 -0
  44. biolib/_internal/templates/gui_template/package.json +27 -0
  45. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  46. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  47. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  48. biolib/_internal/templates/init_template/.biolib/config.yml +19 -0
  49. biolib/_internal/templates/init_template/Dockerfile +14 -0
  50. biolib/_internal/templates/init_template/requirements.txt +1 -0
  51. biolib/_internal/templates/init_template/run.py +12 -0
  52. biolib/_internal/templates/init_template/run.sh +4 -0
  53. biolib/_internal/templates/templates.py +25 -0
  54. biolib/_internal/tree_utils.py +106 -0
  55. biolib/_internal/utils/__init__.py +65 -0
  56. biolib/_internal/utils/auth.py +46 -0
  57. biolib/_internal/utils/job_url.py +33 -0
  58. biolib/_internal/utils/multinode.py +263 -0
  59. biolib/_runtime/runtime.py +157 -0
  60. biolib/_session/session.py +44 -0
  61. biolib/_shared/__init__.py +0 -0
  62. biolib/_shared/types/__init__.py +74 -0
  63. biolib/_shared/types/account.py +12 -0
  64. biolib/_shared/types/account_member.py +8 -0
  65. biolib/_shared/types/app.py +9 -0
  66. biolib/_shared/types/data_record.py +40 -0
  67. biolib/_shared/types/experiment.py +32 -0
  68. biolib/_shared/types/file_node.py +17 -0
  69. biolib/_shared/types/push.py +6 -0
  70. biolib/_shared/types/resource.py +37 -0
  71. biolib/_shared/types/resource_deploy_key.py +11 -0
  72. biolib/_shared/types/resource_permission.py +14 -0
  73. biolib/_shared/types/resource_version.py +19 -0
  74. biolib/_shared/types/result.py +14 -0
  75. biolib/_shared/types/typing.py +10 -0
  76. biolib/_shared/types/user.py +19 -0
  77. biolib/_shared/utils/__init__.py +7 -0
  78. biolib/_shared/utils/resource_uri.py +75 -0
  79. biolib/api/__init__.py +6 -0
  80. biolib/api/client.py +168 -0
  81. biolib/app/app.py +252 -49
  82. biolib/app/search_apps.py +45 -0
  83. biolib/biolib_api_client/api_client.py +126 -31
  84. biolib/biolib_api_client/app_types.py +24 -4
  85. biolib/biolib_api_client/auth.py +31 -8
  86. biolib/biolib_api_client/biolib_app_api.py +147 -52
  87. biolib/biolib_api_client/biolib_job_api.py +161 -141
  88. biolib/biolib_api_client/job_types.py +21 -5
  89. biolib/biolib_api_client/lfs_types.py +7 -23
  90. biolib/biolib_api_client/user_state.py +56 -0
  91. biolib/biolib_binary_format/__init__.py +1 -4
  92. biolib/biolib_binary_format/file_in_container.py +105 -0
  93. biolib/biolib_binary_format/module_input.py +24 -7
  94. biolib/biolib_binary_format/module_output_v2.py +149 -0
  95. biolib/biolib_binary_format/remote_endpoints.py +34 -0
  96. biolib/biolib_binary_format/remote_stream_seeker.py +59 -0
  97. biolib/biolib_binary_format/saved_job.py +3 -2
  98. biolib/biolib_binary_format/{attestation_document.py → stdout_and_stderr.py} +8 -8
  99. biolib/biolib_binary_format/system_status_update.py +3 -2
  100. biolib/biolib_binary_format/utils.py +175 -0
  101. biolib/biolib_docker_client/__init__.py +11 -2
  102. biolib/biolib_errors.py +36 -0
  103. biolib/biolib_logging.py +27 -10
  104. biolib/cli/__init__.py +38 -0
  105. biolib/cli/auth.py +46 -0
  106. biolib/cli/data_record.py +164 -0
  107. biolib/cli/index.py +32 -0
  108. biolib/cli/init.py +421 -0
  109. biolib/cli/lfs.py +101 -0
  110. biolib/cli/push.py +50 -0
  111. biolib/cli/run.py +63 -0
  112. biolib/cli/runtime.py +14 -0
  113. biolib/cli/sdk.py +16 -0
  114. biolib/cli/start.py +56 -0
  115. biolib/compute_node/cloud_utils/cloud_utils.py +110 -161
  116. biolib/compute_node/job_worker/cache_state.py +66 -88
  117. biolib/compute_node/job_worker/cache_types.py +1 -6
  118. biolib/compute_node/job_worker/docker_image_cache.py +112 -37
  119. biolib/compute_node/job_worker/executors/__init__.py +0 -3
  120. biolib/compute_node/job_worker/executors/docker_executor.py +532 -199
  121. biolib/compute_node/job_worker/executors/docker_types.py +9 -1
  122. biolib/compute_node/job_worker/executors/types.py +19 -9
  123. biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +30 -0
  124. biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +3 -5
  125. biolib/compute_node/job_worker/job_storage.py +108 -0
  126. biolib/compute_node/job_worker/job_worker.py +397 -212
  127. biolib/compute_node/job_worker/large_file_system.py +87 -38
  128. biolib/compute_node/job_worker/network_alloc.py +99 -0
  129. biolib/compute_node/job_worker/network_buffer.py +240 -0
  130. biolib/compute_node/job_worker/utilization_reporter_thread.py +197 -0
  131. biolib/compute_node/job_worker/utils.py +9 -24
  132. biolib/compute_node/remote_host_proxy.py +400 -98
  133. biolib/compute_node/utils.py +31 -9
  134. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  135. biolib/compute_node/webserver/proxy_utils.py +28 -0
  136. biolib/compute_node/webserver/webserver.py +130 -44
  137. biolib/compute_node/webserver/webserver_types.py +2 -6
  138. biolib/compute_node/webserver/webserver_utils.py +77 -12
  139. biolib/compute_node/webserver/worker_thread.py +183 -42
  140. biolib/experiments/__init__.py +0 -0
  141. biolib/experiments/experiment.py +356 -0
  142. biolib/jobs/__init__.py +1 -0
  143. biolib/jobs/job.py +741 -0
  144. biolib/jobs/job_result.py +185 -0
  145. biolib/jobs/types.py +50 -0
  146. biolib/py.typed +0 -0
  147. biolib/runtime/__init__.py +14 -0
  148. biolib/sdk/__init__.py +91 -0
  149. biolib/tables.py +34 -0
  150. biolib/typing_utils.py +2 -7
  151. biolib/user/__init__.py +1 -0
  152. biolib/user/sign_in.py +54 -0
  153. biolib/utils/__init__.py +162 -0
  154. biolib/utils/cache_state.py +94 -0
  155. biolib/utils/multipart_uploader.py +194 -0
  156. biolib/utils/seq_util.py +150 -0
  157. biolib/utils/zip/remote_zip.py +640 -0
  158. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  159. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  160. {pybiolib-0.2.951.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  161. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  162. README.md +0 -17
  163. biolib/app/app_result.py +0 -68
  164. biolib/app/utils.py +0 -62
  165. biolib/biolib-js/0-biolib.worker.js +0 -1
  166. biolib/biolib-js/1-biolib.worker.js +0 -1
  167. biolib/biolib-js/2-biolib.worker.js +0 -1
  168. biolib/biolib-js/3-biolib.worker.js +0 -1
  169. biolib/biolib-js/4-biolib.worker.js +0 -1
  170. biolib/biolib-js/5-biolib.worker.js +0 -1
  171. biolib/biolib-js/6-biolib.worker.js +0 -1
  172. biolib/biolib-js/index.html +0 -10
  173. biolib/biolib-js/main-biolib.js +0 -1
  174. biolib/biolib_api_client/biolib_account_api.py +0 -21
  175. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -108
  176. biolib/biolib_binary_format/aes_encrypted_package.py +0 -42
  177. biolib/biolib_binary_format/module_output.py +0 -58
  178. biolib/biolib_binary_format/rsa_encrypted_aes_package.py +0 -57
  179. biolib/biolib_push.py +0 -114
  180. biolib/cli.py +0 -203
  181. biolib/cli_utils.py +0 -273
  182. biolib/compute_node/cloud_utils/enclave_parent_types.py +0 -7
  183. biolib/compute_node/enclave/__init__.py +0 -2
  184. biolib/compute_node/enclave/enclave_remote_hosts.py +0 -53
  185. biolib/compute_node/enclave/nitro_secure_module_utils.py +0 -64
  186. biolib/compute_node/job_worker/executors/base_executor.py +0 -18
  187. biolib/compute_node/job_worker/executors/pyppeteer_executor.py +0 -173
  188. biolib/compute_node/job_worker/executors/remote/__init__.py +0 -1
  189. biolib/compute_node/job_worker/executors/remote/nitro_enclave_utils.py +0 -81
  190. biolib/compute_node/job_worker/executors/remote/remote_executor.py +0 -51
  191. biolib/lfs.py +0 -196
  192. biolib/pyppeteer/.circleci/config.yml +0 -100
  193. biolib/pyppeteer/.coveragerc +0 -3
  194. biolib/pyppeteer/.gitignore +0 -89
  195. biolib/pyppeteer/.pre-commit-config.yaml +0 -28
  196. biolib/pyppeteer/CHANGES.md +0 -253
  197. biolib/pyppeteer/CONTRIBUTING.md +0 -26
  198. biolib/pyppeteer/LICENSE +0 -12
  199. biolib/pyppeteer/README.md +0 -137
  200. biolib/pyppeteer/docs/Makefile +0 -177
  201. biolib/pyppeteer/docs/_static/custom.css +0 -28
  202. biolib/pyppeteer/docs/_templates/layout.html +0 -10
  203. biolib/pyppeteer/docs/changes.md +0 -1
  204. biolib/pyppeteer/docs/conf.py +0 -299
  205. biolib/pyppeteer/docs/index.md +0 -21
  206. biolib/pyppeteer/docs/make.bat +0 -242
  207. biolib/pyppeteer/docs/reference.md +0 -211
  208. biolib/pyppeteer/docs/server.py +0 -60
  209. biolib/pyppeteer/poetry.lock +0 -1699
  210. biolib/pyppeteer/pyppeteer/__init__.py +0 -135
  211. biolib/pyppeteer/pyppeteer/accessibility.py +0 -286
  212. biolib/pyppeteer/pyppeteer/browser.py +0 -401
  213. biolib/pyppeteer/pyppeteer/browser_fetcher.py +0 -194
  214. biolib/pyppeteer/pyppeteer/command.py +0 -22
  215. biolib/pyppeteer/pyppeteer/connection/__init__.py +0 -242
  216. biolib/pyppeteer/pyppeteer/connection/cdpsession.py +0 -101
  217. biolib/pyppeteer/pyppeteer/coverage.py +0 -346
  218. biolib/pyppeteer/pyppeteer/device_descriptors.py +0 -787
  219. biolib/pyppeteer/pyppeteer/dialog.py +0 -79
  220. biolib/pyppeteer/pyppeteer/domworld.py +0 -597
  221. biolib/pyppeteer/pyppeteer/emulation_manager.py +0 -53
  222. biolib/pyppeteer/pyppeteer/errors.py +0 -48
  223. biolib/pyppeteer/pyppeteer/events.py +0 -63
  224. biolib/pyppeteer/pyppeteer/execution_context.py +0 -156
  225. biolib/pyppeteer/pyppeteer/frame/__init__.py +0 -299
  226. biolib/pyppeteer/pyppeteer/frame/frame_manager.py +0 -306
  227. biolib/pyppeteer/pyppeteer/helpers.py +0 -245
  228. biolib/pyppeteer/pyppeteer/input.py +0 -371
  229. biolib/pyppeteer/pyppeteer/jshandle.py +0 -598
  230. biolib/pyppeteer/pyppeteer/launcher.py +0 -683
  231. biolib/pyppeteer/pyppeteer/lifecycle_watcher.py +0 -169
  232. biolib/pyppeteer/pyppeteer/models/__init__.py +0 -103
  233. biolib/pyppeteer/pyppeteer/models/_protocol.py +0 -12460
  234. biolib/pyppeteer/pyppeteer/multimap.py +0 -82
  235. biolib/pyppeteer/pyppeteer/network_manager.py +0 -678
  236. biolib/pyppeteer/pyppeteer/options.py +0 -8
  237. biolib/pyppeteer/pyppeteer/page.py +0 -1728
  238. biolib/pyppeteer/pyppeteer/pipe_transport.py +0 -59
  239. biolib/pyppeteer/pyppeteer/target.py +0 -147
  240. biolib/pyppeteer/pyppeteer/task_queue.py +0 -24
  241. biolib/pyppeteer/pyppeteer/timeout_settings.py +0 -36
  242. biolib/pyppeteer/pyppeteer/tracing.py +0 -93
  243. biolib/pyppeteer/pyppeteer/us_keyboard_layout.py +0 -305
  244. biolib/pyppeteer/pyppeteer/util.py +0 -18
  245. biolib/pyppeteer/pyppeteer/websocket_transport.py +0 -47
  246. biolib/pyppeteer/pyppeteer/worker.py +0 -101
  247. biolib/pyppeteer/pyproject.toml +0 -97
  248. biolib/pyppeteer/spell.txt +0 -137
  249. biolib/pyppeteer/tox.ini +0 -72
  250. biolib/pyppeteer/utils/generate_protocol_types.py +0 -603
  251. biolib/start_cli.py +0 -7
  252. biolib/utils.py +0 -47
  253. biolib/validators/validate_app_version.py +0 -183
  254. biolib/validators/validate_argument.py +0 -134
  255. biolib/validators/validate_module.py +0 -323
  256. biolib/validators/validate_zip_file.py +0 -40
  257. biolib/validators/validator_utils.py +0 -103
  258. pybiolib-0.2.951.dist-info/LICENSE +0 -21
  259. pybiolib-0.2.951.dist-info/METADATA +0 -61
  260. pybiolib-0.2.951.dist-info/RECORD +0 -153
  261. pybiolib-0.2.951.dist-info/entry_points.txt +0 -3
  262. /LICENSE → /pybiolib-1.2.1890.dist-info/licenses/LICENSE +0 -0
@@ -1,35 +1,19 @@
1
- from biolib.typing_utils import TypedDict, List
1
+ from biolib.typing_utils import TypedDict
2
2
 
3
3
 
4
- class LargeFileSystemVersion(TypedDict):
4
+ class DataRecordVersion(TypedDict):
5
5
  presigned_download_url: str
6
6
  size_bytes: int
7
7
  uri: str
8
8
  uuid: str
9
9
 
10
10
 
11
- class LargeFileSystem(TypedDict):
11
+ class DataRecordInfo(TypedDict):
12
12
  uri: str
13
13
  uuid: str
14
14
 
15
15
 
16
- class LfsVersionPresignedUploadUrlResponse(TypedDict):
17
- presigned_upload_url: str
18
-
19
-
20
- class LfsUploadPartMetadata(TypedDict):
21
- PartNumber: int
22
- ETag: str
23
-
24
-
25
- class ZipProxyFileMetadata(TypedDict):
26
- path: str
27
- size_bytes: int
28
-
29
-
30
- class ZipProxyFileListResponse(TypedDict):
31
- files: List[ZipProxyFileMetadata]
32
-
33
-
34
- class LargeFileSystemVersionMetadata(LargeFileSystemVersion):
35
- files: List[ZipProxyFileMetadata]
16
+ class DataRecordVersionInfo(TypedDict):
17
+ resource_uri: str
18
+ resource_uuid: str
19
+ resource_version_uuid: str
@@ -0,0 +1,56 @@
1
+ from biolib.biolib_logging import logger_no_user_data
2
+ from biolib.typing_utils import Optional, TypedDict
3
+ from biolib.utils.cache_state import CacheState
4
+
5
+ # TODO: Save job keys in the user state instead of a separate state file
6
+ # UuidStr = str
7
+ # class JobStateType(TypedDict):
8
+ # job_uuid: UuidStr
9
+ # aes_key: Optional[str]
10
+
11
+
12
class UserStateType(TypedDict):
    """Shape of the user state dictionary handled by ``UserState``."""

    refresh_token: Optional[str]
    # jobs: Dict[UuidStr, JobStateType]


class UserState(CacheState[UserStateType]):
    """User state backed by a state file, with a fallback to in-memory state.

    If entering or exiting the underlying ``CacheState`` context raises, a warning
    is logged and this instance stops touching the state file: from then on the
    state only lives in ``self._state`` for the lifetime of this object.
    """

    def __init__(self) -> None:
        super().__init__(fail_fast_on_lock_acquire=True)
        # Flipped to True the first time the state file cannot be accessed or written.
        self._is_in_memory_only: bool = False

    @property
    def _state_path(self) -> str:
        return f'{self._user_cache_dir}/user-state.json'

    def _get_default_state(self) -> UserStateType:
        return UserStateType(refresh_token=None)

    def __enter__(self) -> UserStateType:
        """Return the state dict, preferring the file-backed state when it is usable."""
        if not self._is_in_memory_only:
            try:
                return super().__enter__()
            except Exception as error:
                logger_no_user_data.warning(
                    f'UserState: Could not access state file, continuing with in-memory state only. '
                    f'Login state will not persist across Python processes. Error: {error}'
                )
                self._is_in_memory_only = True

        # In-memory mode: lazily create the default state and hand it out directly.
        if self._state is None:
            self._state = self._get_default_state()
        return self._state

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Delegate to the file-backed exit unless running in in-memory mode."""
        if not self._is_in_memory_only:
            try:
                super().__exit__(exc_type, exc_val, exc_tb)
            except Exception as error:
                logger_no_user_data.warning(
                    f'UserState: Could not write state file. '
                    f'Login state will not persist across Python processes. Error: {error}'
                )
                self._is_in_memory_only = True
@@ -1,9 +1,6 @@
1
1
  from .module_input import *
2
- from .module_output import *
3
- from .aes_encrypted_package import *
4
- from .attestation_document import *
5
- from .rsa_encrypted_aes_package import *
6
2
  from .system_status_update import *
7
3
  from .saved_job import *
8
4
  from .system_exception import *
9
5
  from .base_bbf_package import *
6
+ from .module_output_v2 import *
@@ -0,0 +1,105 @@
1
+ import os.path
2
+ import tarfile
3
+ import tempfile
4
+
5
+ from docker.models.containers import Container # type: ignore
6
+
7
+ from biolib.typing_utils import Iterable, Optional
8
+
9
+
10
class FileInContainer:
    """A single file inside a Docker container.

    When ``overlay_upper_dir_path`` is given, the file is read directly from
    ``overlay_upper_dir_path + path_in_container`` on local disk. Otherwise the
    path is fetched from the container as a tar archive (``container.get_archive``)
    and the contained file is extracted to a temporary file first.
    """

    # Number of bytes read per chunk when streaming or copying file data.
    _READ_CHUNK_SIZE_BYTES = 1_000_000

    def __init__(self, container: 'Container', path_in_container: str, overlay_upper_dir_path: Optional[str]):
        self._container: 'Container' = container
        self._path_on_disk: Optional[str] = (
            overlay_upper_dir_path + path_in_container if overlay_upper_dir_path else None
        )

        self._path_in_container: str = path_in_container
        # Reported path; starts as the container path but can be reassigned via the `path` setter.
        self._path: str = path_in_container
        self._buffered_file_data: Optional[bytes] = None

    def __repr__(self) -> str:
        return f'FileInContainer({self.path})'

    @property
    def path(self) -> str:
        return self._path

    @path.setter
    def path(self, value: str) -> None:
        self._path = value

    def is_file(self) -> bool:
        """Return True if the path refers to a regular file."""
        if self._path_on_disk:
            return os.path.isfile(self._path_on_disk)

        tmp_file = self._get_temp_file_from_container_via_tar()
        if not tmp_file:
            return False

        os.remove(tmp_file)
        return True

    def get_data_size_in_bytes(self) -> int:
        """Return the file size in bytes, or 0 if the container path is not a regular file."""
        if self._path_on_disk:
            return os.path.getsize(self._path_on_disk)

        tmp_file = self._get_temp_file_from_container_via_tar()
        if not tmp_file:
            return 0

        try:
            return os.path.getsize(tmp_file)
        finally:
            # Remove the extracted copy even if reading its size fails.
            os.remove(tmp_file)

    def get_data_stream(self) -> Iterable[bytes]:
        """Yield the file content in chunks.

        If the container path is not a regular file, a single empty chunk is yielded.
        The file handle (and, in container mode, the extracted temporary file) is
        released even when the consumer stops iterating early or an error occurs.
        """
        if self._path_on_disk:
            yield from self._read_chunks(self._path_on_disk)
            return

        tmp_file = self._get_temp_file_from_container_via_tar()
        if not tmp_file:
            yield bytes()
            return

        try:
            yield from self._read_chunks(tmp_file)
        finally:
            # Runs on exhaustion, on generator close and on errors, so the temp file never outlives the stream.
            os.remove(tmp_file)

    @staticmethod
    def _read_chunks(file_path: str) -> Iterable[bytes]:
        """Yield the content of `file_path` in chunks, closing the file when the generator finishes or is closed."""
        with open(file_path, mode='rb') as file:
            while True:
                chunk = file.read(FileInContainer._READ_CHUNK_SIZE_BYTES)
                if not chunk:
                    return

                yield chunk

    def _get_temp_file_from_container_via_tar(self) -> Optional[str]:
        """Fetch the path from the container and extract it to a temporary file.

        Returns the path of the extracted temporary file, which the caller must
        delete, or None if the archive does not contain a regular file.
        """
        with tempfile.NamedTemporaryFile(mode='wb', delete=True) as tmp_io:
            stream, _ = self._container.get_archive(path=self._path_in_container)
            for chunk in stream:
                tmp_io.write(chunk)

            # Make sure all archive bytes are on disk before the archive is re-opened by name below.
            tmp_io.flush()
            with tarfile.open(tmp_io.name) as tar:
                file_members = [member for member in tar.getmembers() if member.isfile()]
                if not file_members:
                    # Path was not a file
                    return None

                assert len(file_members) == 1
                file_obj = tar.extractfile(member=file_members[0])
                if not file_obj:
                    # Path was not a file
                    return None

                extracted_file = tempfile.NamedTemporaryFile(mode='wb', delete=False)
                try:
                    with extracted_file, file_obj:
                        while True:
                            chunk = file_obj.read(self._READ_CHUNK_SIZE_BYTES)
                            if not chunk:
                                break
                            extracted_file.write(chunk)
                except BaseException:
                    # Do not leave a half-written temp file behind when the copy fails.
                    os.remove(extracted_file.name)
                    raise

                return extracted_file.name
@@ -1,4 +1,12 @@
1
1
  from biolib.biolib_binary_format.base_bbf_package import BioLibBinaryFormatBasePackage
2
+ from biolib.biolib_logging import logger
3
+ from biolib.typing_utils import TypedDict, Dict, List
4
+
5
+
6
+ class ModuleInputDict(TypedDict):
7
+ stdin: bytes
8
+ files: Dict[str, bytes]
9
+ arguments: List[str]
2
10
 
3
11
 
4
12
  class ModuleInput(BioLibBinaryFormatBasePackage):
@@ -6,7 +14,11 @@ class ModuleInput(BioLibBinaryFormatBasePackage):
6
14
  super().__init__(bbf)
7
15
  self.package_type = 1
8
16
 
9
- def serialize(self, stdin, arguments, files):
17
+ def serialize(self, stdin, arguments, files) -> bytes:
18
+ for path in files.keys():
19
+ if '//' in path:
20
+ raise ValueError(f"File path '{path}' contains double slashes which are not allowed")
21
+
10
22
  bbf_data = bytearray()
11
23
  bbf_data.extend(self.version.to_bytes(1, 'big'))
12
24
  bbf_data.extend(self.package_type.to_bytes(1, 'big'))
@@ -22,19 +34,21 @@ class ModuleInput(BioLibBinaryFormatBasePackage):
22
34
  bbf_data.extend(stdin)
23
35
 
24
36
  for argument in arguments:
25
- bbf_data.extend(len(argument).to_bytes(2, 'big'))
26
- bbf_data.extend(argument.encode())
37
+ encoded_argument = argument.encode()
38
+ bbf_data.extend(len(encoded_argument).to_bytes(2, 'big'))
39
+ bbf_data.extend(encoded_argument)
27
40
 
28
41
  for path, data in files.items():
29
- bbf_data.extend(len(path.encode()).to_bytes(4, 'big'))
42
+ encoded_path = path.encode()
43
+ bbf_data.extend(len(encoded_path).to_bytes(4, 'big'))
30
44
  bbf_data.extend(len(data).to_bytes(8, 'big'))
31
45
 
32
- bbf_data.extend(path.encode())
46
+ bbf_data.extend(encoded_path)
33
47
  bbf_data.extend(data)
34
48
 
35
49
  return bbf_data
36
50
 
37
- def deserialize(self):
51
+ def deserialize(self) -> ModuleInputDict:
38
52
  version = self.get_data(1, output_type='int')
39
53
  package_type = self.get_data(1, output_type='int')
40
54
  self.check_version_and_type(version=version, package_type=package_type, expected_package_type=self.package_type)
@@ -58,6 +72,9 @@ class ModuleInput(BioLibBinaryFormatBasePackage):
58
72
  data_len = self.get_data(8, output_type='int')
59
73
  path = self.get_data(path_len, output_type='str')
60
74
  data = self.get_data(data_len)
75
+ if '//' in path:
76
+ # TODO: Raise ValueError here once backwards compatibility period is over
77
+ logger.warning(f"File path '{path}' contains double slashes which are not allowed")
61
78
  files[path] = bytes(data)
62
79
 
63
- return {'stdin': stdin, 'arguments': arguments, 'files': files}
80
+ return ModuleInputDict(stdin=stdin, arguments=arguments, files=files)
@@ -0,0 +1,149 @@
1
+ from biolib.biolib_binary_format import BioLibBinaryFormatBasePackage
2
+ from biolib.biolib_binary_format.utils import IndexableBuffer, InMemoryIndexableBuffer, LazyLoadedFile
3
+ from biolib.biolib_binary_format.file_in_container import FileInContainer
4
+ from biolib.typing_utils import TypedDict, List, Optional
5
+
6
+
7
class Metadata(TypedDict):
    """Parsed fixed-size header of a ModuleOutputV2 package (one integer per header field)."""

    version: int
    type: int
    stdout_length: int
    stderr_length: int
    files_info_length: int
    files_data_length: int
    exit_code: int


class ModuleOutputV2(BioLibBinaryFormatBasePackage):
    """Reader and writer for the ModuleOutputV2 binary package.

    Layout as produced by `write_to_file`:
      1. header: the fields of `_metadata_byte_lengths`, in that order
      2. stdout bytes
      3. stderr bytes
      4. files info: per file, path length, path bytes, data length
      5. files data: the content of every file, concatenated in files-info order

    Reading is lazy: header, stdout, stderr and the file list are each fetched
    from the buffer on first access and cached on the instance.
    """

    _version = 1
    _type = 11
    _metadata_byte_lengths = dict(
        # Note: order is important
        version=1,
        type=1,
        stdout_length=8,
        stderr_length=8,
        files_info_length=8,
        files_data_length=8,
        exit_code=2,
    )
    _metadata_length = sum(_metadata_byte_lengths.values())
    _file_path_length_bytes = 4
    _file_data_length_bytes = 8

    def __init__(self, buffer: IndexableBuffer):
        super().__init__()
        self._buffer = buffer

        self._metadata: Optional[Metadata] = None
        self._stdout: Optional[bytes] = None
        self._stderr: Optional[bytes] = None
        self._files: Optional[List[LazyLoadedFile]] = None

    @property
    def buffer(self) -> IndexableBuffer:
        return self._buffer

    def get_exit_code(self) -> int:
        """Return the exit code stored in the header."""
        return self._get_metadata()['exit_code']

    def get_stdout(self) -> bytes:
        """Return the stdout section, which starts right after the header."""
        if self._stdout is None:
            metadata = self._get_metadata()
            self._stdout = self._buffer.get_data(start=self._metadata_length, length=metadata['stdout_length'])

        return self._stdout

    def get_stderr(self) -> bytes:
        """Return the stderr section, which starts right after stdout."""
        if self._stderr is None:
            metadata = self._get_metadata()
            self._stderr = self._buffer.get_data(
                start=self._metadata_length + metadata['stdout_length'],
                length=metadata['stderr_length'],
            )

        return self._stderr

    def get_files(self) -> List[LazyLoadedFile]:
        """Return one LazyLoadedFile per entry of the files-info section.

        Only the files-info section is fetched here; each returned file refers to
        its data by start offset and length in the underlying buffer.
        """
        metadata = self._get_metadata()
        if self._files is not None:
            return self._files

        # Build into a local list so a failure midway never caches a partial result.
        files: List[LazyLoadedFile] = []
        if metadata['files_info_length'] != 0:
            files_info_start = self._metadata_length + metadata['stdout_length'] + metadata['stderr_length']
            files_info_buffer = InMemoryIndexableBuffer(
                data=self._buffer.get_data(start=files_info_start, length=metadata['files_info_length'])
            )

            # File data begins directly after the files-info section and is laid out in the same order.
            files_data_pointer = files_info_start + metadata['files_info_length']
            while files_info_buffer.pointer < len(files_info_buffer):
                path_length = files_info_buffer.get_data_with_pointer_as_int(self._file_path_length_bytes)
                path = files_info_buffer.get_data_with_pointer_as_string(path_length)
                data_length = files_info_buffer.get_data_with_pointer_as_int(self._file_data_length_bytes)

                data_start = files_data_pointer
                files_data_pointer += data_length

                files.append(LazyLoadedFile(path=path, buffer=self._buffer, start=data_start, length=data_length))

        self._files = files
        return self._files

    def _get_metadata(self) -> Metadata:
        """Parse and cache the header.

        Raises:
            Exception: if the stored version or type does not match this class.
        """
        if self._metadata is None:
            metadata_buffer = InMemoryIndexableBuffer(self._buffer.get_data(start=0, length=self._metadata_length))

            partial_metadata = {}
            for field_name, field_length in self._metadata_byte_lengths.items():
                value = metadata_buffer.get_data_with_pointer_as_int(length=field_length)  # type: ignore
                if field_name == 'version' and value != ModuleOutputV2._version:
                    raise Exception('Version does not match')

                if field_name == 'type' and value != ModuleOutputV2._type:
                    raise Exception('Type does not match')

                partial_metadata[field_name] = value

            self._metadata = partial_metadata  # type: ignore

        return self._metadata  # type: ignore

    @staticmethod
    def write_to_file(
        output_file_path: str,
        exit_code: int,
        files: List[FileInContainer],
        stderr: bytes,
        stdout: bytes,
    ) -> None:
        """Serialize a module output to `output_file_path`.

        All integers are written big-endian using the widths declared on the class,
        so the writer cannot drift from what the reader expects. File sizes are
        queried up front for the header; file contents are streamed afterwards.
        """
        meta_lengths = ModuleOutputV2._metadata_byte_lengths
        path_length_bytes = ModuleOutputV2._file_path_length_bytes
        data_length_bytes = ModuleOutputV2._file_data_length_bytes

        with open(output_file_path, mode='wb') as output_file:
            output_file.write(ModuleOutputV2._version.to_bytes(meta_lengths['version'], byteorder='big'))
            output_file.write(ModuleOutputV2._type.to_bytes(meta_lengths['type'], byteorder='big'))
            output_file.write(len(stdout).to_bytes(meta_lengths['stdout_length'], byteorder='big'))
            output_file.write(len(stderr).to_bytes(meta_lengths['stderr_length'], byteorder='big'))

            files_info = bytearray()
            files_data_size = 0

            for file in files:
                file_size = file.get_data_size_in_bytes()
                path_as_bytes = file.path.encode()

                files_info.extend(len(path_as_bytes).to_bytes(length=path_length_bytes, byteorder='big'))
                files_info.extend(path_as_bytes)
                files_info.extend(file_size.to_bytes(length=data_length_bytes, byteorder='big'))
                files_data_size += file_size

            output_file.write(len(files_info).to_bytes(length=meta_lengths['files_info_length'], byteorder='big'))
            output_file.write(files_data_size.to_bytes(length=meta_lengths['files_data_length'], byteorder='big'))
            output_file.write(exit_code.to_bytes(length=meta_lengths['exit_code'], byteorder='big'))
            output_file.write(stdout)
            output_file.write(stderr)
            output_file.write(files_info)

            for file in files:
                for chunk in file.get_data_stream():
                    output_file.write(chunk)
@@ -0,0 +1,34 @@
1
+ from datetime import datetime, timedelta, timezone
2
+
3
+ from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
4
+ from biolib.biolib_binary_format.utils import RemoteEndpoint
5
+
6
+ # from urllib.parse import urlparse, parse_qs
7
+ from biolib.biolib_logging import logger
8
+ from biolib.typing_utils import Literal
9
+
10
+
11
class RemoteJobStorageEndpoint(RemoteEndpoint):
    """Remote endpoint for a job's input or output storage, served through a cached presigned download URL."""

    def __init__(self, job_uuid: str, job_auth_token: str, storage_type: Literal['input', 'output']):
        self._job_uuid = job_uuid
        self._job_auth_token = job_auth_token
        self._storage_type: Literal['input', 'output'] = storage_type
        # Both are populated on the first call to get_remote_url().
        self._presigned_url = None
        self._expires_at = None

    def get_remote_url(self):
        """Return the presigned download URL, fetching a new one if none is cached or the cached one has expired."""
        is_cached_url_valid = (
            self._presigned_url and self._expires_at and datetime.now(timezone.utc) <= self._expires_at
        )
        if is_cached_url_valid:
            return self._presigned_url

        # The API names the output storage 'results'.
        api_storage_type = 'results' if self._storage_type == 'output' else 'input'
        self._presigned_url = BiolibJobApi.get_job_storage_download_url(
            job_auth_token=self._job_auth_token,
            job_uuid=self._job_uuid,
            storage_type=api_storage_type,
        )
        # TODO: Derive the expiry from the URL's 'X-Amz-Date' and 'X-Amz-Expires' query parameters
        #  instead of assuming a fixed lifetime of 8 minutes from now.
        self._expires_at = datetime.now(timezone.utc) + timedelta(minutes=8)
        logger.debug(f'Job "{self._job_uuid}" fetched presigned URL with expiry at {self._expires_at.isoformat()}')

        return self._presigned_url
@@ -0,0 +1,59 @@
1
+ from biolib.biolib_binary_format.utils import IndexableBuffer
2
+ from biolib.biolib_logging import logger
3
+ from biolib.typing_utils import Iterable
4
+
5
+
6
class StreamSeeker:
    """Forward-only reader serving byte ranges from an upstream buffer.

    Bytes are fetched from `upstream_buffer` in pieces of at most `max_chunk_size`
    and never beyond `files_data_end`. Bytes that were fetched but not yet consumed
    (for example read-ahead) are kept and served to later reads without fetching again.
    """

    def __init__(
        self,
        upstream_buffer: 'IndexableBuffer',
        files_data_start: int,
        files_data_end: int,
        max_chunk_size: int,
    ):
        self._upstream_buffer = upstream_buffer
        self._files_data_end = files_data_end
        self._max_chunk_size = max_chunk_size

        # Upstream offset of the first byte currently held in self._buffer.
        self._buffer_start = files_data_start
        self._buffer = bytearray()

    def seek_and_read(self, file_start: int, file_length: int, read_ahead_bytes: int = 0) -> Iterable[bytes]:
        """Yield the `file_length` bytes that start at upstream offset `file_start`.

        `file_start` must not lie before the current read position. Buffered bytes
        located before `file_start` are discarded. Up to `read_ahead_bytes` extra
        bytes may be fetched along with the file data and kept for later reads.

        The stream ends early, after logging an error, if the requested range runs
        past `files_data_end` or if the upstream buffer returns no data.
        """
        assert file_start >= self._buffer_start, 'StreamSeeker can only seek forwards'
        # Drop buffered bytes that precede the requested start (in place, no re-copy of the tail).
        del self._buffer[: file_start - self._buffer_start]
        self._buffer_start = file_start

        while True:
            file_byte_count_remaining = file_length - (self._buffer_start - file_start)
            if file_byte_count_remaining <= 0:
                return

            if self._buffer:
                take = min(file_byte_count_remaining, len(self._buffer))
                chunk = self._buffer[:take]
                yield chunk
                # Advance only after the consumer resumes, so an abandoned read keeps its bytes buffered.
                del self._buffer[:take]
                self._buffer_start += take
                continue

            start_of_fetch = self._buffer_start + len(self._buffer)
            bytes_left_in_stream = self._files_data_end - start_of_fetch
            if bytes_left_in_stream <= 0:
                logger.error(
                    'StreamSeeker: no bytes left upstream (start_of_fetch=%d, files_data_end=%d)',
                    start_of_fetch,
                    self._files_data_end,
                )
                return

            fetch_size = min(self._max_chunk_size, file_byte_count_remaining + read_ahead_bytes)
            if fetch_size > bytes_left_in_stream:
                logger.error(
                    'StreamSeeker: fetch_size (%d) > bytes_left_in_stream (%d); clamping',
                    fetch_size,
                    bytes_left_in_stream,
                )
                fetch_size = bytes_left_in_stream

            fetched_data = self._upstream_buffer.get_data(start=start_of_fetch, length=fetch_size)
            if not fetched_data:
                # Without this guard an empty response would make the loop spin forever.
                logger.error(
                    'StreamSeeker: upstream returned no data (start_of_fetch=%d, fetch_size=%d)',
                    start_of_fetch,
                    fetch_size,
                )
                return

            self._buffer.extend(fetched_data)
@@ -11,8 +11,9 @@ class SavedJob(BioLibBinaryFormatBasePackage):
11
11
  bbf_data.extend(self.version.to_bytes(1, 'big'))
12
12
  bbf_data.extend(self.package_type.to_bytes(1, 'big'))
13
13
 
14
- bbf_data.extend(len(saved_job_json_string).to_bytes(4, 'big'))
15
- bbf_data.extend(saved_job_json_string.encode())
14
+ encoded_saved_job_json_string = saved_job_json_string.encode()
15
+ bbf_data.extend(len(encoded_saved_job_json_string).to_bytes(4, 'big'))
16
+ bbf_data.extend(encoded_saved_job_json_string)
16
17
 
17
18
  return bbf_data
18
19
 
@@ -1,18 +1,18 @@
1
1
  from biolib.biolib_binary_format.base_bbf_package import BioLibBinaryFormatBasePackage
2
2
 
3
3
 
4
- class AttestationDocument(BioLibBinaryFormatBasePackage):
4
+ class StdoutAndStderr(BioLibBinaryFormatBasePackage):
5
5
  def __init__(self, bbf=None):
6
6
  super().__init__(bbf)
7
- self.package_type = 4
7
+ self.package_type = 10
8
8
 
9
- def serialize(self, attestation_document_bytes):
9
+ def serialize(self, stdout_and_stderr_bytes):
10
10
  bbf_data = bytearray()
11
11
  bbf_data.extend(self.version.to_bytes(1, 'big'))
12
12
  bbf_data.extend(self.package_type.to_bytes(1, 'big'))
13
13
 
14
- bbf_data.extend(len(attestation_document_bytes).to_bytes(4, 'big'))
15
- bbf_data.extend(attestation_document_bytes)
14
+ bbf_data.extend(len(stdout_and_stderr_bytes).to_bytes(8, 'big'))
15
+ bbf_data.extend(stdout_and_stderr_bytes)
16
16
 
17
17
  return bbf_data
18
18
 
@@ -21,7 +21,7 @@ class AttestationDocument(BioLibBinaryFormatBasePackage):
21
21
  package_type = self.get_data(1, output_type='int')
22
22
  self.check_version_and_type(version=version, package_type=package_type, expected_package_type=self.package_type)
23
23
 
24
- attestation_document_len = self.get_data(4, output_type='int')
25
- attestation_document = self.get_data(attestation_document_len)
24
+ stdout_and_stderr_length = self.get_data(8, output_type='int')
25
+ stdout_and_stderr = self.get_data(stdout_and_stderr_length)
26
26
 
27
- return attestation_document
27
+ return stdout_and_stderr
@@ -12,8 +12,9 @@ class SystemStatusUpdate(BioLibBinaryFormatBasePackage):
12
12
  bbf_data.extend(self.package_type.to_bytes(1, 'big'))
13
13
 
14
14
  bbf_data.extend(progress.to_bytes(2, 'big'))
15
- bbf_data.extend(len(log_message).to_bytes(4, 'big'))
16
- bbf_data.extend(log_message.encode())
15
+ encoded_log_message = log_message.encode()
16
+ bbf_data.extend(len(encoded_log_message).to_bytes(4, 'big'))
17
+ bbf_data.extend(encoded_log_message)
17
18
 
18
19
  return bbf_data
19
20