pybiolib 0.2.951__py3-none-any.whl → 1.2.1890__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262) hide show
  1. biolib/__init__.py +357 -11
  2. biolib/_data_record/data_record.py +380 -0
  3. biolib/_index/__init__.py +0 -0
  4. biolib/_index/index.py +55 -0
  5. biolib/_index/query_result.py +103 -0
  6. biolib/_internal/__init__.py +0 -0
  7. biolib/_internal/add_copilot_prompts.py +58 -0
  8. biolib/_internal/add_gui_files.py +81 -0
  9. biolib/_internal/data_record/__init__.py +1 -0
  10. biolib/_internal/data_record/data_record.py +85 -0
  11. biolib/_internal/data_record/push_data.py +116 -0
  12. biolib/_internal/data_record/remote_storage_endpoint.py +43 -0
  13. biolib/_internal/errors.py +5 -0
  14. biolib/_internal/file_utils.py +125 -0
  15. biolib/_internal/fuse_mount/__init__.py +1 -0
  16. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  17. biolib/_internal/http_client.py +159 -0
  18. biolib/_internal/lfs/__init__.py +1 -0
  19. biolib/_internal/lfs/cache.py +51 -0
  20. biolib/_internal/libs/__init__.py +1 -0
  21. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  22. biolib/_internal/push_application.py +488 -0
  23. biolib/_internal/runtime.py +22 -0
  24. biolib/_internal/string_utils.py +13 -0
  25. biolib/_internal/templates/__init__.py +1 -0
  26. biolib/_internal/templates/copilot_template/.github/instructions/general-app-knowledge.instructions.md +10 -0
  27. biolib/_internal/templates/copilot_template/.github/instructions/style-general.instructions.md +20 -0
  28. biolib/_internal/templates/copilot_template/.github/instructions/style-python.instructions.md +16 -0
  29. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  30. biolib/_internal/templates/copilot_template/.github/prompts/biolib_app_inputs.prompt.md +11 -0
  31. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  32. biolib/_internal/templates/copilot_template/.github/prompts/biolib_run_apps.prompt.md +12 -0
  33. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  34. biolib/_internal/templates/github_workflow_template/.github/workflows/biolib.yml +21 -0
  35. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  36. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  37. biolib/_internal/templates/gui_template/App.tsx +53 -0
  38. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  39. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  40. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  41. biolib/_internal/templates/gui_template/index.css +5 -0
  42. biolib/_internal/templates/gui_template/index.html +13 -0
  43. biolib/_internal/templates/gui_template/index.tsx +10 -0
  44. biolib/_internal/templates/gui_template/package.json +27 -0
  45. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  46. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  47. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  48. biolib/_internal/templates/init_template/.biolib/config.yml +19 -0
  49. biolib/_internal/templates/init_template/Dockerfile +14 -0
  50. biolib/_internal/templates/init_template/requirements.txt +1 -0
  51. biolib/_internal/templates/init_template/run.py +12 -0
  52. biolib/_internal/templates/init_template/run.sh +4 -0
  53. biolib/_internal/templates/templates.py +25 -0
  54. biolib/_internal/tree_utils.py +106 -0
  55. biolib/_internal/utils/__init__.py +65 -0
  56. biolib/_internal/utils/auth.py +46 -0
  57. biolib/_internal/utils/job_url.py +33 -0
  58. biolib/_internal/utils/multinode.py +263 -0
  59. biolib/_runtime/runtime.py +157 -0
  60. biolib/_session/session.py +44 -0
  61. biolib/_shared/__init__.py +0 -0
  62. biolib/_shared/types/__init__.py +74 -0
  63. biolib/_shared/types/account.py +12 -0
  64. biolib/_shared/types/account_member.py +8 -0
  65. biolib/_shared/types/app.py +9 -0
  66. biolib/_shared/types/data_record.py +40 -0
  67. biolib/_shared/types/experiment.py +32 -0
  68. biolib/_shared/types/file_node.py +17 -0
  69. biolib/_shared/types/push.py +6 -0
  70. biolib/_shared/types/resource.py +37 -0
  71. biolib/_shared/types/resource_deploy_key.py +11 -0
  72. biolib/_shared/types/resource_permission.py +14 -0
  73. biolib/_shared/types/resource_version.py +19 -0
  74. biolib/_shared/types/result.py +14 -0
  75. biolib/_shared/types/typing.py +10 -0
  76. biolib/_shared/types/user.py +19 -0
  77. biolib/_shared/utils/__init__.py +7 -0
  78. biolib/_shared/utils/resource_uri.py +75 -0
  79. biolib/api/__init__.py +6 -0
  80. biolib/api/client.py +168 -0
  81. biolib/app/app.py +252 -49
  82. biolib/app/search_apps.py +45 -0
  83. biolib/biolib_api_client/api_client.py +126 -31
  84. biolib/biolib_api_client/app_types.py +24 -4
  85. biolib/biolib_api_client/auth.py +31 -8
  86. biolib/biolib_api_client/biolib_app_api.py +147 -52
  87. biolib/biolib_api_client/biolib_job_api.py +161 -141
  88. biolib/biolib_api_client/job_types.py +21 -5
  89. biolib/biolib_api_client/lfs_types.py +7 -23
  90. biolib/biolib_api_client/user_state.py +56 -0
  91. biolib/biolib_binary_format/__init__.py +1 -4
  92. biolib/biolib_binary_format/file_in_container.py +105 -0
  93. biolib/biolib_binary_format/module_input.py +24 -7
  94. biolib/biolib_binary_format/module_output_v2.py +149 -0
  95. biolib/biolib_binary_format/remote_endpoints.py +34 -0
  96. biolib/biolib_binary_format/remote_stream_seeker.py +59 -0
  97. biolib/biolib_binary_format/saved_job.py +3 -2
  98. biolib/biolib_binary_format/{attestation_document.py → stdout_and_stderr.py} +8 -8
  99. biolib/biolib_binary_format/system_status_update.py +3 -2
  100. biolib/biolib_binary_format/utils.py +175 -0
  101. biolib/biolib_docker_client/__init__.py +11 -2
  102. biolib/biolib_errors.py +36 -0
  103. biolib/biolib_logging.py +27 -10
  104. biolib/cli/__init__.py +38 -0
  105. biolib/cli/auth.py +46 -0
  106. biolib/cli/data_record.py +164 -0
  107. biolib/cli/index.py +32 -0
  108. biolib/cli/init.py +421 -0
  109. biolib/cli/lfs.py +101 -0
  110. biolib/cli/push.py +50 -0
  111. biolib/cli/run.py +63 -0
  112. biolib/cli/runtime.py +14 -0
  113. biolib/cli/sdk.py +16 -0
  114. biolib/cli/start.py +56 -0
  115. biolib/compute_node/cloud_utils/cloud_utils.py +110 -161
  116. biolib/compute_node/job_worker/cache_state.py +66 -88
  117. biolib/compute_node/job_worker/cache_types.py +1 -6
  118. biolib/compute_node/job_worker/docker_image_cache.py +112 -37
  119. biolib/compute_node/job_worker/executors/__init__.py +0 -3
  120. biolib/compute_node/job_worker/executors/docker_executor.py +532 -199
  121. biolib/compute_node/job_worker/executors/docker_types.py +9 -1
  122. biolib/compute_node/job_worker/executors/types.py +19 -9
  123. biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +30 -0
  124. biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +3 -5
  125. biolib/compute_node/job_worker/job_storage.py +108 -0
  126. biolib/compute_node/job_worker/job_worker.py +397 -212
  127. biolib/compute_node/job_worker/large_file_system.py +87 -38
  128. biolib/compute_node/job_worker/network_alloc.py +99 -0
  129. biolib/compute_node/job_worker/network_buffer.py +240 -0
  130. biolib/compute_node/job_worker/utilization_reporter_thread.py +197 -0
  131. biolib/compute_node/job_worker/utils.py +9 -24
  132. biolib/compute_node/remote_host_proxy.py +400 -98
  133. biolib/compute_node/utils.py +31 -9
  134. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  135. biolib/compute_node/webserver/proxy_utils.py +28 -0
  136. biolib/compute_node/webserver/webserver.py +130 -44
  137. biolib/compute_node/webserver/webserver_types.py +2 -6
  138. biolib/compute_node/webserver/webserver_utils.py +77 -12
  139. biolib/compute_node/webserver/worker_thread.py +183 -42
  140. biolib/experiments/__init__.py +0 -0
  141. biolib/experiments/experiment.py +356 -0
  142. biolib/jobs/__init__.py +1 -0
  143. biolib/jobs/job.py +741 -0
  144. biolib/jobs/job_result.py +185 -0
  145. biolib/jobs/types.py +50 -0
  146. biolib/py.typed +0 -0
  147. biolib/runtime/__init__.py +14 -0
  148. biolib/sdk/__init__.py +91 -0
  149. biolib/tables.py +34 -0
  150. biolib/typing_utils.py +2 -7
  151. biolib/user/__init__.py +1 -0
  152. biolib/user/sign_in.py +54 -0
  153. biolib/utils/__init__.py +162 -0
  154. biolib/utils/cache_state.py +94 -0
  155. biolib/utils/multipart_uploader.py +194 -0
  156. biolib/utils/seq_util.py +150 -0
  157. biolib/utils/zip/remote_zip.py +640 -0
  158. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  159. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  160. {pybiolib-0.2.951.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  161. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  162. README.md +0 -17
  163. biolib/app/app_result.py +0 -68
  164. biolib/app/utils.py +0 -62
  165. biolib/biolib-js/0-biolib.worker.js +0 -1
  166. biolib/biolib-js/1-biolib.worker.js +0 -1
  167. biolib/biolib-js/2-biolib.worker.js +0 -1
  168. biolib/biolib-js/3-biolib.worker.js +0 -1
  169. biolib/biolib-js/4-biolib.worker.js +0 -1
  170. biolib/biolib-js/5-biolib.worker.js +0 -1
  171. biolib/biolib-js/6-biolib.worker.js +0 -1
  172. biolib/biolib-js/index.html +0 -10
  173. biolib/biolib-js/main-biolib.js +0 -1
  174. biolib/biolib_api_client/biolib_account_api.py +0 -21
  175. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -108
  176. biolib/biolib_binary_format/aes_encrypted_package.py +0 -42
  177. biolib/biolib_binary_format/module_output.py +0 -58
  178. biolib/biolib_binary_format/rsa_encrypted_aes_package.py +0 -57
  179. biolib/biolib_push.py +0 -114
  180. biolib/cli.py +0 -203
  181. biolib/cli_utils.py +0 -273
  182. biolib/compute_node/cloud_utils/enclave_parent_types.py +0 -7
  183. biolib/compute_node/enclave/__init__.py +0 -2
  184. biolib/compute_node/enclave/enclave_remote_hosts.py +0 -53
  185. biolib/compute_node/enclave/nitro_secure_module_utils.py +0 -64
  186. biolib/compute_node/job_worker/executors/base_executor.py +0 -18
  187. biolib/compute_node/job_worker/executors/pyppeteer_executor.py +0 -173
  188. biolib/compute_node/job_worker/executors/remote/__init__.py +0 -1
  189. biolib/compute_node/job_worker/executors/remote/nitro_enclave_utils.py +0 -81
  190. biolib/compute_node/job_worker/executors/remote/remote_executor.py +0 -51
  191. biolib/lfs.py +0 -196
  192. biolib/pyppeteer/.circleci/config.yml +0 -100
  193. biolib/pyppeteer/.coveragerc +0 -3
  194. biolib/pyppeteer/.gitignore +0 -89
  195. biolib/pyppeteer/.pre-commit-config.yaml +0 -28
  196. biolib/pyppeteer/CHANGES.md +0 -253
  197. biolib/pyppeteer/CONTRIBUTING.md +0 -26
  198. biolib/pyppeteer/LICENSE +0 -12
  199. biolib/pyppeteer/README.md +0 -137
  200. biolib/pyppeteer/docs/Makefile +0 -177
  201. biolib/pyppeteer/docs/_static/custom.css +0 -28
  202. biolib/pyppeteer/docs/_templates/layout.html +0 -10
  203. biolib/pyppeteer/docs/changes.md +0 -1
  204. biolib/pyppeteer/docs/conf.py +0 -299
  205. biolib/pyppeteer/docs/index.md +0 -21
  206. biolib/pyppeteer/docs/make.bat +0 -242
  207. biolib/pyppeteer/docs/reference.md +0 -211
  208. biolib/pyppeteer/docs/server.py +0 -60
  209. biolib/pyppeteer/poetry.lock +0 -1699
  210. biolib/pyppeteer/pyppeteer/__init__.py +0 -135
  211. biolib/pyppeteer/pyppeteer/accessibility.py +0 -286
  212. biolib/pyppeteer/pyppeteer/browser.py +0 -401
  213. biolib/pyppeteer/pyppeteer/browser_fetcher.py +0 -194
  214. biolib/pyppeteer/pyppeteer/command.py +0 -22
  215. biolib/pyppeteer/pyppeteer/connection/__init__.py +0 -242
  216. biolib/pyppeteer/pyppeteer/connection/cdpsession.py +0 -101
  217. biolib/pyppeteer/pyppeteer/coverage.py +0 -346
  218. biolib/pyppeteer/pyppeteer/device_descriptors.py +0 -787
  219. biolib/pyppeteer/pyppeteer/dialog.py +0 -79
  220. biolib/pyppeteer/pyppeteer/domworld.py +0 -597
  221. biolib/pyppeteer/pyppeteer/emulation_manager.py +0 -53
  222. biolib/pyppeteer/pyppeteer/errors.py +0 -48
  223. biolib/pyppeteer/pyppeteer/events.py +0 -63
  224. biolib/pyppeteer/pyppeteer/execution_context.py +0 -156
  225. biolib/pyppeteer/pyppeteer/frame/__init__.py +0 -299
  226. biolib/pyppeteer/pyppeteer/frame/frame_manager.py +0 -306
  227. biolib/pyppeteer/pyppeteer/helpers.py +0 -245
  228. biolib/pyppeteer/pyppeteer/input.py +0 -371
  229. biolib/pyppeteer/pyppeteer/jshandle.py +0 -598
  230. biolib/pyppeteer/pyppeteer/launcher.py +0 -683
  231. biolib/pyppeteer/pyppeteer/lifecycle_watcher.py +0 -169
  232. biolib/pyppeteer/pyppeteer/models/__init__.py +0 -103
  233. biolib/pyppeteer/pyppeteer/models/_protocol.py +0 -12460
  234. biolib/pyppeteer/pyppeteer/multimap.py +0 -82
  235. biolib/pyppeteer/pyppeteer/network_manager.py +0 -678
  236. biolib/pyppeteer/pyppeteer/options.py +0 -8
  237. biolib/pyppeteer/pyppeteer/page.py +0 -1728
  238. biolib/pyppeteer/pyppeteer/pipe_transport.py +0 -59
  239. biolib/pyppeteer/pyppeteer/target.py +0 -147
  240. biolib/pyppeteer/pyppeteer/task_queue.py +0 -24
  241. biolib/pyppeteer/pyppeteer/timeout_settings.py +0 -36
  242. biolib/pyppeteer/pyppeteer/tracing.py +0 -93
  243. biolib/pyppeteer/pyppeteer/us_keyboard_layout.py +0 -305
  244. biolib/pyppeteer/pyppeteer/util.py +0 -18
  245. biolib/pyppeteer/pyppeteer/websocket_transport.py +0 -47
  246. biolib/pyppeteer/pyppeteer/worker.py +0 -101
  247. biolib/pyppeteer/pyproject.toml +0 -97
  248. biolib/pyppeteer/spell.txt +0 -137
  249. biolib/pyppeteer/tox.ini +0 -72
  250. biolib/pyppeteer/utils/generate_protocol_types.py +0 -603
  251. biolib/start_cli.py +0 -7
  252. biolib/utils.py +0 -47
  253. biolib/validators/validate_app_version.py +0 -183
  254. biolib/validators/validate_argument.py +0 -134
  255. biolib/validators/validate_module.py +0 -323
  256. biolib/validators/validate_zip_file.py +0 -40
  257. biolib/validators/validator_utils.py +0 -103
  258. pybiolib-0.2.951.dist-info/LICENSE +0 -21
  259. pybiolib-0.2.951.dist-info/METADATA +0 -61
  260. pybiolib-0.2.951.dist-info/RECORD +0 -153
  261. pybiolib-0.2.951.dist-info/entry_points.txt +0 -3
  262. /LICENSE → /pybiolib-1.2.1890.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,185 @@
1
+ import time
2
+ from pathlib import Path
3
+
4
+ from biolib._internal.utils import PathFilter, filter_lazy_loaded_files
5
+ from biolib.biolib_binary_format import ModuleOutputV2
6
+ from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
7
+ from biolib.biolib_binary_format.remote_stream_seeker import StreamSeeker
8
+ from biolib.biolib_binary_format.utils import LazyLoadedFile, RemoteIndexableBuffer
9
+ from biolib.biolib_errors import BioLibError
10
+ from biolib.biolib_logging import logger
11
+ from biolib.typing_utils import Dict, List, Optional
12
+
13
+
14
class JobResult:
    """Access to the stdout, stderr, exit code and output files of a job.

    The module output is taken from the constructor when given. Otherwise it is
    created on first use from a ``RemoteIndexableBuffer`` backed by a
    ``RemoteJobStorageEndpoint`` for the job's ``output`` storage.
    """

    def __init__(
        self,
        job_uuid: str,
        job_auth_token: str,
        module_output: Optional[ModuleOutputV2] = None,
    ):
        self._job_uuid: str = job_uuid
        self._job_auth_token: str = job_auth_token

        # Stays None until _get_module_output() builds it, unless supplied by the caller.
        self._module_output: Optional[ModuleOutputV2] = module_output

    def get_stdout(self) -> bytes:
        """Return the stdout reported by the module output."""
        return self._get_module_output().get_stdout()

    def get_stderr(self) -> bytes:
        """Return the stderr reported by the module output."""
        return self._get_module_output().get_stderr()

    def get_exit_code(self) -> int:
        """Return the exit code reported by the module output."""
        return self._get_module_output().get_exit_code()

    def save_files(
        self,
        output_dir: str,
        path_filter: Optional[PathFilter] = None,
        skip_file_if_exists: bool = False,
        overwrite: bool = False,
        flat: bool = False,
    ) -> None:
        """Download the job's output files into ``output_dir``.

        Args:
            output_dir: Directory the files are written below.
            path_filter: When truthy, only files kept by ``filter_lazy_loaded_files`` are saved.
            skip_file_if_exists: Skip (and log) files whose destination already exists.
            overwrite: When a destination exists and is not skipped, rename the existing
                file to ``<name>.biolib-renamed.<timestamp>`` and download anew.
            flat: Save every file directly in ``output_dir`` using only its basename.

        Raises:
            BioLibError: If ``flat`` is set and two files share a basename, if a file path
                contains a ``..`` component, or if a destination exists while neither
                ``skip_file_if_exists`` nor ``overwrite`` is set.
        """
        module_output = self._get_module_output()
        output_files = module_output.get_files()
        filtered_output_files = filter_lazy_loaded_files(output_files, path_filter) if path_filter else output_files

        if not filtered_output_files:
            logger.debug('No output files to save')
            return

        if flat:
            self._raise_on_duplicate_basenames(filtered_output_files)

        # Resolve (and validate) every destination before the first byte is downloaded.
        destination_paths = [
            self._get_destination_path(output_dir, file.path, flat) for file in filtered_output_files
        ]
        read_ahead_per_file = self._compute_read_ahead_bytes(filtered_output_files)
        total_files_data_to_download_in_bytes = sum(file.length for file in filtered_output_files)

        # Assume files are in order
        first_file = filtered_output_files[0]
        last_file = filtered_output_files[-1]
        stream_seeker = StreamSeeker(
            files_data_start=first_file.start,
            files_data_end=last_file.start + last_file.length,
            max_chunk_size=min(total_files_data_to_download_in_bytes, 10_000_000),
            upstream_buffer=module_output.buffer,
        )

        logger.info(f'Saving {len(filtered_output_files)} files to {output_dir}...')
        for file, destination_file_path, read_ahead_bytes in zip(
            filtered_output_files, destination_paths, read_ahead_per_file
        ):
            if destination_file_path.exists():
                if skip_file_if_exists:
                    logger.info(f'Skipping {destination_file_path} as a file with that name already exists locally.')
                    continue
                if not overwrite:
                    raise BioLibError(f'File {destination_file_path} already exists. Set overwrite=True to overwrite.')
                # Keep the previous file around under a timestamped name instead of deleting it.
                destination_file_path.rename(
                    f'{destination_file_path}.biolib-renamed.{time.strftime("%Y%m%d%H%M%S")}'
                )

            self._download_file(stream_seeker, file, destination_file_path, read_ahead_bytes)

    @staticmethod
    def _raise_on_duplicate_basenames(files: List[LazyLoadedFile]) -> None:
        """Raise BioLibError when two files would collide on the same basename in flat mode."""
        basename_to_paths: Dict[str, List[str]] = {}
        for file in files:
            basename_to_paths.setdefault(Path(file.path).name, []).append(file.path)

        duplicates = {basename: paths for basename, paths in basename_to_paths.items() if len(paths) > 1}
        if not duplicates:
            return

        # Only the first few collisions (alphabetically) are listed to keep the message short.
        max_shown = 3
        error_parts = [
            f' {basename}: ({", ".join(duplicates[basename])})' for basename in sorted(duplicates)[:max_shown]
        ]
        error_message = 'Cannot save files in flat mode: duplicate filenames detected:\n' + '\n'.join(error_parts)

        if len(duplicates) > max_shown:
            remaining = len(duplicates) - max_shown
            error_message += f'\n (and {remaining} more)'

        raise BioLibError(error_message)

    @staticmethod
    def _get_destination_path(output_dir: str, file_path: str, flat: bool) -> Path:
        """Return the local path a file is saved to, rejecting paths with ``..`` components."""
        # File paths come from the job's output listing; a ".." component could make the
        # joined path point outside output_dir.
        if '..' in Path(file_path).parts:
            raise BioLibError(f'Refusing to save file with unsafe path "{file_path}"')

        if flat:
            return Path(output_dir) / Path(file_path).name

        # Remove leading slash of file_path
        return Path(output_dir) / Path(file_path.lstrip('/'))

    @staticmethod
    def _compute_read_ahead_bytes(files: List[LazyLoadedFile], major_gap_threshold: int = 50_000) -> List[int]:
        """Return, per file, the byte count between the file's end and the end of its run.

        A run is a sequence of consecutive files where each gap to the next file is
        smaller than ``major_gap_threshold`` bytes. Negative distances are clamped to 0.
        """
        if not files:
            return []

        count = len(files)
        run_end = [0] * count
        run_end[-1] = files[-1].start + files[-1].length
        # Walk backwards so each file can inherit the run end of the file after it.
        for index in range(count - 2, -1, -1):
            file_end = files[index].start + files[index].length
            gap = files[index + 1].start - file_end
            run_end[index] = file_end if gap >= major_gap_threshold else run_end[index + 1]

        return [max(0, run_end[index] - (file.start + file.length)) for index, file in enumerate(files)]

    def _download_file(
        self,
        stream_seeker: StreamSeeker,
        file: LazyLoadedFile,
        destination_file_path: Path,
        read_ahead_bytes: int,
    ) -> None:
        """Stream one file into a partial file, then rename it to its destination."""
        destination_file_path.parent.mkdir(parents=True, exist_ok=True)

        # write content to temporary (partial) file
        partial_path = destination_file_path.with_suffix(
            destination_file_path.suffix + f'.{self._job_uuid}.partial_biolib_download'
        )
        file_start = file.start
        data_to_download = file.length
        if partial_path.exists():
            # Resume: skip the bytes an earlier attempt already appended to the partial file.
            data_already_downloaded = partial_path.stat().st_size
            file_start += data_already_downloaded
            data_to_download -= data_already_downloaded

        with open(partial_path, mode='ab') as partial_file:
            for chunk in stream_seeker.seek_and_read(
                file_start=file_start, file_length=data_to_download, read_ahead_bytes=read_ahead_bytes
            ):
                partial_file.write(chunk)

        # rename partial file to actual file name
        partial_path.rename(destination_file_path)

    def get_output_file(self, filename: str) -> LazyLoadedFile:
        """Return the single output file matched by ``filename``.

        Raises:
            BioLibError: If no file, or more than one file, matches.
        """
        files = self._get_module_output().get_files()
        filtered_files = filter_lazy_loaded_files(files, path_filter=filename)
        if not filtered_files:
            raise BioLibError(f'File {filename} not found in results.')

        if len(filtered_files) != 1:
            raise BioLibError(f'Found multiple results for filename {filename}.')

        return filtered_files[0]

    def list_output_files(self, path_filter: Optional[PathFilter] = None) -> List[LazyLoadedFile]:
        """Return the output files, narrowed by ``path_filter`` when one is given."""
        files = self._get_module_output().get_files()
        if not path_filter:
            return files

        return filter_lazy_loaded_files(files, path_filter)

    def _get_module_output(self) -> ModuleOutputV2:
        """Return the module output, creating it from remote job storage on first use."""
        if self._module_output is None:
            remote_job_storage_endpoint = RemoteJobStorageEndpoint(
                job_auth_token=self._job_auth_token,
                job_uuid=self._job_uuid,
                storage_type='output',
            )
            buffer = RemoteIndexableBuffer(endpoint=remote_job_storage_endpoint)
            self._module_output = ModuleOutputV2(buffer)

        return self._module_output
biolib/jobs/types.py ADDED
@@ -0,0 +1,50 @@
1
+ from biolib.typing_utils import List, Literal, Optional, TypedDict
2
+
3
# The string values accepted for the `state` key of JobDict (declared below).
JobState = Literal['in_progress', 'completed', 'failed', 'cancelled']
4
+
5
+
6
class _BaseCloudJobDict(TypedDict):
    """Keys shared by CloudJobDict and CloudJobStartedDict, which both extend this class."""

    # NOTE(review): presumably a timestamp string; the format is not visible here.
    created_at: str
    # May be None.
    finished_at: Optional[str]
    uuid: str
    error_code: int
11
+
12
+
13
class CloudJobDict(_BaseCloudJobDict):
    """Cloud job dict in which `started_at` and `compute_node_url` may be None."""

    started_at: Optional[str]
    compute_node_url: Optional[str]
16
+
17
+
18
class CloudJobStartedDict(_BaseCloudJobDict):
    """Cloud job dict in which `started_at` and `compute_node_url` are always strings."""

    started_at: str
    compute_node_url: str
21
+
22
+
23
class Result(TypedDict):
    """Dict with a single `name` key; used as the type of `JobDict.main_result`."""

    name: str
25
+
26
+
27
class JobDict(TypedDict):
    """Typed shape of a job dict.

    NOTE(review): presumably mirrors the job object returned by the BioLib API;
    confirm against the API client before relying on field semantics.
    """

    app_uri: str
    arguments_override_command: bool
    auth_token: str
    created_at: str
    # May be None.
    ended_at: Optional[str]
    requested_machine: str
    runtime_seconds: int
    main_result: Result
    started_at: str
    # One of the JobState literals.
    state: JobState
    uuid: str
    # May be None.
    cloud_job: Optional[CloudJobDict]
40
+
41
+
42
class BasePaginatedResponse(TypedDict):
    """Pagination counters shared by paginated response dicts such as JobsPaginatedResponse."""

    current_page_number: int
    object_count: int
    page_count: int
    page_size: int
47
+
48
+
49
class JobsPaginatedResponse(BasePaginatedResponse):
    """Paginated response whose `results` key holds a list of JobDict entries."""

    results: List[JobDict]
biolib/py.typed ADDED
File without changes
@@ -0,0 +1,14 @@
1
+ import warnings
2
+
3
+ from biolib._runtime.runtime import Runtime as _Runtime
4
+
5
+
6
def set_main_result_prefix(result_prefix: str) -> None:
    """Deprecated entry point that forwards to ``Runtime.set_main_result_prefix``.

    Emits a DeprecationWarning attributed to the caller (``stacklevel=2``) before
    delegating with the same ``result_prefix``.
    """
    deprecation_message = (
        'The "biolib.runtime.set_main_result_prefix" function is deprecated. '
        'It will be removed in future releases from mid 2024. '
        'Please use "from biolib.sdk import Runtime" and then "Runtime.set_main_result_prefix" instead.'
    )
    warnings.warn(deprecation_message, category=DeprecationWarning, stacklevel=2)
    _Runtime.set_main_result_prefix(result_prefix)
biolib/sdk/__init__.py ADDED
@@ -0,0 +1,91 @@
1
+ from typing import Any, Dict, List, Optional, Union
2
+
3
+ # Imports to hide and use as private internal utils
4
+ from biolib._data_record.data_record import DataRecord as _DataRecord
5
+ from biolib._index.index import Index as _Index
6
+ from biolib._index.query_result import IndexQueryResult
7
+ from biolib._index.query_result import query_index as _query_index
8
+ from biolib._internal.push_application import push_application as _push_application
9
+ from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
10
+ from biolib._runtime.runtime import Runtime as _Runtime
11
+ from biolib._session.session import Session as _Session
12
+ from biolib.app import BioLibApp as _BioLibApp
13
+
14
# Classes to expose as public API
# `Runtime` is the privately imported `_Runtime` class re-exported under a public name.
Runtime = _Runtime
16
+
17
+
18
def get_session(
    refresh_token: str,
    base_url: Optional[str] = None,
    client_type: Optional[str] = None,
    experiment: Optional[str] = None,
) -> _Session:
    """Return a session by forwarding all arguments unchanged to ``_Session.get_session``.

    Args:
        refresh_token: Passed through as ``refresh_token``.
        base_url: Passed through as ``base_url``; defaults to None.
        client_type: Passed through as ``client_type``; defaults to None.
        experiment: Passed through as ``experiment``; defaults to None.
    """
    return _Session.get_session(
        refresh_token=refresh_token,
        base_url=base_url,
        client_type=client_type,
        experiment=experiment,
    )
30
+
31
+
32
def push_app_version(uri: str, path: str) -> _BioLibApp:
    """Push the application at ``path`` to ``uri`` and return the pushed version as an app.

    The push neither sets the new version as active nor as published, and does not
    copy images from another version.

    Raises:
        Exception: If the push returns a falsy result.
    """
    pushed = _push_application(
        app_uri=uri,
        app_path=path,
        app_version_to_copy_images_from=None,
        set_as_active=False,
        set_as_published=False,
    )
    if pushed:
        # Address the exact version that was just pushed: "<app_uri>:<version>".
        return _BioLibApp(f'{pushed["app_uri"]}:{pushed["sematic_version"]}')

    raise Exception('Failed to push application; please check the logs for more details')
45
+
46
+
47
def set_app_version_as_default(app_version: _BioLibApp) -> None:
    """Mark the given app version as active.

    Reads ``app_version.version['public_id']`` and passes it to
    ``_set_app_version_as_active``.
    """
    app_version_uuid = app_version.version['public_id']
    _set_app_version_as_active(app_version_uuid)
50
+
51
+
52
def get_app_version_pytest_plugin(app_version: _BioLibApp):
    """Return a pytest plugin object exposing ``app_version`` as a session-scoped fixture.

    Args:
        app_version: The value the ``app_version`` fixture returns.

    Returns:
        An ``AppVersionFixturePlugin`` instance holding ``app_version``.

    Raises:
        Exception: If pytest cannot be imported.
    """
    try:
        import pytest  # type: ignore # pylint: disable=import-outside-toplevel,import-error
    except ImportError:
        # Only a failed import means "pytest is missing"; KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as a missing package.
        raise Exception('Failed to import pytest; please make sure it is installed') from None

    class AppVersionFixturePlugin:
        def __init__(self, app_version_ref):
            self.app_version_ref = app_version_ref

        @pytest.fixture(scope='session')
        def app_version(self, request):  # pylint: disable=unused-argument
            return self.app_version_ref

    return AppVersionFixturePlugin(app_version)
67
+
68
+
69
def create_data_record(
    destination: str,
    data_path: str,
    name: Optional[str] = None,
    record_type: Optional[str] = None,
) -> _DataRecord:
    """Create a data record via ``_DataRecord.create``.

    When ``name`` is truthy it is appended to ``destination`` as ``<destination>/<name>``;
    otherwise ``destination`` is used as given.
    """
    target = destination if not name else f'{destination}/{name}'
    return _DataRecord.create(destination=target, data_path=data_path, record_type=record_type)
80
+
81
+
82
def get_index(uri: str) -> _Index:
    """Return the index for ``uri`` by delegating to ``_Index.get_by_uri``."""
    return _Index.get_by_uri(uri)
84
+
85
+
86
def query_index(
    query: str,
    data: Optional[Union[List[Dict[str, Any]], bytes]] = None,
    data_format: str = 'json',
) -> IndexQueryResult:
    """Run an index query by forwarding all arguments unchanged to ``_query_index``.

    Args:
        query: Passed through as ``query``.
        data: Optional list of dicts or raw bytes, passed through as ``data``.
        data_format: Passed through as ``data_format``; defaults to ``'json'``.
    """
    return _query_index(query=query, data=data, data_format=data_format)
biolib/tables.py ADDED
@@ -0,0 +1,34 @@
1
+ from collections import OrderedDict
2
+
3
+ from rich.console import Console
4
+ from rich.table import Column, Table
5
+
6
+ from biolib.typing_utils import Any, List
7
+
8
+
9
class BioLibTable:
    """Builds a rich ``Table`` from row dicts and a column specification.

    ``columns_to_row_map`` maps each column header to a dict with a ``'params'`` entry
    (keyword arguments for ``rich.table.Column``) and a ``'key'`` entry (a dot-separated
    lookup path into each row).
    """

    def __init__(self, columns_to_row_map: OrderedDict, rows: List[Any], title):
        self.title = title
        self.rows = rows
        self.columns_to_row_map = columns_to_row_map
        # Built last because it reads the three attributes assigned above.
        self.table = self._create_table()

    def _create_table(self) -> Table:
        """Create the table with one column per map entry and one row per item in ``rows``."""
        rendered = Table(
            *[Column(header=header, **spec['params']) for header, spec in self.columns_to_row_map.items()],
            title=self.title,
        )
        for record in self.rows:
            cells: List[str] = []
            for spec in self.columns_to_row_map.values():
                root_key, *nested_keys = spec['key'].split('.')
                cell = record[root_key]
                for nested_key in nested_keys:
                    # A path segment that cannot be resolved is skipped; the current value is kept.
                    if cell and nested_key in cell:
                        cell = cell[nested_key]
                cells.append(str(cell))
            rendered.add_row(*cells)
        return rendered

    def print_table(self):
        """Print the table to a new rich ``Console``."""
        Console().print(self.table)
biolib/typing_utils.py CHANGED
@@ -1,7 +1,2 @@
1
- import sys
2
-
3
- # import and expose everything from the typing module
4
- from typing import * # pylint: disable=wildcard-import, unused-wildcard-import
5
-
6
- if sys.version_info < (3, 8):
7
- from typing_extensions import TypedDict, Literal
1
+ # TODO: Deprecate and later remove this file
2
+ from biolib._shared.types.typing import * # pylint: disable=wildcard-import, unused-wildcard-import
@@ -0,0 +1 @@
1
+ from .sign_in import sign_in, sign_out
biolib/user/sign_in.py ADDED
@@ -0,0 +1,54 @@
1
+ import time
2
+ import webbrowser
3
+
4
+ from biolib._internal.utils import open_browser_window_from_notebook
5
+ from biolib.biolib_api_client import BiolibApiClient
6
+ from biolib.biolib_api_client.auth import BiolibAuthChallengeApi
7
+ from biolib.biolib_logging import logger_no_user_data
8
+ from biolib.utils import IS_RUNNING_IN_NOTEBOOK
9
+
10
+
11
+ def sign_out() -> None:
12
+ api_client = BiolibApiClient.get(attempt_sign_in=False)
13
+ api_client.sign_out()
14
+
15
+
16
def sign_in(open_in_default_browser: bool = False) -> None:
    """Sign in through a browser-based auth challenge.

    Returns right after logging when ``BiolibApiClient.is_reauthentication_needed()``
    is false. Otherwise an auth challenge is created, its sign-in URL is opened or
    printed, and the challenge status is polled. When the final status carries
    ``user_tokens`` they are stored on the API client; otherwise a failure message
    with the last state is printed.

    Args:
        open_in_default_browser: Outside a notebook, open the sign-in URL with
            ``webbrowser.open`` instead of only printing it.
    """
    if not BiolibApiClient.is_reauthentication_needed():
        logger_no_user_data.info('Already signed in')
        return

    api_client = BiolibApiClient.get()
    auth_challenge = BiolibAuthChallengeApi.create_auth_challenge()
    auth_challenge_token = auth_challenge['token']

    client_type = 'notebook' if IS_RUNNING_IN_NOTEBOOK else 'cli'

    frontend_sign_in_url = f'{api_client.base_url}/sign-in/request/{client_type}/?token={auth_challenge_token}'

    if IS_RUNNING_IN_NOTEBOOK:
        print(f'Opening authorization page at: {frontend_sign_in_url}')
        print('If your browser does not open automatically, click on the link above.')
        open_browser_window_from_notebook(frontend_sign_in_url)
    elif open_in_default_browser:
        print(f'Opening authorization page at: {frontend_sign_in_url}')
        print('If your browser does not open automatically, click on the link above.')
        webbrowser.open(frontend_sign_in_url)
    else:
        print('Please copy and paste the following link into your browser:')
        print(frontend_sign_in_url)

    # Poll at most 100 times, sleeping 3 seconds before each status request, and stop
    # as soon as the challenge leaves the 'awaiting' state.
    for _ in range(100):
        time.sleep(3)
        auth_challenge_status = BiolibAuthChallengeApi.get_auth_challenge_status(token=auth_challenge_token)

        if auth_challenge_status['state'] != 'awaiting':
            break

    # Reached both after a break and after all polls are used up; in the latter case the
    # last status is still 'awaiting' and is reported as a failure below.
    user_tokens = auth_challenge_status.get('user_tokens')
    if user_tokens:
        api_client.set_user_tokens(user_tokens)
        print('Successfully signed in!')

    else:
        print(f"Sign in failed. Got state: {auth_challenge_status['state']}\nPlease try again")
@@ -0,0 +1,162 @@
1
+ import collections.abc
2
+ import multiprocessing
3
+ import os
4
+ import socket
5
+ import sys
6
+
7
+ from importlib_metadata import version, PackageNotFoundError
8
+
9
+ from biolib.typing_utils import Optional
10
+ from biolib.utils.seq_util import SeqUtil, SeqUtilRecord
11
+ from biolib._internal.http_client import HttpClient
12
+ from biolib.biolib_logging import logger_no_user_data, logger
13
+ from biolib.typing_utils import Tuple, Iterator
14
+ from .multipart_uploader import MultiPartUploader, get_chunk_iterator_from_bytes
15
+
16
# Look up the installed pybiolib version; when the package metadata cannot be found
# (usually when running from a development checkout) fall back to a placeholder version.
try:
    BIOLIB_PACKAGE_VERSION = version('pybiolib')
except PackageNotFoundError:
    BIOLIB_PACKAGE_VERSION = '0.0.0'

# True when the BIOLIB_DEV environment variable equals "true", compared case-insensitively
IS_DEV = os.environ.get('BIOLIB_DEV', '').upper() == 'TRUE'
23
+
24
+
25
def load_base_url_from_env() -> str:
    """Determine the base URL to use.

    Resolution order:
      1. The ``BIOLIB_BASE_URL`` environment variable, lower-cased and with trailing
         slashes removed, when it is set and non-empty.
      2. ``https://biolib.<domain>`` for the first domain on the first ``search`` line of
         ``/etc/resolv.conf`` for which ``biolib.<domain>`` resolves on port 443.
      3. ``https://biolib.com``.

    Step 2 is best effort: any ``Exception`` raised while reading the file or resolving a
    host is ignored and the next candidate (or the default) is used. ``KeyboardInterrupt``
    and ``SystemExit`` are deliberately not swallowed, so the user can still abort while
    a slow DNS lookup is in progress.

    Returns:
        The base URL without a trailing slash.
    """
    base_url = os.getenv('BIOLIB_BASE_URL')
    if base_url:
        return base_url.lower().rstrip('/')

    try:
        search_list = []
        with open('/etc/resolv.conf') as file:
            for line in file:
                line_trimmed = line.strip()
                if line_trimmed.startswith('search'):
                    # Only the first "search" line is considered
                    search_list = line_trimmed.split()[1:]
                    logger.debug(f'Found search list: {search_list} when resolving base url.')
                    break

        for search_host in search_list:
            host_to_try = f'biolib.{search_host}'
            try:
                if socket.getaddrinfo(host_to_try, 443):
                    return f'https://{host_to_try}'.lower()
            except Exception:  # pylint: disable=broad-except
                # Host could not be resolved; try the next search domain
                continue
    except Exception:  # pylint: disable=broad-except
        # resolv.conf missing or unreadable; fall through to the default
        pass

    return 'https://biolib.com'
51
+
52
+
53
# Initialised to None at import time
BIOLIB_BASE_URL: Optional[str] = None
BIOLIB_SITE_HOSTNAME: Optional[str] = None

# Lower-cased value of the BIOLIB_CLOUD_BASE_URL environment variable ('' when unset)
BIOLIB_CLOUD_BASE_URL = os.environ.get('BIOLIB_CLOUD_BASE_URL', '').lower()

# Directory two levels above this file
_THIS_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
BIOLIB_PACKAGE_ROOT_DIR = os.path.dirname(_THIS_MODULE_DIR)

# Lower-cased value of the BIOLIB_CLOUD_ENVIRONMENT environment variable ('' when unset)
BIOLIB_CLOUD_ENVIRONMENT = os.environ.get('BIOLIB_CLOUD_ENVIRONMENT', '').lower()

# None when the BIOLIB_SECRETS_TMPFS_PATH environment variable is unset
BIOLIB_SECRETS_TMPFS_PATH = os.getenv('BIOLIB_SECRETS_TMPFS_PATH')

# 'non-enclave' is the only value treated as running in the cloud
IS_RUNNING_IN_CLOUD = BIOLIB_CLOUD_ENVIRONMENT == 'non-enclave'

BASE_URL_IS_PUBLIC_BIOLIB: Optional[bool] = None

# sys.stdout is an instance of OutStream in Jupyter and Colab which does not have .buffer
IS_RUNNING_IN_NOTEBOOK = not hasattr(sys.stdout, 'buffer')

STREAM_STDOUT = False
75
+
76
# Warn when BIOLIB_CLOUD_ENVIRONMENT is set to something other than 'non-enclave',
# i.e. it is non-empty but IS_RUNNING_IN_CLOUD is False.
if BIOLIB_CLOUD_ENVIRONMENT and not IS_RUNNING_IN_CLOUD:
    # The two literals are joined by implicit string concatenation. There must be no comma
    # between them: a comma would turn the argument into a tuple and the log record would
    # contain the tuple's repr instead of a single sentence.
    logger_no_user_data.warning(
        'BIOLIB_CLOUD_ENVIRONMENT defined but does not specify the cloud environment correctly. '
        'The compute node will not act as a cloud compute node'
    )
81
+
82
# (start, end) byte offsets; formatted as "bytes=<start>-<end>" in the HTTP range header
ByteRangeTuple = Tuple[int, int]
# (byte range, presigned URL) work item consumed by _download_chunk
DownloadChunkInputTuple = Tuple[ByteRangeTuple, str]
84
+
85
+
86
def _download_chunk(input_tuple: DownloadChunkInputTuple) -> bytes:
    """Download one byte range from a presigned URL.

    Args:
        input_tuple: ``((start, end), presigned_url)``; the range is sent as the header
            ``range: bytes=<start>-<end>``.

    Returns:
        The ``content`` of the HTTP response.

    Raises:
        Exception: Whatever ``HttpClient.request`` raised, re-raised after being logged.
    """
    (start, end), presigned_url = input_tuple
    range_header = {'range': f'bytes={start}-{end}'}

    try:
        response = HttpClient.request(
            url=presigned_url,
            headers=range_header,
            timeout_in_seconds=300,  # timeout after 5 min
            retries=20,
            retry_on_http_500=True,
        )
    except Exception as exception:
        # Log with traceback first, then the exception itself, before propagating
        logger_no_user_data.exception("Hit error downloading chunk")
        logger_no_user_data.error(exception)
        raise exception

    logger_no_user_data.debug(f'Returning raw data for part {start}')
    return response.content
104
+
105
+
106
class ChunkIterator(collections.abc.Iterator):
    """Iterator of ``(byte_range, presigned_url)`` work items, throttled by a semaphore.

    Every ``__next__`` call first takes one of 20 semaphore slots; ``chunk_completed``
    gives one slot back. A consumer that calls ``chunk_completed`` after handling each item
    therefore keeps at most 20 items outstanding at any time.
    """

    def __init__(self, file_size: int, chunk_size: int, presigned_url: str):
        # support 20 chunks to be processed at once
        self._semaphore = multiprocessing.BoundedSemaphore(20)
        self._iterator = self._get_chunk_input_iterator(file_size, chunk_size, presigned_url)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next work item, waiting up to 1800 seconds for a free slot."""
        slot_acquired = self._semaphore.acquire(timeout=1800)
        if not slot_acquired:
            raise Exception('Did not receive work within 30 min.')
        return next(self._iterator)

    def chunk_completed(self) -> None:
        """Release one semaphore slot."""
        self._semaphore.release()

    @staticmethod
    def _get_chunk_input_iterator(
            file_size: int,
            chunk_size: int,
            presigned_url: str,
    ) -> 'Iterator[DownloadChunkInputTuple]':
        """Yield inclusive byte ranges of ``chunk_size`` bytes starting at offset 0.

        The end of the last range is not clamped to ``file_size``.
        """
        for range_start in range(0, file_size, chunk_size):
            range_end = range_start + chunk_size - 1
            yield (range_start, range_end), presigned_url
133
+
134
+
135
def download_presigned_s3_url(presigned_url: str, output_file_path: str) -> None:
    """Download a presigned URL to a local file in parallel byte-range chunks.

    A first request for bytes 0-1 is made only to read the total size from the
    ``Content-Range`` response header. The file is then fetched in chunks of 50,000,000
    bytes by a pool of worker processes and written in order.

    Args:
        presigned_url: URL to download from; it must answer range requests with a
            ``Content-Range`` header.
        output_file_path: Destination path. The file is opened in append mode (``'ab'``),
            so existing content is kept and the download is added after it.
    """
    chunk_size = 50_000_000

    # The total size is the part after '/' in the Content-Range header of the probe request
    response = HttpClient.request(url=presigned_url, headers={'range': 'bytes=0-1'})
    file_size = int(response.headers['Content-Range'].split('/')[1])

    chunk_iterator = ChunkIterator(file_size, chunk_size, presigned_url)

    bytes_written = 0
    # Use up to 16 workers and leave one core free, but never fewer than one worker:
    # on a single-core host cpu_count() - 1 is 0, which multiprocessing.Pool rejects.
    worker_count = max(1, min(16, multiprocessing.cpu_count() - 1))
    process_pool = multiprocessing.Pool(processes=worker_count)
    try:
        with open(output_file_path, 'ab') as output_file:
            # imap yields results in submission order, so parts are written sequentially
            for index, data in enumerate(process_pool.imap(_download_chunk, chunk_iterator)):
                logger_no_user_data.debug(f'Writing part {index} to file...')
                output_file.write(data)

                # Count the bytes actually written; the last part is usually shorter
                bytes_written += len(data)
                approx_progress_percent = min(bytes_written / file_size * 100, 100)
                logger_no_user_data.debug(
                    f'Wrote part {index} of {file_size} to file, '
                    f'the approximate progress is {round(approx_progress_percent, 2)}%'
                )
                # Free a slot so the iterator can hand out another chunk
                chunk_iterator.chunk_completed()
    finally:
        logger_no_user_data.debug('Closing process poll...')
        process_pool.close()
        logger_no_user_data.debug('Process poll closed.')