pybiolib 0.2.951__py3-none-any.whl → 1.2.1890__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
Files changed (262)
  1. biolib/__init__.py +357 -11
  2. biolib/_data_record/data_record.py +380 -0
  3. biolib/_index/__init__.py +0 -0
  4. biolib/_index/index.py +55 -0
  5. biolib/_index/query_result.py +103 -0
  6. biolib/_internal/__init__.py +0 -0
  7. biolib/_internal/add_copilot_prompts.py +58 -0
  8. biolib/_internal/add_gui_files.py +81 -0
  9. biolib/_internal/data_record/__init__.py +1 -0
  10. biolib/_internal/data_record/data_record.py +85 -0
  11. biolib/_internal/data_record/push_data.py +116 -0
  12. biolib/_internal/data_record/remote_storage_endpoint.py +43 -0
  13. biolib/_internal/errors.py +5 -0
  14. biolib/_internal/file_utils.py +125 -0
  15. biolib/_internal/fuse_mount/__init__.py +1 -0
  16. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  17. biolib/_internal/http_client.py +159 -0
  18. biolib/_internal/lfs/__init__.py +1 -0
  19. biolib/_internal/lfs/cache.py +51 -0
  20. biolib/_internal/libs/__init__.py +1 -0
  21. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  22. biolib/_internal/push_application.py +488 -0
  23. biolib/_internal/runtime.py +22 -0
  24. biolib/_internal/string_utils.py +13 -0
  25. biolib/_internal/templates/__init__.py +1 -0
  26. biolib/_internal/templates/copilot_template/.github/instructions/general-app-knowledge.instructions.md +10 -0
  27. biolib/_internal/templates/copilot_template/.github/instructions/style-general.instructions.md +20 -0
  28. biolib/_internal/templates/copilot_template/.github/instructions/style-python.instructions.md +16 -0
  29. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  30. biolib/_internal/templates/copilot_template/.github/prompts/biolib_app_inputs.prompt.md +11 -0
  31. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  32. biolib/_internal/templates/copilot_template/.github/prompts/biolib_run_apps.prompt.md +12 -0
  33. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  34. biolib/_internal/templates/github_workflow_template/.github/workflows/biolib.yml +21 -0
  35. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  36. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  37. biolib/_internal/templates/gui_template/App.tsx +53 -0
  38. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  39. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  40. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  41. biolib/_internal/templates/gui_template/index.css +5 -0
  42. biolib/_internal/templates/gui_template/index.html +13 -0
  43. biolib/_internal/templates/gui_template/index.tsx +10 -0
  44. biolib/_internal/templates/gui_template/package.json +27 -0
  45. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  46. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  47. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  48. biolib/_internal/templates/init_template/.biolib/config.yml +19 -0
  49. biolib/_internal/templates/init_template/Dockerfile +14 -0
  50. biolib/_internal/templates/init_template/requirements.txt +1 -0
  51. biolib/_internal/templates/init_template/run.py +12 -0
  52. biolib/_internal/templates/init_template/run.sh +4 -0
  53. biolib/_internal/templates/templates.py +25 -0
  54. biolib/_internal/tree_utils.py +106 -0
  55. biolib/_internal/utils/__init__.py +65 -0
  56. biolib/_internal/utils/auth.py +46 -0
  57. biolib/_internal/utils/job_url.py +33 -0
  58. biolib/_internal/utils/multinode.py +263 -0
  59. biolib/_runtime/runtime.py +157 -0
  60. biolib/_session/session.py +44 -0
  61. biolib/_shared/__init__.py +0 -0
  62. biolib/_shared/types/__init__.py +74 -0
  63. biolib/_shared/types/account.py +12 -0
  64. biolib/_shared/types/account_member.py +8 -0
  65. biolib/_shared/types/app.py +9 -0
  66. biolib/_shared/types/data_record.py +40 -0
  67. biolib/_shared/types/experiment.py +32 -0
  68. biolib/_shared/types/file_node.py +17 -0
  69. biolib/_shared/types/push.py +6 -0
  70. biolib/_shared/types/resource.py +37 -0
  71. biolib/_shared/types/resource_deploy_key.py +11 -0
  72. biolib/_shared/types/resource_permission.py +14 -0
  73. biolib/_shared/types/resource_version.py +19 -0
  74. biolib/_shared/types/result.py +14 -0
  75. biolib/_shared/types/typing.py +10 -0
  76. biolib/_shared/types/user.py +19 -0
  77. biolib/_shared/utils/__init__.py +7 -0
  78. biolib/_shared/utils/resource_uri.py +75 -0
  79. biolib/api/__init__.py +6 -0
  80. biolib/api/client.py +168 -0
  81. biolib/app/app.py +252 -49
  82. biolib/app/search_apps.py +45 -0
  83. biolib/biolib_api_client/api_client.py +126 -31
  84. biolib/biolib_api_client/app_types.py +24 -4
  85. biolib/biolib_api_client/auth.py +31 -8
  86. biolib/biolib_api_client/biolib_app_api.py +147 -52
  87. biolib/biolib_api_client/biolib_job_api.py +161 -141
  88. biolib/biolib_api_client/job_types.py +21 -5
  89. biolib/biolib_api_client/lfs_types.py +7 -23
  90. biolib/biolib_api_client/user_state.py +56 -0
  91. biolib/biolib_binary_format/__init__.py +1 -4
  92. biolib/biolib_binary_format/file_in_container.py +105 -0
  93. biolib/biolib_binary_format/module_input.py +24 -7
  94. biolib/biolib_binary_format/module_output_v2.py +149 -0
  95. biolib/biolib_binary_format/remote_endpoints.py +34 -0
  96. biolib/biolib_binary_format/remote_stream_seeker.py +59 -0
  97. biolib/biolib_binary_format/saved_job.py +3 -2
  98. biolib/biolib_binary_format/{attestation_document.py → stdout_and_stderr.py} +8 -8
  99. biolib/biolib_binary_format/system_status_update.py +3 -2
  100. biolib/biolib_binary_format/utils.py +175 -0
  101. biolib/biolib_docker_client/__init__.py +11 -2
  102. biolib/biolib_errors.py +36 -0
  103. biolib/biolib_logging.py +27 -10
  104. biolib/cli/__init__.py +38 -0
  105. biolib/cli/auth.py +46 -0
  106. biolib/cli/data_record.py +164 -0
  107. biolib/cli/index.py +32 -0
  108. biolib/cli/init.py +421 -0
  109. biolib/cli/lfs.py +101 -0
  110. biolib/cli/push.py +50 -0
  111. biolib/cli/run.py +63 -0
  112. biolib/cli/runtime.py +14 -0
  113. biolib/cli/sdk.py +16 -0
  114. biolib/cli/start.py +56 -0
  115. biolib/compute_node/cloud_utils/cloud_utils.py +110 -161
  116. biolib/compute_node/job_worker/cache_state.py +66 -88
  117. biolib/compute_node/job_worker/cache_types.py +1 -6
  118. biolib/compute_node/job_worker/docker_image_cache.py +112 -37
  119. biolib/compute_node/job_worker/executors/__init__.py +0 -3
  120. biolib/compute_node/job_worker/executors/docker_executor.py +532 -199
  121. biolib/compute_node/job_worker/executors/docker_types.py +9 -1
  122. biolib/compute_node/job_worker/executors/types.py +19 -9
  123. biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +30 -0
  124. biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +3 -5
  125. biolib/compute_node/job_worker/job_storage.py +108 -0
  126. biolib/compute_node/job_worker/job_worker.py +397 -212
  127. biolib/compute_node/job_worker/large_file_system.py +87 -38
  128. biolib/compute_node/job_worker/network_alloc.py +99 -0
  129. biolib/compute_node/job_worker/network_buffer.py +240 -0
  130. biolib/compute_node/job_worker/utilization_reporter_thread.py +197 -0
  131. biolib/compute_node/job_worker/utils.py +9 -24
  132. biolib/compute_node/remote_host_proxy.py +400 -98
  133. biolib/compute_node/utils.py +31 -9
  134. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  135. biolib/compute_node/webserver/proxy_utils.py +28 -0
  136. biolib/compute_node/webserver/webserver.py +130 -44
  137. biolib/compute_node/webserver/webserver_types.py +2 -6
  138. biolib/compute_node/webserver/webserver_utils.py +77 -12
  139. biolib/compute_node/webserver/worker_thread.py +183 -42
  140. biolib/experiments/__init__.py +0 -0
  141. biolib/experiments/experiment.py +356 -0
  142. biolib/jobs/__init__.py +1 -0
  143. biolib/jobs/job.py +741 -0
  144. biolib/jobs/job_result.py +185 -0
  145. biolib/jobs/types.py +50 -0
  146. biolib/py.typed +0 -0
  147. biolib/runtime/__init__.py +14 -0
  148. biolib/sdk/__init__.py +91 -0
  149. biolib/tables.py +34 -0
  150. biolib/typing_utils.py +2 -7
  151. biolib/user/__init__.py +1 -0
  152. biolib/user/sign_in.py +54 -0
  153. biolib/utils/__init__.py +162 -0
  154. biolib/utils/cache_state.py +94 -0
  155. biolib/utils/multipart_uploader.py +194 -0
  156. biolib/utils/seq_util.py +150 -0
  157. biolib/utils/zip/remote_zip.py +640 -0
  158. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  159. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  160. {pybiolib-0.2.951.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  161. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  162. README.md +0 -17
  163. biolib/app/app_result.py +0 -68
  164. biolib/app/utils.py +0 -62
  165. biolib/biolib-js/0-biolib.worker.js +0 -1
  166. biolib/biolib-js/1-biolib.worker.js +0 -1
  167. biolib/biolib-js/2-biolib.worker.js +0 -1
  168. biolib/biolib-js/3-biolib.worker.js +0 -1
  169. biolib/biolib-js/4-biolib.worker.js +0 -1
  170. biolib/biolib-js/5-biolib.worker.js +0 -1
  171. biolib/biolib-js/6-biolib.worker.js +0 -1
  172. biolib/biolib-js/index.html +0 -10
  173. biolib/biolib-js/main-biolib.js +0 -1
  174. biolib/biolib_api_client/biolib_account_api.py +0 -21
  175. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -108
  176. biolib/biolib_binary_format/aes_encrypted_package.py +0 -42
  177. biolib/biolib_binary_format/module_output.py +0 -58
  178. biolib/biolib_binary_format/rsa_encrypted_aes_package.py +0 -57
  179. biolib/biolib_push.py +0 -114
  180. biolib/cli.py +0 -203
  181. biolib/cli_utils.py +0 -273
  182. biolib/compute_node/cloud_utils/enclave_parent_types.py +0 -7
  183. biolib/compute_node/enclave/__init__.py +0 -2
  184. biolib/compute_node/enclave/enclave_remote_hosts.py +0 -53
  185. biolib/compute_node/enclave/nitro_secure_module_utils.py +0 -64
  186. biolib/compute_node/job_worker/executors/base_executor.py +0 -18
  187. biolib/compute_node/job_worker/executors/pyppeteer_executor.py +0 -173
  188. biolib/compute_node/job_worker/executors/remote/__init__.py +0 -1
  189. biolib/compute_node/job_worker/executors/remote/nitro_enclave_utils.py +0 -81
  190. biolib/compute_node/job_worker/executors/remote/remote_executor.py +0 -51
  191. biolib/lfs.py +0 -196
  192. biolib/pyppeteer/.circleci/config.yml +0 -100
  193. biolib/pyppeteer/.coveragerc +0 -3
  194. biolib/pyppeteer/.gitignore +0 -89
  195. biolib/pyppeteer/.pre-commit-config.yaml +0 -28
  196. biolib/pyppeteer/CHANGES.md +0 -253
  197. biolib/pyppeteer/CONTRIBUTING.md +0 -26
  198. biolib/pyppeteer/LICENSE +0 -12
  199. biolib/pyppeteer/README.md +0 -137
  200. biolib/pyppeteer/docs/Makefile +0 -177
  201. biolib/pyppeteer/docs/_static/custom.css +0 -28
  202. biolib/pyppeteer/docs/_templates/layout.html +0 -10
  203. biolib/pyppeteer/docs/changes.md +0 -1
  204. biolib/pyppeteer/docs/conf.py +0 -299
  205. biolib/pyppeteer/docs/index.md +0 -21
  206. biolib/pyppeteer/docs/make.bat +0 -242
  207. biolib/pyppeteer/docs/reference.md +0 -211
  208. biolib/pyppeteer/docs/server.py +0 -60
  209. biolib/pyppeteer/poetry.lock +0 -1699
  210. biolib/pyppeteer/pyppeteer/__init__.py +0 -135
  211. biolib/pyppeteer/pyppeteer/accessibility.py +0 -286
  212. biolib/pyppeteer/pyppeteer/browser.py +0 -401
  213. biolib/pyppeteer/pyppeteer/browser_fetcher.py +0 -194
  214. biolib/pyppeteer/pyppeteer/command.py +0 -22
  215. biolib/pyppeteer/pyppeteer/connection/__init__.py +0 -242
  216. biolib/pyppeteer/pyppeteer/connection/cdpsession.py +0 -101
  217. biolib/pyppeteer/pyppeteer/coverage.py +0 -346
  218. biolib/pyppeteer/pyppeteer/device_descriptors.py +0 -787
  219. biolib/pyppeteer/pyppeteer/dialog.py +0 -79
  220. biolib/pyppeteer/pyppeteer/domworld.py +0 -597
  221. biolib/pyppeteer/pyppeteer/emulation_manager.py +0 -53
  222. biolib/pyppeteer/pyppeteer/errors.py +0 -48
  223. biolib/pyppeteer/pyppeteer/events.py +0 -63
  224. biolib/pyppeteer/pyppeteer/execution_context.py +0 -156
  225. biolib/pyppeteer/pyppeteer/frame/__init__.py +0 -299
  226. biolib/pyppeteer/pyppeteer/frame/frame_manager.py +0 -306
  227. biolib/pyppeteer/pyppeteer/helpers.py +0 -245
  228. biolib/pyppeteer/pyppeteer/input.py +0 -371
  229. biolib/pyppeteer/pyppeteer/jshandle.py +0 -598
  230. biolib/pyppeteer/pyppeteer/launcher.py +0 -683
  231. biolib/pyppeteer/pyppeteer/lifecycle_watcher.py +0 -169
  232. biolib/pyppeteer/pyppeteer/models/__init__.py +0 -103
  233. biolib/pyppeteer/pyppeteer/models/_protocol.py +0 -12460
  234. biolib/pyppeteer/pyppeteer/multimap.py +0 -82
  235. biolib/pyppeteer/pyppeteer/network_manager.py +0 -678
  236. biolib/pyppeteer/pyppeteer/options.py +0 -8
  237. biolib/pyppeteer/pyppeteer/page.py +0 -1728
  238. biolib/pyppeteer/pyppeteer/pipe_transport.py +0 -59
  239. biolib/pyppeteer/pyppeteer/target.py +0 -147
  240. biolib/pyppeteer/pyppeteer/task_queue.py +0 -24
  241. biolib/pyppeteer/pyppeteer/timeout_settings.py +0 -36
  242. biolib/pyppeteer/pyppeteer/tracing.py +0 -93
  243. biolib/pyppeteer/pyppeteer/us_keyboard_layout.py +0 -305
  244. biolib/pyppeteer/pyppeteer/util.py +0 -18
  245. biolib/pyppeteer/pyppeteer/websocket_transport.py +0 -47
  246. biolib/pyppeteer/pyppeteer/worker.py +0 -101
  247. biolib/pyppeteer/pyproject.toml +0 -97
  248. biolib/pyppeteer/spell.txt +0 -137
  249. biolib/pyppeteer/tox.ini +0 -72
  250. biolib/pyppeteer/utils/generate_protocol_types.py +0 -603
  251. biolib/start_cli.py +0 -7
  252. biolib/utils.py +0 -47
  253. biolib/validators/validate_app_version.py +0 -183
  254. biolib/validators/validate_argument.py +0 -134
  255. biolib/validators/validate_module.py +0 -323
  256. biolib/validators/validate_zip_file.py +0 -40
  257. biolib/validators/validator_utils.py +0 -103
  258. pybiolib-0.2.951.dist-info/LICENSE +0 -21
  259. pybiolib-0.2.951.dist-info/METADATA +0 -61
  260. pybiolib-0.2.951.dist-info/RECORD +0 -153
  261. pybiolib-0.2.951.dist-info/entry_points.txt +0 -3
  262. /LICENSE → /pybiolib-1.2.1890.dist-info/licenses/LICENSE +0 -0

biolib/compute_node/webserver/worker_thread.py

@@ -1,17 +1,25 @@
+import base64
+import os
 import random
+import shutil
+import socket
 import sys
-import time
 import threading
-import socket
+import time
 from queue import Queue
+from typing import Optional
 
+from biolib import api, utils
+from biolib.biolib_binary_format import ModuleOutputV2, SystemException, SystemStatusUpdate
+from biolib.biolib_binary_format.utils import LocalFileIndexableBuffer
+from biolib.biolib_logging import logger, logger_no_user_data
+from biolib.compute_node.cloud_utils import CloudUtils
 from biolib.compute_node.job_worker import JobWorkerProcess
+from biolib.compute_node.job_worker.job_storage import JobStorage
 from biolib.compute_node.socker_listener_thread import SocketListenerThread
 from biolib.compute_node.socket_sender_thread import SocketSenderThread
+from biolib.compute_node.utils import SystemExceptionCodes, WorkerThreadException, get_package_type
 from biolib.compute_node.webserver import webserver_utils
-from biolib.biolib_binary_format import AttestationDocument, SystemStatusUpdate, SystemException
-from biolib.compute_node.utils import get_package_type, WorkerThreadException, SystemExceptionCodes
-from biolib.biolib_logging import logger
 
 SOCKET_HOST = '127.0.0.1'
 
@@ -30,37 +38,86 @@ class WorkerThread(threading.Thread):
             self._sender_thread = None
             self._start_and_connect_to_compute_process()
 
-            logger.debug(f"WorkerThread connected to port {self._socket_port}")
+            logger.debug(f'WorkerThread connected to port {self._socket_port}')
 
         except Exception as exception:
-            raise WorkerThreadException(exception, SystemExceptionCodes.FAILED_TO_INITIALIZE_WORKER_THREAD.value,
-                                        worker_thread=self) from exception
+            logger_no_user_data.error(exception)
+            raise WorkerThreadException(
+                exception,
+                SystemExceptionCodes.FAILED_TO_INITIALIZE_WORKER_THREAD.value,
+                worker_thread=self,
+            ) from exception
+
+    @property
+    def _job_uuid(self):
+        return self.compute_state['job_id']
+
+    @property
+    def _job_temporary_dir(self):
+        return self.compute_state['job_temporary_dir']
+
+    def _upload_module_output_and_get_exit_code(self) -> Optional[int]:
+        exit_code = None
+        try:
+            module_output_path = os.path.join(
+                self._job_temporary_dir,
+                JobStorage.module_output_file_name,
+            )
+            if os.path.exists(module_output_path):
+                module_output = ModuleOutputV2(buffer=LocalFileIndexableBuffer(filename=module_output_path))
+                exit_code = module_output.get_exit_code()
+                logger_no_user_data.debug(f'Got exit code: {exit_code}')
+                if utils.IS_RUNNING_IN_CLOUD:
+                    JobStorage.upload_module_output(
+                        job_temporary_dir=self._job_temporary_dir,
+                        job_uuid=self._job_uuid,
+                    )
+        except Exception as error:
+            logger_no_user_data.error(f'Could not upload module output or get exit code: {error}')
+        return exit_code
 
     def run(self):
         try:
             while True:
                 package = self.compute_state['received_messages_queue'].get()
+                if package == b'CANCEL_JOB':
+                    logger_no_user_data.info(f'Job "{self._job_uuid}" got cancel signal')
+                    self.compute_state['status']['error_code'] = SystemExceptionCodes.CANCELLED_BY_USER.value
+                    self.terminate()
+
                 package_type = get_package_type(package)
 
-                if package_type == 'AttestationDocument':
-                    self.compute_state['attestation_document'] = AttestationDocument(package).deserialize()
+                if package_type == 'StdoutAndStderr':
+                    self.compute_state['status']['stdout_and_stderr_packages_b64'].append(
+                        base64.b64encode(package).decode()
+                    )
 
                 elif package_type == 'SystemStatusUpdate':
                     progress, log_message = SystemStatusUpdate(package).deserialize()
-                    self.compute_state['status']['status_updates'].append({'progress': progress,
-                                                                           'log_message': log_message})
+                    self._set_status_update(progress, log_message)
+
+                    # If 'Computation Finished'
+                    if progress == 94:
+                        self.compute_state['exit_code'] = self._upload_module_output_and_get_exit_code()
+                        self._set_status_update(progress=95, log_message='Result Ready')
+                        self.compute_state['is_completed'] = True
+                        self.terminate()
 
                 elif package_type == 'SystemException':
                     error_code = SystemException(package).deserialize()
                     self.compute_state['status']['error_code'] = error_code
-                    logger.debug("Hit error. Terminating Worker Thread and Compute Process")
+                    logger.debug('Hit error. Terminating Worker Thread and Compute Process')
+                    self.compute_state['progress'] = 95
                     self.terminate()
 
-                elif package_type == 'ModuleOutput' or package_type == 'AesEncryptedPackage':
-                    self.compute_state['result'] = package
-                    self.compute_state['status']['status_updates'].append({'progress': 95,
-                                                                           'log_message': 'Result Ready'})
-                    self.terminate()
+                elif package_type == 'AesEncryptedPackage':
+                    if self.compute_state['progress'] == 94:  # Check if encrypted package is ModuleOutput
+                        self.compute_state['result'] = package
+                        self.terminate()
+                    else:  # Else it is StdoutAndStderr
+                        self.compute_state['status']['stdout_and_stderr_packages_b64'].append(
+                            base64.b64encode(package).decode()
+                        )
 
                 else:
                     raise Exception(f'Package type from child was not recognized: {package}')
@@ -68,30 +125,40 @@ class WorkerThread(threading.Thread):
                 self.compute_state['received_messages_queue'].task_done()
 
         except Exception as exception:
-            raise WorkerThreadException(exception, SystemExceptionCodes.FAILED_TO_HANDLE_PACKAGE_IN_WORKER_THREAD.value,
-                                        worker_thread=self) from exception
+            raise WorkerThreadException(
+                exception,
+                SystemExceptionCodes.FAILED_TO_HANDLE_PACKAGE_IN_WORKER_THREAD.value,
+                worker_thread=self,
+            ) from exception
+
+    def _set_status_update(self, progress: int, log_message: str) -> None:
+        status_update = dict(progress=progress, log_message=log_message)
+        logger_no_user_data.debug(f'Job "{self._job_uuid}" got system log: {status_update}')
+
+        self.compute_state['progress'] = progress
+        self.compute_state['status']['status_updates'].append(status_update)
 
     def _start_and_connect_to_compute_process(self):
         self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        logger.debug(f"Trying to bind to socket on {SOCKET_HOST}:{self._socket_port}")
+        logger_no_user_data.debug(f'Trying to bind to socket on {SOCKET_HOST}:{self._socket_port}')
         self._socket.bind((SOCKET_HOST, self._socket_port))
 
-        logger.debug(f"Starting to listen to socket on port {self._socket_port}")
+        logger_no_user_data.debug(f'Starting to listen to socket on port {self._socket_port}')
         self._socket.listen()
-        logger.debug(f"Listening to port {self._socket_port}")
+        logger_no_user_data.debug(f'Listening to port {self._socket_port}')
 
         received_messages_queue = Queue()
         messages_to_send_queue = Queue()
 
         # Starting a thread for accepting connections before starting the process that should to connect to the socket
-        logger.debug("Starting connection thread")
-        self._connection_thread = threading.Thread(target=self._accept_new_socket_connection, args=[
-            received_messages_queue,
-            messages_to_send_queue
-        ])
+        logger_no_user_data.debug('Starting connection thread')
+        self._connection_thread = threading.Thread(
+            target=self._accept_new_socket_connection,
+            args=[received_messages_queue, messages_to_send_queue],
+        )
         self._connection_thread.start()
-        logger.debug("Started connection thread")
-        logger.debug("Starting compute process")
+        logger_no_user_data.debug('Started connection thread')
+        logger_no_user_data.debug('Starting compute process')
 
         self._job_worker_process = JobWorkerProcess(socket_port=self._socket_port, log_level=logger.level)
         self._job_worker_process.start()
@@ -108,28 +175,102 @@ class WorkerThread(threading.Thread):
         self._sender_thread = SocketSenderThread(self._connection, messages_to_send_queue)
         self._sender_thread.start()
 
-    def terminate(self):
+    def terminate(self) -> None:
+        cloud_job_uuid = self.compute_state['cloud_job_id']
+        system_exception_code = self.compute_state['status'].get('error_code')
+
+        if utils.IS_RUNNING_IN_CLOUD and system_exception_code not in [
+            SystemExceptionCodes.CANCELLED_BY_USER.value,
+            SystemExceptionCodes.EXCEEDED_MAX_JOB_RUNTIME.value,
+        ]:
+            CloudUtils.finish_cloud_job(
+                cloud_job_id=cloud_job_uuid,
+                system_exception_code=system_exception_code,
+                exit_code=self.compute_state.get('exit_code', None),
+            )
+
+        deregistered_due_to_error = False
         if self._job_worker_process:
-            logger.debug(f'Terminating JobWorkerProcess with PID {self._job_worker_process.pid}')
+            logger_no_user_data.debug(
+                f'Job "{self._job_uuid}" terminating JobWorkerProcess with PID {self._job_worker_process.pid}'
+            )
             self._job_worker_process.terminate()
 
+            clean_up_timeout_in_seconds = 600
+            for _ in range(clean_up_timeout_in_seconds):
+                if self._job_worker_process.exitcode is not None:
+                    logger_no_user_data.debug(
+                        f'Job "{self._job_uuid}" worker process exitcode {self._job_worker_process.exitcode}'
+                    )
+                    break
+                else:
+                    logger_no_user_data.debug(f'Job "{self._job_uuid}" waiting for worker process to exit...')
+                    time.sleep(1)
+
+            if self._job_worker_process.exitcode is None:
+                # TODO: Figure out if more error handling is necessary here
+                logger_no_user_data.error(
+                    f'Job {self._job_uuid} worker process did not exit within {clean_up_timeout_in_seconds} seconds'
+                )
+                if utils.IS_RUNNING_IN_CLOUD:
+                    logger_no_user_data.error('Deregistering compute node...')
+                    CloudUtils.deregister(error='job_cleanup_timed_out')
+                    deregistered_due_to_error = True
+
+            elif system_exception_code in [
+                SystemExceptionCodes.CANCELLED_BY_USER.value,
+                SystemExceptionCodes.EXCEEDED_MAX_JOB_RUNTIME.value,
+            ]:
+                self.compute_state['exit_code'] = self._upload_module_output_and_get_exit_code()
+                CloudUtils.finish_cloud_job(
+                    cloud_job_id=cloud_job_uuid,
+                    system_exception_code=system_exception_code,
+                    exit_code=self.compute_state.get('exit_code', None),
+                )
+
+        # Delete result as error occurred
+        if system_exception_code and os.path.exists(self._job_temporary_dir):
+            shutil.rmtree(self._job_temporary_dir)
+
         if self._socket:
             self._socket.close()
 
         if self._connection:
             self._connection.close()
 
-        if self.compute_state['result']:
-            seconds_to_sleep = 60
-            job_id = self.compute_state['job_id']
-            logger.debug(f'Worker thread sleeping for {seconds_to_sleep} seconds before cleaning up job {job_id}')
-            # sleep to see if the user has begun downloading the result
+        if self.compute_state['progress'] == 95:
+            seconds_to_sleep = 5
+            logger_no_user_data.debug(
+                f'Job "{self._job_uuid}" worker thread sleeping for {seconds_to_sleep} seconds before cleaning up'
+            )
+            # sleep to let the user start downloading the result
             time.sleep(seconds_to_sleep)
-            if self.compute_state['result']:
-                logger.debug(f'Cleaning up job {job_id} as result was not fetched within {seconds_to_sleep} seconds')
-                webserver_utils.finalize_and_clean_up_compute_job(job_id)
+
+        compute_state_dict = webserver_utils.JOB_ID_TO_COMPUTE_STATE_DICT
+        if self._job_uuid in compute_state_dict:
+            # Delete result as user has not started download
+            if compute_state_dict[self._job_uuid]['progress'] == 95 and os.path.exists(self._job_temporary_dir):
+                shutil.rmtree(self._job_temporary_dir)
+
+            webserver_utils.JOB_ID_TO_COMPUTE_STATE_DICT.pop(self._job_uuid)
+            logger_no_user_data.debug(f'Job "{self._job_uuid}" was cleaned up')
+        else:
+            logger_no_user_data.debug(
+                f'Job "{self._job_uuid}" could not be found, maybe it has already been cleaned up'
+            )
+
+        if utils.IS_RUNNING_IN_CLOUD:
+            config = CloudUtils.get_webserver_config()
+            logger_no_user_data.debug(f'Job "{self._job_uuid}" reporting CloudJob "{cloud_job_uuid}" as cleaned up...')
+            api.client.post(
+                path=f'/internal/compute-nodes/cloud-jobs/{cloud_job_uuid}/cleaned-up/',
+                headers={'Compute-Node-Auth-Token': config['compute_node_info']['auth_token']},
+            )
+
+            if deregistered_due_to_error:
+                CloudUtils.shutdown()  # shutdown now
         else:
-            logger.debug(f'Job {job_id} already cleaned up')
+            webserver_utils.update_auto_shutdown_time()
 
-        logger.debug("Terminating Worker Thread")
+        logger_no_user_data.debug(f'Job "{self._job_uuid}" worker thread exiting...')
         sys.exit()
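
The reworked run() and terminate() paths above no longer keep a ModuleOutput package in memory: the result is written into the job's temporary directory as a serialized module output, the exit code is read back from that file, and the output is uploaded via JobStorage when running in the cloud. A minimal sketch of the read-back step, using only classes imported in this diff (the directory path below is a made-up example):

import os

from biolib.biolib_binary_format import ModuleOutputV2
from biolib.biolib_binary_format.utils import LocalFileIndexableBuffer
from biolib.compute_node.job_worker.job_storage import JobStorage

# Hypothetical job temporary directory; the worker thread takes this from its compute state.
job_temporary_dir = '/tmp/biolib-job-example'

module_output_path = os.path.join(job_temporary_dir, JobStorage.module_output_file_name)
if os.path.exists(module_output_path):
    # Same construction as in _upload_module_output_and_get_exit_code() above:
    # the indexable buffer gives ModuleOutputV2 access to the serialized output file.
    module_output = ModuleOutputV2(buffer=LocalFileIndexableBuffer(filename=module_output_path))
    print('Exit code:', module_output.get_exit_code())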
biolib/experiments/__init__.py (added as an empty file; no content to show)

biolib/experiments/experiment.py (new file)

@@ -0,0 +1,356 @@
+import time
+from collections import OrderedDict
+from pathlib import Path
+
+from biolib import api
+from biolib._internal.utils import open_browser_window_from_notebook
+from biolib._shared.types import DeprecatedExperimentDict, ExperimentDict, ResourceDetailedDict
+from biolib.api.client import ApiClient
+from biolib.biolib_api_client import BiolibApiClient
+from biolib.biolib_errors import BioLibError
+from biolib.jobs.job import Job
+from biolib.jobs.job_result import PathFilter
+from biolib.jobs.types import JobsPaginatedResponse
+from biolib.tables import BioLibTable
+from biolib.typing_utils import Dict, List, Optional, Union
+from biolib.utils import IS_RUNNING_IN_NOTEBOOK
+
+
+class Experiment:
+    _BIOLIB_EXPERIMENTS: List['Experiment'] = []
+
+    # Columns to print in table when showing Job
+    _table_columns_to_row_map = OrderedDict(
+        {
+            'Name': {'key': 'name', 'params': {}},
+            'Job Count': {'key': 'job_count', 'params': {}},
+            'Created At': {'key': 'created_at', 'params': {}},
+        }
+    )
+
+    def __init__(
+        self,
+        uri: str,
+        _resource_dict: Optional[ResourceDetailedDict] = None,
+        _api_client: Optional[ApiClient] = None,
+    ):
+        self._api_client = _api_client or api.client
+        self._resource_dict: ResourceDetailedDict = _resource_dict or self._get_or_create_resource_dict(uri)
+
+    def __enter__(self):
+        Experiment._BIOLIB_EXPERIMENTS.append(self)
+
+    def __exit__(self, type, value, traceback):  # pylint: disable=redefined-builtin
+        Experiment._BIOLIB_EXPERIMENTS.pop()
+
+    def __str__(self):
+        return f'Experiment: {self.uri}'
+
+    def __repr__(self):
+        return f'Experiment: {self.uri}'
+
+    @property
+    def uuid(self) -> str:
+        return self._resource_dict['uuid']
+
+    @property
+    def id(self) -> str:
+        return self.uuid
+
+    @property
+    def name(self) -> str:
+        return self._resource_dict['name']
+
+    @property
+    def uri(self) -> str:
+        return self._resource_dict['uri']
+
+    @property
+    def _experiment_dict(self) -> DeprecatedExperimentDict:
+        if not self._resource_dict['experiment']:
+            raise ValueError(f'Resource {self.uri} is not an Experiment')
+
+        return self._resource_dict['experiment']
+
+    @staticmethod
+    def get_experiment_in_context() -> Optional['Experiment']:
+        if Experiment._BIOLIB_EXPERIMENTS:
+            return Experiment._BIOLIB_EXPERIMENTS[-1]
+        return None
+
+    # Prints a table listing info about experiments accessible to the user
+    @staticmethod
+    def show_experiments(count: int = 25) -> None:
+        pagniated_response = api.client.get(path='/experiments/', params={'page_size': str(count)}).json()
+        experiment_dicts: List[ExperimentDict] = pagniated_response['results']
+        BioLibTable(
+            columns_to_row_map=Experiment._table_columns_to_row_map,
+            rows=experiment_dicts,
+            title='Experiments',
+        ).print_table()
+
+    @staticmethod
+    def get_by_uri(uri: str) -> 'Experiment':
+        query_param_key = 'uri' if '/' in uri else 'name'
+        resource_dict: ResourceDetailedDict = api.client.get('/resource/', params={query_param_key: uri}).json()
+        if not resource_dict['experiment']:
+            raise ValueError(f'Resource {uri} is not an experiment')
+
+        return Experiment(uri=resource_dict['uri'], _resource_dict=resource_dict)
+
+    def wait(self) -> None:
+        self._refetch()
+        while self._experiment_dict['job_running_count'] > 0:
+            print(f"Waiting for {self._experiment_dict['job_running_count']} jobs to finish", end='\r')
+            time.sleep(5)
+            self._refetch()
+
+        print(f'All jobs of experiment {self.name} have finished')
+
+    def add_job(self, job: Optional[Union[Job, str]] = None, job_id: Optional[str] = None) -> None:
+        if job_id is not None:
+            print(
+                'WARNING: job_id argument is deprecated and may be removed in a future release.'
+                'Please use job argument instead.'
+            )
+        elif isinstance(job, Job):
+            job_id = job.id
+        elif isinstance(job, str):
+            job_id = job
+        elif job is None and job_id is None:
+            raise BioLibError('A job ID or job object must be provided to add job')
+        self._api_client.post(
+            path=f'/experiments/{self.uuid}/jobs/',
+            data={'job_uuid': job_id},
+        )
+
+    def remove_job(self, job: Union[Job, str]) -> None:
+        if isinstance(job, Job):
+            job_id = job.id
+        elif isinstance(job, str):
+            job_id = job
+        else:
+            raise BioLibError('A job ID or job object must be provided to remove job')
+
+        self._api_client.delete(path=f'/experiments/{self.uuid}/jobs/{job_id}/')
+
+    def mount_files(self, mount_path: str) -> None:
+        try:
+            # Only attempt to import FUSE dependencies when strictly necessary
+            from biolib._internal.fuse_mount import (  # pylint: disable=import-outside-toplevel
+                ExperimentFuseMount as _ExperimentFuseMount,
+            )
+        except ImportError as error:
+            raise ImportError(
+                'Failed to import FUSE mounting utils. Please ensure FUSE is installed on your system.'
+            ) from error
+
+        _ExperimentFuseMount.mount_experiment(experiment=self, mount_path=mount_path)
+
+    def export_job_list(self, export_format='dicts'):
+        valid_formats = ('dicts', 'dataframe')
+        if export_format not in valid_formats:
+            raise BioLibError(f'Format can only be one of {valid_formats}')
+
+        job_dict_list = [job.to_dict() for job in self.get_jobs()]
+        if export_format == 'dicts':
+            return job_dict_list
+
+        elif export_format == 'dataframe':
+            try:
+                import pandas as pd  # type: ignore # pylint: disable=import-outside-toplevel
+            except ImportError as error:
+                raise ImportError(
+                    'Pandas must be installed to use this method. '
+                    'Alternatively, use .get_jobs() to get a list of job objects.'
+                ) from error
+
+            jobs_df = pd.DataFrame.from_dict(job_dict_list)
+            jobs_df.started_at = pd.to_datetime(jobs_df.started_at)
+            jobs_df.created_at = pd.to_datetime(jobs_df.created_at)
+            jobs_df.finished_at = pd.to_datetime(jobs_df.finished_at)
+            return jobs_df
+
+    # Prints a table containing info about this experiment
+    def show(self) -> None:
+        BioLibTable(
+            columns_to_row_map=Experiment._table_columns_to_row_map,
+            rows=[dict(**self._experiment_dict, name=self.name, created_at=self._resource_dict['created_at'])],
+            title=f'Experiment: {self.name}',
+        ).print_table()
+
+    # Prints a table listing info about the jobs in this experiment
+    def show_jobs(self) -> None:
+        response: JobsPaginatedResponse = self._api_client.get(
+            path=f'/experiments/{self.uuid}/jobs/',
+            params=dict(page_size=10),
+        ).json()
+        jobs: List[Job] = [Job(job_dict) for job_dict in response['results']]
+
+        BioLibTable(
+            columns_to_row_map=Job.table_columns_to_row_map,
+            rows=[job._job_dict for job in jobs],  # pylint: disable=protected-access
+            title=f'Jobs in experiment: "{self.name}"',
+        ).print_table()
+
+    def get_jobs(self, status: Optional[str] = None) -> List[Job]:
+        job_states = ['in_progress', 'completed', 'failed', 'cancelled']
+        if status is not None and status not in job_states:
+            raise Exception('Invalid status filter')
+
+        url = f'/experiments/{self.uuid}/jobs/'
+        params: Dict[str, Union[str, int]] = dict(page_size=1_000)
+        if status:
+            params['status'] = status
+
+        response: JobsPaginatedResponse = self._api_client.get(url, params=params).json()
+        jobs: List[Job] = [Job(job_dict) for job_dict in response['results']]
+
+        for page_number in range(2, response['page_count'] + 1):
+            page_response: JobsPaginatedResponse = self._api_client.get(
+                url, params=dict(**params, page=page_number)
+            ).json()
+            jobs.extend([Job(job_dict) for job_dict in page_response['results']])
+
+        return jobs
+
+    def get_results(self, status: Optional[str] = None) -> List[Job]:
+        r"""Get a list of results in this experiment, optionally filtered by status.
+
+        Args:
+            status (str, optional): Filter results by status. One of:
+                'in_progress', 'completed', 'failed', 'cancelled'
+
+        Returns:
+            List[Job]: List of result objects in this experiment
+
+        Example::
+
+            >>> # Get all results in the experiment
+            >>> results = experiment.get_results()
+            >>> # Get only completed results
+            >>> completed_results = experiment.get_results(status='completed')
+        """
+        return self.get_jobs(status=status)
+
+    def save_completed_results(
+        self,
+        output_dir: Optional[str] = None,
+        path_filter: Optional[PathFilter] = None,
+        skip_file_if_exists: bool = False,
+        overwrite: bool = False,
+    ) -> None:
+        r"""Save all completed results in this experiment to local folders.
+
+        Creates a folder structure with the experiment name as the root directory,
+        containing a subfolder for each completed result. Only results with
+        'completed' status will be saved.
+
+        Args:
+            output_dir (str, optional): Base directory where the experiment folder
+                will be created. If None, uses the current working directory.
+            path_filter (PathFilter, optional): Filter to select which files in the results to save.
+                Can be a glob pattern string or a callable function.
+            skip_file_if_exists (bool, optional): Whether to skip files that already exist
+                locally instead of raising an error. Defaults to False.
+            overwrite (bool, optional): Whether to overwrite existing files.
+                Defaults to False.
+
+        Example::
+
+            >>> # Save all completed results to current directory
+            >>> experiment.save_completed_results()
+            >>> # This creates: ./experiment_name/result_1/, ./experiment_name/result_2/, etc.
+
+            >>> # Save to specific directory
+            >>> experiment.save_completed_results(output_dir="/path/to/save")
+            >>> # This creates: /path/to/save/experiment_name/result_1/, etc.
+        """
+        base_dir = Path(output_dir) if output_dir else Path.cwd()
+
+        if base_dir == Path('/'):
+            raise BioLibError("Cannot save experiment results to root directory '/'")
+
+        experiment_folder = base_dir / self.name
+        experiment_folder.mkdir(parents=True, exist_ok=True)
+
+        completed_results: List[Job] = []
+        failed_results = False
+        print('Getting experiment status...')
+        for result in self.get_results():
+            if result.get_status() == 'completed':
+                completed_results.append(result)
+            elif result.get_status() != 'in_progress':
+                failed_results = True
+
+        if failed_results:
+            print(
+                'WARNING: Found failed or cancelled results in the experiment. '
+                'Please verify you have all your results, and consider removing the failed ones.'
+            )
+        if not completed_results:
+            print(f"No completed results found in experiment '{self.name}'")
+            return
+
+        print(f"Saving {len(completed_results)} completed results from experiment '{self.name}' to {experiment_folder}")
+
+        for result in completed_results:
+            result_name = result.get_name()
+            result_folder = experiment_folder / result_name
+
+            result_folder.mkdir(parents=True, exist_ok=True)
+
+            result.save_files(
+                output_dir=str(result_folder),
+                path_filter=path_filter,
+                skip_file_if_exists=skip_file_if_exists,
+                overwrite=overwrite,
+            )
+
+    def rename(self, destination: str) -> None:
+        r"""Rename this experiment to a new URI.
+
+        Args:
+            destination (str): The new URI for the experiment
+                (e.g., 'username/new-experiment-name').
+
+        Example::
+
+            >>> experiment = biolib.get_experiment(uri='username/my-experiment')
+            >>> experiment.rename('username/my-renamed-experiment')
+            >>> print(experiment.uri)
+            'username/my-renamed-experiment'
+        """
+        self._api_client.patch(f'/resources/{self.uuid}/', data={'uri': destination})
+        self._refetch()
+
+    def _get_resource_dict_by_uuid(self, uuid: str) -> ResourceDetailedDict:
+        resource_dict: ResourceDetailedDict = self._api_client.get(f'/resources/{uuid}/').json()
+        if not resource_dict['experiment']:
+            raise ValueError('Resource from URI is not an experiment')
+
+        return resource_dict
+
+    def _get_or_create_resource_dict(self, uri: str) -> ResourceDetailedDict:
+        response_dict = self._api_client.post(path='/experiments/', data={'uri' if '/' in uri else 'name': uri}).json()
+        return self._get_resource_dict_by_uuid(uuid=response_dict['uuid'])
+
+    def _refetch(self) -> None:
+        self._resource_dict = self._get_resource_dict_by_uuid(uuid=self._resource_dict['uuid'])
+
+    def open_browser(self) -> None:
+        """Open a browser window to view this experiment.
+
+        If running in a notebook, this will attempt to open a new browser window.
+        Otherwise, it will print a URL that you can copy and paste.
+        """
+        api_client = BiolibApiClient.get()
+        url_to_open = f'{api_client.base_url}/experiments/{self.id}/'
+
+        if IS_RUNNING_IN_NOTEBOOK:
+            print(f'Opening experiment page at: {url_to_open}')
+            print('If your browser does not open automatically, click on the link above.')
+            open_browser_window_from_notebook(url_to_open)
+        else:
+            print('Please copy and paste the following link into your browser:')
+            print(url_to_open)
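
The Experiment class above is the new user-facing way to group jobs and collect their results. A short usage sketch, assuming only the methods defined in this file plus the top-level biolib.get_experiment helper shown in the rename() docstring; the URI and output directory are placeholders:

import biolib

# Placeholder URI; per _get_or_create_resource_dict above, the experiment is created if it does not already exist.
experiment = biolib.get_experiment(uri='username/my-experiment')

# Jobs started while the experiment is active as a context manager can be attached to it,
# e.g. via Experiment.get_experiment_in_context() in the job-submission code path.
with experiment:
    pass  # start apps/jobs here

experiment.wait()        # block until no jobs in the experiment are running
experiment.show_jobs()   # print a table of the jobs in this experiment

# Download the output files of every completed result into a folder named after the experiment.
experiment.save_completed_results(output_dir='.', skip_file_if_exists=True)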

biolib/jobs/__init__.py (new file)

@@ -0,0 +1 @@
+from biolib.jobs.job import Job