pybiolib 0.2.951__py3-none-any.whl → 1.2.1890__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. biolib/__init__.py +357 -11
  2. biolib/_data_record/data_record.py +380 -0
  3. biolib/_index/__init__.py +0 -0
  4. biolib/_index/index.py +55 -0
  5. biolib/_index/query_result.py +103 -0
  6. biolib/_internal/__init__.py +0 -0
  7. biolib/_internal/add_copilot_prompts.py +58 -0
  8. biolib/_internal/add_gui_files.py +81 -0
  9. biolib/_internal/data_record/__init__.py +1 -0
  10. biolib/_internal/data_record/data_record.py +85 -0
  11. biolib/_internal/data_record/push_data.py +116 -0
  12. biolib/_internal/data_record/remote_storage_endpoint.py +43 -0
  13. biolib/_internal/errors.py +5 -0
  14. biolib/_internal/file_utils.py +125 -0
  15. biolib/_internal/fuse_mount/__init__.py +1 -0
  16. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  17. biolib/_internal/http_client.py +159 -0
  18. biolib/_internal/lfs/__init__.py +1 -0
  19. biolib/_internal/lfs/cache.py +51 -0
  20. biolib/_internal/libs/__init__.py +1 -0
  21. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  22. biolib/_internal/push_application.py +488 -0
  23. biolib/_internal/runtime.py +22 -0
  24. biolib/_internal/string_utils.py +13 -0
  25. biolib/_internal/templates/__init__.py +1 -0
  26. biolib/_internal/templates/copilot_template/.github/instructions/general-app-knowledge.instructions.md +10 -0
  27. biolib/_internal/templates/copilot_template/.github/instructions/style-general.instructions.md +20 -0
  28. biolib/_internal/templates/copilot_template/.github/instructions/style-python.instructions.md +16 -0
  29. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  30. biolib/_internal/templates/copilot_template/.github/prompts/biolib_app_inputs.prompt.md +11 -0
  31. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  32. biolib/_internal/templates/copilot_template/.github/prompts/biolib_run_apps.prompt.md +12 -0
  33. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  34. biolib/_internal/templates/github_workflow_template/.github/workflows/biolib.yml +21 -0
  35. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  36. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  37. biolib/_internal/templates/gui_template/App.tsx +53 -0
  38. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  39. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  40. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  41. biolib/_internal/templates/gui_template/index.css +5 -0
  42. biolib/_internal/templates/gui_template/index.html +13 -0
  43. biolib/_internal/templates/gui_template/index.tsx +10 -0
  44. biolib/_internal/templates/gui_template/package.json +27 -0
  45. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  46. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  47. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  48. biolib/_internal/templates/init_template/.biolib/config.yml +19 -0
  49. biolib/_internal/templates/init_template/Dockerfile +14 -0
  50. biolib/_internal/templates/init_template/requirements.txt +1 -0
  51. biolib/_internal/templates/init_template/run.py +12 -0
  52. biolib/_internal/templates/init_template/run.sh +4 -0
  53. biolib/_internal/templates/templates.py +25 -0
  54. biolib/_internal/tree_utils.py +106 -0
  55. biolib/_internal/utils/__init__.py +65 -0
  56. biolib/_internal/utils/auth.py +46 -0
  57. biolib/_internal/utils/job_url.py +33 -0
  58. biolib/_internal/utils/multinode.py +263 -0
  59. biolib/_runtime/runtime.py +157 -0
  60. biolib/_session/session.py +44 -0
  61. biolib/_shared/__init__.py +0 -0
  62. biolib/_shared/types/__init__.py +74 -0
  63. biolib/_shared/types/account.py +12 -0
  64. biolib/_shared/types/account_member.py +8 -0
  65. biolib/_shared/types/app.py +9 -0
  66. biolib/_shared/types/data_record.py +40 -0
  67. biolib/_shared/types/experiment.py +32 -0
  68. biolib/_shared/types/file_node.py +17 -0
  69. biolib/_shared/types/push.py +6 -0
  70. biolib/_shared/types/resource.py +37 -0
  71. biolib/_shared/types/resource_deploy_key.py +11 -0
  72. biolib/_shared/types/resource_permission.py +14 -0
  73. biolib/_shared/types/resource_version.py +19 -0
  74. biolib/_shared/types/result.py +14 -0
  75. biolib/_shared/types/typing.py +10 -0
  76. biolib/_shared/types/user.py +19 -0
  77. biolib/_shared/utils/__init__.py +7 -0
  78. biolib/_shared/utils/resource_uri.py +75 -0
  79. biolib/api/__init__.py +6 -0
  80. biolib/api/client.py +168 -0
  81. biolib/app/app.py +252 -49
  82. biolib/app/search_apps.py +45 -0
  83. biolib/biolib_api_client/api_client.py +126 -31
  84. biolib/biolib_api_client/app_types.py +24 -4
  85. biolib/biolib_api_client/auth.py +31 -8
  86. biolib/biolib_api_client/biolib_app_api.py +147 -52
  87. biolib/biolib_api_client/biolib_job_api.py +161 -141
  88. biolib/biolib_api_client/job_types.py +21 -5
  89. biolib/biolib_api_client/lfs_types.py +7 -23
  90. biolib/biolib_api_client/user_state.py +56 -0
  91. biolib/biolib_binary_format/__init__.py +1 -4
  92. biolib/biolib_binary_format/file_in_container.py +105 -0
  93. biolib/biolib_binary_format/module_input.py +24 -7
  94. biolib/biolib_binary_format/module_output_v2.py +149 -0
  95. biolib/biolib_binary_format/remote_endpoints.py +34 -0
  96. biolib/biolib_binary_format/remote_stream_seeker.py +59 -0
  97. biolib/biolib_binary_format/saved_job.py +3 -2
  98. biolib/biolib_binary_format/{attestation_document.py → stdout_and_stderr.py} +8 -8
  99. biolib/biolib_binary_format/system_status_update.py +3 -2
  100. biolib/biolib_binary_format/utils.py +175 -0
  101. biolib/biolib_docker_client/__init__.py +11 -2
  102. biolib/biolib_errors.py +36 -0
  103. biolib/biolib_logging.py +27 -10
  104. biolib/cli/__init__.py +38 -0
  105. biolib/cli/auth.py +46 -0
  106. biolib/cli/data_record.py +164 -0
  107. biolib/cli/index.py +32 -0
  108. biolib/cli/init.py +421 -0
  109. biolib/cli/lfs.py +101 -0
  110. biolib/cli/push.py +50 -0
  111. biolib/cli/run.py +63 -0
  112. biolib/cli/runtime.py +14 -0
  113. biolib/cli/sdk.py +16 -0
  114. biolib/cli/start.py +56 -0
  115. biolib/compute_node/cloud_utils/cloud_utils.py +110 -161
  116. biolib/compute_node/job_worker/cache_state.py +66 -88
  117. biolib/compute_node/job_worker/cache_types.py +1 -6
  118. biolib/compute_node/job_worker/docker_image_cache.py +112 -37
  119. biolib/compute_node/job_worker/executors/__init__.py +0 -3
  120. biolib/compute_node/job_worker/executors/docker_executor.py +532 -199
  121. biolib/compute_node/job_worker/executors/docker_types.py +9 -1
  122. biolib/compute_node/job_worker/executors/types.py +19 -9
  123. biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +30 -0
  124. biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +3 -5
  125. biolib/compute_node/job_worker/job_storage.py +108 -0
  126. biolib/compute_node/job_worker/job_worker.py +397 -212
  127. biolib/compute_node/job_worker/large_file_system.py +87 -38
  128. biolib/compute_node/job_worker/network_alloc.py +99 -0
  129. biolib/compute_node/job_worker/network_buffer.py +240 -0
  130. biolib/compute_node/job_worker/utilization_reporter_thread.py +197 -0
  131. biolib/compute_node/job_worker/utils.py +9 -24
  132. biolib/compute_node/remote_host_proxy.py +400 -98
  133. biolib/compute_node/utils.py +31 -9
  134. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  135. biolib/compute_node/webserver/proxy_utils.py +28 -0
  136. biolib/compute_node/webserver/webserver.py +130 -44
  137. biolib/compute_node/webserver/webserver_types.py +2 -6
  138. biolib/compute_node/webserver/webserver_utils.py +77 -12
  139. biolib/compute_node/webserver/worker_thread.py +183 -42
  140. biolib/experiments/__init__.py +0 -0
  141. biolib/experiments/experiment.py +356 -0
  142. biolib/jobs/__init__.py +1 -0
  143. biolib/jobs/job.py +741 -0
  144. biolib/jobs/job_result.py +185 -0
  145. biolib/jobs/types.py +50 -0
  146. biolib/py.typed +0 -0
  147. biolib/runtime/__init__.py +14 -0
  148. biolib/sdk/__init__.py +91 -0
  149. biolib/tables.py +34 -0
  150. biolib/typing_utils.py +2 -7
  151. biolib/user/__init__.py +1 -0
  152. biolib/user/sign_in.py +54 -0
  153. biolib/utils/__init__.py +162 -0
  154. biolib/utils/cache_state.py +94 -0
  155. biolib/utils/multipart_uploader.py +194 -0
  156. biolib/utils/seq_util.py +150 -0
  157. biolib/utils/zip/remote_zip.py +640 -0
  158. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  159. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  160. {pybiolib-0.2.951.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  161. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  162. README.md +0 -17
  163. biolib/app/app_result.py +0 -68
  164. biolib/app/utils.py +0 -62
  165. biolib/biolib-js/0-biolib.worker.js +0 -1
  166. biolib/biolib-js/1-biolib.worker.js +0 -1
  167. biolib/biolib-js/2-biolib.worker.js +0 -1
  168. biolib/biolib-js/3-biolib.worker.js +0 -1
  169. biolib/biolib-js/4-biolib.worker.js +0 -1
  170. biolib/biolib-js/5-biolib.worker.js +0 -1
  171. biolib/biolib-js/6-biolib.worker.js +0 -1
  172. biolib/biolib-js/index.html +0 -10
  173. biolib/biolib-js/main-biolib.js +0 -1
  174. biolib/biolib_api_client/biolib_account_api.py +0 -21
  175. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -108
  176. biolib/biolib_binary_format/aes_encrypted_package.py +0 -42
  177. biolib/biolib_binary_format/module_output.py +0 -58
  178. biolib/biolib_binary_format/rsa_encrypted_aes_package.py +0 -57
  179. biolib/biolib_push.py +0 -114
  180. biolib/cli.py +0 -203
  181. biolib/cli_utils.py +0 -273
  182. biolib/compute_node/cloud_utils/enclave_parent_types.py +0 -7
  183. biolib/compute_node/enclave/__init__.py +0 -2
  184. biolib/compute_node/enclave/enclave_remote_hosts.py +0 -53
  185. biolib/compute_node/enclave/nitro_secure_module_utils.py +0 -64
  186. biolib/compute_node/job_worker/executors/base_executor.py +0 -18
  187. biolib/compute_node/job_worker/executors/pyppeteer_executor.py +0 -173
  188. biolib/compute_node/job_worker/executors/remote/__init__.py +0 -1
  189. biolib/compute_node/job_worker/executors/remote/nitro_enclave_utils.py +0 -81
  190. biolib/compute_node/job_worker/executors/remote/remote_executor.py +0 -51
  191. biolib/lfs.py +0 -196
  192. biolib/pyppeteer/.circleci/config.yml +0 -100
  193. biolib/pyppeteer/.coveragerc +0 -3
  194. biolib/pyppeteer/.gitignore +0 -89
  195. biolib/pyppeteer/.pre-commit-config.yaml +0 -28
  196. biolib/pyppeteer/CHANGES.md +0 -253
  197. biolib/pyppeteer/CONTRIBUTING.md +0 -26
  198. biolib/pyppeteer/LICENSE +0 -12
  199. biolib/pyppeteer/README.md +0 -137
  200. biolib/pyppeteer/docs/Makefile +0 -177
  201. biolib/pyppeteer/docs/_static/custom.css +0 -28
  202. biolib/pyppeteer/docs/_templates/layout.html +0 -10
  203. biolib/pyppeteer/docs/changes.md +0 -1
  204. biolib/pyppeteer/docs/conf.py +0 -299
  205. biolib/pyppeteer/docs/index.md +0 -21
  206. biolib/pyppeteer/docs/make.bat +0 -242
  207. biolib/pyppeteer/docs/reference.md +0 -211
  208. biolib/pyppeteer/docs/server.py +0 -60
  209. biolib/pyppeteer/poetry.lock +0 -1699
  210. biolib/pyppeteer/pyppeteer/__init__.py +0 -135
  211. biolib/pyppeteer/pyppeteer/accessibility.py +0 -286
  212. biolib/pyppeteer/pyppeteer/browser.py +0 -401
  213. biolib/pyppeteer/pyppeteer/browser_fetcher.py +0 -194
  214. biolib/pyppeteer/pyppeteer/command.py +0 -22
  215. biolib/pyppeteer/pyppeteer/connection/__init__.py +0 -242
  216. biolib/pyppeteer/pyppeteer/connection/cdpsession.py +0 -101
  217. biolib/pyppeteer/pyppeteer/coverage.py +0 -346
  218. biolib/pyppeteer/pyppeteer/device_descriptors.py +0 -787
  219. biolib/pyppeteer/pyppeteer/dialog.py +0 -79
  220. biolib/pyppeteer/pyppeteer/domworld.py +0 -597
  221. biolib/pyppeteer/pyppeteer/emulation_manager.py +0 -53
  222. biolib/pyppeteer/pyppeteer/errors.py +0 -48
  223. biolib/pyppeteer/pyppeteer/events.py +0 -63
  224. biolib/pyppeteer/pyppeteer/execution_context.py +0 -156
  225. biolib/pyppeteer/pyppeteer/frame/__init__.py +0 -299
  226. biolib/pyppeteer/pyppeteer/frame/frame_manager.py +0 -306
  227. biolib/pyppeteer/pyppeteer/helpers.py +0 -245
  228. biolib/pyppeteer/pyppeteer/input.py +0 -371
  229. biolib/pyppeteer/pyppeteer/jshandle.py +0 -598
  230. biolib/pyppeteer/pyppeteer/launcher.py +0 -683
  231. biolib/pyppeteer/pyppeteer/lifecycle_watcher.py +0 -169
  232. biolib/pyppeteer/pyppeteer/models/__init__.py +0 -103
  233. biolib/pyppeteer/pyppeteer/models/_protocol.py +0 -12460
  234. biolib/pyppeteer/pyppeteer/multimap.py +0 -82
  235. biolib/pyppeteer/pyppeteer/network_manager.py +0 -678
  236. biolib/pyppeteer/pyppeteer/options.py +0 -8
  237. biolib/pyppeteer/pyppeteer/page.py +0 -1728
  238. biolib/pyppeteer/pyppeteer/pipe_transport.py +0 -59
  239. biolib/pyppeteer/pyppeteer/target.py +0 -147
  240. biolib/pyppeteer/pyppeteer/task_queue.py +0 -24
  241. biolib/pyppeteer/pyppeteer/timeout_settings.py +0 -36
  242. biolib/pyppeteer/pyppeteer/tracing.py +0 -93
  243. biolib/pyppeteer/pyppeteer/us_keyboard_layout.py +0 -305
  244. biolib/pyppeteer/pyppeteer/util.py +0 -18
  245. biolib/pyppeteer/pyppeteer/websocket_transport.py +0 -47
  246. biolib/pyppeteer/pyppeteer/worker.py +0 -101
  247. biolib/pyppeteer/pyproject.toml +0 -97
  248. biolib/pyppeteer/spell.txt +0 -137
  249. biolib/pyppeteer/tox.ini +0 -72
  250. biolib/pyppeteer/utils/generate_protocol_types.py +0 -603
  251. biolib/start_cli.py +0 -7
  252. biolib/utils.py +0 -47
  253. biolib/validators/validate_app_version.py +0 -183
  254. biolib/validators/validate_argument.py +0 -134
  255. biolib/validators/validate_module.py +0 -323
  256. biolib/validators/validate_zip_file.py +0 -40
  257. biolib/validators/validator_utils.py +0 -103
  258. pybiolib-0.2.951.dist-info/LICENSE +0 -21
  259. pybiolib-0.2.951.dist-info/METADATA +0 -61
  260. pybiolib-0.2.951.dist-info/RECORD +0 -153
  261. pybiolib-0.2.951.dist-info/entry_points.txt +0 -3
  262. /LICENSE → /pybiolib-1.2.1890.dist-info/licenses/LICENSE +0 -0
@@ -0,0 +1,94 @@
1
+ import abc
2
+ import json
3
+ import os
4
+ import time
5
+ from datetime import datetime, timezone
6
+
7
+ import appdirs # type: ignore
8
+
9
+ from biolib.biolib_errors import BioLibError
10
+ from biolib.biolib_logging import logger_no_user_data
11
+ from biolib.typing_utils import Generic, Optional, TypeVar
12
+
13
+ StateType = TypeVar('StateType') # pylint: disable=invalid-name
14
+
15
+
16
class CacheStateError(BioLibError):
    """Raised when the cache state cannot be locked, loaded or unlocked."""
18
+
19
+
20
class CacheState(abc.ABC, Generic[StateType]):
    """Context manager giving lock-file-guarded access to a JSON state file.

    Entering the context creates ``<state path>.lock`` exclusively, loads the
    JSON state from ``_state_path`` (or writes the default state if the file
    does not exist yet) and returns it. Leaving the context writes the state
    back to ``_state_path`` and removes the lock file. The state is written
    back even when the ``with`` body raised.

    Subclasses provide ``_state_path`` and ``_get_default_state``.
    """

    # Number of attempts made to create the lock file before giving up.
    _LOCK_ACQUIRE_ATTEMPTS = 10
    # Delay between two attempts to create the lock file.
    _LOCK_RETRY_DELAY_SECONDS = 0.5

    @property
    @abc.abstractmethod
    def _state_path(self) -> str:
        """Path of the JSON file the state is read from and written to."""
        raise NotImplementedError

    @abc.abstractmethod
    def _get_default_state(self) -> StateType:
        """Return the state to use (and persist) when no state file exists."""
        raise NotImplementedError

    @property
    def _user_cache_dir(self) -> str:
        """Return the per-user pybiolib cache directory, creating it if needed."""
        user_cache_dir: str = appdirs.user_cache_dir(appname='pybiolib', appauthor='biolib')
        os.makedirs(user_cache_dir, exist_ok=True)
        return user_cache_dir

    @property
    def _state_lock_path(self) -> str:
        """Path of the lock file that guards ``_state_path``."""
        return f'{self._state_path}.lock'

    def __init__(self, fail_fast_on_lock_acquire: bool = False) -> None:
        """Create the context manager.

        Args:
            fail_fast_on_lock_acquire: When True, raise ``CacheStateError`` on
                the first failed attempt to create the lock file instead of
                retrying.
        """
        self._state: Optional[StateType] = None
        self._fail_fast_on_lock_acquire: bool = fail_fast_on_lock_acquire

    def __enter__(self) -> StateType:
        """Acquire the lock, load (or initialise) the state and return it.

        Raises:
            CacheStateError: If the lock cannot be acquired or the loaded
                state is ``None``.
        """
        logger_no_user_data.debug(f'CacheState: Entering state path: {self._state_path}...')
        try:
            self._acquire_state_lock()
        except BaseException as error:  # pylint: disable=broad-except
            logger_no_user_data.debug(f'Could not get LFS lock, got error: {error}...')
            raise

        try:
            if os.path.exists(self._state_path):
                with open(self._state_path, mode='r') as file:
                    self._state = json.loads(file.read())
            else:
                self._state = self._get_default_state()
                # Serialize before opening so a failure cannot leave a truncated file behind.
                serialized_state = json.dumps(self._state)
                with open(self._state_path, mode='w') as file:
                    file.write(serialized_state)

            # Check for type checking
            if self._state is None:
                raise CacheStateError('Internal state is not defined')
        except BaseException as error:  # pylint: disable=broad-except
            logger_no_user_data.debug(f'CacheState: Failed to load state, got error: {error}...')
            # __exit__ is not called when __enter__ raises, so the lock taken
            # above must be released here or it would block all later users.
            try:
                self._release_state_lock()
            except CacheStateError as release_error:
                logger_no_user_data.debug(f'CacheState: Failed to release lock, got error: {release_error}')
            raise
        return self._state

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Persist the state and release the lock, even if persisting fails."""
        try:
            # Serialize before opening so a failure cannot leave a truncated file behind.
            serialized_state = json.dumps(self._state)
            with open(self._state_path, mode='w') as file:
                file.write(serialized_state)
        finally:
            self._release_state_lock()

        logger_no_user_data.debug(f'CacheState: Exited state path: {self._state_path}')

    def _acquire_state_lock(self) -> None:
        """Create the lock file exclusively, retrying while it already exists.

        Raises:
            CacheStateError: Immediately on the first failure when fail-fast is
                enabled, otherwise after all attempts have failed.
        """
        for _ in range(self._LOCK_ACQUIRE_ATTEMPTS):
            try:
                # Mode 'x' fails if the file exists, which makes creation the lock operation.
                with open(self._state_lock_path, mode='x'):
                    pass
                return
            except Exception as error:  # pylint: disable=broad-except
                # Deliberately not BaseException: KeyboardInterrupt and
                # SystemExit must abort the wait instead of being retried.
                logger_no_user_data.debug(f'Failed to acquire lock file "{self._state_lock_path}". Got error: {error}')
                if self._fail_fast_on_lock_acquire:
                    raise CacheStateError(f'Failed to acquire lock file "{self._state_lock_path}": {error}') from error

            time.sleep(self._LOCK_RETRY_DELAY_SECONDS)

        raise CacheStateError(f'Cache state timed out waiting to acquire lock file "{self._state_lock_path}"')

    def _release_state_lock(self) -> None:
        """Remove the lock file.

        Raises:
            CacheStateError: If the lock file does not exist.
        """
        try:
            os.remove(self._state_lock_path)
        except FileNotFoundError as error:
            raise CacheStateError('Cache state was not locked.') from error

    @staticmethod
    def get_timestamp_now() -> str:
        """Return the current UTC time as an ISO 8601 string."""
        return datetime.now(timezone.utc).isoformat()
@@ -0,0 +1,194 @@
1
+ import math
2
+ import multiprocessing
3
+ import multiprocessing.pool
4
+ import os
5
+ import time
6
+ from urllib.parse import urlparse
7
+
8
+ import biolib.api
9
+ from biolib._internal.http_client import HttpClient
10
+ from biolib.biolib_api_client import BiolibApiClient
11
+ from biolib.biolib_errors import BioLibError
12
+ from biolib.biolib_logging import logger, logger_no_user_data
13
+ from biolib.typing_utils import Callable, Dict, Iterator, List, Optional, Tuple, TypedDict
14
+
15
+
16
def get_chunk_iterator_from_bytes(byte_buffer: bytes, chunk_size_in_bytes: int = 50_000_000) -> Iterator[bytes]:
    """Yield consecutive slices of `byte_buffer`, each at most `chunk_size_in_bytes` long."""
    total_chunks = math.ceil(len(byte_buffer) / chunk_size_in_bytes)
    offset = 0
    for _ in range(total_chunks):
        yield byte_buffer[offset : offset + chunk_size_in_bytes]
        offset += chunk_size_in_bytes
22
+
23
+
24
def get_chunk_iterator_from_file_object(file_object, chunk_size_in_bytes: int = 50_000_000) -> Iterator[bytes]:
    """Yield successive `read(chunk_size_in_bytes)` results until a read returns nothing."""
    chunk = file_object.read(chunk_size_in_bytes)
    while chunk:
        yield chunk
        chunk = file_object.read(chunk_size_in_bytes)
30
+
31
+
32
class RequestOptions(TypedDict):
    """Parameters of one API request issued by MultiPartUploader."""

    # Forwarded as the `headers` argument of the API client call; may be None.
    headers: Optional[Dict[str, str]]
    # Forwarded as the `authenticate` argument of the API client call.
    requires_biolib_auth: bool
    # Forwarded as the `path` argument of the API client call.
    path: str
36
+
37
+
38
class _PartMetadata(TypedDict):
    """Record of one uploaded part, sent in the `parts` list of the complete-upload request."""

    # Value of the `ETag` response header returned by the PUT of this part.
    ETag: str
    # Part index; parts are numbered starting from 1.
    PartNumber: int
41
+
42
+
43
# Input of MultiPartUploader._upload_chunk: (part number, chunk bytes).
_UploadChunkInputType = Tuple[int, bytes]
# Result of MultiPartUploader._upload_chunk: (part metadata, chunk length in bytes).
_UploadChunkReturnType = Tuple[_PartMetadata, int]
45
+
46
+
47
class MultiPartUploader:
    """Uploads a payload in numbered parts via presigned URLs and then completes the upload.

    For every part a presigned URL is requested from the API, the chunk is PUT
    to that URL, and the returned ``ETag`` is collected. Once all parts are
    uploaded the collected part list is posted to the complete-upload endpoint.

    NOTE: the uploaded byte counter is never reset, so progress reporting and
    the reported ``size_bytes`` are only correct for one ``upload`` call per
    instance.
    """

    # Upper bound on the number of pool workers used for parallel part uploads.
    _MAX_POOL_SIZE = 16
    # Number of attempts made per part before the upload is aborted.
    _MAX_UPLOAD_ATTEMPTS = 20

    def __init__(
        self,
        complete_upload_request: RequestOptions,
        get_presigned_upload_url_request: RequestOptions,
        start_multipart_upload_request: Optional[RequestOptions] = None,
        use_process_pool: Optional[bool] = None,
        on_progress: Optional[Callable[[int, int], None]] = None,
    ):
        """Configure the uploader.

        Args:
            complete_upload_request: Request posted after all parts are uploaded.
            get_presigned_upload_url_request: Request used to fetch the
                presigned URL of each part.
            start_multipart_upload_request: Optional request posted before the
                first part is uploaded.
            use_process_pool: When truthy a process pool is used for parallel
                uploads, otherwise a thread pool.
            on_progress: Optional callback invoked after every part with
                ``(bytes_uploaded_so_far, payload_size_in_bytes)``.
        """
        self._complete_upload_request = complete_upload_request
        self._get_presigned_upload_url_request = get_presigned_upload_url_request
        self._start_multipart_upload_request = start_multipart_upload_request
        self._bytes_uploaded: int = 0
        self._use_process_pool = use_process_pool
        self._on_progress = on_progress

    def upload(self, payload_iterator: Iterator[bytes], payload_size_in_bytes: int) -> None:
        """Upload every chunk of `payload_iterator` as one part and complete the upload.

        Args:
            payload_iterator: Yields the chunks to upload; chunk i becomes part i (1-based).
            payload_size_in_bytes: Total payload size, used for progress reporting only.

        Raises:
            BioLibError: If a part could not be uploaded within the retry budget.
        """
        parts: List[_PartMetadata] = []

        iterator_with_index: Iterator[_UploadChunkInputType] = enumerate(payload_iterator, 1)  # type: ignore
        logger_no_user_data.debug(f'Starting multipart upload of payload with size {payload_size_in_bytes} bytes')

        if self._start_multipart_upload_request:
            try:
                biolib.api.client.post(
                    authenticate=self._start_multipart_upload_request['requires_biolib_auth'],
                    headers=self._start_multipart_upload_request['headers'],
                    path=self._start_multipart_upload_request['path'],
                )
            except BaseException as error:
                logger_no_user_data.debug(f'Failed to start multipart upload got error: {error}')
                raise

        # if multiprocessing start method is spawn or we are running in a daemon process,
        # multiprocessing.Pool may fail when called from script
        if multiprocessing.get_start_method() == 'spawn' or multiprocessing.current_process().daemon:
            logger_no_user_data.debug('Uploading multipart from main process...')
            for chunk in iterator_with_index:
                upload_chunk_response = self._upload_chunk(chunk)
                self._update_progress_bar_and_parts(
                    upload_chunk_response=upload_chunk_response,
                    parts=parts,
                    payload_size_in_bytes=payload_size_in_bytes,
                )
        else:
            # Use up to 16 workers, leaving one CPU free, but never fewer than one:
            # a pool size of 0 (single-CPU host) makes the pool constructor raise.
            pool_size = max(1, min(self._MAX_POOL_SIZE, multiprocessing.cpu_count() - 1))
            process_pool = (
                multiprocessing.Pool(pool_size)
                if self._use_process_pool
                else multiprocessing.pool.ThreadPool(pool_size)
            )

            try:
                response: _UploadChunkReturnType
                # imap yields results in submission order, so `parts` stays sorted by part number.
                for response in process_pool.imap(self._upload_chunk, iterator_with_index):
                    self._update_progress_bar_and_parts(
                        upload_chunk_response=response, parts=parts, payload_size_in_bytes=payload_size_in_bytes
                    )
            finally:
                logger_no_user_data.debug('Multipart upload closing process pool...')
                process_pool.close()

        requires_biolib_auth = self._complete_upload_request['requires_biolib_auth']
        if requires_biolib_auth:
            BiolibApiClient.refresh_auth_token()

        logger_no_user_data.debug(f'Uploaded {len(parts)} parts, now calling complete upload...')
        biolib.api.client.post(
            authenticate=requires_biolib_auth,
            headers=self._complete_upload_request['headers'],
            data={'parts': parts, 'size_bytes': self._bytes_uploaded},
            path=self._complete_upload_request['path'],
        )

    def _upload_chunk(self, _input: _UploadChunkInputType) -> _UploadChunkReturnType:
        """Upload one part, retrying with a growing delay on failure.

        Args:
            _input: Tuple of (part number, chunk bytes).

        Returns:
            Tuple of (part metadata with the returned ETag, chunk length in bytes).

        Raises:
            BioLibError: If all attempts failed.
        """
        part_number, chunk = _input
        requires_biolib_auth = self._get_presigned_upload_url_request['requires_biolib_auth']

        # The delays between the 20 attempts add up to sum_{i=0..18}(i^2+2) s, roughly 36 min.
        for index in range(self._MAX_UPLOAD_ATTEMPTS):
            if requires_biolib_auth:
                BiolibApiClient.refresh_auth_token()

            logger_no_user_data.debug(f'Uploading part number {part_number} with size {len(chunk)} bytes...')
            presigned_upload_url = None
            try:
                logger_no_user_data.debug(f'Getting upload URL for chunk {part_number}...')
                get_url_response = biolib.api.client.get(
                    authenticate=requires_biolib_auth,
                    headers=self._get_presigned_upload_url_request['headers'],
                    params={'part_number': part_number},
                    path=self._get_presigned_upload_url_request['path'],
                )

                presigned_upload_url = get_url_response.json()['presigned_upload_url']

            except Exception as error:  # pylint: disable=broad-except
                logger_no_user_data.warning(f'Error when getting url for part {part_number}. Retrying...')
                logger.debug(f'Upload error: {error}')

            if presigned_upload_url:
                try:
                    app_caller_proxy_job_storage_base_url = os.getenv('BIOLIB_CLOUD_JOB_STORAGE_BASE_URL', '')
                    if app_caller_proxy_job_storage_base_url:
                        # Done to hit App Caller Proxy when uploading result from inside an app
                        parsed_url = urlparse(presigned_upload_url)
                        presigned_upload_url = (
                            f'{app_caller_proxy_job_storage_base_url}{parsed_url.path}?{parsed_url.query}'
                        )

                    put_chunk_response = HttpClient.request(
                        url=presigned_upload_url,
                        data=chunk,
                        method='PUT',
                        timeout_in_seconds=300,
                    )
                    return _PartMetadata(PartNumber=part_number, ETag=put_chunk_response.headers['ETag']), len(chunk)

                except Exception as error:  # pylint: disable=broad-except
                    logger_no_user_data.warning(f'Encountered error when uploading part {part_number}. Retrying...')
                    logger.debug(f'Upload error: {error} ({presigned_upload_url})')

            # No point in waiting after the final attempt; fail right away instead.
            if index < self._MAX_UPLOAD_ATTEMPTS - 1:
                time.sleep(index * index + 2)

        logger_no_user_data.debug(f'Max retries hit, when uploading part {part_number}. Exiting...')
        raise BioLibError(f'Max retries hit, when uploading part {part_number}. Exiting...')

    def _update_progress_bar_and_parts(
        self,
        upload_chunk_response: _UploadChunkReturnType,
        parts: List[_PartMetadata],
        payload_size_in_bytes: int,
    ) -> None:
        """Record an uploaded part, advance the byte counter and report progress.

        Args:
            upload_chunk_response: Result of `_upload_chunk` for the part.
            parts: List the part metadata is appended to (mutated in place).
            payload_size_in_bytes: Total payload size used to compute the progress.
        """
        part_metadata, chunk_byte_length = upload_chunk_response
        part_number = part_metadata['PartNumber']

        parts.append(part_metadata)
        self._bytes_uploaded += chunk_byte_length

        if self._on_progress is not None:
            self._on_progress(self._bytes_uploaded, payload_size_in_bytes)

        # The +1 avoids a division by zero for an empty payload; the result is capped at 100.
        approx_progress_percent = min(self._bytes_uploaded / (payload_size_in_bytes + 1) * 100, 100)
        approx_rounded_progress = round(approx_progress_percent, 2)
        logger_no_user_data.debug(
            f'Uploaded part number {part_number} with size {chunk_byte_length} bytes, '
            f'the approximate progress is {approx_rounded_progress}%'
        )
@@ -0,0 +1,150 @@
1
+ import re
2
+ from io import BufferedIOBase, TextIOBase
3
+
4
+ from biolib.typing_utils import Dict, Iterator, List, Optional, Union
5
+
6
+
7
class SeqUtilRecord:
    """A single sequence record: sequence, id, optional description and properties."""

    # Characters that would break the ` [key=value]` syntax used when properties
    # are written into a FASTA header line.
    _DISALLOWED_PROPERTY_PATTERN = re.compile(r'[=\[\]\n]')

    def __init__(
        self,
        sequence: str,
        sequence_id: str,
        description: Optional[str] = None,
        properties: Optional[Dict[str, str]] = None,
    ):
        """Create a record.

        Args:
            sequence: The sequence string.
            sequence_id: Identifier of the record, stored as `id`.
            description: Optional free-text description.
            properties: Optional key/value properties; keys and values must
                not contain `=`, `[`, `]` or a newline.

        Raises:
            AssertionError: If a property key or value contains a disallowed character.
        """
        self.sequence = sequence
        self.id = sequence_id  # pylint: disable=invalid-name
        self.description = description

        if properties:
            for key, value in properties.items():
                # Raised explicitly instead of via `assert` so the validation
                # also runs under `python -O`; the exception type is kept for
                # backward compatibility.
                if self._DISALLOWED_PROPERTY_PATTERN.search(key):
                    raise AssertionError('Key cannot contain characters =[] and newline')
                if self._DISALLOWED_PROPERTY_PATTERN.search(value):
                    raise AssertionError('Value cannot contain characters =[] and newline')
            self.properties = properties
        else:
            self.properties = {}

    def __repr__(self) -> str:
        return f'{self.__class__.__name__} ({self.id})'
30
+
31
+
32
class SeqUtil:
    """Static helpers for parsing and writing FASTA files."""

    # Allowed-character sets are built once instead of on every validation call.
    _ALLOWED_SEQUENCE_CHARS = frozenset('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.')
    # Equivalent to fair-esm alphabet, compatible with ESM-models
    # Excludes digits, '_' and 'J' (ambiguous letter only used in mass-spec NMR)
    # https://github.com/facebookresearch/esm/blob/2b369911bb5b4b0dda914521b9475cad1656b2ac/esm/constants.py#L8
    _ALLOWED_SEQUENCE_CHARS_STRICT = frozenset('lagvsertidpkqnfymhwcxbuzoLAGVSERTIDPKQNFYMHWCXBUZO-.')
    _ALLOWED_SEQUENCE_ID_CHARS = frozenset(
        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.:*#'
    )

    @staticmethod
    def parse_fasta(
        input_file: Union[str, BufferedIOBase, TextIOBase, None] = None,
        default_header: Optional[str] = None,
        allow_any_sequence_characters: bool = False,
        use_strict_alphabet: Optional[bool] = False,
        allow_empty_sequence: bool = True,
        file_name: Optional[str] = None,
    ) -> Iterator['SeqUtilRecord']:
        """Lazily parse FASTA records from a file path or an open file object.

        Args:
            input_file: Path of the FASTA file, or a binary or text file
                object. Binary input is decoded as UTF-8.
            default_header: When set, every sequence line that appears before
                the first header becomes its own record named
                `default_header` followed by the zero-based line number.
            allow_any_sequence_characters: Skip sequence character validation.
            use_strict_alphabet: Validate against the strict alphabet instead
                of the default one. Mutually exclusive with
                `allow_any_sequence_characters`.
            allow_empty_sequence: When False, a header without sequence raises.
            file_name: Used as the path when `input_file` is None, and in the
                missing-header error message.

        Yields:
            One SeqUtilRecord per FASTA entry.

        Raises:
            ValueError: If no usable input was given.
            Exception: On conflicting options, invalid sequence characters, a
                disallowed empty sequence, or sequence data without a header.
        """
        # Argument validation does not depend on the records, so it is done
        # once up front instead of once per record.
        if allow_any_sequence_characters and use_strict_alphabet:
            raise Exception(
                'Error: Please choose either allow_any_sequence_characters or use_strict_alphabet'
            )

        def build_record(header: str, sequence_lines: List[str]) -> 'SeqUtilRecord':
            sequence = ''.join(sequence_lines)
            # A bare ">" line gives an empty header; use an empty id instead of
            # failing with an IndexError on `split()[0]`.
            header_parts = header.split(None, 1)
            sequence_id = header_parts[0] if header_parts else ''
            if not allow_any_sequence_characters:
                if use_strict_alphabet:
                    invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters_strict(sequence)
                else:
                    invalid_sequence_characters = SeqUtil._find_invalid_sequence_characters(sequence)
                if invalid_sequence_characters:
                    raise Exception(
                        f'Error: Invalid character ("{invalid_sequence_characters[0]}") found in sequence {sequence_id}'
                    )
            if not allow_empty_sequence and not sequence:
                raise Exception(f'Error: No sequence found for fasta entry {sequence_id}')
            return SeqUtilRecord(
                sequence=sequence,
                sequence_id=sequence_id,
                description=header[len(sequence_id):].strip(),
            )

        def line_generator_from_buffered_io_base(file_handle: BufferedIOBase) -> Iterator[str]:
            for line in file_handle:
                yield line.decode('utf-8')

        def line_generator_from_text_io_base(file_handle: TextIOBase) -> Iterator[str]:
            for line in file_handle:
                yield line

        if input_file is None:
            if file_name:
                input_file = file_name
            else:
                raise ValueError(
                    'input_file must be a file name (str) or a BufferedIOBase/TextIOBase object'
                )

        # Name shown in error messages: the explicit file_name if given, else the path passed as input_file.
        source_name = file_name if file_name else (input_file if isinstance(input_file, str) else None)

        file_handle = None
        if isinstance(input_file, str):
            file_handle = open(input_file, "rb")
            line_iterator = line_generator_from_buffered_io_base(file_handle)
        elif isinstance(input_file, BufferedIOBase):
            line_iterator = line_generator_from_buffered_io_base(input_file)
        elif isinstance(input_file, TextIOBase):
            line_iterator = line_generator_from_text_io_base(input_file)
        else:
            raise ValueError('input_file must be a file name (str) or a BufferedIOBase/TextIOBase object')

        header = None
        sequence_lines: List[str] = []

        try:
            for line_number, line in enumerate(line_iterator):
                line = line.strip()
                if not line:
                    continue  # skip empty lines
                if line.startswith('>'):
                    if header is not None:
                        yield build_record(header, sequence_lines)

                    header = line[1:].strip()
                    sequence_lines = []
                elif header is not None:
                    sequence_lines.append(line)
                elif default_header:
                    yield build_record(f"{default_header}{line_number}", [line])
                elif source_name:
                    raise Exception(f'No header line found in FASTA file "{source_name}"')
                else:
                    raise Exception('No header line found in FASTA input')

            if header is not None:
                yield build_record(header, sequence_lines)
        finally:
            # Only close handles opened here; caller-provided file objects stay open.
            if file_handle:
                file_handle.close()

    @staticmethod
    def write_records_to_fasta(file_name: str, records: List['SeqUtilRecord']) -> None:
        """Write `records` to `file_name` in FASTA format, wrapping sequences at 80 characters.

        The header line is `>id`, followed by the description if present and
        one ` [key=value]` entry per property. The file is written as UTF-8,
        matching the decoding used by `parse_fasta`.
        """
        with open(file_name, mode='w', encoding='utf-8') as file_handle:
            for record in records:
                optional_description = f' {record.description}' if record.description else ''
                if record.properties:
                    optional_description += ''.join(
                        f' [{key}={value}]' for key, value in record.properties.items()
                    )
                sequence = '\n'.join(record.sequence[i : i + 80] for i in range(0, len(record.sequence), 80))
                file_handle.write(f'>{record.id}{optional_description}\n{sequence}\n')

    @staticmethod
    def _find_invalid_sequence_characters(sequence: str) -> List[str]:
        """Return, in order, every character of `sequence` outside the default alphabet."""
        return [char for char in sequence if char not in SeqUtil._ALLOWED_SEQUENCE_CHARS]

    @staticmethod
    def _find_invalid_sequence_characters_strict(sequence: str) -> List[str]:
        """Return, in order, every character of `sequence` outside the strict alphabet."""
        return [char for char in sequence if char not in SeqUtil._ALLOWED_SEQUENCE_CHARS_STRICT]

    @staticmethod
    def _find_invalid_sequence_id_characters(sequence: str) -> List[str]:
        """Return, in order, every character of `sequence` not allowed in a sequence id."""
        return [char for char in sequence if char not in SeqUtil._ALLOWED_SEQUENCE_ID_CHARS]