pybiolib 1.2.883__py3-none-any.whl → 1.2.1890__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (124)
  1. biolib/__init__.py +33 -10
  2. biolib/_data_record/data_record.py +220 -126
  3. biolib/_index/index.py +55 -0
  4. biolib/_index/query_result.py +103 -0
  5. biolib/_internal/add_copilot_prompts.py +24 -11
  6. biolib/_internal/add_gui_files.py +81 -0
  7. biolib/_internal/data_record/__init__.py +1 -1
  8. biolib/_internal/data_record/data_record.py +1 -18
  9. biolib/_internal/data_record/push_data.py +65 -16
  10. biolib/_internal/data_record/remote_storage_endpoint.py +18 -13
  11. biolib/_internal/file_utils.py +48 -0
  12. biolib/_internal/lfs/cache.py +4 -2
  13. biolib/_internal/push_application.py +95 -24
  14. biolib/_internal/runtime.py +2 -0
  15. biolib/_internal/string_utils.py +13 -0
  16. biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-general.instructions.md +5 -0
  17. biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
  18. biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
  19. biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
  20. biolib/_internal/templates/{init_template → github_workflow_template}/.github/workflows/biolib.yml +7 -2
  21. biolib/_internal/templates/gitignore_template/.gitignore +10 -0
  22. biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
  23. biolib/_internal/templates/gui_template/App.tsx +53 -0
  24. biolib/_internal/templates/gui_template/Dockerfile +27 -0
  25. biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
  26. biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
  27. biolib/_internal/templates/gui_template/index.css +5 -0
  28. biolib/_internal/templates/gui_template/index.html +13 -0
  29. biolib/_internal/templates/gui_template/index.tsx +10 -0
  30. biolib/_internal/templates/gui_template/package.json +27 -0
  31. biolib/_internal/templates/gui_template/tsconfig.json +24 -0
  32. biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
  33. biolib/_internal/templates/gui_template/vite.config.mts +10 -0
  34. biolib/_internal/templates/init_template/.biolib/config.yml +1 -0
  35. biolib/_internal/templates/init_template/Dockerfile +5 -1
  36. biolib/_internal/templates/init_template/run.py +6 -15
  37. biolib/_internal/templates/init_template/run.sh +1 -0
  38. biolib/_internal/templates/templates.py +21 -1
  39. biolib/_internal/utils/__init__.py +47 -0
  40. biolib/_internal/utils/auth.py +46 -0
  41. biolib/_internal/utils/job_url.py +33 -0
  42. biolib/_internal/utils/multinode.py +12 -14
  43. biolib/_runtime/runtime.py +15 -2
  44. biolib/_session/session.py +7 -5
  45. biolib/_shared/__init__.py +0 -0
  46. biolib/_shared/types/__init__.py +74 -0
  47. biolib/_shared/types/account.py +12 -0
  48. biolib/_shared/types/account_member.py +8 -0
  49. biolib/{_internal → _shared}/types/experiment.py +1 -0
  50. biolib/_shared/types/resource.py +37 -0
  51. biolib/_shared/types/resource_deploy_key.py +11 -0
  52. biolib/{_internal → _shared}/types/resource_version.py +8 -2
  53. biolib/_shared/types/user.py +19 -0
  54. biolib/_shared/utils/__init__.py +7 -0
  55. biolib/_shared/utils/resource_uri.py +75 -0
  56. biolib/api/client.py +5 -48
  57. biolib/app/app.py +97 -55
  58. biolib/biolib_api_client/api_client.py +3 -47
  59. biolib/biolib_api_client/app_types.py +1 -1
  60. biolib/biolib_api_client/biolib_app_api.py +31 -6
  61. biolib/biolib_api_client/biolib_job_api.py +1 -1
  62. biolib/biolib_api_client/user_state.py +34 -2
  63. biolib/biolib_binary_format/module_input.py +8 -0
  64. biolib/biolib_binary_format/remote_endpoints.py +3 -3
  65. biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
  66. biolib/biolib_logging.py +1 -1
  67. biolib/cli/__init__.py +2 -2
  68. biolib/cli/auth.py +4 -16
  69. biolib/cli/data_record.py +82 -0
  70. biolib/cli/index.py +32 -0
  71. biolib/cli/init.py +393 -71
  72. biolib/cli/lfs.py +1 -1
  73. biolib/cli/run.py +9 -6
  74. biolib/cli/start.py +14 -1
  75. biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
  76. biolib/compute_node/job_worker/executors/docker_types.py +1 -1
  77. biolib/compute_node/job_worker/executors/types.py +6 -5
  78. biolib/compute_node/job_worker/job_storage.py +2 -1
  79. biolib/compute_node/job_worker/job_worker.py +155 -90
  80. biolib/compute_node/job_worker/large_file_system.py +2 -6
  81. biolib/compute_node/job_worker/network_alloc.py +99 -0
  82. biolib/compute_node/job_worker/network_buffer.py +240 -0
  83. biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
  84. biolib/compute_node/remote_host_proxy.py +163 -79
  85. biolib/compute_node/utils.py +2 -0
  86. biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
  87. biolib/compute_node/webserver/proxy_utils.py +28 -0
  88. biolib/compute_node/webserver/webserver.py +64 -19
  89. biolib/experiments/experiment.py +111 -16
  90. biolib/jobs/job.py +128 -31
  91. biolib/jobs/job_result.py +74 -34
  92. biolib/jobs/types.py +1 -0
  93. biolib/sdk/__init__.py +28 -3
  94. biolib/typing_utils.py +1 -1
  95. biolib/utils/cache_state.py +8 -5
  96. biolib/utils/multipart_uploader.py +24 -18
  97. biolib/utils/seq_util.py +1 -1
  98. pybiolib-1.2.1890.dist-info/METADATA +41 -0
  99. pybiolib-1.2.1890.dist-info/RECORD +177 -0
  100. {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
  101. pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
  102. biolib/_internal/llm_instructions/.github/instructions/style-react-ts.instructions.md +0 -22
  103. biolib/_internal/templates/init_template/.gitignore +0 -2
  104. biolib/_internal/types/__init__.py +0 -6
  105. biolib/_internal/types/resource.py +0 -18
  106. biolib/biolib_download_container.py +0 -38
  107. biolib/cli/download_container.py +0 -14
  108. biolib/utils/app_uri.py +0 -57
  109. pybiolib-1.2.883.dist-info/METADATA +0 -50
  110. pybiolib-1.2.883.dist-info/RECORD +0 -148
  111. pybiolib-1.2.883.dist-info/entry_points.txt +0 -3
  112. /biolib/{_internal/llm_instructions → _index}/__init__.py +0 -0
  113. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/general-app-knowledge.instructions.md +0 -0
  114. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-python.instructions.md +0 -0
  115. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_app_inputs.prompt.md +0 -0
  116. /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_run_apps.prompt.md +0 -0
  117. /biolib/{_internal → _shared}/types/app.py +0 -0
  118. /biolib/{_internal → _shared}/types/data_record.py +0 -0
  119. /biolib/{_internal → _shared}/types/file_node.py +0 -0
  120. /biolib/{_internal → _shared}/types/push.py +0 -0
  121. /biolib/{_internal → _shared}/types/resource_permission.py +0 -0
  122. /biolib/{_internal → _shared}/types/result.py +0 -0
  123. /biolib/{_internal → _shared}/types/typing.py +0 -0
  124. {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info/licenses}/LICENSE +0 -0
biolib/app/app.py CHANGED
@@ -1,11 +1,16 @@
+ import copy
  import io
  import json
  import os
+ import posixpath
  import random
  import string
  from pathlib import Path
 
  from biolib import utils
+ from biolib._internal.file_utils import path_to_renamed_path
+ from biolib._runtime.runtime import Runtime
+ from biolib._shared.utils import parse_resource_uri
  from biolib.api.client import ApiClient
  from biolib.biolib_api_client import JobState
  from biolib.biolib_api_client.app_types import App, AppVersion
@@ -18,13 +23,24 @@ from biolib.compute_node.job_worker.job_worker import JobWorker
  from biolib.experiments.experiment import Experiment
  from biolib.jobs.job import Result
  from biolib.typing_utils import Dict, Optional
- from biolib.utils.app_uri import parse_app_uri
- from biolib._runtime.runtime import Runtime
+
+
+ class JsonStringIO(io.StringIO):
+ pass
 
 
  class BioLibApp:
- def __init__(self, uri: str, _api_client: Optional[ApiClient] = None, suppress_version_warning: bool = False):
+ def __init__(
+ self,
+ uri: str,
+ _api_client: Optional[ApiClient] = None,
+ suppress_version_warning: bool = False,
+ _experiment: Optional[str] = None,
+ ):
  self._api_client: Optional[ApiClient] = _api_client
+ self._experiment = _experiment
+ self._input_uri = uri
+ self._parsed_input_uri = parse_resource_uri(uri)
 
  app_response = BiolibAppApi.get_by_uri(uri=uri, api_client=self._api_client)
  self._app: App = app_response['app']
@@ -32,16 +48,19 @@ class BioLibApp:
  self._app_version: AppVersion = app_response['app_version']
 
  if not suppress_version_warning:
- parsed_uri = parse_app_uri(uri)
- if parsed_uri['version'] is None:
+ if self._parsed_input_uri['version'] is None:
  if Runtime.check_is_environment_biolib_app():
  logger.warning(
  f"No version specified in URI '{uri}'. This will use the default version, "
- f"which may change behaviour over time. Consider locking down the exact version, "
+ f'which may change behaviour over time. Consider locking down the exact version, '
  f"e.g. '{uri}:1.2.3'"
  )
 
- logger.info(f'Loaded project {self._app_uri}')
+ if self._parsed_input_uri['tag']:
+ semantic_version = f"{self._app_version['major']}.{self._app_version['minor']}.{self._app_version['patch']}"
+ logger.info(f'Loaded {self._input_uri} (resolved to {semantic_version})')
+ else:
+ logger.info(f'Loaded {self._app_uri}')
 
  def __str__(self) -> str:
  return self._app_uri
@@ -70,7 +89,7 @@ class BioLibApp:
  result_prefix: Optional[str] = None,
  timeout: Optional[int] = None,
  notify: bool = False,
- machine_count: Optional[int] = None,
+ max_workers: Optional[int] = None,
  experiment: Optional[str] = None,
  temporary_client_secrets: Optional[Dict[str, str]] = None,
  check: bool = False,
@@ -83,22 +102,18 @@ class BioLibApp:
  raise ValueError('The argument "check" cannot be True when blocking is False')
 
  if not experiment_id:
- experiment_instance = Experiment(experiment) if experiment else Experiment.get_experiment_in_context()
+ experiment_to_use = experiment if experiment is not None else self._experiment
+ experiment_instance: Optional[Experiment]
+ if experiment_to_use:
+ experiment_instance = Experiment(experiment_to_use, _api_client=self._api_client)
+ else:
+ experiment_instance = Experiment.get_experiment_in_context()
  experiment_id = experiment_instance.uuid if experiment_instance else None
 
  module_input_serialized = self._get_serialized_module_input(args, stdin, files)
 
  if machine == 'local':
- if not blocking:
- raise BioLibError('The argument "blocking" cannot be False when running locally')
-
- if experiment_id:
- logger.warning('The argument "experiment_id" is ignored when running locally')
-
- if result_prefix:
- logger.warning('The argument "result_prefix" is ignored when running locally')
-
- return self._run_locally(module_input_serialized)
+ raise BioLibError('Running applications locally with machine="local" is no longer supported.')
 
  job = Result._start_job_in_cloud( # pylint: disable=protected-access
  app_uri=self._app_uri,
@@ -110,11 +125,12 @@ class BioLibApp:
  override_command=override_command,
  result_prefix=result_prefix,
  timeout=timeout,
- requested_machine_count=machine_count,
+ requested_machine_count=max_workers,
  temporary_client_secrets=temporary_client_secrets,
  api_client=self._api_client,
  )
- logger.info(f'View the result in your browser at: {utils.BIOLIB_BASE_URL}/results/{job.id}/')
+ if utils.IS_RUNNING_IN_NOTEBOOK:
+ logger.info(f'View the result in your browser at: {utils.BIOLIB_BASE_URL}/results/{job.id}/')
  if blocking:
  # TODO: Deprecate utils.STREAM_STDOUT and always stream logs by simply calling job.stream_logs()
  if utils.IS_RUNNING_IN_NOTEBOOK:
@@ -151,6 +167,8 @@ Example: "app.cli('--help')"
  def _get_serialized_module_input(args=None, stdin=None, files=None) -> bytes:
  if args is None:
  args = []
+ else:
+ args = copy.copy(args)
 
  if stdin is None:
  stdin = b''
@@ -168,21 +186,72 @@ Example: "app.cli('--help')"
  files = []
 
  files_dict = {}
+ if isinstance(files, list):
+ for file_path in files:
+ path = Path(file_path)
+ if path.is_dir():
+ renamed_dir = path_to_renamed_path(file_path)
+ for filename in path.rglob('*'):
+ if filename.is_dir():
+ continue
+ with open(filename, 'rb') as f:
+ relative_to_dir = filename.resolve().relative_to(path.resolve())
+ files_dict[posixpath.join(renamed_dir, relative_to_dir.as_posix())] = f.read()
+ else:
+ with open(path, 'rb') as f:
+ files_dict[path_to_renamed_path(str(path))] = f.read()
+ elif isinstance(files, dict):
+ files_dict = {}
+ for key, value in files.items():
+ if '//' in key:
+ raise BioLibError(f"File path '{key}' contains double slashes which are not allowed")
+ if not key.startswith('/'):
+ key = '/' + key
+ files_dict[key] = value
+ else:
+ raise Exception('The given files input must be list or dict or None')
+
  for idx, arg in enumerate(args):
  if isinstance(arg, str):
  if os.path.isfile(arg) or os.path.isdir(arg):
- files.append(arg)
- args[idx] = Path(arg).name
+ if os.path.isfile(arg):
+ with open(arg, 'rb') as f:
+ files_dict[path_to_renamed_path(arg)] = f.read()
+ elif os.path.isdir(arg):
+ path = Path(arg)
+ renamed_dir = path_to_renamed_path(arg)
+ for filename in path.rglob('*'):
+ if filename.is_dir():
+ continue
+ with open(filename, 'rb') as f:
+ relative_to_dir = filename.resolve().relative_to(path.resolve())
+ files_dict[posixpath.join(renamed_dir, relative_to_dir.as_posix())] = f.read()
+ args[idx] = path_to_renamed_path(arg, prefix_with_slash=False)
 
  # support --myarg=file.txt
  elif os.path.isfile(arg.split('=')[-1]) or os.path.isdir(arg.split('=')[-1]):
- files.append(arg.split('=')[-1])
- args[idx] = arg.split('=')[0] + '=' + Path(arg.split('=')[-1]).name
+ file_path = arg.split('=')[-1]
+ if os.path.isfile(file_path):
+ with open(file_path, 'rb') as f:
+ files_dict[path_to_renamed_path(file_path)] = f.read()
+ elif os.path.isdir(file_path):
+ path = Path(file_path)
+ renamed_dir = path_to_renamed_path(file_path)
+ for filename in path.rglob('*'):
+ if filename.is_dir():
+ continue
+ with open(filename, 'rb') as f:
+ relative_to_dir = filename.resolve().relative_to(path.resolve())
+ files_dict[posixpath.join(renamed_dir, relative_to_dir.as_posix())] = f.read()
+ args[idx] = arg.split('=')[0] + '=' + path_to_renamed_path(file_path, prefix_with_slash=False)
  else:
  pass # a normal string arg was given
  else:
  tmp_filename = f'input_{"".join(random.choices(string.ascii_letters + string.digits, k=7))}'
- if isinstance(arg, io.StringIO):
+ if isinstance(arg, JsonStringIO):
+ file_data = arg.getvalue().encode()
+ tmp_filename += '.json'
+ elif isinstance(arg, io.StringIO):
  file_data = arg.getvalue().encode()
  elif isinstance(arg, io.BytesIO):
  file_data = arg.getvalue()
@@ -191,33 +260,6 @@ Example: "app.cli('--help')"
  files_dict[f'/{tmp_filename}'] = file_data
  args[idx] = tmp_filename
 
- if isinstance(files, list):
- for file in files:
- path = Path(file).absolute()
-
- # Recursively add data from files if dir
- if path.is_dir():
- for filename in path.rglob('*'):
- if filename.is_dir():
- continue
- file = open(filename, 'rb')
- relative_path = '/' + path.name + '/' + '/'.join(filename.relative_to(path).parts)
- files_dict[relative_path] = file.read()
- file.close()
-
- # Add file data
- else:
- file = open(path, 'rb')
- path_short = '/' + path.name
-
- files_dict[path_short] = file.read()
- file.close()
-
- elif isinstance(files, dict):
- files_dict.update(files)
- else:
- raise Exception('The given files input must be list or dict or None')
-
  module_input_serialized: bytes = ModuleInput().serialize(
  stdin=stdin,
  arguments=args,
@@ -228,7 +270,7 @@ Example: "app.cli('--help')"
  def _run_locally(self, module_input_serialized: bytes) -> Result:
  job_dict = BiolibJobApi.create(
  app_version_id=self._app_version['public_id'],
- app_resource_name_prefix=parse_app_uri(self._app_uri)['resource_name_prefix'],
+ app_resource_name_prefix=parse_resource_uri(self._app_uri)['resource_prefix'],
  )
  job = Result(job_dict)
 
@@ -253,7 +295,7 @@ Example: "app.cli('--help')"
  continue
 
  if isinstance(value, dict):
- value = JsonStringIO(json.dumps(value))
  elif isinstance(value, (int, float)): # Cast numeric values to strings
  value = str(value)
 
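Taken together, the biolib/app/app.py changes above rename the machine_count argument to max_workers, drop support for machine='local', and let files be passed either as a list of local paths or as a dict mapping destination paths to bytes (dict keys get a leading '/' and paths containing '//' are rejected). A minimal usage sketch, assuming biolib.load still returns a BioLibApp and that the parameters shown in the signature hunk above belong to the cli entry point as in earlier releases; the app URI, file names, and values are illustrative only:

import biolib

app = biolib.load('author_name/my_app:1.2.3')  # hypothetical app URI

# 'files' given as a dict of destination path -> bytes; a list of local paths also works.
job = app.cli(
    args=['--input', 'input.fasta'],
    files={'input.fasta': b'>seq1\nACGT\n'},
    max_workers=2,  # renamed from machine_count in this version
)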
@@ -1,15 +1,13 @@
- import base64
- import binascii
- import json
  import os
  from datetime import datetime, timezone
  from json.decoder import JSONDecodeError
 
  from biolib._internal.http_client import HttpClient
+ from biolib._internal.utils.auth import decode_jwt_without_checking_signature
  from biolib._runtime.runtime import Runtime
  from biolib.biolib_errors import BioLibError
  from biolib.biolib_logging import logger, logger_no_user_data
- from biolib.typing_utils import Any, Dict, Optional, TypedDict
+ from biolib.typing_utils import Optional, TypedDict
 
  from .user_state import UserState
 
@@ -19,10 +17,6 @@ class UserTokens(TypedDict):
  refresh: str
 
 
- class JwtDecodeError(Exception):
- pass
-
-
  class _ApiClient:
  def __init__(self, base_url: str, access_token: Optional[str] = None):
  self.base_url: str = base_url
@@ -60,7 +54,7 @@ class _ApiClient:
  return
 
  if self.access_token:
- decoded_token = self.decode_jwt_without_checking_signature(self.access_token)
+ decoded_token = decode_jwt_without_checking_signature(self.access_token)
  if datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60: # 60 second buffer
  # Token has not expired yet
  return
@@ -132,44 +126,6 @@ class _ApiClient:
  self.access_token = json_response['access_token']
  self.refresh_token = json_response['refresh_token']
 
- @staticmethod
- def decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
- jwt_bytes = jwt.encode('utf-8')
-
- try:
- signing_input, _ = jwt_bytes.rsplit(b'.', 1)
- header_segment, payload_segment = signing_input.split(b'.', 1)
- except ValueError as error:
- raise JwtDecodeError('Not enough segments') from error
-
- try:
- header_data = base64.urlsafe_b64decode(header_segment)
- except (TypeError, binascii.Error) as error:
- raise JwtDecodeError('Invalid header padding') from error
-
- try:
- header = json.loads(header_data)
- except ValueError as error:
- raise JwtDecodeError(f'Invalid header string: {error}') from error
-
- if not isinstance(header, dict):
- raise JwtDecodeError('Invalid header string: must be a json object')
-
- try:
- payload_data = base64.urlsafe_b64decode(payload_segment)
- except (TypeError, binascii.Error) as error:
- raise JwtDecodeError('Invalid payload padding') from error
-
- try:
- payload = json.loads(payload_data)
- except ValueError as error:
- raise JwtDecodeError(f'Invalid payload string: {error}') from error
-
- if not isinstance(header, dict):
- raise JwtDecodeError('Invalid payload string: must be a json object')
-
- return dict(header=header, payload=payload)
-
 
  class BiolibApiClient:
  api_client: Optional[_ApiClient] = None
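The JWT helper removed above now lives in biolib/_internal/utils/auth.py (file 40 in the list) and is imported at the top of this module. A minimal sketch of the expiry check it enables, mirroring the call site above; the wrapper function name is_access_token_fresh is hypothetical:

from datetime import datetime, timezone

from biolib._internal.utils.auth import decode_jwt_without_checking_signature


def is_access_token_fresh(access_token: str) -> bool:
    # The helper returns {'header': ..., 'payload': ...}; 'exp' is a unix timestamp.
    decoded_token = decode_jwt_without_checking_signature(access_token)
    # Same 60 second buffer as used by _ApiClient above.
    return datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60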
@@ -78,6 +78,7 @@ class _Module(TypedDict):
  large_file_systems: List[LargeFileSystemMapping]
  name: str
  output_files_mappings: List[FilesMapping]
+ ports: List[int]
  source_files_mappings: List[FilesMapping]
  working_directory: str
 
@@ -90,7 +91,6 @@ class Module(_Module, total=False):
  class _AppVersionOnJob(TypedDict):
  created_at: str
  client_side_executable_zip: Optional[str]
- consumes_stdin: bool
  is_runnable_by_user: bool
  public_id: str
  remote_hosts: List[RemoteHost]
@@ -57,6 +57,22 @@ def _get_git_branch_name() -> str:
  return ''
 
 
+ def _get_git_commit_hash() -> str:
+ try:
+ github_actions_commit_hash = os.getenv('GITHUB_SHA')
+ if github_actions_commit_hash:
+ return github_actions_commit_hash
+
+ gitlab_ci_commit_hash = os.getenv('CI_COMMIT_SHA')
+ if gitlab_ci_commit_hash:
+ return gitlab_ci_commit_hash
+
+ result = subprocess.run(['git', 'rev-parse', 'HEAD'], check=True, stdout=subprocess.PIPE, text=True)
+ return result.stdout.strip()
+ except BaseException:
+ return ''
+
+
  def _get_git_repository_url() -> str:
  try:
  result = subprocess.run(['git', 'remote', 'get-url', 'origin'], check=True, stdout=subprocess.PIPE, text=True)
@@ -64,16 +80,15 @@ def _get_git_repository_url() -> str:
  except BaseException:
  return ''
 
- def _get_app_uri_from_str(input_str: str) -> str:
+
+ def _get_resource_uri_from_str(input_str: str) -> str:
  parsed_base_url = urllib.parse.urlparse(load_base_url_from_env())
  parsed_uri = urllib.parse.urlparse(input_str)
  if parsed_uri.netloc != '' and parsed_base_url.netloc != parsed_uri.netloc:
- raise biolib_errors.ValidationError(
- f'Invalid URI. The hostname "{parsed_base_url.netloc}" is not recognized.'
- )
+ raise biolib_errors.ValidationError(f'Invalid URI. The hostname "{parsed_base_url.netloc}" is not recognized.')
  elif parsed_uri.netloc != '' and parsed_uri.path[1] != '@':
  uri = f'@{parsed_uri.netloc}{parsed_uri.path}'
- elif parsed_uri.netloc == '' and parsed_uri.path.startswith (parsed_base_url.netloc):
+ elif parsed_uri.netloc == '' and parsed_uri.path.startswith(parsed_base_url.netloc):
  uri = f'@{parsed_uri.path}'
  else:
  uri = parsed_uri.path
@@ -86,7 +101,7 @@ def _get_app_uri_from_str(input_str: str) -> str:
  class BiolibAppApi:
  @staticmethod
  def get_by_uri(uri: str, api_client: Optional[ApiClient] = None) -> AppGetResponse:
- uri = _get_app_uri_from_str(uri)
+ uri = _get_resource_uri_from_str(uri)
  api = api_client or biolib.api.client
  try:
  response = api.get(path='/app/', params={'uri': uri})
@@ -99,6 +114,15 @@ class BiolibAppApi:
 
  raise error
 
+ @staticmethod
+ def create_app(uri: str):
+ uri = _get_resource_uri_from_str(uri)
+ try:
+ response = biolib.api.client.post(path='/resources/apps/', data={'uri': uri})
+ return response.json()
+ except HttpError as error:
+ raise error
+
  @staticmethod
  def push_app_version(
  app_id,
@@ -116,6 +140,7 @@ class BiolibAppApi:
  'state': 'published',
  'app_version_id_to_copy_images_from': app_version_id_to_copy_images_from,
  'git_branch_name': _get_git_branch_name(),
+ 'git_commit_hash': _get_git_commit_hash(),
  'git_repository_url': _get_git_repository_url(),
  }
  if semantic_version:
@@ -25,7 +25,7 @@ def _get_user_info() -> Optional[str]:
  if utils.BASE_URL_IS_PUBLIC_BIOLIB:
  return None
 
- enterprise_agent_info_opt_env_vars = ['DOMINO_STARTING_USERNAME', 'USER']
+ enterprise_agent_info_opt_env_vars = ['BIOLIB_OPT_USER', 'DOMINO_STARTING_USERNAME', 'USER']
 
  for env_var in enterprise_agent_info_opt_env_vars:
  env_var_value = os.getenv(env_var)
@@ -1,6 +1,6 @@
+ from biolib.biolib_logging import logger_no_user_data
+ from biolib.typing_utils import Optional, TypedDict
  from biolib.utils.cache_state import CacheState
- from biolib.typing_utils import TypedDict, Optional
-
 
  # TODO: Save job keys in the user state instead of a separate state file
  # UuidStr = str
@@ -15,6 +15,9 @@ class UserStateType(TypedDict):
 
 
  class UserState(CacheState[UserStateType]):
+ def __init__(self) -> None:
+ super().__init__(fail_fast_on_lock_acquire=True)
+ self._is_in_memory_only: bool = False
 
  @property
  def _state_path(self) -> str:
@@ -22,3 +25,32 @@ class UserState(CacheState[UserStateType]):
 
  def _get_default_state(self) -> UserStateType:
  return UserStateType(refresh_token=None)
+
+ def __enter__(self) -> UserStateType:
+ if self._is_in_memory_only:
+ if self._state is None:
+ self._state = self._get_default_state()
+ return self._state
+ try:
+ return super().__enter__()
+ except Exception as error:
+ logger_no_user_data.warning(
+ f'UserState: Could not access state file, continuing with in-memory state only. '
+ f'Login state will not persist across Python processes. Error: {error}'
+ )
+ self._is_in_memory_only = True
+ if self._state is None:
+ self._state = self._get_default_state()
+ return self._state
+
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+ if self._is_in_memory_only:
+ return
+ try:
+ super().__exit__(exc_type, exc_val, exc_tb)
+ except Exception as error:
+ logger_no_user_data.warning(
+ f'UserState: Could not write state file. '
+ f'Login state will not persist across Python processes. Error: {error}'
+ )
+ self._is_in_memory_only = True
@@ -1,4 +1,5 @@
  from biolib.biolib_binary_format.base_bbf_package import BioLibBinaryFormatBasePackage
+ from biolib.biolib_logging import logger
  from biolib.typing_utils import TypedDict, Dict, List
 
 
@@ -14,6 +15,10 @@ class ModuleInput(BioLibBinaryFormatBasePackage):
  self.package_type = 1
 
  def serialize(self, stdin, arguments, files) -> bytes:
+ for path in files.keys():
+ if '//' in path:
+ raise ValueError(f"File path '{path}' contains double slashes which are not allowed")
+
  bbf_data = bytearray()
  bbf_data.extend(self.version.to_bytes(1, 'big'))
  bbf_data.extend(self.package_type.to_bytes(1, 'big'))
@@ -67,6 +72,9 @@ class ModuleInput(BioLibBinaryFormatBasePackage):
  data_len = self.get_data(8, output_type='int')
  path = self.get_data(path_len, output_type='str')
  data = self.get_data(data_len)
+ if '//' in path:
+ # TODO: Raise ValueError here once backwards compatibility period is over
+ logger.warning(f"File path '{path}' contains double slashes which are not allowed")
  files[path] = bytes(data)
 
  return ModuleInputDict(stdin=stdin, arguments=arguments, files=files)
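The check added above rejects double slashes in file paths at serialize time, while deserialize only warns during the backwards-compatibility period. A short sketch of the serialize call as it is used from biolib/app/app.py; the arguments and file contents are made up:

from biolib.biolib_binary_format.module_input import ModuleInput

module_input_serialized = ModuleInput().serialize(
    stdin=b'',
    arguments=['--input', 'input.txt'],
    files={'/input.txt': b'hello world\n'},
)
# A key such as '/data//input.txt' would now raise ValueError at serialize time.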
@@ -1,4 +1,4 @@
- from datetime import datetime, timedelta
+ from datetime import datetime, timedelta, timezone
 
  from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
  from biolib.biolib_binary_format.utils import RemoteEndpoint
@@ -17,13 +17,13 @@ class RemoteJobStorageEndpoint(RemoteEndpoint):
  self._storage_type: Literal['input', 'output'] = storage_type
 
  def get_remote_url(self):
- if not self._presigned_url or datetime.utcnow() > self._expires_at:
+ if not self._presigned_url or not self._expires_at or datetime.now(timezone.utc) > self._expires_at:
  self._presigned_url = BiolibJobApi.get_job_storage_download_url(
  job_auth_token=self._job_auth_token,
  job_uuid=self._job_uuid,
  storage_type='results' if self._storage_type == 'output' else 'input',
  )
- self._expires_at = datetime.utcnow() + timedelta(minutes=8)
+ self._expires_at = datetime.now(timezone.utc) + timedelta(minutes=8)
  # TODO: Use expires at from url
  # parsed_url = urlparse(self._presigned_url)
  # query_params = parse_qs(parsed_url.query)
@@ -1,45 +1,59 @@
  from biolib.biolib_binary_format.utils import IndexableBuffer
+ from biolib.biolib_logging import logger
  from biolib.typing_utils import Iterable
 
 
  class StreamSeeker:
  def __init__(
- self,
- upstream_buffer: IndexableBuffer,
- files_data_start: int,
- files_data_end: int,
- download_chunk_size_in_bytes: int,
+ self,
+ upstream_buffer: IndexableBuffer,
+ files_data_start: int,
+ files_data_end: int,
+ max_chunk_size: int,
  ):
  self._upstream_buffer = upstream_buffer
  self._files_data_end = files_data_end
- self._download_chunk_size_in_bytes = download_chunk_size_in_bytes
+ self._max_chunk_size = max_chunk_size
 
  self._buffer_start = files_data_start
  self._buffer = bytearray()
 
- def seek_and_read(self, file_start: int, file_length: int) -> Iterable[bytes]:
+ def seek_and_read(self, file_start: int, file_length: int, read_ahead_bytes: int = 0) -> Iterable[bytes]:
  assert file_start >= self._buffer_start
- self._buffer = self._buffer[file_start - self._buffer_start:] # Returns empty array if "out of bounds"
+ self._buffer = self._buffer[file_start - self._buffer_start :]
  self._buffer_start = file_start
 
  while True:
  file_byte_count_remaining = file_length - (self._buffer_start - file_start)
- if file_byte_count_remaining == 0:
+ if file_byte_count_remaining <= 0:
  return
 
- start_of_fetch = self._buffer_start + len(self._buffer)
- byte_count_left_in_stream = self._files_data_end - start_of_fetch
-
- if byte_count_left_in_stream != 0:
- # Only fetch if there is still data left upstream
- if self._download_chunk_size_in_bytes > len(self._buffer):
- # Only fetch if size of buffer is below chunk size
- self._buffer.extend(self._upstream_buffer.get_data(
- start=start_of_fetch,
- length=min(byte_count_left_in_stream, self._download_chunk_size_in_bytes),
- ))
-
- bytes_to_yield = self._buffer[:file_byte_count_remaining] # Returns empty array if "out of bounds"
- yield bytes_to_yield
- self._buffer = self._buffer[file_byte_count_remaining:] # Returns empty array if "out of bounds"
- self._buffer_start += len(bytes_to_yield)
+ if len(self._buffer) > 0:
+ take = min(file_byte_count_remaining, len(self._buffer))
+ chunk = self._buffer[:take]
+ if chunk:
+ yield chunk
+ self._buffer = self._buffer[take:]
+ self._buffer_start += take
+ else:
+ start_of_fetch = self._buffer_start + len(self._buffer)
+ bytes_left_in_stream = self._files_data_end - start_of_fetch
+ if bytes_left_in_stream <= 0:
+ logger.error(
+ 'StreamSeeker: no bytes left upstream (start_of_fetch=%d, files_data_end=%d)',
+ start_of_fetch,
+ self._files_data_end,
+ )
+ return
+
+ fetch_size = min(self._max_chunk_size, file_byte_count_remaining + read_ahead_bytes)
+ if fetch_size > bytes_left_in_stream:
+ logger.error(
+ 'StreamSeeker: fetch_size (%d) > bytes_left_in_stream (%d); clamping',
+ fetch_size,
+ bytes_left_in_stream,
+ )
+ fetch_size = bytes_left_in_stream
+
+ fetched_data = self._upstream_buffer.get_data(start=start_of_fetch, length=fetch_size)
+ self._buffer.extend(fetched_data)
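For the reworked StreamSeeker above, the sketch below exercises the new read_ahead_bytes parameter using a duck-typed in-memory object standing in for IndexableBuffer (only the get_data(start, length) call used above is implemented); the payload and sizes are illustrative, not taken from the library:

from biolib.biolib_binary_format.remote_stream_seeker import StreamSeeker


class InMemoryBuffer:
    """Minimal stand-in exposing the get_data(start, length) call StreamSeeker relies on."""

    def __init__(self, data: bytes) -> None:
        self._data = data

    def get_data(self, start: int, length: int) -> bytes:
        return self._data[start:start + length]


payload = b'A' * 100 + b'B' * 50  # two "files" packed back to back
seeker = StreamSeeker(
    upstream_buffer=InMemoryBuffer(payload),
    files_data_start=0,
    files_data_end=len(payload),
    max_chunk_size=32,
)

# Stream the first 100-byte file in chunks of at most 32 bytes, prefetching a
# little extra so the start of the next file is already buffered.
first_file = b''.join(seeker.seek_and_read(file_start=0, file_length=100, read_ahead_bytes=16))
assert first_file == b'A' * 100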
biolib/biolib_logging.py CHANGED
@@ -62,7 +62,7 @@ def _get_no_user_data_logger() -> _BioLibLogger:
 
  # TODO: Simplify by refactoring to env BIOLIB_ENVIRONMENT_IS_CLOUD: boolean
  if os.getenv('BIOLIB_CLOUD_ENVIRONMENT', '').lower() == 'non-enclave':
- handler = logging.FileHandler(filename='/tmp/biolib_no_user_data.log')
+ handler = logging.FileHandler(filename='/biolib/logs/biolib_no_user_data.log')
  formatter = logging.Formatter(_DEFAULT_LOGGER_FORMAT)
  handler.setFormatter(formatter)
  _logger_no_user_data.addHandler(handler)
biolib/cli/__init__.py CHANGED
@@ -5,7 +5,7 @@ import click
 
  from biolib import utils
  from biolib.biolib_logging import logger, logger_no_user_data
- from biolib.cli import auth, data_record, download_container, init, lfs, push, run, runtime, sdk, start
+ from biolib.cli import auth, data_record, index, init, lfs, push, run, runtime, sdk, start
 
 
  @click.version_option(version=utils.BIOLIB_PACKAGE_VERSION, prog_name='pybiolib')
@@ -23,7 +23,6 @@ def cli() -> None:
  cli.add_command(auth.login)
  cli.add_command(auth.logout)
  cli.add_command(auth.whoami)
- cli.add_command(download_container.download_container)
  cli.add_command(init.init)
  cli.add_command(lfs.lfs)
  cli.add_command(push.push)
@@ -31,6 +30,7 @@ cli.add_command(run.run)
  cli.add_command(runtime.runtime)
  cli.add_command(start.start)
  cli.add_command(data_record.data_record)
+ cli.add_command(index.index)
  cli.add_command(sdk.sdk)
 
  # allow this script to be called without poetry in dev e.g. by an IDE debugger