pybiolib 1.2.883__py3-none-any.whl → 1.2.1890__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/__init__.py +33 -10
- biolib/_data_record/data_record.py +220 -126
- biolib/_index/index.py +55 -0
- biolib/_index/query_result.py +103 -0
- biolib/_internal/add_copilot_prompts.py +24 -11
- biolib/_internal/add_gui_files.py +81 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +1 -18
- biolib/_internal/data_record/push_data.py +65 -16
- biolib/_internal/data_record/remote_storage_endpoint.py +18 -13
- biolib/_internal/file_utils.py +48 -0
- biolib/_internal/lfs/cache.py +4 -2
- biolib/_internal/push_application.py +95 -24
- biolib/_internal/runtime.py +2 -0
- biolib/_internal/string_utils.py +13 -0
- biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-general.instructions.md +5 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
- biolib/_internal/templates/dashboard_template/.biolib/config.yml +5 -0
- biolib/_internal/templates/{init_template → github_workflow_template}/.github/workflows/biolib.yml +7 -2
- biolib/_internal/templates/gitignore_template/.gitignore +10 -0
- biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
- biolib/_internal/templates/gui_template/App.tsx +53 -0
- biolib/_internal/templates/gui_template/Dockerfile +27 -0
- biolib/_internal/templates/gui_template/biolib-sdk.ts +82 -0
- biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
- biolib/_internal/templates/gui_template/index.css +5 -0
- biolib/_internal/templates/gui_template/index.html +13 -0
- biolib/_internal/templates/gui_template/index.tsx +10 -0
- biolib/_internal/templates/gui_template/package.json +27 -0
- biolib/_internal/templates/gui_template/tsconfig.json +24 -0
- biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +50 -0
- biolib/_internal/templates/gui_template/vite.config.mts +10 -0
- biolib/_internal/templates/init_template/.biolib/config.yml +1 -0
- biolib/_internal/templates/init_template/Dockerfile +5 -1
- biolib/_internal/templates/init_template/run.py +6 -15
- biolib/_internal/templates/init_template/run.sh +1 -0
- biolib/_internal/templates/templates.py +21 -1
- biolib/_internal/utils/__init__.py +47 -0
- biolib/_internal/utils/auth.py +46 -0
- biolib/_internal/utils/job_url.py +33 -0
- biolib/_internal/utils/multinode.py +12 -14
- biolib/_runtime/runtime.py +15 -2
- biolib/_session/session.py +7 -5
- biolib/_shared/__init__.py +0 -0
- biolib/_shared/types/__init__.py +74 -0
- biolib/_shared/types/account.py +12 -0
- biolib/_shared/types/account_member.py +8 -0
- biolib/{_internal → _shared}/types/experiment.py +1 -0
- biolib/_shared/types/resource.py +37 -0
- biolib/_shared/types/resource_deploy_key.py +11 -0
- biolib/{_internal → _shared}/types/resource_version.py +8 -2
- biolib/_shared/types/user.py +19 -0
- biolib/_shared/utils/__init__.py +7 -0
- biolib/_shared/utils/resource_uri.py +75 -0
- biolib/api/client.py +5 -48
- biolib/app/app.py +97 -55
- biolib/biolib_api_client/api_client.py +3 -47
- biolib/biolib_api_client/app_types.py +1 -1
- biolib/biolib_api_client/biolib_app_api.py +31 -6
- biolib/biolib_api_client/biolib_job_api.py +1 -1
- biolib/biolib_api_client/user_state.py +34 -2
- biolib/biolib_binary_format/module_input.py +8 -0
- biolib/biolib_binary_format/remote_endpoints.py +3 -3
- biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
- biolib/biolib_logging.py +1 -1
- biolib/cli/__init__.py +2 -2
- biolib/cli/auth.py +4 -16
- biolib/cli/data_record.py +82 -0
- biolib/cli/index.py +32 -0
- biolib/cli/init.py +393 -71
- biolib/cli/lfs.py +1 -1
- biolib/cli/run.py +9 -6
- biolib/cli/start.py +14 -1
- biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
- biolib/compute_node/job_worker/executors/docker_types.py +1 -1
- biolib/compute_node/job_worker/executors/types.py +6 -5
- biolib/compute_node/job_worker/job_storage.py +2 -1
- biolib/compute_node/job_worker/job_worker.py +155 -90
- biolib/compute_node/job_worker/large_file_system.py +2 -6
- biolib/compute_node/job_worker/network_alloc.py +99 -0
- biolib/compute_node/job_worker/network_buffer.py +240 -0
- biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
- biolib/compute_node/remote_host_proxy.py +163 -79
- biolib/compute_node/utils.py +2 -0
- biolib/compute_node/webserver/compute_node_results_proxy.py +189 -0
- biolib/compute_node/webserver/proxy_utils.py +28 -0
- biolib/compute_node/webserver/webserver.py +64 -19
- biolib/experiments/experiment.py +111 -16
- biolib/jobs/job.py +128 -31
- biolib/jobs/job_result.py +74 -34
- biolib/jobs/types.py +1 -0
- biolib/sdk/__init__.py +28 -3
- biolib/typing_utils.py +1 -1
- biolib/utils/cache_state.py +8 -5
- biolib/utils/multipart_uploader.py +24 -18
- biolib/utils/seq_util.py +1 -1
- pybiolib-1.2.1890.dist-info/METADATA +41 -0
- pybiolib-1.2.1890.dist-info/RECORD +177 -0
- {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info}/WHEEL +1 -1
- pybiolib-1.2.1890.dist-info/entry_points.txt +2 -0
- biolib/_internal/llm_instructions/.github/instructions/style-react-ts.instructions.md +0 -22
- biolib/_internal/templates/init_template/.gitignore +0 -2
- biolib/_internal/types/__init__.py +0 -6
- biolib/_internal/types/resource.py +0 -18
- biolib/biolib_download_container.py +0 -38
- biolib/cli/download_container.py +0 -14
- biolib/utils/app_uri.py +0 -57
- pybiolib-1.2.883.dist-info/METADATA +0 -50
- pybiolib-1.2.883.dist-info/RECORD +0 -148
- pybiolib-1.2.883.dist-info/entry_points.txt +0 -3
- /biolib/{_internal/llm_instructions → _index}/__init__.py +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/general-app-knowledge.instructions.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-python.instructions.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_app_inputs.prompt.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_run_apps.prompt.md +0 -0
- /biolib/{_internal → _shared}/types/app.py +0 -0
- /biolib/{_internal → _shared}/types/data_record.py +0 -0
- /biolib/{_internal → _shared}/types/file_node.py +0 -0
- /biolib/{_internal → _shared}/types/push.py +0 -0
- /biolib/{_internal → _shared}/types/resource_permission.py +0 -0
- /biolib/{_internal → _shared}/types/result.py +0 -0
- /biolib/{_internal → _shared}/types/typing.py +0 -0
- {pybiolib-1.2.883.dist-info → pybiolib-1.2.1890.dist-info/licenses}/LICENSE +0 -0
biolib/app/app.py
CHANGED
|
@@ -1,11 +1,16 @@
|
|
|
1
|
+
import copy
|
|
1
2
|
import io
|
|
2
3
|
import json
|
|
3
4
|
import os
|
|
5
|
+
import posixpath
|
|
4
6
|
import random
|
|
5
7
|
import string
|
|
6
8
|
from pathlib import Path
|
|
7
9
|
|
|
8
10
|
from biolib import utils
|
|
11
|
+
from biolib._internal.file_utils import path_to_renamed_path
|
|
12
|
+
from biolib._runtime.runtime import Runtime
|
|
13
|
+
from biolib._shared.utils import parse_resource_uri
|
|
9
14
|
from biolib.api.client import ApiClient
|
|
10
15
|
from biolib.biolib_api_client import JobState
|
|
11
16
|
from biolib.biolib_api_client.app_types import App, AppVersion
|
|
@@ -18,13 +23,24 @@ from biolib.compute_node.job_worker.job_worker import JobWorker
|
|
|
18
23
|
from biolib.experiments.experiment import Experiment
|
|
19
24
|
from biolib.jobs.job import Result
|
|
20
25
|
from biolib.typing_utils import Dict, Optional
|
|
21
|
-
|
|
22
|
-
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class JsonStringIO(io.StringIO):
|
|
29
|
+
pass
|
|
23
30
|
|
|
24
31
|
|
|
25
32
|
class BioLibApp:
|
|
26
|
-
def __init__(
|
|
33
|
+
def __init__(
|
|
34
|
+
self,
|
|
35
|
+
uri: str,
|
|
36
|
+
_api_client: Optional[ApiClient] = None,
|
|
37
|
+
suppress_version_warning: bool = False,
|
|
38
|
+
_experiment: Optional[str] = None,
|
|
39
|
+
):
|
|
27
40
|
self._api_client: Optional[ApiClient] = _api_client
|
|
41
|
+
self._experiment = _experiment
|
|
42
|
+
self._input_uri = uri
|
|
43
|
+
self._parsed_input_uri = parse_resource_uri(uri)
|
|
28
44
|
|
|
29
45
|
app_response = BiolibAppApi.get_by_uri(uri=uri, api_client=self._api_client)
|
|
30
46
|
self._app: App = app_response['app']
|
|
@@ -32,16 +48,19 @@ class BioLibApp:
|
|
|
32
48
|
self._app_version: AppVersion = app_response['app_version']
|
|
33
49
|
|
|
34
50
|
if not suppress_version_warning:
|
|
35
|
-
|
|
36
|
-
if parsed_uri['version'] is None:
|
|
51
|
+
if self._parsed_input_uri['version'] is None:
|
|
37
52
|
if Runtime.check_is_environment_biolib_app():
|
|
38
53
|
logger.warning(
|
|
39
54
|
f"No version specified in URI '{uri}'. This will use the default version, "
|
|
40
|
-
f
|
|
55
|
+
f'which may change behaviour over time. Consider locking down the exact version, '
|
|
41
56
|
f"e.g. '{uri}:1.2.3'"
|
|
42
57
|
)
|
|
43
58
|
|
|
44
|
-
|
|
59
|
+
if self._parsed_input_uri['tag']:
|
|
60
|
+
semantic_version = f"{self._app_version['major']}.{self._app_version['minor']}.{self._app_version['patch']}"
|
|
61
|
+
logger.info(f'Loaded {self._input_uri} (resolved to {semantic_version})')
|
|
62
|
+
else:
|
|
63
|
+
logger.info(f'Loaded {self._app_uri}')
|
|
45
64
|
|
|
46
65
|
def __str__(self) -> str:
|
|
47
66
|
return self._app_uri
|
|
@@ -70,7 +89,7 @@ class BioLibApp:
|
|
|
70
89
|
result_prefix: Optional[str] = None,
|
|
71
90
|
timeout: Optional[int] = None,
|
|
72
91
|
notify: bool = False,
|
|
73
|
-
|
|
92
|
+
max_workers: Optional[int] = None,
|
|
74
93
|
experiment: Optional[str] = None,
|
|
75
94
|
temporary_client_secrets: Optional[Dict[str, str]] = None,
|
|
76
95
|
check: bool = False,
|
|
@@ -83,22 +102,18 @@ class BioLibApp:
|
|
|
83
102
|
raise ValueError('The argument "check" cannot be True when blocking is False')
|
|
84
103
|
|
|
85
104
|
if not experiment_id:
|
|
86
|
-
|
|
105
|
+
experiment_to_use = experiment if experiment is not None else self._experiment
|
|
106
|
+
experiment_instance: Optional[Experiment]
|
|
107
|
+
if experiment_to_use:
|
|
108
|
+
experiment_instance = Experiment(experiment_to_use, _api_client=self._api_client)
|
|
109
|
+
else:
|
|
110
|
+
experiment_instance = Experiment.get_experiment_in_context()
|
|
87
111
|
experiment_id = experiment_instance.uuid if experiment_instance else None
|
|
88
112
|
|
|
89
113
|
module_input_serialized = self._get_serialized_module_input(args, stdin, files)
|
|
90
114
|
|
|
91
115
|
if machine == 'local':
|
|
92
|
-
|
|
93
|
-
raise BioLibError('The argument "blocking" cannot be False when running locally')
|
|
94
|
-
|
|
95
|
-
if experiment_id:
|
|
96
|
-
logger.warning('The argument "experiment_id" is ignored when running locally')
|
|
97
|
-
|
|
98
|
-
if result_prefix:
|
|
99
|
-
logger.warning('The argument "result_prefix" is ignored when running locally')
|
|
100
|
-
|
|
101
|
-
return self._run_locally(module_input_serialized)
|
|
116
|
+
raise BioLibError('Running applications locally with machine="local" is no longer supported.')
|
|
102
117
|
|
|
103
118
|
job = Result._start_job_in_cloud( # pylint: disable=protected-access
|
|
104
119
|
app_uri=self._app_uri,
|
|
@@ -110,11 +125,12 @@ class BioLibApp:
|
|
|
110
125
|
override_command=override_command,
|
|
111
126
|
result_prefix=result_prefix,
|
|
112
127
|
timeout=timeout,
|
|
113
|
-
requested_machine_count=
|
|
128
|
+
requested_machine_count=max_workers,
|
|
114
129
|
temporary_client_secrets=temporary_client_secrets,
|
|
115
130
|
api_client=self._api_client,
|
|
116
131
|
)
|
|
117
|
-
|
|
132
|
+
if utils.IS_RUNNING_IN_NOTEBOOK:
|
|
133
|
+
logger.info(f'View the result in your browser at: {utils.BIOLIB_BASE_URL}/results/{job.id}/')
|
|
118
134
|
if blocking:
|
|
119
135
|
# TODO: Deprecate utils.STREAM_STDOUT and always stream logs by simply calling job.stream_logs()
|
|
120
136
|
if utils.IS_RUNNING_IN_NOTEBOOK:
|
|
@@ -151,6 +167,8 @@ Example: "app.cli('--help')"
|
|
|
151
167
|
def _get_serialized_module_input(args=None, stdin=None, files=None) -> bytes:
|
|
152
168
|
if args is None:
|
|
153
169
|
args = []
|
|
170
|
+
else:
|
|
171
|
+
args = copy.copy(args)
|
|
154
172
|
|
|
155
173
|
if stdin is None:
|
|
156
174
|
stdin = b''
|
|
@@ -168,21 +186,72 @@ Example: "app.cli('--help')"
|
|
|
168
186
|
files = []
|
|
169
187
|
|
|
170
188
|
files_dict = {}
|
|
189
|
+
if isinstance(files, list):
|
|
190
|
+
for file_path in files:
|
|
191
|
+
path = Path(file_path)
|
|
192
|
+
if path.is_dir():
|
|
193
|
+
renamed_dir = path_to_renamed_path(file_path)
|
|
194
|
+
for filename in path.rglob('*'):
|
|
195
|
+
if filename.is_dir():
|
|
196
|
+
continue
|
|
197
|
+
with open(filename, 'rb') as f:
|
|
198
|
+
relative_to_dir = filename.resolve().relative_to(path.resolve())
|
|
199
|
+
files_dict[posixpath.join(renamed_dir, relative_to_dir.as_posix())] = f.read()
|
|
200
|
+
else:
|
|
201
|
+
with open(path, 'rb') as f:
|
|
202
|
+
files_dict[path_to_renamed_path(str(path))] = f.read()
|
|
203
|
+
elif isinstance(files, dict):
|
|
204
|
+
files_dict = {}
|
|
205
|
+
for key, value in files.items():
|
|
206
|
+
if '//' in key:
|
|
207
|
+
raise BioLibError(f"File path '{key}' contains double slashes which are not allowed")
|
|
208
|
+
if not key.startswith('/'):
|
|
209
|
+
key = '/' + key
|
|
210
|
+
files_dict[key] = value
|
|
211
|
+
else:
|
|
212
|
+
raise Exception('The given files input must be list or dict or None')
|
|
213
|
+
|
|
171
214
|
for idx, arg in enumerate(args):
|
|
172
215
|
if isinstance(arg, str):
|
|
173
216
|
if os.path.isfile(arg) or os.path.isdir(arg):
|
|
174
|
-
|
|
175
|
-
|
|
217
|
+
if os.path.isfile(arg):
|
|
218
|
+
with open(arg, 'rb') as f:
|
|
219
|
+
files_dict[path_to_renamed_path(arg)] = f.read()
|
|
220
|
+
elif os.path.isdir(arg):
|
|
221
|
+
path = Path(arg)
|
|
222
|
+
renamed_dir = path_to_renamed_path(arg)
|
|
223
|
+
for filename in path.rglob('*'):
|
|
224
|
+
if filename.is_dir():
|
|
225
|
+
continue
|
|
226
|
+
with open(filename, 'rb') as f:
|
|
227
|
+
relative_to_dir = filename.resolve().relative_to(path.resolve())
|
|
228
|
+
files_dict[posixpath.join(renamed_dir, relative_to_dir.as_posix())] = f.read()
|
|
229
|
+
args[idx] = path_to_renamed_path(arg, prefix_with_slash=False)
|
|
176
230
|
|
|
177
231
|
# support --myarg=file.txt
|
|
178
232
|
elif os.path.isfile(arg.split('=')[-1]) or os.path.isdir(arg.split('=')[-1]):
|
|
179
|
-
|
|
180
|
-
|
|
233
|
+
file_path = arg.split('=')[-1]
|
|
234
|
+
if os.path.isfile(file_path):
|
|
235
|
+
with open(file_path, 'rb') as f:
|
|
236
|
+
files_dict[path_to_renamed_path(file_path)] = f.read()
|
|
237
|
+
elif os.path.isdir(file_path):
|
|
238
|
+
path = Path(file_path)
|
|
239
|
+
renamed_dir = path_to_renamed_path(file_path)
|
|
240
|
+
for filename in path.rglob('*'):
|
|
241
|
+
if filename.is_dir():
|
|
242
|
+
continue
|
|
243
|
+
with open(filename, 'rb') as f:
|
|
244
|
+
relative_to_dir = filename.resolve().relative_to(path.resolve())
|
|
245
|
+
files_dict[posixpath.join(renamed_dir, relative_to_dir.as_posix())] = f.read()
|
|
246
|
+
args[idx] = arg.split('=')[0] + '=' + path_to_renamed_path(file_path, prefix_with_slash=False)
|
|
181
247
|
else:
|
|
182
248
|
pass # a normal string arg was given
|
|
183
249
|
else:
|
|
184
250
|
tmp_filename = f'input_{"".join(random.choices(string.ascii_letters + string.digits, k=7))}'
|
|
185
|
-
if isinstance(arg,
|
|
251
|
+
if isinstance(arg, JsonStringIO):
|
|
252
|
+
file_data = arg.getvalue().encode()
|
|
253
|
+
tmp_filename += '.json'
|
|
254
|
+
elif isinstance(arg, io.StringIO):
|
|
186
255
|
file_data = arg.getvalue().encode()
|
|
187
256
|
elif isinstance(arg, io.BytesIO):
|
|
188
257
|
file_data = arg.getvalue()
|
|
@@ -191,33 +260,6 @@ Example: "app.cli('--help')"
|
|
|
191
260
|
files_dict[f'/{tmp_filename}'] = file_data
|
|
192
261
|
args[idx] = tmp_filename
|
|
193
262
|
|
|
194
|
-
if isinstance(files, list):
|
|
195
|
-
for file in files:
|
|
196
|
-
path = Path(file).absolute()
|
|
197
|
-
|
|
198
|
-
# Recursively add data from files if dir
|
|
199
|
-
if path.is_dir():
|
|
200
|
-
for filename in path.rglob('*'):
|
|
201
|
-
if filename.is_dir():
|
|
202
|
-
continue
|
|
203
|
-
file = open(filename, 'rb')
|
|
204
|
-
relative_path = '/' + path.name + '/' + '/'.join(filename.relative_to(path).parts)
|
|
205
|
-
files_dict[relative_path] = file.read()
|
|
206
|
-
file.close()
|
|
207
|
-
|
|
208
|
-
# Add file data
|
|
209
|
-
else:
|
|
210
|
-
file = open(path, 'rb')
|
|
211
|
-
path_short = '/' + path.name
|
|
212
|
-
|
|
213
|
-
files_dict[path_short] = file.read()
|
|
214
|
-
file.close()
|
|
215
|
-
|
|
216
|
-
elif isinstance(files, dict):
|
|
217
|
-
files_dict.update(files)
|
|
218
|
-
else:
|
|
219
|
-
raise Exception('The given files input must be list or dict or None')
|
|
220
|
-
|
|
221
263
|
module_input_serialized: bytes = ModuleInput().serialize(
|
|
222
264
|
stdin=stdin,
|
|
223
265
|
arguments=args,
|
|
@@ -228,7 +270,7 @@ Example: "app.cli('--help')"
|
|
|
228
270
|
def _run_locally(self, module_input_serialized: bytes) -> Result:
|
|
229
271
|
job_dict = BiolibJobApi.create(
|
|
230
272
|
app_version_id=self._app_version['public_id'],
|
|
231
|
-
app_resource_name_prefix=
|
|
273
|
+
app_resource_name_prefix=parse_resource_uri(self._app_uri)['resource_prefix'],
|
|
232
274
|
)
|
|
233
275
|
job = Result(job_dict)
|
|
234
276
|
|
|
@@ -253,7 +295,7 @@ Example: "app.cli('--help')"
|
|
|
253
295
|
continue
|
|
254
296
|
|
|
255
297
|
if isinstance(value, dict):
|
|
256
|
-
value =
|
|
298
|
+
value = JsonStringIO(json.dumps(value))
|
|
257
299
|
elif isinstance(value, (int, float)): # Cast numeric values to strings
|
|
258
300
|
value = str(value)
|
|
259
301
|
|
|
@@ -1,15 +1,13 @@
|
|
|
1
|
-
import base64
|
|
2
|
-
import binascii
|
|
3
|
-
import json
|
|
4
1
|
import os
|
|
5
2
|
from datetime import datetime, timezone
|
|
6
3
|
from json.decoder import JSONDecodeError
|
|
7
4
|
|
|
8
5
|
from biolib._internal.http_client import HttpClient
|
|
6
|
+
from biolib._internal.utils.auth import decode_jwt_without_checking_signature
|
|
9
7
|
from biolib._runtime.runtime import Runtime
|
|
10
8
|
from biolib.biolib_errors import BioLibError
|
|
11
9
|
from biolib.biolib_logging import logger, logger_no_user_data
|
|
12
|
-
from biolib.typing_utils import
|
|
10
|
+
from biolib.typing_utils import Optional, TypedDict
|
|
13
11
|
|
|
14
12
|
from .user_state import UserState
|
|
15
13
|
|
|
@@ -19,10 +17,6 @@ class UserTokens(TypedDict):
|
|
|
19
17
|
refresh: str
|
|
20
18
|
|
|
21
19
|
|
|
22
|
-
class JwtDecodeError(Exception):
|
|
23
|
-
pass
|
|
24
|
-
|
|
25
|
-
|
|
26
20
|
class _ApiClient:
|
|
27
21
|
def __init__(self, base_url: str, access_token: Optional[str] = None):
|
|
28
22
|
self.base_url: str = base_url
|
|
@@ -60,7 +54,7 @@ class _ApiClient:
|
|
|
60
54
|
return
|
|
61
55
|
|
|
62
56
|
if self.access_token:
|
|
63
|
-
decoded_token =
|
|
57
|
+
decoded_token = decode_jwt_without_checking_signature(self.access_token)
|
|
64
58
|
if datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60: # 60 second buffer
|
|
65
59
|
# Token has not expired yet
|
|
66
60
|
return
|
|
@@ -132,44 +126,6 @@ class _ApiClient:
|
|
|
132
126
|
self.access_token = json_response['access_token']
|
|
133
127
|
self.refresh_token = json_response['refresh_token']
|
|
134
128
|
|
|
135
|
-
@staticmethod
|
|
136
|
-
def decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
|
|
137
|
-
jwt_bytes = jwt.encode('utf-8')
|
|
138
|
-
|
|
139
|
-
try:
|
|
140
|
-
signing_input, _ = jwt_bytes.rsplit(b'.', 1)
|
|
141
|
-
header_segment, payload_segment = signing_input.split(b'.', 1)
|
|
142
|
-
except ValueError as error:
|
|
143
|
-
raise JwtDecodeError('Not enough segments') from error
|
|
144
|
-
|
|
145
|
-
try:
|
|
146
|
-
header_data = base64.urlsafe_b64decode(header_segment)
|
|
147
|
-
except (TypeError, binascii.Error) as error:
|
|
148
|
-
raise JwtDecodeError('Invalid header padding') from error
|
|
149
|
-
|
|
150
|
-
try:
|
|
151
|
-
header = json.loads(header_data)
|
|
152
|
-
except ValueError as error:
|
|
153
|
-
raise JwtDecodeError(f'Invalid header string: {error}') from error
|
|
154
|
-
|
|
155
|
-
if not isinstance(header, dict):
|
|
156
|
-
raise JwtDecodeError('Invalid header string: must be a json object')
|
|
157
|
-
|
|
158
|
-
try:
|
|
159
|
-
payload_data = base64.urlsafe_b64decode(payload_segment)
|
|
160
|
-
except (TypeError, binascii.Error) as error:
|
|
161
|
-
raise JwtDecodeError('Invalid payload padding') from error
|
|
162
|
-
|
|
163
|
-
try:
|
|
164
|
-
payload = json.loads(payload_data)
|
|
165
|
-
except ValueError as error:
|
|
166
|
-
raise JwtDecodeError(f'Invalid payload string: {error}') from error
|
|
167
|
-
|
|
168
|
-
if not isinstance(header, dict):
|
|
169
|
-
raise JwtDecodeError('Invalid payload string: must be a json object')
|
|
170
|
-
|
|
171
|
-
return dict(header=header, payload=payload)
|
|
172
|
-
|
|
173
129
|
|
|
174
130
|
class BiolibApiClient:
|
|
175
131
|
api_client: Optional[_ApiClient] = None
|
|
@@ -78,6 +78,7 @@ class _Module(TypedDict):
|
|
|
78
78
|
large_file_systems: List[LargeFileSystemMapping]
|
|
79
79
|
name: str
|
|
80
80
|
output_files_mappings: List[FilesMapping]
|
|
81
|
+
ports: List[int]
|
|
81
82
|
source_files_mappings: List[FilesMapping]
|
|
82
83
|
working_directory: str
|
|
83
84
|
|
|
@@ -90,7 +91,6 @@ class Module(_Module, total=False):
|
|
|
90
91
|
class _AppVersionOnJob(TypedDict):
|
|
91
92
|
created_at: str
|
|
92
93
|
client_side_executable_zip: Optional[str]
|
|
93
|
-
consumes_stdin: bool
|
|
94
94
|
is_runnable_by_user: bool
|
|
95
95
|
public_id: str
|
|
96
96
|
remote_hosts: List[RemoteHost]
|
|
@@ -57,6 +57,22 @@ def _get_git_branch_name() -> str:
|
|
|
57
57
|
return ''
|
|
58
58
|
|
|
59
59
|
|
|
60
|
+
def _get_git_commit_hash() -> str:
|
|
61
|
+
try:
|
|
62
|
+
github_actions_commit_hash = os.getenv('GITHUB_SHA')
|
|
63
|
+
if github_actions_commit_hash:
|
|
64
|
+
return github_actions_commit_hash
|
|
65
|
+
|
|
66
|
+
gitlab_ci_commit_hash = os.getenv('CI_COMMIT_SHA')
|
|
67
|
+
if gitlab_ci_commit_hash:
|
|
68
|
+
return gitlab_ci_commit_hash
|
|
69
|
+
|
|
70
|
+
result = subprocess.run(['git', 'rev-parse', 'HEAD'], check=True, stdout=subprocess.PIPE, text=True)
|
|
71
|
+
return result.stdout.strip()
|
|
72
|
+
except BaseException:
|
|
73
|
+
return ''
|
|
74
|
+
|
|
75
|
+
|
|
60
76
|
def _get_git_repository_url() -> str:
|
|
61
77
|
try:
|
|
62
78
|
result = subprocess.run(['git', 'remote', 'get-url', 'origin'], check=True, stdout=subprocess.PIPE, text=True)
|
|
@@ -64,16 +80,15 @@ def _get_git_repository_url() -> str:
|
|
|
64
80
|
except BaseException:
|
|
65
81
|
return ''
|
|
66
82
|
|
|
67
|
-
|
|
83
|
+
|
|
84
|
+
def _get_resource_uri_from_str(input_str: str) -> str:
|
|
68
85
|
parsed_base_url = urllib.parse.urlparse(load_base_url_from_env())
|
|
69
86
|
parsed_uri = urllib.parse.urlparse(input_str)
|
|
70
87
|
if parsed_uri.netloc != '' and parsed_base_url.netloc != parsed_uri.netloc:
|
|
71
|
-
raise biolib_errors.ValidationError(
|
|
72
|
-
f'Invalid URI. The hostname "{parsed_base_url.netloc}" is not recognized.'
|
|
73
|
-
)
|
|
88
|
+
raise biolib_errors.ValidationError(f'Invalid URI. The hostname "{parsed_base_url.netloc}" is not recognized.')
|
|
74
89
|
elif parsed_uri.netloc != '' and parsed_uri.path[1] != '@':
|
|
75
90
|
uri = f'@{parsed_uri.netloc}{parsed_uri.path}'
|
|
76
|
-
elif parsed_uri.netloc == '' and parsed_uri.path.startswith
|
|
91
|
+
elif parsed_uri.netloc == '' and parsed_uri.path.startswith(parsed_base_url.netloc):
|
|
77
92
|
uri = f'@{parsed_uri.path}'
|
|
78
93
|
else:
|
|
79
94
|
uri = parsed_uri.path
|
|
@@ -86,7 +101,7 @@ def _get_app_uri_from_str(input_str: str) -> str:
|
|
|
86
101
|
class BiolibAppApi:
|
|
87
102
|
@staticmethod
|
|
88
103
|
def get_by_uri(uri: str, api_client: Optional[ApiClient] = None) -> AppGetResponse:
|
|
89
|
-
uri =
|
|
104
|
+
uri = _get_resource_uri_from_str(uri)
|
|
90
105
|
api = api_client or biolib.api.client
|
|
91
106
|
try:
|
|
92
107
|
response = api.get(path='/app/', params={'uri': uri})
|
|
@@ -99,6 +114,15 @@ class BiolibAppApi:
|
|
|
99
114
|
|
|
100
115
|
raise error
|
|
101
116
|
|
|
117
|
+
@staticmethod
|
|
118
|
+
def create_app(uri: str):
|
|
119
|
+
uri = _get_resource_uri_from_str(uri)
|
|
120
|
+
try:
|
|
121
|
+
response = biolib.api.client.post(path='/resources/apps/', data={'uri': uri})
|
|
122
|
+
return response.json()
|
|
123
|
+
except HttpError as error:
|
|
124
|
+
raise error
|
|
125
|
+
|
|
102
126
|
@staticmethod
|
|
103
127
|
def push_app_version(
|
|
104
128
|
app_id,
|
|
@@ -116,6 +140,7 @@ class BiolibAppApi:
|
|
|
116
140
|
'state': 'published',
|
|
117
141
|
'app_version_id_to_copy_images_from': app_version_id_to_copy_images_from,
|
|
118
142
|
'git_branch_name': _get_git_branch_name(),
|
|
143
|
+
'git_commit_hash': _get_git_commit_hash(),
|
|
119
144
|
'git_repository_url': _get_git_repository_url(),
|
|
120
145
|
}
|
|
121
146
|
if semantic_version:
|
|
@@ -25,7 +25,7 @@ def _get_user_info() -> Optional[str]:
|
|
|
25
25
|
if utils.BASE_URL_IS_PUBLIC_BIOLIB:
|
|
26
26
|
return None
|
|
27
27
|
|
|
28
|
-
enterprise_agent_info_opt_env_vars = ['DOMINO_STARTING_USERNAME', 'USER']
|
|
28
|
+
enterprise_agent_info_opt_env_vars = ['BIOLIB_OPT_USER', 'DOMINO_STARTING_USERNAME', 'USER']
|
|
29
29
|
|
|
30
30
|
for env_var in enterprise_agent_info_opt_env_vars:
|
|
31
31
|
env_var_value = os.getenv(env_var)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
from biolib.biolib_logging import logger_no_user_data
|
|
2
|
+
from biolib.typing_utils import Optional, TypedDict
|
|
1
3
|
from biolib.utils.cache_state import CacheState
|
|
2
|
-
from biolib.typing_utils import TypedDict, Optional
|
|
3
|
-
|
|
4
4
|
|
|
5
5
|
# TODO: Save job keys in the user state instead of a separate state file
|
|
6
6
|
# UuidStr = str
|
|
@@ -15,6 +15,9 @@ class UserStateType(TypedDict):
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class UserState(CacheState[UserStateType]):
|
|
18
|
+
def __init__(self) -> None:
|
|
19
|
+
super().__init__(fail_fast_on_lock_acquire=True)
|
|
20
|
+
self._is_in_memory_only: bool = False
|
|
18
21
|
|
|
19
22
|
@property
|
|
20
23
|
def _state_path(self) -> str:
|
|
@@ -22,3 +25,32 @@ class UserState(CacheState[UserStateType]):
|
|
|
22
25
|
|
|
23
26
|
def _get_default_state(self) -> UserStateType:
|
|
24
27
|
return UserStateType(refresh_token=None)
|
|
28
|
+
|
|
29
|
+
def __enter__(self) -> UserStateType:
|
|
30
|
+
if self._is_in_memory_only:
|
|
31
|
+
if self._state is None:
|
|
32
|
+
self._state = self._get_default_state()
|
|
33
|
+
return self._state
|
|
34
|
+
try:
|
|
35
|
+
return super().__enter__()
|
|
36
|
+
except Exception as error:
|
|
37
|
+
logger_no_user_data.warning(
|
|
38
|
+
f'UserState: Could not access state file, continuing with in-memory state only. '
|
|
39
|
+
f'Login state will not persist across Python processes. Error: {error}'
|
|
40
|
+
)
|
|
41
|
+
self._is_in_memory_only = True
|
|
42
|
+
if self._state is None:
|
|
43
|
+
self._state = self._get_default_state()
|
|
44
|
+
return self._state
|
|
45
|
+
|
|
46
|
+
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
|
47
|
+
if self._is_in_memory_only:
|
|
48
|
+
return
|
|
49
|
+
try:
|
|
50
|
+
super().__exit__(exc_type, exc_val, exc_tb)
|
|
51
|
+
except Exception as error:
|
|
52
|
+
logger_no_user_data.warning(
|
|
53
|
+
f'UserState: Could not write state file. '
|
|
54
|
+
f'Login state will not persist across Python processes. Error: {error}'
|
|
55
|
+
)
|
|
56
|
+
self._is_in_memory_only = True
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from biolib.biolib_binary_format.base_bbf_package import BioLibBinaryFormatBasePackage
|
|
2
|
+
from biolib.biolib_logging import logger
|
|
2
3
|
from biolib.typing_utils import TypedDict, Dict, List
|
|
3
4
|
|
|
4
5
|
|
|
@@ -14,6 +15,10 @@ class ModuleInput(BioLibBinaryFormatBasePackage):
|
|
|
14
15
|
self.package_type = 1
|
|
15
16
|
|
|
16
17
|
def serialize(self, stdin, arguments, files) -> bytes:
|
|
18
|
+
for path in files.keys():
|
|
19
|
+
if '//' in path:
|
|
20
|
+
raise ValueError(f"File path '{path}' contains double slashes which are not allowed")
|
|
21
|
+
|
|
17
22
|
bbf_data = bytearray()
|
|
18
23
|
bbf_data.extend(self.version.to_bytes(1, 'big'))
|
|
19
24
|
bbf_data.extend(self.package_type.to_bytes(1, 'big'))
|
|
@@ -67,6 +72,9 @@ class ModuleInput(BioLibBinaryFormatBasePackage):
|
|
|
67
72
|
data_len = self.get_data(8, output_type='int')
|
|
68
73
|
path = self.get_data(path_len, output_type='str')
|
|
69
74
|
data = self.get_data(data_len)
|
|
75
|
+
if '//' in path:
|
|
76
|
+
# TODO: Raise ValueError here once backwards compatibility period is over
|
|
77
|
+
logger.warning(f"File path '{path}' contains double slashes which are not allowed")
|
|
70
78
|
files[path] = bytes(data)
|
|
71
79
|
|
|
72
80
|
return ModuleInputDict(stdin=stdin, arguments=arguments, files=files)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from datetime import datetime, timedelta
|
|
1
|
+
from datetime import datetime, timedelta, timezone
|
|
2
2
|
|
|
3
3
|
from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
|
|
4
4
|
from biolib.biolib_binary_format.utils import RemoteEndpoint
|
|
@@ -17,13 +17,13 @@ class RemoteJobStorageEndpoint(RemoteEndpoint):
|
|
|
17
17
|
self._storage_type: Literal['input', 'output'] = storage_type
|
|
18
18
|
|
|
19
19
|
def get_remote_url(self):
|
|
20
|
-
if not self._presigned_url or datetime.
|
|
20
|
+
if not self._presigned_url or not self._expires_at or datetime.now(timezone.utc) > self._expires_at:
|
|
21
21
|
self._presigned_url = BiolibJobApi.get_job_storage_download_url(
|
|
22
22
|
job_auth_token=self._job_auth_token,
|
|
23
23
|
job_uuid=self._job_uuid,
|
|
24
24
|
storage_type='results' if self._storage_type == 'output' else 'input',
|
|
25
25
|
)
|
|
26
|
-
self._expires_at = datetime.
|
|
26
|
+
self._expires_at = datetime.now(timezone.utc) + timedelta(minutes=8)
|
|
27
27
|
# TODO: Use expires at from url
|
|
28
28
|
# parsed_url = urlparse(self._presigned_url)
|
|
29
29
|
# query_params = parse_qs(parsed_url.query)
|
|
@@ -1,45 +1,59 @@
|
|
|
1
1
|
from biolib.biolib_binary_format.utils import IndexableBuffer
|
|
2
|
+
from biolib.biolib_logging import logger
|
|
2
3
|
from biolib.typing_utils import Iterable
|
|
3
4
|
|
|
4
5
|
|
|
5
6
|
class StreamSeeker:
|
|
6
7
|
def __init__(
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
8
|
+
self,
|
|
9
|
+
upstream_buffer: IndexableBuffer,
|
|
10
|
+
files_data_start: int,
|
|
11
|
+
files_data_end: int,
|
|
12
|
+
max_chunk_size: int,
|
|
12
13
|
):
|
|
13
14
|
self._upstream_buffer = upstream_buffer
|
|
14
15
|
self._files_data_end = files_data_end
|
|
15
|
-
self.
|
|
16
|
+
self._max_chunk_size = max_chunk_size
|
|
16
17
|
|
|
17
18
|
self._buffer_start = files_data_start
|
|
18
19
|
self._buffer = bytearray()
|
|
19
20
|
|
|
20
|
-
def seek_and_read(self, file_start: int, file_length: int) -> Iterable[bytes]:
|
|
21
|
+
def seek_and_read(self, file_start: int, file_length: int, read_ahead_bytes: int = 0) -> Iterable[bytes]:
|
|
21
22
|
assert file_start >= self._buffer_start
|
|
22
|
-
self._buffer = self._buffer[file_start - self._buffer_start:]
|
|
23
|
+
self._buffer = self._buffer[file_start - self._buffer_start :]
|
|
23
24
|
self._buffer_start = file_start
|
|
24
25
|
|
|
25
26
|
while True:
|
|
26
27
|
file_byte_count_remaining = file_length - (self._buffer_start - file_start)
|
|
27
|
-
if file_byte_count_remaining
|
|
28
|
+
if file_byte_count_remaining <= 0:
|
|
28
29
|
return
|
|
29
30
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
31
|
+
if len(self._buffer) > 0:
|
|
32
|
+
take = min(file_byte_count_remaining, len(self._buffer))
|
|
33
|
+
chunk = self._buffer[:take]
|
|
34
|
+
if chunk:
|
|
35
|
+
yield chunk
|
|
36
|
+
self._buffer = self._buffer[take:]
|
|
37
|
+
self._buffer_start += take
|
|
38
|
+
else:
|
|
39
|
+
start_of_fetch = self._buffer_start + len(self._buffer)
|
|
40
|
+
bytes_left_in_stream = self._files_data_end - start_of_fetch
|
|
41
|
+
if bytes_left_in_stream <= 0:
|
|
42
|
+
logger.error(
|
|
43
|
+
'StreamSeeker: no bytes left upstream (start_of_fetch=%d, files_data_end=%d)',
|
|
44
|
+
start_of_fetch,
|
|
45
|
+
self._files_data_end,
|
|
46
|
+
)
|
|
47
|
+
return
|
|
48
|
+
|
|
49
|
+
fetch_size = min(self._max_chunk_size, file_byte_count_remaining + read_ahead_bytes)
|
|
50
|
+
if fetch_size > bytes_left_in_stream:
|
|
51
|
+
logger.error(
|
|
52
|
+
'StreamSeeker: fetch_size (%d) > bytes_left_in_stream (%d); clamping',
|
|
53
|
+
fetch_size,
|
|
54
|
+
bytes_left_in_stream,
|
|
55
|
+
)
|
|
56
|
+
fetch_size = bytes_left_in_stream
|
|
57
|
+
|
|
58
|
+
fetched_data = self._upstream_buffer.get_data(start=start_of_fetch, length=fetch_size)
|
|
59
|
+
self._buffer.extend(fetched_data)
|
biolib/biolib_logging.py
CHANGED
|
@@ -62,7 +62,7 @@ def _get_no_user_data_logger() -> _BioLibLogger:
|
|
|
62
62
|
|
|
63
63
|
# TODO: Simplify by refactoring to env BIOLIB_ENVIRONMENT_IS_CLOUD: boolean
|
|
64
64
|
if os.getenv('BIOLIB_CLOUD_ENVIRONMENT', '').lower() == 'non-enclave':
|
|
65
|
-
handler = logging.FileHandler(filename='/
|
|
65
|
+
handler = logging.FileHandler(filename='/biolib/logs/biolib_no_user_data.log')
|
|
66
66
|
formatter = logging.Formatter(_DEFAULT_LOGGER_FORMAT)
|
|
67
67
|
handler.setFormatter(formatter)
|
|
68
68
|
_logger_no_user_data.addHandler(handler)
|
biolib/cli/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@ import click
|
|
|
5
5
|
|
|
6
6
|
from biolib import utils
|
|
7
7
|
from biolib.biolib_logging import logger, logger_no_user_data
|
|
8
|
-
from biolib.cli import auth, data_record,
|
|
8
|
+
from biolib.cli import auth, data_record, index, init, lfs, push, run, runtime, sdk, start
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
@click.version_option(version=utils.BIOLIB_PACKAGE_VERSION, prog_name='pybiolib')
|
|
@@ -23,7 +23,6 @@ def cli() -> None:
|
|
|
23
23
|
cli.add_command(auth.login)
|
|
24
24
|
cli.add_command(auth.logout)
|
|
25
25
|
cli.add_command(auth.whoami)
|
|
26
|
-
cli.add_command(download_container.download_container)
|
|
27
26
|
cli.add_command(init.init)
|
|
28
27
|
cli.add_command(lfs.lfs)
|
|
29
28
|
cli.add_command(push.push)
|
|
@@ -31,6 +30,7 @@ cli.add_command(run.run)
|
|
|
31
30
|
cli.add_command(runtime.runtime)
|
|
32
31
|
cli.add_command(start.start)
|
|
33
32
|
cli.add_command(data_record.data_record)
|
|
33
|
+
cli.add_command(index.index)
|
|
34
34
|
cli.add_command(sdk.sdk)
|
|
35
35
|
|
|
36
36
|
# allow this script to be called without poetry in dev e.g. by an IDE debugger
|