pybiolib 1.2.911__py3-none-any.whl → 1.2.1642__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pybiolib might be problematic. Click here for more details.
- biolib/__init__.py +33 -10
- biolib/_data_record/data_record.py +24 -11
- biolib/_index/index.py +51 -0
- biolib/_index/types.py +7 -0
- biolib/_internal/add_copilot_prompts.py +3 -5
- biolib/_internal/add_gui_files.py +59 -0
- biolib/_internal/data_record/data_record.py +1 -1
- biolib/_internal/data_record/push_data.py +1 -1
- biolib/_internal/data_record/remote_storage_endpoint.py +3 -3
- biolib/_internal/file_utils.py +48 -0
- biolib/_internal/index/__init__.py +1 -0
- biolib/_internal/index/index.py +18 -0
- biolib/_internal/lfs/cache.py +4 -2
- biolib/_internal/push_application.py +89 -23
- biolib/_internal/runtime.py +2 -0
- biolib/_internal/string_utils.py +13 -0
- biolib/_internal/templates/copilot_template/.github/instructions/style-react-ts.instructions.md +47 -0
- biolib/_internal/templates/copilot_template/.github/prompts/biolib_onboard_repo.prompt.md +19 -0
- biolib/_internal/templates/gui_template/.yarnrc.yml +1 -0
- biolib/_internal/templates/gui_template/App.tsx +53 -0
- biolib/_internal/templates/gui_template/Dockerfile +28 -0
- biolib/_internal/templates/gui_template/biolib-sdk.ts +37 -0
- biolib/_internal/templates/gui_template/dev-data/output.json +7 -0
- biolib/_internal/templates/gui_template/index.css +5 -0
- biolib/_internal/templates/gui_template/index.html +13 -0
- biolib/_internal/templates/gui_template/index.tsx +10 -0
- biolib/_internal/templates/gui_template/package.json +27 -0
- biolib/_internal/templates/gui_template/tsconfig.json +24 -0
- biolib/_internal/templates/gui_template/vite-plugin-dev-data.ts +49 -0
- biolib/_internal/templates/gui_template/vite.config.mts +9 -0
- biolib/_internal/templates/init_template/.biolib/config.yml +1 -0
- biolib/_internal/templates/init_template/.github/workflows/biolib.yml +6 -1
- biolib/_internal/templates/init_template/Dockerfile +2 -0
- biolib/_internal/templates/init_template/run.sh +1 -0
- biolib/_internal/templates/templates.py +9 -1
- biolib/_internal/utils/__init__.py +25 -0
- biolib/_internal/utils/job_url.py +33 -0
- biolib/_internal/utils/multinode.py +12 -14
- biolib/_runtime/runtime.py +15 -2
- biolib/_session/session.py +7 -5
- biolib/_shared/__init__.py +0 -0
- biolib/_shared/types/__init__.py +69 -0
- biolib/_shared/types/account.py +12 -0
- biolib/_shared/types/account_member.py +8 -0
- biolib/{_internal → _shared}/types/experiment.py +1 -0
- biolib/_shared/types/resource.py +17 -0
- biolib/_shared/types/resource_deploy_key.py +11 -0
- biolib/{_internal → _shared}/types/resource_permission.py +1 -1
- biolib/{_internal → _shared}/types/user.py +5 -5
- biolib/_shared/utils/__init__.py +7 -0
- biolib/_shared/utils/resource_uri.py +75 -0
- biolib/api/client.py +1 -1
- biolib/app/app.py +96 -45
- biolib/biolib_api_client/app_types.py +1 -0
- biolib/biolib_api_client/biolib_app_api.py +26 -0
- biolib/biolib_binary_format/module_input.py +8 -0
- biolib/biolib_binary_format/remote_endpoints.py +3 -3
- biolib/biolib_binary_format/remote_stream_seeker.py +39 -25
- biolib/biolib_logging.py +1 -1
- biolib/cli/__init__.py +2 -1
- biolib/cli/auth.py +4 -16
- biolib/cli/data_record.py +17 -0
- biolib/cli/index.py +32 -0
- biolib/cli/init.py +93 -11
- biolib/cli/lfs.py +1 -1
- biolib/cli/run.py +1 -1
- biolib/cli/start.py +14 -1
- biolib/compute_node/job_worker/executors/docker_executor.py +31 -9
- biolib/compute_node/job_worker/executors/docker_types.py +1 -1
- biolib/compute_node/job_worker/executors/types.py +6 -5
- biolib/compute_node/job_worker/job_storage.py +2 -1
- biolib/compute_node/job_worker/job_worker.py +155 -90
- biolib/compute_node/job_worker/large_file_system.py +2 -6
- biolib/compute_node/job_worker/network_alloc.py +99 -0
- biolib/compute_node/job_worker/network_buffer.py +240 -0
- biolib/compute_node/job_worker/utilization_reporter_thread.py +2 -2
- biolib/compute_node/remote_host_proxy.py +135 -67
- biolib/compute_node/utils.py +2 -0
- biolib/compute_node/webserver/compute_node_results_proxy.py +188 -0
- biolib/compute_node/webserver/proxy_utils.py +28 -0
- biolib/compute_node/webserver/webserver.py +64 -19
- biolib/experiments/experiment.py +98 -16
- biolib/jobs/job.py +128 -31
- biolib/jobs/job_result.py +73 -33
- biolib/jobs/types.py +1 -0
- biolib/sdk/__init__.py +17 -2
- biolib/typing_utils.py +1 -1
- biolib/utils/cache_state.py +2 -2
- biolib/utils/seq_util.py +1 -1
- {pybiolib-1.2.911.dist-info → pybiolib-1.2.1642.dist-info}/METADATA +4 -2
- pybiolib-1.2.1642.dist-info/RECORD +180 -0
- {pybiolib-1.2.911.dist-info → pybiolib-1.2.1642.dist-info}/WHEEL +1 -1
- biolib/_internal/llm_instructions/.github/instructions/style-react-ts.instructions.md +0 -22
- biolib/_internal/types/__init__.py +0 -6
- biolib/_internal/types/account.py +0 -10
- biolib/utils/app_uri.py +0 -57
- pybiolib-1.2.911.dist-info/RECORD +0 -150
- /biolib/{_internal/llm_instructions → _index}/__init__.py +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/general-app-knowledge.instructions.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-general.instructions.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/instructions/style-python.instructions.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_app_inputs.prompt.md +0 -0
- /biolib/_internal/{llm_instructions → templates/copilot_template}/.github/prompts/biolib_run_apps.prompt.md +0 -0
- /biolib/{_internal → _shared}/types/app.py +0 -0
- /biolib/{_internal → _shared}/types/data_record.py +0 -0
- /biolib/{_internal → _shared}/types/file_node.py +0 -0
- /biolib/{_internal → _shared}/types/push.py +0 -0
- /biolib/{_internal/types/resource.py → _shared/types/resource_types.py} +0 -0
- /biolib/{_internal → _shared}/types/resource_version.py +0 -0
- /biolib/{_internal → _shared}/types/result.py +0 -0
- /biolib/{_internal → _shared}/types/typing.py +0 -0
- {pybiolib-1.2.911.dist-info → pybiolib-1.2.1642.dist-info}/entry_points.txt +0 -0
- {pybiolib-1.2.911.dist-info → pybiolib-1.2.1642.dist-info/licenses}/LICENSE +0 -0
biolib/jobs/job.py
CHANGED
|
@@ -2,7 +2,7 @@ import base64
|
|
|
2
2
|
import sys
|
|
3
3
|
import time
|
|
4
4
|
from collections import OrderedDict
|
|
5
|
-
from datetime import datetime, timedelta
|
|
5
|
+
from datetime import datetime, timedelta, timezone
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from urllib.parse import urlparse
|
|
8
8
|
|
|
@@ -10,7 +10,8 @@ import biolib.api.client
|
|
|
10
10
|
from biolib import utils
|
|
11
11
|
from biolib._internal.http_client import HttpClient
|
|
12
12
|
from biolib._internal.tree_utils import build_tree_from_files, build_tree_str
|
|
13
|
-
from biolib._internal.utils import open_browser_window_from_notebook
|
|
13
|
+
from biolib._internal.utils import PathFilter, filter_lazy_loaded_files, open_browser_window_from_notebook
|
|
14
|
+
from biolib._shared.utils import parse_resource_uri
|
|
14
15
|
from biolib.api.client import ApiClient
|
|
15
16
|
from biolib.biolib_api_client import BiolibApiClient, CreatedJobDict
|
|
16
17
|
from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
|
|
@@ -18,16 +19,16 @@ from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
|
|
|
18
19
|
from biolib.biolib_binary_format import LazyLoadedFile, ModuleInput, ModuleInputDict, ModuleOutputV2
|
|
19
20
|
from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
|
|
20
21
|
from biolib.biolib_binary_format.stdout_and_stderr import StdoutAndStderr
|
|
22
|
+
from biolib.biolib_binary_format.utils import InMemoryIndexableBuffer
|
|
21
23
|
from biolib.biolib_errors import BioLibError, CloudJobFinishedError
|
|
22
24
|
from biolib.biolib_logging import logger, logger_no_user_data
|
|
23
25
|
from biolib.compute_node.job_worker.job_storage import JobStorage
|
|
24
26
|
from biolib.compute_node.utils import SystemExceptionCodeMap, SystemExceptionCodes
|
|
25
|
-
from biolib.jobs.job_result import JobResult
|
|
27
|
+
from biolib.jobs.job_result import JobResult
|
|
26
28
|
from biolib.jobs.types import CloudJobDict, CloudJobStartedDict, JobDict
|
|
27
29
|
from biolib.tables import BioLibTable
|
|
28
|
-
from biolib.typing_utils import Dict, List, Optional, Union, cast
|
|
30
|
+
from biolib.typing_utils import Dict, Generator, List, Optional, Tuple, Union, cast
|
|
29
31
|
from biolib.utils import IS_RUNNING_IN_NOTEBOOK
|
|
30
|
-
from biolib.utils.app_uri import parse_app_uri
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
class Result:
|
|
@@ -49,7 +50,7 @@ class Result:
|
|
|
49
50
|
self._auth_token: str = job_dict['auth_token']
|
|
50
51
|
|
|
51
52
|
self._job_dict: JobDict = job_dict
|
|
52
|
-
self._job_dict_last_fetched_at: datetime = datetime.utcnow()
|
|
53
|
+
self._job_dict_last_fetched_at: datetime = datetime.now(timezone.utc)
|
|
53
54
|
self._result: Optional[JobResult] = None
|
|
54
55
|
self._cached_input_arguments: Optional[List[str]] = None
|
|
55
56
|
|
|
@@ -187,6 +188,45 @@ class Result:
|
|
|
187
188
|
"""
|
|
188
189
|
return self.result.list_output_files(path_filter=path_filter)
|
|
189
190
|
|
|
191
|
+
def list_input_files(
|
|
192
|
+
self,
|
|
193
|
+
path_filter: Optional[PathFilter] = None,
|
|
194
|
+
) -> List[LazyLoadedFile]:
|
|
195
|
+
"""List input files from the result.
|
|
196
|
+
|
|
197
|
+
Args:
|
|
198
|
+
path_filter (PathFilter, optional): Filter to apply to the input files.
|
|
199
|
+
Can be a string glob pattern or a callable that takes a path string and returns a boolean.
|
|
200
|
+
|
|
201
|
+
Returns:
|
|
202
|
+
List[LazyLoadedFile]: List of input files.
|
|
203
|
+
|
|
204
|
+
Example::
|
|
205
|
+
>>> result = biolib.get_result("result_id")
|
|
206
|
+
>>> input_files = result.list_input_files()
|
|
207
|
+
>>> # Filter files with a glob pattern
|
|
208
|
+
>>> input_files = result.list_input_files("*.txt")
|
|
209
|
+
"""
|
|
210
|
+
presigned_download_url = BiolibJobApi.get_job_storage_download_url(
|
|
211
|
+
job_uuid=self.id,
|
|
212
|
+
job_auth_token=self._auth_token,
|
|
213
|
+
storage_type='input',
|
|
214
|
+
)
|
|
215
|
+
response = HttpClient.request(url=presigned_download_url)
|
|
216
|
+
module_input_serialized: bytes = response.content
|
|
217
|
+
module_input = ModuleInput(module_input_serialized).deserialize()
|
|
218
|
+
|
|
219
|
+
files = []
|
|
220
|
+
for path, data in module_input['files'].items():
|
|
221
|
+
buffer = InMemoryIndexableBuffer(data)
|
|
222
|
+
lazy_file = LazyLoadedFile(path=path, buffer=buffer, start=0, length=len(data))
|
|
223
|
+
files.append(lazy_file)
|
|
224
|
+
|
|
225
|
+
if not path_filter:
|
|
226
|
+
return files
|
|
227
|
+
|
|
228
|
+
return filter_lazy_loaded_files(files, path_filter)
|
|
229
|
+
|
|
190
230
|
def get_output_file(self, filename: str) -> LazyLoadedFile:
|
|
191
231
|
return self.result.get_output_file(filename=filename)
|
|
192
232
|
|
|
@@ -226,7 +266,7 @@ class Result:
|
|
|
226
266
|
|
|
227
267
|
return self._cached_input_arguments
|
|
228
268
|
|
|
229
|
-
def save_input_files(self, output_dir: str) -> None:
|
|
269
|
+
def save_input_files(self, output_dir: str, overwrite: bool = False) -> None:
|
|
230
270
|
logger.info('Downloading input files...')
|
|
231
271
|
module_input = self._get_module_input()
|
|
232
272
|
|
|
@@ -236,7 +276,12 @@ class Result:
|
|
|
236
276
|
# Remove leading slash of file_path
|
|
237
277
|
destination_file_path = Path(output_dir) / Path(path.lstrip('/'))
|
|
238
278
|
if destination_file_path.exists():
|
|
239
|
-
|
|
279
|
+
if not overwrite:
|
|
280
|
+
raise BioLibError(f'File {destination_file_path} already exists. Set overwrite=True to overwrite.')
|
|
281
|
+
else:
|
|
282
|
+
destination_file_path.rename(
|
|
283
|
+
f'{destination_file_path}.biolib-renamed.{time.strftime("%Y%m%d%H%M%S")}'
|
|
284
|
+
)
|
|
240
285
|
|
|
241
286
|
dir_path = destination_file_path.parent
|
|
242
287
|
if dir_path:
|
|
@@ -251,12 +296,44 @@ class Result:
|
|
|
251
296
|
self,
|
|
252
297
|
output_dir: str,
|
|
253
298
|
path_filter: Optional[PathFilter] = None,
|
|
254
|
-
skip_file_if_exists:
|
|
299
|
+
skip_file_if_exists: bool = False,
|
|
300
|
+
overwrite: bool = False,
|
|
301
|
+
flat: bool = False,
|
|
255
302
|
) -> None:
|
|
303
|
+
"""Save output files from the result to a local directory.
|
|
304
|
+
|
|
305
|
+
Args:
|
|
306
|
+
output_dir (str): Directory path where files will be saved.
|
|
307
|
+
path_filter (PathFilter, optional): Filter to apply to output files.
|
|
308
|
+
Can be a string glob pattern or a callable that takes a path and returns a boolean.
|
|
309
|
+
skip_file_if_exists (bool, optional): If True, skip files that already exist locally.
|
|
310
|
+
Defaults to False.
|
|
311
|
+
overwrite (bool, optional): If True, overwrite existing files by renaming them with a timestamp.
|
|
312
|
+
Defaults to False.
|
|
313
|
+
flat (bool, optional): If True, save all files directly to output_dir using only their basenames,
|
|
314
|
+
without creating subdirectories. When enabled, raises an error if duplicate basenames exist
|
|
315
|
+
in the filtered output or if any basename already exists in output_dir. Defaults to False.
|
|
316
|
+
|
|
317
|
+
Raises:
|
|
318
|
+
BioLibError: If flat=True and duplicate basenames are found in filtered output.
|
|
319
|
+
BioLibError: If flat=True and a file with the same basename already exists in output_dir.
|
|
320
|
+
BioLibError: If a file already exists and neither skip_file_if_exists nor overwrite is True.
|
|
321
|
+
|
|
322
|
+
Example::
|
|
323
|
+
>>> result = biolib.get_result("result_id")
|
|
324
|
+
>>> # Save all files preserving directory structure
|
|
325
|
+
>>> result.save_files("./output")
|
|
326
|
+
>>> # Save files flat without subdirectories
|
|
327
|
+
>>> result.save_files("./output", flat=True)
|
|
328
|
+
>>> # Save only specific files
|
|
329
|
+
>>> result.save_files("./output", path_filter="*.txt")
|
|
330
|
+
"""
|
|
256
331
|
self.result.save_files(
|
|
257
332
|
output_dir=output_dir,
|
|
258
333
|
path_filter=path_filter,
|
|
259
334
|
skip_file_if_exists=skip_file_if_exists,
|
|
335
|
+
overwrite=overwrite,
|
|
336
|
+
flat=flat,
|
|
260
337
|
)
|
|
261
338
|
|
|
262
339
|
def get_status(self) -> str:
|
|
@@ -351,6 +428,7 @@ class Result:
|
|
|
351
428
|
>>> # Recompute with different arguments
|
|
352
429
|
>>> new_result = result.recompute(arguments=["--new-arg", "value"])
|
|
353
430
|
"""
|
|
431
|
+
self._refetch_job_dict()
|
|
354
432
|
app_response = BiolibAppApi.get_by_uri(uri=app_uri or self._job_dict['app_uri'])
|
|
355
433
|
|
|
356
434
|
job_storage_input = RemoteJobStorageEndpoint(
|
|
@@ -378,6 +456,7 @@ class Result:
|
|
|
378
456
|
app_uri=app_response['app_uri'],
|
|
379
457
|
app_version_uuid=app_response['app_version']['public_id'],
|
|
380
458
|
module_input_serialized=module_input_serialized,
|
|
459
|
+
override_command=self._job_dict['arguments_override_command'],
|
|
381
460
|
machine=machine if machine else original_requested_machine,
|
|
382
461
|
)
|
|
383
462
|
if blocking:
|
|
@@ -444,16 +523,11 @@ class Result:
|
|
|
444
523
|
return Result(job_dict)
|
|
445
524
|
|
|
446
525
|
@staticmethod
|
|
447
|
-
def
|
|
526
|
+
def _yield_logs_packages(stdout_and_stderr_packages_b64) -> Generator[Tuple[str, bytes], None, None]:
|
|
448
527
|
for stdout_and_stderr_package_b64 in stdout_and_stderr_packages_b64:
|
|
449
528
|
stdout_and_stderr_package = base64.b64decode(stdout_and_stderr_package_b64)
|
|
450
529
|
stdout_and_stderr = StdoutAndStderr(stdout_and_stderr_package).deserialize()
|
|
451
|
-
|
|
452
|
-
sys.stdout.write(stdout_and_stderr.decode())
|
|
453
|
-
if not IS_RUNNING_IN_NOTEBOOK: # for some reason flushing in jupyter notebooks breaks \r handling
|
|
454
|
-
sys.stdout.flush()
|
|
455
|
-
# flush after having processed all packages
|
|
456
|
-
sys.stdout.flush()
|
|
530
|
+
yield ('stdout', stdout_and_stderr)
|
|
457
531
|
|
|
458
532
|
def show(self) -> None:
|
|
459
533
|
self._refetch_job_dict()
|
|
@@ -463,21 +537,44 @@ class Result:
|
|
|
463
537
|
title=f'Result: {self._uuid}',
|
|
464
538
|
).print_table()
|
|
465
539
|
|
|
466
|
-
def stream_logs(self
|
|
540
|
+
def stream_logs(self, as_iterator: bool = False):
|
|
541
|
+
if as_iterator:
|
|
542
|
+
return self._iter_logs()
|
|
467
543
|
self._stream_logs()
|
|
544
|
+
return None
|
|
468
545
|
|
|
469
546
|
def _stream_logs(self, enable_print: bool = True) -> None:
|
|
547
|
+
try:
|
|
548
|
+
for stream_type, data in self._iter_logs(enable_print=enable_print):
|
|
549
|
+
if stream_type == 'stdout':
|
|
550
|
+
if IS_RUNNING_IN_NOTEBOOK:
|
|
551
|
+
sys.stdout.write(data.decode(encoding='utf-8', errors='replace'))
|
|
552
|
+
# Note: we avoid flush() in notebook as that breaks \r handling
|
|
553
|
+
else:
|
|
554
|
+
sys.stdout.buffer.write(data)
|
|
555
|
+
sys.stdout.buffer.flush()
|
|
556
|
+
elif stream_type == 'stderr':
|
|
557
|
+
if IS_RUNNING_IN_NOTEBOOK:
|
|
558
|
+
sys.stderr.write(data.decode(encoding='utf-8', errors='replace'))
|
|
559
|
+
# Note: we avoid flush() in notebook as that breaks \r handling
|
|
560
|
+
else:
|
|
561
|
+
sys.stderr.buffer.write(data)
|
|
562
|
+
sys.stderr.buffer.flush()
|
|
563
|
+
finally:
|
|
564
|
+
# Flush after having processed all packages
|
|
565
|
+
if IS_RUNNING_IN_NOTEBOOK:
|
|
566
|
+
sys.stdout.flush()
|
|
567
|
+
sys.stderr.flush()
|
|
568
|
+
|
|
569
|
+
def _iter_logs(self, enable_print: bool = True) -> Generator[Tuple[str, bytes], None, None]:
|
|
470
570
|
try:
|
|
471
571
|
cloud_job = self._get_cloud_job_awaiting_started()
|
|
472
572
|
except CloudJobFinishedError:
|
|
473
573
|
logger.info(f'--- The result {self.id} has already completed (no streaming will take place) ---')
|
|
474
574
|
logger.info('--- The stdout log is printed below: ---')
|
|
475
|
-
|
|
476
|
-
print(self.get_stdout().decode(), file=sys.stdout)
|
|
477
|
-
sys.stdout.flush()
|
|
575
|
+
yield ('stdout', self.get_stdout())
|
|
478
576
|
logger.info('--- The stderr log is printed below: ---')
|
|
479
|
-
|
|
480
|
-
sys.stderr.flush()
|
|
577
|
+
yield ('stderr', self.get_stderr())
|
|
481
578
|
logger.info(f'--- The job {self.id} has already completed. Its output was printed above. ---')
|
|
482
579
|
return
|
|
483
580
|
|
|
@@ -489,7 +586,7 @@ class Result:
|
|
|
489
586
|
logger_no_user_data.debug(f'Using cloud proxy URL from env var BIOLIB_CLOUD_BASE_URL: {compute_node_url}')
|
|
490
587
|
|
|
491
588
|
if enable_print:
|
|
492
|
-
self.
|
|
589
|
+
yield from self._yield_full_logs(node_url=compute_node_url)
|
|
493
590
|
|
|
494
591
|
final_status_messages: List[str] = []
|
|
495
592
|
while True:
|
|
@@ -508,8 +605,8 @@ class Result:
|
|
|
508
605
|
# Print the status before writing stdout and stderr
|
|
509
606
|
logger.info(f'Cloud: {status_update["log_message"]}')
|
|
510
607
|
|
|
511
|
-
if
|
|
512
|
-
self.
|
|
608
|
+
if enable_print:
|
|
609
|
+
yield from self._yield_logs_packages(status_json['stdout_and_stderr_packages_b64'])
|
|
513
610
|
|
|
514
611
|
if 'error_code' in status_json:
|
|
515
612
|
error_code = status_json['error_code']
|
|
@@ -526,7 +623,7 @@ class Result:
|
|
|
526
623
|
|
|
527
624
|
self.wait() # Wait for compute node to tell the backend that the job is finished
|
|
528
625
|
|
|
529
|
-
def
|
|
626
|
+
def _yield_full_logs(self, node_url: str) -> Generator[Tuple[str, bytes], None, None]:
|
|
530
627
|
try:
|
|
531
628
|
response_json = HttpClient.request(url=f'{node_url}/v1/job/{self._uuid}/status/?logs=full').json()
|
|
532
629
|
except Exception as error:
|
|
@@ -536,7 +633,7 @@ class Result:
|
|
|
536
633
|
for status_update in response_json.get('previous_status_updates', []):
|
|
537
634
|
logger.info(f'Cloud: {status_update["log_message"]}')
|
|
538
635
|
|
|
539
|
-
self.
|
|
636
|
+
yield from self._yield_logs_packages(response_json['streamed_logs_packages_b64'])
|
|
540
637
|
|
|
541
638
|
def _get_cloud_job_awaiting_started(self) -> CloudJobStartedDict:
|
|
542
639
|
retry_count = 0
|
|
@@ -582,11 +679,11 @@ class Result:
|
|
|
582
679
|
)
|
|
583
680
|
|
|
584
681
|
def _refetch_job_dict(self, force_refetch: Optional[bool] = False) -> None:
|
|
585
|
-
if not force_refetch and self._job_dict_last_fetched_at > datetime.utcnow() - timedelta(seconds=2):
|
|
682
|
+
if not force_refetch and self._job_dict_last_fetched_at > datetime.now(timezone.utc) - timedelta(seconds=2):
|
|
586
683
|
return
|
|
587
684
|
|
|
588
685
|
self._job_dict = self._get_job_dict(self._uuid, self._auth_token)
|
|
589
|
-
self._job_dict_last_fetched_at = datetime.utcnow()
|
|
686
|
+
self._job_dict_last_fetched_at = datetime.now(timezone.utc)
|
|
590
687
|
|
|
591
688
|
@staticmethod
|
|
592
689
|
def _start_job_in_cloud(
|
|
@@ -605,8 +702,8 @@ class Result:
|
|
|
605
702
|
) -> 'Result':
|
|
606
703
|
if len(module_input_serialized) < 500_000 and temporary_client_secrets is None:
|
|
607
704
|
_job_dict = BiolibJobApi.create_job_with_data(
|
|
608
|
-
app_resource_name_prefix=parse_app_uri(app_uri)['resource_name_prefix'],
|
|
609
705
|
app_version_uuid=app_version_uuid,
|
|
706
|
+
app_resource_name_prefix=parse_resource_uri(app_uri)['resource_prefix'],
|
|
610
707
|
arguments_override_command=override_command,
|
|
611
708
|
experiment_uuid=experiment_id,
|
|
612
709
|
module_input_serialized=module_input_serialized,
|
|
@@ -620,8 +717,8 @@ class Result:
|
|
|
620
717
|
return Result(cast(JobDict, _job_dict))
|
|
621
718
|
|
|
622
719
|
job_dict: CreatedJobDict = BiolibJobApi.create(
|
|
623
|
-
app_resource_name_prefix=parse_app_uri(app_uri)['resource_name_prefix'],
|
|
624
720
|
app_version_id=app_version_uuid,
|
|
721
|
+
app_resource_name_prefix=parse_resource_uri(app_uri)['resource_prefix'],
|
|
625
722
|
experiment_uuid=experiment_id,
|
|
626
723
|
machine=machine,
|
|
627
724
|
notify=notify,
|
biolib/jobs/job_result.py
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
import time
|
|
2
|
-
from fnmatch import fnmatch
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
|
|
4
|
+
from biolib._internal.utils import PathFilter, filter_lazy_loaded_files
|
|
5
5
|
from biolib.biolib_binary_format import ModuleOutputV2
|
|
6
6
|
from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint
|
|
7
7
|
from biolib.biolib_binary_format.remote_stream_seeker import StreamSeeker
|
|
8
8
|
from biolib.biolib_binary_format.utils import LazyLoadedFile, RemoteIndexableBuffer
|
|
9
9
|
from biolib.biolib_errors import BioLibError
|
|
10
10
|
from biolib.biolib_logging import logger
|
|
11
|
-
from biolib.typing_utils import Callable, List, Optional, Union, cast
|
|
12
|
-
|
|
13
|
-
PathFilter = Union[str, Callable[[str], bool]]
|
|
11
|
+
from biolib.typing_utils import Dict, List, Optional
|
|
14
12
|
|
|
15
13
|
|
|
16
14
|
class JobResult:
|
|
@@ -38,17 +36,68 @@ class JobResult:
|
|
|
38
36
|
self,
|
|
39
37
|
output_dir: str,
|
|
40
38
|
path_filter: Optional[PathFilter] = None,
|
|
41
|
-
skip_file_if_exists:
|
|
39
|
+
skip_file_if_exists: bool = False,
|
|
40
|
+
overwrite: bool = False,
|
|
41
|
+
flat: bool = False,
|
|
42
42
|
) -> None:
|
|
43
43
|
module_output = self._get_module_output()
|
|
44
44
|
output_files = module_output.get_files()
|
|
45
|
-
filtered_output_files =
|
|
45
|
+
filtered_output_files = filter_lazy_loaded_files(output_files, path_filter) if path_filter else output_files
|
|
46
46
|
|
|
47
47
|
if len(filtered_output_files) == 0:
|
|
48
48
|
logger.debug('No output files to save')
|
|
49
49
|
return
|
|
50
50
|
|
|
51
|
-
|
|
51
|
+
if flat:
|
|
52
|
+
basename_to_paths: Dict[str, List[str]] = {}
|
|
53
|
+
for file in filtered_output_files:
|
|
54
|
+
basename = Path(file.path).name
|
|
55
|
+
if basename not in basename_to_paths:
|
|
56
|
+
basename_to_paths[basename] = []
|
|
57
|
+
basename_to_paths[basename].append(file.path)
|
|
58
|
+
|
|
59
|
+
duplicates = {basename: paths for basename, paths in basename_to_paths.items() if len(paths) > 1}
|
|
60
|
+
|
|
61
|
+
if duplicates:
|
|
62
|
+
max_shown = 3
|
|
63
|
+
error_parts = []
|
|
64
|
+
sorted_basenames = sorted(duplicates.keys())
|
|
65
|
+
|
|
66
|
+
for basename in sorted_basenames[:max_shown]:
|
|
67
|
+
paths = duplicates[basename]
|
|
68
|
+
error_parts.append(f' {basename}: ({", ".join(paths)})')
|
|
69
|
+
|
|
70
|
+
error_message = 'Cannot save files in flat mode: duplicate filenames detected:\n' + '\n'.join(
|
|
71
|
+
error_parts
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
if len(duplicates) > max_shown:
|
|
75
|
+
remaining = len(duplicates) - max_shown
|
|
76
|
+
error_message += f'\n (and {remaining} more)'
|
|
77
|
+
|
|
78
|
+
raise BioLibError(error_message)
|
|
79
|
+
|
|
80
|
+
major_gap_threshold = 50_000
|
|
81
|
+
n = len(filtered_output_files)
|
|
82
|
+
|
|
83
|
+
next_break_end = [0] * n
|
|
84
|
+
if n > 0:
|
|
85
|
+
next_break_end[n - 1] = filtered_output_files[n - 1].start + filtered_output_files[n - 1].length
|
|
86
|
+
for i in range(n - 2, -1, -1):
|
|
87
|
+
end_i = filtered_output_files[i].start + filtered_output_files[i].length
|
|
88
|
+
gap = filtered_output_files[i + 1].start - end_i
|
|
89
|
+
if gap >= major_gap_threshold:
|
|
90
|
+
next_break_end[i] = end_i
|
|
91
|
+
else:
|
|
92
|
+
next_break_end[i] = next_break_end[i + 1]
|
|
93
|
+
|
|
94
|
+
total_files_data_to_download_in_bytes = 0
|
|
95
|
+
file_read_ahead_map = {}
|
|
96
|
+
for i, file in enumerate(filtered_output_files):
|
|
97
|
+
total_files_data_to_download_in_bytes += file.length
|
|
98
|
+
end_i = file.start + file.length
|
|
99
|
+
read_ahead_bytes = max(0, next_break_end[i] - end_i)
|
|
100
|
+
file_read_ahead_map[i] = read_ahead_bytes
|
|
52
101
|
|
|
53
102
|
# Assume files are in order
|
|
54
103
|
first_file = filtered_output_files[0]
|
|
@@ -56,18 +105,24 @@ class JobResult:
|
|
|
56
105
|
stream_seeker = StreamSeeker(
|
|
57
106
|
files_data_start=first_file.start,
|
|
58
107
|
files_data_end=last_file.start + last_file.length,
|
|
59
|
-
|
|
108
|
+
max_chunk_size=min(total_files_data_to_download_in_bytes, 10_000_000),
|
|
60
109
|
upstream_buffer=module_output.buffer,
|
|
61
110
|
)
|
|
62
111
|
|
|
63
112
|
logger.info(f'Saving {len(filtered_output_files)} files to {output_dir}...')
|
|
64
|
-
for file in filtered_output_files:
|
|
65
|
-
|
|
66
|
-
|
|
113
|
+
for file_index, file in enumerate(filtered_output_files):
|
|
114
|
+
if flat:
|
|
115
|
+
destination_file_path = Path(output_dir) / Path(file.path).name
|
|
116
|
+
else:
|
|
117
|
+
# Remove leading slash of file_path
|
|
118
|
+
destination_file_path = Path(output_dir) / Path(file.path.lstrip('/'))
|
|
119
|
+
|
|
67
120
|
if destination_file_path.exists():
|
|
68
121
|
if skip_file_if_exists:
|
|
69
122
|
print(f'Skipping {destination_file_path} as a file with that name already exists locally.')
|
|
70
123
|
continue
|
|
124
|
+
elif not overwrite:
|
|
125
|
+
raise BioLibError(f'File {destination_file_path} already exists. Set overwrite=True to overwrite.')
|
|
71
126
|
else:
|
|
72
127
|
destination_file_path.rename(
|
|
73
128
|
f'{destination_file_path}.biolib-renamed.{time.strftime("%Y%m%d%H%M%S")}'
|
|
@@ -88,8 +143,12 @@ class JobResult:
|
|
|
88
143
|
file_start += data_already_downloaded
|
|
89
144
|
data_to_download -= data_already_downloaded
|
|
90
145
|
|
|
146
|
+
read_ahead_bytes = file_read_ahead_map[file_index]
|
|
147
|
+
|
|
91
148
|
with open(partial_path, mode='ab') as partial_file:
|
|
92
|
-
for chunk in stream_seeker.seek_and_read(
|
|
149
|
+
for chunk in stream_seeker.seek_and_read(
|
|
150
|
+
file_start=file_start, file_length=data_to_download, read_ahead_bytes=read_ahead_bytes
|
|
151
|
+
):
|
|
93
152
|
partial_file.write(chunk)
|
|
94
153
|
|
|
95
154
|
# rename partial file to actual file name
|
|
@@ -97,7 +156,7 @@ class JobResult:
|
|
|
97
156
|
|
|
98
157
|
def get_output_file(self, filename) -> LazyLoadedFile:
|
|
99
158
|
files = self._get_module_output().get_files()
|
|
100
|
-
filtered_files =
|
|
159
|
+
filtered_files = filter_lazy_loaded_files(files, path_filter=filename)
|
|
101
160
|
if not filtered_files:
|
|
102
161
|
raise BioLibError(f'File {filename} not found in results.')
|
|
103
162
|
|
|
@@ -111,26 +170,7 @@ class JobResult:
|
|
|
111
170
|
if not path_filter:
|
|
112
171
|
return files
|
|
113
172
|
|
|
114
|
-
return
|
|
115
|
-
|
|
116
|
-
@staticmethod
|
|
117
|
-
def _get_filtered_files(files: List[LazyLoadedFile], path_filter: PathFilter) -> List[LazyLoadedFile]:
|
|
118
|
-
if not (isinstance(path_filter, str) or callable(path_filter)):
|
|
119
|
-
raise Exception('Expected path_filter to be a string or a function')
|
|
120
|
-
|
|
121
|
-
if callable(path_filter):
|
|
122
|
-
return list(filter(lambda x: path_filter(x.path), files)) # type: ignore
|
|
123
|
-
|
|
124
|
-
glob_filter = cast(str, path_filter)
|
|
125
|
-
|
|
126
|
-
# since all file paths start with /, make sure filter does too
|
|
127
|
-
if not glob_filter.startswith('/'):
|
|
128
|
-
glob_filter = '/' + glob_filter
|
|
129
|
-
|
|
130
|
-
def _filter_function(file: LazyLoadedFile) -> bool:
|
|
131
|
-
return fnmatch(file.path, glob_filter)
|
|
132
|
-
|
|
133
|
-
return list(filter(_filter_function, files))
|
|
173
|
+
return filter_lazy_loaded_files(files, path_filter)
|
|
134
174
|
|
|
135
175
|
def _get_module_output(self) -> ModuleOutputV2:
|
|
136
176
|
if self._module_output is None:
|
biolib/jobs/types.py
CHANGED
biolib/sdk/__init__.py
CHANGED
|
@@ -2,6 +2,7 @@ from typing import Optional
|
|
|
2
2
|
|
|
3
3
|
# Imports to hide and use as private internal utils
|
|
4
4
|
from biolib._data_record.data_record import DataRecord as _DataRecord
|
|
5
|
+
from biolib._index.index import Index as _Index
|
|
5
6
|
from biolib._internal.push_application import push_application as _push_application
|
|
6
7
|
from biolib._internal.push_application import set_app_version_as_active as _set_app_version_as_active
|
|
7
8
|
from biolib._runtime.runtime import Runtime as _Runtime
|
|
@@ -12,8 +13,18 @@ from biolib.app import BioLibApp as _BioLibApp
|
|
|
12
13
|
Runtime = _Runtime
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
def get_session(
|
|
16
|
-
|
|
16
|
+
def get_session(
|
|
17
|
+
refresh_token: str,
|
|
18
|
+
base_url: Optional[str] = None,
|
|
19
|
+
client_type: Optional[str] = None,
|
|
20
|
+
experiment: Optional[str] = None,
|
|
21
|
+
) -> _Session:
|
|
22
|
+
return _Session.get_session(
|
|
23
|
+
refresh_token=refresh_token,
|
|
24
|
+
base_url=base_url,
|
|
25
|
+
client_type=client_type,
|
|
26
|
+
experiment=experiment,
|
|
27
|
+
)
|
|
17
28
|
|
|
18
29
|
|
|
19
30
|
def push_app_version(uri: str, path: str) -> _BioLibApp:
|
|
@@ -64,3 +75,7 @@ def create_data_record(
|
|
|
64
75
|
data_path=data_path,
|
|
65
76
|
record_type=record_type,
|
|
66
77
|
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def get_index(uri: str) -> _Index:
|
|
81
|
+
return _Index.get_by_uri(uri)
|
biolib/typing_utils.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# TODO: Deprecate and later remove this file
|
|
2
|
-
from biolib._internal.types.typing import * # pylint: disable=wildcard-import, unused-wildcard-import
|
|
2
|
+
from biolib._shared.types.typing import * # pylint: disable=wildcard-import, unused-wildcard-import
|
biolib/utils/cache_state.py
CHANGED
|
@@ -2,7 +2,7 @@ import os
|
|
|
2
2
|
import abc
|
|
3
3
|
import json
|
|
4
4
|
import time
|
|
5
|
-
from datetime import datetime
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
6
|
|
|
7
7
|
import appdirs # type: ignore
|
|
8
8
|
|
|
@@ -88,4 +88,4 @@ class CacheState(abc.ABC, Generic[StateType]):
|
|
|
88
88
|
|
|
89
89
|
@staticmethod
|
|
90
90
|
def get_timestamp_now() -> str:
|
|
91
|
-
return datetime.utcnow().isoformat()
|
|
91
|
+
return datetime.now(timezone.utc).isoformat()
|
biolib/utils/seq_util.py
CHANGED
{pybiolib-1.2.911.dist-info → pybiolib-1.2.1642.dist-info}/METADATA
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: pybiolib
|
|
3
|
-
Version: 1.2.911
|
|
3
|
+
Version: 1.2.1642
|
|
4
4
|
Summary: BioLib Python Client
|
|
5
5
|
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Keywords: biolib
|
|
7
8
|
Author: biolib
|
|
8
9
|
Author-email: hello@biolib.com
|
|
@@ -17,6 +18,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
17
18
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
19
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
20
|
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
22
|
Provides-Extra: compute-node
|
|
21
23
|
Requires-Dist: appdirs (>=1.4.3)
|
|
22
24
|
Requires-Dist: click (>=8.0.0)
|