pybiolib 1.1.1730__tar.gz → 1.1.1990__tar.gz

This diff compares two package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (117)
  1. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/PKG-INFO +1 -2
  2. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/__init__.py +8 -2
  3. pybiolib-1.1.1990/biolib/_internal/data_record/__init__.py +1 -0
  4. pybiolib-1.1.1990/biolib/_internal/data_record/data_record.py +166 -0
  5. pybiolib-1.1.1990/biolib/_internal/data_record/remote_storage_endpoint.py +27 -0
  6. pybiolib-1.1.1990/biolib/_internal/fuse_mount/__init__.py +1 -0
  7. pybiolib-1.1.1990/biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  8. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/_internal/http_client.py +42 -23
  9. pybiolib-1.1.1990/biolib/_internal/libs/__init__.py +1 -0
  10. pybiolib-1.1.1990/biolib/_internal/libs/fusepy/__init__.py +1257 -0
  11. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/_internal/push_application.py +22 -37
  12. pybiolib-1.1.1990/biolib/_internal/runtime.py +96 -0
  13. pybiolib-1.1.1990/biolib/_internal/utils/__init__.py +18 -0
  14. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/app/app.py +38 -72
  15. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/app/search_apps.py +8 -12
  16. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/api_client.py +14 -9
  17. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/app_types.py +2 -0
  18. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/auth.py +0 -12
  19. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/biolib_app_api.py +5 -8
  20. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/job_types.py +2 -1
  21. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/remote_endpoints.py +12 -10
  22. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/utils.py +41 -4
  23. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/__init__.py +6 -2
  24. pybiolib-1.1.1990/biolib/cli/auth.py +58 -0
  25. pybiolib-1.1.1990/biolib/cli/data_record.py +43 -0
  26. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/download_container.py +3 -1
  27. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/init.py +1 -0
  28. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/lfs.py +39 -9
  29. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/run.py +3 -2
  30. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/start.py +1 -0
  31. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/cloud_utils/cloud_utils.py +4 -3
  32. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/cache_state.py +1 -1
  33. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/docker_executor.py +126 -112
  34. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_storage.py +3 -4
  35. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_worker.py +34 -15
  36. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/remote_host_proxy.py +67 -29
  37. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/worker_thread.py +2 -2
  38. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/experiments/experiment.py +29 -30
  39. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/jobs/job.py +119 -43
  40. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/jobs/job_result.py +16 -16
  41. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/jobs/types.py +1 -1
  42. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/lfs/__init__.py +0 -2
  43. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/lfs/utils.py +23 -107
  44. pybiolib-1.1.1990/biolib/runtime/__init__.py +13 -0
  45. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/sdk/__init__.py +17 -4
  46. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/user/sign_in.py +8 -12
  47. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/__init__.py +1 -1
  48. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/app_uri.py +11 -4
  49. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/cache_state.py +2 -2
  50. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/multipart_uploader.py +23 -16
  51. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/seq_util.py +29 -13
  52. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/utils/zip/remote_zip.py +9 -17
  53. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/pyproject.toml +14 -12
  54. pybiolib-1.1.1730/biolib/biolib_api_client/biolib_account_api.py +0 -8
  55. pybiolib-1.1.1730/biolib/biolib_api_client/biolib_large_file_system_api.py +0 -34
  56. pybiolib-1.1.1730/biolib/runtime/__init__.py +0 -1
  57. pybiolib-1.1.1730/biolib/runtime/results.py +0 -20
  58. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/LICENSE +0 -0
  59. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/README.md +0 -0
  60. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/_internal/__init__.py +0 -0
  61. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/api/__init__.py +0 -0
  62. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/api/client.py +0 -0
  63. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/app/__init__.py +0 -0
  64. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/__init__.py +0 -0
  65. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/biolib_job_api.py +0 -0
  66. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/common_types.py +0 -0
  67. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/lfs_types.py +0 -0
  68. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_api_client/user_state.py +0 -0
  69. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/__init__.py +0 -0
  70. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/base_bbf_package.py +0 -0
  71. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/file_in_container.py +0 -0
  72. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/module_input.py +0 -0
  73. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/module_output_v2.py +0 -0
  74. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/remote_stream_seeker.py +0 -0
  75. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/saved_job.py +0 -0
  76. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/stdout_and_stderr.py +0 -0
  77. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/system_exception.py +0 -0
  78. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_binary_format/system_status_update.py +0 -0
  79. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_docker_client/__init__.py +0 -0
  80. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_download_container.py +0 -0
  81. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_errors.py +0 -0
  82. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/biolib_logging.py +0 -0
  83. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/push.py +1 -1
  84. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/cli/runtime.py +0 -0
  85. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/.gitignore +0 -0
  86. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/__init__.py +0 -0
  87. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/cloud_utils/__init__.py +0 -0
  88. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/__init__.py +0 -0
  89. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/cache_types.py +0 -0
  90. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/docker_image_cache.py +0 -0
  91. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/__init__.py +0 -0
  92. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/docker_types.py +0 -0
  93. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/tars/__init__.py +0 -0
  94. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/executors/types.py +0 -0
  95. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py +0 -0
  96. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/job_max_runtime_timer_thread.py +0 -0
  97. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/large_file_system.py +0 -0
  98. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/mappings.py +0 -0
  99. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/utilization_reporter_thread.py +0 -0
  100. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/job_worker/utils.py +0 -0
  101. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/socker_listener_thread.py +0 -0
  102. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/socket_sender_thread.py +0 -0
  103. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/utils.py +0 -0
  104. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/__init__.py +0 -0
  105. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/gunicorn_flask_application.py +0 -0
  106. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/webserver.py +0 -0
  107. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/webserver_types.py +0 -0
  108. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/compute_node/webserver/webserver_utils.py +0 -0
  109. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/experiments/__init__.py +0 -0
  110. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/experiments/types.py +0 -0
  111. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/jobs/__init__.py +0 -0
  112. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/lfs/cache.py +0 -0
  113. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/tables.py +0 -0
  114. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/templates/__init__.py +0 -0
  115. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/templates/example_app.py +0 -0
  116. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/typing_utils.py +0 -0
  117. {pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/user/__init__.py +0 -0
{pybiolib-1.1.1730 → pybiolib-1.1.1990}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: pybiolib
- Version: 1.1.1730
+ Version: 1.1.1990
  Summary: BioLib Python Client
  Home-page: https://github.com/biolib
  License: MIT
@@ -25,7 +25,6 @@ Requires-Dist: flask (>=2.0.1) ; extra == "compute-node"
  Requires-Dist: gunicorn (>=20.1.0) ; extra == "compute-node"
  Requires-Dist: importlib-metadata (>=1.6.1)
  Requires-Dist: pyyaml (>=5.3.1)
- Requires-Dist: requests (>=2.25.1)
  Requires-Dist: rich (>=12.4.4)
  Requires-Dist: typing_extensions (>=3.10.0) ; python_version < "3.8"
  Description-Content-Type: text/markdown
{pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/__init__.py
@@ -13,10 +13,12 @@ from biolib.biolib_api_client import BiolibApiClient as _BioLibApiClient, App
  from biolib.jobs import Job as _Job
  from biolib import user as _user
  from biolib.typing_utils import List, Optional
+ from biolib._internal.data_record import DataRecord as _DataRecord

  import biolib.api
  import biolib.app
  import biolib.cli
+ import biolib.sdk
  import biolib.utils


@@ -33,8 +35,8 @@ def load(uri: str) -> _BioLibApp:
  def search(
  search_query: Optional[str] = None,
  team: Optional[str] = None,
- count: int = 100
- ) -> List[str]:
+ count: int = 100,
+ ) -> List[str]:
  apps: List[str] = search_apps(search_query, team, count)
  return apps

@@ -47,6 +49,10 @@ def fetch_jobs(count: int = 25) -> List[_Job]:
  return _Job.fetch_jobs(count)


+ def fetch_data_records(uri: Optional[str] = None, count: Optional[int] = None) -> List[_DataRecord]:
+ return _DataRecord.fetch(uri, count)
+
+
  def get_experiment(name: str) -> Experiment:
  return Experiment(name)

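The change above exposes data records through a new top-level helper, biolib.fetch_data_records. A minimal usage sketch follows; the account handle and count are placeholders, not values from this diff.

    import biolib

    # Fetch up to 10 data records, optionally scoped by account or record URI.
    records = biolib.fetch_data_records(uri='my-account', count=10)
    for record in records:
        print(record.uri)
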
pybiolib-1.1.1990/biolib/_internal/data_record/__init__.py
@@ -0,0 +1 @@
+ from .data_record import DataRecord
pybiolib-1.1.1990/biolib/_internal/data_record/data_record.py
@@ -0,0 +1,166 @@
+ import os
+ from collections import namedtuple
+ from datetime import datetime
+ from fnmatch import fnmatch
+ from struct import Struct
+ from typing import Callable, Dict, List, Optional, Union, cast
+
+ from biolib import lfs
+ from biolib._internal.data_record.remote_storage_endpoint import DataRecordRemoteStorageEndpoint
+ from biolib._internal.http_client import HttpClient
+ from biolib.api import client as api_client
+ from biolib.biolib_api_client import AppGetResponse
+ from biolib.biolib_binary_format import LazyLoadedFile
+ from biolib.biolib_binary_format.utils import RemoteIndexableBuffer
+ from biolib.biolib_logging import logger
+ from biolib.utils.app_uri import parse_app_uri
+ from biolib.utils.zip.remote_zip import RemoteZip # type: ignore
+
+ PathFilter = Union[str, Callable[[str], bool]]
+
+
+ class DataRecord:
+ def __init__(self, uri: str):
+ self._uri = uri
+ uri_parsed = parse_app_uri(uri, use_account_as_name_default=False)
+ if not uri_parsed['app_name']:
+ raise ValueError('Expected parameter "uri" to contain resource name')
+
+ self._name = uri_parsed['app_name']
+
+ def __repr__(self):
+ return f'DataRecord: {self._uri}'
+
+ @property
+ def uri(self) -> str:
+ return self._uri
+
+ @property
+ def name(self) -> str:
+ return self._name
+
+ def list_files(self, path_filter: Optional[PathFilter] = None) -> List[LazyLoadedFile]:
+ app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': self._uri}).json()
+ remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
+ resource_version_uuid=app_response['app_version']['public_id'],
+ )
+ files: List[LazyLoadedFile] = []
+ with RemoteZip(url=remote_storage_endpoint.get_remote_url()) as remote_zip:
+ central_directory = remote_zip.get_central_directory()
+ for file_info in central_directory.values():
+ files.append(self._get_file(remote_storage_endpoint, file_info))
+
+ return self._get_filtered_files(files=files, path_filter=path_filter) if path_filter else files
+
+ def download_zip(self, output_path: str):
+ app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': self._uri}).json()
+ remote_storage_endpoint = DataRecordRemoteStorageEndpoint(
+ resource_version_uuid=app_response['app_version']['public_id'],
+ )
+ HttpClient.request(url=remote_storage_endpoint.get_remote_url(), response_path=output_path)
+
+ def download_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
+ filtered_files = self.list_files(path_filter=path_filter)
+
+ if len(filtered_files) == 0:
+ logger.debug('No files to save')
+ return
+
+ for file in filtered_files:
+ file_path = os.path.join(output_dir, file.path)
+ os.makedirs(os.path.dirname(file_path), exist_ok=True)
+ with open(file_path, mode='wb') as file_handle:
+ for chunk in file.get_data_iterator():
+ file_handle.write(chunk)
+
+ def save_files(self, output_dir: str, path_filter: Optional[PathFilter] = None) -> None:
+ self.download_files(output_dir=output_dir, path_filter=path_filter)
+
+ @staticmethod
+ def create(destination: str, data_path: str, name: Optional[str] = None) -> 'DataRecord':
+ assert os.path.isdir(data_path), f'The path "{data_path}" is not a directory.'
+ record_name = name if name else 'data-record-' + datetime.now().isoformat().split('.')[0].replace(':', '-')
+ record_uri = lfs.create_large_file_system(lfs_uri=f'{destination}/{record_name}')
+ record_version_uri = lfs.push_large_file_system(lfs_uri=record_uri, input_dir=data_path)
+ return DataRecord(uri=record_version_uri)
+
+ @staticmethod
+ def fetch(uri: Optional[str] = None, count: Optional[int] = None) -> List['DataRecord']:
+ max_page_size = 1_000
+ params: Dict[str, Union[str, int]] = {
+ 'page_size': str(count or max_page_size),
+ 'resource_type': 'data-record',
+ }
+ if uri:
+ uri_parsed = parse_app_uri(uri, use_account_as_name_default=False)
+ params['account_handle'] = uri_parsed['account_handle_normalized']
+ if uri_parsed['app_name_normalized']:
+ params['app_name'] = uri_parsed['app_name_normalized']
+
+ results = api_client.get(path='/apps/', params=params).json()['results']
+ if count is None and len(results) == max_page_size:
+ logger.warning(
+ f'Fetch results exceeded maximum count of {max_page_size}. Some data records might not be fetched.'
+ )
+
+ return [DataRecord(result['resource_uri']) for result in results]
+
+ @staticmethod
+ def _get_file(remote_storage_endpoint: DataRecordRemoteStorageEndpoint, file_info: Dict) -> LazyLoadedFile:
+ local_file_header_signature_bytes = b'\x50\x4b\x03\x04'
+ local_file_header_struct = Struct('<H2sHHHIIIHH')
+ LocalFileHeader = namedtuple(
+ 'LocalFileHeader',
+ (
+ 'version',
+ 'flags',
+ 'compression_raw',
+ 'mod_time',
+ 'mod_date',
+ 'crc_32_expected',
+ 'compressed_size_raw',
+ 'uncompressed_size_raw',
+ 'file_name_len',
+ 'extra_field_len',
+ ),
+ )
+
+ local_file_header_start = file_info['header_offset'] + len(local_file_header_signature_bytes)
+ local_file_header_end = local_file_header_start + local_file_header_struct.size
+
+ def file_start_func() -> int:
+ local_file_header_response = HttpClient.request(
+ url=remote_storage_endpoint.get_remote_url(),
+ headers={'range': f'bytes={local_file_header_start}-{local_file_header_end - 1}'},
+ timeout_in_seconds=300,
+ )
+ local_file_header = LocalFileHeader._make(
+ local_file_header_struct.unpack(local_file_header_response.content)
+ )
+ file_start: int = (
+ local_file_header_end + local_file_header.file_name_len + local_file_header.extra_field_len
+ )
+ return file_start
+
+ return LazyLoadedFile(
+ buffer=RemoteIndexableBuffer(endpoint=remote_storage_endpoint),
+ length=file_info['file_size'],
+ path=file_info['filename'],
+ start=None,
+ start_func=file_start_func,
+ )
+
+ @staticmethod
+ def _get_filtered_files(files: List[LazyLoadedFile], path_filter: PathFilter) -> List[LazyLoadedFile]:
+ if not (isinstance(path_filter, str) or callable(path_filter)):
+ raise Exception('Expected path_filter to be a string or a function')
+
+ if callable(path_filter):
+ return list(filter(lambda x: path_filter(x.path), files)) # type: ignore
+
+ glob_filter = cast(str, path_filter)
+
+ def _filter_function(file: LazyLoadedFile) -> bool:
+ return fnmatch(file.path, glob_filter)
+
+ return list(filter(_filter_function, files))
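The new DataRecord class lists, filters, and downloads files directly from the record's remote ZIP archive without fetching the whole archive first. A minimal usage sketch; the record URI and output directory below are placeholders.

    from biolib._internal.data_record import DataRecord

    record = DataRecord('my-account/my-data-record')
    # path_filter accepts a glob string or a callable that takes the file path.
    for file in record.list_files(path_filter='*.csv'):
        print(file.path)
    record.download_files(output_dir='record_output', path_filter='*.csv')
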
pybiolib-1.1.1990/biolib/_internal/data_record/remote_storage_endpoint.py
@@ -0,0 +1,27 @@
+ from datetime import datetime, timedelta
+
+ from biolib.api import client as api_client
+ from biolib.biolib_api_client.lfs_types import LargeFileSystemVersion
+ from biolib.biolib_binary_format.utils import RemoteEndpoint
+ from biolib.biolib_logging import logger
+
+
+ class DataRecordRemoteStorageEndpoint(RemoteEndpoint):
+ def __init__(self, resource_version_uuid: str):
+ self._resource_version_uuid: str = resource_version_uuid
+ self._expires_at = None
+ self._presigned_url = None
+
+ def get_remote_url(self):
+ if not self._presigned_url or datetime.utcnow() > self._expires_at:
+ lfs_version: LargeFileSystemVersion = api_client.get(
+ path=f'/lfs/versions/{self._resource_version_uuid}/',
+ ).json()
+ self._presigned_url = lfs_version['presigned_download_url']
+ self._expires_at = datetime.utcnow() + timedelta(minutes=8)
+ logger.debug(
+ f'DataRecord "{self._resource_version_uuid}" fetched presigned URL '
+ f'with expiry at {self._expires_at.isoformat()}'
+ )
+
+ return self._presigned_url
pybiolib-1.1.1990/biolib/_internal/fuse_mount/__init__.py
@@ -0,0 +1 @@
+ from .experiment_fuse_mount import ExperimentFuseMount
pybiolib-1.1.1990/biolib/_internal/fuse_mount/experiment_fuse_mount.py
@@ -0,0 +1,209 @@
+ import errno
+ import os
+ import stat
+ from datetime import datetime, timezone
+ from time import time
+
+ from biolib._internal.libs.fusepy import FUSE, FuseOSError, Operations
+ from biolib.biolib_errors import BioLibError
+ from biolib.jobs import Job
+ from biolib.typing_utils import Dict, List, Optional, Tuple, TypedDict
+
+
+ class _AttributeDict(TypedDict):
+ st_atime: int
+ st_ctime: int
+ st_gid: int
+ st_mode: int
+ st_mtime: int
+ st_nlink: int
+ st_size: int
+ st_uid: int
+
+
+ _SUCCESS_CODE = 0
+
+
+ class ExperimentFuseMount(Operations):
+ def __init__(self, experiment):
+ self._experiment = experiment
+ self._job_names_map: Optional[Dict[str, Job]] = None
+ self._jobs_last_fetched_at: float = 0.0
+ self._mounted_at_epoch_seconds: int = int(time())
+
+ @staticmethod
+ def mount_experiment(experiment, mount_path: str) -> None:
+ FUSE(
+ operations=ExperimentFuseMount(experiment),
+ mountpoint=mount_path,
+ nothreads=True,
+ foreground=True,
+ allow_other=False,
+ )
+
+ def getattr(self, path: str, fh=None) -> _AttributeDict:
+ if path == '/':
+ return self._get_directory_attributes(timestamp_epoch_seconds=self._mounted_at_epoch_seconds)
+
+ job, path_in_job = self._parse_path(path)
+ job_finished_at_epoch_seconds: int = int(
+ datetime.fromisoformat(job.to_dict()['finished_at'].rstrip('Z')).replace(tzinfo=timezone.utc).timestamp()
+ )
+
+ if path_in_job == '/':
+ return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)
+
+ try:
+ file = job.get_output_file(path_in_job)
+ return self._get_file_attributes(
+ timestamp_epoch_seconds=job_finished_at_epoch_seconds,
+ size_in_bytes=file.length,
+ )
+ except BioLibError:
+ # file not found
+ pass
+
+ file_paths_in_job = [file.path for file in job.list_output_files()]
+
+ for file_path_in_job in file_paths_in_job:
+ if file_path_in_job.startswith(path_in_job):
+ return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)
+
+ raise FuseOSError(errno.ENOENT) from None # No such file or directory
+
+ def readdir(self, path: str, fh: int) -> List[str]:
+ directory_entries = ['.', '..']
+
+ if path == '/':
+ directory_entries.extend(self._get_job_names_map(refresh_jobs=True).keys())
+ else:
+ job, path_in_job = self._parse_path(path)
+ dir_path_in_job = '/' if path_in_job == '/' else path_in_job + '/'
+ depth = dir_path_in_job.count('/')
+ directory_entries.extend(
+ set(
+ [
+ file.path.split('/')[depth]
+ for file in job.list_output_files()
+ if file.path.startswith(dir_path_in_job)
+ ]
+ )
+ )
+
+ return directory_entries
+
+ def open(self, path: str, flags: int) -> int:
+ job, path_in_job = self._parse_path(path)
+ try:
+ job.get_output_file(path_in_job)
+ except BioLibError:
+ # file not found
+ raise FuseOSError(errno.ENOENT) from None
+
+ return 1234 # dummy file handle
+
+ def read(self, path: str, size: int, offset: int, fh: int) -> bytes:
+ job, path_in_job = self._parse_path(path)
+ try:
+ file = job.get_output_file(path_in_job)
+ except BioLibError:
+ raise FuseOSError(errno.ENOENT) from None # No such file or directory
+
+ return file.get_data(start=offset, length=size)
+
+ def release(self, path: str, fh: int) -> int:
+ return _SUCCESS_CODE
+
+ def releasedir(self, path: str, fh: int) -> int:
+ return _SUCCESS_CODE
+
+ def flush(self, path: str, fh: int) -> int:
+ return _SUCCESS_CODE
+
+ @staticmethod
+ def _get_directory_attributes(timestamp_epoch_seconds: int) -> _AttributeDict:
+ return _AttributeDict(
+ st_atime=timestamp_epoch_seconds,
+ st_ctime=timestamp_epoch_seconds,
+ st_gid=os.getgid(),
+ st_mode=stat.S_IFDIR | 0o555, # Directory that is readable and executable by owner, group, and others.
+ st_mtime=timestamp_epoch_seconds,
+ st_nlink=1,
+ st_size=1,
+ st_uid=os.getuid(),
+ )
+
+ @staticmethod
+ def _get_file_attributes(timestamp_epoch_seconds: int, size_in_bytes: int) -> _AttributeDict:
+ return _AttributeDict(
+ st_atime=timestamp_epoch_seconds,
+ st_ctime=timestamp_epoch_seconds,
+ st_gid=os.getgid(),
+ st_mode=stat.S_IFREG | 0o444, # Regular file with read permissions for owner, group, and others.
+ st_mtime=timestamp_epoch_seconds,
+ st_nlink=1,
+ st_size=size_in_bytes,
+ st_uid=os.getuid(),
+ )
+
+ def _get_job_names_map(self, refresh_jobs=False) -> Dict[str, Job]:
+ current_time = time()
+ if not self._job_names_map or (current_time - self._jobs_last_fetched_at > 1 and refresh_jobs):
+ self._jobs_last_fetched_at = current_time
+ self._job_names_map = {job.get_name(): job for job in self._experiment.get_jobs(status='completed')}
+
+ return self._job_names_map
+
+ def _parse_path(self, path: str) -> Tuple[Job, str]:
+ path_splitted = path.split('/')
+ job_name = path_splitted[1]
+ path_in_job = '/' + '/'.join(path_splitted[2:])
+ job = self._get_job_names_map().get(job_name)
+ if not job:
+ raise FuseOSError(errno.ENOENT) # No such file or directory
+
+ return job, path_in_job
+
+ # ----------------------------------- File system methods not implemented below -----------------------------------
+
+ def chmod(self, path, mode):
+ raise FuseOSError(errno.EACCES)
+
+ def chown(self, path, uid, gid):
+ raise FuseOSError(errno.EACCES)
+
+ def mknod(self, path, mode, dev):
+ raise FuseOSError(errno.EACCES)
+
+ def rmdir(self, path):
+ raise FuseOSError(errno.EACCES)
+
+ def mkdir(self, path, mode):
+ raise FuseOSError(errno.EACCES)
+
+ def unlink(self, path):
+ raise FuseOSError(errno.EACCES)
+
+ def symlink(self, target, source):
+ raise FuseOSError(errno.EACCES)
+
+ def rename(self, old, new):
+ raise FuseOSError(errno.EACCES)
+
+ def link(self, target, source):
+ raise FuseOSError(errno.EACCES)
+
+ def utimens(self, path, times=None):
+ raise FuseOSError(errno.EACCES)
+
+ def create(self, path, mode, fi=None):
+ raise FuseOSError(errno.EACCES)
+
+ def write(self, path, data, offset, fh):
+ raise FuseOSError(errno.EACCES)
+
+ def truncate(self, path, length, fh=None):
+ raise FuseOSError(errno.EACCES)
+
+ def fsync(self, path, datasync, fh):
+ raise FuseOSError(errno.EACCES)
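ExperimentFuseMount exposes an experiment's completed jobs as a read-only FUSE filesystem, with one top-level directory per job name. A sketch of how it might be mounted; the experiment name and mount point are placeholders, a working local FUSE installation is assumed, and the call blocks because the mount runs in the foreground.

    import biolib
    from biolib._internal.fuse_mount import ExperimentFuseMount

    experiment = biolib.get_experiment('my-experiment')
    # Blocks until unmounted; browse completed job outputs under /tmp/my-experiment.
    ExperimentFuseMount.mount_experiment(experiment, mount_path='/tmp/my-experiment')
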
{pybiolib-1.1.1730 → pybiolib-1.1.1990}/biolib/_internal/http_client.py
@@ -1,25 +1,25 @@
  import json
  import platform
- import time
+ import shutil
  import socket
  import ssl
  import subprocess
- import urllib.request
+ import time
  import urllib.error
  import urllib.parse
+ import urllib.request

  from biolib.biolib_logging import logger_no_user_data
- from biolib.typing_utils import Dict, Optional, Union, Literal, cast
+ from biolib.typing_utils import Dict, Literal, Optional, Union, cast
+
+ _HttpMethod = Literal['GET', 'POST', 'PATCH', 'PUT']


  def _create_ssl_context():
  context = ssl.create_default_context()
  try:
  if platform.system() == 'Darwin':
- certificates = subprocess.check_output(
- "security find-certificate -a -p",
- shell=True
- ).decode('utf-8')
+ certificates = subprocess.check_output('security find-certificate -a -p', shell=True).decode('utf-8')
  context.load_verify_locations(cadata=certificates)
  except BaseException:
  pass
@@ -33,7 +33,7 @@ class HttpError(urllib.error.HTTPError):
  code=http_error.code,
  msg=http_error.msg, # type: ignore
  hdrs=http_error.hdrs, # type: ignore
- fp=http_error.fp
+ fp=http_error.fp,
  )

  def __str__(self):
@@ -42,15 +42,24 @@


  class HttpResponse:
- def __init__(self, response):
+ def __init__(self, response, response_path) -> None:
  self.headers: Dict[str, str] = dict(response.headers)
  self.status_code: int = int(response.status)
- self.content: bytes = response.read()
+ self.response_path = response_path
+ if self.response_path:
+ with open(self.response_path, 'wb') as out_file:
+ shutil.copyfileobj(response, out_file)
+ else:
+ self.content: bytes = response.read()
  self.url: str = response.geturl()

  @property
  def text(self) -> str:
- return cast(str, self.content.decode('utf-8'))
+ if self.response_path:
+ with open(self.response_path, 'rb') as fp:
+ return cast(str, fp.read().decode('utf-8'))
+ else:
+ return cast(str, self.content.decode('utf-8'))

  def json(self):
  return json.loads(self.text)
@@ -61,12 +70,13 @@ class HttpClient:

  @staticmethod
  def request(
- url: str,
- method: Optional[Literal['GET', 'POST', 'PATCH', 'PUT']] = None,
- data: Optional[Union[Dict, bytes]] = None,
- headers: Optional[Dict[str, str]] = None,
- retries: int = 5,
- timeout_in_seconds: Optional[int] = None,
+ url: str,
+ method: Optional[_HttpMethod] = None,
+ data: Optional[Union[Dict, bytes]] = None,
+ headers: Optional[Dict[str, str]] = None,
+ retries: int = 5,
+ timeout_in_seconds: Optional[int] = None,
+ response_path: Optional[str] = None,
  ) -> HttpResponse:
  if not HttpClient.ssl_context:
  HttpClient.ssl_context = _create_ssl_context()
@@ -84,7 +94,7 @@
  if timeout_in_seconds is None:
  timeout_in_seconds = 60 if isinstance(data, dict) else 180 # TODO: Calculate timeout based on data size

- last_error: Optional[urllib.error.URLError] = None
+ last_error: Optional[Exception] = None
  for retry_count in range(retries + 1):
  if retry_count > 0:
  time.sleep(5 * retry_count)
@@ -95,23 +105,32 @@
  context=HttpClient.ssl_context,
  timeout=timeout_in_seconds,
  ) as response:
- return HttpResponse(response)
+ return HttpResponse(response, response_path)

  except urllib.error.HTTPError as error:
- if error.code == 502:
- logger_no_user_data.debug(f'HTTP {method} request failed with status 502 for "{url}"')
+ if error.code == 429:
+ logger_no_user_data.warning(f'HTTP {method} request failed with status 429 for "{url}"')
+ last_error = error
+ elif error.code == 502:
+ logger_no_user_data.warning(f'HTTP {method} request failed with status 502 for "{url}"')
  last_error = error
  elif error.code == 503:
- logger_no_user_data.debug(f'HTTP {method} request failed with status 503 for "{url}"')
+ logger_no_user_data.warning(f'HTTP {method} request failed with status 503 for "{url}"')
+ last_error = error
+ elif error.code == 504:
+ logger_no_user_data.warning(f'HTTP {method} request failed with status 504 for "{url}"')
  last_error = error
  else:
  raise HttpError(error) from None

  except urllib.error.URLError as error:
  if isinstance(error.reason, socket.timeout):
- logger_no_user_data.debug(f'HTTP {method} request failed with read timeout for "{url}"')
+ logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
  last_error = error
  else:
  raise error
+ except socket.timeout as error:
+ logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
+ last_error = error

  raise last_error or Exception(f'HTTP {method} request failed after {retries} retries for "{url}"')
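The new response_path parameter lets HttpClient.request stream a response body straight to disk via shutil.copyfileobj instead of buffering it in memory, which is what DataRecord.download_zip uses above. A sketch; the URL and output filename are placeholders.

    from biolib._internal.http_client import HttpClient

    # The body is written to 'archive.zip'; HttpResponse.content is only populated
    # when response_path is not given.
    HttpClient.request(url='https://example.com/archive.zip', response_path='archive.zip')
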
pybiolib-1.1.1990/biolib/_internal/libs/__init__.py
@@ -0,0 +1 @@
+ # Note: this directory is purely for libraries to be directly included instead of as dependencies