pybiolib 1.1.1881__py3-none-any.whl → 1.2.7.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/__init__.py +11 -4
- biolib/_data_record/data_record.py +278 -0
- biolib/_internal/data_record/__init__.py +1 -1
- biolib/_internal/data_record/data_record.py +97 -151
- biolib/_internal/data_record/remote_storage_endpoint.py +18 -7
- biolib/_internal/file_utils.py +77 -0
- biolib/_internal/fuse_mount/__init__.py +1 -0
- biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
- biolib/_internal/http_client.py +31 -9
- biolib/_internal/lfs/__init__.py +1 -0
- biolib/_internal/libs/__init__.py +1 -0
- biolib/_internal/libs/fusepy/__init__.py +1257 -0
- biolib/_internal/push_application.py +6 -1
- biolib/_internal/runtime.py +3 -56
- biolib/_internal/types/__init__.py +4 -0
- biolib/_internal/types/app.py +9 -0
- biolib/_internal/types/data_record.py +40 -0
- biolib/_internal/types/experiment.py +10 -0
- biolib/_internal/types/resource.py +14 -0
- biolib/_internal/types/typing.py +7 -0
- biolib/_internal/utils/multinode.py +264 -0
- biolib/_runtime/runtime.py +84 -0
- biolib/api/__init__.py +1 -0
- biolib/api/client.py +39 -17
- biolib/app/app.py +34 -71
- biolib/biolib_api_client/api_client.py +9 -2
- biolib/biolib_api_client/app_types.py +3 -2
- biolib/biolib_api_client/biolib_job_api.py +6 -0
- biolib/biolib_api_client/job_types.py +4 -4
- biolib/biolib_api_client/lfs_types.py +8 -2
- biolib/biolib_binary_format/remote_endpoints.py +12 -10
- biolib/biolib_binary_format/utils.py +23 -3
- biolib/cli/auth.py +1 -1
- biolib/cli/data_record.py +45 -6
- biolib/cli/lfs.py +10 -6
- biolib/compute_node/cloud_utils/cloud_utils.py +13 -16
- biolib/compute_node/job_worker/executors/docker_executor.py +127 -108
- biolib/compute_node/job_worker/job_storage.py +17 -5
- biolib/compute_node/job_worker/job_worker.py +25 -15
- biolib/compute_node/remote_host_proxy.py +72 -84
- biolib/compute_node/webserver/webserver_types.py +0 -1
- biolib/compute_node/webserver/worker_thread.py +42 -39
- biolib/experiments/experiment.py +75 -44
- biolib/jobs/job.py +98 -19
- biolib/jobs/job_result.py +46 -21
- biolib/jobs/types.py +1 -1
- biolib/runtime/__init__.py +2 -1
- biolib/sdk/__init__.py +18 -7
- biolib/typing_utils.py +2 -7
- biolib/user/sign_in.py +2 -2
- biolib/utils/seq_util.py +38 -35
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/METADATA +1 -1
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/RECORD +57 -45
- biolib/experiments/types.py +0 -9
- biolib/lfs/__init__.py +0 -4
- biolib/lfs/utils.py +0 -153
- /biolib/{lfs → _internal/lfs}/cache.py +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/entry_points.txt +0 -0
biolib/app/app.py
CHANGED
@@ -1,29 +1,26 @@
|
|
1
|
-
import os
|
2
1
|
import io
|
3
|
-
import random
|
4
2
|
import json
|
3
|
+
import os
|
4
|
+
import random
|
5
5
|
import string
|
6
|
-
|
7
6
|
from pathlib import Path
|
7
|
+
|
8
8
|
from biolib import utils
|
9
|
-
from biolib.
|
10
|
-
from biolib.compute_node.job_worker.job_worker import JobWorker
|
11
|
-
from biolib.experiments.experiment import Experiment
|
12
|
-
from biolib.jobs import Job
|
13
|
-
from biolib.typing_utils import Optional, cast
|
14
|
-
from biolib.biolib_api_client import CreatedJobDict, JobState
|
15
|
-
from biolib.jobs.types import JobDict
|
9
|
+
from biolib.biolib_api_client import JobState
|
16
10
|
from biolib.biolib_api_client.app_types import App, AppVersion
|
17
|
-
from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
|
18
11
|
from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
|
12
|
+
from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
|
19
13
|
from biolib.biolib_binary_format import ModuleInput
|
20
14
|
from biolib.biolib_errors import BioLibError
|
21
15
|
from biolib.biolib_logging import logger
|
16
|
+
from biolib.compute_node.job_worker.job_worker import JobWorker
|
17
|
+
from biolib.experiments.experiment import Experiment
|
18
|
+
from biolib.jobs import Job
|
19
|
+
from biolib.typing_utils import Optional
|
22
20
|
from biolib.utils.app_uri import parse_app_uri
|
23
21
|
|
24
22
|
|
25
23
|
class BioLibApp:
|
26
|
-
|
27
24
|
def __init__(self, uri: str):
|
28
25
|
app_response = BiolibAppApi.get_by_uri(uri)
|
29
26
|
self._app: App = app_response['app']
|
@@ -48,17 +45,18 @@ class BioLibApp:
|
|
48
45
|
return self._app_version
|
49
46
|
|
50
47
|
def cli(
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
48
|
+
self,
|
49
|
+
args=None,
|
50
|
+
stdin=None,
|
51
|
+
files=None,
|
52
|
+
override_command=False,
|
53
|
+
machine='',
|
54
|
+
blocking: bool = True,
|
55
|
+
experiment_id: Optional[str] = None,
|
56
|
+
result_prefix: Optional[str] = None,
|
57
|
+
timeout: Optional[int] = None,
|
58
|
+
notify: bool = False,
|
59
|
+
machine_count: Optional[int] = None,
|
62
60
|
) -> Job:
|
63
61
|
if not experiment_id:
|
64
62
|
experiment = Experiment.get_experiment_in_context()
|
@@ -78,7 +76,9 @@ class BioLibApp:
|
|
78
76
|
|
79
77
|
return self._run_locally(module_input_serialized)
|
80
78
|
|
81
|
-
job =
|
79
|
+
job = Job._start_job_in_cloud( # pylint: disable=protected-access
|
80
|
+
app_uri=self._app_uri,
|
81
|
+
app_version_uuid=self._app_version['public_id'],
|
82
82
|
experiment_id=experiment_id,
|
83
83
|
machine=machine,
|
84
84
|
module_input_serialized=module_input_serialized,
|
@@ -86,6 +86,7 @@ class BioLibApp:
|
|
86
86
|
override_command=override_command,
|
87
87
|
result_prefix=result_prefix,
|
88
88
|
timeout=timeout,
|
89
|
+
requested_machine_count=machine_count,
|
89
90
|
)
|
90
91
|
if blocking:
|
91
92
|
# TODO: Deprecate utils.STREAM_STDOUT and always stream logs by simply calling job.stream_logs()
|
@@ -93,8 +94,8 @@ class BioLibApp:
|
|
93
94
|
utils.STREAM_STDOUT = True
|
94
95
|
|
95
96
|
enable_print = bool(
|
96
|
-
utils.STREAM_STDOUT
|
97
|
-
(self._app_version.get('main_output_file') or self._app_version.get('stdout_render_type') == 'text')
|
97
|
+
utils.STREAM_STDOUT
|
98
|
+
and (self._app_version.get('main_output_file') or self._app_version.get('stdout_render_type') == 'text')
|
98
99
|
)
|
99
100
|
job._stream_logs(enable_print=enable_print) # pylint: disable=protected-access
|
100
101
|
|
@@ -108,11 +109,11 @@ class BioLibApp:
|
|
108
109
|
self.cli()
|
109
110
|
|
110
111
|
else:
|
111
|
-
raise BioLibError(
|
112
|
+
raise BioLibError("""
|
112
113
|
Calling an app directly with app() is currently being reworked.
|
113
114
|
To use the previous functionality, please call app.cli() instead.
|
114
115
|
Example: "app.cli('--help')"
|
115
|
-
|
116
|
+
""")
|
116
117
|
|
117
118
|
@staticmethod
|
118
119
|
def _get_serialized_module_input(args=None, stdin=None, files=None) -> bytes:
|
@@ -142,9 +143,9 @@ Example: "app.cli('--help')"
|
|
142
143
|
args[idx] = Path(arg).name
|
143
144
|
|
144
145
|
# support --myarg=file.txt
|
145
|
-
elif os.path.isfile(arg.split(
|
146
|
-
files.append(arg.split(
|
147
|
-
args[idx] = arg.split(
|
146
|
+
elif os.path.isfile(arg.split('=')[-1]) or os.path.isdir(arg.split('=')[-1]):
|
147
|
+
files.append(arg.split('=')[-1])
|
148
|
+
args[idx] = arg.split('=')[0] + '=' + Path(arg.split('=')[-1]).name
|
148
149
|
else:
|
149
150
|
pass # a normal string arg was given
|
150
151
|
else:
|
@@ -154,7 +155,7 @@ Example: "app.cli('--help')"
|
|
154
155
|
elif isinstance(arg, io.BytesIO):
|
155
156
|
file_data = arg.getvalue()
|
156
157
|
else:
|
157
|
-
raise Exception(f
|
158
|
+
raise Exception(f'Unexpected type of argument: {arg}')
|
158
159
|
files_dict[f'/{tmp_filename}'] = file_data
|
159
160
|
args[idx] = tmp_filename
|
160
161
|
|
@@ -192,48 +193,10 @@ Example: "app.cli('--help')"
|
|
192
193
|
)
|
193
194
|
return module_input_serialized
|
194
195
|
|
195
|
-
def _start_in_cloud(
|
196
|
-
self,
|
197
|
-
module_input_serialized: bytes,
|
198
|
-
override_command: bool = False,
|
199
|
-
machine: Optional[str] = None,
|
200
|
-
experiment_id: Optional[str] = None,
|
201
|
-
result_prefix: Optional[str] = None,
|
202
|
-
timeout: Optional[int] = None,
|
203
|
-
notify: bool = False,
|
204
|
-
) -> Job:
|
205
|
-
if len(module_input_serialized) < 500_000:
|
206
|
-
_job_dict = BiolibJobApi.create_job_with_data(
|
207
|
-
app_resource_name_prefix=parse_app_uri(self._app_uri)['resource_name_prefix'],
|
208
|
-
app_version_uuid=self._app_version['public_id'],
|
209
|
-
arguments_override_command=override_command,
|
210
|
-
experiment_uuid=experiment_id,
|
211
|
-
module_input_serialized=module_input_serialized,
|
212
|
-
notify=notify,
|
213
|
-
requested_machine=machine,
|
214
|
-
requested_timeout_seconds=timeout,
|
215
|
-
result_name_prefix=result_prefix,
|
216
|
-
)
|
217
|
-
return Job(cast(JobDict, _job_dict))
|
218
|
-
|
219
|
-
job_dict: CreatedJobDict = BiolibJobApi.create(
|
220
|
-
app_resource_name_prefix=parse_app_uri(self._app_uri)['resource_name_prefix'],
|
221
|
-
app_version_id=self._app_version['public_id'],
|
222
|
-
experiment_uuid=experiment_id,
|
223
|
-
machine=machine,
|
224
|
-
notify=notify,
|
225
|
-
override_command=override_command,
|
226
|
-
timeout=timeout,
|
227
|
-
)
|
228
|
-
JobStorage.upload_module_input(job=job_dict, module_input_serialized=module_input_serialized)
|
229
|
-
cloud_job = BiolibJobApi.create_cloud_job(job_id=job_dict['public_id'], result_name_prefix=result_prefix)
|
230
|
-
logger.debug(f"Cloud: Job created with id {cloud_job['public_id']}")
|
231
|
-
return Job(cast(JobDict, job_dict))
|
232
|
-
|
233
196
|
def _run_locally(self, module_input_serialized: bytes) -> Job:
|
234
197
|
job_dict = BiolibJobApi.create(
|
235
198
|
app_version_id=self._app_version['public_id'],
|
236
|
-
app_resource_name_prefix=parse_app_uri(self._app_uri)['resource_name_prefix']
|
199
|
+
app_resource_name_prefix=parse_app_uri(self._app_uri)['resource_name_prefix'],
|
237
200
|
)
|
238
201
|
job = Job(job_dict)
|
239
202
|
|
@@ -6,7 +6,7 @@ import os
|
|
6
6
|
from datetime import datetime, timezone
|
7
7
|
from json.decoder import JSONDecodeError
|
8
8
|
|
9
|
-
from biolib.
|
9
|
+
from biolib._runtime.runtime import Runtime
|
10
10
|
from biolib._internal.http_client import HttpClient
|
11
11
|
from biolib.typing_utils import Optional
|
12
12
|
from biolib.biolib_errors import BioLibError
|
@@ -192,9 +192,16 @@ class BiolibApiClient:
|
|
192
192
|
api_client.refresh_access_token()
|
193
193
|
|
194
194
|
@staticmethod
|
195
|
-
def
|
195
|
+
def is_reauthentication_needed() -> bool:
|
196
196
|
api_client = BiolibApiClient.get()
|
197
197
|
if not api_client.is_signed_in and not Runtime.check_is_environment_biolib_app():
|
198
|
+
return True
|
199
|
+
else:
|
200
|
+
return False
|
201
|
+
|
202
|
+
@staticmethod
|
203
|
+
def assert_is_signed_in(authenticated_action_description: str) -> None:
|
204
|
+
if BiolibApiClient.is_reauthentication_needed():
|
198
205
|
raise BioLibError(
|
199
206
|
f'You must be signed in to {authenticated_action_description}. '
|
200
207
|
f'Please set the environment variable "BIOLIB_TOKEN"'
|
@@ -1,7 +1,7 @@
|
|
1
1
|
from enum import Enum
|
2
2
|
|
3
|
-
from biolib.typing_utils import TypedDict, List, Optional, Dict, Literal
|
4
3
|
from biolib.biolib_api_client.common_types import SemanticVersion
|
4
|
+
from biolib.typing_utils import Dict, List, Literal, Optional, TypedDict
|
5
5
|
|
6
6
|
|
7
7
|
class AppVersionSlim(SemanticVersion):
|
@@ -16,6 +16,7 @@ class AppVersion(AppVersionSlim):
|
|
16
16
|
source_code_license: str
|
17
17
|
stdout_render_type: Literal['text', 'markdown']
|
18
18
|
main_output_file: Optional[str]
|
19
|
+
app_uri: str
|
19
20
|
|
20
21
|
|
21
22
|
class App(TypedDict):
|
@@ -31,6 +32,7 @@ class App(TypedDict):
|
|
31
32
|
public_id: str
|
32
33
|
state: str
|
33
34
|
resource_uri: str
|
35
|
+
type: str
|
34
36
|
|
35
37
|
|
36
38
|
class AppGetResponse(TypedDict):
|
@@ -99,7 +101,6 @@ class _AppVersionOnJob(TypedDict):
|
|
99
101
|
|
100
102
|
class AppOnJob(TypedDict):
|
101
103
|
allow_client_side_execution: bool
|
102
|
-
can_push_data_record_for_user: bool
|
103
104
|
state: Literal['public', 'draft']
|
104
105
|
|
105
106
|
|
@@ -46,6 +46,7 @@ class BiolibJobApi:
|
|
46
46
|
experiment_uuid: Optional[str] = None,
|
47
47
|
timeout: Optional[int] = None,
|
48
48
|
notify: bool = False,
|
49
|
+
requested_machine_count: Optional[int] = None,
|
49
50
|
):
|
50
51
|
data = {
|
51
52
|
'app_version_id': app_version_id,
|
@@ -73,6 +74,9 @@ class BiolibJobApi:
|
|
73
74
|
'requested_machine': machine
|
74
75
|
})
|
75
76
|
|
77
|
+
if requested_machine_count:
|
78
|
+
data.update({'requested_machine_count': requested_machine_count})
|
79
|
+
|
76
80
|
if experiment_uuid:
|
77
81
|
data['experiment_uuid'] = experiment_uuid
|
78
82
|
|
@@ -156,6 +160,7 @@ class BiolibJobApi:
|
|
156
160
|
caller_job_uuid: Optional[str] = None,
|
157
161
|
requested_timeout_seconds: Optional[int] = None,
|
158
162
|
notify: bool = False,
|
163
|
+
requested_machine_count: Optional[int] = None,
|
159
164
|
) -> Dict:
|
160
165
|
job_dict: Dict = biolib.api.client.post(
|
161
166
|
path='/jobs/create_job_with_data/',
|
@@ -171,6 +176,7 @@ class BiolibJobApi:
|
|
171
176
|
'client-version': BIOLIB_PACKAGE_VERSION,
|
172
177
|
'experiment-uuid': experiment_uuid,
|
173
178
|
'requested-machine': requested_machine,
|
179
|
+
'requested-machine-count': str(requested_machine_count) if requested_machine_count else None,
|
174
180
|
'result-name-prefix': result_name_prefix,
|
175
181
|
'requested-timeout-seconds': str(requested_timeout_seconds) if requested_timeout_seconds else None,
|
176
182
|
'notify': 'true' if notify else 'false',
|
@@ -1,9 +1,8 @@
|
|
1
1
|
from enum import Enum
|
2
2
|
|
3
|
-
from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo
|
4
|
-
from biolib.typing_utils import TypedDict, Optional, List
|
5
|
-
|
6
3
|
from biolib.biolib_api_client.app_types import AppVersionOnJob, RemoteHost
|
4
|
+
from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo
|
5
|
+
from biolib.typing_utils import List, Optional, TypedDict
|
7
6
|
|
8
7
|
|
9
8
|
class JobState(Enum):
|
@@ -15,6 +14,7 @@ class JobState(Enum):
|
|
15
14
|
|
16
15
|
|
17
16
|
class _Job(TypedDict):
|
17
|
+
app_uri: str
|
18
18
|
app_version: AppVersionOnJob
|
19
19
|
arguments_override_command: bool
|
20
20
|
auth_token: str
|
@@ -22,10 +22,10 @@ class _Job(TypedDict):
|
|
22
22
|
created_at: str
|
23
23
|
federated_job_uuid: Optional[str]
|
24
24
|
public_id: str
|
25
|
-
uuid: str
|
26
25
|
remote_hosts_with_warning: List[RemoteHost]
|
27
26
|
state: str
|
28
27
|
user_id: Optional[str]
|
28
|
+
uuid: str
|
29
29
|
|
30
30
|
|
31
31
|
# type optional keys with total=False
|
@@ -1,13 +1,19 @@
|
|
1
1
|
from biolib.typing_utils import TypedDict
|
2
2
|
|
3
3
|
|
4
|
-
class
|
4
|
+
class DataRecordVersion(TypedDict):
|
5
5
|
presigned_download_url: str
|
6
6
|
size_bytes: int
|
7
7
|
uri: str
|
8
8
|
uuid: str
|
9
9
|
|
10
10
|
|
11
|
-
class
|
11
|
+
class DataRecordInfo(TypedDict):
|
12
12
|
uri: str
|
13
13
|
uuid: str
|
14
|
+
|
15
|
+
|
16
|
+
class DataRecordVersionInfo(TypedDict):
|
17
|
+
resource_uri: str
|
18
|
+
resource_uuid: str
|
19
|
+
resource_version_uuid: str
|
@@ -1,25 +1,27 @@
|
|
1
1
|
from datetime import datetime, timedelta
|
2
|
-
# from urllib.parse import urlparse, parse_qs
|
3
|
-
|
4
|
-
from biolib.biolib_logging import logger
|
5
2
|
|
6
3
|
from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
|
7
4
|
from biolib.biolib_binary_format.utils import RemoteEndpoint
|
8
5
|
|
6
|
+
# from urllib.parse import urlparse, parse_qs
|
7
|
+
from biolib.biolib_logging import logger
|
8
|
+
from biolib.typing_utils import Literal
|
9
|
+
|
9
10
|
|
10
|
-
class
|
11
|
-
def __init__(self,
|
12
|
-
self._job_id = job_id
|
13
|
-
self._job_auth_token = job_auth_token
|
11
|
+
class RemoteJobStorageEndpoint(RemoteEndpoint):
|
12
|
+
def __init__(self, job_uuid: str, job_auth_token: str, storage_type: Literal['input', 'output']):
|
14
13
|
self._expires_at = None
|
14
|
+
self._job_auth_token = job_auth_token
|
15
|
+
self._job_uuid = job_uuid
|
15
16
|
self._presigned_url = None
|
17
|
+
self._storage_type: Literal['input', 'output'] = storage_type
|
16
18
|
|
17
19
|
def get_remote_url(self):
|
18
20
|
if not self._presigned_url or datetime.utcnow() > self._expires_at:
|
19
21
|
self._presigned_url = BiolibJobApi.get_job_storage_download_url(
|
20
22
|
job_auth_token=self._job_auth_token,
|
21
|
-
job_uuid=self.
|
22
|
-
storage_type='results'
|
23
|
+
job_uuid=self._job_uuid,
|
24
|
+
storage_type='results' if self._storage_type == 'output' else 'input',
|
23
25
|
)
|
24
26
|
self._expires_at = datetime.utcnow() + timedelta(minutes=8)
|
25
27
|
# TODO: Use expires at from url
|
@@ -27,6 +29,6 @@ class RemoteJobStorageResultEndpoint(RemoteEndpoint):
|
|
27
29
|
# query_params = parse_qs(parsed_url.query)
|
28
30
|
# time_at_generation = datetime.datetime.strptime(query_params['X-Amz-Date'][0], '%Y%m%dT%H%M%SZ')
|
29
31
|
# self._expires_at = time_at_generation + timedelta(seconds=int(query_params['X-Amz-Expires'][0]))
|
30
|
-
logger.debug(f'Job "{self.
|
32
|
+
logger.debug(f'Job "{self._job_uuid}" fetched presigned URL with expiry at {self._expires_at.isoformat()}')
|
31
33
|
|
32
34
|
return self._presigned_url
|
@@ -1,7 +1,8 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
import io
|
3
|
+
import math
|
3
4
|
from typing import Optional, Callable
|
4
|
-
|
5
|
+
from biolib.typing_utils import Iterator
|
5
6
|
from biolib._internal.http_client import HttpClient
|
6
7
|
|
7
8
|
|
@@ -147,5 +148,24 @@ class LazyLoadedFile:
|
|
147
148
|
def get_file_handle(self) -> io.BufferedIOBase:
|
148
149
|
return io.BytesIO(self.get_data())
|
149
150
|
|
150
|
-
def get_data(self) -> bytes:
|
151
|
-
|
151
|
+
def get_data(self, start=0, length=None) -> bytes:
|
152
|
+
start_offset = start + self.start
|
153
|
+
# make sure length doesn't go outside file boundaries
|
154
|
+
length_to_end_of_file = max(self._length - start, 0)
|
155
|
+
if length is None:
|
156
|
+
length_to_request = length_to_end_of_file
|
157
|
+
else:
|
158
|
+
length_to_request = min(length, length_to_end_of_file)
|
159
|
+
return self._buffer.get_data(start=start_offset, length=length_to_request)
|
160
|
+
|
161
|
+
def get_data_iterator(self) -> Iterator[bytes]:
|
162
|
+
if self._length == 0:
|
163
|
+
yield b''
|
164
|
+
else:
|
165
|
+
chunk_size = 10_000_000
|
166
|
+
chunks_to_yield = math.ceil(self._length / chunk_size)
|
167
|
+
for chunk_idx in range(chunks_to_yield - 1):
|
168
|
+
yield self._buffer.get_data(start=self.start+chunk_idx*chunk_size, length=chunk_size)
|
169
|
+
data_already_yielded = (chunks_to_yield - 1)*chunk_size
|
170
|
+
yield self._buffer.get_data(start=self.start+data_already_yielded,
|
171
|
+
length=self._length - data_already_yielded)
|
biolib/cli/auth.py
CHANGED
@@ -52,7 +52,7 @@ def whoami() -> None:
|
|
52
52
|
email = user_dict['email']
|
53
53
|
intrinsic_account = [account for account in user_dict['accounts'] if account['role'] == 'intrinsic'][0]
|
54
54
|
display_name = intrinsic_account['display_name']
|
55
|
-
print(f'Name: {display_name}\nEmail: {email}')
|
55
|
+
print(f'Name: {display_name}\nEmail: {email}\nLogged into: {client.base_url}')
|
56
56
|
else:
|
57
57
|
print('Not logged in', file=sys.stderr)
|
58
58
|
exit(1)
|
biolib/cli/data_record.py
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
+
import json
|
1
2
|
import logging
|
2
3
|
import os
|
4
|
+
from typing import Dict, List
|
3
5
|
|
4
6
|
import click
|
5
7
|
|
6
|
-
from biolib.
|
8
|
+
from biolib._data_record.data_record import DataRecord
|
9
|
+
from biolib.biolib_api_client import BiolibApiClient
|
7
10
|
from biolib.biolib_logging import logger, logger_no_user_data
|
8
11
|
from biolib.typing_utils import Optional
|
9
12
|
|
@@ -15,11 +18,19 @@ def data_record() -> None:
|
|
15
18
|
|
16
19
|
|
17
20
|
@data_record.command(help='Create a Data Record')
|
18
|
-
@click.
|
21
|
+
@click.argument('uri', required=True)
|
22
|
+
@click.option('--data-path', required=True, type=click.Path(exists=True))
|
23
|
+
@click.option('--record-type', required=False, type=str, default=None)
|
24
|
+
def create(uri: str, data_path: str, record_type: Optional[str]) -> None:
|
25
|
+
DataRecord.create(destination=uri, data_path=data_path, record_type=record_type)
|
26
|
+
|
27
|
+
|
28
|
+
@data_record.command(help='Update a Data Record')
|
29
|
+
@click.argument('uri', required=True)
|
19
30
|
@click.option('--data-path', required=True, type=click.Path(exists=True))
|
20
|
-
@click.option('--
|
21
|
-
def
|
22
|
-
DataRecord.
|
31
|
+
@click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
|
32
|
+
def update(uri: str, data_path: str, chunk_size: Optional[int]) -> None:
|
33
|
+
DataRecord.get_by_uri(uri=uri).update(data_path=data_path, chunk_size_in_mb=chunk_size)
|
23
34
|
|
24
35
|
|
25
36
|
@data_record.command(help='Download files from a Data Record')
|
@@ -27,7 +38,7 @@ def create(destination: str, data_path: str, name: Optional[str] = None) -> None
|
|
27
38
|
@click.option('--file', required=False, type=str)
|
28
39
|
@click.option('--path-filter', required=False, type=str, hide_input=True)
|
29
40
|
def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
|
30
|
-
record = DataRecord(uri=uri)
|
41
|
+
record = DataRecord.get_by_uri(uri=uri)
|
31
42
|
if file is not None:
|
32
43
|
try:
|
33
44
|
file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file][0]
|
@@ -41,3 +52,31 @@ def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
|
|
41
52
|
else:
|
42
53
|
assert not os.path.exists(record.name), f'Directory with name {record.name} already exists in current directory'
|
43
54
|
record.save_files(output_dir=record.name, path_filter=path_filter)
|
55
|
+
|
56
|
+
|
57
|
+
@data_record.command(help='Describe a Data Record')
|
58
|
+
@click.argument('uri', required=True)
|
59
|
+
@click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
|
60
|
+
def describe(uri: str, output_as_json: bool) -> None:
|
61
|
+
BiolibApiClient.assert_is_signed_in(authenticated_action_description='get Data Record description')
|
62
|
+
record = DataRecord.get_by_uri(uri)
|
63
|
+
files_info: List[Dict] = []
|
64
|
+
total_size_in_bytes = 0
|
65
|
+
for file in record.list_files():
|
66
|
+
files_info.append({'path': file.path, 'size_bytes': file.length})
|
67
|
+
total_size_in_bytes += file.length
|
68
|
+
|
69
|
+
if output_as_json:
|
70
|
+
print(
|
71
|
+
json.dumps(
|
72
|
+
obj={'uri': record.uri, 'size_bytes': total_size_in_bytes, 'files': files_info},
|
73
|
+
indent=4,
|
74
|
+
)
|
75
|
+
)
|
76
|
+
else:
|
77
|
+
print(f'Data Record {record.uri}\ntotal {total_size_in_bytes} bytes\n')
|
78
|
+
print('size bytes path')
|
79
|
+
for file_info in files_info:
|
80
|
+
size_string = str(file_info['size_bytes'])
|
81
|
+
leading_space_string = ' ' * (10 - len(size_string))
|
82
|
+
print(f"{leading_space_string}{size_string} {file_info['path']}")
|
biolib/cli/lfs.py
CHANGED
@@ -7,9 +7,9 @@ from typing import Dict, List
|
|
7
7
|
import click
|
8
8
|
|
9
9
|
from biolib import biolib_errors
|
10
|
-
from biolib.
|
10
|
+
from biolib._data_record.data_record import DataRecord
|
11
|
+
from biolib._internal.lfs import prune_lfs_cache
|
11
12
|
from biolib.biolib_logging import logger, logger_no_user_data
|
12
|
-
from biolib.lfs import create_large_file_system, prune_lfs_cache, push_large_file_system
|
13
13
|
from biolib.typing_utils import Optional
|
14
14
|
|
15
15
|
|
@@ -21,9 +21,10 @@ def lfs() -> None:
|
|
21
21
|
@lfs.command(help='Create a Large File System')
|
22
22
|
@click.argument('uri', required=True)
|
23
23
|
def create(uri: str) -> None:
|
24
|
+
logger.warning('This is command deprecated, please use "biolib data-record create" instead.')
|
24
25
|
logger.configure(default_log_level=logging.INFO)
|
25
26
|
logger_no_user_data.configure(default_log_level=logging.INFO)
|
26
|
-
|
27
|
+
DataRecord.create(destination=uri)
|
27
28
|
|
28
29
|
|
29
30
|
@lfs.command(help='Push a new version of a Large File System')
|
@@ -31,10 +32,11 @@ def create(uri: str) -> None:
|
|
31
32
|
@click.option('--path', required=True, type=click.Path(exists=True))
|
32
33
|
@click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
|
33
34
|
def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
|
35
|
+
logger.warning('This is command deprecated, please use "biolib data-record update" instead.')
|
34
36
|
logger.configure(default_log_level=logging.INFO)
|
35
37
|
logger_no_user_data.configure(default_log_level=logging.INFO)
|
36
38
|
try:
|
37
|
-
|
39
|
+
DataRecord.get_by_uri(uri=uri).update(data_path=path, chunk_size_in_mb=chunk_size)
|
38
40
|
except biolib_errors.BioLibError as error:
|
39
41
|
print(f'An error occurred:\n{error.message}', file=sys.stderr)
|
40
42
|
exit(1)
|
@@ -44,10 +46,11 @@ def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
|
|
44
46
|
@click.argument('uri', required=True)
|
45
47
|
@click.option('--file-path', required=True, type=str)
|
46
48
|
def download_file(uri: str, file_path: str) -> None:
|
49
|
+
logger.warning('This is command deprecated, please use "biolib data-record download" instead.')
|
47
50
|
logger.configure(default_log_level=logging.INFO)
|
48
51
|
logger_no_user_data.configure(default_log_level=logging.INFO)
|
49
52
|
try:
|
50
|
-
record = DataRecord(uri=uri)
|
53
|
+
record = DataRecord.get_by_uri(uri=uri)
|
51
54
|
try:
|
52
55
|
file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file_path][0]
|
53
56
|
except IndexError:
|
@@ -66,7 +69,8 @@ def download_file(uri: str, file_path: str) -> None:
|
|
66
69
|
@click.argument('uri', required=True)
|
67
70
|
@click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
|
68
71
|
def describe(uri: str, output_as_json: bool) -> None:
|
69
|
-
|
72
|
+
logger.warning('This is command deprecated, please use "biolib data-record describe" instead.')
|
73
|
+
data_record = DataRecord.get_by_uri(uri)
|
70
74
|
files_info: List[Dict] = []
|
71
75
|
total_size_in_bytes = 0
|
72
76
|
for file in data_record.list_files():
|
@@ -7,11 +7,11 @@ import time
|
|
7
7
|
from datetime import datetime
|
8
8
|
from socket import gethostbyname, gethostname
|
9
9
|
|
10
|
-
from biolib import
|
11
|
-
from biolib.biolib_logging import logger_no_user_data
|
12
|
-
from biolib.typing_utils import Optional, List, Dict, cast
|
10
|
+
from biolib import api, utils
|
13
11
|
from biolib.biolib_api_client import BiolibApiClient
|
14
|
-
from biolib.
|
12
|
+
from biolib.biolib_logging import logger_no_user_data
|
13
|
+
from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo, ShutdownTimes, WebserverConfig
|
14
|
+
from biolib.typing_utils import Dict, List, Optional, cast
|
15
15
|
|
16
16
|
|
17
17
|
def trust_ceritificates(certs_data: List[str]) -> None:
|
@@ -54,15 +54,12 @@ class CloudUtils:
|
|
54
54
|
pybiolib_version=utils.BIOLIB_PACKAGE_VERSION,
|
55
55
|
),
|
56
56
|
base_url=CloudUtils._get_environment_variable_or_fail('BIOLIB_BASE_URL'),
|
57
|
-
s3_general_storage_bucket_name=CloudUtils._get_environment_variable_or_fail(
|
58
|
-
'BIOLIB_S3_GENERAL_STORAGE_BUCKET_NAME',
|
59
|
-
),
|
60
57
|
is_dev=os.environ.get('BIOLIB_DEV') == 'TRUE',
|
61
58
|
shutdown_times=ShutdownTimes(
|
62
59
|
auto_shutdown_time_in_seconds=CloudUtils._get_environment_variable_as_int(
|
63
60
|
'BIOLIB_CLOUD_AUTO_SHUTDOWN_TIME_IN_SECONDS'
|
64
61
|
),
|
65
|
-
)
|
62
|
+
),
|
66
63
|
)
|
67
64
|
|
68
65
|
return CloudUtils._webserver_config
|
@@ -84,7 +81,7 @@ class CloudUtils:
|
|
84
81
|
except BaseException as error_object:
|
85
82
|
logger_no_user_data.error(f'Failed to deregister got error: {error_object}')
|
86
83
|
else:
|
87
|
-
logger_no_user_data.error(
|
84
|
+
logger_no_user_data.error('Not deregistering as environment is not cloud')
|
88
85
|
|
89
86
|
@staticmethod
|
90
87
|
def shutdown() -> None:
|
@@ -98,7 +95,7 @@ class CloudUtils:
|
|
98
95
|
except Exception as error: # pylint: disable=broad-except
|
99
96
|
logger_no_user_data.error(f'Failed to shutdown got error: {error}')
|
100
97
|
else:
|
101
|
-
logger_no_user_data.error(
|
98
|
+
logger_no_user_data.error('Not running shutdown as environment is not cloud')
|
102
99
|
|
103
100
|
@staticmethod
|
104
101
|
def deregister_and_shutdown() -> None:
|
@@ -131,7 +128,7 @@ class CloudUtils:
|
|
131
128
|
'auth_token': config['compute_node_info']['auth_token'],
|
132
129
|
'cloud_job_id': cloud_job_id,
|
133
130
|
'system_exception_code': system_exception_code,
|
134
|
-
'exit_code': exit_code
|
131
|
+
'exit_code': exit_code,
|
135
132
|
},
|
136
133
|
)
|
137
134
|
except BaseException as error:
|
@@ -152,14 +149,14 @@ class CloudUtils:
|
|
152
149
|
data=cast(Dict[str, str], compute_node_info),
|
153
150
|
)
|
154
151
|
if response.status_code != 201:
|
155
|
-
raise Exception(
|
152
|
+
raise Exception('Non 201 error code')
|
156
153
|
else:
|
157
|
-
logger_no_user_data.info(
|
154
|
+
logger_no_user_data.info('Compute node registered!')
|
158
155
|
response_data = response.json()
|
159
|
-
logger_no_user_data.info(f
|
156
|
+
logger_no_user_data.info(f'Got data on register: {json.dumps(response_data)}')
|
160
157
|
certs = []
|
161
|
-
for federation in response_data[
|
162
|
-
for cert_b64 in federation[
|
158
|
+
for federation in response_data['federation']:
|
159
|
+
for cert_b64 in federation['certs_b64']:
|
163
160
|
certs.append(base64.b64decode(cert_b64).decode())
|
164
161
|
trust_ceritificates(certs)
|
165
162
|
|