pybiolib 1.1.1881__py3-none-any.whl → 1.2.7.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. biolib/__init__.py +11 -4
  2. biolib/_data_record/data_record.py +278 -0
  3. biolib/_internal/data_record/__init__.py +1 -1
  4. biolib/_internal/data_record/data_record.py +97 -151
  5. biolib/_internal/data_record/remote_storage_endpoint.py +18 -7
  6. biolib/_internal/file_utils.py +77 -0
  7. biolib/_internal/fuse_mount/__init__.py +1 -0
  8. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  9. biolib/_internal/http_client.py +31 -9
  10. biolib/_internal/lfs/__init__.py +1 -0
  11. biolib/_internal/libs/__init__.py +1 -0
  12. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  13. biolib/_internal/push_application.py +6 -1
  14. biolib/_internal/runtime.py +3 -56
  15. biolib/_internal/types/__init__.py +4 -0
  16. biolib/_internal/types/app.py +9 -0
  17. biolib/_internal/types/data_record.py +40 -0
  18. biolib/_internal/types/experiment.py +10 -0
  19. biolib/_internal/types/resource.py +14 -0
  20. biolib/_internal/types/typing.py +7 -0
  21. biolib/_internal/utils/multinode.py +264 -0
  22. biolib/_runtime/runtime.py +84 -0
  23. biolib/api/__init__.py +1 -0
  24. biolib/api/client.py +39 -17
  25. biolib/app/app.py +34 -71
  26. biolib/biolib_api_client/api_client.py +9 -2
  27. biolib/biolib_api_client/app_types.py +3 -2
  28. biolib/biolib_api_client/biolib_job_api.py +6 -0
  29. biolib/biolib_api_client/job_types.py +4 -4
  30. biolib/biolib_api_client/lfs_types.py +8 -2
  31. biolib/biolib_binary_format/remote_endpoints.py +12 -10
  32. biolib/biolib_binary_format/utils.py +23 -3
  33. biolib/cli/auth.py +1 -1
  34. biolib/cli/data_record.py +45 -6
  35. biolib/cli/lfs.py +10 -6
  36. biolib/compute_node/cloud_utils/cloud_utils.py +13 -16
  37. biolib/compute_node/job_worker/executors/docker_executor.py +127 -108
  38. biolib/compute_node/job_worker/job_storage.py +17 -5
  39. biolib/compute_node/job_worker/job_worker.py +25 -15
  40. biolib/compute_node/remote_host_proxy.py +72 -84
  41. biolib/compute_node/webserver/webserver_types.py +0 -1
  42. biolib/compute_node/webserver/worker_thread.py +42 -39
  43. biolib/experiments/experiment.py +75 -44
  44. biolib/jobs/job.py +98 -19
  45. biolib/jobs/job_result.py +46 -21
  46. biolib/jobs/types.py +1 -1
  47. biolib/runtime/__init__.py +2 -1
  48. biolib/sdk/__init__.py +18 -7
  49. biolib/typing_utils.py +2 -7
  50. biolib/user/sign_in.py +2 -2
  51. biolib/utils/seq_util.py +38 -35
  52. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/METADATA +1 -1
  53. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/RECORD +57 -45
  54. biolib/experiments/types.py +0 -9
  55. biolib/lfs/__init__.py +0 -4
  56. biolib/lfs/utils.py +0 -153
  57. /biolib/{lfs → _internal/lfs}/cache.py +0 -0
  58. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/LICENSE +0 -0
  59. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/WHEEL +0 -0
  60. {pybiolib-1.1.1881.dist-info → pybiolib-1.2.7.dev0.dist-info}/entry_points.txt +0 -0
biolib/app/app.py CHANGED
@@ -1,29 +1,26 @@
1
- import os
2
1
  import io
3
- import random
4
2
  import json
3
+ import os
4
+ import random
5
5
  import string
6
-
7
6
  from pathlib import Path
7
+
8
8
  from biolib import utils
9
- from biolib.compute_node.job_worker.job_storage import JobStorage
10
- from biolib.compute_node.job_worker.job_worker import JobWorker
11
- from biolib.experiments.experiment import Experiment
12
- from biolib.jobs import Job
13
- from biolib.typing_utils import Optional, cast
14
- from biolib.biolib_api_client import CreatedJobDict, JobState
15
- from biolib.jobs.types import JobDict
9
+ from biolib.biolib_api_client import JobState
16
10
  from biolib.biolib_api_client.app_types import App, AppVersion
17
- from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
18
11
  from biolib.biolib_api_client.biolib_app_api import BiolibAppApi
12
+ from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
19
13
  from biolib.biolib_binary_format import ModuleInput
20
14
  from biolib.biolib_errors import BioLibError
21
15
  from biolib.biolib_logging import logger
16
+ from biolib.compute_node.job_worker.job_worker import JobWorker
17
+ from biolib.experiments.experiment import Experiment
18
+ from biolib.jobs import Job
19
+ from biolib.typing_utils import Optional
22
20
  from biolib.utils.app_uri import parse_app_uri
23
21
 
24
22
 
25
23
  class BioLibApp:
26
-
27
24
  def __init__(self, uri: str):
28
25
  app_response = BiolibAppApi.get_by_uri(uri)
29
26
  self._app: App = app_response['app']
@@ -48,17 +45,18 @@ class BioLibApp:
48
45
  return self._app_version
49
46
 
50
47
  def cli(
51
- self,
52
- args=None,
53
- stdin=None,
54
- files=None,
55
- override_command=False,
56
- machine='',
57
- blocking: bool = True,
58
- experiment_id: Optional[str] = None,
59
- result_prefix: Optional[str] = None,
60
- timeout: Optional[int] = None,
61
- notify: bool = False,
48
+ self,
49
+ args=None,
50
+ stdin=None,
51
+ files=None,
52
+ override_command=False,
53
+ machine='',
54
+ blocking: bool = True,
55
+ experiment_id: Optional[str] = None,
56
+ result_prefix: Optional[str] = None,
57
+ timeout: Optional[int] = None,
58
+ notify: bool = False,
59
+ machine_count: Optional[int] = None,
62
60
  ) -> Job:
63
61
  if not experiment_id:
64
62
  experiment = Experiment.get_experiment_in_context()
@@ -78,7 +76,9 @@ class BioLibApp:
78
76
 
79
77
  return self._run_locally(module_input_serialized)
80
78
 
81
- job = self._start_in_cloud(
79
+ job = Job._start_job_in_cloud( # pylint: disable=protected-access
80
+ app_uri=self._app_uri,
81
+ app_version_uuid=self._app_version['public_id'],
82
82
  experiment_id=experiment_id,
83
83
  machine=machine,
84
84
  module_input_serialized=module_input_serialized,
@@ -86,6 +86,7 @@ class BioLibApp:
86
86
  override_command=override_command,
87
87
  result_prefix=result_prefix,
88
88
  timeout=timeout,
89
+ requested_machine_count=machine_count,
89
90
  )
90
91
  if blocking:
91
92
  # TODO: Deprecate utils.STREAM_STDOUT and always stream logs by simply calling job.stream_logs()
@@ -93,8 +94,8 @@ class BioLibApp:
93
94
  utils.STREAM_STDOUT = True
94
95
 
95
96
  enable_print = bool(
96
- utils.STREAM_STDOUT and
97
- (self._app_version.get('main_output_file') or self._app_version.get('stdout_render_type') == 'text')
97
+ utils.STREAM_STDOUT
98
+ and (self._app_version.get('main_output_file') or self._app_version.get('stdout_render_type') == 'text')
98
99
  )
99
100
  job._stream_logs(enable_print=enable_print) # pylint: disable=protected-access
100
101
 
@@ -108,11 +109,11 @@ class BioLibApp:
108
109
  self.cli()
109
110
 
110
111
  else:
111
- raise BioLibError('''
112
+ raise BioLibError("""
112
113
  Calling an app directly with app() is currently being reworked.
113
114
  To use the previous functionality, please call app.cli() instead.
114
115
  Example: "app.cli('--help')"
115
- ''')
116
+ """)
116
117
 
117
118
  @staticmethod
118
119
  def _get_serialized_module_input(args=None, stdin=None, files=None) -> bytes:
@@ -142,9 +143,9 @@ Example: "app.cli('--help')"
142
143
  args[idx] = Path(arg).name
143
144
 
144
145
  # support --myarg=file.txt
145
- elif os.path.isfile(arg.split("=")[-1]) or os.path.isdir(arg.split("=")[-1]):
146
- files.append(arg.split("=")[-1])
147
- args[idx] = arg.split("=")[0] + '=' + Path(arg.split("=")[-1]).name
146
+ elif os.path.isfile(arg.split('=')[-1]) or os.path.isdir(arg.split('=')[-1]):
147
+ files.append(arg.split('=')[-1])
148
+ args[idx] = arg.split('=')[0] + '=' + Path(arg.split('=')[-1]).name
148
149
  else:
149
150
  pass # a normal string arg was given
150
151
  else:
@@ -154,7 +155,7 @@ Example: "app.cli('--help')"
154
155
  elif isinstance(arg, io.BytesIO):
155
156
  file_data = arg.getvalue()
156
157
  else:
157
- raise Exception(f"Unexpected type of argument: {arg}")
158
+ raise Exception(f'Unexpected type of argument: {arg}')
158
159
  files_dict[f'/{tmp_filename}'] = file_data
159
160
  args[idx] = tmp_filename
160
161
 
@@ -192,48 +193,10 @@ Example: "app.cli('--help')"
192
193
  )
193
194
  return module_input_serialized
194
195
 
195
- def _start_in_cloud(
196
- self,
197
- module_input_serialized: bytes,
198
- override_command: bool = False,
199
- machine: Optional[str] = None,
200
- experiment_id: Optional[str] = None,
201
- result_prefix: Optional[str] = None,
202
- timeout: Optional[int] = None,
203
- notify: bool = False,
204
- ) -> Job:
205
- if len(module_input_serialized) < 500_000:
206
- _job_dict = BiolibJobApi.create_job_with_data(
207
- app_resource_name_prefix=parse_app_uri(self._app_uri)['resource_name_prefix'],
208
- app_version_uuid=self._app_version['public_id'],
209
- arguments_override_command=override_command,
210
- experiment_uuid=experiment_id,
211
- module_input_serialized=module_input_serialized,
212
- notify=notify,
213
- requested_machine=machine,
214
- requested_timeout_seconds=timeout,
215
- result_name_prefix=result_prefix,
216
- )
217
- return Job(cast(JobDict, _job_dict))
218
-
219
- job_dict: CreatedJobDict = BiolibJobApi.create(
220
- app_resource_name_prefix=parse_app_uri(self._app_uri)['resource_name_prefix'],
221
- app_version_id=self._app_version['public_id'],
222
- experiment_uuid=experiment_id,
223
- machine=machine,
224
- notify=notify,
225
- override_command=override_command,
226
- timeout=timeout,
227
- )
228
- JobStorage.upload_module_input(job=job_dict, module_input_serialized=module_input_serialized)
229
- cloud_job = BiolibJobApi.create_cloud_job(job_id=job_dict['public_id'], result_name_prefix=result_prefix)
230
- logger.debug(f"Cloud: Job created with id {cloud_job['public_id']}")
231
- return Job(cast(JobDict, job_dict))
232
-
233
196
  def _run_locally(self, module_input_serialized: bytes) -> Job:
234
197
  job_dict = BiolibJobApi.create(
235
198
  app_version_id=self._app_version['public_id'],
236
- app_resource_name_prefix=parse_app_uri(self._app_uri)['resource_name_prefix']
199
+ app_resource_name_prefix=parse_app_uri(self._app_uri)['resource_name_prefix'],
237
200
  )
238
201
  job = Job(job_dict)
239
202
 
@@ -6,7 +6,7 @@ import os
6
6
  from datetime import datetime, timezone
7
7
  from json.decoder import JSONDecodeError
8
8
 
9
- from biolib._internal.runtime import Runtime
9
+ from biolib._runtime.runtime import Runtime
10
10
  from biolib._internal.http_client import HttpClient
11
11
  from biolib.typing_utils import Optional
12
12
  from biolib.biolib_errors import BioLibError
@@ -192,9 +192,16 @@ class BiolibApiClient:
192
192
  api_client.refresh_access_token()
193
193
 
194
194
  @staticmethod
195
- def assert_is_signed_in(authenticated_action_description: str) -> None:
195
+ def is_reauthentication_needed() -> bool:
196
196
  api_client = BiolibApiClient.get()
197
197
  if not api_client.is_signed_in and not Runtime.check_is_environment_biolib_app():
198
+ return True
199
+ else:
200
+ return False
201
+
202
+ @staticmethod
203
+ def assert_is_signed_in(authenticated_action_description: str) -> None:
204
+ if BiolibApiClient.is_reauthentication_needed():
198
205
  raise BioLibError(
199
206
  f'You must be signed in to {authenticated_action_description}. '
200
207
  f'Please set the environment variable "BIOLIB_TOKEN"'
@@ -1,7 +1,7 @@
1
1
  from enum import Enum
2
2
 
3
- from biolib.typing_utils import TypedDict, List, Optional, Dict, Literal
4
3
  from biolib.biolib_api_client.common_types import SemanticVersion
4
+ from biolib.typing_utils import Dict, List, Literal, Optional, TypedDict
5
5
 
6
6
 
7
7
  class AppVersionSlim(SemanticVersion):
@@ -16,6 +16,7 @@ class AppVersion(AppVersionSlim):
16
16
  source_code_license: str
17
17
  stdout_render_type: Literal['text', 'markdown']
18
18
  main_output_file: Optional[str]
19
+ app_uri: str
19
20
 
20
21
 
21
22
  class App(TypedDict):
@@ -31,6 +32,7 @@ class App(TypedDict):
31
32
  public_id: str
32
33
  state: str
33
34
  resource_uri: str
35
+ type: str
34
36
 
35
37
 
36
38
  class AppGetResponse(TypedDict):
@@ -99,7 +101,6 @@ class _AppVersionOnJob(TypedDict):
99
101
 
100
102
  class AppOnJob(TypedDict):
101
103
  allow_client_side_execution: bool
102
- can_push_data_record_for_user: bool
103
104
  state: Literal['public', 'draft']
104
105
 
105
106
 
@@ -46,6 +46,7 @@ class BiolibJobApi:
46
46
  experiment_uuid: Optional[str] = None,
47
47
  timeout: Optional[int] = None,
48
48
  notify: bool = False,
49
+ requested_machine_count: Optional[int] = None,
49
50
  ):
50
51
  data = {
51
52
  'app_version_id': app_version_id,
@@ -73,6 +74,9 @@ class BiolibJobApi:
73
74
  'requested_machine': machine
74
75
  })
75
76
 
77
+ if requested_machine_count:
78
+ data.update({'requested_machine_count': requested_machine_count})
79
+
76
80
  if experiment_uuid:
77
81
  data['experiment_uuid'] = experiment_uuid
78
82
 
@@ -156,6 +160,7 @@ class BiolibJobApi:
156
160
  caller_job_uuid: Optional[str] = None,
157
161
  requested_timeout_seconds: Optional[int] = None,
158
162
  notify: bool = False,
163
+ requested_machine_count: Optional[int] = None,
159
164
  ) -> Dict:
160
165
  job_dict: Dict = biolib.api.client.post(
161
166
  path='/jobs/create_job_with_data/',
@@ -171,6 +176,7 @@ class BiolibJobApi:
171
176
  'client-version': BIOLIB_PACKAGE_VERSION,
172
177
  'experiment-uuid': experiment_uuid,
173
178
  'requested-machine': requested_machine,
179
+ 'requested-machine-count': str(requested_machine_count) if requested_machine_count else None,
174
180
  'result-name-prefix': result_name_prefix,
175
181
  'requested-timeout-seconds': str(requested_timeout_seconds) if requested_timeout_seconds else None,
176
182
  'notify': 'true' if notify else 'false',
@@ -1,9 +1,8 @@
1
1
  from enum import Enum
2
2
 
3
- from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo
4
- from biolib.typing_utils import TypedDict, Optional, List
5
-
6
3
  from biolib.biolib_api_client.app_types import AppVersionOnJob, RemoteHost
4
+ from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo
5
+ from biolib.typing_utils import List, Optional, TypedDict
7
6
 
8
7
 
9
8
  class JobState(Enum):
@@ -15,6 +14,7 @@ class JobState(Enum):
15
14
 
16
15
 
17
16
  class _Job(TypedDict):
17
+ app_uri: str
18
18
  app_version: AppVersionOnJob
19
19
  arguments_override_command: bool
20
20
  auth_token: str
@@ -22,10 +22,10 @@ class _Job(TypedDict):
22
22
  created_at: str
23
23
  federated_job_uuid: Optional[str]
24
24
  public_id: str
25
- uuid: str
26
25
  remote_hosts_with_warning: List[RemoteHost]
27
26
  state: str
28
27
  user_id: Optional[str]
28
+ uuid: str
29
29
 
30
30
 
31
31
  # type optional keys with total=False
@@ -1,13 +1,19 @@
1
1
  from biolib.typing_utils import TypedDict
2
2
 
3
3
 
4
- class LargeFileSystemVersion(TypedDict):
4
+ class DataRecordVersion(TypedDict):
5
5
  presigned_download_url: str
6
6
  size_bytes: int
7
7
  uri: str
8
8
  uuid: str
9
9
 
10
10
 
11
- class LargeFileSystem(TypedDict):
11
+ class DataRecordInfo(TypedDict):
12
12
  uri: str
13
13
  uuid: str
14
+
15
+
16
+ class DataRecordVersionInfo(TypedDict):
17
+ resource_uri: str
18
+ resource_uuid: str
19
+ resource_version_uuid: str
@@ -1,25 +1,27 @@
1
1
  from datetime import datetime, timedelta
2
- # from urllib.parse import urlparse, parse_qs
3
-
4
- from biolib.biolib_logging import logger
5
2
 
6
3
  from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
7
4
  from biolib.biolib_binary_format.utils import RemoteEndpoint
8
5
 
6
+ # from urllib.parse import urlparse, parse_qs
7
+ from biolib.biolib_logging import logger
8
+ from biolib.typing_utils import Literal
9
+
9
10
 
10
- class RemoteJobStorageResultEndpoint(RemoteEndpoint):
11
- def __init__(self, job_id: str, job_auth_token: str):
12
- self._job_id = job_id
13
- self._job_auth_token = job_auth_token
11
+ class RemoteJobStorageEndpoint(RemoteEndpoint):
12
+ def __init__(self, job_uuid: str, job_auth_token: str, storage_type: Literal['input', 'output']):
14
13
  self._expires_at = None
14
+ self._job_auth_token = job_auth_token
15
+ self._job_uuid = job_uuid
15
16
  self._presigned_url = None
17
+ self._storage_type: Literal['input', 'output'] = storage_type
16
18
 
17
19
  def get_remote_url(self):
18
20
  if not self._presigned_url or datetime.utcnow() > self._expires_at:
19
21
  self._presigned_url = BiolibJobApi.get_job_storage_download_url(
20
22
  job_auth_token=self._job_auth_token,
21
- job_uuid=self._job_id,
22
- storage_type='results'
23
+ job_uuid=self._job_uuid,
24
+ storage_type='results' if self._storage_type == 'output' else 'input',
23
25
  )
24
26
  self._expires_at = datetime.utcnow() + timedelta(minutes=8)
25
27
  # TODO: Use expires at from url
@@ -27,6 +29,6 @@ class RemoteJobStorageResultEndpoint(RemoteEndpoint):
27
29
  # query_params = parse_qs(parsed_url.query)
28
30
  # time_at_generation = datetime.datetime.strptime(query_params['X-Amz-Date'][0], '%Y%m%dT%H%M%SZ')
29
31
  # self._expires_at = time_at_generation + timedelta(seconds=int(query_params['X-Amz-Expires'][0]))
30
- logger.debug(f'Job "{self._job_id}" fetched presigned URL with expiry at {self._expires_at.isoformat()}')
32
+ logger.debug(f'Job "{self._job_uuid}" fetched presigned URL with expiry at {self._expires_at.isoformat()}')
31
33
 
32
34
  return self._presigned_url
@@ -1,7 +1,8 @@
1
1
  from abc import ABC, abstractmethod
2
2
  import io
3
+ import math
3
4
  from typing import Optional, Callable
4
-
5
+ from biolib.typing_utils import Iterator
5
6
  from biolib._internal.http_client import HttpClient
6
7
 
7
8
 
@@ -147,5 +148,24 @@ class LazyLoadedFile:
147
148
  def get_file_handle(self) -> io.BufferedIOBase:
148
149
  return io.BytesIO(self.get_data())
149
150
 
150
- def get_data(self) -> bytes:
151
- return self._buffer.get_data(start=self.start, length=self._length)
151
+ def get_data(self, start=0, length=None) -> bytes:
152
+ start_offset = start + self.start
153
+ # make sure length doesn't go outside file boundaries
154
+ length_to_end_of_file = max(self._length - start, 0)
155
+ if length is None:
156
+ length_to_request = length_to_end_of_file
157
+ else:
158
+ length_to_request = min(length, length_to_end_of_file)
159
+ return self._buffer.get_data(start=start_offset, length=length_to_request)
160
+
161
+ def get_data_iterator(self) -> Iterator[bytes]:
162
+ if self._length == 0:
163
+ yield b''
164
+ else:
165
+ chunk_size = 10_000_000
166
+ chunks_to_yield = math.ceil(self._length / chunk_size)
167
+ for chunk_idx in range(chunks_to_yield - 1):
168
+ yield self._buffer.get_data(start=self.start+chunk_idx*chunk_size, length=chunk_size)
169
+ data_already_yielded = (chunks_to_yield - 1)*chunk_size
170
+ yield self._buffer.get_data(start=self.start+data_already_yielded,
171
+ length=self._length - data_already_yielded)
biolib/cli/auth.py CHANGED
@@ -52,7 +52,7 @@ def whoami() -> None:
52
52
  email = user_dict['email']
53
53
  intrinsic_account = [account for account in user_dict['accounts'] if account['role'] == 'intrinsic'][0]
54
54
  display_name = intrinsic_account['display_name']
55
- print(f'Name: {display_name}\nEmail: {email}')
55
+ print(f'Name: {display_name}\nEmail: {email}\nLogged into: {client.base_url}')
56
56
  else:
57
57
  print('Not logged in', file=sys.stderr)
58
58
  exit(1)
biolib/cli/data_record.py CHANGED
@@ -1,9 +1,12 @@
1
+ import json
1
2
  import logging
2
3
  import os
4
+ from typing import Dict, List
3
5
 
4
6
  import click
5
7
 
6
- from biolib._internal.data_record import DataRecord
8
+ from biolib._data_record.data_record import DataRecord
9
+ from biolib.biolib_api_client import BiolibApiClient
7
10
  from biolib.biolib_logging import logger, logger_no_user_data
8
11
  from biolib.typing_utils import Optional
9
12
 
@@ -15,11 +18,19 @@ def data_record() -> None:
15
18
 
16
19
 
17
20
  @data_record.command(help='Create a Data Record')
18
- @click.option('--destination', type=str, required=True)
21
+ @click.argument('uri', required=True)
22
+ @click.option('--data-path', required=True, type=click.Path(exists=True))
23
+ @click.option('--record-type', required=False, type=str, default=None)
24
+ def create(uri: str, data_path: str, record_type: Optional[str]) -> None:
25
+ DataRecord.create(destination=uri, data_path=data_path, record_type=record_type)
26
+
27
+
28
+ @data_record.command(help='Update a Data Record')
29
+ @click.argument('uri', required=True)
19
30
  @click.option('--data-path', required=True, type=click.Path(exists=True))
20
- @click.option('--name', type=str, required=False)
21
- def create(destination: str, data_path: str, name: Optional[str] = None) -> None:
22
- DataRecord.create(destination, data_path, name)
31
+ @click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
32
+ def update(uri: str, data_path: str, chunk_size: Optional[int]) -> None:
33
+ DataRecord.get_by_uri(uri=uri).update(data_path=data_path, chunk_size_in_mb=chunk_size)
23
34
 
24
35
 
25
36
  @data_record.command(help='Download files from a Data Record')
@@ -27,7 +38,7 @@ def create(destination: str, data_path: str, name: Optional[str] = None) -> None
27
38
  @click.option('--file', required=False, type=str)
28
39
  @click.option('--path-filter', required=False, type=str, hide_input=True)
29
40
  def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
30
- record = DataRecord(uri=uri)
41
+ record = DataRecord.get_by_uri(uri=uri)
31
42
  if file is not None:
32
43
  try:
33
44
  file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file][0]
@@ -41,3 +52,31 @@ def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
41
52
  else:
42
53
  assert not os.path.exists(record.name), f'Directory with name {record.name} already exists in current directory'
43
54
  record.save_files(output_dir=record.name, path_filter=path_filter)
55
+
56
+
57
+ @data_record.command(help='Describe a Data Record')
58
+ @click.argument('uri', required=True)
59
+ @click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
60
+ def describe(uri: str, output_as_json: bool) -> None:
61
+ BiolibApiClient.assert_is_signed_in(authenticated_action_description='get Data Record description')
62
+ record = DataRecord.get_by_uri(uri)
63
+ files_info: List[Dict] = []
64
+ total_size_in_bytes = 0
65
+ for file in record.list_files():
66
+ files_info.append({'path': file.path, 'size_bytes': file.length})
67
+ total_size_in_bytes += file.length
68
+
69
+ if output_as_json:
70
+ print(
71
+ json.dumps(
72
+ obj={'uri': record.uri, 'size_bytes': total_size_in_bytes, 'files': files_info},
73
+ indent=4,
74
+ )
75
+ )
76
+ else:
77
+ print(f'Data Record {record.uri}\ntotal {total_size_in_bytes} bytes\n')
78
+ print('size bytes path')
79
+ for file_info in files_info:
80
+ size_string = str(file_info['size_bytes'])
81
+ leading_space_string = ' ' * (10 - len(size_string))
82
+ print(f"{leading_space_string}{size_string} {file_info['path']}")
biolib/cli/lfs.py CHANGED
@@ -7,9 +7,9 @@ from typing import Dict, List
7
7
  import click
8
8
 
9
9
  from biolib import biolib_errors
10
- from biolib._internal.data_record import DataRecord
10
+ from biolib._data_record.data_record import DataRecord
11
+ from biolib._internal.lfs import prune_lfs_cache
11
12
  from biolib.biolib_logging import logger, logger_no_user_data
12
- from biolib.lfs import create_large_file_system, prune_lfs_cache, push_large_file_system
13
13
  from biolib.typing_utils import Optional
14
14
 
15
15
 
@@ -21,9 +21,10 @@ def lfs() -> None:
21
21
  @lfs.command(help='Create a Large File System')
22
22
  @click.argument('uri', required=True)
23
23
  def create(uri: str) -> None:
24
+ logger.warning('This is command deprecated, please use "biolib data-record create" instead.')
24
25
  logger.configure(default_log_level=logging.INFO)
25
26
  logger_no_user_data.configure(default_log_level=logging.INFO)
26
- create_large_file_system(lfs_uri=uri)
27
+ DataRecord.create(destination=uri)
27
28
 
28
29
 
29
30
  @lfs.command(help='Push a new version of a Large File System')
@@ -31,10 +32,11 @@ def create(uri: str) -> None:
31
32
  @click.option('--path', required=True, type=click.Path(exists=True))
32
33
  @click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
33
34
  def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
35
+ logger.warning('This is command deprecated, please use "biolib data-record update" instead.')
34
36
  logger.configure(default_log_level=logging.INFO)
35
37
  logger_no_user_data.configure(default_log_level=logging.INFO)
36
38
  try:
37
- push_large_file_system(lfs_uri=uri, input_dir=path, chunk_size_in_mb=chunk_size)
39
+ DataRecord.get_by_uri(uri=uri).update(data_path=path, chunk_size_in_mb=chunk_size)
38
40
  except biolib_errors.BioLibError as error:
39
41
  print(f'An error occurred:\n{error.message}', file=sys.stderr)
40
42
  exit(1)
@@ -44,10 +46,11 @@ def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
44
46
  @click.argument('uri', required=True)
45
47
  @click.option('--file-path', required=True, type=str)
46
48
  def download_file(uri: str, file_path: str) -> None:
49
+ logger.warning('This is command deprecated, please use "biolib data-record download" instead.')
47
50
  logger.configure(default_log_level=logging.INFO)
48
51
  logger_no_user_data.configure(default_log_level=logging.INFO)
49
52
  try:
50
- record = DataRecord(uri=uri)
53
+ record = DataRecord.get_by_uri(uri=uri)
51
54
  try:
52
55
  file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file_path][0]
53
56
  except IndexError:
@@ -66,7 +69,8 @@ def download_file(uri: str, file_path: str) -> None:
66
69
  @click.argument('uri', required=True)
67
70
  @click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
68
71
  def describe(uri: str, output_as_json: bool) -> None:
69
- data_record = DataRecord(uri)
72
+ logger.warning('This is command deprecated, please use "biolib data-record describe" instead.')
73
+ data_record = DataRecord.get_by_uri(uri)
70
74
  files_info: List[Dict] = []
71
75
  total_size_in_bytes = 0
72
76
  for file in data_record.list_files():
@@ -7,11 +7,11 @@ import time
7
7
  from datetime import datetime
8
8
  from socket import gethostbyname, gethostname
9
9
 
10
- from biolib import utils, api
11
- from biolib.biolib_logging import logger_no_user_data
12
- from biolib.typing_utils import Optional, List, Dict, cast
10
+ from biolib import api, utils
13
11
  from biolib.biolib_api_client import BiolibApiClient
14
- from biolib.compute_node.webserver.webserver_types import WebserverConfig, ComputeNodeInfo, ShutdownTimes
12
+ from biolib.biolib_logging import logger_no_user_data
13
+ from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo, ShutdownTimes, WebserverConfig
14
+ from biolib.typing_utils import Dict, List, Optional, cast
15
15
 
16
16
 
17
17
  def trust_ceritificates(certs_data: List[str]) -> None:
@@ -54,15 +54,12 @@ class CloudUtils:
54
54
  pybiolib_version=utils.BIOLIB_PACKAGE_VERSION,
55
55
  ),
56
56
  base_url=CloudUtils._get_environment_variable_or_fail('BIOLIB_BASE_URL'),
57
- s3_general_storage_bucket_name=CloudUtils._get_environment_variable_or_fail(
58
- 'BIOLIB_S3_GENERAL_STORAGE_BUCKET_NAME',
59
- ),
60
57
  is_dev=os.environ.get('BIOLIB_DEV') == 'TRUE',
61
58
  shutdown_times=ShutdownTimes(
62
59
  auto_shutdown_time_in_seconds=CloudUtils._get_environment_variable_as_int(
63
60
  'BIOLIB_CLOUD_AUTO_SHUTDOWN_TIME_IN_SECONDS'
64
61
  ),
65
- )
62
+ ),
66
63
  )
67
64
 
68
65
  return CloudUtils._webserver_config
@@ -84,7 +81,7 @@ class CloudUtils:
84
81
  except BaseException as error_object:
85
82
  logger_no_user_data.error(f'Failed to deregister got error: {error_object}')
86
83
  else:
87
- logger_no_user_data.error("Not deregistering as environment is not cloud")
84
+ logger_no_user_data.error('Not deregistering as environment is not cloud')
88
85
 
89
86
  @staticmethod
90
87
  def shutdown() -> None:
@@ -98,7 +95,7 @@ class CloudUtils:
98
95
  except Exception as error: # pylint: disable=broad-except
99
96
  logger_no_user_data.error(f'Failed to shutdown got error: {error}')
100
97
  else:
101
- logger_no_user_data.error("Not running shutdown as environment is not cloud")
98
+ logger_no_user_data.error('Not running shutdown as environment is not cloud')
102
99
 
103
100
  @staticmethod
104
101
  def deregister_and_shutdown() -> None:
@@ -131,7 +128,7 @@ class CloudUtils:
131
128
  'auth_token': config['compute_node_info']['auth_token'],
132
129
  'cloud_job_id': cloud_job_id,
133
130
  'system_exception_code': system_exception_code,
134
- 'exit_code': exit_code
131
+ 'exit_code': exit_code,
135
132
  },
136
133
  )
137
134
  except BaseException as error:
@@ -152,14 +149,14 @@ class CloudUtils:
152
149
  data=cast(Dict[str, str], compute_node_info),
153
150
  )
154
151
  if response.status_code != 201:
155
- raise Exception("Non 201 error code")
152
+ raise Exception('Non 201 error code')
156
153
  else:
157
- logger_no_user_data.info("Compute node registered!")
154
+ logger_no_user_data.info('Compute node registered!')
158
155
  response_data = response.json()
159
- logger_no_user_data.info(f"Got data on register: {json.dumps(response_data)}")
156
+ logger_no_user_data.info(f'Got data on register: {json.dumps(response_data)}')
160
157
  certs = []
161
- for federation in response_data["federation"]:
162
- for cert_b64 in federation["certs_b64"]:
158
+ for federation in response_data['federation']:
159
+ for cert_b64 in federation['certs_b64']:
163
160
  certs.append(base64.b64decode(cert_b64).decode())
164
161
  trust_ceritificates(certs)
165
162