pybiolib 1.1.1747__py3-none-any.whl → 1.1.2193__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. biolib/__init__.py +18 -5
  2. biolib/_data_record/data_record.py +278 -0
  3. biolib/_internal/data_record/__init__.py +1 -0
  4. biolib/_internal/data_record/data_record.py +97 -0
  5. biolib/_internal/data_record/remote_storage_endpoint.py +38 -0
  6. biolib/_internal/file_utils.py +77 -0
  7. biolib/_internal/fuse_mount/__init__.py +1 -0
  8. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  9. biolib/_internal/http_client.py +42 -23
  10. biolib/_internal/lfs/__init__.py +1 -0
  11. biolib/_internal/libs/__init__.py +1 -0
  12. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  13. biolib/_internal/push_application.py +22 -37
  14. biolib/_internal/runtime.py +19 -0
  15. biolib/_internal/types/__init__.py +4 -0
  16. biolib/_internal/types/app.py +9 -0
  17. biolib/_internal/types/data_record.py +40 -0
  18. biolib/_internal/types/experiment.py +10 -0
  19. biolib/_internal/types/resource.py +14 -0
  20. biolib/_internal/types/typing.py +7 -0
  21. biolib/_internal/utils/__init__.py +18 -0
  22. biolib/_runtime/runtime.py +80 -0
  23. biolib/api/__init__.py +1 -0
  24. biolib/api/client.py +39 -17
  25. biolib/app/app.py +40 -72
  26. biolib/app/search_apps.py +8 -12
  27. biolib/biolib_api_client/api_client.py +22 -10
  28. biolib/biolib_api_client/app_types.py +2 -1
  29. biolib/biolib_api_client/biolib_app_api.py +1 -1
  30. biolib/biolib_api_client/biolib_job_api.py +6 -0
  31. biolib/biolib_api_client/job_types.py +4 -4
  32. biolib/biolib_api_client/lfs_types.py +8 -2
  33. biolib/biolib_binary_format/remote_endpoints.py +12 -10
  34. biolib/biolib_binary_format/utils.py +41 -4
  35. biolib/cli/__init__.py +6 -2
  36. biolib/cli/auth.py +58 -0
  37. biolib/cli/data_record.py +80 -0
  38. biolib/cli/download_container.py +3 -1
  39. biolib/cli/init.py +1 -0
  40. biolib/cli/lfs.py +45 -11
  41. biolib/cli/push.py +1 -1
  42. biolib/cli/run.py +3 -2
  43. biolib/cli/start.py +1 -0
  44. biolib/compute_node/cloud_utils/cloud_utils.py +15 -18
  45. biolib/compute_node/job_worker/cache_state.py +1 -1
  46. biolib/compute_node/job_worker/executors/docker_executor.py +134 -114
  47. biolib/compute_node/job_worker/job_storage.py +3 -4
  48. biolib/compute_node/job_worker/job_worker.py +31 -15
  49. biolib/compute_node/remote_host_proxy.py +75 -70
  50. biolib/compute_node/webserver/webserver_types.py +0 -1
  51. biolib/experiments/experiment.py +75 -44
  52. biolib/jobs/job.py +125 -47
  53. biolib/jobs/job_result.py +46 -21
  54. biolib/jobs/types.py +1 -1
  55. biolib/runtime/__init__.py +14 -1
  56. biolib/sdk/__init__.py +29 -5
  57. biolib/typing_utils.py +2 -7
  58. biolib/user/sign_in.py +10 -14
  59. biolib/utils/__init__.py +1 -1
  60. biolib/utils/app_uri.py +11 -4
  61. biolib/utils/cache_state.py +2 -2
  62. biolib/utils/seq_util.py +38 -30
  63. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/METADATA +1 -1
  64. pybiolib-1.1.2193.dist-info/RECORD +123 -0
  65. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/WHEEL +1 -1
  66. biolib/biolib_api_client/biolib_account_api.py +0 -8
  67. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -34
  68. biolib/experiments/types.py +0 -9
  69. biolib/lfs/__init__.py +0 -6
  70. biolib/lfs/utils.py +0 -237
  71. biolib/runtime/results.py +0 -20
  72. pybiolib-1.1.1747.dist-info/RECORD +0 -108
  73. /biolib/{lfs → _internal/lfs}/cache.py +0 -0
  74. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/LICENSE +0 -0
  75. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/entry_points.txt +0 -0
biolib/app/search_apps.py CHANGED
@@ -7,41 +7,37 @@ from biolib.typing_utils import Optional, List
 def search_apps(
     search_query: Optional[str] = None,
     team: Optional[str] = None,
-    count: int = 100
-) -> List[str]:
-
+    count: int = 100,
+) -> List[str]:
     query_exceeded_page_size = False
     params = {
         'page_size': count,
     }
     if team:
-        if not team.startswith("@"):
-            team = "@biolib.com/" + team
+        if not team.startswith('@'):
+            team = '@biolib.com/' + team
         params['account_handle'] = team
 
     if search_query:
         params['search'] = search_query
 
-    apps_json = api.client.get(
-        path='/apps/',
-        params=params
-    ).json()
+    apps_json = api.client.get(path='/apps/', params=params).json()
     if apps_json['count'] > count:
         query_exceeded_page_size = True
 
     apps = [app['resource_uri'] for app in apps_json['results']]
 
-    if not utils.BASE_URL_IS_PUBLIC_BIOLIB and (not team or team.lower().startswith("@biolib.com")):
+    if not utils.BASE_URL_IS_PUBLIC_BIOLIB and (not team or team.lower().startswith('@biolib.com')):
         # Also get federated apps if running on enterprise deployment
         public_biolib_apps_json = api.client.get(
             authenticate=False,
             path='https://biolib.com/api/apps/',
-            params=params
+            params=params,
         ).json()
         if public_biolib_apps_json['count'] > count:
             query_exceeded_page_size = True
 
-        apps.extend([f'@biolib.com/{app["resource_uri"]}' for app in public_biolib_apps_json['results']])
+        apps.extend([f"@biolib.com/{app['resource_uri']}" for app in public_biolib_apps_json['results']])
 
     if query_exceeded_page_size:
         print(f'Search results exceeded {count}, use the argument "count" to increase the amount of results returned')
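For reference, a minimal usage sketch of the reworked search_apps helper; the query below is hypothetical and only parameters visible in this diff are used.

    from biolib.app.search_apps import search_apps

    # Hypothetical query; returns a list of app resource URIs.
    # Raise `count` if the "Search results exceeded" warning is printed.
    app_uris = search_apps(search_query='alignment', count=250)
    print(app_uris)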
biolib/biolib_api_client/api_client.py CHANGED
@@ -6,6 +6,7 @@ import os
 from datetime import datetime, timezone
 from json.decoder import JSONDecodeError
 
+from biolib._runtime.runtime import Runtime
 from biolib._internal.http_client import HttpClient
 from biolib.typing_utils import Optional
 from biolib.biolib_errors import BioLibError
@@ -61,16 +62,18 @@ class _ApiClient:
             return
 
         if self.access_token:
-            decoded_token = self._decode_jwt_without_checking_signature(self.access_token)
+            decoded_token = self.decode_jwt_without_checking_signature(self.access_token)
             if datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60:  # 60 second buffer
                 # Token has not expired yet
                 return
 
         # TODO: Implement nicer error handling
         try:
-            response = HttpClient.request(method='POST',
-                                          url=f'{self.base_url}/api/user/token/refresh/',
-                                          data={'refresh': self.refresh_token})
+            response = HttpClient.request(
+                method='POST',
+                url=f'{self.base_url}/api/user/token/refresh/',
+                data={'refresh': self.refresh_token},
+            )
         except Exception as exception:
             logger.error('Sign in with refresh token failed')
             raise exception
@@ -111,9 +114,11 @@ class _ApiClient:
     def sign_in_with_api_token(self, api_token: str) -> None:
         logger_no_user_data.debug('ApiClient: Signing in with BIOLIB_TOKEN...')
         try:
-            response = HttpClient.request(method='POST',
-                                          url=f'{self.base_url}/api/user/api_tokens/exchange/',
-                                          data={'token': api_token})
+            response = HttpClient.request(
+                method='POST',
+                url=f'{self.base_url}/api/user/api_tokens/exchange/',
+                data={'token': api_token},
+            )
         except Exception as exception:
             logger.error('Sign in with API token failed')
             raise exception
@@ -127,7 +132,7 @@ class _ApiClient:
         self.refresh_token = json_response['refresh_token']
 
     @staticmethod
-    def _decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
+    def decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
         jwt_bytes = jwt.encode('utf-8')
 
         try:
@@ -187,9 +192,16 @@ class BiolibApiClient:
         api_client.refresh_access_token()
 
     @staticmethod
-    def assert_is_signed_in(authenticated_action_description: str) -> None:
+    def is_reauthentication_needed() -> bool:
         api_client = BiolibApiClient.get()
-        if not api_client.is_signed_in:
+        if not api_client.is_signed_in and not Runtime.check_is_environment_biolib_app():
+            return True
+        else:
+            return False
+
+    @staticmethod
+    def assert_is_signed_in(authenticated_action_description: str) -> None:
+        if BiolibApiClient.is_reauthentication_needed():
             raise BioLibError(
                 f'You must be signed in to {authenticated_action_description}. '
                 f'Please set the environment variable "BIOLIB_TOKEN"'
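A minimal sketch of the new re-authentication check, assuming the imports shown in this file; the behaviour inside a BioLib app environment follows the Runtime check added above.

    from biolib.biolib_api_client.api_client import BiolibApiClient

    # False when a token is present or when running inside a BioLib app environment.
    if BiolibApiClient.is_reauthentication_needed():
        print('Set BIOLIB_TOKEN or run "biolib login" before continuing.')
    else:
        BiolibApiClient.assert_is_signed_in('push an application')  # raises only when re-authentication is needed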
biolib/biolib_api_client/app_types.py CHANGED
@@ -1,7 +1,7 @@
 from enum import Enum
 
-from biolib.typing_utils import TypedDict, List, Optional, Dict, Literal
 from biolib.biolib_api_client.common_types import SemanticVersion
+from biolib.typing_utils import Dict, List, Literal, Optional, TypedDict
 
 
 class AppVersionSlim(SemanticVersion):
@@ -16,6 +16,7 @@ class AppVersion(AppVersionSlim):
     source_code_license: str
     stdout_render_type: Literal['text', 'markdown']
     main_output_file: Optional[str]
+    app_uri: str
 
 
 class App(TypedDict):
biolib/biolib_api_client/biolib_job_api.py CHANGED
@@ -35,7 +35,7 @@ def encode_multipart(data, files):
         line_array.append('')
 
     data_encoded = b'\r\n'.join([line.encode() if isinstance(line, str) else line for line in line_array])
-    return 'multipart/form-data; boundary={}'.format(boundary), data_encoded
+    return f'multipart/form-data; boundary={boundary}', data_encoded
 
 
 def _get_git_branch_name() -> str:
@@ -46,6 +46,7 @@ class BiolibJobApi:
         experiment_uuid: Optional[str] = None,
         timeout: Optional[int] = None,
         notify: bool = False,
+        requested_machine_count: Optional[int] = None,
     ):
         data = {
             'app_version_id': app_version_id,
@@ -73,6 +74,9 @@ class BiolibJobApi:
                 'requested_machine': machine
             })
 
+        if requested_machine_count:
+            data.update({'requested_machine_count': requested_machine_count})
+
         if experiment_uuid:
             data['experiment_uuid'] = experiment_uuid
 
@@ -156,6 +160,7 @@ class BiolibJobApi:
         caller_job_uuid: Optional[str] = None,
         requested_timeout_seconds: Optional[int] = None,
         notify: bool = False,
+        requested_machine_count: Optional[int] = None,
     ) -> Dict:
         job_dict: Dict = biolib.api.client.post(
             path='/jobs/create_job_with_data/',
@@ -171,6 +176,7 @@ class BiolibJobApi:
                 'client-version': BIOLIB_PACKAGE_VERSION,
                 'experiment-uuid': experiment_uuid,
                 'requested-machine': requested_machine,
+                'requested-machine-count': str(requested_machine_count) if requested_machine_count else None,
                 'result-name-prefix': result_name_prefix,
                 'requested-timeout-seconds': str(requested_timeout_seconds) if requested_timeout_seconds else None,
                 'notify': 'true' if notify else 'false',
biolib/biolib_api_client/job_types.py CHANGED
@@ -1,9 +1,8 @@
 from enum import Enum
 
-from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo
-from biolib.typing_utils import TypedDict, Optional, List
-
 from biolib.biolib_api_client.app_types import AppVersionOnJob, RemoteHost
+from biolib.compute_node.webserver.webserver_types import ComputeNodeInfo
+from biolib.typing_utils import List, Optional, TypedDict
 
 
 class JobState(Enum):
@@ -15,6 +14,7 @@ class JobState(Enum):
 
 
 class _Job(TypedDict):
+    app_uri: str
     app_version: AppVersionOnJob
     arguments_override_command: bool
     auth_token: str
@@ -22,10 +22,10 @@ class _Job(TypedDict):
     created_at: str
     federated_job_uuid: Optional[str]
     public_id: str
-    uuid: str
     remote_hosts_with_warning: List[RemoteHost]
     state: str
     user_id: Optional[str]
+    uuid: str
 
 
 # type optional keys with total=False
biolib/biolib_api_client/lfs_types.py CHANGED
@@ -1,13 +1,19 @@
 from biolib.typing_utils import TypedDict
 
 
-class LargeFileSystemVersion(TypedDict):
+class DataRecordVersion(TypedDict):
     presigned_download_url: str
     size_bytes: int
     uri: str
     uuid: str
 
 
-class LargeFileSystem(TypedDict):
+class DataRecordInfo(TypedDict):
     uri: str
     uuid: str
+
+
+class DataRecordVersionInfo(TypedDict):
+    resource_uri: str
+    resource_uuid: str
+    resource_version_uuid: str
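The renamed TypedDicts are plain dictionaries; a hypothetical instance for illustration (all values below are placeholders, not data from the package).

    from biolib.biolib_api_client.lfs_types import DataRecordInfo, DataRecordVersionInfo

    record: DataRecordInfo = {
        'uri': 'biolib.com/my-account/my-record',  # placeholder URI
        'uuid': '11111111-2222-3333-4444-555555555555',  # placeholder UUID
    }
    version: DataRecordVersionInfo = {
        'resource_uri': record['uri'],
        'resource_uuid': record['uuid'],
        'resource_version_uuid': '66666666-7777-8888-9999-000000000000',  # placeholder UUID
    }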
biolib/biolib_binary_format/remote_endpoints.py CHANGED
@@ -1,25 +1,27 @@
 from datetime import datetime, timedelta
-# from urllib.parse import urlparse, parse_qs
-
-from biolib.biolib_logging import logger
 
 from biolib.biolib_api_client.biolib_job_api import BiolibJobApi
 from biolib.biolib_binary_format.utils import RemoteEndpoint
 
+# from urllib.parse import urlparse, parse_qs
+from biolib.biolib_logging import logger
+from biolib.typing_utils import Literal
+
 
-class RemoteJobStorageResultEndpoint(RemoteEndpoint):
-    def __init__(self, job_id: str, job_auth_token: str):
-        self._job_id = job_id
-        self._job_auth_token = job_auth_token
+class RemoteJobStorageEndpoint(RemoteEndpoint):
+    def __init__(self, job_uuid: str, job_auth_token: str, storage_type: Literal['input', 'output']):
         self._expires_at = None
+        self._job_auth_token = job_auth_token
+        self._job_uuid = job_uuid
         self._presigned_url = None
+        self._storage_type: Literal['input', 'output'] = storage_type
 
     def get_remote_url(self):
         if not self._presigned_url or datetime.utcnow() > self._expires_at:
             self._presigned_url = BiolibJobApi.get_job_storage_download_url(
                 job_auth_token=self._job_auth_token,
-                job_uuid=self._job_id,
-                storage_type='results'
+                job_uuid=self._job_uuid,
+                storage_type='results' if self._storage_type == 'output' else 'input',
             )
             self._expires_at = datetime.utcnow() + timedelta(minutes=8)
             # TODO: Use expires at from url
@@ -27,6 +29,6 @@ class RemoteJobStorageResultEndpoint(RemoteEndpoint):
             # query_params = parse_qs(parsed_url.query)
             # time_at_generation = datetime.datetime.strptime(query_params['X-Amz-Date'][0], '%Y%m%dT%H%M%SZ')
            # self._expires_at = time_at_generation + timedelta(seconds=int(query_params['X-Amz-Expires'][0]))
-            logger.debug(f'Job "{self._job_id}" fetched presigned URL with expiry at {self._expires_at.isoformat()}')
+            logger.debug(f'Job "{self._job_uuid}" fetched presigned URL with expiry at {self._expires_at.isoformat()}')
 
         return self._presigned_url
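A sketch of the renamed endpoint class based only on the constructor shown above; the job UUID and auth token are placeholders.

    from biolib.biolib_binary_format.remote_endpoints import RemoteJobStorageEndpoint

    endpoint = RemoteJobStorageEndpoint(
        job_uuid='00000000-0000-0000-0000-000000000000',  # placeholder job UUID
        job_auth_token='placeholder-job-auth-token',
        storage_type='output',  # 'output' maps to the 'results' storage type server-side
    )
    presigned_url = endpoint.get_remote_url()  # cached and refreshed shortly before expiry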
biolib/biolib_binary_format/utils.py CHANGED
@@ -1,6 +1,8 @@
 from abc import ABC, abstractmethod
 import io
-
+import math
+from typing import Optional, Callable
+from biolib.typing_utils import Iterator
 from biolib._internal.http_client import HttpClient
 
 
@@ -106,10 +108,18 @@ class InMemoryIndexableBuffer(IndexableBuffer):
 
 class LazyLoadedFile:
 
-    def __init__(self, path: str, buffer: IndexableBuffer, start: int, length: int):
+    def __init__(
+        self,
+        path: str,
+        buffer: IndexableBuffer,
+        start: Optional[int],
+        length: int,
+        start_func: Optional[Callable[[], int]] = None,
+    ):
         self._path = path
         self._buffer = buffer
         self._start = start
+        self._start_func = start_func
         self._length = length
 
     def __repr__(self) -> str:
@@ -119,8 +129,16 @@ class LazyLoadedFile:
     def path(self) -> str:
         return self._path
 
+    @property
+    def name(self) -> str:
+        return self._path.split('/')[-1]
+
     @property
     def start(self) -> int:
+        if self._start is None:
+            assert self._start_func is not None, 'No start function or start value'
+            self._start = self._start_func()
+
         return self._start
 
     @property
@@ -130,5 +148,24 @@
     def get_file_handle(self) -> io.BufferedIOBase:
         return io.BytesIO(self.get_data())
 
-    def get_data(self) -> bytes:
-        return self._buffer.get_data(start=self._start, length=self._length)
+    def get_data(self, start=0, length=None) -> bytes:
+        start_offset = start + self.start
+        # make sure length doesn't go outside file boundaries
+        length_to_end_of_file = max(self._length - start, 0)
+        if length is None:
+            length_to_request = length_to_end_of_file
+        else:
+            length_to_request = min(length, length_to_end_of_file)
+        return self._buffer.get_data(start=start_offset, length=length_to_request)
+
+    def get_data_iterator(self) -> Iterator[bytes]:
+        if self._length == 0:
+            yield b''
+        else:
+            chunk_size = 10_000_000
+            chunks_to_yield = math.ceil(self._length / chunk_size)
+            for chunk_idx in range(chunks_to_yield - 1):
+                yield self._buffer.get_data(start=self.start+chunk_idx*chunk_size, length=chunk_size)
+            data_already_yielded = (chunks_to_yield - 1)*chunk_size
+            yield self._buffer.get_data(start=self.start+data_already_yielded,
+                                        length=self._length - data_already_yielded)
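A sketch of the new ranged get_data and the streaming get_data_iterator, assuming the files come from DataRecord.list_files() as used by the data-record CLI later in this diff; the URI is a placeholder.

    from biolib._data_record.data_record import DataRecord

    record = DataRecord.get_by_uri(uri='biolib.com/my-account/my-record')  # placeholder URI
    for lazy_file in record.list_files():
        header = lazy_file.get_data(start=0, length=1024)  # ranged read, clamped to the file size
        print(lazy_file.path, len(header))
        with open(lazy_file.name, 'wb') as local_file:
            for chunk in lazy_file.get_data_iterator():  # streams the file in 10 MB chunks
                local_file.write(chunk)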
biolib/cli/__init__.py CHANGED
@@ -5,7 +5,7 @@ import click
 
 from biolib import utils
 from biolib.biolib_logging import logger, logger_no_user_data
-from biolib.cli import init, lfs, push, run, start, runtime, download_container
+from biolib.cli import auth, data_record, download_container, init, lfs, push, run, runtime, start
 
 
 @click.version_option(version=utils.BIOLIB_PACKAGE_VERSION, prog_name='pybiolib')
@@ -20,13 +20,17 @@ def cli() -> None:
     logger_no_user_data.configure(default_log_level=logging.WARNING)
 
 
+cli.add_command(auth.login)
+cli.add_command(auth.logout)
+cli.add_command(auth.whoami)
+cli.add_command(download_container.download_container)
 cli.add_command(init.init)
 cli.add_command(lfs.lfs)
 cli.add_command(push.push)
 cli.add_command(run.run)
 cli.add_command(runtime.runtime)
 cli.add_command(start.start)
-cli.add_command(download_container.download_container)
+cli.add_command(data_record.data_record)
 
 # allow this script to be called without poetry in dev e.g. by an IDE debugger
 if utils.IS_DEV and __name__ == '__main__':
biolib/cli/auth.py ADDED
@@ -0,0 +1,58 @@
+import logging
+import sys
+
+import click
+
+from biolib import api, biolib_errors
+from biolib.biolib_api_client.api_client import BiolibApiClient
+from biolib.biolib_logging import logger, logger_no_user_data
+from biolib.user import sign_in, sign_out
+
+
+@click.command(help='Login your to BioLib account with web browser')
+@click.option(
+    '-w',
+    is_flag=True,
+    default=False,
+    required=False,
+    type=bool,
+    help='Automatically open the login page in the default web browser',
+)
+def login(w: bool) -> None:  # pylint: disable=invalid-name
+    logger.configure(default_log_level=logging.INFO)
+    logger_no_user_data.configure(default_log_level=logging.INFO)
+    sign_in(open_in_default_browser=w)
+
+
+@click.command(help='Logout of your BioLib account')
+def logout() -> None:
+    logger.configure(default_log_level=logging.INFO)
+    logger_no_user_data.configure(default_log_level=logging.INFO)
+    sign_out()
+
+
+@click.command(help='Prints out the full name of the user logged in')
+def whoami() -> None:
+    client = BiolibApiClient.get()
+    if client.is_signed_in:
+        user_uuid = None
+        if client.access_token is None:
+            print('Unable to fetch user credentials. Please try logging out and logging in again.')
+            exit(1)
+        try:
+            user_uuid = client.decode_jwt_without_checking_signature(jwt=client.access_token)['payload']['public_id']
+        except biolib_errors.BioLibError as error:
+            print(
+                f'Unable to reference user public_id in access token:\n {error.message}',
+                file=sys.stderr,
+            )
+            exit(1)
+        response = api.client.get(path=f'/user/{user_uuid}/')
+        user_dict = response.json()
+        email = user_dict['email']
+        intrinsic_account = [account for account in user_dict['accounts'] if account['role'] == 'intrinsic'][0]
+        display_name = intrinsic_account['display_name']
+        print(f'Name: {display_name}\nEmail: {email}\nLogged into: {client.base_url}')
+    else:
+        print('Not logged in', file=sys.stderr)
+        exit(1)
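The new auth commands wrap the existing sign-in helpers; a minimal Python equivalent, assuming only the sign_in and sign_out functions imported above.

    from biolib.user import sign_in, sign_out

    sign_in(open_in_default_browser=True)  # what `biolib login -w` runs
    sign_out()                             # what `biolib logout` runs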
biolib/cli/data_record.py ADDED
@@ -0,0 +1,80 @@
+import json
+import logging
+import os
+from typing import Dict, List
+
+import click
+
+from biolib._data_record.data_record import DataRecord
+from biolib.biolib_logging import logger, logger_no_user_data
+from biolib.typing_utils import Optional
+
+
+@click.group(help='Data Records')
+def data_record() -> None:
+    logger.configure(default_log_level=logging.INFO)
+    logger_no_user_data.configure(default_log_level=logging.INFO)
+
+
+@data_record.command(help='Create a Data Record')
+@click.argument('uri', required=True)
+@click.option('--data-path', required=True, type=click.Path(exists=True))
+@click.option('--record-type', required=False, type=str, default=None)
+def create(uri: str, data_path: str, record_type: Optional[str]) -> None:
+    DataRecord.create(destination=uri, data_path=data_path, record_type=record_type)
+
+
+@data_record.command(help='Update a Data Record')
+@click.argument('uri', required=True)
+@click.option('--data-path', required=True, type=click.Path(exists=True))
+@click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
+def update(uri: str, data_path: str, chunk_size: Optional[int]) -> None:
+    DataRecord.get_by_uri(uri=uri).update(data_path=data_path, chunk_size_in_mb=chunk_size)
+
+
+@data_record.command(help='Download files from a Data Record')
+@click.argument('uri', required=True)
+@click.option('--file', required=False, type=str)
+@click.option('--path-filter', required=False, type=str, hide_input=True)
+def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
+    record = DataRecord.get_by_uri(uri=uri)
+    if file is not None:
+        try:
+            file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file][0]
+        except IndexError:
+            raise Exception('File not found in data record') from None
+
+        assert not os.path.exists(file_obj.name), 'File already exists in current directory'
+        with open(file_obj.name, 'wb') as file_handle:
+            file_handle.write(file_obj.get_data())
+
+    else:
+        assert not os.path.exists(record.name), f'Directory with name {record.name} already exists in current directory'
+        record.save_files(output_dir=record.name, path_filter=path_filter)
+
+
+@data_record.command(help='Describe a Data Record')
+@click.argument('uri', required=True)
+@click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
+def describe(uri: str, output_as_json: bool) -> None:
+    record = DataRecord.get_by_uri(uri)
+    files_info: List[Dict] = []
+    total_size_in_bytes = 0
+    for file in record.list_files():
+        files_info.append({'path': file.path, 'size_bytes': file.length})
+        total_size_in_bytes += file.length
+
+    if output_as_json:
+        print(
+            json.dumps(
+                obj={'uri': record.uri, 'size_bytes': total_size_in_bytes, 'files': files_info},
+                indent=4,
+            )
+        )
+    else:
+        print(f'Data Record {record.uri}\ntotal {total_size_in_bytes} bytes\n')
+        print('size bytes path')
+        for file_info in files_info:
+            size_string = str(file_info['size_bytes'])
+            leading_space_string = ' ' * (10 - len(size_string))
+            print(f"{leading_space_string}{size_string} {file_info['path']}")
biolib/cli/download_container.py CHANGED
@@ -1,10 +1,12 @@
 import logging
+
 import click
+
 from biolib.biolib_download_container import download_container_from_uri
 from biolib.biolib_logging import logger, logger_no_user_data
 
 
-@click.command(help='Push an application to BioLib', name='download-container')
+@click.command(help='Pull an application from BioLib', name='download-container', hidden=True)
 @click.argument('uri')
 def download_container(uri: str) -> None:
     logger.configure(default_log_level=logging.INFO)
biolib/cli/init.py CHANGED
@@ -2,6 +2,7 @@ import os
 import sys
 
 import click
+
 from biolib import templates
 
 
biolib/cli/lfs.py CHANGED
@@ -1,12 +1,15 @@
+import json
 import logging
+import os
 import sys
+from typing import Dict, List
 
 import click
 
-import biolib.lfs
 from biolib import biolib_errors
-from biolib.biolib_logging import logger_no_user_data, logger
-from biolib.lfs import push_large_file_system, create_large_file_system, describe_large_file_system, prune_lfs_cache
+from biolib._data_record.data_record import DataRecord
+from biolib._internal.lfs import prune_lfs_cache
+from biolib.biolib_logging import logger, logger_no_user_data
 from biolib.typing_utils import Optional
 
 
@@ -18,9 +21,10 @@ def lfs() -> None:
 @lfs.command(help='Create a Large File System')
 @click.argument('uri', required=True)
 def create(uri: str) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record create" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
-    create_large_file_system(lfs_uri=uri)
+    DataRecord.create(destination=uri)
 
 
 @lfs.command(help='Push a new version of a Large File System')
@@ -28,10 +32,11 @@ def create(uri: str) -> None:
 @click.option('--path', required=True, type=click.Path(exists=True))
 @click.option('--chunk-size', default=None, required=False, type=click.INT, help='The size of each chunk (In MB)')
 def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record update" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
     try:
-        push_large_file_system(lfs_uri=uri, input_dir=path, chunk_size_in_mb=chunk_size)
+        DataRecord.get_by_uri(uri=uri).update(data_path=path, chunk_size_in_mb=chunk_size)
     except biolib_errors.BioLibError as error:
         print(f'An error occurred:\n{error.message}', file=sys.stderr)
         exit(1)
@@ -41,12 +46,20 @@ def push(uri: str, path: str, chunk_size: Optional[int]) -> None:
 @click.argument('uri', required=True)
 @click.option('--file-path', required=True, type=str)
 def download_file(uri: str, file_path: str) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record download" instead.')
     logger.configure(default_log_level=logging.INFO)
     logger_no_user_data.configure(default_log_level=logging.INFO)
     try:
-        data = biolib.lfs.get_file_data_from_large_file_system(lfs_uri=uri, file_path=file_path)
-        with open(file_path, mode='wb') as file:
-            file.write(data)
+        record = DataRecord.get_by_uri(uri=uri)
+        try:
+            file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file_path][0]
+        except IndexError:
+            raise Exception('File not found in data record') from None
+
+        assert not os.path.exists(file_obj.name), 'File already exists in current directory'
+        with open(file_obj.name, 'wb') as file_handle:
+            file_handle.write(file_obj.get_data())
+
     except biolib_errors.BioLibError as error:
         print(f'An error occurred:\n{error.message}', file=sys.stderr)
         exit(1)
@@ -54,9 +67,30 @@ def download_file(uri: str, file_path: str) -> None:
 
 @lfs.command(help='Describe a Large File System')
 @click.argument('uri', required=True)
-@click.option('--json', is_flag=True, default=False, required=False, help='Format output as JSON')
-def describe(uri: str, json: bool) -> None:
-    describe_large_file_system(lfs_uri=uri, output_as_json=json)
+@click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
+def describe(uri: str, output_as_json: bool) -> None:
+    logger.warning('This is command deprecated, please use "biolib data-record describe" instead.')
+    data_record = DataRecord.get_by_uri(uri)
+    files_info: List[Dict] = []
+    total_size_in_bytes = 0
+    for file in data_record.list_files():
+        files_info.append({'path': file.path, 'size_bytes': file.length})
+        total_size_in_bytes += file.length
+
+    if output_as_json:
+        print(
+            json.dumps(
+                obj={'uri': data_record.uri, 'size_bytes': total_size_in_bytes, 'files': files_info},
+                indent=4,
+            )
+        )
+    else:
+        print(f'Large File System {data_record.uri}\ntotal {total_size_in_bytes} bytes\n')
+        print('size bytes path')
+        for file_info in files_info:
+            size_string = str(file_info['size_bytes'])
+            leading_space_string = ' ' * (10 - len(size_string))
+            print(f"{leading_space_string}{size_string} {file_info['path']}")
 
 
 @lfs.command(help='Prune LFS cache', hidden=True)
biolib/cli/push.py CHANGED
@@ -3,8 +3,8 @@ from typing import Optional
 
 import click
 
-from biolib.biolib_logging import logger, logger_no_user_data
 from biolib._internal.push_application import push_application
+from biolib.biolib_logging import logger, logger_no_user_data
 
 
 @click.command(help='Push an application to BioLib')