pybiolib 1.1.1629__py3-none-any.whl → 1.1.1881__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. biolib/__init__.py +11 -3
  2. biolib/_internal/data_record/__init__.py +1 -0
  3. biolib/_internal/data_record/data_record.py +153 -0
  4. biolib/_internal/data_record/remote_storage_endpoint.py +27 -0
  5. biolib/_internal/http_client.py +45 -15
  6. biolib/_internal/push_application.py +22 -37
  7. biolib/_internal/runtime.py +73 -0
  8. biolib/_internal/utils/__init__.py +18 -0
  9. biolib/api/client.py +12 -6
  10. biolib/app/app.py +6 -1
  11. biolib/app/search_apps.py +8 -12
  12. biolib/biolib_api_client/api_client.py +14 -9
  13. biolib/biolib_api_client/app_types.py +1 -0
  14. biolib/biolib_api_client/auth.py +0 -12
  15. biolib/biolib_api_client/biolib_app_api.py +53 -27
  16. biolib/biolib_api_client/biolib_job_api.py +11 -40
  17. biolib/biolib_binary_format/utils.py +19 -2
  18. biolib/cli/__init__.py +9 -3
  19. biolib/cli/auth.py +58 -0
  20. biolib/cli/data_record.py +43 -0
  21. biolib/cli/download_container.py +3 -1
  22. biolib/cli/init.py +1 -0
  23. biolib/cli/lfs.py +39 -9
  24. biolib/cli/push.py +1 -1
  25. biolib/cli/run.py +3 -2
  26. biolib/cli/start.py +1 -0
  27. biolib/compute_node/cloud_utils/cloud_utils.py +38 -65
  28. biolib/compute_node/job_worker/cache_state.py +1 -1
  29. biolib/compute_node/job_worker/executors/docker_executor.py +10 -8
  30. biolib/compute_node/job_worker/job_storage.py +9 -13
  31. biolib/compute_node/job_worker/job_worker.py +10 -4
  32. biolib/compute_node/remote_host_proxy.py +48 -11
  33. biolib/compute_node/webserver/worker_thread.py +2 -2
  34. biolib/jobs/job.py +33 -32
  35. biolib/lfs/__init__.py +0 -2
  36. biolib/lfs/utils.py +23 -115
  37. biolib/runtime/__init__.py +13 -1
  38. biolib/sdk/__init__.py +17 -4
  39. biolib/user/sign_in.py +8 -12
  40. biolib/utils/__init__.py +17 -45
  41. biolib/utils/app_uri.py +11 -4
  42. biolib/utils/cache_state.py +2 -2
  43. biolib/utils/multipart_uploader.py +42 -68
  44. biolib/utils/seq_util.py +47 -9
  45. biolib/utils/zip/remote_zip.py +9 -17
  46. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/METADATA +1 -2
  47. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/RECORD +50 -46
  48. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/WHEEL +1 -1
  49. biolib/biolib_api_client/biolib_account_api.py +0 -21
  50. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -53
  51. biolib/runtime/results.py +0 -20
  52. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/LICENSE +0 -0
  53. {pybiolib-1.1.1629.dist-info → pybiolib-1.1.1881.dist-info}/entry_points.txt +0 -0
biolib/app/search_apps.py CHANGED
@@ -7,41 +7,37 @@ from biolib.typing_utils import Optional, List
7
7
  def search_apps(
8
8
  search_query: Optional[str] = None,
9
9
  team: Optional[str] = None,
10
- count: int = 100
11
- ) -> List[str]:
12
-
10
+ count: int = 100,
11
+ ) -> List[str]:
13
12
  query_exceeded_page_size = False
14
13
  params = {
15
14
  'page_size': count,
16
15
  }
17
16
  if team:
18
- if not team.startswith("@"):
19
- team = "@biolib.com/" + team
17
+ if not team.startswith('@'):
18
+ team = '@biolib.com/' + team
20
19
  params['account_handle'] = team
21
20
 
22
21
  if search_query:
23
22
  params['search'] = search_query
24
23
 
25
- apps_json = api.client.get(
26
- path='/apps/',
27
- params=params
28
- ).json()
24
+ apps_json = api.client.get(path='/apps/', params=params).json()
29
25
  if apps_json['count'] > count:
30
26
  query_exceeded_page_size = True
31
27
 
32
28
  apps = [app['resource_uri'] for app in apps_json['results']]
33
29
 
34
- if not utils.BASE_URL_IS_PUBLIC_BIOLIB and (not team or team.lower().startswith("@biolib.com")):
30
+ if not utils.BASE_URL_IS_PUBLIC_BIOLIB and (not team or team.lower().startswith('@biolib.com')):
35
31
  # Also get federated apps if running on enterprise deployment
36
32
  public_biolib_apps_json = api.client.get(
37
33
  authenticate=False,
38
34
  path='https://biolib.com/api/apps/',
39
- params=params
35
+ params=params,
40
36
  ).json()
41
37
  if public_biolib_apps_json['count'] > count:
42
38
  query_exceeded_page_size = True
43
39
 
44
- apps.extend([f'@biolib.com/{app["resource_uri"]}' for app in public_biolib_apps_json['results']])
40
+ apps.extend([f"@biolib.com/{app['resource_uri']}" for app in public_biolib_apps_json['results']])
45
41
 
46
42
  if query_exceeded_page_size:
47
43
  print(f'Search results exceeded {count}, use the argument "count" to increase the amount of results returned')
@@ -6,6 +6,7 @@ import os
6
6
  from datetime import datetime, timezone
7
7
  from json.decoder import JSONDecodeError
8
8
 
9
+ from biolib._internal.runtime import Runtime
9
10
  from biolib._internal.http_client import HttpClient
10
11
  from biolib.typing_utils import Optional
11
12
  from biolib.biolib_errors import BioLibError
@@ -61,16 +62,18 @@ class _ApiClient:
61
62
  return
62
63
 
63
64
  if self.access_token:
64
- decoded_token = self._decode_jwt_without_checking_signature(self.access_token)
65
+ decoded_token = self.decode_jwt_without_checking_signature(self.access_token)
65
66
  if datetime.now(tz=timezone.utc).timestamp() < decoded_token['payload']['exp'] - 60: # 60 second buffer
66
67
  # Token has not expired yet
67
68
  return
68
69
 
69
70
  # TODO: Implement nicer error handling
70
71
  try:
71
- response = HttpClient.request(method='POST',
72
- url=f'{self.base_url}/api/user/token/refresh/',
73
- data={'refresh': self.refresh_token})
72
+ response = HttpClient.request(
73
+ method='POST',
74
+ url=f'{self.base_url}/api/user/token/refresh/',
75
+ data={'refresh': self.refresh_token},
76
+ )
74
77
  except Exception as exception:
75
78
  logger.error('Sign in with refresh token failed')
76
79
  raise exception
@@ -111,9 +114,11 @@ class _ApiClient:
111
114
  def sign_in_with_api_token(self, api_token: str) -> None:
112
115
  logger_no_user_data.debug('ApiClient: Signing in with BIOLIB_TOKEN...')
113
116
  try:
114
- response = HttpClient.request(method='POST',
115
- url=f'{self.base_url}/api/user/api_tokens/exchange/',
116
- data={'token': api_token})
117
+ response = HttpClient.request(
118
+ method='POST',
119
+ url=f'{self.base_url}/api/user/api_tokens/exchange/',
120
+ data={'token': api_token},
121
+ )
117
122
  except Exception as exception:
118
123
  logger.error('Sign in with API token failed')
119
124
  raise exception
@@ -127,7 +132,7 @@ class _ApiClient:
127
132
  self.refresh_token = json_response['refresh_token']
128
133
 
129
134
  @staticmethod
130
- def _decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
135
+ def decode_jwt_without_checking_signature(jwt: str) -> Dict[str, Any]:
131
136
  jwt_bytes = jwt.encode('utf-8')
132
137
 
133
138
  try:
@@ -189,7 +194,7 @@ class BiolibApiClient:
189
194
  @staticmethod
190
195
  def assert_is_signed_in(authenticated_action_description: str) -> None:
191
196
  api_client = BiolibApiClient.get()
192
- if not api_client.is_signed_in:
197
+ if not api_client.is_signed_in and not Runtime.check_is_environment_biolib_app():
193
198
  raise BioLibError(
194
199
  f'You must be signed in to {authenticated_action_description}. '
195
200
  f'Please set the environment variable "BIOLIB_TOKEN"'
@@ -99,6 +99,7 @@ class _AppVersionOnJob(TypedDict):
99
99
 
100
100
  class AppOnJob(TypedDict):
101
101
  allow_client_side_execution: bool
102
+ can_push_data_record_for_user: bool
102
103
  state: Literal['public', 'draft']
103
104
 
104
105
 
@@ -1,20 +1,8 @@
1
- from requests.auth import AuthBase # type: ignore
2
-
3
1
  from biolib import api
4
2
  from biolib.biolib_api_client.api_client import UserTokens
5
3
  from biolib.typing_utils import TypedDict, Literal
6
4
 
7
5
 
8
- class BearerAuth(AuthBase):
9
- def __init__(self, access_token=None):
10
- self.access_token = access_token
11
-
12
- def __call__(self, req):
13
- if self.access_token:
14
- req.headers['Authorization'] = 'Bearer ' + self.access_token
15
- return req
16
-
17
-
18
6
  class AuthChallengeCreate(TypedDict):
19
7
  token: str
20
8
 
@@ -1,18 +1,43 @@
1
+ import mimetypes
2
+ import random
1
3
  import re
2
4
  import os
3
5
  import subprocess
4
6
 
5
- import requests
6
-
7
7
  import biolib.api
8
8
  from biolib import biolib_errors
9
+ from biolib._internal.http_client import HttpError
9
10
  from biolib.typing_utils import Optional
10
- from biolib.biolib_api_client.auth import BearerAuth
11
- from biolib.biolib_api_client import BiolibApiClient, AppGetResponse
12
- from biolib.biolib_errors import BioLibError
11
+ from biolib.biolib_api_client import AppGetResponse
13
12
  from biolib.biolib_logging import logger
14
13
 
15
14
 
15
+ def encode_multipart(data, files):
16
+ boundary = f'----------{random.randint(0, 1000000000)}'
17
+ line_array = []
18
+
19
+ for (key, value) in data.items():
20
+ if not value is None:
21
+ line_array.append(f'--{boundary}')
22
+ line_array.append(f'Content-Disposition: form-data; name="{key}"')
23
+ line_array.append('')
24
+ line_array.append(value)
25
+
26
+ for (key, (filename, value)) in files.items():
27
+ line_array.append(f'--{boundary}')
28
+ line_array.append(f'Content-Disposition: form-data; name="{key}"; filename="{filename}"')
29
+ line_array.append(f'Content-Type: {mimetypes.guess_type(filename)[0] or "application/octet-stream"}')
30
+ line_array.append('')
31
+ line_array.append('')
32
+ line_array.append(value)
33
+
34
+ line_array.append(f'--{boundary}--')
35
+ line_array.append('')
36
+
37
+ data_encoded = b'\r\n'.join([line.encode() if isinstance(line, str) else line for line in line_array])
38
+ return f'multipart/form-data; boundary={boundary}', data_encoded
39
+
40
+
16
41
  def _get_git_branch_name() -> str:
17
42
  try:
18
43
  github_actions_branch_name = os.getenv('GITHUB_REF_NAME')
@@ -51,13 +76,10 @@ class BiolibAppApi:
51
76
  app_response: AppGetResponse = response.json()
52
77
  return app_response
53
78
 
54
- except requests.exceptions.HTTPError as error:
55
- if error.response.status_code == 404:
79
+ except HttpError as error:
80
+ if error.code == 404:
56
81
  raise biolib_errors.NotFound(f'Application {uri} not found.') from None
57
82
 
58
- if error.response.status_code == 400:
59
- raise biolib_errors.BioLibError(error.response.content.decode()) from None
60
-
61
83
  raise error
62
84
 
63
85
  @staticmethod
@@ -69,24 +91,28 @@ class BiolibAppApi:
69
91
  set_as_active,
70
92
  app_version_id_to_copy_images_from: Optional[str],
71
93
  ):
72
- response = requests.post(
73
- f'{BiolibApiClient.get().base_url}/api/app_versions/',
74
- files={
75
- 'source_files_zip': zip_binary,
76
- },
77
- data={
78
- 'app': app_id,
79
- 'set_as_active': 'true' if set_as_active else 'false',
80
- 'state': 'published',
81
- 'app_version_id_to_copy_images_from': app_version_id_to_copy_images_from,
82
- 'git_branch_name': _get_git_branch_name(),
83
- 'git_repository_url': _get_git_repository_url(),
84
- },
85
- auth=BearerAuth(BiolibApiClient.get().access_token)
86
- )
87
- if not response.ok:
94
+ try:
95
+ content_type, data_encoded = encode_multipart(
96
+ data={
97
+ 'app': app_id,
98
+ 'set_as_active': 'true' if set_as_active else 'false',
99
+ 'state': 'published',
100
+ 'app_version_id_to_copy_images_from': app_version_id_to_copy_images_from,
101
+ 'git_branch_name': _get_git_branch_name(),
102
+ 'git_repository_url': _get_git_repository_url(),
103
+ },
104
+ files={
105
+ 'source_files_zip': ('source_files.zip', zip_binary),
106
+ }
107
+ )
108
+ response = biolib.api.client.post(
109
+ path='/app_versions/',
110
+ data=data_encoded,
111
+ headers={'Content-Type': content_type},
112
+ )
113
+ except Exception as error:
88
114
  logger.error(f'Push failed for {author}/{app_name}:')
89
- raise BioLibError(response.text)
115
+ raise error
90
116
 
91
117
  # TODO: When response includes the version number, print the URL for the new app version
92
118
  logger.info(f'Initialized new app version for {author}/{app_name}.')
@@ -1,16 +1,12 @@
1
1
  import os
2
- import time
3
2
  from urllib.parse import urlparse
4
3
 
5
- import requests
6
-
7
4
  import biolib.api
8
5
 
9
6
  from biolib import utils
10
- from biolib.biolib_api_client.auth import BearerAuth
11
- from biolib.biolib_api_client import BiolibApiClient, CloudJob, JobState
12
- from biolib.biolib_errors import BioLibError, RetryLimitException, StorageDownloadFailed, JobResultPermissionError, \
13
- JobResultError, JobResultNotFound
7
+ from biolib._internal.http_client import HttpError
8
+ from biolib.biolib_api_client import CloudJob, JobState
9
+ from biolib.biolib_errors import JobResultPermissionError, JobResultError, JobResultNotFound, StorageDownloadFailed
14
10
  from biolib.biolib_logging import logger
15
11
  from biolib.utils import BIOLIB_PACKAGE_VERSION
16
12
  from biolib.typing_utils import TypedDict, Optional, Literal, Dict
@@ -96,35 +92,11 @@ class BiolibJobApi:
96
92
 
97
93
  @staticmethod
98
94
  def create_cloud_job(job_id: str, result_name_prefix: Optional[str]) -> CloudJob:
99
- response = None
100
95
  data = {'job_id': job_id}
101
96
  if result_name_prefix:
102
97
  data['result_name_prefix'] = result_name_prefix
103
98
 
104
- for retry in range(4):
105
- try:
106
- response = requests.post(
107
- f'{BiolibApiClient.get().base_url}/api/jobs/cloud/',
108
- json=data,
109
- auth=BearerAuth(BiolibApiClient.get().access_token)
110
- )
111
-
112
- if response.status_code == 503:
113
- raise RetryLimitException(response.content)
114
- # Handle possible validation errors from backend
115
- elif not response.ok:
116
- raise BioLibError(response.text)
117
-
118
- break
119
-
120
- except RetryLimitException as retry_exception: # pylint: disable=broad-except
121
- if retry > 3:
122
- raise BioLibError('Reached retry limit for cloud job creation') from retry_exception
123
- time.sleep(1)
124
-
125
- if not response:
126
- raise BioLibError('Could not create new cloud job')
127
-
99
+ response = biolib.api.client.post(path='/jobs/cloud/', data=data)
128
100
  cloud_job: CloudJob = response.json()
129
101
  return cloud_job
130
102
 
@@ -136,9 +108,9 @@ class BiolibJobApi:
136
108
  ) -> str:
137
109
  try:
138
110
  response = biolib.api.client.get(
139
- path=f'{BiolibApiClient.get().base_url}/api/jobs/{job_uuid}/storage/{storage_type}/download/',
111
+ path=f'/jobs/{job_uuid}/storage/{storage_type}/download/',
140
112
  authenticate=True,
141
- headers={'Job-Auth-Token': job_auth_token}
113
+ headers={'Job-Auth-Token': job_auth_token},
142
114
  )
143
115
  presigned_s3_download_link_response: PresignedS3DownloadLinkResponse = response.json()
144
116
  presigned_download_url = presigned_s3_download_link_response['presigned_download_url']
@@ -151,21 +123,20 @@ class BiolibJobApi:
151
123
 
152
124
  return presigned_download_url
153
125
 
154
- except requests.exceptions.HTTPError as error:
155
- status_code = error.response.status_code
126
+ except HttpError as error:
156
127
  if storage_type == 'results':
157
- if status_code == 401:
128
+ if error.code == 401:
158
129
  raise JobResultPermissionError('You must be signed in to get result of the job') from None
159
- elif status_code == 403:
130
+ elif error.code == 403:
160
131
  raise JobResultPermissionError(
161
132
  'Cannot get result of job. Maybe the job was created without being signed in?'
162
133
  ) from None
163
- elif status_code == 404:
134
+ elif error.code == 404:
164
135
  raise JobResultNotFound('Job result not found') from None
165
136
  else:
166
137
  raise JobResultError('Failed to get result of job') from error
167
138
  else:
168
- raise StorageDownloadFailed(error.response.content) from error
139
+ raise StorageDownloadFailed(f'Failed to download result of job got error: {error}') from error
169
140
 
170
141
  except Exception as error: # pylint: disable=broad-except
171
142
  if storage_type == 'results':
@@ -1,5 +1,6 @@
1
1
  from abc import ABC, abstractmethod
2
2
  import io
3
+ from typing import Optional, Callable
3
4
 
4
5
  from biolib._internal.http_client import HttpClient
5
6
 
@@ -106,10 +107,18 @@ class InMemoryIndexableBuffer(IndexableBuffer):
106
107
 
107
108
  class LazyLoadedFile:
108
109
 
109
- def __init__(self, path: str, buffer: IndexableBuffer, start: int, length: int):
110
+ def __init__(
111
+ self,
112
+ path: str,
113
+ buffer: IndexableBuffer,
114
+ start: Optional[int],
115
+ length: int,
116
+ start_func: Optional[Callable[[], int]] = None,
117
+ ):
110
118
  self._path = path
111
119
  self._buffer = buffer
112
120
  self._start = start
121
+ self._start_func = start_func
113
122
  self._length = length
114
123
 
115
124
  def __repr__(self) -> str:
@@ -119,8 +128,16 @@ class LazyLoadedFile:
119
128
  def path(self) -> str:
120
129
  return self._path
121
130
 
131
+ @property
132
+ def name(self) -> str:
133
+ return self._path.split('/')[-1]
134
+
122
135
  @property
123
136
  def start(self) -> int:
137
+ if self._start is None:
138
+ assert self._start_func is not None, 'No start function or start value'
139
+ self._start = self._start_func()
140
+
124
141
  return self._start
125
142
 
126
143
  @property
@@ -131,4 +148,4 @@ class LazyLoadedFile:
131
148
  return io.BytesIO(self.get_data())
132
149
 
133
150
  def get_data(self) -> bytes:
134
- return self._buffer.get_data(start=self._start, length=self._length)
151
+ return self._buffer.get_data(start=self.start, length=self._length)
biolib/cli/__init__.py CHANGED
@@ -1,16 +1,18 @@
1
1
  import logging
2
+ import sys
2
3
 
3
4
  import click
4
5
 
5
6
  from biolib import utils
6
7
  from biolib.biolib_logging import logger, logger_no_user_data
7
- from biolib.cli import init, lfs, push, run, start, runtime, download_container
8
+ from biolib.cli import auth, data_record, download_container, init, lfs, push, run, runtime, start
8
9
 
9
10
 
10
11
  @click.version_option(version=utils.BIOLIB_PACKAGE_VERSION, prog_name='pybiolib')
11
12
  @click.group(context_settings=dict(help_option_names=['-h', '--help']))
12
13
  def cli() -> None:
13
- logger_no_user_data.debug(f'pybiolib, version {utils.BIOLIB_PACKAGE_VERSION}')
14
+ logger_no_user_data.debug(f'pybiolib {utils.BIOLIB_PACKAGE_VERSION}')
15
+ logger_no_user_data.debug(f'Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}')
14
16
  utils.STREAM_STDOUT = True
15
17
 
16
18
  # set more restrictive default log level for CLI
@@ -18,13 +20,17 @@ def cli() -> None:
18
20
  logger_no_user_data.configure(default_log_level=logging.WARNING)
19
21
 
20
22
 
23
+ cli.add_command(auth.login)
24
+ cli.add_command(auth.logout)
25
+ cli.add_command(auth.whoami)
26
+ cli.add_command(download_container.download_container)
21
27
  cli.add_command(init.init)
22
28
  cli.add_command(lfs.lfs)
23
29
  cli.add_command(push.push)
24
30
  cli.add_command(run.run)
25
31
  cli.add_command(runtime.runtime)
26
32
  cli.add_command(start.start)
27
- cli.add_command(download_container.download_container)
33
+ cli.add_command(data_record.data_record)
28
34
 
29
35
  # allow this script to be called without poetry in dev e.g. by an IDE debugger
30
36
  if utils.IS_DEV and __name__ == '__main__':
biolib/cli/auth.py ADDED
@@ -0,0 +1,58 @@
1
+ import logging
2
+ import sys
3
+
4
+ import click
5
+
6
+ from biolib import api, biolib_errors
7
+ from biolib.biolib_api_client.api_client import BiolibApiClient
8
+ from biolib.biolib_logging import logger, logger_no_user_data
9
+ from biolib.user import sign_in, sign_out
10
+
11
+
12
+ @click.command(help='Login your to BioLib account with web browser')
13
+ @click.option(
14
+ '-w',
15
+ is_flag=True,
16
+ default=False,
17
+ required=False,
18
+ type=bool,
19
+ help='Automatically open the login page in the default web browser',
20
+ )
21
+ def login(w: bool) -> None: # pylint: disable=invalid-name
22
+ logger.configure(default_log_level=logging.INFO)
23
+ logger_no_user_data.configure(default_log_level=logging.INFO)
24
+ sign_in(open_in_default_browser=w)
25
+
26
+
27
+ @click.command(help='Logout of your BioLib account')
28
+ def logout() -> None:
29
+ logger.configure(default_log_level=logging.INFO)
30
+ logger_no_user_data.configure(default_log_level=logging.INFO)
31
+ sign_out()
32
+
33
+
34
+ @click.command(help='Prints out the full name of the user logged in')
35
+ def whoami() -> None:
36
+ client = BiolibApiClient.get()
37
+ if client.is_signed_in:
38
+ user_uuid = None
39
+ if client.access_token is None:
40
+ print('Unable to fetch user credentials. Please try logging out and logging in again.')
41
+ exit(1)
42
+ try:
43
+ user_uuid = client.decode_jwt_without_checking_signature(jwt=client.access_token)['payload']['public_id']
44
+ except biolib_errors.BioLibError as error:
45
+ print(
46
+ f'Unable to reference user public_id in access token:\n {error.message}',
47
+ file=sys.stderr,
48
+ )
49
+ exit(1)
50
+ response = api.client.get(path=f'/user/{user_uuid}/')
51
+ user_dict = response.json()
52
+ email = user_dict['email']
53
+ intrinsic_account = [account for account in user_dict['accounts'] if account['role'] == 'intrinsic'][0]
54
+ display_name = intrinsic_account['display_name']
55
+ print(f'Name: {display_name}\nEmail: {email}')
56
+ else:
57
+ print('Not logged in', file=sys.stderr)
58
+ exit(1)
@@ -0,0 +1,43 @@
1
+ import logging
2
+ import os
3
+
4
+ import click
5
+
6
+ from biolib._internal.data_record import DataRecord
7
+ from biolib.biolib_logging import logger, logger_no_user_data
8
+ from biolib.typing_utils import Optional
9
+
10
+
11
+ @click.group(help='Data Records')
12
+ def data_record() -> None:
13
+ logger.configure(default_log_level=logging.INFO)
14
+ logger_no_user_data.configure(default_log_level=logging.INFO)
15
+
16
+
17
+ @data_record.command(help='Create a Data Record')
18
+ @click.option('--destination', type=str, required=True)
19
+ @click.option('--data-path', required=True, type=click.Path(exists=True))
20
+ @click.option('--name', type=str, required=False)
21
+ def create(destination: str, data_path: str, name: Optional[str] = None) -> None:
22
+ DataRecord.create(destination, data_path, name)
23
+
24
+
25
+ @data_record.command(help='Download files from a Data Record')
26
+ @click.argument('uri', required=True)
27
+ @click.option('--file', required=False, type=str)
28
+ @click.option('--path-filter', required=False, type=str, hide_input=True)
29
+ def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
30
+ record = DataRecord(uri=uri)
31
+ if file is not None:
32
+ try:
33
+ file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file][0]
34
+ except IndexError:
35
+ raise Exception('File not found in data record') from None
36
+
37
+ assert not os.path.exists(file_obj.name), 'File already exists in current directory'
38
+ with open(file_obj.name, 'wb') as file_handle:
39
+ file_handle.write(file_obj.get_data())
40
+
41
+ else:
42
+ assert not os.path.exists(record.name), f'Directory with name {record.name} already exists in current directory'
43
+ record.save_files(output_dir=record.name, path_filter=path_filter)
@@ -1,10 +1,12 @@
1
1
  import logging
2
+
2
3
  import click
4
+
3
5
  from biolib.biolib_download_container import download_container_from_uri
4
6
  from biolib.biolib_logging import logger, logger_no_user_data
5
7
 
6
8
 
7
- @click.command(help='Push an application to BioLib', name='download-container')
9
+ @click.command(help='Pull an application from BioLib', name='download-container', hidden=True)
8
10
  @click.argument('uri')
9
11
  def download_container(uri: str) -> None:
10
12
  logger.configure(default_log_level=logging.INFO)
biolib/cli/init.py CHANGED
@@ -2,6 +2,7 @@ import os
2
2
  import sys
3
3
 
4
4
  import click
5
+
5
6
  from biolib import templates
6
7
 
7
8
 
biolib/cli/lfs.py CHANGED
@@ -1,12 +1,15 @@
1
+ import json
1
2
  import logging
3
+ import os
2
4
  import sys
5
+ from typing import Dict, List
3
6
 
4
7
  import click
5
8
 
6
- import biolib.lfs
7
9
  from biolib import biolib_errors
8
- from biolib.biolib_logging import logger_no_user_data, logger
9
- from biolib.lfs import push_large_file_system, create_large_file_system, describe_large_file_system, prune_lfs_cache
10
+ from biolib._internal.data_record import DataRecord
11
+ from biolib.biolib_logging import logger, logger_no_user_data
12
+ from biolib.lfs import create_large_file_system, prune_lfs_cache, push_large_file_system
10
13
  from biolib.typing_utils import Optional
11
14
 
12
15
 
@@ -44,9 +47,16 @@ def download_file(uri: str, file_path: str) -> None:
44
47
  logger.configure(default_log_level=logging.INFO)
45
48
  logger_no_user_data.configure(default_log_level=logging.INFO)
46
49
  try:
47
- data = biolib.lfs.get_file_data_from_large_file_system(lfs_uri=uri, file_path=file_path)
48
- with open(file_path, mode='wb') as file:
49
- file.write(data)
50
+ record = DataRecord(uri=uri)
51
+ try:
52
+ file_obj = [file_obj for file_obj in record.list_files() if file_obj.path == file_path][0]
53
+ except IndexError:
54
+ raise Exception('File not found in data record') from None
55
+
56
+ assert not os.path.exists(file_obj.name), 'File already exists in current directory'
57
+ with open(file_obj.name, 'wb') as file_handle:
58
+ file_handle.write(file_obj.get_data())
59
+
50
60
  except biolib_errors.BioLibError as error:
51
61
  print(f'An error occurred:\n{error.message}', file=sys.stderr)
52
62
  exit(1)
@@ -54,9 +64,29 @@ def download_file(uri: str, file_path: str) -> None:
54
64
 
55
65
  @lfs.command(help='Describe a Large File System')
56
66
  @click.argument('uri', required=True)
57
- @click.option('--json', is_flag=True, default=False, required=False, help='Format output as JSON')
58
- def describe(uri: str, json: bool) -> None:
59
- describe_large_file_system(lfs_uri=uri, output_as_json=json)
67
+ @click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
68
+ def describe(uri: str, output_as_json: bool) -> None:
69
+ data_record = DataRecord(uri)
70
+ files_info: List[Dict] = []
71
+ total_size_in_bytes = 0
72
+ for file in data_record.list_files():
73
+ files_info.append({'path': file.path, 'size_bytes': file.length})
74
+ total_size_in_bytes += file.length
75
+
76
+ if output_as_json:
77
+ print(
78
+ json.dumps(
79
+ obj={'uri': data_record.uri, 'size_bytes': total_size_in_bytes, 'files': files_info},
80
+ indent=4,
81
+ )
82
+ )
83
+ else:
84
+ print(f'Large File System {data_record.uri}\ntotal {total_size_in_bytes} bytes\n')
85
+ print('size bytes path')
86
+ for file_info in files_info:
87
+ size_string = str(file_info['size_bytes'])
88
+ leading_space_string = ' ' * (10 - len(size_string))
89
+ print(f"{leading_space_string}{size_string} {file_info['path']}")
60
90
 
61
91
 
62
92
  @lfs.command(help='Prune LFS cache', hidden=True)
biolib/cli/push.py CHANGED
@@ -3,8 +3,8 @@ from typing import Optional
3
3
 
4
4
  import click
5
5
 
6
- from biolib.biolib_logging import logger, logger_no_user_data
7
6
  from biolib._internal.push_application import push_application
7
+ from biolib.biolib_logging import logger, logger_no_user_data
8
8
 
9
9
 
10
10
  @click.command(help='Push an application to BioLib')