pybiolib 1.1.1747__py3-none-any.whl → 1.1.2193__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. biolib/__init__.py +18 -5
  2. biolib/_data_record/data_record.py +278 -0
  3. biolib/_internal/data_record/__init__.py +1 -0
  4. biolib/_internal/data_record/data_record.py +97 -0
  5. biolib/_internal/data_record/remote_storage_endpoint.py +38 -0
  6. biolib/_internal/file_utils.py +77 -0
  7. biolib/_internal/fuse_mount/__init__.py +1 -0
  8. biolib/_internal/fuse_mount/experiment_fuse_mount.py +209 -0
  9. biolib/_internal/http_client.py +42 -23
  10. biolib/_internal/lfs/__init__.py +1 -0
  11. biolib/_internal/libs/__init__.py +1 -0
  12. biolib/_internal/libs/fusepy/__init__.py +1257 -0
  13. biolib/_internal/push_application.py +22 -37
  14. biolib/_internal/runtime.py +19 -0
  15. biolib/_internal/types/__init__.py +4 -0
  16. biolib/_internal/types/app.py +9 -0
  17. biolib/_internal/types/data_record.py +40 -0
  18. biolib/_internal/types/experiment.py +10 -0
  19. biolib/_internal/types/resource.py +14 -0
  20. biolib/_internal/types/typing.py +7 -0
  21. biolib/_internal/utils/__init__.py +18 -0
  22. biolib/_runtime/runtime.py +80 -0
  23. biolib/api/__init__.py +1 -0
  24. biolib/api/client.py +39 -17
  25. biolib/app/app.py +40 -72
  26. biolib/app/search_apps.py +8 -12
  27. biolib/biolib_api_client/api_client.py +22 -10
  28. biolib/biolib_api_client/app_types.py +2 -1
  29. biolib/biolib_api_client/biolib_app_api.py +1 -1
  30. biolib/biolib_api_client/biolib_job_api.py +6 -0
  31. biolib/biolib_api_client/job_types.py +4 -4
  32. biolib/biolib_api_client/lfs_types.py +8 -2
  33. biolib/biolib_binary_format/remote_endpoints.py +12 -10
  34. biolib/biolib_binary_format/utils.py +41 -4
  35. biolib/cli/__init__.py +6 -2
  36. biolib/cli/auth.py +58 -0
  37. biolib/cli/data_record.py +80 -0
  38. biolib/cli/download_container.py +3 -1
  39. biolib/cli/init.py +1 -0
  40. biolib/cli/lfs.py +45 -11
  41. biolib/cli/push.py +1 -1
  42. biolib/cli/run.py +3 -2
  43. biolib/cli/start.py +1 -0
  44. biolib/compute_node/cloud_utils/cloud_utils.py +15 -18
  45. biolib/compute_node/job_worker/cache_state.py +1 -1
  46. biolib/compute_node/job_worker/executors/docker_executor.py +134 -114
  47. biolib/compute_node/job_worker/job_storage.py +3 -4
  48. biolib/compute_node/job_worker/job_worker.py +31 -15
  49. biolib/compute_node/remote_host_proxy.py +75 -70
  50. biolib/compute_node/webserver/webserver_types.py +0 -1
  51. biolib/experiments/experiment.py +75 -44
  52. biolib/jobs/job.py +125 -47
  53. biolib/jobs/job_result.py +46 -21
  54. biolib/jobs/types.py +1 -1
  55. biolib/runtime/__init__.py +14 -1
  56. biolib/sdk/__init__.py +29 -5
  57. biolib/typing_utils.py +2 -7
  58. biolib/user/sign_in.py +10 -14
  59. biolib/utils/__init__.py +1 -1
  60. biolib/utils/app_uri.py +11 -4
  61. biolib/utils/cache_state.py +2 -2
  62. biolib/utils/seq_util.py +38 -30
  63. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/METADATA +1 -1
  64. pybiolib-1.1.2193.dist-info/RECORD +123 -0
  65. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/WHEEL +1 -1
  66. biolib/biolib_api_client/biolib_account_api.py +0 -8
  67. biolib/biolib_api_client/biolib_large_file_system_api.py +0 -34
  68. biolib/experiments/types.py +0 -9
  69. biolib/lfs/__init__.py +0 -6
  70. biolib/lfs/utils.py +0 -237
  71. biolib/runtime/results.py +0 -20
  72. pybiolib-1.1.1747.dist-info/RECORD +0 -108
  73. /biolib/{lfs → _internal/lfs}/cache.py +0 -0
  74. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/LICENSE +0 -0
  75. {pybiolib-1.1.1747.dist-info → pybiolib-1.1.2193.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,209 @@
1
+ import errno
2
+ import os
3
+ import stat
4
+ from datetime import datetime, timezone
5
+ from time import time
6
+
7
+ from biolib._internal.libs.fusepy import FUSE, FuseOSError, Operations
8
+ from biolib.biolib_errors import BioLibError
9
+ from biolib.jobs import Job
10
+ from biolib.typing_utils import Dict, List, Optional, Tuple, TypedDict
11
+
12
+
13
+ class _AttributeDict(TypedDict):
14
+ st_atime: int
15
+ st_ctime: int
16
+ st_gid: int
17
+ st_mode: int
18
+ st_mtime: int
19
+ st_nlink: int
20
+ st_size: int
21
+ st_uid: int
22
+
23
+
24
+ _SUCCESS_CODE = 0  # Returned by the no-op FUSE callbacks (release, releasedir, flush) to report success
25
+
26
+
27
class ExperimentFuseMount(Operations):
    """Read-only FUSE file system exposing the output files of an experiment's completed jobs.

    The mount root contains one directory per completed job (named by ``job.get_name()``).
    Below each job directory the job's output files are presented using their output paths.
    Every mutating operation is rejected with ``EACCES``.
    """

    def __init__(self, experiment):
        """Create the operations object for ``experiment``; jobs are fetched lazily on first access."""
        self._experiment = experiment
        # Job name -> Job. None means "never fetched"; an empty dict means "fetched, no completed jobs".
        self._job_names_map: Optional[Dict[str, Job]] = None
        self._jobs_last_fetched_at: float = 0.0
        # Used as the timestamp of the mount root directory.
        self._mounted_at_epoch_seconds: int = int(time())

    @staticmethod
    def mount_experiment(experiment, mount_path: str) -> None:
        """Mount ``experiment`` at ``mount_path`` using the bundled fusepy ``FUSE`` class.

        NOTE(review): with ``foreground=True`` this call is expected to block until the file system
        is unmounted -- confirm against the bundled fusepy.
        """
        FUSE(
            operations=ExperimentFuseMount(experiment),
            mountpoint=mount_path,
            nothreads=True,
            foreground=True,
            allow_other=False,
        )

    def getattr(self, path: str, fh=None) -> _AttributeDict:
        """Return stat-like attributes for ``path``.

        Raises:
            FuseOSError: with ``ENOENT`` when ``path`` is neither a job, an output file,
                nor a directory containing output files.
        """
        if path == '/':
            return self._get_directory_attributes(timestamp_epoch_seconds=self._mounted_at_epoch_seconds)

        job, path_in_job = self._parse_path(path)
        job_finished_at_epoch_seconds = self._get_job_finished_at_epoch_seconds(job)

        if path_in_job == '/':
            return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)

        try:
            file = job.get_output_file(path_in_job)
            return self._get_file_attributes(
                timestamp_epoch_seconds=job_finished_at_epoch_seconds,
                size_in_bytes=file.length,
            )
        except BioLibError:
            # Not a file; it may still be a directory, which is checked below.
            pass

        # Directories are implicit: a path is a directory if some output file lives below it.
        # The trailing slash is required so that '/foo' does not match '/foobar.txt'.
        directory_prefix = path_in_job.rstrip('/') + '/'
        if any(file.path.startswith(directory_prefix) for file in job.list_output_files()):
            return self._get_directory_attributes(timestamp_epoch_seconds=job_finished_at_epoch_seconds)

        raise FuseOSError(errno.ENOENT)  # No such file or directory

    def readdir(self, path: str, fh: int) -> List[str]:
        """List the entries of the directory at ``path`` (always including ``.`` and ``..``)."""
        directory_entries = ['.', '..']

        if path == '/':
            # Listing the root is the operation that refreshes the set of completed jobs.
            directory_entries.extend(self._get_job_names_map(refresh_jobs=True).keys())
        else:
            job, path_in_job = self._parse_path(path)
            dir_path_in_job = '/' if path_in_job == '/' else path_in_job + '/'
            # Number of slashes in the directory path == index of the child name in the split file path.
            depth = dir_path_in_job.count('/')
            child_names = {
                file.path.split('/')[depth]
                for file in job.list_output_files()
                if file.path.startswith(dir_path_in_job)
            }
            # Sorted so that repeated listings return entries in a stable order.
            directory_entries.extend(sorted(child_names))

        return directory_entries

    def open(self, path: str, flags: int) -> int:
        """Check that ``path`` is an existing output file and return a dummy file handle.

        Raises:
            FuseOSError: with ``ENOENT`` when the job or the file does not exist.
        """
        job, path_in_job = self._parse_path(path)
        try:
            job.get_output_file(path_in_job)
        except BioLibError:
            # file not found
            raise FuseOSError(errno.ENOENT) from None

        return 1234  # dummy file handle; reads are addressed by path, not by handle

    def read(self, path: str, size: int, offset: int, fh: int) -> bytes:
        """Return up to ``size`` bytes of the output file at ``path`` starting at ``offset``.

        Raises:
            FuseOSError: with ``ENOENT`` when the job or the file does not exist.
        """
        job, path_in_job = self._parse_path(path)
        try:
            file = job.get_output_file(path_in_job)
        except BioLibError:
            raise FuseOSError(errno.ENOENT) from None  # No such file or directory

        return file.get_data(start=offset, length=size)

    def release(self, path: str, fh: int) -> int:
        """No-op: there is no per-handle state to release."""
        return _SUCCESS_CODE

    def releasedir(self, path: str, fh: int) -> int:
        """No-op: there is no per-directory state to release."""
        return _SUCCESS_CODE

    def flush(self, path: str, fh: int) -> int:
        """No-op: the file system is read-only, so there is nothing to flush."""
        return _SUCCESS_CODE

    @staticmethod
    def _get_job_finished_at_epoch_seconds(job) -> int:
        """Return the job's ``finished_at`` timestamp as whole epoch seconds, interpreted as UTC."""
        finished_at = job.to_dict()['finished_at'].rstrip('Z')
        return int(datetime.fromisoformat(finished_at).replace(tzinfo=timezone.utc).timestamp())

    @staticmethod
    def _get_directory_attributes(timestamp_epoch_seconds: int) -> _AttributeDict:
        """Build attributes for a read-only directory owned by the current user and group."""
        return _AttributeDict(
            st_atime=timestamp_epoch_seconds,
            st_ctime=timestamp_epoch_seconds,
            st_gid=os.getgid(),
            st_mode=stat.S_IFDIR | 0o555,  # Directory that is readable and executable by owner, group, and others.
            st_mtime=timestamp_epoch_seconds,
            st_nlink=1,
            st_size=1,
            st_uid=os.getuid(),
        )

    @staticmethod
    def _get_file_attributes(timestamp_epoch_seconds: int, size_in_bytes: int) -> _AttributeDict:
        """Build attributes for a read-only regular file owned by the current user and group."""
        return _AttributeDict(
            st_atime=timestamp_epoch_seconds,
            st_ctime=timestamp_epoch_seconds,
            st_gid=os.getgid(),
            st_mode=stat.S_IFREG | 0o444,  # Regular file with read permissions for owner, group, and others.
            st_mtime=timestamp_epoch_seconds,
            st_nlink=1,
            st_size=size_in_bytes,
            st_uid=os.getuid(),
        )

    def _get_job_names_map(self, refresh_jobs=False) -> Dict[str, Job]:
        """Return the cached job-name -> Job map, fetching completed jobs when needed.

        The map is fetched when it has never been fetched. It is re-fetched, at most about once
        per second, when ``refresh_jobs`` is set or when the cached map is empty. The empty case
        previously bypassed the throttle and hit the API on every file system callback.
        """
        current_time = time()
        is_stale = current_time - self._jobs_last_fetched_at > 1
        should_fetch = self._job_names_map is None or (is_stale and (refresh_jobs or not self._job_names_map))
        if should_fetch:
            self._jobs_last_fetched_at = current_time
            self._job_names_map = {job.get_name(): job for job in self._experiment.get_jobs(status='completed')}

        return self._job_names_map

    def _parse_path(self, path: str) -> Tuple[Job, str]:
        """Split ``/<job name>/<rest>`` into the matching Job and the path inside that job.

        The returned path always starts with ``/``; it is exactly ``/`` for the job directory itself.

        Raises:
            FuseOSError: with ``ENOENT`` when no completed job has the given name.
        """
        path_splitted = path.split('/')
        job_name = path_splitted[1]
        path_in_job = '/' + '/'.join(path_splitted[2:])
        job = self._get_job_names_map().get(job_name)
        if not job:
            raise FuseOSError(errno.ENOENT)  # No such file or directory

        return job, path_in_job

    # ----------------------------------- File system methods not implemented below -----------------------------------
    # The mount is read-only, so every mutating operation is rejected with EACCES.

    def chmod(self, path, mode):
        raise FuseOSError(errno.EACCES)

    def chown(self, path, uid, gid):
        raise FuseOSError(errno.EACCES)

    def mknod(self, path, mode, dev):
        raise FuseOSError(errno.EACCES)

    def rmdir(self, path):
        raise FuseOSError(errno.EACCES)

    def mkdir(self, path, mode):
        raise FuseOSError(errno.EACCES)

    def unlink(self, path):
        raise FuseOSError(errno.EACCES)

    def symlink(self, target, source):
        raise FuseOSError(errno.EACCES)

    def rename(self, old, new):
        raise FuseOSError(errno.EACCES)

    def link(self, target, source):
        raise FuseOSError(errno.EACCES)

    def utimens(self, path, times=None):
        raise FuseOSError(errno.EACCES)

    def create(self, path, mode, fi=None):
        raise FuseOSError(errno.EACCES)

    def write(self, path, data, offset, fh):
        raise FuseOSError(errno.EACCES)

    def truncate(self, path, length, fh=None):
        raise FuseOSError(errno.EACCES)

    def fsync(self, path, datasync, fh):
        raise FuseOSError(errno.EACCES)
@@ -1,25 +1,25 @@
1
1
  import json
2
2
  import platform
3
- import time
3
+ import shutil
4
4
  import socket
5
5
  import ssl
6
6
  import subprocess
7
- import urllib.request
7
+ import time
8
8
  import urllib.error
9
9
  import urllib.parse
10
+ import urllib.request
10
11
 
11
12
  from biolib.biolib_logging import logger_no_user_data
12
- from biolib.typing_utils import Dict, Optional, Union, Literal, cast
13
+ from biolib.typing_utils import Dict, Literal, Optional, Union, cast
14
+
15
+ _HttpMethod = Literal['GET', 'POST', 'PATCH', 'PUT']
13
16
 
14
17
 
15
18
  def _create_ssl_context():
16
19
  context = ssl.create_default_context()
17
20
  try:
18
21
  if platform.system() == 'Darwin':
19
- certificates = subprocess.check_output(
20
- "security find-certificate -a -p",
21
- shell=True
22
- ).decode('utf-8')
22
+ certificates = subprocess.check_output('security find-certificate -a -p', shell=True).decode('utf-8')
23
23
  context.load_verify_locations(cadata=certificates)
24
24
  except BaseException:
25
25
  pass
@@ -33,7 +33,7 @@ class HttpError(urllib.error.HTTPError):
33
33
  code=http_error.code,
34
34
  msg=http_error.msg, # type: ignore
35
35
  hdrs=http_error.hdrs, # type: ignore
36
- fp=http_error.fp
36
+ fp=http_error.fp,
37
37
  )
38
38
 
39
39
  def __str__(self):
@@ -42,15 +42,24 @@ class HttpError(urllib.error.HTTPError):
42
42
 
43
43
 
44
44
  class HttpResponse:
45
- def __init__(self, response):
45
+ def __init__(self, response, response_path) -> None:
46
46
  self.headers: Dict[str, str] = dict(response.headers)
47
47
  self.status_code: int = int(response.status)
48
- self.content: bytes = response.read()
48
+ self.response_path = response_path
49
+ if self.response_path:
50
+ with open(self.response_path, 'wb') as out_file:
51
+ shutil.copyfileobj(response, out_file)
52
+ else:
53
+ self.content: bytes = response.read()
49
54
  self.url: str = response.geturl()
50
55
 
51
56
  @property
52
57
  def text(self) -> str:
53
- return cast(str, self.content.decode('utf-8'))
58
+ if self.response_path:
59
+ with open(self.response_path, 'rb') as fp:
60
+ return cast(str, fp.read().decode('utf-8'))
61
+ else:
62
+ return cast(str, self.content.decode('utf-8'))
54
63
 
55
64
  def json(self):
56
65
  return json.loads(self.text)
@@ -61,12 +70,13 @@ class HttpClient:
61
70
 
62
71
  @staticmethod
63
72
  def request(
64
- url: str,
65
- method: Optional[Literal['GET', 'POST', 'PATCH', 'PUT']] = None,
66
- data: Optional[Union[Dict, bytes]] = None,
67
- headers: Optional[Dict[str, str]] = None,
68
- retries: int = 5,
69
- timeout_in_seconds: Optional[int] = None,
73
+ url: str,
74
+ method: Optional[_HttpMethod] = None,
75
+ data: Optional[Union[Dict, bytes]] = None,
76
+ headers: Optional[Dict[str, str]] = None,
77
+ retries: int = 5,
78
+ timeout_in_seconds: Optional[int] = None,
79
+ response_path: Optional[str] = None,
70
80
  ) -> HttpResponse:
71
81
  if not HttpClient.ssl_context:
72
82
  HttpClient.ssl_context = _create_ssl_context()
@@ -84,7 +94,7 @@ class HttpClient:
84
94
  if timeout_in_seconds is None:
85
95
  timeout_in_seconds = 60 if isinstance(data, dict) else 180 # TODO: Calculate timeout based on data size
86
96
 
87
- last_error: Optional[urllib.error.URLError] = None
97
+ last_error: Optional[Exception] = None
88
98
  for retry_count in range(retries + 1):
89
99
  if retry_count > 0:
90
100
  time.sleep(5 * retry_count)
@@ -95,23 +105,32 @@ class HttpClient:
95
105
  context=HttpClient.ssl_context,
96
106
  timeout=timeout_in_seconds,
97
107
  ) as response:
98
- return HttpResponse(response)
108
+ return HttpResponse(response, response_path)
99
109
 
100
110
  except urllib.error.HTTPError as error:
101
- if error.code == 502:
102
- logger_no_user_data.debug(f'HTTP {method} request failed with status 502 for "{url}"')
111
+ if error.code == 429:
112
+ logger_no_user_data.warning(f'HTTP {method} request failed with status 429 for "{url}"')
113
+ last_error = error
114
+ elif error.code == 502:
115
+ logger_no_user_data.warning(f'HTTP {method} request failed with status 502 for "{url}"')
103
116
  last_error = error
104
117
  elif error.code == 503:
105
- logger_no_user_data.debug(f'HTTP {method} request failed with status 503 for "{url}"')
118
+ logger_no_user_data.warning(f'HTTP {method} request failed with status 503 for "{url}"')
119
+ last_error = error
120
+ elif error.code == 504:
121
+ logger_no_user_data.warning(f'HTTP {method} request failed with status 504 for "{url}"')
106
122
  last_error = error
107
123
  else:
108
124
  raise HttpError(error) from None
109
125
 
110
126
  except urllib.error.URLError as error:
111
127
  if isinstance(error.reason, socket.timeout):
112
- logger_no_user_data.debug(f'HTTP {method} request failed with read timeout for "{url}"')
128
+ logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
113
129
  last_error = error
114
130
  else:
115
131
  raise error
132
+ except socket.timeout as error:
133
+ logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
134
+ last_error = error
116
135
 
117
136
  raise last_error or Exception(f'HTTP {method} request failed after {retries} retries for "{url}"')
@@ -0,0 +1 @@
1
+ from .cache import prune_lfs_cache
@@ -0,0 +1 @@
1
+ # Note: this directory is only for libraries that are included directly in the package instead of being installed as dependencies