pybiolib 1.1.2193__py3-none-any.whl → 1.2.7.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -83,6 +83,8 @@ def verify_schema(specification: SqliteV1DatabaseSchema, actual_schema: SqliteV1
83
83
 
84
84
  def get_data_record_state_from_uri(uri) -> 'DataRecordVersionInfo':
85
85
  app_response: AppGetResponse = api_client.get(path='/app/', params={'uri': uri}).json()
86
+ if app_response['app']['type'] != 'data-record':
87
+ raise Exception(f'Resource "{uri}" is not a Data Record')
86
88
  return DataRecordVersionInfo(
87
89
  resource_uri=app_response['app_version']['app_uri'],
88
90
  resource_uuid=app_response['app']['public_id'],
@@ -125,12 +125,14 @@ class HttpClient:
125
125
 
126
126
  except urllib.error.URLError as error:
127
127
  if isinstance(error.reason, socket.timeout):
128
- logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
128
+ if retry_count > 0:
129
+ logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
129
130
  last_error = error
130
131
  else:
131
132
  raise error
132
133
  except socket.timeout as error:
133
- logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
134
+ if retry_count > 0:
135
+ logger_no_user_data.warning(f'HTTP {method} request failed with read timeout for "{url}"')
134
136
  last_error = error
135
137
 
136
138
  raise last_error or Exception(f'HTTP {method} request failed after {retries} retries for "{url}"')
@@ -211,6 +211,11 @@ def push_application(
211
211
  try:
212
212
  logger.info(f'Trying to push image {docker_image_name} defined on module {module_name}.')
213
213
  image = docker_client.images.get(docker_image_name)
214
+ architecture = image.attrs.get('Architecture')
215
+ if architecture != 'amd64':
216
+ print(f"Error: '{docker_image_name}' is compiled for {architecture}, expected x86 (amd64).")
217
+ print('If you are on an ARM processor, try passing --platform linux/amd64 to docker build.')
218
+ exit(1)
214
219
  absolute_repo_uri = f'{utils.BIOLIB_SITE_HOSTNAME}/{repo}'
215
220
  image.tag(absolute_repo_uri, tag)
216
221
 
@@ -7,6 +7,7 @@ class RuntimeJobDataDict(TypedDict):
7
7
  job_uuid: str
8
8
  job_auth_token: str
9
9
  app_uri: str
10
+ is_environment_biolib_cloud: bool
10
11
 
11
12
 
12
13
  class BioLibRuntimeError(Exception):
@@ -0,0 +1,264 @@
1
+ import glob
2
+ import os
3
+ import re
4
+ import shutil
5
+ import subprocess
6
+ import tempfile
7
+
8
+ import biolib
9
+ from biolib.utils import SeqUtil
10
+
11
+
12
def natsorted(lst):
    """Return a new list with the strings of ``lst`` in natural (human) order.

    Runs of ASCII digits are compared numerically and all other text is
    compared case-insensitively, so ``'job_2'`` sorts before ``'job_10'``.

    Args:
        lst: Iterable of strings to sort.

    Returns:
        A new sorted list; the input is left untouched.
    """

    def _natural_sort_key(s):
        """Split ``s`` into alternating text / integer tokens for comparison."""
        # re.split with a capturing group always yields plain text at even
        # indices and the captured [0-9]+ runs at odd indices. Converting by
        # position (rather than by str.isdigit()) keeps token types aligned
        # between keys and avoids int() failing on non-ASCII "digits" such
        # as '²', for which isdigit() is True but int() raises ValueError.
        tokens = re.split('([0-9]+)', s)
        return [int(token) if index % 2 else token.lower() for index, token in enumerate(tokens)]

    return sorted(lst, key=_natural_sort_key)
20
+
21
+
22
def fasta_above_threshold(fasta_file, work_threshold, work_per_residue=1, verbose=False):
    """Return True if the total residue work of a FASTA file reaches ``work_threshold``.

    Work is accumulated record by record as ``len(record.sequence) * work_per_residue``
    and the scan stops as soon as the running total is >= ``work_threshold``.

    Args:
        fasta_file: FASTA input passed straight to ``SeqUtil.parse_fasta``.
        work_threshold: Total work at (or above) which True is returned.
        work_per_residue: Work units charged per residue of each sequence.
        verbose: If truthy, print a short summary of the decision.

    Returns:
        True if the threshold was reached, otherwise False.
    """

    # NOTE(review): len(records) below assumes parse_fasta returns a sized
    # collection (e.g. a list) rather than a lazy iterator - confirm.
    records = SeqUtil.parse_fasta(fasta_file)

    total_work_units = 0
    for i, record in enumerate(records):
        total_work_units += len(record.sequence) * work_per_residue

        # Stop early: the remaining records cannot change the answer.
        if total_work_units >= work_threshold:
            if verbose:
                print(f'FASTA above threshold (stopped at {total_work_units}) >= {work_threshold}')
                print(f'From {i+1}/{len(records)} sequences in {fasta_file}')
            return True

    if verbose:
        print(f'FASTA below threshold ({total_work_units}) < {work_threshold}')
        print(f'From {len(records)} sequences in {fasta_file}')

    return False
44
+
45
+
46
def run_locally(command_list, args):
    """Run the script as a local subprocess (no multi-node processing).

    Every attribute of ``args`` whose name does not start with ``multinode``
    is forwarded as a ``--name value`` pair appended to ``command_list``.
    The subprocess' stdout is printed on success (return code 0); otherwise
    its stderr is printed prefixed with ``Error:``.

    Args:
        command_list: Base command, e.g. ``["python3", "predict.py"]``.
        args: Namespace-like object (anything ``vars()`` accepts) that also
            exposes a numeric ``verbose`` attribute.
    """

    # vars(args) is the namespace's live __dict__; build a filtered copy so
    # dropping the multinode-specific options does not mutate the caller's args.
    new_args = {key: value for key, value in vars(args).items() if not str(key).startswith('multinode')}

    # Prepare command, e.g. ["python3", "predict.py"] + ["--fasta", "in.fasta", ...]
    command = command_list + _args_dict_to_args_list(new_args)

    if args.verbose >= 1:
        print(f'Running {command}')

    # check=False: a failing command is reported below instead of raising.
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode == 0:
        print(f'{result.stdout}')
    else:
        print(f'Error: {result.stderr}')
72
+
73
+
74
def fasta_batch_records(fasta_file, work_per_batch_min, work_per_residue=1, verbose=False):
    """Group the records of a FASTA file into batches of at least ``work_per_batch_min`` work.

    Records are appended to the current batch in file order; once the batch's
    accumulated work (``len(record.sequence) * work_per_residue`` per record)
    reaches ``work_per_batch_min`` the batch is closed and a new one started.
    A trailing, partially filled batch is kept.

    Args:
        fasta_file: FASTA input passed straight to ``SeqUtil.parse_fasta``.
        work_per_batch_min: Minimum work a batch must reach before it is closed.
        work_per_residue: Work units charged per residue of each sequence.
        verbose: If truthy, print one summary line per batch.

    Returns:
        A list of batches, each batch being a list of records.
    """

    def log_batches(batches):
        """Print the number of residues and sequences of every batch."""
        for i, batch in enumerate(batches):
            n_seqs = len(batch)
            n_res = sum(len(record.sequence) for record in batch)
            print(f'Batch {i+1}: {n_res} residues from {n_seqs} sequences')

    records = SeqUtil.parse_fasta(fasta_file)

    batches = []
    batch = []
    current_work_units = 0
    for record in records:
        batch.append(record)
        current_work_units += len(record.sequence) * work_per_residue

        # The record that crosses the limit stays in the batch it closes.
        if current_work_units >= work_per_batch_min:
            batches.append(batch)
            batch = []
            current_work_units = 0

    # Keep the last, not yet full batch if there is one
    if batch:
        batches.append(batch)

    if verbose:
        log_batches(batches)

    return batches
119
+
120
+
121
def fasta_send_batches_biolib(app_url, batches, args, args_fasta='fasta', verbose=1):
    """Send one BioLib job per batch, then stream logs and save every job's output.

    For each batch a temporary FASTA file is written and the app at
    ``app_url`` is started non-blocking with the caller's arguments, except
    that the ``args_fasta`` argument points at the temporary file and
    ``multinode_only_local`` is set to True. Afterwards the jobs are handled
    in submission order: logs are streamed, the exit code is checked and the
    files are saved to ``job_output/job_<n>`` (1-based).

    Args:
        app_url: App identifier handed to ``biolib.load``.
        batches: List of batches, each a list of FASTA records.
        args: Namespace-like object (anything ``vars()`` accepts) exposing a
            ``verbose`` attribute; it is not modified.
        args_fasta: Name of the argument that carries the FASTA path.
        verbose: If truthy, print where each job's output is saved.

    Raises:
        AssertionError: If a job finishes with a non-zero exit code.
    """

    if args.verbose >= 1:
        print(f'Sending {len(batches)} batches to Biolib')

    # Login to biolib, prepare app
    biolib.login()
    current_app = biolib.load(app_url)  # Nb: uses "_" not "-"

    job_list = []
    for i, batch_records in enumerate(batches):
        # Write FASTA, send to server
        with tempfile.TemporaryDirectory() as tempdir:
            # vars(args) is the namespace's live __dict__; work on a copy so the
            # caller's args are not left pointing at a deleted temporary file.
            new_args = dict(vars(args))

            fasta_path = f'{tempdir}/input.fasta'
            SeqUtil.write_records_to_fasta(fasta_path, batch_records)
            new_args[args_fasta] = fasta_path
            new_args['multinode_only_local'] = True

            new_args_list = _args_dict_to_args_list(new_args)

            # NOTE(review): presumably cli() uploads the input before returning,
            # since the temporary directory is removed right after - confirm.
            job = current_app.cli(args=new_args_list, blocking=False)
            job_list.append(job)

            if args.verbose:
                batch_dict = _get_batch_stats(batch_records)
                n_seqs, n_res = batch_dict['records'], batch_dict['residues']
                print(f'Sending job {i+1}: {n_res} residues from {n_seqs} sequences -> arg_list = {new_args_list}')

    # Stream job output one job at a time
    print('Streaming job outputs ...')
    for i, job in enumerate(job_list):
        job.stream_logs()

        # Explicit raise instead of `assert` so the check is not stripped under
        # `python -O`; AssertionError is kept so existing callers see the same type.
        exit_code = job.get_exit_code()
        if exit_code != 0:
            raise AssertionError(f'Job failed with exit code {exit_code}')

        # Write to disk
        output_dir = f'job_output/job_{i+1}'
        job.save_files(output_dir=output_dir)

        if verbose:
            print(f'Saving to {output_dir}')
175
+
176
+
177
def merge_folder(folder_name, job_out_dir='job_output', out_dir='output', verbose=1):
    """Collect ``folder_name`` from every job output directory into ``out_dir``.

    The job directories (``<job_out_dir>/job_*``) are visited in natural
    order. The folder of the first job is moved to ``<out_dir>/<folder_name>``
    and the entries found in the same folder of every later job are moved
    into it.

    Raises:
        IndexError: If no job directory matches ``<job_out_dir>/job_*``.
    """

    os.makedirs(out_dir, exist_ok=True)

    sorted_job_dirs = natsorted(glob.glob(f'{job_out_dir}/job_*'))

    # The first job's folder becomes the merge target
    merged_folder = f'{out_dir}/{folder_name}'
    shutil.move(f'{sorted_job_dirs[0]}/{folder_name}', merged_folder)

    if verbose:
        print(f'Merging {folder_name} from {len(sorted_job_dirs)} directories to {merged_folder}')

    # Remaining jobs (none when there is a single job): move each entry over
    for later_job_dir in sorted_job_dirs[1:]:
        source_folder = f'{later_job_dir}/{folder_name}'
        for entry_name in os.listdir(source_folder):
            shutil.move(f'{source_folder}/{entry_name}', merged_folder)
203
+
204
+
205
def merge_file(
    file_name,
    header_lines_int=1,
    job_out_dir='job_output',
    out_dir='output',
    verbose=1,
):
    """Concatenate ``file_name`` from every job output directory into one file.

    The job directories (``<job_out_dir>/job_*``) are visited in natural
    order. The first job's file is moved to ``<out_dir>/<file_name>`` with
    its header intact; the files of all later jobs are appended to it with
    their first ``header_lines_int`` lines skipped.

    Args:
        file_name: File name relative to each job directory.
        header_lines_int: Number of leading header lines to drop from every
            file except the first.
        job_out_dir: Directory containing the ``job_*`` directories.
        out_dir: Destination directory; created if missing.
        verbose: If truthy, print a one-line summary of the merge.

    Raises:
        IndexError: If no job directory matches ``<job_out_dir>/job_*``.
    """

    os.makedirs(out_dir, exist_ok=True)

    job_dirs = natsorted(glob.glob(f'{job_out_dir}/job_*'))

    # Move first file, prepare to merge
    first_file = f'{job_dirs[0]}/{file_name}'
    merged_file = f'{out_dir}/{file_name}'
    shutil.move(first_file, merged_file)

    if verbose:
        print(f'Merging {file_name} from {len(job_dirs)} directories to {merged_file}')

    # A single job needs no appending
    if len(job_dirs) < 2:
        return

    with open(merged_file, 'a') as merged_file_handle:
        for job_dir in job_dirs[1:]:
            extra_file = f'{job_dir}/{file_name}'
            with open(extra_file) as extra_file_handle:
                # Skip the header lines. A file shorter than the header (e.g. an
                # empty result) contributes nothing instead of raising
                # StopIteration and aborting the merge half-way.
                for _ in range(header_lines_int):
                    if next(extra_file_handle, None) is None:
                        break

                # Stream the remainder instead of loading the whole file in memory
                shutil.copyfileobj(extra_file_handle, merged_file_handle)
243
+
244
+
245
def _get_batch_stats(batch):
    """Return the number of records and the total residue count of ``batch``.

    The result is a dict with the keys ``'records'`` (``len(batch)``) and
    ``'residues'`` (sum of ``len(record.sequence)`` over the batch).
    """
    residue_count = 0
    for record in batch:
        residue_count += len(record.sequence)

    return {'records': len(batch), 'residues': residue_count}
252
+
253
+
254
def _args_dict_to_args_list(new_args):
    """Flatten an arguments dict into a ``['--key', 'value', ...]`` list for Biolib.

    Every key is prefixed with ``--`` and every value is rendered with
    f-string formatting; pairs keep the dict's iteration order.
    """
    return [item for key, value in new_args.items() for item in (f'--{key}', f'{value}')]
@@ -14,6 +14,10 @@ class Runtime:
14
14
  def check_is_environment_biolib_app() -> bool:
15
15
  return bool(Runtime._try_to_get_job_data())
16
16
 
17
+ @staticmethod
18
+ def check_is_environment_biolib_cloud() -> bool:
19
+ return Runtime._get_job_data().get('is_environment_biolib_cloud', False)
20
+
17
21
  @staticmethod
18
22
  def get_job_id() -> str:
19
23
  return Runtime._get_job_data()['job_uuid']
@@ -32,6 +32,7 @@ class App(TypedDict):
32
32
  public_id: str
33
33
  state: str
34
34
  resource_uri: str
35
+ type: str
35
36
 
36
37
 
37
38
  class AppGetResponse(TypedDict):
biolib/cli/data_record.py CHANGED
@@ -6,6 +6,7 @@ from typing import Dict, List
6
6
  import click
7
7
 
8
8
  from biolib._data_record.data_record import DataRecord
9
+ from biolib.biolib_api_client import BiolibApiClient
9
10
  from biolib.biolib_logging import logger, logger_no_user_data
10
11
  from biolib.typing_utils import Optional
11
12
 
@@ -57,6 +58,7 @@ def download(uri: str, file: Optional[str], path_filter: Optional[str]) -> None:
57
58
  @click.argument('uri', required=True)
58
59
  @click.option('--json', 'output_as_json', is_flag=True, default=False, required=False, help='Format output as JSON')
59
60
  def describe(uri: str, output_as_json: bool) -> None:
61
+ BiolibApiClient.assert_is_signed_in(authenticated_action_description='get Data Record description')
60
62
  record = DataRecord.get_by_uri(uri)
61
63
  files_info: List[Dict] = []
62
64
  total_size_in_bytes = 0
@@ -286,6 +286,7 @@ class DockerExecutor:
286
286
  job_uuid=self._options['job']['public_id'],
287
287
  job_auth_token=self._options['job']['auth_token'],
288
288
  app_uri=self._options['job']['app_uri'],
289
+ is_environment_biolib_cloud=bool(utils.IS_RUNNING_IN_CLOUD),
289
290
  )
290
291
  secrets: Dict[str, str] = dict(
291
292
  **module.get('secrets', {}),
@@ -47,8 +47,21 @@ class JobStorage:
47
47
  module_output_path = os.path.join(job_temporary_dir, JobStorage.module_output_file_name)
48
48
  module_output_size = os.path.getsize(module_output_path)
49
49
 
50
+ # Calculate chunk size based on max chunk count of 10_000, using 9_000 to be on the safe side
51
+ max_chunk_count = 9_000
52
+ min_chunk_size_bytes = 50_000_000
53
+ chunk_size_in_bytes = max(min_chunk_size_bytes, module_output_size // max_chunk_count)
54
+
55
+ logger_no_user_data.debug(
56
+ f'Job "{job_uuid}" uploading result of size {module_output_size} bytes '
57
+ f'with chunk size of {chunk_size_in_bytes} bytes...'
58
+ )
59
+
50
60
  with open(module_output_path, mode='rb') as module_output_file:
51
- module_output_iterator = get_chunk_iterator_from_file_object(module_output_file)
61
+ module_output_iterator = get_chunk_iterator_from_file_object(
62
+ file_object=module_output_file,
63
+ chunk_size_in_bytes=chunk_size_in_bytes,
64
+ )
52
65
  multipart_uploader = JobStorage._get_module_output_uploader(job_uuid)
53
66
  multipart_uploader.upload(
54
67
  payload_iterator=module_output_iterator,
@@ -1,3 +1,4 @@
1
+ import base64
1
2
  import io
2
3
  import subprocess
3
4
  import tarfile
@@ -160,6 +161,9 @@ class RemoteHostProxy:
160
161
  access_token = BiolibApiClient.get().access_token
161
162
  bearer_token = f'Bearer {access_token}' if access_token else ''
162
163
 
164
+ biolib_index_basic_auth = f'compute_node|admin:{compute_node_auth_token},{self._job_uuid}'
165
+ biolib_index_basic_auth_base64 = base64.b64encode(biolib_index_basic_auth.encode('utf-8')).decode('utf-8')
166
+
163
167
  nginx_config = f"""
164
168
  events {{
165
169
  worker_connections 1024;
@@ -270,7 +274,7 @@ http {{
270
274
  }}
271
275
 
272
276
  location /api/lfs/ {{
273
- proxy_pass https://$upstream_hostname/api/lfs/;
277
+ proxy_pass https://$upstream_hostname$request_uri;
274
278
  proxy_set_header authorization "";
275
279
  proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
276
280
  proxy_set_header job-uuid "{self._job_uuid}";
@@ -279,7 +283,7 @@ http {{
279
283
  }}
280
284
 
281
285
  location /api/app/ {{
282
- proxy_pass https://$upstream_hostname/api/app/;
286
+ proxy_pass https://$upstream_hostname$request_uri;
283
287
  proxy_set_header authorization "";
284
288
  proxy_set_header compute-node-auth-token "{compute_node_auth_token}";
285
289
  proxy_set_header job-uuid "{self._job_uuid}";
@@ -288,33 +292,40 @@ http {{
288
292
  }}
289
293
 
290
294
  location /api/ {{
291
- proxy_pass https://$upstream_hostname/api/;
295
+ proxy_pass https://$upstream_hostname$request_uri;
292
296
  proxy_set_header authorization "";
293
297
  proxy_set_header cookie "";
294
298
  proxy_ssl_server_name on;
295
299
  }}
296
300
 
297
301
  location /proxy/storage/job-storage/ {{
298
- proxy_pass https://$upstream_hostname/proxy/storage/job-storage/;
302
+ proxy_pass https://$upstream_hostname$request_uri;
299
303
  proxy_set_header authorization "";
300
304
  proxy_set_header cookie "";
301
305
  proxy_ssl_server_name on;
302
306
  }}
303
307
 
304
308
  location /proxy/storage/lfs/versions/ {{
305
- proxy_pass https://$upstream_hostname/proxy/storage/lfs/versions/;
309
+ proxy_pass https://$upstream_hostname$request_uri;
306
310
  proxy_set_header authorization "";
307
311
  proxy_set_header cookie "";
308
312
  proxy_ssl_server_name on;
309
313
  }}
310
314
 
311
315
  location /proxy/cloud/ {{
312
- proxy_pass https://$upstream_hostname/proxy/cloud/;
316
+ proxy_pass https://$upstream_hostname$request_uri;
313
317
  proxy_set_header authorization "";
314
318
  proxy_set_header cookie "";
315
319
  proxy_ssl_server_name on;
316
320
  }}
317
321
 
322
+ location /proxy/index/ {{
323
+ proxy_pass https://$upstream_hostname$request_uri;
324
+ proxy_set_header authorization "Basic {biolib_index_basic_auth_base64}";
325
+ proxy_set_header cookie "";
326
+ proxy_ssl_server_name on;
327
+ }}
328
+
318
329
  location / {{
319
330
  return 404 "Not found";
320
331
  }}
@@ -2,23 +2,23 @@ import base64
2
2
  import os
3
3
  import random
4
4
  import shutil
5
+ import socket
5
6
  import sys
6
- import time
7
7
  import threading
8
- import socket
8
+ import time
9
9
  from queue import Queue
10
10
 
11
- from biolib import utils
11
+ from biolib import api, utils
12
+ from biolib.biolib_binary_format import ModuleOutputV2, SystemException, SystemStatusUpdate
12
13
  from biolib.biolib_binary_format.utils import LocalFileIndexableBuffer
14
+ from biolib.biolib_logging import logger, logger_no_user_data
13
15
  from biolib.compute_node.cloud_utils import CloudUtils
14
16
  from biolib.compute_node.job_worker import JobWorkerProcess
15
17
  from biolib.compute_node.job_worker.job_storage import JobStorage
16
18
  from biolib.compute_node.socker_listener_thread import SocketListenerThread
17
19
  from biolib.compute_node.socket_sender_thread import SocketSenderThread
20
+ from biolib.compute_node.utils import SystemExceptionCodes, WorkerThreadException, get_package_type
18
21
  from biolib.compute_node.webserver import webserver_utils
19
- from biolib.biolib_binary_format import SystemStatusUpdate, SystemException, ModuleOutputV2
20
- from biolib.compute_node.utils import get_package_type, WorkerThreadException, SystemExceptionCodes
21
- from biolib.biolib_logging import logger, logger_no_user_data
22
22
 
23
23
  SOCKET_HOST = '127.0.0.1'
24
24
 
@@ -37,7 +37,7 @@ class WorkerThread(threading.Thread):
37
37
  self._sender_thread = None
38
38
  self._start_and_connect_to_compute_process()
39
39
 
40
- logger.debug(f"WorkerThread connected to port {self._socket_port}")
40
+ logger.debug(f'WorkerThread connected to port {self._socket_port}')
41
41
 
42
42
  except Exception as exception:
43
43
  logger_no_user_data.error(exception)
@@ -79,20 +79,16 @@ class WorkerThread(threading.Thread):
79
79
  if progress == 94:
80
80
  # Get Job exit code
81
81
  try:
82
- module_output_path = os.path.join(self._job_temporary_dir,
83
- JobStorage.module_output_file_name)
84
- module_output = ModuleOutputV2(
85
- buffer=LocalFileIndexableBuffer(
86
- filename=module_output_path
87
- )
82
+ module_output_path = os.path.join(
83
+ self._job_temporary_dir,
84
+ JobStorage.module_output_file_name,
88
85
  )
86
+ module_output = ModuleOutputV2(buffer=LocalFileIndexableBuffer(filename=module_output_path))
89
87
  self.compute_state['exit_code'] = module_output.get_exit_code()
90
88
  logger_no_user_data.debug(f"Got exit code: {self.compute_state['exit_code']}")
91
89
 
92
90
  except Exception as error: # pylint: disable=broad-except
93
- logger_no_user_data.error(
94
- f'Could not get exit_code from module output due to: {error}'
95
- )
91
+ logger_no_user_data.error(f'Could not get exit_code from module output due to: {error}')
96
92
 
97
93
  if utils.IS_RUNNING_IN_CLOUD:
98
94
  JobStorage.upload_module_output(
@@ -107,7 +103,7 @@ class WorkerThread(threading.Thread):
107
103
  elif package_type == 'SystemException':
108
104
  error_code = SystemException(package).deserialize()
109
105
  self.compute_state['status']['error_code'] = error_code
110
- logger.debug("Hit error. Terminating Worker Thread and Compute Process")
106
+ logger.debug('Hit error. Terminating Worker Thread and Compute Process')
111
107
  self.compute_state['progress'] = 95
112
108
  self.terminate()
113
109
 
@@ -153,10 +149,10 @@ class WorkerThread(threading.Thread):
153
149
 
154
150
  # Starting a thread for accepting connections before starting the process that should to connect to the socket
155
151
  logger_no_user_data.debug('Starting connection thread')
156
- self._connection_thread = threading.Thread(target=self._accept_new_socket_connection, args=[
157
- received_messages_queue,
158
- messages_to_send_queue
159
- ])
152
+ self._connection_thread = threading.Thread(
153
+ target=self._accept_new_socket_connection,
154
+ args=[received_messages_queue, messages_to_send_queue],
155
+ )
160
156
  self._connection_thread.start()
161
157
  logger_no_user_data.debug('Started connection thread')
162
158
  logger_no_user_data.debug('Starting compute process')
@@ -177,6 +173,16 @@ class WorkerThread(threading.Thread):
177
173
  self._sender_thread.start()
178
174
 
179
175
  def terminate(self) -> None:
176
+ cloud_job_uuid = self.compute_state['cloud_job_id']
177
+ exit_code = self.compute_state.get('exit_code')
178
+ system_exception_code = self.compute_state['status'].get('error_code')
179
+ if utils.IS_RUNNING_IN_CLOUD:
180
+ CloudUtils.finish_cloud_job(
181
+ cloud_job_id=cloud_job_uuid,
182
+ system_exception_code=system_exception_code,
183
+ exit_code=exit_code,
184
+ )
185
+
180
186
  deregistered_due_to_error = False
181
187
  if self._job_worker_process:
182
188
  logger_no_user_data.debug(
@@ -184,7 +190,8 @@ class WorkerThread(threading.Thread):
184
190
  )
185
191
  self._job_worker_process.terminate()
186
192
 
187
- for _ in range(10):
193
+ clean_up_timeout_in_seconds = 600
194
+ for _ in range(clean_up_timeout_in_seconds):
188
195
  if self._job_worker_process.exitcode is not None:
189
196
  logger_no_user_data.debug(
190
197
  f'Job "{self._job_uuid}" worker process exitcode {self._job_worker_process.exitcode}'
@@ -196,28 +203,18 @@ class WorkerThread(threading.Thread):
196
203
 
197
204
  if self._job_worker_process.exitcode is None:
198
205
  # TODO: Figure out if more error handling is necessary here
199
- logger_no_user_data.error(f'Job {self._job_uuid} worker process did not exit within 10 seconds')
206
+ logger_no_user_data.error(
207
+ f'Job {self._job_uuid} worker process did not exit within {clean_up_timeout_in_seconds} seconds'
208
+ )
200
209
  if utils.IS_RUNNING_IN_CLOUD:
201
210
  logger_no_user_data.error('Deregistering compute node...')
202
211
  CloudUtils.deregister(error='job_cleanup_timed_out')
203
212
  deregistered_due_to_error = True
204
213
 
205
214
  # Delete result as error occurred
206
- system_exception_code = self.compute_state['status'].get('error_code')
207
215
  if system_exception_code and os.path.exists(self._job_temporary_dir):
208
216
  shutil.rmtree(self._job_temporary_dir)
209
217
 
210
- exit_code = self.compute_state.get('exit_code')
211
-
212
- if utils.IS_RUNNING_IN_CLOUD:
213
- # Get and send compute node exception code and job exit code if present
214
- logger_no_user_data.debug(f"Sending exit code {exit_code}")
215
- CloudUtils.finish_cloud_job(
216
- cloud_job_id=self.compute_state['cloud_job_id'],
217
- system_exception_code=system_exception_code,
218
- exit_code=exit_code
219
- )
220
-
221
218
  if self._socket:
222
219
  self._socket.close()
223
220
 
@@ -225,7 +222,7 @@ class WorkerThread(threading.Thread):
225
222
  self._connection.close()
226
223
 
227
224
  if self.compute_state['progress'] == 95:
228
- seconds_to_sleep = 60 # 1 minute
225
+ seconds_to_sleep = 5
229
226
  logger_no_user_data.debug(
230
227
  f'Job "{self._job_uuid}" worker thread sleeping for {seconds_to_sleep} seconds before cleaning up'
231
228
  )
@@ -234,7 +231,7 @@ class WorkerThread(threading.Thread):
234
231
 
235
232
  compute_state_dict = webserver_utils.JOB_ID_TO_COMPUTE_STATE_DICT
236
233
  if self._job_uuid in compute_state_dict:
237
- # Delete result as user has not started download within 60 seconds
234
+ # Delete result as user has not started download
238
235
  if compute_state_dict[self._job_uuid]['progress'] == 95 and os.path.exists(self._job_temporary_dir):
239
236
  shutil.rmtree(self._job_temporary_dir)
240
237
 
@@ -245,12 +242,18 @@ class WorkerThread(threading.Thread):
245
242
  f'Job "{self._job_uuid}" could not be found, maybe it has already been cleaned up'
246
243
  )
247
244
 
248
- logger_no_user_data.debug(f'Job "{self._job_uuid}" worker thread terminated')
249
-
250
245
  if utils.IS_RUNNING_IN_CLOUD:
246
+ config = CloudUtils.get_webserver_config()
247
+ logger_no_user_data.debug(f'Job "{self._job_uuid}" reporting CloudJob "{cloud_job_uuid}" as cleaned up...')
248
+ api.client.post(
249
+ path=f'/internal/compute-nodes/cloud-jobs/{cloud_job_uuid}/cleaned-up/',
250
+ headers={'Compute-Node-Auth-Token': config['compute_node_info']['auth_token']},
251
+ )
252
+
251
253
  if deregistered_due_to_error:
252
254
  CloudUtils.shutdown() # shutdown now
253
255
  else:
254
256
  webserver_utils.update_auto_shutdown_time()
255
257
 
258
+ logger_no_user_data.debug(f'Job "{self._job_uuid}" worker thread exiting...')
256
259
  sys.exit()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pybiolib
3
- Version: 1.1.2193
3
+ Version: 1.2.7.dev0
4
4
  Summary: BioLib Python Client
5
5
  Home-page: https://github.com/biolib
6
6
  License: MIT
@@ -4,18 +4,18 @@ biolib/__init__.py,sha256=_tThyzISH81yS9KXP_X3qEiKXmsIp5XOBcJIODfLVnc,4338
4
4
  biolib/_data_record/data_record.py,sha256=CoyYRse5VdUBhQzzPfR9BkytgOsM-IZxkfMX1kyRnPk,12589
5
5
  biolib/_internal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  biolib/_internal/data_record/__init__.py,sha256=fGdME6JGRU_2VxpJbYpGXYndjN-feUkmKY4fuMyq3cg,76
7
- biolib/_internal/data_record/data_record.py,sha256=YmaAABR57goDCE8-rKb2j0FPMSbDtRPCm_HhT3mM074,4299
7
+ biolib/_internal/data_record/data_record.py,sha256=g_-jdy5-Zem3dthwxJj2OuQqkDGTyc-iGqN1rtYYD1A,4418
8
8
  biolib/_internal/data_record/remote_storage_endpoint.py,sha256=eCptuZ4DMAPnaNCVDvpWXwXGI6Jac9U1N5dqU8Cj95Q,1732
9
9
  biolib/_internal/file_utils.py,sha256=4jT6j7bB21c0JNn5BfnyWQib_zt0CVtJ_TiOFOStRcE,2604
10
10
  biolib/_internal/fuse_mount/__init__.py,sha256=B_tM6RM2dBw-vbpoHJC4X3tOAaN1H2RDvqYJOw3xFwg,55
11
11
  biolib/_internal/fuse_mount/experiment_fuse_mount.py,sha256=08aUdEq_bvqLBft_gSLjOClKDy5sBnMts1RfJf7AP_U,7012
12
- biolib/_internal/http_client.py,sha256=DdooXei93JKGYGV4aQmzue_oFzvHkozg2UCxgk9dfDM,5081
12
+ biolib/_internal/http_client.py,sha256=Q7z7_DMI1EaVJSyevrra63piuee-NfqcAHRtQt76ZdA,5165
13
13
  biolib/_internal/lfs/__init__.py,sha256=gSWo_xg61UniYgD7yNYxeT4I9uaXBCBSi3_nmZjnPpE,35
14
14
  biolib/_internal/lfs/cache.py,sha256=pQS2np21rdJ6I3DpoOutnzPHpLOZgUIS8TMltUJk_k4,2226
15
15
  biolib/_internal/libs/__init__.py,sha256=Jdf4tNPqe_oIIf6zYml6TiqhL_02Vyqwge6IELrAFhw,98
16
16
  biolib/_internal/libs/fusepy/__init__.py,sha256=AWDzNFS-XV_5yKb0Qx7kggIhPzq1nj_BZS5y2Nso08k,41944
17
- biolib/_internal/push_application.py,sha256=8P7eXvySn7CRp5XBDkO3xjTGixS8g7-jD-_iwzM_XDI,10020
18
- biolib/_internal/runtime.py,sha256=9pZ3s3L7LGxdqOgnHh1KK3Jjyn_9MjhQmKHI-6hMT3U,448
17
+ biolib/_internal/push_application.py,sha256=b1WvlijJYOLp7f43AoAzjINKkALM9DVB8yfSetEP4Z4,10381
18
+ biolib/_internal/runtime.py,sha256=BiHl4klUHr36MCpqKaUso4idHeBZfPAahLYRQrabFqA,486
19
19
  biolib/_internal/types/__init__.py,sha256=11ZucS8jKeLGAAswXyKI7FH2KLHd6T9Sh8ZK2Ar3jlk,152
20
20
  biolib/_internal/types/app.py,sha256=Mz2QGD_jESX-K9JYnLWPo4YA__Q_1FQQTk9pvidCohU,118
21
21
  biolib/_internal/types/data_record.py,sha256=AHoIiwVqeHj0HozQxFRAyxk-d3XJgLWno4ic1z9eTrQ,865
@@ -23,7 +23,8 @@ biolib/_internal/types/experiment.py,sha256=D94iBdn2nS92lRW-TOs1a2WKXJD5ZtmzL4yp
23
23
  biolib/_internal/types/resource.py,sha256=G-vPkZoe4Um6FPxsQZtRzAlbSW5sDW4NFkbjn21I3V4,372
24
24
  biolib/_internal/types/typing.py,sha256=D4EKKEe7kDx0K6lJi-H_XLtk-8w6nu2fdqn9bvzI-Xo,288
25
25
  biolib/_internal/utils/__init__.py,sha256=p5vsIFyu-zYqBgdSMfwW9NC_jk7rXvvCbV4Bzd3As7c,630
26
- biolib/_runtime/runtime.py,sha256=daYxzIpRoW4k-HJFu2BMXeylYSlCXn3-SqdSriCFnKw,2770
26
+ biolib/_internal/utils/multinode.py,sha256=UnM08GXc8U-p0eoSleer4BIgngIsn_fgh9FxRQJkIiI,8068
27
+ biolib/_runtime/runtime.py,sha256=oVgTnDDJv9L4BUP1_sd0oAj4LLyyiPSQdhp7ixWARvw,2923
27
28
  biolib/api/__init__.py,sha256=mQ4u8FijqyLzjYMezMUUbbBGNB3iFmkNdjXnWPZ7Jlw,138
28
29
  biolib/api/client.py,sha256=FRpdH5aI187b_I_4HUNi680v4iOP65z5f2RcUo8D8MA,3559
29
30
  biolib/app/__init__.py,sha256=cdPtcfb_U-bxb9iSL4fCEq2rpD9OjkyY4W-Zw60B0LI,37
@@ -31,7 +32,7 @@ biolib/app/app.py,sha256=P2RwaDAskUHzlciuTJUroqUocRwoyOLT6YbgMyCRRDI,8484
31
32
  biolib/app/search_apps.py,sha256=K4a41f5XIWth2BWI7OffASgIsD0ko8elCax8YL2igaY,1470
32
33
  biolib/biolib_api_client/__init__.py,sha256=E5EMa19wJoblwSdQPYrxc_BtIeRsAuO0L_jQweWw-Yk,182
33
34
  biolib/biolib_api_client/api_client.py,sha256=ciNx4ybpyKG5LEf4KQdGEz13r0jTxImyQat4_HDecD0,7373
34
- biolib/biolib_api_client/app_types.py,sha256=FxSr4UqfnMhLe34p8bm02wsC3g1Jz8iaing5tRKDOQI,2442
35
+ biolib/biolib_api_client/app_types.py,sha256=1sXz9XnLRKNALMglNdTbew7AL6OkcUan0MPdj4xQLis,2456
35
36
  biolib/biolib_api_client/auth.py,sha256=kjm0ZHnH3I8so3su2sZbBxNHYp-ZUdrZ5lwQ0K36RSw,949
36
37
  biolib/biolib_api_client/biolib_app_api.py,sha256=DndlVxrNTes6DOaWyMINLGZQCRMWVvR7gwt5HVlyf5Y,4240
37
38
  biolib/biolib_api_client/biolib_job_api.py,sha256=7bKfav3-12ewXkEUoLdCmbWdebW8148kxfGJW9SsXZI,7125
@@ -57,7 +58,7 @@ biolib/biolib_errors.py,sha256=5m4lK2l39DafpoXBImEBD4EPH3ayXBX0JgtPzmGClow,689
57
58
  biolib/biolib_logging.py,sha256=J3E5H_LL5k6ZUim2C8gqN7E6lCBZMTpO4tnMpOPwG9U,2854
58
59
  biolib/cli/__init__.py,sha256=0v3c_J-U0k46c5ZWeQjLG_kTaKDJm81LBxQpDO2B_aI,1286
59
60
  biolib/cli/auth.py,sha256=rpWGmXs6Fz6CGrO9K8ibPRszOdXG78Vig_boKaVCD9A,2082
60
- biolib/cli/data_record.py,sha256=08JbZkFWKMo0PrnhhG0jQEKnNW7pPLti9cOw8s1TWfI,3344
61
+ biolib/cli/data_record.py,sha256=t8DfJK2EZ_SNZ9drDA_N5Jqy8DNwf9f5SlFrIaOvtv0,3501
61
62
  biolib/cli/download_container.py,sha256=HIZVHOPmslGE5M2Dsp9r2cCkAEJx__vcsDz5Wt5LRos,483
62
63
  biolib/cli/init.py,sha256=wQOfii_au-d30Hp7DdH-WVw-WVraKvA_zY4za1w7DE8,821
63
64
  biolib/cli/lfs.py,sha256=z2qHUwink85mv9yDgifbVKkVwuyknGhMDTfly_gLKJM,4151
@@ -74,19 +75,19 @@ biolib/compute_node/job_worker/cache_state.py,sha256=MwjSRzcJJ_4jybqvBL4xdgnDYSI
74
75
  biolib/compute_node/job_worker/cache_types.py,sha256=ajpLy8i09QeQS9dEqTn3T6NVNMY_YsHQkSD5nvIHccQ,818
75
76
  biolib/compute_node/job_worker/docker_image_cache.py,sha256=ansHIkJIq_EMW1nZNlW-RRLVVeKWTbzNICYaOHpKiRE,7460
76
77
  biolib/compute_node/job_worker/executors/__init__.py,sha256=bW6t1qi3PZTlHM4quaTLa8EI4ALTCk83cqcVJfJfJfE,145
77
- biolib/compute_node/job_worker/executors/docker_executor.py,sha256=2H7GooL0oAifPcbie0unatB4fRoHyqbsr6S91uagc_g,27952
78
+ biolib/compute_node/job_worker/executors/docker_executor.py,sha256=LcYc4x4-vkmRM1FN1SNj7xRj4qZUNVjQWtgbYMFg3JU,28029
78
79
  biolib/compute_node/job_worker/executors/docker_types.py,sha256=VhsU1DKtJjx_BbCkVmiPZPH4ROiL1ygW1Y_s1Kbpa2o,216
79
80
  biolib/compute_node/job_worker/executors/tars/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
80
81
  biolib/compute_node/job_worker/executors/types.py,sha256=yP5gG39hr-DLnw9bOE--VHi-1arDbIYiGuV1rlTbbHI,1466
81
82
  biolib/compute_node/job_worker/job_legacy_input_wait_timeout_thread.py,sha256=_cvEiZbOwfkv6fYmfrvdi_FVviIEYr_dSClQcOQaUWM,1198
82
83
  biolib/compute_node/job_worker/job_max_runtime_timer_thread.py,sha256=K_xgz7IhiIjpLlXRk8sqaMyLoApcidJkgu29sJX0gb8,1174
83
- biolib/compute_node/job_worker/job_storage.py,sha256=LNkklckDLbYgCHsK5FGrEK75Kw-H4f4JcTCAtuE9His,4035
84
+ biolib/compute_node/job_worker/job_storage.py,sha256=lScHI3ubcHKagSEW243tgbIWXUfbWDHDjEOPMvXxJE8,4603
84
85
  biolib/compute_node/job_worker/job_worker.py,sha256=fuWoYJo9HOqLmWl8yeCXh0mhT4ebbkrWac-BVb58khs,28842
85
86
  biolib/compute_node/job_worker/large_file_system.py,sha256=XXqRlVtYhs-Ji9zQGIk5KQPXFO_Q5jJH0nnlw4GkeMY,10461
86
87
  biolib/compute_node/job_worker/mappings.py,sha256=Z48Kg4nbcOvsT2-9o3RRikBkqflgO4XeaWxTGz-CNvI,2499
87
88
  biolib/compute_node/job_worker/utilization_reporter_thread.py,sha256=7tm5Yk9coqJ9VbEdnO86tSXI0iM0omwIyKENxdxiVXk,8575
88
89
  biolib/compute_node/job_worker/utils.py,sha256=wgxcIA8yAhUPdCwyvuuJ0JmreyWmmUoBO33vWtG60xg,1282
89
- biolib/compute_node/remote_host_proxy.py,sha256=CNWJLXXYm8DGujxEJIsg1wUKFoSgU0nhwdmjMn5gelE,14690
90
+ biolib/compute_node/remote_host_proxy.py,sha256=ibkWqvLJJHwOM7GzYqJV613QPacU5392vjnDK8BNJIU,15213
90
91
  biolib/compute_node/socker_listener_thread.py,sha256=T5_UikA3MB9bD5W_dckYLPTgixh72vKUlgbBvj9dbM0,1601
91
92
  biolib/compute_node/socket_sender_thread.py,sha256=YgamPHeUm2GjMFGx8qk-99WlZhEs-kAb3q_2O6qByig,971
92
93
  biolib/compute_node/utils.py,sha256=M7i_WTyxbFM3Lri9RWZ_8FeQNYrQIWpKGLfp2I55oeY,4677
@@ -95,7 +96,7 @@ biolib/compute_node/webserver/gunicorn_flask_application.py,sha256=jPfR_YvNBekLU
95
96
  biolib/compute_node/webserver/webserver.py,sha256=15PkRyhtdtSgFDxa0z78aPO4ciZURsFqJYi-HtUmZF8,6494
96
97
  biolib/compute_node/webserver/webserver_types.py,sha256=2t8EaFKESnves3BA_NBdnS2yAdo1qwamCFHiSt888nE,380
97
98
  biolib/compute_node/webserver/webserver_utils.py,sha256=XWvwYPbWNR3qS0FYbLLp-MDDfVk0QdaAmg3xPrT0H2s,4234
98
- biolib/compute_node/webserver/worker_thread.py,sha256=26tG73TADnOcXsAr7Iyf6smrLlCqB4x-vvmpUb8WqnA,11569
99
+ biolib/compute_node/webserver/worker_thread.py,sha256=GRRBUqXdMKvbjyLQhYlqGIbFKeU2iiEXIe5IXi9wgdg,11806
99
100
  biolib/experiments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
101
  biolib/experiments/experiment.py,sha256=jIRixmQm3Gq9YdJ3I0-rE1vFukXqq6U4zXehFOJ1yZk,7614
101
102
  biolib/jobs/__init__.py,sha256=aIb2H2DHjQbM2Bs-dysFijhwFcL58Blp0Co0gimED3w,32
@@ -116,8 +117,8 @@ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3
116
117
  biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
117
118
  biolib/utils/seq_util.py,sha256=ZQFcaE37B2dtucN2zDjOmdya_X0ITc1zBFZJNQY13XA,5183
118
119
  biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
119
- pybiolib-1.1.2193.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
120
- pybiolib-1.1.2193.dist-info/METADATA,sha256=NCFsHsCnbnOhw1O2L-polZfFa2pNdtA89TfBW7f_o7E,1508
121
- pybiolib-1.1.2193.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
122
- pybiolib-1.1.2193.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
123
- pybiolib-1.1.2193.dist-info/RECORD,,
120
+ pybiolib-1.2.7.dev0.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
121
+ pybiolib-1.2.7.dev0.dist-info/METADATA,sha256=O1duMBotGWLsk_tKeihafeOizbCNQVWjCVT7SbzVTZ4,1510
122
+ pybiolib-1.2.7.dev0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
123
+ pybiolib-1.2.7.dev0.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
124
+ pybiolib-1.2.7.dev0.dist-info/RECORD,,