pybiolib 1.1.2208__py3-none-any.whl → 1.1.2218__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biolib/compute_node/webserver/worker_thread.py +41 -38
- {pybiolib-1.1.2208.dist-info → pybiolib-1.1.2218.dist-info}/METADATA +1 -1
- {pybiolib-1.1.2208.dist-info → pybiolib-1.1.2218.dist-info}/RECORD +6 -6
- {pybiolib-1.1.2208.dist-info → pybiolib-1.1.2218.dist-info}/LICENSE +0 -0
- {pybiolib-1.1.2208.dist-info → pybiolib-1.1.2218.dist-info}/WHEEL +0 -0
- {pybiolib-1.1.2208.dist-info → pybiolib-1.1.2218.dist-info}/entry_points.txt +0 -0
@@ -2,23 +2,23 @@ import base64
|
|
2
2
|
import os
|
3
3
|
import random
|
4
4
|
import shutil
|
5
|
+
import socket
|
5
6
|
import sys
|
6
|
-
import time
|
7
7
|
import threading
|
8
|
-
import
|
8
|
+
import time
|
9
9
|
from queue import Queue
|
10
10
|
|
11
|
-
from biolib import utils
|
11
|
+
from biolib import api, utils
|
12
|
+
from biolib.biolib_binary_format import ModuleOutputV2, SystemException, SystemStatusUpdate
|
12
13
|
from biolib.biolib_binary_format.utils import LocalFileIndexableBuffer
|
14
|
+
from biolib.biolib_logging import logger, logger_no_user_data
|
13
15
|
from biolib.compute_node.cloud_utils import CloudUtils
|
14
16
|
from biolib.compute_node.job_worker import JobWorkerProcess
|
15
17
|
from biolib.compute_node.job_worker.job_storage import JobStorage
|
16
18
|
from biolib.compute_node.socker_listener_thread import SocketListenerThread
|
17
19
|
from biolib.compute_node.socket_sender_thread import SocketSenderThread
|
20
|
+
from biolib.compute_node.utils import SystemExceptionCodes, WorkerThreadException, get_package_type
|
18
21
|
from biolib.compute_node.webserver import webserver_utils
|
19
|
-
from biolib.biolib_binary_format import SystemStatusUpdate, SystemException, ModuleOutputV2
|
20
|
-
from biolib.compute_node.utils import get_package_type, WorkerThreadException, SystemExceptionCodes
|
21
|
-
from biolib.biolib_logging import logger, logger_no_user_data
|
22
22
|
|
23
23
|
SOCKET_HOST = '127.0.0.1'
|
24
24
|
|
@@ -37,7 +37,7 @@ class WorkerThread(threading.Thread):
|
|
37
37
|
self._sender_thread = None
|
38
38
|
self._start_and_connect_to_compute_process()
|
39
39
|
|
40
|
-
logger.debug(f
|
40
|
+
logger.debug(f'WorkerThread connected to port {self._socket_port}')
|
41
41
|
|
42
42
|
except Exception as exception:
|
43
43
|
logger_no_user_data.error(exception)
|
@@ -79,20 +79,16 @@ class WorkerThread(threading.Thread):
|
|
79
79
|
if progress == 94:
|
80
80
|
# Get Job exit code
|
81
81
|
try:
|
82
|
-
module_output_path = os.path.join(
|
83
|
-
|
84
|
-
|
85
|
-
buffer=LocalFileIndexableBuffer(
|
86
|
-
filename=module_output_path
|
87
|
-
)
|
82
|
+
module_output_path = os.path.join(
|
83
|
+
self._job_temporary_dir,
|
84
|
+
JobStorage.module_output_file_name,
|
88
85
|
)
|
86
|
+
module_output = ModuleOutputV2(buffer=LocalFileIndexableBuffer(filename=module_output_path))
|
89
87
|
self.compute_state['exit_code'] = module_output.get_exit_code()
|
90
88
|
logger_no_user_data.debug(f"Got exit code: {self.compute_state['exit_code']}")
|
91
89
|
|
92
90
|
except Exception as error: # pylint: disable=broad-except
|
93
|
-
logger_no_user_data.error(
|
94
|
-
f'Could not get exit_code from module output due to: {error}'
|
95
|
-
)
|
91
|
+
logger_no_user_data.error(f'Could not get exit_code from module output due to: {error}')
|
96
92
|
|
97
93
|
if utils.IS_RUNNING_IN_CLOUD:
|
98
94
|
JobStorage.upload_module_output(
|
@@ -107,7 +103,7 @@ class WorkerThread(threading.Thread):
|
|
107
103
|
elif package_type == 'SystemException':
|
108
104
|
error_code = SystemException(package).deserialize()
|
109
105
|
self.compute_state['status']['error_code'] = error_code
|
110
|
-
logger.debug(
|
106
|
+
logger.debug('Hit error. Terminating Worker Thread and Compute Process')
|
111
107
|
self.compute_state['progress'] = 95
|
112
108
|
self.terminate()
|
113
109
|
|
@@ -153,10 +149,10 @@ class WorkerThread(threading.Thread):
|
|
153
149
|
|
154
150
|
# Starting a thread for accepting connections before starting the process that should connect to the socket
|
155
151
|
logger_no_user_data.debug('Starting connection thread')
|
156
|
-
self._connection_thread = threading.Thread(
|
157
|
-
|
158
|
-
messages_to_send_queue
|
159
|
-
|
152
|
+
self._connection_thread = threading.Thread(
|
153
|
+
target=self._accept_new_socket_connection,
|
154
|
+
args=[received_messages_queue, messages_to_send_queue],
|
155
|
+
)
|
160
156
|
self._connection_thread.start()
|
161
157
|
logger_no_user_data.debug('Started connection thread')
|
162
158
|
logger_no_user_data.debug('Starting compute process')
|
@@ -177,6 +173,16 @@ class WorkerThread(threading.Thread):
|
|
177
173
|
self._sender_thread.start()
|
178
174
|
|
179
175
|
def terminate(self) -> None:
|
176
|
+
cloud_job_uuid = self.compute_state['cloud_job_id']
|
177
|
+
exit_code = self.compute_state.get('exit_code')
|
178
|
+
system_exception_code = self.compute_state['status'].get('error_code')
|
179
|
+
if utils.IS_RUNNING_IN_CLOUD:
|
180
|
+
CloudUtils.finish_cloud_job(
|
181
|
+
cloud_job_id=cloud_job_uuid,
|
182
|
+
system_exception_code=system_exception_code,
|
183
|
+
exit_code=exit_code,
|
184
|
+
)
|
185
|
+
|
180
186
|
deregistered_due_to_error = False
|
181
187
|
if self._job_worker_process:
|
182
188
|
logger_no_user_data.debug(
|
@@ -184,7 +190,8 @@ class WorkerThread(threading.Thread):
|
|
184
190
|
)
|
185
191
|
self._job_worker_process.terminate()
|
186
192
|
|
187
|
-
|
193
|
+
clean_up_timeout_in_seconds = 600
|
194
|
+
for _ in range(clean_up_timeout_in_seconds):
|
188
195
|
if self._job_worker_process.exitcode is not None:
|
189
196
|
logger_no_user_data.debug(
|
190
197
|
f'Job "{self._job_uuid}" worker process exitcode {self._job_worker_process.exitcode}'
|
@@ -196,35 +203,25 @@ class WorkerThread(threading.Thread):
|
|
196
203
|
|
197
204
|
if self._job_worker_process.exitcode is None:
|
198
205
|
# TODO: Figure out if more error handling is necessary here
|
199
|
-
logger_no_user_data.error(
|
206
|
+
logger_no_user_data.error(
|
207
|
+
f'Job {self._job_uuid} worker process did not exit within {clean_up_timeout_in_seconds} seconds'
|
208
|
+
)
|
200
209
|
if utils.IS_RUNNING_IN_CLOUD:
|
201
210
|
logger_no_user_data.error('Deregistering compute node...')
|
202
211
|
CloudUtils.deregister(error='job_cleanup_timed_out')
|
203
212
|
deregistered_due_to_error = True
|
204
213
|
|
205
214
|
# Delete result as error occurred
|
206
|
-
system_exception_code = self.compute_state['status'].get('error_code')
|
207
215
|
if system_exception_code and os.path.exists(self._job_temporary_dir):
|
208
216
|
shutil.rmtree(self._job_temporary_dir)
|
209
217
|
|
210
|
-
exit_code = self.compute_state.get('exit_code')
|
211
|
-
|
212
|
-
if utils.IS_RUNNING_IN_CLOUD:
|
213
|
-
# Get and send compute node exception code and job exit code if present
|
214
|
-
logger_no_user_data.debug(f"Sending exit code {exit_code}")
|
215
|
-
CloudUtils.finish_cloud_job(
|
216
|
-
cloud_job_id=self.compute_state['cloud_job_id'],
|
217
|
-
system_exception_code=system_exception_code,
|
218
|
-
exit_code=exit_code
|
219
|
-
)
|
220
|
-
|
221
218
|
if self._socket:
|
222
219
|
self._socket.close()
|
223
220
|
|
224
221
|
if self._connection:
|
225
222
|
self._connection.close()
|
226
223
|
|
227
|
-
if self.compute_state['progress'] == 95:
|
224
|
+
if not utils.IS_RUNNING_IN_CLOUD and self.compute_state['progress'] == 95:
|
228
225
|
seconds_to_sleep = 60 # 1 minute
|
229
226
|
logger_no_user_data.debug(
|
230
227
|
f'Job "{self._job_uuid}" worker thread sleeping for {seconds_to_sleep} seconds before cleaning up'
|
@@ -245,12 +242,18 @@ class WorkerThread(threading.Thread):
|
|
245
242
|
f'Job "{self._job_uuid}" could not be found, maybe it has already been cleaned up'
|
246
243
|
)
|
247
244
|
|
248
|
-
logger_no_user_data.debug(f'Job "{self._job_uuid}" worker thread terminated')
|
249
|
-
|
250
245
|
if utils.IS_RUNNING_IN_CLOUD:
|
246
|
+
config = CloudUtils.get_webserver_config()
|
247
|
+
logger_no_user_data.debug(f'Job "{self._job_uuid}" reporting CloudJob "{cloud_job_uuid}" as cleaned up...')
|
248
|
+
api.client.post(
|
249
|
+
path=f'/internal/compute-nodes/cloud-jobs/{cloud_job_uuid}/cleaned-up/',
|
250
|
+
headers={'Compute-Node-Auth-Token': config['compute_node_info']['auth_token']},
|
251
|
+
)
|
252
|
+
|
251
253
|
if deregistered_due_to_error:
|
252
254
|
CloudUtils.shutdown() # shutdown now
|
253
255
|
else:
|
254
256
|
webserver_utils.update_auto_shutdown_time()
|
255
257
|
|
258
|
+
logger_no_user_data.debug(f'Job "{self._job_uuid}" worker thread exiting...')
|
256
259
|
sys.exit()
|
@@ -95,7 +95,7 @@ biolib/compute_node/webserver/gunicorn_flask_application.py,sha256=jPfR_YvNBekLU
|
|
95
95
|
biolib/compute_node/webserver/webserver.py,sha256=15PkRyhtdtSgFDxa0z78aPO4ciZURsFqJYi-HtUmZF8,6494
|
96
96
|
biolib/compute_node/webserver/webserver_types.py,sha256=2t8EaFKESnves3BA_NBdnS2yAdo1qwamCFHiSt888nE,380
|
97
97
|
biolib/compute_node/webserver/webserver_utils.py,sha256=XWvwYPbWNR3qS0FYbLLp-MDDfVk0QdaAmg3xPrT0H2s,4234
|
98
|
-
biolib/compute_node/webserver/worker_thread.py,sha256=
|
98
|
+
biolib/compute_node/webserver/worker_thread.py,sha256=AfzSK6ZXXHB1QufemIEmCbVjvLPcOieNJK4xJRRM7B0,11871
|
99
99
|
biolib/experiments/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
100
100
|
biolib/experiments/experiment.py,sha256=jIRixmQm3Gq9YdJ3I0-rE1vFukXqq6U4zXehFOJ1yZk,7614
|
101
101
|
biolib/jobs/__init__.py,sha256=aIb2H2DHjQbM2Bs-dysFijhwFcL58Blp0Co0gimED3w,32
|
@@ -116,8 +116,8 @@ biolib/utils/cache_state.py,sha256=u256F37QSRIVwqKlbnCyzAX4EMI-kl6Dwu6qwj-Qmag,3
|
|
116
116
|
biolib/utils/multipart_uploader.py,sha256=XvGP1I8tQuKhAH-QugPRoEsCi9qvbRk-DVBs5PNwwJo,8452
|
117
117
|
biolib/utils/seq_util.py,sha256=ZQFcaE37B2dtucN2zDjOmdya_X0ITc1zBFZJNQY13XA,5183
|
118
118
|
biolib/utils/zip/remote_zip.py,sha256=0wErYlxir5921agfFeV1xVjf29l9VNgGQvNlWOlj2Yc,23232
|
119
|
-
pybiolib-1.1.
|
120
|
-
pybiolib-1.1.
|
121
|
-
pybiolib-1.1.
|
122
|
-
pybiolib-1.1.
|
123
|
-
pybiolib-1.1.
|
119
|
+
pybiolib-1.1.2218.dist-info/LICENSE,sha256=F2h7gf8i0agDIeWoBPXDMYScvQOz02pAWkKhTGOHaaw,1067
|
120
|
+
pybiolib-1.1.2218.dist-info/METADATA,sha256=syMRSPiPLeFeF4Z5KUmOiQiWBjkohMPb1uARFQZLOK0,1508
|
121
|
+
pybiolib-1.1.2218.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
122
|
+
pybiolib-1.1.2218.dist-info/entry_points.txt,sha256=p6DyaP_2kctxegTX23WBznnrDi4mz6gx04O5uKtRDXg,42
|
123
|
+
pybiolib-1.1.2218.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|