learning-loop-node 0.15.0__py3-none-any.whl → 0.16.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of learning-loop-node might be problematic. Click here for more details.
- learning_loop_node/annotation/annotator_node.py +7 -1
- learning_loop_node/data_classes/__init__.py +32 -6
- learning_loop_node/data_classes/general.py +10 -11
- learning_loop_node/detector/detector_node.py +145 -134
- learning_loop_node/detector/outbox.py +2 -2
- learning_loop_node/loop_communication.py +9 -8
- learning_loop_node/node.py +49 -36
- learning_loop_node/rest.py +3 -2
- learning_loop_node/tests/annotator/test_annotator_node.py +4 -1
- learning_loop_node/tests/detector/conftest.py +9 -0
- learning_loop_node/tests/detector/test_outbox.py +27 -15
- learning_loop_node/tests/trainer/states/test_state_sync_confusion_matrix.py +4 -1
- learning_loop_node/trainer/trainer_logic_generic.py +3 -0
- learning_loop_node/trainer/trainer_node.py +4 -3
- {learning_loop_node-0.15.0.dist-info → learning_loop_node-0.16.1.dist-info}/METADATA +1 -1
- {learning_loop_node-0.15.0.dist-info → learning_loop_node-0.16.1.dist-info}/RECORD +17 -17
- {learning_loop_node-0.15.0.dist-info → learning_loop_node-0.16.1.dist-info}/WHEEL +0 -0
|
@@ -18,7 +18,7 @@ from .annotator_logic import AnnotatorLogic
|
|
|
18
18
|
class AnnotatorNode(Node):
|
|
19
19
|
|
|
20
20
|
def __init__(self, name: str, annotator_logic: AnnotatorLogic, uuid: Optional[str] = None):
|
|
21
|
-
super().__init__(name, uuid, 'annotation_node')
|
|
21
|
+
super().__init__(name, uuid=uuid, node_type='annotation_node')
|
|
22
22
|
self.tool = annotator_logic
|
|
23
23
|
self.histories: Dict = {}
|
|
24
24
|
annotator_logic.init(self)
|
|
@@ -35,6 +35,9 @@ class AnnotatorNode(Node):
|
|
|
35
35
|
return self.tool.logout_user(sid)
|
|
36
36
|
|
|
37
37
|
async def _handle_user_input(self, user_input_dict: Dict) -> str:
|
|
38
|
+
if not self.sio_client or not self.sio_client.connected:
|
|
39
|
+
raise ConnectionError('SocketIO client is not connected')
|
|
40
|
+
|
|
38
41
|
user_input = from_dict(data_class=UserInput, data=user_input_dict)
|
|
39
42
|
|
|
40
43
|
if user_input.data.key_up == 'Escape':
|
|
@@ -66,6 +69,9 @@ class AnnotatorNode(Node):
|
|
|
66
69
|
|
|
67
70
|
async def send_status(self):
|
|
68
71
|
|
|
72
|
+
if not self.sio_client or not self.sio_client.connected:
|
|
73
|
+
raise ConnectionError('SocketIO client is not connected')
|
|
74
|
+
|
|
69
75
|
status = AnnotationNodeStatus(
|
|
70
76
|
id=self.uuid,
|
|
71
77
|
name=self.name,
|
|
@@ -1,17 +1,43 @@
|
|
|
1
1
|
from .annotations import AnnotationData, SegmentationAnnotation, ToolOutput, UserInput
|
|
2
|
-
from .detections import (
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
2
|
+
from .detections import (
|
|
3
|
+
BoxDetection,
|
|
4
|
+
ClassificationDetection,
|
|
5
|
+
Detections,
|
|
6
|
+
Observation,
|
|
7
|
+
Point,
|
|
8
|
+
PointDetection,
|
|
9
|
+
SegmentationDetection,
|
|
10
|
+
Shape,
|
|
11
|
+
)
|
|
12
|
+
from .general import (
|
|
13
|
+
AboutResponse,
|
|
14
|
+
AnnotationNodeStatus,
|
|
15
|
+
Category,
|
|
16
|
+
Context,
|
|
17
|
+
DetectorStatus,
|
|
18
|
+
ErrorConfiguration,
|
|
19
|
+
ModelInformation,
|
|
20
|
+
ModelVersionResponse,
|
|
21
|
+
NodeState,
|
|
22
|
+
NodeStatus,
|
|
23
|
+
)
|
|
6
24
|
from .image_metadata import ImageMetadata, ImagesMetadata
|
|
7
25
|
from .socket_response import SocketResponse
|
|
8
|
-
from .training import
|
|
26
|
+
from .training import (
|
|
27
|
+
Errors,
|
|
28
|
+
PretrainedModel,
|
|
29
|
+
Training,
|
|
30
|
+
TrainingError,
|
|
31
|
+
TrainingOut,
|
|
32
|
+
TrainingStateData,
|
|
33
|
+
TrainingStatus,
|
|
34
|
+
)
|
|
9
35
|
|
|
10
36
|
__all__ = [
|
|
11
37
|
'AboutResponse', 'AnnotationData', 'SegmentationAnnotation', 'ToolOutput', 'UserInput',
|
|
12
38
|
'BoxDetection', 'ClassificationDetection', 'ImageMetadata', 'Observation', 'Point', 'PointDetection',
|
|
13
39
|
'SegmentationDetection', 'Shape', 'Detections',
|
|
14
|
-
'AnnotationNodeStatus', 'Category', 'Context', '
|
|
40
|
+
'AnnotationNodeStatus', 'Category', 'Context', 'DetectorStatus', 'ErrorConfiguration',
|
|
15
41
|
'ModelInformation', 'NodeState', 'NodeStatus', 'ModelVersionResponse', 'ImagesMetadata',
|
|
16
42
|
'SocketResponse',
|
|
17
43
|
'Errors', 'PretrainedModel', 'Training',
|
|
@@ -148,8 +148,8 @@ class NodeState(str, Enum):
|
|
|
148
148
|
class NodeStatus():
|
|
149
149
|
id: str
|
|
150
150
|
name: str
|
|
151
|
-
state:
|
|
152
|
-
uptime:
|
|
151
|
+
state: NodeState = NodeState.Online
|
|
152
|
+
uptime: int = 0
|
|
153
153
|
errors: Dict = field(default_factory=dict)
|
|
154
154
|
capabilities: List[str] = field(default_factory=list)
|
|
155
155
|
|
|
@@ -175,14 +175,13 @@ class AnnotationNodeStatus(NodeStatus):
|
|
|
175
175
|
|
|
176
176
|
|
|
177
177
|
@dataclass(**KWONLY_SLOTS)
|
|
178
|
-
class
|
|
179
|
-
|
|
178
|
+
class DetectorStatus():
|
|
179
|
+
uuid: str
|
|
180
180
|
name: str
|
|
181
|
+
state: NodeState
|
|
182
|
+
uptime: int
|
|
181
183
|
model_format: str
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
errors:
|
|
185
|
-
|
|
186
|
-
current_model: Optional[str] = None
|
|
187
|
-
target_model: Optional[str] = None
|
|
188
|
-
operation_mode: Optional[str] = None
|
|
184
|
+
current_model: Optional[str]
|
|
185
|
+
target_model: Optional[str]
|
|
186
|
+
errors: Dict
|
|
187
|
+
operation_mode: str
|
|
@@ -13,9 +13,17 @@ from dacite import from_dict
|
|
|
13
13
|
from fastapi.encoders import jsonable_encoder
|
|
14
14
|
from socketio import AsyncClient
|
|
15
15
|
|
|
16
|
-
from ..data_classes import (
|
|
17
|
-
|
|
18
|
-
|
|
16
|
+
from ..data_classes import (
|
|
17
|
+
AboutResponse,
|
|
18
|
+
Category,
|
|
19
|
+
Context,
|
|
20
|
+
DetectorStatus,
|
|
21
|
+
ImageMetadata,
|
|
22
|
+
ImagesMetadata,
|
|
23
|
+
ModelInformation,
|
|
24
|
+
ModelVersionResponse,
|
|
25
|
+
Shape,
|
|
26
|
+
)
|
|
19
27
|
from ..data_exchanger import DataExchanger, DownloadError
|
|
20
28
|
from ..enums import OperationMode, VersionMode
|
|
21
29
|
from ..globals import GLOBALS
|
|
@@ -37,7 +45,7 @@ from .rest import upload as rest_upload
|
|
|
37
45
|
class DetectorNode(Node):
|
|
38
46
|
|
|
39
47
|
def __init__(self, name: str, detector: DetectorLogic, uuid: Optional[str] = None, use_backdoor_controls: bool = False) -> None:
|
|
40
|
-
super().__init__(name, uuid, 'detector', False)
|
|
48
|
+
super().__init__(name, uuid=uuid, node_type='detector', needs_login=False, needs_sio=False)
|
|
41
49
|
self.detector_logic = detector
|
|
42
50
|
self.organization = environment_reader.organization()
|
|
43
51
|
self.project = environment_reader.project()
|
|
@@ -64,6 +72,10 @@ class DetectorNode(Node):
|
|
|
64
72
|
self.target_model: Optional[ModelInformation] = None
|
|
65
73
|
self.loop_deployment_target: Optional[ModelInformation] = None
|
|
66
74
|
|
|
75
|
+
self._regular_status_sync_cycles: int = int(os.environ.get('SYNC_CYCLES', '6'))
|
|
76
|
+
"""sync status every 6 cycles (6*10s = 1min)"""
|
|
77
|
+
self._repeat_cycles_to_next_sync: int = 0
|
|
78
|
+
|
|
67
79
|
self.include_router(rest_detect.router, tags=["detect"])
|
|
68
80
|
self.include_router(rest_upload.router, prefix="")
|
|
69
81
|
self.include_router(rest_mode.router, tags=["operation_mode"])
|
|
@@ -74,7 +86,7 @@ class DetectorNode(Node):
|
|
|
74
86
|
if use_backdoor_controls or os.environ.get('USE_BACKDOOR_CONTROLS', '0').lower() in ('1', 'true'):
|
|
75
87
|
self.include_router(backdoor_controls.router)
|
|
76
88
|
|
|
77
|
-
self.
|
|
89
|
+
self._setup_sio_server()
|
|
78
90
|
|
|
79
91
|
def get_about_response(self) -> AboutResponse:
|
|
80
92
|
return AboutResponse(
|
|
@@ -190,13 +202,7 @@ class DetectorNode(Node):
|
|
|
190
202
|
except Exception:
|
|
191
203
|
self.log.exception("error during 'shutdown'")
|
|
192
204
|
|
|
193
|
-
|
|
194
|
-
try:
|
|
195
|
-
await self._check_for_update()
|
|
196
|
-
except Exception:
|
|
197
|
-
self.log.exception("error during '_check_for_update'")
|
|
198
|
-
|
|
199
|
-
def setup_sio_server(self) -> None:
|
|
205
|
+
def _setup_sio_server(self) -> None:
|
|
200
206
|
"""The DetectorNode acts as a SocketIO server. This method sets up the server and defines the event handlers."""
|
|
201
207
|
# pylint: disable=unused-argument
|
|
202
208
|
|
|
@@ -322,96 +328,22 @@ class DetectorNode(Node):
|
|
|
322
328
|
def connect(sid, environ, auth) -> None:
|
|
323
329
|
self.connected_clients.append(sid)
|
|
324
330
|
|
|
325
|
-
|
|
326
|
-
try:
|
|
327
|
-
self.log.debug('Current operation mode is %s', self.operation_mode)
|
|
328
|
-
try:
|
|
329
|
-
await self.sync_status_with_learning_loop()
|
|
330
|
-
except Exception:
|
|
331
|
-
self.log.exception('Sync with learning loop failed (could not check for updates):')
|
|
332
|
-
return
|
|
333
|
-
|
|
334
|
-
if self.operation_mode != OperationMode.Idle:
|
|
335
|
-
self.log.debug('not checking for updates; operation mode is %s', self.operation_mode)
|
|
336
|
-
return
|
|
337
|
-
|
|
338
|
-
self.status.reset_error('update_model')
|
|
339
|
-
if self.target_model is None:
|
|
340
|
-
self.log.debug('not checking for updates; no target model selected')
|
|
341
|
-
return
|
|
342
|
-
|
|
343
|
-
if self.detector_logic.model_info is not None:
|
|
344
|
-
current_version = self.detector_logic.model_info.version
|
|
345
|
-
else:
|
|
346
|
-
current_version = None
|
|
331
|
+
# ================================== Repeat Cycle, sync and model updates ==================================
|
|
347
332
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
try:
|
|
360
|
-
await self.data_exchanger.download_model(target_model_folder,
|
|
361
|
-
Context(organization=self.organization,
|
|
362
|
-
project=self.project),
|
|
363
|
-
self.target_model.id,
|
|
364
|
-
self.detector_logic.model_format)
|
|
365
|
-
self.log.info('Downloaded model %s', self.target_model.version)
|
|
366
|
-
except Exception:
|
|
367
|
-
self.log.exception('Could not download model %s', self.target_model.version)
|
|
368
|
-
shutil.rmtree(target_model_folder, ignore_errors=True)
|
|
369
|
-
return
|
|
370
|
-
try:
|
|
371
|
-
os.unlink(model_symlink)
|
|
372
|
-
os.remove(model_symlink)
|
|
373
|
-
except Exception:
|
|
374
|
-
pass
|
|
375
|
-
os.symlink(target_model_folder, model_symlink)
|
|
376
|
-
self.log.info('Updated symlink for model to %s', os.readlink(model_symlink))
|
|
377
|
-
|
|
378
|
-
try:
|
|
379
|
-
self.detector_logic.load_model_info_and_init_model()
|
|
380
|
-
except NodeNeedsRestartError:
|
|
381
|
-
self.log.error('Node needs restart')
|
|
382
|
-
sys.exit(0)
|
|
383
|
-
except Exception:
|
|
384
|
-
self.log.exception('Could not load model, will retry download on next check')
|
|
385
|
-
shutil.rmtree(target_model_folder, ignore_errors=True)
|
|
386
|
-
return
|
|
387
|
-
try:
|
|
388
|
-
await self.sync_status_with_learning_loop()
|
|
389
|
-
except Exception:
|
|
390
|
-
pass
|
|
391
|
-
# self.reload(reason='new model installed')
|
|
392
|
-
|
|
393
|
-
except Exception as e:
|
|
394
|
-
self.log.exception('check_for_update failed')
|
|
395
|
-
msg = e.cause if isinstance(e, DownloadError) else str(e)
|
|
396
|
-
self.status.set_error('update_model', f'Could not update model: {msg}')
|
|
397
|
-
try:
|
|
398
|
-
await self.sync_status_with_learning_loop()
|
|
399
|
-
except Exception:
|
|
400
|
-
pass
|
|
401
|
-
|
|
402
|
-
async def sync_status_with_learning_loop(self) -> None:
|
|
403
|
-
"""Sync status of the detector with the Learning Loop.
|
|
404
|
-
The Learning Loop will respond with the model info of the deployment target.
|
|
405
|
-
If version_control is set to FollowLoop, the detector will update the target_model.
|
|
406
|
-
Return if the communication was successful.
|
|
407
|
-
|
|
408
|
-
Raises:
|
|
409
|
-
Exception: If the communication with the Learning Loop failed.
|
|
410
|
-
"""
|
|
333
|
+
async def on_repeat(self) -> None:
|
|
334
|
+
"""Implementation of the repeat cycle. This method is called every 10 seconds.
|
|
335
|
+
To avoid too many requests, the status is only synced every 6 cycles (1 minute)."""
|
|
336
|
+
try:
|
|
337
|
+
self._repeat_cycles_to_next_sync -= 1
|
|
338
|
+
if self._repeat_cycles_to_next_sync <= 0:
|
|
339
|
+
self._repeat_cycles_to_next_sync = self._regular_status_sync_cycles
|
|
340
|
+
await self._sync_status_with_loop()
|
|
341
|
+
await self._update_model_if_required()
|
|
342
|
+
except Exception:
|
|
343
|
+
self.log.exception("error during '_check_for_update'")
|
|
411
344
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
raise Exception('Status sync failed: not connected')
|
|
345
|
+
async def _sync_status_with_loop(self) -> None:
|
|
346
|
+
"""Sync status of the detector with the Learning Loop."""
|
|
415
347
|
|
|
416
348
|
if self.detector_logic.model_info is not None:
|
|
417
349
|
current_model = self.detector_logic.model_info.version
|
|
@@ -420,8 +352,8 @@ class DetectorNode(Node):
|
|
|
420
352
|
|
|
421
353
|
target_model_version = self.target_model.version if self.target_model else None
|
|
422
354
|
|
|
423
|
-
status =
|
|
424
|
-
|
|
355
|
+
status = DetectorStatus(
|
|
356
|
+
uuid=self.uuid,
|
|
425
357
|
name=self.name,
|
|
426
358
|
state=self.status.state,
|
|
427
359
|
errors=self.status.errors,
|
|
@@ -432,49 +364,128 @@ class DetectorNode(Node):
|
|
|
432
364
|
model_format=self.detector_logic.model_format,
|
|
433
365
|
)
|
|
434
366
|
|
|
435
|
-
self.log_status_on_change(status.state
|
|
367
|
+
self.log_status_on_change(status.state, status)
|
|
368
|
+
|
|
369
|
+
try:
|
|
370
|
+
response = await self.loop_communicator.post(
|
|
371
|
+
f'/{self.organization}/projects/{self.project}/detectors', json=jsonable_encoder(asdict(status)))
|
|
372
|
+
except Exception:
|
|
373
|
+
self.log.warning('Exception while trying to sync status with loop')
|
|
374
|
+
|
|
375
|
+
if response.status_code != 200:
|
|
376
|
+
self.log.warning('Status update failed: %s', str(response))
|
|
436
377
|
|
|
437
|
-
|
|
378
|
+
async def _update_model_if_required(self) -> None:
|
|
379
|
+
"""Check if a new model is available and update if necessary.
|
|
380
|
+
The Learning Loop will respond with the model info of the deployment target.
|
|
381
|
+
If version_control is set to FollowLoop or the chosen target model is not used,
|
|
382
|
+
the detector will update the target_model."""
|
|
438
383
|
try:
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
self.
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
self.
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
384
|
+
if self.operation_mode != OperationMode.Idle:
|
|
385
|
+
self.log.debug('not checking for updates; operation mode is %s', self.operation_mode)
|
|
386
|
+
return
|
|
387
|
+
|
|
388
|
+
await self._check_for_new_deployment_target()
|
|
389
|
+
|
|
390
|
+
self.status.reset_error('update_model')
|
|
391
|
+
if self.target_model is None:
|
|
392
|
+
self.log.debug('not running any updates; target model is None')
|
|
393
|
+
return
|
|
394
|
+
|
|
395
|
+
current_version = self.detector_logic.model_info.version \
|
|
396
|
+
if self.detector_logic.model_info is not None else None
|
|
397
|
+
|
|
398
|
+
if current_version != self.target_model.version:
|
|
399
|
+
self.log.info('Updating model from %s to %s',
|
|
400
|
+
current_version or "-", self.target_model.version)
|
|
401
|
+
await self._update_model(self.target_model)
|
|
402
|
+
|
|
403
|
+
except Exception as e:
|
|
404
|
+
self.log.exception('check_for_update failed')
|
|
405
|
+
msg = e.cause if isinstance(e, DownloadError) else str(e)
|
|
406
|
+
self.status.set_error('update_model', f'Could not update model: {msg}')
|
|
407
|
+
await self._sync_status_with_loop()
|
|
408
|
+
|
|
409
|
+
async def _check_for_new_deployment_target(self) -> None:
|
|
410
|
+
"""Ask the learning loop for the current deployment target and update self.loop_deployment_target.
|
|
411
|
+
If version_control is set to FollowLoop, also update target_model."""
|
|
412
|
+
try:
|
|
413
|
+
response = await self.loop_communicator.get(
|
|
414
|
+
f'/{self.organization}/projects/{self.project}/deployment/target')
|
|
415
|
+
except Exception:
|
|
416
|
+
self.log.warning('Exception while trying to check for new deployment target')
|
|
417
|
+
return
|
|
418
|
+
|
|
419
|
+
if response.status_code != 200:
|
|
420
|
+
self.log.warning('Failed to check for new deployment target: %s', str(response))
|
|
421
|
+
return
|
|
422
|
+
|
|
423
|
+
response_data = response.json()
|
|
424
|
+
|
|
425
|
+
deployment_target_uuid = response_data['model_uuid']
|
|
426
|
+
deployment_target_version = response_data['version']
|
|
460
427
|
self.loop_deployment_target = ModelInformation(organization=self.organization, project=self.project,
|
|
461
428
|
host="", categories=[],
|
|
462
|
-
id=
|
|
463
|
-
version=
|
|
429
|
+
id=deployment_target_uuid,
|
|
430
|
+
version=deployment_target_version)
|
|
464
431
|
|
|
465
432
|
if (self.version_control == VersionMode.FollowLoop and
|
|
466
433
|
self.target_model != self.loop_deployment_target):
|
|
467
|
-
|
|
434
|
+
previous_version = self.target_model.version if self.target_model else None
|
|
468
435
|
self.target_model = self.loop_deployment_target
|
|
469
|
-
self.log.info('
|
|
470
|
-
|
|
436
|
+
self.log.info('Deployment target changed from %s to %s',
|
|
437
|
+
previous_version, self.target_model.version)
|
|
438
|
+
|
|
439
|
+
async def _update_model(self, target_model: ModelInformation) -> None:
|
|
440
|
+
"""Download and install the target model.
|
|
441
|
+
On failure, the target_model will be set to None which will trigger a retry on the next check."""
|
|
442
|
+
|
|
443
|
+
with step_into(GLOBALS.data_folder):
|
|
444
|
+
target_model_folder = f'models/{target_model.version}'
|
|
445
|
+
if os.path.exists(target_model_folder) and len(os.listdir(target_model_folder)) > 0:
|
|
446
|
+
self.log.info('No need to download model. %s (already exists)', target_model.version)
|
|
447
|
+
else:
|
|
448
|
+
os.makedirs(target_model_folder, exist_ok=True)
|
|
449
|
+
try:
|
|
450
|
+
await self.data_exchanger.download_model(target_model_folder,
|
|
451
|
+
Context(organization=self.organization,
|
|
452
|
+
project=self.project),
|
|
453
|
+
target_model.id, self.detector_logic.model_format)
|
|
454
|
+
self.log.info('Downloaded model %s', target_model.version)
|
|
455
|
+
except Exception:
|
|
456
|
+
self.log.exception('Could not download model %s', target_model.version)
|
|
457
|
+
shutil.rmtree(target_model_folder, ignore_errors=True)
|
|
458
|
+
self.target_model = None
|
|
459
|
+
return
|
|
460
|
+
|
|
461
|
+
model_symlink = 'model'
|
|
462
|
+
try:
|
|
463
|
+
os.unlink(model_symlink)
|
|
464
|
+
os.remove(model_symlink)
|
|
465
|
+
except Exception:
|
|
466
|
+
pass
|
|
467
|
+
os.symlink(target_model_folder, model_symlink)
|
|
468
|
+
self.log.info('Updated symlink for model to %s', os.readlink(model_symlink))
|
|
469
|
+
|
|
470
|
+
try:
|
|
471
|
+
self.detector_logic.load_model_info_and_init_model()
|
|
472
|
+
except NodeNeedsRestartError:
|
|
473
|
+
self.log.error('Node needs restart')
|
|
474
|
+
sys.exit(0)
|
|
475
|
+
except Exception:
|
|
476
|
+
self.log.exception('Could not load model, will retry download on next check')
|
|
477
|
+
shutil.rmtree(target_model_folder, ignore_errors=True)
|
|
478
|
+
self.target_model = None
|
|
479
|
+
return
|
|
480
|
+
|
|
481
|
+
await self._sync_status_with_loop()
|
|
482
|
+
# self.reload(reason='new model installed')
|
|
483
|
+
|
|
484
|
+
# ================================== API Implementations ==================================
|
|
471
485
|
|
|
472
486
|
async def set_operation_mode(self, mode: OperationMode):
|
|
473
487
|
self.operation_mode = mode
|
|
474
|
-
|
|
475
|
-
await self.sync_status_with_learning_loop()
|
|
476
|
-
except Exception as e:
|
|
477
|
-
self.log.warning('Operation mode set to %s, but sync failed: %s', mode, e)
|
|
488
|
+
await self._sync_status_with_loop()
|
|
478
489
|
|
|
479
490
|
def reload(self, reason: str):
|
|
480
491
|
"""provide a cause for the reload"""
|
|
@@ -189,7 +189,7 @@ class Outbox():
|
|
|
189
189
|
|
|
190
190
|
async def _continuous_upload(self) -> None:
|
|
191
191
|
self.log.info('continuous upload started')
|
|
192
|
-
assert self.shutdown_event is not None
|
|
192
|
+
assert self.shutdown_event is not None, 'shutdown_event is None'
|
|
193
193
|
while not self.shutdown_event.is_set():
|
|
194
194
|
await self.upload()
|
|
195
195
|
await asyncio.sleep(self.UPLOAD_INTERVAL_S)
|
|
@@ -287,7 +287,7 @@ class Outbox():
|
|
|
287
287
|
return True
|
|
288
288
|
|
|
289
289
|
try:
|
|
290
|
-
assert self.shutdown_event is not None
|
|
290
|
+
assert self.shutdown_event is not None, 'shutdown_event is None'
|
|
291
291
|
self.shutdown_event.set()
|
|
292
292
|
await asyncio.wait_for(self.upload_task, timeout=self.UPLOAD_TIMEOUT_S + 1)
|
|
293
293
|
except asyncio.TimeoutError:
|
|
@@ -8,7 +8,8 @@ from httpx import Cookies, Timeout
|
|
|
8
8
|
|
|
9
9
|
from .helpers import environment_reader
|
|
10
10
|
|
|
11
|
-
logging.
|
|
11
|
+
logger = logging.getLogger('loop_communication')
|
|
12
|
+
logging.getLogger("httpx").setLevel(logging.WARNING)
|
|
12
13
|
|
|
13
14
|
SLEEP_TIME_ON_429 = 5
|
|
14
15
|
MAX_RETRIES_ON_429 = 20
|
|
@@ -37,9 +38,9 @@ class LoopCommunicator():
|
|
|
37
38
|
host: str = environment_reader.host(default='learning-loop.ai')
|
|
38
39
|
self.ssl_cert_path = environment_reader.ssl_certificate_path()
|
|
39
40
|
if self.ssl_cert_path:
|
|
40
|
-
|
|
41
|
+
logger.info('Using SSL certificate at %s', self.ssl_cert_path)
|
|
41
42
|
else:
|
|
42
|
-
|
|
43
|
+
logger.info('No SSL certificate path set')
|
|
43
44
|
self.host: str = host
|
|
44
45
|
self.username: str = environment_reader.username()
|
|
45
46
|
self.password: str = environment_reader.password()
|
|
@@ -52,7 +53,7 @@ class LoopCommunicator():
|
|
|
52
53
|
else:
|
|
53
54
|
self.async_client = httpx.AsyncClient(base_url=self.base_url, timeout=Timeout(60.0))
|
|
54
55
|
|
|
55
|
-
|
|
56
|
+
logger.info('Loop interface initialized with base_url: %s / user: %s', self.base_url, self.username)
|
|
56
57
|
|
|
57
58
|
def websocket_url(self) -> str:
|
|
58
59
|
return f'ws{"s" if "learning-loop.ai" in self.host else ""}://' + self.host
|
|
@@ -65,7 +66,7 @@ class LoopCommunicator():
|
|
|
65
66
|
self.async_client.cookies.clear()
|
|
66
67
|
response = await self.async_client.post('/api/login', data={'username': self.username, 'password': self.password})
|
|
67
68
|
if response.status_code != 200:
|
|
68
|
-
|
|
69
|
+
logger.info('Login failed with response: %s', response)
|
|
69
70
|
raise LoopCommunicationException('Login failed with response: ' + str(response))
|
|
70
71
|
self.async_client.cookies.update(response.cookies)
|
|
71
72
|
|
|
@@ -74,7 +75,7 @@ class LoopCommunicator():
|
|
|
74
75
|
|
|
75
76
|
response = await self.async_client.post('/api/logout')
|
|
76
77
|
if response.status_code != 200:
|
|
77
|
-
|
|
78
|
+
logger.info('Logout failed with response: %s', response)
|
|
78
79
|
raise LoopCommunicationException('Logout failed with response: ' + str(response))
|
|
79
80
|
self.async_client.cookies.clear()
|
|
80
81
|
|
|
@@ -90,12 +91,12 @@ class LoopCommunicator():
|
|
|
90
91
|
start_time = time.time()
|
|
91
92
|
while True:
|
|
92
93
|
try:
|
|
93
|
-
|
|
94
|
+
logger.info('Checking if backend is ready')
|
|
94
95
|
response = await self.get('/status', requires_login=False)
|
|
95
96
|
if response.status_code == 200:
|
|
96
97
|
return True
|
|
97
98
|
except Exception:
|
|
98
|
-
|
|
99
|
+
logger.info('backend not ready yet.')
|
|
99
100
|
if timeout is not None and time.time() + 10 - start_time > timeout:
|
|
100
101
|
raise TimeoutError('Backend not ready within timeout')
|
|
101
102
|
await asyncio.sleep(10)
|
learning_loop_node/node.py
CHANGED
|
@@ -4,9 +4,9 @@ from .helpers import log_conf # pylint: disable=unused-import
|
|
|
4
4
|
|
|
5
5
|
# isort: split
|
|
6
6
|
# pylint: disable=wrong-import-order,ungrouped-imports
|
|
7
|
-
|
|
8
7
|
import asyncio
|
|
9
8
|
import logging
|
|
9
|
+
import os
|
|
10
10
|
import ssl
|
|
11
11
|
import sys
|
|
12
12
|
from abc import abstractmethod
|
|
@@ -32,7 +32,12 @@ class NodeConnectionError(Exception):
|
|
|
32
32
|
|
|
33
33
|
class Node(FastAPI):
|
|
34
34
|
|
|
35
|
-
def __init__(self,
|
|
35
|
+
def __init__(self,
|
|
36
|
+
name: str, *,
|
|
37
|
+
uuid: Optional[str] = None,
|
|
38
|
+
node_type: str = 'node',
|
|
39
|
+
needs_login: bool = True,
|
|
40
|
+
needs_sio: bool = True) -> None:
|
|
36
41
|
"""Base class for all nodes. A node is a process that communicates with the zauberzeug learning loop.
|
|
37
42
|
This class provides the basic functionality to connect to the learning loop via socket.io and to exchange data.
|
|
38
43
|
|
|
@@ -42,6 +47,7 @@ class Node(FastAPI):
|
|
|
42
47
|
and stored in f'{GLOBALS.data_folder}/uuids.json'.
|
|
43
48
|
From the second run, the uuid is recovered based on the name of the node.
|
|
44
49
|
needs_login (bool): If True, the node will try to login to the learning loop.
|
|
50
|
+
needs_sio (bool): If True, the node will try to establish and keep a socket.io connection to the loop.
|
|
45
51
|
"""
|
|
46
52
|
|
|
47
53
|
super().__init__(lifespan=self.lifespan)
|
|
@@ -49,13 +55,16 @@ class Node(FastAPI):
|
|
|
49
55
|
self.name = name
|
|
50
56
|
self.uuid = uuid or read_or_create_uuid(self.name)
|
|
51
57
|
self.needs_login = needs_login
|
|
58
|
+
self._needs_sio = needs_sio
|
|
59
|
+
if needs_sio and not needs_login:
|
|
60
|
+
raise ValueError('A node that needs sio must also need login')
|
|
52
61
|
|
|
53
62
|
self.log = logging.getLogger('Node')
|
|
54
63
|
self.init_loop_communicator()
|
|
55
64
|
self.data_exchanger = DataExchanger(None, self.loop_communicator)
|
|
56
65
|
|
|
57
66
|
self.startup_datetime = datetime.now()
|
|
58
|
-
self.
|
|
67
|
+
self.sio_client: Optional[AsyncClient] = None
|
|
59
68
|
self.status = NodeStatus(id=self.uuid, name=self.name)
|
|
60
69
|
|
|
61
70
|
self.sio_headers = {'organization': self.loop_communicator.organization,
|
|
@@ -64,7 +73,7 @@ class Node(FastAPI):
|
|
|
64
73
|
|
|
65
74
|
self.repeat_task: Any = None
|
|
66
75
|
self.socket_connection_broken = False
|
|
67
|
-
self._skip_repeat_loop =
|
|
76
|
+
self._skip_repeat_loop = os.environ.get('SKIP_REPEAT_ON_START', '0') in ('True', 'true', '1')
|
|
68
77
|
|
|
69
78
|
self.include_router(router)
|
|
70
79
|
|
|
@@ -78,23 +87,18 @@ class Node(FastAPI):
|
|
|
78
87
|
|
|
79
88
|
self._client_session: Optional[aiohttp.ClientSession] = None
|
|
80
89
|
|
|
81
|
-
def log_status_on_change(self, current_state_str: str, full_status: Any):
|
|
90
|
+
def log_status_on_change(self, current_state_str: str, full_status: Any) -> None:
|
|
82
91
|
if self.previous_state != current_state_str:
|
|
83
92
|
self.previous_state = current_state_str
|
|
84
93
|
self.log.info('Status changed to %s', full_status)
|
|
85
94
|
else:
|
|
86
95
|
self.log.debug('sending status %s', full_status)
|
|
87
96
|
|
|
88
|
-
def init_loop_communicator(self):
|
|
97
|
+
def init_loop_communicator(self) -> None:
|
|
98
|
+
"""Initialize the loop communicator and set the websocket url."""
|
|
89
99
|
self.loop_communicator = LoopCommunicator()
|
|
90
100
|
self.websocket_url = self.loop_communicator.websocket_url()
|
|
91
101
|
|
|
92
|
-
@property
|
|
93
|
-
def sio_client(self) -> AsyncClient:
|
|
94
|
-
if self._sio_client is None:
|
|
95
|
-
raise Exception('sio_client not yet initialized')
|
|
96
|
-
return self._sio_client
|
|
97
|
-
|
|
98
102
|
# --------------------------------------------------- APPLICATION LIFECYCLE ---------------------------------------------------
|
|
99
103
|
@asynccontextmanager
|
|
100
104
|
async def lifespan(self, app: FastAPI): # pylint: disable=unused-argument
|
|
@@ -114,7 +118,7 @@ class Node(FastAPI):
|
|
|
114
118
|
except asyncio.CancelledError:
|
|
115
119
|
pass
|
|
116
120
|
|
|
117
|
-
async def _on_startup(self):
|
|
121
|
+
async def _on_startup(self) -> None:
|
|
118
122
|
self.log.info('received "startup" lifecycle-event - connecting to loop')
|
|
119
123
|
try:
|
|
120
124
|
await self.reconnect_to_loop()
|
|
@@ -124,17 +128,22 @@ class Node(FastAPI):
|
|
|
124
128
|
await self.on_startup()
|
|
125
129
|
self.log.info('successfully finished on_startup')
|
|
126
130
|
|
|
127
|
-
async def _on_shutdown(self):
|
|
131
|
+
async def _on_shutdown(self) -> None:
|
|
128
132
|
self.log.info('received "shutdown" lifecycle-event')
|
|
129
133
|
await self.loop_communicator.shutdown()
|
|
130
|
-
if self.
|
|
131
|
-
await self.
|
|
134
|
+
if self.sio_client is not None:
|
|
135
|
+
await self.sio_client.disconnect()
|
|
132
136
|
if self._client_session is not None:
|
|
133
137
|
await self._client_session.close()
|
|
134
138
|
self.log.info('successfully disconnected from loop.')
|
|
135
139
|
await self.on_shutdown()
|
|
136
140
|
|
|
137
141
|
async def repeat_loop(self) -> None:
|
|
142
|
+
"""Executed every `repeat_loop_cycle_sec` seconds.
|
|
143
|
+
Triggers the abstract method `on_repeat` which should be implemented by the subclass.
|
|
144
|
+
If `needs_sio` is True, it ensures that the socket.io connection is established before calling on_repeat.
|
|
145
|
+
"""
|
|
146
|
+
|
|
138
147
|
while True:
|
|
139
148
|
if self._skip_repeat_loop:
|
|
140
149
|
self.log.debug('node is muted, skipping repeat loop')
|
|
@@ -142,7 +151,8 @@ class Node(FastAPI):
|
|
|
142
151
|
continue
|
|
143
152
|
try:
|
|
144
153
|
async with self.repeat_loop_lock:
|
|
145
|
-
|
|
154
|
+
if self._needs_sio:
|
|
155
|
+
await self._ensure_sio_connection()
|
|
146
156
|
await self.on_repeat()
|
|
147
157
|
except asyncio.CancelledError:
|
|
148
158
|
return
|
|
@@ -153,14 +163,17 @@ class Node(FastAPI):
|
|
|
153
163
|
|
|
154
164
|
await asyncio.sleep(self.repeat_loop_cycle_sec)
|
|
155
165
|
|
|
156
|
-
async def _ensure_sio_connection(self):
|
|
157
|
-
if
|
|
166
|
+
async def _ensure_sio_connection(self) -> None:
|
|
167
|
+
"""Call reconnect_to_loop if the socket.io connection is broken or not established."""
|
|
168
|
+
if self.socket_connection_broken or self.sio_client is None or not self.sio_client.connected:
|
|
158
169
|
self.log.info('Reconnecting to loop via sio due to %s',
|
|
159
170
|
'broken connection' if self.socket_connection_broken else 'no connection')
|
|
160
171
|
await self.reconnect_to_loop()
|
|
161
172
|
|
|
162
|
-
async def reconnect_to_loop(self):
|
|
173
|
+
async def reconnect_to_loop(self) -> None:
|
|
163
174
|
"""Initialize the loop communicator, log in if needed and reconnect to the loop via socket.io."""
|
|
175
|
+
if not self._needs_sio:
|
|
176
|
+
return
|
|
164
177
|
self.init_loop_communicator()
|
|
165
178
|
await self.loop_communicator.backend_ready(timeout=5)
|
|
166
179
|
if self.needs_login:
|
|
@@ -174,13 +187,13 @@ class Node(FastAPI):
|
|
|
174
187
|
|
|
175
188
|
self.socket_connection_broken = False
|
|
176
189
|
|
|
177
|
-
def set_skip_repeat_loop(self, value: bool):
|
|
190
|
+
def set_skip_repeat_loop(self, value: bool) -> None:
|
|
178
191
|
self._skip_repeat_loop = value
|
|
179
192
|
self.log.info('node is muted: %s', value)
|
|
180
193
|
|
|
181
194
|
# --------------------------------------------------- SOCKET.IO ---------------------------------------------------
|
|
182
195
|
|
|
183
|
-
async def _reconnect_socketio(self):
|
|
196
|
+
async def _reconnect_socketio(self) -> None:
|
|
184
197
|
"""Create a socket.io client, connect it to the learning loop and register its events.
|
|
185
198
|
The current client is disconnected and deleted if it already exists."""
|
|
186
199
|
|
|
@@ -188,7 +201,7 @@ class Node(FastAPI):
|
|
|
188
201
|
cookies = self.loop_communicator.get_cookies()
|
|
189
202
|
self.log.debug('HTTP Cookies: %s\n', cookies)
|
|
190
203
|
|
|
191
|
-
if self.
|
|
204
|
+
if self.sio_client is not None:
|
|
192
205
|
try:
|
|
193
206
|
await self.sio_client.disconnect()
|
|
194
207
|
self.log.info('disconnected from loop via sio')
|
|
@@ -199,7 +212,7 @@ class Node(FastAPI):
|
|
|
199
212
|
'Did not receive disconnect event from loop within 5 seconds.\nContinuing with new connection...')
|
|
200
213
|
except Exception as e:
|
|
201
214
|
self.log.warning('Could not disconnect from loop via sio: %s.\nIgnoring...', e)
|
|
202
|
-
self.
|
|
215
|
+
self.sio_client = None
|
|
203
216
|
|
|
204
217
|
connector = None
|
|
205
218
|
if self.loop_communicator.ssl_cert_path:
|
|
@@ -217,55 +230,55 @@ class Node(FastAPI):
|
|
|
217
230
|
else:
|
|
218
231
|
self._client_session = aiohttp.ClientSession(connector=connector)
|
|
219
232
|
|
|
220
|
-
self.
|
|
233
|
+
self.sio_client = AsyncClient(request_timeout=20, http_session=self._client_session)
|
|
221
234
|
|
|
222
235
|
# pylint: disable=protected-access
|
|
223
|
-
self.
|
|
236
|
+
self.sio_client._trigger_event = ensure_socket_response(self.sio_client._trigger_event)
|
|
224
237
|
|
|
225
|
-
@self.
|
|
238
|
+
@self.sio_client.event
|
|
226
239
|
async def connect():
|
|
227
240
|
self.log.info('received "connect" via sio from loop.')
|
|
228
241
|
self.CONNECTED_TO_LOOP.set()
|
|
229
242
|
self.DISCONNECTED_FROM_LOOP.clear()
|
|
230
243
|
|
|
231
|
-
@self.
|
|
244
|
+
@self.sio_client.event
|
|
232
245
|
async def disconnect():
|
|
233
246
|
self.log.info('received "disconnect" via sio from loop.')
|
|
234
247
|
self.DISCONNECTED_FROM_LOOP.set()
|
|
235
248
|
self.CONNECTED_TO_LOOP.clear()
|
|
236
249
|
|
|
237
|
-
@self.
|
|
250
|
+
@self.sio_client.event
|
|
238
251
|
async def restart():
|
|
239
252
|
self.log.info('received "restart" via sio from loop -> restarting node.')
|
|
240
253
|
sys.exit(0)
|
|
241
254
|
|
|
242
|
-
self.register_sio_events(self.
|
|
255
|
+
self.register_sio_events(self.sio_client)
|
|
243
256
|
try:
|
|
244
|
-
await self.
|
|
257
|
+
await self.sio_client.connect(f"{self.websocket_url}", headers=self.sio_headers, socketio_path="/ws/socket.io")
|
|
245
258
|
except Exception as e:
|
|
246
259
|
self.log.exception('Could not connect socketio client to loop')
|
|
247
260
|
raise NodeConnectionError('Could not connect socketio client to loop') from e
|
|
248
261
|
|
|
249
|
-
if not self.
|
|
262
|
+
if not self.sio_client.connected:
|
|
250
263
|
self.log.exception('Could not connect socketio client to loop')
|
|
251
264
|
raise NodeConnectionError('Could not connect socketio client to loop')
|
|
252
265
|
|
|
253
266
|
# --------------------------------------------------- ABSTRACT METHODS ---------------------------------------------------
|
|
254
267
|
|
|
255
268
|
@abstractmethod
|
|
256
|
-
async def on_startup(self):
|
|
269
|
+
async def on_startup(self) -> None:
|
|
257
270
|
"""This method is called when the node is started.
|
|
258
271
|
Note: In this method the sio connection is not yet established!"""
|
|
259
272
|
|
|
260
273
|
@abstractmethod
|
|
261
|
-
async def on_shutdown(self):
|
|
274
|
+
async def on_shutdown(self) -> None:
|
|
262
275
|
"""This method is called when the node is shut down."""
|
|
263
276
|
|
|
264
277
|
@abstractmethod
|
|
265
|
-
async def on_repeat(self):
|
|
278
|
+
async def on_repeat(self) -> None:
|
|
266
279
|
"""This method is called every 10 seconds."""
|
|
267
280
|
|
|
268
281
|
@abstractmethod
|
|
269
|
-
def register_sio_events(self, sio_client: AsyncClient):
|
|
282
|
+
def register_sio_events(self, sio_client: AsyncClient) -> None:
|
|
270
283
|
"""Register (additional) socket.io events for the communication with the learning loop.
|
|
271
284
|
The events: connect, disconnect and restart are already registered and should not be overwritten."""
|
learning_loop_node/rest.py
CHANGED
|
@@ -37,7 +37,7 @@ async def _debug_logging(request: Request) -> str:
|
|
|
37
37
|
@router.put("/socketio")
|
|
38
38
|
async def _socketio(request: Request) -> str:
|
|
39
39
|
'''
|
|
40
|
-
Enable or disable the socketio connection to the learning loop.
|
|
40
|
+
Enable or disable the socketio connection and repeat loop to the learning loop.
|
|
41
41
|
Not intended to be used outside of testing.
|
|
42
42
|
|
|
43
43
|
Example Usage
|
|
@@ -48,7 +48,8 @@ async def _socketio(request: Request) -> str:
|
|
|
48
48
|
node: 'Node' = request.app
|
|
49
49
|
|
|
50
50
|
if state == 'off':
|
|
51
|
-
|
|
51
|
+
if node.sio_client:
|
|
52
|
+
await node.sio_client.disconnect()
|
|
52
53
|
node.set_skip_repeat_loop(True) # Prevent auto-reconnection
|
|
53
54
|
return 'off'
|
|
54
55
|
if state == 'on':
|
|
@@ -46,12 +46,15 @@ def default_user_input() -> UserInput:
|
|
|
46
46
|
@pytest.mark.asyncio
|
|
47
47
|
@pytest.mark.usefixtures('setup_test_project')
|
|
48
48
|
async def test_image_download():
|
|
49
|
+
# pylint: disable=protected-access
|
|
50
|
+
|
|
49
51
|
image_folder = '/tmp/learning_loop_lib_data/zauberzeug/pytest_nodelib_annotator/images'
|
|
50
52
|
|
|
51
53
|
assert os.path.exists(image_folder) is False or len(os.listdir(image_folder)) == 0
|
|
52
54
|
|
|
53
55
|
node = AnnotatorNode(name="", uuid="", annotator_logic=MockedAnnotatatorLogic())
|
|
54
56
|
user_input = default_user_input()
|
|
55
|
-
|
|
57
|
+
await node._ensure_sio_connection() # This is required as the node is not "started"
|
|
58
|
+
_ = await node._handle_user_input(jsonable_encoder(asdict(user_input)))
|
|
56
59
|
|
|
57
60
|
assert os.path.exists(image_folder) is True and len(os.listdir(image_folder)) == 1
|
|
@@ -38,6 +38,15 @@ def should_have_segmentations(request) -> bool:
|
|
|
38
38
|
return should_have_seg
|
|
39
39
|
|
|
40
40
|
|
|
41
|
+
@pytest.fixture(scope="session", name="event_loop")
|
|
42
|
+
def fixture_event_loop():
|
|
43
|
+
"""Overrides pytest default function scoped event loop"""
|
|
44
|
+
policy = asyncio.get_event_loop_policy()
|
|
45
|
+
loop = policy.new_event_loop()
|
|
46
|
+
yield loop
|
|
47
|
+
loop.close()
|
|
48
|
+
|
|
49
|
+
|
|
41
50
|
@pytest.fixture()
|
|
42
51
|
async def test_detector_node():
|
|
43
52
|
"""Initializes and runs a detector testnode. Note that the running instance and the one the function returns are not the same instances!"""
|
|
@@ -20,6 +20,30 @@ async def test_outbox():
|
|
|
20
20
|
test_outbox = Outbox()
|
|
21
21
|
|
|
22
22
|
yield test_outbox
|
|
23
|
+
|
|
24
|
+
await test_outbox.set_mode('stopped')
|
|
25
|
+
shutil.rmtree(test_outbox.path, ignore_errors=True)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.fixture(autouse=True, scope='session')
|
|
29
|
+
async def fix_upload_bug():
|
|
30
|
+
""" This is a workaround for an upload bug that causes the SECOND upload to fail on the CI server. """
|
|
31
|
+
os.environ['LOOP_ORGANIZATION'] = 'zauberzeug'
|
|
32
|
+
os.environ['LOOP_PROJECT'] = 'demo'
|
|
33
|
+
shutil.rmtree(f'{GLOBALS.data_folder}/outbox', ignore_errors=True)
|
|
34
|
+
test_outbox = Outbox()
|
|
35
|
+
|
|
36
|
+
await test_outbox.set_mode('continuous_upload')
|
|
37
|
+
await test_outbox.save(get_test_image_binary())
|
|
38
|
+
await asyncio.sleep(6)
|
|
39
|
+
assert await wait_for_outbox_count(test_outbox, 0, timeout=15), 'File was not cleared even though outbox should be in continuous_upload'
|
|
40
|
+
assert test_outbox.upload_counter == 1
|
|
41
|
+
|
|
42
|
+
await test_outbox.save(get_test_image_binary())
|
|
43
|
+
await asyncio.sleep(6)
|
|
44
|
+
# assert await wait_for_outbox_count(test_outbox, 0, timeout=90), 'File was not cleared even though outbox should be in continuous_upload'
|
|
45
|
+
# assert test_outbox.upload_counter == 2
|
|
46
|
+
|
|
23
47
|
await test_outbox.set_mode('stopped')
|
|
24
48
|
shutil.rmtree(test_outbox.path, ignore_errors=True)
|
|
25
49
|
|
|
@@ -37,17 +61,6 @@ async def test_set_outbox_mode(test_outbox: Outbox):
|
|
|
37
61
|
assert test_outbox.upload_counter == 1
|
|
38
62
|
|
|
39
63
|
|
|
40
|
-
@pytest.mark.asyncio
|
|
41
|
-
async def test_outbox_upload_is_successful(test_outbox: Outbox):
|
|
42
|
-
await test_outbox.save(get_test_image_binary())
|
|
43
|
-
await asyncio.sleep(1)
|
|
44
|
-
await test_outbox.save(get_test_image_binary())
|
|
45
|
-
assert await wait_for_outbox_count(test_outbox, 2)
|
|
46
|
-
await test_outbox.upload()
|
|
47
|
-
assert await wait_for_outbox_count(test_outbox, 0)
|
|
48
|
-
assert test_outbox.upload_counter == 2
|
|
49
|
-
|
|
50
|
-
|
|
51
64
|
@pytest.mark.asyncio
|
|
52
65
|
async def test_invalid_jpg_is_not_saved(test_outbox: Outbox):
|
|
53
66
|
invalid_bytes = b'invalid jpg'
|
|
@@ -58,14 +71,13 @@ async def test_invalid_jpg_is_not_saved(test_outbox: Outbox):
|
|
|
58
71
|
# ------------------------------ Helper functions --------------------------------------
|
|
59
72
|
|
|
60
73
|
|
|
61
|
-
def get_test_image_binary():
|
|
62
|
-
img = Image.new('RGB', (
|
|
74
|
+
def get_test_image_binary() -> bytes:
|
|
75
|
+
img = Image.new('RGB', (600, 300), color=(73, 109, 137))
|
|
63
76
|
# convert img to jpg binary
|
|
64
77
|
|
|
65
78
|
img_byte_arr = io.BytesIO()
|
|
66
79
|
img.save(img_byte_arr, format='JPEG')
|
|
67
|
-
|
|
68
|
-
return img_byte_arr
|
|
80
|
+
return img_byte_arr.getvalue()
|
|
69
81
|
|
|
70
82
|
# return img.tobytes() # NOT WORKING
|
|
71
83
|
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
|
|
2
2
|
import asyncio
|
|
3
3
|
|
|
4
|
-
from pytest_mock import
|
|
4
|
+
from pytest_mock import ( # pip install pytest-mock # pylint: disable=import-error # type: ignore
|
|
5
|
+
MockerFixture,
|
|
6
|
+
)
|
|
5
7
|
|
|
6
8
|
from ....enums import TrainerState
|
|
7
9
|
from ....trainer.trainer_logic import TrainerLogic
|
|
@@ -54,6 +56,7 @@ async def test_unsynced_model_available__sync_successful(test_initialized_traine
|
|
|
54
56
|
async def test_unsynced_model_available__sio_not_connected(test_initialized_trainer_node: TrainerNode):
|
|
55
57
|
trainer = test_initialized_trainer_node.trainer_logic
|
|
56
58
|
assert isinstance(trainer, TestingTrainerLogic)
|
|
59
|
+
assert test_initialized_trainer_node.sio_client is not None
|
|
57
60
|
|
|
58
61
|
await test_initialized_trainer_node.sio_client.disconnect()
|
|
59
62
|
test_initialized_trainer_node.set_skip_repeat_loop(True)
|
|
@@ -369,6 +369,9 @@ class TrainerLogicGeneric(ABC):
|
|
|
369
369
|
"""Syncronizes the training with the Learning Loop via the update_training endpoint.
|
|
370
370
|
NOTE: This stage sets the errors explicitly because it may be used inside the training stage.
|
|
371
371
|
"""
|
|
372
|
+
if not self.node.sio_client or not self.node.sio_client.connected:
|
|
373
|
+
raise ConnectionError('SocketIO client is not connected')
|
|
374
|
+
|
|
372
375
|
error_key = 'sync_confusion_matrix'
|
|
373
376
|
try:
|
|
374
377
|
new_best_model = self._get_new_best_training_state()
|
|
@@ -16,7 +16,7 @@ from .trainer_logic_generic import TrainerLogicGeneric
|
|
|
16
16
|
class TrainerNode(Node):
|
|
17
17
|
|
|
18
18
|
def __init__(self, name: str, trainer_logic: TrainerLogicGeneric, uuid: Optional[str] = None, use_backdoor_controls: bool = False):
|
|
19
|
-
super().__init__(name, uuid, 'trainer')
|
|
19
|
+
super().__init__(name, uuid=uuid, node_type='trainer')
|
|
20
20
|
trainer_logic._node = self
|
|
21
21
|
self.trainer_logic = trainer_logic
|
|
22
22
|
self.last_training_io = LastTrainingIO(self.uuid)
|
|
@@ -52,7 +52,8 @@ class TrainerNode(Node):
|
|
|
52
52
|
self.check_idle_timeout()
|
|
53
53
|
except exceptions.TimeoutError:
|
|
54
54
|
self.log.warning('timeout when sending status to learning loop, reconnecting sio_client')
|
|
55
|
-
|
|
55
|
+
if self.sio_client:
|
|
56
|
+
await self.sio_client.disconnect() # NOTE: reconnect happens in node._on_repeat
|
|
56
57
|
except Exception:
|
|
57
58
|
self.log.exception('could not send status. Exception:')
|
|
58
59
|
|
|
@@ -76,7 +77,7 @@ class TrainerNode(Node):
|
|
|
76
77
|
return True
|
|
77
78
|
|
|
78
79
|
async def send_status(self):
|
|
79
|
-
if not self.sio_client.connected:
|
|
80
|
+
if not self.sio_client or not self.sio_client.connected:
|
|
80
81
|
self.log.debug('cannot send status - not connected to the Learning Loop')
|
|
81
82
|
return
|
|
82
83
|
|
|
@@ -1,23 +1,23 @@
|
|
|
1
1
|
learning_loop_node/__init__.py,sha256=onN5s8-x_xBsCM6NLmJO0Ym1sJHeCFaGw8qb0oQZmz8,364
|
|
2
2
|
learning_loop_node/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
learning_loop_node/annotation/annotator_logic.py,sha256=BTaopkJZkIf1CI5lfsVKsxbxoUIbDJrevavuQUT5e_c,1000
|
|
4
|
-
learning_loop_node/annotation/annotator_node.py,sha256=
|
|
5
|
-
learning_loop_node/data_classes/__init__.py,sha256=
|
|
4
|
+
learning_loop_node/annotation/annotator_node.py,sha256=J5xwSnM5rwTWrTe-TI37J0JHKf_4PlDuABaHvgjYr_Q,4443
|
|
5
|
+
learning_loop_node/data_classes/__init__.py,sha256=6-pLbokCAvTFW-lh1lLUu7u8V5ZyD-2IVmFg5HHI4Cc,1329
|
|
6
6
|
learning_loop_node/data_classes/annotations.py,sha256=NfMlTv2_5AfVY_JDM4tbjETFjSN2S2I2LJJPMMcDT50,966
|
|
7
7
|
learning_loop_node/data_classes/detections.py,sha256=7vqcS0EK8cmDjRDckHlpSZDZ9YO6qajRmYvx-oxatFc,5425
|
|
8
|
-
learning_loop_node/data_classes/general.py,sha256=
|
|
8
|
+
learning_loop_node/data_classes/general.py,sha256=GQ6vPEIm4qqBV4RZT_YS_dPeKMdbCKo6Pe5-e4Cg3_k,7295
|
|
9
9
|
learning_loop_node/data_classes/image_metadata.py,sha256=YccDyHMbnOrRr4-9hHbCNBpuhlZem5M64c0ZbZXTASY,1764
|
|
10
10
|
learning_loop_node/data_classes/socket_response.py,sha256=tIdt-oYf6ULoJIDYQCecNM9OtWR6_wJ9tL0Ksu83Vko,655
|
|
11
11
|
learning_loop_node/data_classes/training.py,sha256=TybwcCDf_NUaDUaOj30lPm-7Z3Qk9XFRibEX5qIv96Y,5737
|
|
12
12
|
learning_loop_node/data_exchanger.py,sha256=nd9JNPLn9amIeTcSIyUPpbE97ORAcb5yNphvmpgWSUQ,9095
|
|
13
13
|
learning_loop_node/detector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
learning_loop_node/detector/detector_logic.py,sha256=YmsEsqSr0CUUWKtSR7EFU92HA90NvdYiPZGDQKXJUxU,2462
|
|
15
|
-
learning_loop_node/detector/detector_node.py,sha256=
|
|
15
|
+
learning_loop_node/detector/detector_node.py,sha256=IW9vGbl8Xq7DdylYM-jSJtitkCTs4uGYRZyWGuWauYo,29498
|
|
16
16
|
learning_loop_node/detector/exceptions.py,sha256=C6KbNPlSbtfgDrZx2Hbhm7Suk9jVoR3fMRCO0CkrMsQ,196
|
|
17
17
|
learning_loop_node/detector/inbox_filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
learning_loop_node/detector/inbox_filter/cam_observation_history.py,sha256=1PHgXRrhSQ34HSFw7mdX8ndRxHf_i1aP5nXXnrZxhAY,3312
|
|
19
19
|
learning_loop_node/detector/inbox_filter/relevance_filter.py,sha256=rI46jL9ZuI0hiDVxWCfXllB8DlQyyewNs6oZ6MnglMc,1540
|
|
20
|
-
learning_loop_node/detector/outbox.py,sha256=
|
|
20
|
+
learning_loop_node/detector/outbox.py,sha256=izWJtnHG0PNX3-YWtkybLch2slnmT2pmAYrqZpHOaTA,12768
|
|
21
21
|
learning_loop_node/detector/rest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
22
|
learning_loop_node/detector/rest/about.py,sha256=evHJ2svUZY_DFz0FSef5u9c5KW4Uc3GL7EbPinG9-dg,583
|
|
23
23
|
learning_loop_node/detector/rest/backdoor_controls.py,sha256=ZNaFOvC0OLWNtcLiG-NIqS_y1kkLP4csgk3CHhp8Gis,885
|
|
@@ -40,17 +40,17 @@ learning_loop_node/helpers/gdrive_downloader.py,sha256=zeYJciTAJVRpu_eFjwgYLCpIa
|
|
|
40
40
|
learning_loop_node/helpers/log_conf.py,sha256=hqVAa_9NnYEU6N0dcOKmph82p7MpgKqeF_eomTLYzWY,961
|
|
41
41
|
learning_loop_node/helpers/misc.py,sha256=J29iBmsEUAraKKDN1m1NKiHQ3QrP5ub5HBU6cllSP2g,7384
|
|
42
42
|
learning_loop_node/helpers/run.py,sha256=_uox-j3_K_bL3yCAwy3JYSOiIxrnhzVxyxWpCe8_J9U,876
|
|
43
|
-
learning_loop_node/loop_communication.py,sha256=
|
|
44
|
-
learning_loop_node/node.py,sha256
|
|
43
|
+
learning_loop_node/loop_communication.py,sha256=d1tJZ9T_y22FrEq1XzvidgH6d-byGxFq3ovLvPBS4FA,7611
|
|
44
|
+
learning_loop_node/node.py,sha256=xK-xODRo7ov-dNNMcpLW2GAauvjKAK3K9RQh4P9S994,12160
|
|
45
45
|
learning_loop_node/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
learning_loop_node/rest.py,sha256=
|
|
46
|
+
learning_loop_node/rest.py,sha256=5X9IVW9kf1gNf8jifGW9g_gI_-9TEeoMMOW16jvwpRE,1599
|
|
47
47
|
learning_loop_node/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
48
48
|
learning_loop_node/tests/annotator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
49
|
learning_loop_node/tests/annotator/conftest.py,sha256=e83I8WNAUgCFmum1GCx_nSjP9uwAoPIwPk72elypNQY,2098
|
|
50
50
|
learning_loop_node/tests/annotator/pytest.ini,sha256=8QdjmawLy1zAzXrJ88or1kpFDhJw0W5UOnDfGGs_igU,262
|
|
51
|
-
learning_loop_node/tests/annotator/test_annotator_node.py,sha256=
|
|
51
|
+
learning_loop_node/tests/annotator/test_annotator_node.py,sha256=OgdUj0PEWSe0KPTNVVi-1d7DoK7IC9Q3Q3G8TPiP9f4,2090
|
|
52
52
|
learning_loop_node/tests/detector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
|
-
learning_loop_node/tests/detector/conftest.py,sha256=
|
|
53
|
+
learning_loop_node/tests/detector/conftest.py,sha256=Z1uPZGSL5jZyRQkHycQpHjsBjn-sL1QfuJrrJrGTNtM,5517
|
|
54
54
|
learning_loop_node/tests/detector/inbox_filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
55
|
learning_loop_node/tests/detector/inbox_filter/test_observation.py,sha256=k4WYdvnuV7d_r7zI4M2aA8WuBjm0aycQ0vj1rGE2q4w,1370
|
|
56
56
|
learning_loop_node/tests/detector/inbox_filter/test_relevance_group.py,sha256=r-wABFQVsTNTjv7vYGr8wbHfOWy43F_B14ZDWHfiZ-A,7613
|
|
@@ -59,7 +59,7 @@ learning_loop_node/tests/detector/pytest.ini,sha256=8QdjmawLy1zAzXrJ88or1kpFDhJw
|
|
|
59
59
|
learning_loop_node/tests/detector/test.jpg,sha256=msA-vHPmvPiro_D102Qmn1fn4vNfooqYYEXPxZUmYpk,161390
|
|
60
60
|
learning_loop_node/tests/detector/test_client_communication.py,sha256=cVviUmAwbLY3LsJcY-D3ve-Jwxk9WVOrVupeh-PdKtA,8013
|
|
61
61
|
learning_loop_node/tests/detector/test_detector_node.py,sha256=0ZMV6coAvdq-nH8CwY9_LR2tUcH9VLcAB1CWuwHQMpo,3023
|
|
62
|
-
learning_loop_node/tests/detector/test_outbox.py,sha256=
|
|
62
|
+
learning_loop_node/tests/detector/test_outbox.py,sha256=K7c0GeKujNlgjDFS3aY1lN7kDbfJ4dBQfB9lBp3o3_Q,3262
|
|
63
63
|
learning_loop_node/tests/detector/test_relevance_filter.py,sha256=7oTXW4AuObk7NxMqGSwnjcspH3-QUbSdCYlz9hvzV78,2079
|
|
64
64
|
learning_loop_node/tests/detector/testing_detector.py,sha256=MZajybyzISz2G1OENfLHgZhBcLCYzTR4iN9JkWpq5-s,551
|
|
65
65
|
learning_loop_node/tests/general/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -81,7 +81,7 @@ learning_loop_node/tests/trainer/states/test_state_cleanup.py,sha256=gZNxSSwnj9f
|
|
|
81
81
|
learning_loop_node/tests/trainer/states/test_state_detecting.py,sha256=-NLR5se7_OY_X8_Gf-BWw7X6dS_Pzsnkz84J5aTbqFU,3689
|
|
82
82
|
learning_loop_node/tests/trainer/states/test_state_download_train_model.py,sha256=-T8iAutBliv0MV5bV5lPvn2aNjF3vMBCj8iAZTC-Q7g,2992
|
|
83
83
|
learning_loop_node/tests/trainer/states/test_state_prepare.py,sha256=boCU93Bv2VWbW73MC_suTbwCcuR7RWn-6dgVvdiJ9tA,2291
|
|
84
|
-
learning_loop_node/tests/trainer/states/test_state_sync_confusion_matrix.py,sha256=
|
|
84
|
+
learning_loop_node/tests/trainer/states/test_state_sync_confusion_matrix.py,sha256=R3UqQJ2GQMapwRQ5WuZJb9M5IfroD2QqFI4h8etiH0Y,5223
|
|
85
85
|
learning_loop_node/tests/trainer/states/test_state_train.py,sha256=ovRs8EepQjy0yQJssK0TdcZcraBhmUkbMWeNKdHS114,2893
|
|
86
86
|
learning_loop_node/tests/trainer/states/test_state_upload_detections.py,sha256=oFQGTeRZhW7MBISAfpe65KphZNxFUsZu3-5hD9_LS6k,7438
|
|
87
87
|
learning_loop_node/tests/trainer/states/test_state_upload_model.py,sha256=jHWLa48tNljZwIiqI-1z71ENRGnn7Z0BsVcDBVWVBj4,3642
|
|
@@ -97,8 +97,8 @@ learning_loop_node/trainer/rest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
97
97
|
learning_loop_node/trainer/rest/backdoor_controls.py,sha256=ZnK8ypY5r_q0-YZbtaOxhQThzuZvMsQHM5gJGESd_dE,5131
|
|
98
98
|
learning_loop_node/trainer/test_executor.py,sha256=6BVGDN_6f5GEMMEvDLSG1yzMybSvgXaP5uYpSfsVPP0,2224
|
|
99
99
|
learning_loop_node/trainer/trainer_logic.py,sha256=eK-01qZzi10UjLMCQX8vy5eW2FoghPj3rzzDC-s3Si4,8792
|
|
100
|
-
learning_loop_node/trainer/trainer_logic_generic.py,sha256=
|
|
101
|
-
learning_loop_node/trainer/trainer_node.py,sha256=
|
|
102
|
-
learning_loop_node-0.
|
|
103
|
-
learning_loop_node-0.
|
|
104
|
-
learning_loop_node-0.
|
|
100
|
+
learning_loop_node/trainer/trainer_logic_generic.py,sha256=KcHmXr-Hp8_Wuejzj8odY6sRPqi6aw1SEXv3YlbjM98,27057
|
|
101
|
+
learning_loop_node/trainer/trainer_node.py,sha256=tsAMzJewdS7Bi_1b9FwG0d2lGlv2lY37pgOLWr0bP_I,4582
|
|
102
|
+
learning_loop_node-0.16.1.dist-info/METADATA,sha256=nHAEMpBL_tSXA00hNvuLoAuh0RcS1FACaCS_JsCP7rA,13509
|
|
103
|
+
learning_loop_node-0.16.1.dist-info/WHEEL,sha256=WGfLGfLX43Ei_YORXSnT54hxFygu34kMpcQdmgmEwCQ,88
|
|
104
|
+
learning_loop_node-0.16.1.dist-info/RECORD,,
|
|
File without changes
|