learning-loop-node 0.9.3__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of learning-loop-node might be problematic. Click here for more details.
- learning_loop_node/__init__.py +2 -3
- learning_loop_node/annotation/annotator_logic.py +2 -2
- learning_loop_node/annotation/annotator_node.py +16 -15
- learning_loop_node/data_classes/__init__.py +17 -10
- learning_loop_node/data_classes/detections.py +7 -2
- learning_loop_node/data_classes/general.py +4 -5
- learning_loop_node/data_classes/training.py +49 -21
- learning_loop_node/data_exchanger.py +85 -139
- learning_loop_node/detector/__init__.py +0 -1
- learning_loop_node/detector/detector_node.py +10 -13
- learning_loop_node/detector/inbox_filter/cam_observation_history.py +4 -7
- learning_loop_node/detector/outbox.py +0 -1
- learning_loop_node/detector/rest/about.py +1 -0
- learning_loop_node/detector/tests/conftest.py +0 -1
- learning_loop_node/detector/tests/test_client_communication.py +5 -3
- learning_loop_node/detector/tests/test_outbox.py +2 -0
- learning_loop_node/detector/tests/testing_detector.py +1 -8
- learning_loop_node/globals.py +2 -2
- learning_loop_node/helpers/gdrive_downloader.py +1 -1
- learning_loop_node/helpers/misc.py +124 -17
- learning_loop_node/loop_communication.py +57 -25
- learning_loop_node/node.py +62 -135
- learning_loop_node/tests/test_downloader.py +8 -7
- learning_loop_node/tests/test_executor.py +14 -11
- learning_loop_node/tests/test_helper.py +3 -5
- learning_loop_node/trainer/downloader.py +1 -1
- learning_loop_node/trainer/executor.py +87 -83
- learning_loop_node/trainer/io_helpers.py +66 -9
- learning_loop_node/trainer/rest/backdoor_controls.py +10 -5
- learning_loop_node/trainer/rest/controls.py +3 -1
- learning_loop_node/trainer/tests/conftest.py +19 -28
- learning_loop_node/trainer/tests/states/test_state_cleanup.py +5 -3
- learning_loop_node/trainer/tests/states/test_state_detecting.py +23 -20
- learning_loop_node/trainer/tests/states/test_state_download_train_model.py +18 -12
- learning_loop_node/trainer/tests/states/test_state_prepare.py +13 -12
- learning_loop_node/trainer/tests/states/test_state_sync_confusion_matrix.py +21 -18
- learning_loop_node/trainer/tests/states/test_state_train.py +27 -28
- learning_loop_node/trainer/tests/states/test_state_upload_detections.py +34 -32
- learning_loop_node/trainer/tests/states/test_state_upload_model.py +22 -20
- learning_loop_node/trainer/tests/test_errors.py +20 -12
- learning_loop_node/trainer/tests/test_trainer_states.py +4 -5
- learning_loop_node/trainer/tests/testing_trainer_logic.py +25 -30
- learning_loop_node/trainer/trainer_logic.py +80 -590
- learning_loop_node/trainer/trainer_logic_generic.py +495 -0
- learning_loop_node/trainer/trainer_node.py +27 -106
- {learning_loop_node-0.9.3.dist-info → learning_loop_node-0.10.0.dist-info}/METADATA +1 -1
- learning_loop_node-0.10.0.dist-info/RECORD +85 -0
- learning_loop_node/converter/converter_logic.py +0 -68
- learning_loop_node/converter/converter_node.py +0 -125
- learning_loop_node/converter/tests/test_converter.py +0 -55
- learning_loop_node/trainer/training_syncronizer.py +0 -52
- learning_loop_node-0.9.3.dist-info/RECORD +0 -88
- /learning_loop_node/{converter/__init__.py → py.typed} +0 -0
- {learning_loop_node-0.9.3.dist-info → learning_loop_node-0.10.0.dist-info}/WHEEL +0 -0
|
@@ -2,22 +2,28 @@
|
|
|
2
2
|
import asyncio
|
|
3
3
|
import os
|
|
4
4
|
|
|
5
|
+
from learning_loop_node.data_classes import TrainerState
|
|
5
6
|
from learning_loop_node.trainer.tests.state_helper import assert_training_state, create_active_training_file
|
|
6
7
|
from learning_loop_node.trainer.tests.testing_trainer_logic import TestingTrainerLogic
|
|
7
8
|
|
|
9
|
+
# pylint: disable=protected-access
|
|
10
|
+
|
|
8
11
|
|
|
9
12
|
async def test_downloading_is_successful(test_initialized_trainer: TestingTrainerLogic):
|
|
10
13
|
trainer = test_initialized_trainer
|
|
11
|
-
create_active_training_file(trainer, training_state=
|
|
14
|
+
create_active_training_file(trainer, training_state=TrainerState.DataDownloaded)
|
|
12
15
|
|
|
13
16
|
trainer.model_format = 'mocked'
|
|
14
|
-
trainer.
|
|
17
|
+
trainer._init_from_last_training()
|
|
15
18
|
|
|
16
|
-
|
|
19
|
+
asyncio.get_running_loop().create_task(
|
|
20
|
+
trainer._perform_state('download_model',
|
|
21
|
+
TrainerState.TrainModelDownloading,
|
|
22
|
+
TrainerState.TrainModelDownloaded, trainer._download_model))
|
|
17
23
|
await assert_training_state(trainer.training, 'train_model_downloading', timeout=1, interval=0.001)
|
|
18
24
|
await assert_training_state(trainer.training, 'train_model_downloaded', timeout=1, interval=0.001)
|
|
19
25
|
|
|
20
|
-
assert trainer.training.training_state ==
|
|
26
|
+
assert trainer.training.training_state == TrainerState.TrainModelDownloaded
|
|
21
27
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
22
28
|
|
|
23
29
|
# file on disk
|
|
@@ -29,9 +35,9 @@ async def test_downloading_is_successful(test_initialized_trainer: TestingTraine
|
|
|
29
35
|
async def test_abort_download_model(test_initialized_trainer: TestingTrainerLogic):
|
|
30
36
|
trainer = test_initialized_trainer
|
|
31
37
|
create_active_training_file(trainer, training_state='data_downloaded')
|
|
32
|
-
trainer.
|
|
38
|
+
trainer._init_from_last_training()
|
|
33
39
|
|
|
34
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
40
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
35
41
|
await assert_training_state(trainer.training, 'train_model_downloading', timeout=1, interval=0.001)
|
|
36
42
|
|
|
37
43
|
await trainer.stop()
|
|
@@ -43,15 +49,15 @@ async def test_abort_download_model(test_initialized_trainer: TestingTrainerLogi
|
|
|
43
49
|
|
|
44
50
|
async def test_downloading_failed(test_initialized_trainer: TestingTrainerLogic):
|
|
45
51
|
trainer = test_initialized_trainer
|
|
46
|
-
create_active_training_file(trainer, training_state=
|
|
47
|
-
|
|
48
|
-
trainer.
|
|
52
|
+
create_active_training_file(trainer, training_state=TrainerState.DataDownloaded,
|
|
53
|
+
base_model_uuid_or_name='00000000-0000-0000-0000-000000000000') # bad model id)
|
|
54
|
+
trainer._init_from_last_training()
|
|
49
55
|
|
|
50
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
56
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
51
57
|
await assert_training_state(trainer.training, 'train_model_downloading', timeout=1, interval=0.001)
|
|
52
|
-
await assert_training_state(trainer.training,
|
|
58
|
+
await assert_training_state(trainer.training, TrainerState.DataDownloaded, timeout=1, interval=0.001)
|
|
53
59
|
|
|
54
60
|
assert trainer.errors.has_error_for('download_model')
|
|
55
61
|
assert trainer._training is not None # pylint: disable=protected-access
|
|
56
|
-
assert trainer.training.training_state ==
|
|
62
|
+
assert trainer.training.training_state == TrainerState.DataDownloaded
|
|
57
63
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
|
|
3
|
-
from learning_loop_node.data_classes import Context
|
|
3
|
+
from learning_loop_node.data_classes import Context, TrainerState
|
|
4
4
|
from learning_loop_node.trainer.tests.state_helper import assert_training_state, create_active_training_file
|
|
5
5
|
from learning_loop_node.trainer.tests.testing_trainer_logic import TestingTrainerLogic
|
|
6
6
|
from learning_loop_node.trainer.trainer_logic import TrainerLogic
|
|
7
7
|
|
|
8
|
+
# pylint: disable=protected-access
|
|
8
9
|
error_key = 'prepare'
|
|
9
10
|
|
|
10
11
|
|
|
@@ -15,11 +16,11 @@ def trainer_has_error(trainer: TrainerLogic):
|
|
|
15
16
|
async def test_preparing_is_successful(test_initialized_trainer: TestingTrainerLogic):
|
|
16
17
|
trainer = test_initialized_trainer
|
|
17
18
|
create_active_training_file(trainer)
|
|
18
|
-
trainer.
|
|
19
|
+
trainer._init_from_last_training()
|
|
19
20
|
|
|
20
|
-
await trainer.prepare
|
|
21
|
+
await trainer._perform_state('prepare', TrainerState.DataDownloading, TrainerState.DataDownloaded, trainer._prepare)
|
|
21
22
|
assert trainer_has_error(trainer) is False
|
|
22
|
-
assert trainer.training.training_state ==
|
|
23
|
+
assert trainer.training.training_state == TrainerState.DataDownloaded
|
|
23
24
|
assert trainer.training.data is not None
|
|
24
25
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
25
26
|
|
|
@@ -27,10 +28,10 @@ async def test_preparing_is_successful(test_initialized_trainer: TestingTrainerL
|
|
|
27
28
|
async def test_abort_preparing(test_initialized_trainer: TestingTrainerLogic):
|
|
28
29
|
trainer = test_initialized_trainer
|
|
29
30
|
create_active_training_file(trainer)
|
|
30
|
-
trainer.
|
|
31
|
+
trainer._init_from_last_training()
|
|
31
32
|
|
|
32
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
33
|
-
await assert_training_state(trainer.training,
|
|
33
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
34
|
+
await assert_training_state(trainer.training, TrainerState.DataDownloading, timeout=1, interval=0.001)
|
|
34
35
|
|
|
35
36
|
await trainer.stop()
|
|
36
37
|
await asyncio.sleep(0.1)
|
|
@@ -43,13 +44,13 @@ async def test_request_error(test_initialized_trainer: TestingTrainerLogic):
|
|
|
43
44
|
trainer = test_initialized_trainer
|
|
44
45
|
create_active_training_file(trainer, context=Context(
|
|
45
46
|
organization='zauberzeug', project='some_bad_project'))
|
|
46
|
-
trainer.
|
|
47
|
+
trainer._init_from_last_training()
|
|
47
48
|
|
|
48
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
49
|
-
await assert_training_state(trainer.training,
|
|
50
|
-
await assert_training_state(trainer.training,
|
|
49
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
50
|
+
await assert_training_state(trainer.training, TrainerState.DataDownloading, timeout=3, interval=0.001)
|
|
51
|
+
await assert_training_state(trainer.training, TrainerState.Initialized, timeout=3, interval=0.001)
|
|
51
52
|
|
|
52
53
|
assert trainer_has_error(trainer)
|
|
53
54
|
assert trainer._training is not None # pylint: disable=protected-access
|
|
54
|
-
assert trainer.training.training_state ==
|
|
55
|
+
assert trainer.training.training_state == TrainerState.Initialized
|
|
55
56
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
@@ -3,12 +3,15 @@ import asyncio
|
|
|
3
3
|
|
|
4
4
|
from pytest_mock import MockerFixture # pip install pytest-mock
|
|
5
5
|
|
|
6
|
+
from learning_loop_node.data_classes import TrainerState
|
|
6
7
|
from learning_loop_node.trainer.trainer_logic import TrainerLogic
|
|
7
8
|
from learning_loop_node.trainer.trainer_node import TrainerNode
|
|
8
9
|
|
|
9
10
|
from ..state_helper import assert_training_state, create_active_training_file
|
|
10
11
|
from ..testing_trainer_logic import TestingTrainerLogic
|
|
11
12
|
|
|
13
|
+
# pylint: disable=protected-access
|
|
14
|
+
|
|
12
15
|
error_key = 'sync_confusion_matrix'
|
|
13
16
|
|
|
14
17
|
|
|
@@ -21,14 +24,14 @@ async def test_nothing_to_sync(test_initialized_trainer: TestingTrainerLogic):
|
|
|
21
24
|
|
|
22
25
|
# TODO this requires trainer to have _training
|
|
23
26
|
# trainer.load_active_training()
|
|
24
|
-
create_active_training_file(trainer, training_state=
|
|
25
|
-
trainer.
|
|
27
|
+
create_active_training_file(trainer, training_state=TrainerState.TrainingFinished)
|
|
28
|
+
trainer._init_from_last_training()
|
|
26
29
|
|
|
27
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
30
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
28
31
|
|
|
29
|
-
await assert_training_state(trainer.training,
|
|
32
|
+
await assert_training_state(trainer.training, TrainerState.ConfusionMatrixSynced, timeout=1, interval=0.001)
|
|
30
33
|
assert trainer_has_error(trainer) is False
|
|
31
|
-
assert trainer.training.training_state ==
|
|
34
|
+
assert trainer.training.training_state == TrainerState.ConfusionMatrixSynced
|
|
32
35
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
33
36
|
|
|
34
37
|
|
|
@@ -37,16 +40,16 @@ async def test_unsynced_model_available__sync_successful(test_initialized_traine
|
|
|
37
40
|
assert isinstance(trainer, TestingTrainerLogic)
|
|
38
41
|
|
|
39
42
|
await mock_socket_io_call(mocker, test_initialized_trainer_node, {'success': True})
|
|
40
|
-
create_active_training_file(trainer, training_state=
|
|
43
|
+
create_active_training_file(trainer, training_state=TrainerState.TrainingFinished)
|
|
41
44
|
|
|
42
|
-
trainer.
|
|
45
|
+
trainer._init_from_last_training()
|
|
43
46
|
trainer.has_new_model = True
|
|
44
47
|
|
|
45
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
46
|
-
await assert_training_state(trainer.training,
|
|
48
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
49
|
+
await assert_training_state(trainer.training, TrainerState.ConfusionMatrixSynced, timeout=1, interval=0.001)
|
|
47
50
|
|
|
48
51
|
assert trainer_has_error(trainer) is False
|
|
49
|
-
# assert trainer.training.training_state ==
|
|
52
|
+
# assert trainer.training.training_state == TrainerState.ConfusionMatrixSynced
|
|
50
53
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
51
54
|
|
|
52
55
|
|
|
@@ -54,18 +57,18 @@ async def test_unsynced_model_available__sio_not_connected(test_initialized_trai
|
|
|
54
57
|
trainer = test_initialized_trainer_node.trainer_logic
|
|
55
58
|
assert isinstance(trainer, TestingTrainerLogic)
|
|
56
59
|
|
|
57
|
-
create_active_training_file(trainer, training_state=
|
|
60
|
+
create_active_training_file(trainer, training_state=TrainerState.TrainingFinished)
|
|
58
61
|
|
|
59
62
|
assert test_initialized_trainer_node.sio_client.connected is False
|
|
60
63
|
trainer.has_new_model = True
|
|
61
64
|
|
|
62
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
65
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
63
66
|
|
|
64
67
|
await assert_training_state(trainer.training, 'confusion_matrix_syncing', timeout=1, interval=0.001)
|
|
65
|
-
await assert_training_state(trainer.training,
|
|
68
|
+
await assert_training_state(trainer.training, TrainerState.TrainingFinished, timeout=1, interval=0.001)
|
|
66
69
|
|
|
67
70
|
assert trainer_has_error(trainer)
|
|
68
|
-
assert trainer.training.training_state ==
|
|
71
|
+
assert trainer.training.training_state == TrainerState.TrainingFinished
|
|
69
72
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
70
73
|
|
|
71
74
|
|
|
@@ -75,16 +78,16 @@ async def test_unsynced_model_available__request_is_not_successful(test_initiali
|
|
|
75
78
|
|
|
76
79
|
await mock_socket_io_call(mocker, test_initialized_trainer_node, {'success': False})
|
|
77
80
|
|
|
78
|
-
create_active_training_file(trainer, training_state=
|
|
81
|
+
create_active_training_file(trainer, training_state=TrainerState.TrainingFinished)
|
|
79
82
|
|
|
80
83
|
trainer.has_new_model = True
|
|
81
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
84
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
82
85
|
|
|
83
86
|
await assert_training_state(trainer.training, 'confusion_matrix_syncing', timeout=1, interval=0.001)
|
|
84
|
-
await assert_training_state(trainer.training,
|
|
87
|
+
await assert_training_state(trainer.training, TrainerState.TrainingFinished, timeout=1, interval=0.001)
|
|
85
88
|
|
|
86
89
|
assert trainer_has_error(trainer)
|
|
87
|
-
assert trainer.training.training_state ==
|
|
90
|
+
assert trainer.training.training_state == TrainerState.TrainingFinished
|
|
88
91
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
89
92
|
|
|
90
93
|
|
|
@@ -1,48 +1,49 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
|
|
3
|
+
from learning_loop_node.data_classes import TrainerState
|
|
3
4
|
from learning_loop_node.tests.test_helper import condition
|
|
4
5
|
from learning_loop_node.trainer.tests.state_helper import assert_training_state, create_active_training_file
|
|
5
6
|
from learning_loop_node.trainer.tests.testing_trainer_logic import TestingTrainerLogic
|
|
6
7
|
|
|
8
|
+
# pylint: disable=protected-access
|
|
9
|
+
|
|
7
10
|
|
|
8
11
|
async def test_successful_training(test_initialized_trainer: TestingTrainerLogic):
|
|
9
12
|
trainer = test_initialized_trainer
|
|
10
13
|
|
|
11
|
-
create_active_training_file(trainer, training_state=
|
|
12
|
-
trainer.
|
|
14
|
+
create_active_training_file(trainer, training_state=TrainerState.TrainModelDownloaded)
|
|
15
|
+
trainer._init_from_last_training()
|
|
13
16
|
|
|
14
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
17
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
15
18
|
|
|
16
|
-
await
|
|
19
|
+
await condition(lambda: trainer._executor and trainer._executor.is_running(), timeout=1, interval=0.01)
|
|
20
|
+
await assert_training_state(trainer.training, TrainerState.TrainingRunning, timeout=1, interval=0.01)
|
|
17
21
|
assert trainer.start_training_task is not None
|
|
18
|
-
assert trainer.start_training_task.__name__ == 'start_training'
|
|
19
22
|
|
|
20
|
-
# pylint: disable=protected-access
|
|
21
23
|
assert trainer._executor is not None
|
|
22
|
-
trainer.
|
|
23
|
-
await assert_training_state(trainer.training,
|
|
24
|
+
await trainer.stop() # NOTE normally a training terminates itself
|
|
25
|
+
await assert_training_state(trainer.training, TrainerState.TrainingFinished, timeout=1, interval=0.001)
|
|
24
26
|
|
|
25
|
-
assert trainer.training.training_state ==
|
|
27
|
+
assert trainer.training.training_state == TrainerState.TrainingFinished
|
|
26
28
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
27
29
|
|
|
28
30
|
|
|
29
31
|
async def test_stop_running_training(test_initialized_trainer: TestingTrainerLogic):
|
|
30
32
|
trainer = test_initialized_trainer
|
|
31
33
|
|
|
32
|
-
create_active_training_file(trainer, training_state=
|
|
33
|
-
trainer.
|
|
34
|
+
create_active_training_file(trainer, training_state=TrainerState.TrainModelDownloaded)
|
|
35
|
+
trainer._init_from_last_training()
|
|
34
36
|
|
|
35
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
37
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
36
38
|
|
|
37
|
-
await condition(lambda: trainer._executor and trainer._executor.
|
|
38
|
-
await assert_training_state(trainer.training,
|
|
39
|
+
await condition(lambda: trainer._executor and trainer._executor.is_running(), timeout=1, interval=0.01)
|
|
40
|
+
await assert_training_state(trainer.training, TrainerState.TrainingRunning, timeout=1, interval=0.01)
|
|
39
41
|
assert trainer.start_training_task is not None
|
|
40
|
-
assert trainer.start_training_task.__name__ == 'start_training'
|
|
41
42
|
|
|
42
43
|
await trainer.stop()
|
|
43
|
-
await assert_training_state(trainer.training,
|
|
44
|
+
await assert_training_state(trainer.training, TrainerState.TrainingFinished, timeout=2, interval=0.01)
|
|
44
45
|
|
|
45
|
-
assert trainer.training.training_state ==
|
|
46
|
+
assert trainer.training.training_state == TrainerState.TrainingFinished
|
|
46
47
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
47
48
|
|
|
48
49
|
|
|
@@ -50,21 +51,19 @@ async def test_training_can_maybe_resumed(test_initialized_trainer: TestingTrain
|
|
|
50
51
|
trainer = test_initialized_trainer
|
|
51
52
|
|
|
52
53
|
# NOTE e.g. when a node-computer is restarted
|
|
53
|
-
create_active_training_file(trainer, training_state=
|
|
54
|
-
trainer.
|
|
55
|
-
trainer.
|
|
54
|
+
create_active_training_file(trainer, training_state=TrainerState.TrainModelDownloaded)
|
|
55
|
+
trainer._init_from_last_training()
|
|
56
|
+
trainer._can_resume_flag = True
|
|
56
57
|
|
|
57
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
58
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
58
59
|
|
|
59
|
-
await condition(lambda: trainer._executor and trainer._executor.
|
|
60
|
-
await assert_training_state(trainer.training,
|
|
60
|
+
await condition(lambda: trainer._executor and trainer._executor.is_running(), timeout=1, interval=0.01)
|
|
61
|
+
await assert_training_state(trainer.training, TrainerState.TrainingRunning, timeout=1, interval=0.001)
|
|
61
62
|
assert trainer.start_training_task is not None
|
|
62
|
-
assert trainer.start_training_task.__name__ == 'resume'
|
|
63
63
|
|
|
64
|
-
# pylint: disable=protected-access
|
|
65
64
|
assert trainer._executor is not None
|
|
66
|
-
trainer._executor.
|
|
67
|
-
await assert_training_state(trainer.training,
|
|
65
|
+
await trainer._executor.stop_and_wait() # NOTE normally a training terminates itself e.g
|
|
66
|
+
await assert_training_state(trainer.training, TrainerState.TrainingFinished, timeout=1, interval=0.001)
|
|
68
67
|
|
|
69
|
-
assert trainer.training.training_state ==
|
|
68
|
+
assert trainer.training.training_state == TrainerState.TrainingFinished
|
|
70
69
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
@@ -4,12 +4,13 @@ import pytest
|
|
|
4
4
|
from dacite import from_dict
|
|
5
5
|
|
|
6
6
|
from learning_loop_node.conftest import get_dummy_detections
|
|
7
|
-
from learning_loop_node.data_classes import BoxDetection, Context, Detections
|
|
7
|
+
from learning_loop_node.data_classes import BoxDetection, Context, Detections, TrainerState
|
|
8
8
|
from learning_loop_node.loop_communication import LoopCommunicator
|
|
9
9
|
from learning_loop_node.trainer.tests.state_helper import assert_training_state, create_active_training_file
|
|
10
10
|
from learning_loop_node.trainer.tests.testing_trainer_logic import TestingTrainerLogic
|
|
11
11
|
from learning_loop_node.trainer.trainer_logic import TrainerLogic
|
|
12
12
|
|
|
13
|
+
# pylint: disable=protected-access
|
|
13
14
|
error_key = 'upload_detections'
|
|
14
15
|
|
|
15
16
|
|
|
@@ -43,13 +44,14 @@ async def create_valid_detection_file(trainer: TrainerLogic, number_of_entries:
|
|
|
43
44
|
@pytest.mark.asyncio
|
|
44
45
|
async def test_upload_successful(test_initialized_trainer: TestingTrainerLogic):
|
|
45
46
|
trainer = test_initialized_trainer
|
|
46
|
-
create_active_training_file(trainer, training_state=
|
|
47
|
-
trainer.
|
|
47
|
+
create_active_training_file(trainer, training_state=TrainerState.Detected)
|
|
48
|
+
trainer._init_from_last_training()
|
|
48
49
|
|
|
49
50
|
await create_valid_detection_file(trainer)
|
|
50
|
-
await
|
|
51
|
+
await asyncio.get_running_loop().create_task(
|
|
52
|
+
trainer._perform_state('upload_detections', TrainerState.DetectionUploading, TrainerState.ReadyForCleanup, trainer.active_training_io.upload_detetions))
|
|
51
53
|
|
|
52
|
-
assert trainer.training.training_state ==
|
|
54
|
+
assert trainer.training.training_state == TrainerState.ReadyForCleanup
|
|
53
55
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
54
56
|
|
|
55
57
|
|
|
@@ -57,13 +59,16 @@ async def test_upload_successful(test_initialized_trainer: TestingTrainerLogic):
|
|
|
57
59
|
async def test_detection_upload_progress_is_stored(test_initialized_trainer: TestingTrainerLogic):
|
|
58
60
|
trainer = test_initialized_trainer
|
|
59
61
|
|
|
60
|
-
create_active_training_file(trainer, training_state=
|
|
61
|
-
trainer.
|
|
62
|
+
create_active_training_file(trainer, training_state=TrainerState.Detected)
|
|
63
|
+
trainer._init_from_last_training()
|
|
62
64
|
|
|
63
65
|
await create_valid_detection_file(trainer)
|
|
64
66
|
|
|
65
67
|
assert trainer.active_training_io.load_detections_upload_file_index() == 0
|
|
66
|
-
await trainer.upload_detections()
|
|
68
|
+
# await trainer.upload_detections()
|
|
69
|
+
await asyncio.get_running_loop().create_task(
|
|
70
|
+
trainer._perform_state('upload_detections', TrainerState.DetectionUploading, TrainerState.ReadyForCleanup, trainer.active_training_io.upload_detetions))
|
|
71
|
+
|
|
67
72
|
assert trainer.active_training_io.load_detection_upload_progress() == 0 # Progress is reset for every file
|
|
68
73
|
assert trainer.active_training_io.load_detections_upload_file_index() == 1
|
|
69
74
|
|
|
@@ -72,8 +77,8 @@ async def test_detection_upload_progress_is_stored(test_initialized_trainer: Tes
|
|
|
72
77
|
async def test_ensure_all_detections_are_uploaded(test_initialized_trainer: TestingTrainerLogic):
|
|
73
78
|
trainer = test_initialized_trainer
|
|
74
79
|
|
|
75
|
-
create_active_training_file(trainer, training_state=
|
|
76
|
-
trainer.
|
|
80
|
+
create_active_training_file(trainer, training_state=TrainerState.Detected)
|
|
81
|
+
trainer._init_from_last_training()
|
|
77
82
|
|
|
78
83
|
await create_valid_detection_file(trainer, 2, 0)
|
|
79
84
|
await create_valid_detection_file(trainer, 2, 1)
|
|
@@ -87,7 +92,7 @@ async def test_ensure_all_detections_are_uploaded(test_initialized_trainer: Test
|
|
|
87
92
|
for i in range(skip_detections, len(detections), batch_size):
|
|
88
93
|
batch_detections = detections[i:i+batch_size]
|
|
89
94
|
# pylint: disable=protected-access
|
|
90
|
-
await trainer._upload_detections(trainer.training.context, batch_detections, i + batch_size)
|
|
95
|
+
await trainer.active_training_io._upload_detections(trainer.training.context, batch_detections, i + batch_size)
|
|
91
96
|
|
|
92
97
|
expected_value = i + batch_size if i + batch_size < len(detections) else 0 # Progress is reset for every file
|
|
93
98
|
assert trainer.active_training_io.load_detection_upload_progress() == expected_value
|
|
@@ -103,7 +108,7 @@ async def test_ensure_all_detections_are_uploaded(test_initialized_trainer: Test
|
|
|
103
108
|
for i in range(skip_detections, len(detections), batch_size):
|
|
104
109
|
batch_detections = detections[i:i+batch_size]
|
|
105
110
|
# pylint: disable=protected-access
|
|
106
|
-
await trainer._upload_detections(trainer.training.context, batch_detections, i + batch_size)
|
|
111
|
+
await trainer.active_training_io._upload_detections(trainer.training.context, batch_detections, i + batch_size)
|
|
107
112
|
|
|
108
113
|
expected_value = i + batch_size if i + batch_size < len(detections) else 0 # Progress is reset for every file
|
|
109
114
|
assert trainer.active_training_io.load_detection_upload_progress() == expected_value
|
|
@@ -114,46 +119,43 @@ async def test_ensure_all_detections_are_uploaded(test_initialized_trainer: Test
|
|
|
114
119
|
async def test_bad_status_from_LearningLoop(test_initialized_trainer: TestingTrainerLogic):
|
|
115
120
|
trainer = test_initialized_trainer
|
|
116
121
|
|
|
117
|
-
create_active_training_file(trainer, training_state=
|
|
122
|
+
create_active_training_file(trainer, training_state=TrainerState.Detected, context=Context(
|
|
118
123
|
organization='zauberzeug', project='some_bad_project'))
|
|
119
|
-
trainer.
|
|
124
|
+
trainer._init_from_last_training()
|
|
120
125
|
trainer.active_training_io.save_detections([get_dummy_detections()])
|
|
121
126
|
|
|
122
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
123
|
-
await assert_training_state(trainer.training,
|
|
124
|
-
await assert_training_state(trainer.training,
|
|
127
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
128
|
+
await assert_training_state(trainer.training, TrainerState.DetectionUploading, timeout=1, interval=0.001)
|
|
129
|
+
await assert_training_state(trainer.training, TrainerState.Detected, timeout=1, interval=0.001)
|
|
125
130
|
|
|
126
131
|
assert trainer_has_error(trainer)
|
|
127
|
-
assert trainer.training.training_state ==
|
|
132
|
+
assert trainer.training.training_state == TrainerState.Detected
|
|
128
133
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
129
134
|
|
|
130
135
|
|
|
131
|
-
async def
|
|
136
|
+
async def test_go_to_cleanup_if_no_detections_exist(test_initialized_trainer: TestingTrainerLogic):
|
|
137
|
+
"""This test simulates a situation where the detection file is missing.
|
|
138
|
+
In this case, the trainer should report an error and move to the ReadyForCleanup state."""
|
|
132
139
|
trainer = test_initialized_trainer
|
|
133
140
|
|
|
134
141
|
# e.g. missing detection file
|
|
135
|
-
create_active_training_file(trainer, training_state=
|
|
136
|
-
trainer.
|
|
137
|
-
|
|
138
|
-
_ = asyncio.get_running_loop().create_task(trainer.run())
|
|
139
|
-
await assert_training_state(trainer.training, 'detection_uploading', timeout=1, interval=0.001)
|
|
140
|
-
await assert_training_state(trainer.training, 'detected', timeout=1, interval=0.001)
|
|
142
|
+
create_active_training_file(trainer, training_state=TrainerState.Detected)
|
|
143
|
+
trainer._init_from_last_training()
|
|
141
144
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
assert trainer.node.last_training_io.load() == trainer.training
|
|
145
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
146
|
+
await assert_training_state(trainer.training, TrainerState.ReadyForCleanup, timeout=1, interval=0.001)
|
|
145
147
|
|
|
146
148
|
|
|
147
149
|
async def test_abort_uploading(test_initialized_trainer: TestingTrainerLogic):
|
|
148
150
|
trainer = test_initialized_trainer
|
|
149
151
|
|
|
150
|
-
create_active_training_file(trainer, training_state=
|
|
151
|
-
trainer.
|
|
152
|
+
create_active_training_file(trainer, training_state=TrainerState.Detected)
|
|
153
|
+
trainer._init_from_last_training()
|
|
152
154
|
await create_valid_detection_file(trainer)
|
|
153
155
|
|
|
154
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
156
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
155
157
|
|
|
156
|
-
await assert_training_state(trainer.training,
|
|
158
|
+
await assert_training_state(trainer.training, TrainerState.DetectionUploading, timeout=1, interval=0.001)
|
|
157
159
|
|
|
158
160
|
await trainer.stop()
|
|
159
161
|
await asyncio.sleep(0.1)
|
|
@@ -2,11 +2,12 @@ import asyncio
|
|
|
2
2
|
|
|
3
3
|
from pytest_mock import MockerFixture
|
|
4
4
|
|
|
5
|
-
from learning_loop_node.data_classes import Context
|
|
5
|
+
from learning_loop_node.data_classes import Context, TrainerState
|
|
6
6
|
from learning_loop_node.trainer.tests.state_helper import assert_training_state, create_active_training_file
|
|
7
7
|
from learning_loop_node.trainer.tests.testing_trainer_logic import TestingTrainerLogic
|
|
8
8
|
from learning_loop_node.trainer.trainer_logic import TrainerLogic
|
|
9
9
|
|
|
10
|
+
# pylint: disable=protected-access
|
|
10
11
|
error_key = 'upload_model'
|
|
11
12
|
|
|
12
13
|
|
|
@@ -19,28 +20,29 @@ async def test_successful_upload(mocker: MockerFixture, test_initialized_trainer
|
|
|
19
20
|
mock_upload_model_for_training(mocker, 'new_model_id')
|
|
20
21
|
|
|
21
22
|
create_active_training_file(trainer)
|
|
22
|
-
trainer.
|
|
23
|
+
trainer._init_from_last_training()
|
|
23
24
|
|
|
24
|
-
train_task = asyncio.get_running_loop().create_task(
|
|
25
|
+
train_task = asyncio.get_running_loop().create_task(
|
|
26
|
+
trainer._perform_state('upload_model', TrainerState.TrainModelUploading, TrainerState.TrainModelUploaded, trainer._upload_model))
|
|
25
27
|
|
|
26
|
-
await assert_training_state(trainer.training,
|
|
28
|
+
await assert_training_state(trainer.training, TrainerState.TrainModelUploading, timeout=1, interval=0.001)
|
|
27
29
|
await train_task
|
|
28
30
|
|
|
29
31
|
assert trainer_has_error(trainer) is False
|
|
30
|
-
assert trainer.training.training_state ==
|
|
31
|
-
assert trainer.training.
|
|
32
|
+
assert trainer.training.training_state == TrainerState.TrainModelUploaded
|
|
33
|
+
assert trainer.training.model_uuid_for_detecting is not None
|
|
32
34
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
33
35
|
|
|
34
36
|
|
|
35
37
|
async def test_abort_upload_model(test_initialized_trainer: TestingTrainerLogic):
|
|
36
38
|
trainer = test_initialized_trainer
|
|
37
39
|
|
|
38
|
-
create_active_training_file(trainer, training_state=
|
|
39
|
-
trainer.
|
|
40
|
+
create_active_training_file(trainer, training_state=TrainerState.ConfusionMatrixSynced)
|
|
41
|
+
trainer._init_from_last_training()
|
|
40
42
|
|
|
41
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
43
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
42
44
|
|
|
43
|
-
await assert_training_state(trainer.training,
|
|
45
|
+
await assert_training_state(trainer.training, TrainerState.TrainModelUploading, timeout=1, interval=0.001)
|
|
44
46
|
|
|
45
47
|
await trainer.stop()
|
|
46
48
|
await asyncio.sleep(0.1)
|
|
@@ -55,18 +57,18 @@ async def test_bad_server_response_content(test_initialized_trainer: TestingTrai
|
|
|
55
57
|
The training should be aborted and the training state should be set to confusion_matrix_synced."""
|
|
56
58
|
trainer = test_initialized_trainer
|
|
57
59
|
|
|
58
|
-
create_active_training_file(trainer, training_state=
|
|
59
|
-
trainer.
|
|
60
|
+
create_active_training_file(trainer, training_state=TrainerState.ConfusionMatrixSynced)
|
|
61
|
+
trainer._init_from_last_training()
|
|
60
62
|
|
|
61
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
63
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
62
64
|
|
|
63
|
-
await assert_training_state(trainer.training,
|
|
65
|
+
await assert_training_state(trainer.training, TrainerState.TrainModelUploading, timeout=1, interval=0.001)
|
|
64
66
|
# TODO goes to finished because of the error
|
|
65
|
-
await assert_training_state(trainer.training,
|
|
67
|
+
await assert_training_state(trainer.training, TrainerState.ConfusionMatrixSynced, timeout=2, interval=0.001)
|
|
66
68
|
|
|
67
69
|
assert trainer_has_error(trainer)
|
|
68
|
-
assert trainer.training.training_state ==
|
|
69
|
-
assert trainer.training.
|
|
70
|
+
assert trainer.training.training_state == TrainerState.ConfusionMatrixSynced
|
|
71
|
+
assert trainer.training.model_uuid_for_detecting is None
|
|
70
72
|
assert trainer.node.last_training_io.load() == trainer.training
|
|
71
73
|
|
|
72
74
|
|
|
@@ -76,12 +78,12 @@ async def test_mock_loop_response_example(mocker: MockerFixture, test_initialize
|
|
|
76
78
|
mock_upload_model_for_training(mocker, 'new_model_id')
|
|
77
79
|
|
|
78
80
|
create_active_training_file(trainer)
|
|
79
|
-
trainer.
|
|
81
|
+
trainer._init_from_last_training()
|
|
80
82
|
|
|
81
83
|
# pylint: disable=protected-access
|
|
82
|
-
result = await trainer.
|
|
84
|
+
result = await trainer._upload_model_return_new_model_uuid(Context(organization='zauberzeug', project='demo'))
|
|
83
85
|
assert result is not None
|
|
84
86
|
|
|
85
87
|
|
|
86
88
|
def mock_upload_model_for_training(mocker, return_value):
|
|
87
|
-
mocker.patch('learning_loop_node.data_exchanger.DataExchanger.
|
|
89
|
+
mocker.patch('learning_loop_node.data_exchanger.DataExchanger.upload_model_get_uuid', return_value=return_value)
|
|
@@ -1,37 +1,45 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import re
|
|
3
3
|
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from learning_loop_node.data_classes import TrainerState
|
|
4
7
|
from learning_loop_node.trainer.tests.state_helper import assert_training_state, create_active_training_file
|
|
5
8
|
from learning_loop_node.trainer.tests.testing_trainer_logic import TestingTrainerLogic
|
|
6
9
|
|
|
10
|
+
# pylint: disable=protected-access
|
|
11
|
+
|
|
7
12
|
|
|
8
13
|
async def test_training_process_is_stopped_when_trainer_reports_error(test_initialized_trainer: TestingTrainerLogic):
|
|
9
14
|
trainer = test_initialized_trainer
|
|
10
|
-
create_active_training_file(trainer, training_state=
|
|
11
|
-
trainer.
|
|
12
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
15
|
+
create_active_training_file(trainer, training_state=TrainerState.TrainModelDownloaded)
|
|
16
|
+
trainer._init_from_last_training()
|
|
17
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
13
18
|
|
|
14
|
-
await assert_training_state(trainer.training,
|
|
19
|
+
await assert_training_state(trainer.training, TrainerState.TrainingRunning, timeout=1, interval=0.001)
|
|
15
20
|
trainer.error_msg = 'some_error'
|
|
16
|
-
await assert_training_state(trainer.training,
|
|
21
|
+
await assert_training_state(trainer.training, TrainerState.TrainModelDownloaded, timeout=6, interval=0.001)
|
|
17
22
|
|
|
18
23
|
|
|
24
|
+
@pytest.mark.skip(reason='The since_last_start flag is deprecated.')
|
|
19
25
|
async def test_log_can_provide_only_data_for_current_run(test_initialized_trainer: TestingTrainerLogic):
|
|
20
26
|
trainer = test_initialized_trainer
|
|
21
|
-
create_active_training_file(trainer, training_state=
|
|
22
|
-
trainer.
|
|
23
|
-
_ = asyncio.get_running_loop().create_task(trainer.
|
|
27
|
+
create_active_training_file(trainer, training_state=TrainerState.TrainModelDownloaded)
|
|
28
|
+
trainer._init_from_last_training()
|
|
29
|
+
_ = asyncio.get_running_loop().create_task(trainer._run())
|
|
30
|
+
|
|
31
|
+
await assert_training_state(trainer.training, TrainerState.TrainingRunning, timeout=1, interval=0.001)
|
|
32
|
+
await asyncio.sleep(0.1) # give tests a bit of time to check for the state
|
|
24
33
|
|
|
25
|
-
await assert_training_state(trainer.training, 'training_running', timeout=1, interval=0.001)
|
|
26
34
|
assert trainer._executor is not None
|
|
27
35
|
assert len(re.findall('Starting executor', str(trainer._executor.get_log_by_lines()))) == 1
|
|
28
36
|
|
|
29
37
|
trainer.error_msg = 'some_error'
|
|
30
|
-
await assert_training_state(trainer.training,
|
|
38
|
+
await assert_training_state(trainer.training, TrainerState.TrainModelDownloaded, timeout=6, interval=0.001)
|
|
31
39
|
trainer.error_msg = None
|
|
32
|
-
await assert_training_state(trainer.training,
|
|
40
|
+
await assert_training_state(trainer.training, TrainerState.TrainingRunning, timeout=1, interval=0.001)
|
|
33
41
|
await asyncio.sleep(1)
|
|
34
42
|
|
|
35
43
|
assert len(re.findall('Starting executor', str(trainer._executor.get_log_by_lines()))) > 1
|
|
36
44
|
# Here only the current run is provided
|
|
37
|
-
assert len(re.findall('Starting executor', str(trainer._executor.get_log_by_lines(since_last_start=True)))) == 1
|
|
45
|
+
# assert len(re.findall('Starting executor', str(trainer._executor.get_log_by_lines(since_last_start=True)))) == 1
|