learning-loop-node 0.10.10__tar.gz → 0.10.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of learning-loop-node might be problematic. Click here for more details.
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/PKG-INFO +33 -14
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/README.md +32 -13
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/data_classes/general.py +1 -1
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/data_exchanger.py +9 -5
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/detector_node.py +67 -39
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/rest/about.py +1 -1
- learning_loop_node-0.10.11/learning_loop_node/detector/rest/model_version_control.py +101 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/node.py +1 -1
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/test_client_communication.py +56 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/trainer_node.py +30 -2
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/pyproject.toml +1 -1
- learning_loop_node-0.10.10/learning_loop_node/trainer/rest/controls.py +0 -28
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/annotation/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/annotation/annotator_logic.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/annotation/annotator_node.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/data_classes/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/data_classes/annotations.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/data_classes/detections.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/data_classes/socket_response.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/data_classes/training.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/detector_logic.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/inbox_filter/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/inbox_filter/cam_observation_history.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/inbox_filter/relevance_filter.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/outbox.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/rest/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/rest/backdoor_controls.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/rest/detect.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/rest/operation_mode.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/rest/outbox_mode.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/rest/upload.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/examples/novelty_score_updater.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/globals.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/helpers/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/helpers/environment_reader.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/helpers/gdrive_downloader.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/helpers/log_conf.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/helpers/misc.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/loop_communication.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/py.typed +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/annotator/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/annotator/conftest.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/annotator/pytest.ini +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/annotator/test_annotator_node.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/conftest.py +1 -1
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/inbox_filter/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/inbox_filter/test_observation.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/inbox_filter/test_relevance_group.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/inbox_filter/test_unexpected_observations_count.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/pytest.ini +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/test.jpg +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/test_outbox.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/test_relevance_filter.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/testing_detector.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/general/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/general/conftest.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/general/pytest.ini +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/general/test_data/file_1.txt +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/general/test_data/file_2.txt +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/general/test_data/model.json +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/general/test_data_classes.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/general/test_downloader.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/general/test_learning_loop_node.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/test_helper.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/conftest.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/pytest.ini +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/state_helper.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/states/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/states/test_state_cleanup.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/states/test_state_detecting.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/states/test_state_download_train_model.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/states/test_state_prepare.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/states/test_state_sync_confusion_matrix.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/states/test_state_train.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/states/test_state_upload_detections.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/states/test_state_upload_model.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/test_errors.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/test_trainer_states.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/trainer/testing_trainer_logic.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/downloader.py +1 -1
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/exceptions.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/executor.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/io_helpers.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/rest/__init__.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/rest/backdoor_controls.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/test_executor.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/trainer_logic.py +0 -0
- {learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/trainer_logic_generic.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: learning-loop-node
|
|
3
|
-
Version: 0.10.10
|
|
3
|
+
Version: 0.10.11
|
|
4
4
|
Summary: Python Library for Nodes which connect to the Zauberzeug Learning Loop
|
|
5
5
|
Home-page: https://github.com/zauberzeug/learning_loop_node
|
|
6
6
|
License: MIT
|
|
@@ -57,19 +57,20 @@ To start a node you have to implement the logic by inheriting from the correspon
|
|
|
57
57
|
|
|
58
58
|
You can configure connection to our Learning Loop by specifying the following environment variables before starting:
|
|
59
59
|
|
|
60
|
-
| Name
|
|
61
|
-
|
|
|
62
|
-
| LOOP_HOST
|
|
63
|
-
| LOOP_USERNAME
|
|
64
|
-
| LOOP_PASSWORD
|
|
65
|
-
| LOOP_SSL_CERT_PATH
|
|
66
|
-
| LOOP_ORGANIZATION
|
|
67
|
-
| LOOP_PROJECT
|
|
68
|
-
| MIN_UNCERTAIN_THRESHOLD
|
|
69
|
-
| MAX_UNCERTAIN_THRESHOLD
|
|
70
|
-
| INFERENCE_BATCH_SIZE
|
|
71
|
-
| RESTART_AFTER_TRAINING
|
|
72
|
-
| KEEP_OLD_TRAININGS
|
|
60
|
+
| Name | Alias | Purpose | Required by |
|
|
61
|
+
| ------------------------ | ------------ | ------------------------------------------------------------ | -------------------- |
|
|
62
|
+
| LOOP_HOST | HOST | Learning Loop address (e.g. learning-loop.ai) | all |
|
|
63
|
+
| LOOP_USERNAME | USERNAME | Learning Loop user name | all besides Detector |
|
|
64
|
+
| LOOP_PASSWORD | PASSWORD | Learning Loop password | all besides Detector |
|
|
65
|
+
| LOOP_SSL_CERT_PATH | - | Path to the SSL certificate | all (opt.) |
|
|
66
|
+
| LOOP_ORGANIZATION | ORGANIZATION | Organization name | Detector |
|
|
67
|
+
| LOOP_PROJECT | PROJECT | Project name | Detector |
|
|
68
|
+
| MIN_UNCERTAIN_THRESHOLD | - | smallest confidence (float) at which auto-upload will happen | Detector |
|
|
69
|
+
| MAX_UNCERTAIN_THRESHOLD | - | largest confidence (float) at which auto-upload will happen | Detector |
|
|
70
|
+
| INFERENCE_BATCH_SIZE | - | Batch size of trainer when calculating detections | Trainer (opt.) |
|
|
71
|
+
| RESTART_AFTER_TRAINING | - | Restart the trainer after training (set to 1) | Trainer (opt.) |
|
|
72
|
+
| KEEP_OLD_TRAININGS | - | Do not delete old trainings (set to 1) | Trainer (opt.) |
|
|
73
|
+
| TRAINER_IDLE_TIMEOUT_SEC | - | Automatically shutdown trainer after timeout (in seconds) | Trainer (opt.) |
|
|
73
74
|
|
|
74
75
|
#### Testing
|
|
75
76
|
|
|
@@ -104,6 +105,24 @@ The detector also has a sio **upload endpoint** that can be used to upload image
|
|
|
104
105
|
|
|
105
106
|
The endpoint returns None if the upload was successful and an error message otherwise.
|
|
106
107
|
|
|
108
|
+
### Changing the model version
|
|
109
|
+
|
|
110
|
+
The detector can be configured to one of the following behaviors:
|
|
111
|
+
|
|
112
|
+
- download and use a specific model version
|
|
113
|
+
- automatically update the model version according to the learning loop deployment target
|
|
114
|
+
- pause the model updates and use the version that was last loaded
|
|
115
|
+
|
|
116
|
+
The model versioning configuration can be accessed/changed via a REST endpoint. Example Usage:
|
|
117
|
+
|
|
118
|
+
- Fetch the current model versioning configuration: `curl http://localhost/model_version`
|
|
119
|
+
- Configure the detector to use a specific model version: `curl -X PUT -d "1.0" http://localhost/model_version`
|
|
120
|
+
- Configure the detector to automatically update the model version: `curl -X PUT -d "follow_loop" http://localhost/model_version`
|
|
121
|
+
- Pause the model updates: `curl -X PUT -d "pause" http://localhost/model_version`
|
|
122
|
+
|
|
123
|
+
Note that the configuration is not persistent. However, the default behavior on startup can be configured via the environment variable `VERSION_CONTROL_DEFAULT`.
|
|
124
|
+
If the environment variable is set to `VERSION_CONTROL_DEFAULT=PAUSE`, the detector will pause the model updates on startup. Otherwise, the detector will automatically follow the loop deployment target.
|
|
125
|
+
|
|
107
126
|
### Changing the outbox mode
|
|
108
127
|
|
|
109
128
|
If the autoupload is set to `all` or `filtered`, the (selected) images and the corresponding detections are saved on HDD (the outbox). A background thread will upload the images and detections to the Learning Loop. The outbox is located in the `outbox` folder in the root directory of the node. The outbox can be cleared by deleting the files in the folder.
|
|
@@ -17,19 +17,20 @@ To start a node you have to implement the logic by inheriting from the correspon
|
|
|
17
17
|
|
|
18
18
|
You can configure connection to our Learning Loop by specifying the following environment variables before starting:
|
|
19
19
|
|
|
20
|
-
| Name
|
|
21
|
-
|
|
|
22
|
-
| LOOP_HOST
|
|
23
|
-
| LOOP_USERNAME
|
|
24
|
-
| LOOP_PASSWORD
|
|
25
|
-
| LOOP_SSL_CERT_PATH
|
|
26
|
-
| LOOP_ORGANIZATION
|
|
27
|
-
| LOOP_PROJECT
|
|
28
|
-
| MIN_UNCERTAIN_THRESHOLD
|
|
29
|
-
| MAX_UNCERTAIN_THRESHOLD
|
|
30
|
-
| INFERENCE_BATCH_SIZE
|
|
31
|
-
| RESTART_AFTER_TRAINING
|
|
32
|
-
| KEEP_OLD_TRAININGS
|
|
20
|
+
| Name | Alias | Purpose | Required by |
|
|
21
|
+
| ------------------------ | ------------ | ------------------------------------------------------------ | -------------------- |
|
|
22
|
+
| LOOP_HOST | HOST | Learning Loop address (e.g. learning-loop.ai) | all |
|
|
23
|
+
| LOOP_USERNAME | USERNAME | Learning Loop user name | all besides Detector |
|
|
24
|
+
| LOOP_PASSWORD | PASSWORD | Learning Loop password | all besides Detector |
|
|
25
|
+
| LOOP_SSL_CERT_PATH | - | Path to the SSL certificate | all (opt.) |
|
|
26
|
+
| LOOP_ORGANIZATION | ORGANIZATION | Organization name | Detector |
|
|
27
|
+
| LOOP_PROJECT | PROJECT | Project name | Detector |
|
|
28
|
+
| MIN_UNCERTAIN_THRESHOLD | - | smallest confidence (float) at which auto-upload will happen | Detector |
|
|
29
|
+
| MAX_UNCERTAIN_THRESHOLD | - | largest confidence (float) at which auto-upload will happen | Detector |
|
|
30
|
+
| INFERENCE_BATCH_SIZE | - | Batch size of trainer when calculating detections | Trainer (opt.) |
|
|
31
|
+
| RESTART_AFTER_TRAINING | - | Restart the trainer after training (set to 1) | Trainer (opt.) |
|
|
32
|
+
| KEEP_OLD_TRAININGS | - | Do not delete old trainings (set to 1) | Trainer (opt.) |
|
|
33
|
+
| TRAINER_IDLE_TIMEOUT_SEC | - | Automatically shutdown trainer after timeout (in seconds) | Trainer (opt.) |
|
|
33
34
|
|
|
34
35
|
#### Testing
|
|
35
36
|
|
|
@@ -64,6 +65,24 @@ The detector also has a sio **upload endpoint** that can be used to upload image
|
|
|
64
65
|
|
|
65
66
|
The endpoint returns None if the upload was successful and an error message otherwise.
|
|
66
67
|
|
|
68
|
+
### Changing the model version
|
|
69
|
+
|
|
70
|
+
The detector can be configured to one of the following behaviors:
|
|
71
|
+
|
|
72
|
+
- download and use a specific model version
|
|
73
|
+
- automatically update the model version according to the learning loop deployment target
|
|
74
|
+
- pause the model updates and use the version that was last loaded
|
|
75
|
+
|
|
76
|
+
The model versioning configuration can be accessed/changed via a REST endpoint. Example Usage:
|
|
77
|
+
|
|
78
|
+
- Fetch the current model versioning configuration: `curl http://localhost/model_version`
|
|
79
|
+
- Configure the detector to use a specific model version: `curl -X PUT -d "1.0" http://localhost/model_version`
|
|
80
|
+
- Configure the detector to automatically update the model version: `curl -X PUT -d "follow_loop" http://localhost/model_version`
|
|
81
|
+
- Pause the model updates: `curl -X PUT -d "pause" http://localhost/model_version`
|
|
82
|
+
|
|
83
|
+
Note that the configuration is not persistent. However, the default behavior on startup can be configured via the environment variable `VERSION_CONTROL_DEFAULT`.
|
|
84
|
+
If the environment variable is set to `VERSION_CONTROL_DEFAULT=PAUSE`, the detector will pause the model updates on startup. Otherwise, the detector will automatically follow the loop deployment target.
|
|
85
|
+
|
|
67
86
|
### Changing the outbox mode
|
|
68
87
|
|
|
69
88
|
If the autoupload is set to `all` or `filtered`, the (selected) images and the corresponding detections are saved on HDD (the outbox). A background thread will upload the images and detections to the Learning Loop. The outbox is located in the `outbox` folder in the root directory of the node. The outbox can be cleared by deleting the files in the folder.
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/data_classes/general.py
RENAMED
|
@@ -50,7 +50,7 @@ class ModelInformation():
|
|
|
50
50
|
organization: str
|
|
51
51
|
project: str
|
|
52
52
|
version: str
|
|
53
|
-
categories: List[Category]
|
|
53
|
+
categories: List[Category] = field(default_factory=list)
|
|
54
54
|
resolution: Optional[int] = None
|
|
55
55
|
model_root_path: Optional[str] = None
|
|
56
56
|
model_size: Optional[str] = None
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/data_exchanger.py
RENAMED
|
@@ -77,7 +77,7 @@ class DataExchanger():
|
|
|
77
77
|
logging.info('got empty list. No images were downloaded')
|
|
78
78
|
return []
|
|
79
79
|
|
|
80
|
-
progress_factor = 0.5 / num_image_ids # 50% of progress is for downloading data
|
|
80
|
+
progress_factor = 0.5 / num_image_ids # first 50% of progress is for downloading data
|
|
81
81
|
images_data: List[Dict] = []
|
|
82
82
|
for i in range(0, num_image_ids, chunk_size):
|
|
83
83
|
self.progress = i * progress_factor
|
|
@@ -100,20 +100,21 @@ class DataExchanger():
|
|
|
100
100
|
new_image_uuids = [id for id in image_uuids if id not in existing_uuids]
|
|
101
101
|
|
|
102
102
|
paths, _ = create_resource_paths(self.context.organization, self.context.project, new_image_uuids)
|
|
103
|
-
|
|
103
|
+
num_new_image_ids = len(new_image_uuids)
|
|
104
104
|
os.makedirs(image_folder, exist_ok=True)
|
|
105
105
|
|
|
106
|
-
progress_factor = 0.5 /
|
|
107
|
-
for i in range(0,
|
|
106
|
+
progress_factor = 0.5 / num_new_image_ids # second 50% of progress is for downloading images
|
|
107
|
+
for i in range(0, num_new_image_ids, chunk_size):
|
|
108
108
|
self.progress = 0.5 + i * progress_factor
|
|
109
109
|
chunk_paths = paths[i:i+chunk_size]
|
|
110
|
-
chunk_ids =
|
|
110
|
+
chunk_ids = new_image_uuids[i:i+chunk_size]
|
|
111
111
|
tasks = []
|
|
112
112
|
for j, chunk_j in enumerate(chunk_paths):
|
|
113
113
|
start = time()
|
|
114
114
|
tasks.append(create_task(self._download_one_image(chunk_j, chunk_ids[j], image_folder)))
|
|
115
115
|
await asyncio.sleep(max(0, 0.02 - (time() - start))) # prevent too many requests at once
|
|
116
116
|
await asyncio.gather(*tasks)
|
|
117
|
+
self.progress = 1.0
|
|
117
118
|
|
|
118
119
|
async def _download_one_image(self, path: str, image_id: str, image_folder: str) -> None:
|
|
119
120
|
response = await self.loop_communicator.get(path)
|
|
@@ -124,7 +125,10 @@ class DataExchanger():
|
|
|
124
125
|
async with aiofiles.open(filename, 'wb') as f:
|
|
125
126
|
await f.write(response.content)
|
|
126
127
|
if not await is_valid_image(filename, self.check_jpeg):
|
|
128
|
+
logging.error('Invalid image "%s". Removing it..', filename)
|
|
127
129
|
os.remove(filename)
|
|
130
|
+
else:
|
|
131
|
+
logging.debug('Downloaded image "%s"', filename)
|
|
128
132
|
|
|
129
133
|
async def download_model(self, target_folder: str, context: Context, model_uuid: str, model_format: str) -> List[str]:
|
|
130
134
|
"""Downloads a model (and additional meta data like model.json) and returns the paths of the downloaded files.
|
|
@@ -6,7 +6,7 @@ import subprocess
|
|
|
6
6
|
from dataclasses import asdict
|
|
7
7
|
from datetime import datetime
|
|
8
8
|
from threading import Thread
|
|
9
|
-
from typing import Dict, List,
|
|
9
|
+
from typing import Dict, List, Optional, Union
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
from dacite import from_dict
|
|
@@ -26,6 +26,7 @@ from .outbox import Outbox
|
|
|
26
26
|
from .rest import about as rest_about
|
|
27
27
|
from .rest import backdoor_controls
|
|
28
28
|
from .rest import detect as rest_detect
|
|
29
|
+
from .rest import model_version_control as rest_version_control
|
|
29
30
|
from .rest import operation_mode as rest_mode
|
|
30
31
|
from .rest import outbox_mode as rest_outbox_mode
|
|
31
32
|
from .rest import upload as rest_upload
|
|
@@ -52,13 +53,22 @@ class DetectorNode(Node):
|
|
|
52
53
|
self.loop_communicator)
|
|
53
54
|
|
|
54
55
|
self.relevance_filter: RelevanceFilter = RelevanceFilter(self.outbox)
|
|
55
|
-
|
|
56
|
+
|
|
57
|
+
# NOTE: version_control controls the behavior of the detector node.
|
|
58
|
+
# FollowLoop: the detector node will follow the loop and update the model if necessary
|
|
59
|
+
# SpecificVersion: the detector node will update to a specific version, set via the /model_version endpoint
|
|
60
|
+
# Pause: the detector node will not update the model
|
|
61
|
+
self.version_control: rest_version_control.VersionMode = rest_version_control.VersionMode.Pause if os.environ.get(
|
|
62
|
+
'VERSION_CONTROL_DEFAULT', 'follow_loop').lower() == 'pause' else rest_version_control.VersionMode.FollowLoop
|
|
63
|
+
self.target_model: Optional[ModelInformation] = None
|
|
64
|
+
self.loop_deployment_target: Optional[ModelInformation] = None
|
|
56
65
|
|
|
57
66
|
self.include_router(rest_detect.router, tags=["detect"])
|
|
58
67
|
self.include_router(rest_upload.router, prefix="")
|
|
59
68
|
self.include_router(rest_mode.router, tags=["operation_mode"])
|
|
60
69
|
self.include_router(rest_about.router, tags=["about"])
|
|
61
70
|
self.include_router(rest_outbox_mode.router, tags=["outbox_mode"])
|
|
71
|
+
self.include_router(rest_version_control.router, tags=["model_version"])
|
|
62
72
|
|
|
63
73
|
if use_backdoor_controls:
|
|
64
74
|
self.include_router(backdoor_controls.router)
|
|
@@ -75,6 +85,8 @@ class DetectorNode(Node):
|
|
|
75
85
|
Context(organization=self.organization, project=self.project),
|
|
76
86
|
self.loop_communicator)
|
|
77
87
|
self.relevance_filter = RelevanceFilter(self.outbox)
|
|
88
|
+
self.version_control = rest_version_control.VersionMode.Pause if os.environ.get(
|
|
89
|
+
'VERSION_CONTROL_DEFAULT', 'follow_loop').lower() == 'pause' else rest_version_control.VersionMode.FollowLoop
|
|
78
90
|
self.target_model = None
|
|
79
91
|
# self.setup_sio_server()
|
|
80
92
|
|
|
@@ -183,20 +195,12 @@ class DetectorNode(Node):
|
|
|
183
195
|
return
|
|
184
196
|
try:
|
|
185
197
|
self.log.info(f'Current operation mode is {self.operation_mode}')
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
198
|
+
try:
|
|
199
|
+
await self.sync_status_with_learning_loop()
|
|
200
|
+
except Exception as e:
|
|
201
|
+
self.log.error(f'Could not check for updates: {e}')
|
|
189
202
|
return
|
|
190
203
|
|
|
191
|
-
# TODO: solve race condition (it should not be required to recheck if model_info is not None, but it is!)
|
|
192
|
-
if self.detector_logic.is_initialized:
|
|
193
|
-
model_info = self.detector_logic._model_info # pylint: disable=protected-access
|
|
194
|
-
if model_info is not None:
|
|
195
|
-
self.log.info(f'Current model: {model_info.version} with id {model_info.id}')
|
|
196
|
-
else:
|
|
197
|
-
self.log.info('no model loaded')
|
|
198
|
-
else:
|
|
199
|
-
self.log.info('no model loaded')
|
|
200
204
|
if self.operation_mode != OperationMode.Idle:
|
|
201
205
|
self.log.info(f'not checking for updates; operation mode is {self.operation_mode}')
|
|
202
206
|
return
|
|
@@ -206,25 +210,22 @@ class DetectorNode(Node):
|
|
|
206
210
|
self.log.info('not checking for updates; no target model selected')
|
|
207
211
|
return
|
|
208
212
|
|
|
209
|
-
self.
|
|
210
|
-
|
|
211
|
-
if
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
if not self.detector_logic.is_initialized or self.target_model != version:
|
|
216
|
-
cur_model = version or "-"
|
|
217
|
-
self.log.info(f'Current model "{cur_model}" needs to be updated to {self.target_model}')
|
|
213
|
+
current_version = self.detector_logic._model_info.version if self.detector_logic._model_info is not None else None
|
|
214
|
+
|
|
215
|
+
if not self.detector_logic.is_initialized or self.target_model.version != current_version:
|
|
216
|
+
self.log.info(
|
|
217
|
+
f'Current model "{current_version or "-"}" needs to be updated to {self.target_model.version}')
|
|
218
|
+
|
|
218
219
|
with step_into(GLOBALS.data_folder):
|
|
219
220
|
model_symlink = 'model'
|
|
220
|
-
target_model_folder = f'models/{self.target_model}'
|
|
221
|
+
target_model_folder = f'models/{self.target_model.version}'
|
|
221
222
|
shutil.rmtree(target_model_folder, ignore_errors=True)
|
|
222
223
|
os.makedirs(target_model_folder)
|
|
223
224
|
|
|
224
225
|
await self.data_exchanger.download_model(target_model_folder,
|
|
225
226
|
Context(organization=self.organization,
|
|
226
227
|
project=self.project),
|
|
227
|
-
|
|
228
|
+
self.target_model.id, self.detector_logic.model_format)
|
|
228
229
|
try:
|
|
229
230
|
os.unlink(model_symlink)
|
|
230
231
|
os.remove(model_symlink)
|
|
@@ -234,26 +235,42 @@ class DetectorNode(Node):
|
|
|
234
235
|
self.log.info(f'Updated symlink for model to {os.readlink(model_symlink)}')
|
|
235
236
|
|
|
236
237
|
self.detector_logic.load_model()
|
|
237
|
-
|
|
238
|
+
try:
|
|
239
|
+
await self.sync_status_with_learning_loop()
|
|
240
|
+
except Exception:
|
|
241
|
+
pass
|
|
238
242
|
# self.reload(reason='new model installed')
|
|
239
|
-
|
|
240
|
-
self.log.info('Versions are identic. Nothing to do.')
|
|
243
|
+
|
|
241
244
|
except Exception as e:
|
|
242
245
|
self.log.exception('check_for_update failed')
|
|
243
246
|
msg = e.cause if isinstance(e, DownloadError) else str(e)
|
|
244
247
|
self.status.set_error('update_model', f'Could not update model: {msg}')
|
|
245
|
-
|
|
248
|
+
try:
|
|
249
|
+
await self.sync_status_with_learning_loop()
|
|
250
|
+
except Exception:
|
|
251
|
+
pass
|
|
252
|
+
|
|
253
|
+
async def sync_status_with_learning_loop(self) -> None:
|
|
254
|
+
"""Sync status of the detector with the Learning Loop.
|
|
255
|
+
The Learning Loop will respond with the model info of the deployment target.
|
|
256
|
+
If version_control is set to FollowLoop, the detector will update the target_model.
|
|
257
|
+
Return if the communication was successful.
|
|
258
|
+
|
|
259
|
+
Raises:
|
|
260
|
+
Exception: If the communication with the Learning Loop failed.
|
|
261
|
+
"""
|
|
246
262
|
|
|
247
|
-
async def send_status(self) -> Union[str, Literal[False]]:
|
|
248
263
|
if not self.sio_client.connected:
|
|
249
|
-
self.log.info('
|
|
250
|
-
|
|
264
|
+
self.log.info('Status sync failed: not connected')
|
|
265
|
+
raise Exception('Status sync failed: not connected')
|
|
251
266
|
|
|
252
267
|
try:
|
|
253
268
|
current_model = self.detector_logic.model_info.version
|
|
254
269
|
except Exception:
|
|
255
270
|
current_model = None
|
|
256
271
|
|
|
272
|
+
target_model_version = self.target_model.version if self.target_model else None
|
|
273
|
+
|
|
257
274
|
status = DetectionStatus(
|
|
258
275
|
id=self.uuid,
|
|
259
276
|
name=self.name,
|
|
@@ -262,27 +279,38 @@ class DetectorNode(Node):
|
|
|
262
279
|
uptime=int((datetime.now() - self.startup_datetime).total_seconds()),
|
|
263
280
|
operation_mode=self.operation_mode,
|
|
264
281
|
current_model=current_model,
|
|
265
|
-
target_model=
|
|
282
|
+
target_model=target_model_version,
|
|
266
283
|
model_format=self.detector_logic.model_format,
|
|
267
284
|
)
|
|
268
285
|
|
|
269
286
|
self.log.info(f'sending status {status}')
|
|
270
287
|
response = await self.sio_client.call('update_detector', (self.organization, self.project, jsonable_encoder(asdict(status))))
|
|
288
|
+
|
|
271
289
|
assert response is not None
|
|
272
290
|
socket_response = from_dict(data_class=SocketResponse, data=response)
|
|
273
291
|
if not socket_response.success:
|
|
274
292
|
self.log.error(f'Statusupdate failed: {response}')
|
|
275
|
-
|
|
293
|
+
raise Exception(f'Statusupdate failed: {response}')
|
|
276
294
|
|
|
277
295
|
assert socket_response.payload is not None
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
296
|
+
|
|
297
|
+
deployment_target_model_id = socket_response.payload['target_model_id']
|
|
298
|
+
deployment_target_model_version = socket_response.payload['target_model_version']
|
|
299
|
+
self.loop_deployment_target = ModelInformation(organization=self.organization, project=self.project,
|
|
300
|
+
host="", categories=[],
|
|
301
|
+
id=deployment_target_model_id,
|
|
302
|
+
version=deployment_target_model_version)
|
|
303
|
+
|
|
304
|
+
if self.version_control == rest_version_control.VersionMode.FollowLoop:
|
|
305
|
+
self.target_model = self.loop_deployment_target
|
|
306
|
+
self.log.info(f'After sending status. Target_model is {self.target_model.version}')
|
|
282
307
|
|
|
283
308
|
async def set_operation_mode(self, mode: OperationMode):
    '''Store the new operation mode and try to sync the status with the learning loop.

    The mode is assigned first, so it stays set on the node even when the
    sync raises; in that case the failure is only logged as a warning.
    '''
    self.operation_mode = mode

    try:
        await self.sync_status_with_learning_loop()
    except Exception as e:
        # Best effort: a failing sync must not undo or block the mode change.
        self.log.warning(f'Operation mode set to {mode}, but sync failed: {e}')
|
|
286
314
|
|
|
287
315
|
def reload(self, reason: str):
|
|
288
316
|
'''provide a cause for the reload'''
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/rest/about.py
RENAMED
|
@@ -21,5 +21,5 @@ async def get_about(request: Request):
|
|
|
21
21
|
'operation_mode': app.operation_mode.value,
|
|
22
22
|
'state': app.status.state,
|
|
23
23
|
'model_info': app.detector_logic._model_info, # pylint: disable=protected-access
|
|
24
|
-
'target_model': app.target_model
|
|
24
|
+
'target_model': app.target_model.version if app.target_model is not None else 'None',
|
|
25
25
|
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
|
|
2
|
+
import os
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
7
|
+
|
|
8
|
+
from ...data_classes import ModelInformation
|
|
9
|
+
from ...globals import GLOBALS
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from ..detector_node import DetectorNode
|
|
13
|
+
|
|
14
|
+
router = APIRouter()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class VersionMode(str, Enum):
    '''Model version control modes of the detector (string-valued enum).'''

    # will follow the loop
    FollowLoop = 'follow_loop'
    # will follow the specific version
    SpecificVersion = 'specific_version'
    # will pause the updates
    Pause = 'pause'
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@router.get("/model_version")
async def get_version(request: Request):
    '''
    Report the model versions known to the detector.

    Returns a dict with the keys
    - current_version: version of the model info held by the detector logic
    - target_version: version of the detector's target model
    - loop_version: version of the loop deployment target
    - local_versions: entries of <data_folder>/models whose name consists only
      of digits and dots
    - version_control: value of the active version control mode

    A version that is not set is reported as the string 'None'.
    If the models folder does not exist, local_versions is an empty list.

    Example Usage
        curl http://localhost/model_version
    '''
    # pylint: disable=protected-access

    app: 'DetectorNode' = request.app

    model_info = app.detector_logic._model_info
    current_version = model_info.version if model_info is not None else 'None'
    target_version = app.target_model.version if app.target_model is not None else 'None'
    loop_version = app.loop_deployment_target.version if app.loop_deployment_target is not None else 'None'

    try:
        local_models = os.listdir(os.path.join(GLOBALS.data_folder, 'models'))
    except FileNotFoundError:
        # A missing models folder means there are no local models;
        # this must not turn the status request into a server error.
        local_models = []

    # Only entries that look like a version number (e.g. '1.1') are reported.
    local_versions: list[str] = [model for model in local_models if model.replace('.', '').isdigit()]

    return {
        'current_version': current_version,
        'target_version': target_version,
        'loop_version': loop_version,
        'local_versions': local_versions,
        'version_control': app.version_control.value,
    }
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@router.put("/model_version")
async def put_version(request: Request):
    '''
    Set the model version control mode of the detector.

    The raw request body selects the mode:
    - "follow_loop": version control is set to FollowLoop
    - "pause": version control is set to Pause
    - a version number (digits and dots, e.g. "13.6"): version control is set
      to SpecificVersion and the target model is set to the model with that
      version, whose id is looked up in the learning loop

    Returns "OK" on success.

    Raises HTTPException(400) if the body is not a version number or no model
    with that version exists, and HTTPException(500) if the model list cannot
    be loaded or contains the version more than once.
    A malformed body is rejected before any state is changed. If the lookup in
    the loop fails, version control is set to Pause.

    Example Usage
        curl -X PUT -d "follow_loop" http://localhost/model_version
        curl -X PUT -d "pause" http://localhost/model_version
        curl -X PUT -d "13.6" http://localhost/model_version
    '''
    app: 'DetectorNode' = request.app
    content = str(await request.body(), 'utf-8')

    if content == 'follow_loop':
        app.version_control = VersionMode.FollowLoop
        return "OK"

    if content == 'pause':
        app.version_control = VersionMode.Pause
        return "OK"

    # Validate before touching any state: an invalid request must not switch
    # the node into SpecificVersion mode as a side effect of the 400 response.
    if not content or not content.replace('.', '').isdigit():
        raise HTTPException(400, 'Invalid version number')
    target_version = content

    app.version_control = VersionMode.SpecificVersion

    if app.target_model is not None and app.target_model.version == target_version:
        return "OK"

    # Fetch the model uuid by version from the loop
    uri = f'/{app.organization}/projects/{app.project}/models'
    response = await app.loop_communicator.get(uri)
    if response.status_code != 200:
        app.version_control = VersionMode.Pause
        raise HTTPException(500, 'Failed to load models from learning loop')

    models = response.json()['models']
    models_with_target_version = [m for m in models if m['version'] == target_version]
    if not models_with_target_version:
        app.version_control = VersionMode.Pause
        raise HTTPException(400, f'No Model with version {target_version}')
    if len(models_with_target_version) > 1:
        app.version_control = VersionMode.Pause
        raise HTTPException(500, f'Multiple models with version {target_version}')

    model = models_with_target_version[0]
    app.target_model = ModelInformation(organization=app.organization, project=app.project,
                                        host=model.get('host', 'unknown'), categories=[],
                                        id=model['id'],
                                        version=target_version)

    return "OK"
|
|
@@ -16,7 +16,7 @@ from socketio import AsyncClient
|
|
|
16
16
|
from .data_classes import NodeStatus
|
|
17
17
|
from .data_exchanger import DataExchanger
|
|
18
18
|
from .helpers import log_conf
|
|
19
|
-
from .helpers.misc import
|
|
19
|
+
from .helpers.misc import ensure_socket_response, read_or_create_uuid
|
|
20
20
|
from .loop_communication import LoopCommunicator
|
|
21
21
|
|
|
22
22
|
|
|
@@ -107,6 +107,62 @@ async def test_about_endpoint(test_detector_node: DetectorNode):
|
|
|
107
107
|
assert any(c.name == 'purple point' for c in model_information.categories)
|
|
108
108
|
|
|
109
109
|
|
|
110
|
+
async def test_model_version_api(test_detector_node: DetectorNode):
    '''Exercise GET/PUT /model_version through all three version control modes.

    Sequence: initial state (follow_loop) -> pin version 1.0 -> pause -> follow_loop.
    Every PUT is checked for a 200 response so that a rejected request fails
    right at the request instead of at a later, unrelated looking assertion.
    '''
    url = f'http://localhost:{GLOBALS.detector_port}/model_version'

    def get_state() -> dict:
        # Fetch the current version report and make sure the request succeeded.
        response = requests.get(url, timeout=30)
        assert response.status_code == 200
        return json.loads(response.content)

    def put_mode(body: str) -> None:
        # Send a new version control setting; the endpoint must accept it.
        response = requests.put(url, data=body, timeout=30)
        assert response.status_code == 200, response.text

    await asyncio.sleep(3)

    # Initial state: detector follows the loop and runs the loop's version.
    state = get_state()
    assert state['current_version'] == '1.1'
    assert state['target_version'] == '1.1'
    assert state['loop_version'] == '1.1'
    assert state['local_versions'] == ['1.1']
    assert state['version_control'] == 'follow_loop'

    # Pin version 1.0: the target changes immediately, the running model not yet.
    put_mode('1.0')
    state = get_state()
    assert state['current_version'] == '1.1'
    assert state['target_version'] == '1.0'
    assert state['loop_version'] == '1.1'
    assert state['local_versions'] == ['1.1']
    assert state['version_control'] == 'specific_version'

    # NOTE(review): 11 s is presumably long enough for the detector to pick up
    # the new target in its periodic update - confirm against the node's interval.
    await asyncio.sleep(11)

    state = get_state()
    assert state['current_version'] == '1.0'
    assert state['target_version'] == '1.0'
    assert state['loop_version'] == '1.1'
    assert set(state['local_versions']) == set(['1.1', '1.0'])
    assert state['version_control'] == 'specific_version'

    # Pause: versions stay as they are, only the mode changes.
    put_mode('pause')
    await asyncio.sleep(11)
    state = get_state()
    assert state['current_version'] == '1.0'
    assert state['target_version'] == '1.0'
    assert state['loop_version'] == '1.1'
    assert set(state['local_versions']) == set(['1.1', '1.0'])
    assert state['version_control'] == 'pause'

    # Follow the loop again: the detector returns to the loop's version.
    put_mode('follow_loop')
    await asyncio.sleep(11)
    state = get_state()
    assert state['current_version'] == '1.1'
    assert state['target_version'] == '1.1'
    assert state['loop_version'] == '1.1'
    assert set(state['local_versions']) == set(['1.1', '1.0'])
    assert state['version_control'] == 'follow_loop'
|
|
164
|
+
|
|
165
|
+
|
|
110
166
|
async def test_rest_outbox_mode(test_detector_node: DetectorNode):
|
|
111
167
|
await asyncio.sleep(3)
|
|
112
168
|
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/trainer_node.py
RENAMED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import time
|
|
1
4
|
from dataclasses import asdict
|
|
2
5
|
from typing import Dict, Optional
|
|
3
6
|
|
|
@@ -7,7 +10,7 @@ from socketio import AsyncClient, exceptions
|
|
|
7
10
|
from ..data_classes import TrainingStatus
|
|
8
11
|
from ..node import Node
|
|
9
12
|
from .io_helpers import LastTrainingIO
|
|
10
|
-
from .rest import backdoor_controls
|
|
13
|
+
from .rest import backdoor_controls
|
|
11
14
|
from .trainer_logic_generic import TrainerLogicGeneric
|
|
12
15
|
|
|
13
16
|
|
|
@@ -20,7 +23,15 @@ class TrainerNode(Node):
|
|
|
20
23
|
self.last_training_io = LastTrainingIO(self.uuid)
|
|
21
24
|
self.trainer_logic._last_training_io = self.last_training_io
|
|
22
25
|
|
|
23
|
-
self.
|
|
26
|
+
self.first_idle_time: float | None = None
|
|
27
|
+
if os.environ.get('TRAINER_IDLE_TIMEOUT_SEC', 0.0):
|
|
28
|
+
self.idle_timeout = float(os.environ.get('TRAINER_IDLE_TIMEOUT_SEC', 0.0))
|
|
29
|
+
else:
|
|
30
|
+
self.idle_timeout = 0.0
|
|
31
|
+
if self.idle_timeout:
|
|
32
|
+
self.log.info(
|
|
33
|
+
f'Trainer started with an idle_timeout of {self.idle_timeout} seconds. Note that shutdown does not work if docker container has the restart policy set to always')
|
|
34
|
+
|
|
24
35
|
if use_backdoor_controls:
|
|
25
36
|
self.include_router(backdoor_controls.router, tags=["controls"])
|
|
26
37
|
|
|
@@ -38,6 +49,7 @@ class TrainerNode(Node):
|
|
|
38
49
|
if await self.trainer_logic.try_continue_run_if_incomplete():
|
|
39
50
|
return # NOTE: we prevent sending idle status after starting a continuation
|
|
40
51
|
await self.send_status()
|
|
52
|
+
self.check_idle_timeout()
|
|
41
53
|
except exceptions.TimeoutError:
|
|
42
54
|
self.log.warning('timeout when sending status to learning loop, reconnecting sio_client')
|
|
43
55
|
await self.sio_client.disconnect() # NOTE: reconnect happens in node._on_repeat
|
|
@@ -90,3 +102,19 @@ class TrainerNode(Node):
|
|
|
90
102
|
result = await self.sio_client.call('update_trainer', jsonable_encoder(asdict(status)), timeout=30)
|
|
91
103
|
if isinstance(result, Dict) and not result['success']:
|
|
92
104
|
self.log.error(f'Error when sending status update: Response from loop was:\n {result}')
|
|
105
|
+
|
|
106
|
+
def check_idle_timeout(self):
    '''Exit the process once the trainer has been idle longer than idle_timeout.

    Does nothing when idle_timeout is falsy. While the trainer logic is in
    state 'idle', the time of the first idle check is remembered in
    first_idle_time (taken from time.time()); any other state resets it.
    When the idle time exceeds idle_timeout, sys.exit(0) is called.
    '''
    if not self.idle_timeout:
        return

    if self.trainer_logic.state != 'idle':
        # Not idle (anymore): forget when the idle period started.
        self.first_idle_time = None
        return

    if self.first_idle_time is None:
        self.first_idle_time = time.time()

    idle_time = time.time() - self.first_idle_time
    if idle_time > self.idle_timeout:
        self.log.info('Trainer has been idle for %.2f s (with timeout %.2f s). Shutting down.',
                      idle_time, self.idle_timeout)
        sys.exit(0)

    self.log.debug('idle time: %.2f s / %.2f s', idle_time, self.idle_timeout)
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
import logging
|
|
3
|
-
|
|
4
|
-
from fastapi import APIRouter, HTTPException, Request
|
|
5
|
-
|
|
6
|
-
from learning_loop_node.trainer.trainer_logic import TrainerLogic
|
|
7
|
-
|
|
8
|
-
router = APIRouter()
|
|
9
|
-
|
|
10
|
-
# pylint: disable=protected-access
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@router.post("/controls/detect/{organization}/{project}/{version}")
|
|
14
|
-
async def operation_mode(organization: str, project: str, version: str, request: Request):
|
|
15
|
-
'''
|
|
16
|
-
Example Usage
|
|
17
|
-
curl -X POST localhost/controls/detect/<organization>/<project>/<model_version>
|
|
18
|
-
'''
|
|
19
|
-
path = f'/{organization}/projects/{project}/models'
|
|
20
|
-
response = await request.app.loop_communication.get(path)
|
|
21
|
-
if response.status_code != 200:
|
|
22
|
-
raise HTTPException(404, 'could not load latest model')
|
|
23
|
-
models = response.json()['models']
|
|
24
|
-
model_id = next(m for m in models if m['version'] == version)['id']
|
|
25
|
-
logging.info(model_id)
|
|
26
|
-
trainer: TrainerLogic = request.app.trainer
|
|
27
|
-
await trainer._do_detections()
|
|
28
|
-
return "OK"
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/annotation/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/outbox.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/rest/detect.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/detector/rest/upload.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/helpers/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/helpers/log_conf.py
RENAMED
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/helpers/misc.py
RENAMED
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/loop_communication.py
RENAMED
|
File without changes
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/detector/test.jpg
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/tests/test_helper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/__init__.py
RENAMED
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/downloader.py
RENAMED
|
@@ -17,8 +17,8 @@ class TrainingsDownloader():
|
|
|
17
17
|
return (image_data, skipped_image_count)
|
|
18
18
|
|
|
19
19
|
async def download_images_and_annotations(self, image_ids: List[str], image_folder: str) -> Tuple[List[Dict], int]:
|
|
20
|
-
await self.data_exchanger.download_images(image_ids, image_folder)
|
|
21
20
|
image_data = await self.data_exchanger.download_images_data(image_ids)
|
|
21
|
+
await self.data_exchanger.download_images(image_ids, image_folder)
|
|
22
22
|
logging.info('filtering corrupt images') # download only safes valid images
|
|
23
23
|
valid_image_data: List[Dict] = []
|
|
24
24
|
skipped_image_count = 0
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/exceptions.py
RENAMED
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/executor.py
RENAMED
|
File without changes
|
{learning_loop_node-0.10.10 → learning_loop_node-0.10.11}/learning_loop_node/trainer/io_helpers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|