learning-loop-node 0.10.10__py3-none-any.whl → 0.10.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of learning-loop-node might be problematic. Click here for more details.

@@ -50,7 +50,7 @@ class ModelInformation():
50
50
  organization: str
51
51
  project: str
52
52
  version: str
53
- categories: List[Category]
53
+ categories: List[Category] = field(default_factory=list)
54
54
  resolution: Optional[int] = None
55
55
  model_root_path: Optional[str] = None
56
56
  model_size: Optional[str] = None
@@ -77,7 +77,7 @@ class DataExchanger():
77
77
  logging.info('got empty list. No images were downloaded')
78
78
  return []
79
79
 
80
- progress_factor = 0.5 / num_image_ids # 50% of progress is for downloading data
80
+ progress_factor = 0.5 / num_image_ids # first 50% of progress is for downloading data
81
81
  images_data: List[Dict] = []
82
82
  for i in range(0, num_image_ids, chunk_size):
83
83
  self.progress = i * progress_factor
@@ -100,20 +100,21 @@ class DataExchanger():
100
100
  new_image_uuids = [id for id in image_uuids if id not in existing_uuids]
101
101
 
102
102
  paths, _ = create_resource_paths(self.context.organization, self.context.project, new_image_uuids)
103
- num_image_ids = len(image_uuids)
103
+ num_new_image_ids = len(new_image_uuids)
104
104
  os.makedirs(image_folder, exist_ok=True)
105
105
 
106
- progress_factor = 0.5 / num_image_ids # second 50% of progress is for downloading images
107
- for i in range(0, num_image_ids, chunk_size):
106
+ progress_factor = 0.5 / num_new_image_ids # second 50% of progress is for downloading images
107
+ for i in range(0, num_new_image_ids, chunk_size):
108
108
  self.progress = 0.5 + i * progress_factor
109
109
  chunk_paths = paths[i:i+chunk_size]
110
- chunk_ids = image_uuids[i:i+chunk_size]
110
+ chunk_ids = new_image_uuids[i:i+chunk_size]
111
111
  tasks = []
112
112
  for j, chunk_j in enumerate(chunk_paths):
113
113
  start = time()
114
114
  tasks.append(create_task(self._download_one_image(chunk_j, chunk_ids[j], image_folder)))
115
115
  await asyncio.sleep(max(0, 0.02 - (time() - start))) # prevent too many requests at once
116
116
  await asyncio.gather(*tasks)
117
+ self.progress = 1.0
117
118
 
118
119
  async def _download_one_image(self, path: str, image_id: str, image_folder: str) -> None:
119
120
  response = await self.loop_communicator.get(path)
@@ -124,7 +125,10 @@ class DataExchanger():
124
125
  async with aiofiles.open(filename, 'wb') as f:
125
126
  await f.write(response.content)
126
127
  if not await is_valid_image(filename, self.check_jpeg):
128
+ logging.error('Invalid image "%s". Removing it..', filename)
127
129
  os.remove(filename)
130
+ else:
131
+ logging.debug('Downloaded image "%s"', filename)
128
132
 
129
133
  async def download_model(self, target_folder: str, context: Context, model_uuid: str, model_format: str) -> List[str]:
130
134
  """Downloads a model (and additional meta data like model.json) and returns the paths of the downloaded files.
@@ -6,7 +6,7 @@ import subprocess
6
6
  from dataclasses import asdict
7
7
  from datetime import datetime
8
8
  from threading import Thread
9
- from typing import Dict, List, Literal, Optional, Union
9
+ from typing import Dict, List, Optional, Union
10
10
 
11
11
  import numpy as np
12
12
  from dacite import from_dict
@@ -26,6 +26,7 @@ from .outbox import Outbox
26
26
  from .rest import about as rest_about
27
27
  from .rest import backdoor_controls
28
28
  from .rest import detect as rest_detect
29
+ from .rest import model_version_control as rest_version_control
29
30
  from .rest import operation_mode as rest_mode
30
31
  from .rest import outbox_mode as rest_outbox_mode
31
32
  from .rest import upload as rest_upload
@@ -52,13 +53,22 @@ class DetectorNode(Node):
52
53
  self.loop_communicator)
53
54
 
54
55
  self.relevance_filter: RelevanceFilter = RelevanceFilter(self.outbox)
55
- self.target_model: Optional[str] = None
56
+
57
+ # NOTE: version_control controls the behavior of the detector node.
58
+ # FollowLoop: the detector node will follow the loop and update the model if necessary
59
+ # SpecificVersion: the detector node will update to a specific version, set via the /model_version endpoint
60
+ # Pause: the detector node will not update the model
61
+ self.version_control: rest_version_control.VersionMode = rest_version_control.VersionMode.Pause if os.environ.get(
62
+ 'VERSION_CONTROL_DEFAULT', 'follow_loop').lower() == 'pause' else rest_version_control.VersionMode.FollowLoop
63
+ self.target_model: Optional[ModelInformation] = None
64
+ self.loop_deployment_target: Optional[ModelInformation] = None
56
65
 
57
66
  self.include_router(rest_detect.router, tags=["detect"])
58
67
  self.include_router(rest_upload.router, prefix="")
59
68
  self.include_router(rest_mode.router, tags=["operation_mode"])
60
69
  self.include_router(rest_about.router, tags=["about"])
61
70
  self.include_router(rest_outbox_mode.router, tags=["outbox_mode"])
71
+ self.include_router(rest_version_control.router, tags=["model_version"])
62
72
 
63
73
  if use_backdoor_controls:
64
74
  self.include_router(backdoor_controls.router)
@@ -75,6 +85,8 @@ class DetectorNode(Node):
75
85
  Context(organization=self.organization, project=self.project),
76
86
  self.loop_communicator)
77
87
  self.relevance_filter = RelevanceFilter(self.outbox)
88
+ self.version_control = rest_version_control.VersionMode.Pause if os.environ.get(
89
+ 'VERSION_CONTROL_DEFAULT', 'follow_loop').lower() == 'pause' else rest_version_control.VersionMode.FollowLoop
78
90
  self.target_model = None
79
91
  # self.setup_sio_server()
80
92
 
@@ -183,20 +195,12 @@ class DetectorNode(Node):
183
195
  return
184
196
  try:
185
197
  self.log.info(f'Current operation mode is {self.operation_mode}')
186
- update_to_model_id = await self.send_status()
187
- if not update_to_model_id:
188
- self.log.info('could not check for updates')
198
+ try:
199
+ await self.sync_status_with_learning_loop()
200
+ except Exception as e:
201
+ self.log.error(f'Could not check for updates: {e}')
189
202
  return
190
203
 
191
- # TODO: solve race condition (it should not be required to recheck if model_info is not None, but it is!)
192
- if self.detector_logic.is_initialized:
193
- model_info = self.detector_logic._model_info # pylint: disable=protected-access
194
- if model_info is not None:
195
- self.log.info(f'Current model: {model_info.version} with id {model_info.id}')
196
- else:
197
- self.log.info('no model loaded')
198
- else:
199
- self.log.info('no model loaded')
200
204
  if self.operation_mode != OperationMode.Idle:
201
205
  self.log.info(f'not checking for updates; operation mode is {self.operation_mode}')
202
206
  return
@@ -206,25 +210,22 @@ class DetectorNode(Node):
206
210
  self.log.info('not checking for updates; no target model selected')
207
211
  return
208
212
 
209
- self.log.info('going to check for new updates') # TODO: solve race condition !!!
210
- model_info = self.detector_logic._model_info # pylint: disable=protected-access
211
- if model_info is not None:
212
- version = model_info.version
213
- else:
214
- version = None
215
- if not self.detector_logic.is_initialized or self.target_model != version:
216
- cur_model = version or "-"
217
- self.log.info(f'Current model "{cur_model}" needs to be updated to {self.target_model}')
213
+ current_version = self.detector_logic._model_info.version if self.detector_logic._model_info is not None else None
214
+
215
+ if not self.detector_logic.is_initialized or self.target_model.version != current_version:
216
+ self.log.info(
217
+ f'Current model "{current_version or "-"}" needs to be updated to {self.target_model.version}')
218
+
218
219
  with step_into(GLOBALS.data_folder):
219
220
  model_symlink = 'model'
220
- target_model_folder = f'models/{self.target_model}'
221
+ target_model_folder = f'models/{self.target_model.version}'
221
222
  shutil.rmtree(target_model_folder, ignore_errors=True)
222
223
  os.makedirs(target_model_folder)
223
224
 
224
225
  await self.data_exchanger.download_model(target_model_folder,
225
226
  Context(organization=self.organization,
226
227
  project=self.project),
227
- update_to_model_id, self.detector_logic.model_format)
228
+ self.target_model.id, self.detector_logic.model_format)
228
229
  try:
229
230
  os.unlink(model_symlink)
230
231
  os.remove(model_symlink)
@@ -234,26 +235,42 @@ class DetectorNode(Node):
234
235
  self.log.info(f'Updated symlink for model to {os.readlink(model_symlink)}')
235
236
 
236
237
  self.detector_logic.load_model()
237
- await self.send_status()
238
+ try:
239
+ await self.sync_status_with_learning_loop()
240
+ except Exception:
241
+ pass
238
242
  # self.reload(reason='new model installed')
239
- else:
240
- self.log.info('Versions are identic. Nothing to do.')
243
+
241
244
  except Exception as e:
242
245
  self.log.exception('check_for_update failed')
243
246
  msg = e.cause if isinstance(e, DownloadError) else str(e)
244
247
  self.status.set_error('update_model', f'Could not update model: {msg}')
245
- await self.send_status()
248
+ try:
249
+ await self.sync_status_with_learning_loop()
250
+ except Exception:
251
+ pass
252
+
253
+ async def sync_status_with_learning_loop(self) -> None:
254
+ """Sync status of the detector with the Learning Loop.
255
+ The Learning Loop will respond with the model info of the deployment target.
256
+ If version_control is set to FollowLoop, the detector will update the target_model.
257
+ Return if the communication was successful.
258
+
259
+ Raises:
260
+ Exception: If the communication with the Learning Loop failed.
261
+ """
246
262
 
247
- async def send_status(self) -> Union[str, Literal[False]]:
248
263
  if not self.sio_client.connected:
249
- self.log.info('could not send status -- we are not connected to the Learning Loop')
250
- return False
264
+ self.log.info('Status sync failed: not connected')
265
+ raise Exception('Status sync failed: not connected')
251
266
 
252
267
  try:
253
268
  current_model = self.detector_logic.model_info.version
254
269
  except Exception:
255
270
  current_model = None
256
271
 
272
+ target_model_version = self.target_model.version if self.target_model else None
273
+
257
274
  status = DetectionStatus(
258
275
  id=self.uuid,
259
276
  name=self.name,
@@ -262,27 +279,38 @@ class DetectorNode(Node):
262
279
  uptime=int((datetime.now() - self.startup_datetime).total_seconds()),
263
280
  operation_mode=self.operation_mode,
264
281
  current_model=current_model,
265
- target_model=self.target_model,
282
+ target_model=target_model_version,
266
283
  model_format=self.detector_logic.model_format,
267
284
  )
268
285
 
269
286
  self.log.info(f'sending status {status}')
270
287
  response = await self.sio_client.call('update_detector', (self.organization, self.project, jsonable_encoder(asdict(status))))
288
+
271
289
  assert response is not None
272
290
  socket_response = from_dict(data_class=SocketResponse, data=response)
273
291
  if not socket_response.success:
274
292
  self.log.error(f'Statusupdate failed: {response}')
275
- return False
293
+ raise Exception(f'Statusupdate failed: {response}')
276
294
 
277
295
  assert socket_response.payload is not None
278
- # TODO This is weird because target_model_version is stored in self and target_model_id is returned
279
- self.target_model = socket_response.payload['target_model_version']
280
- self.log.info(f'After sending status. Target_model is {self.target_model}')
281
- return socket_response.payload['target_model_id']
296
+
297
+ deployment_target_model_id = socket_response.payload['target_model_id']
298
+ deployment_target_model_version = socket_response.payload['target_model_version']
299
+ self.loop_deployment_target = ModelInformation(organization=self.organization, project=self.project,
300
+ host="", categories=[],
301
+ id=deployment_target_model_id,
302
+ version=deployment_target_model_version)
303
+
304
+ if self.version_control == rest_version_control.VersionMode.FollowLoop:
305
+ self.target_model = self.loop_deployment_target
306
+ self.log.info(f'After sending status. Target_model is {self.target_model.version}')
282
307
 
283
308
  async def set_operation_mode(self, mode: OperationMode):
284
309
  self.operation_mode = mode
285
- await self.send_status()
310
+ try:
311
+ await self.sync_status_with_learning_loop()
312
+ except Exception as e:
313
+ self.log.warning(f'Operation mode set to {mode}, but sync failed: {e}')
286
314
 
287
315
  def reload(self, reason: str):
288
316
  '''provide a cause for the reload'''
@@ -21,5 +21,5 @@ async def get_about(request: Request):
21
21
  'operation_mode': app.operation_mode.value,
22
22
  'state': app.status.state,
23
23
  'model_info': app.detector_logic._model_info, # pylint: disable=protected-access
24
- 'target_model': app.target_model, # pylint: disable=protected-access
24
+ 'target_model': app.target_model.version if app.target_model is not None else 'None',
25
25
  }
@@ -0,0 +1,101 @@
1
+
2
+ import os
3
+ from enum import Enum
4
+ from typing import TYPE_CHECKING
5
+
6
+ from fastapi import APIRouter, HTTPException, Request
7
+
8
+ from ...data_classes import ModelInformation
9
+ from ...globals import GLOBALS
10
+
11
+ if TYPE_CHECKING:
12
+ from ..detector_node import DetectorNode
13
+
14
+ router = APIRouter()
15
+
16
+
17
+ class VersionMode(str, Enum):
18
+ FollowLoop = 'follow_loop' # will follow the loop
19
+ SpecificVersion = 'specific_version' # will follow the specific version
20
+ Pause = 'pause' # will pause the updates
21
+
22
+
23
+ @router.get("/model_version")
24
+ async def get_version(request: Request):
25
+ '''
26
+ Example Usage
27
+ curl http://localhost/model_version
28
+ '''
29
+ # pylint: disable=protected-access
30
+
31
+ app: 'DetectorNode' = request.app
32
+
33
+ current_version = app.detector_logic._model_info.version if app.detector_logic._model_info is not None else 'None'
34
+ target_version = app.target_model.version if app.target_model is not None else 'None'
35
+ loop_version = app.loop_deployment_target.version if app.loop_deployment_target is not None else 'None'
36
+
37
+ local_versions: list[str] = []
38
+
39
+ local_models = os.listdir(os.path.join(GLOBALS.data_folder, 'models'))
40
+ for model in local_models:
41
+ if model.replace('.', '').isdigit():
42
+ local_versions.append(model)
43
+
44
+ return {
45
+ 'current_version': current_version,
46
+ 'target_version': target_version,
47
+ 'loop_version': loop_version,
48
+ 'local_versions': local_versions,
49
+ 'version_control': app.version_control.value,
50
+ }
51
+
52
+
53
+ @router.put("/model_version")
54
+ async def put_version(request: Request):
55
+ '''
56
+ Example Usage
57
+ curl -X PUT -d "follow_loop" http://localhost/model_version
58
+ curl -X PUT -d "pause" http://localhost/model_version
59
+ curl -X PUT -d "13.6" http://localhost/model_version
60
+ '''
61
+ app: 'DetectorNode' = request.app
62
+ content = str(await request.body(), 'utf-8')
63
+
64
+ if content == 'follow_loop':
65
+ app.version_control = VersionMode.FollowLoop
66
+ elif content == 'pause':
67
+ app.version_control = VersionMode.Pause
68
+ else:
69
+ app.version_control = VersionMode.SpecificVersion
70
+ if not content or not content.replace('.', '').isdigit():
71
+ raise HTTPException(400, 'Invalid version number')
72
+ target_version = content
73
+
74
+ if app.target_model is not None and app.target_model.version == target_version:
75
+ return "OK"
76
+
77
+ # Fetch the model uuid by version from the loop
78
+ uri = f'/{app.organization}/projects/{app.project}/models'
79
+ response = await app.loop_communicator.get(uri)
80
+ if response.status_code != 200:
81
+ app.version_control = VersionMode.Pause
82
+ raise HTTPException(500, 'Failed to load models from learning loop')
83
+
84
+ models = response.json()['models']
85
+ models_with_target_version = [m for m in models if m['version'] == target_version]
86
+ if len(models_with_target_version) == 0:
87
+ app.version_control = VersionMode.Pause
88
+ raise HTTPException(400, f'No Model with version {target_version}')
89
+ if len(models_with_target_version) > 1:
90
+ app.version_control = VersionMode.Pause
91
+ raise HTTPException(500, f'Multiple models with version {target_version}')
92
+
93
+ model_id = models_with_target_version[0]['id']
94
+ model_host = models_with_target_version[0].get('host', 'unknown')
95
+
96
+ app.target_model = ModelInformation(organization=app.organization, project=app.project,
97
+ host=model_host, categories=[],
98
+ id=model_id,
99
+ version=target_version)
100
+
101
+ return "OK"
@@ -16,7 +16,7 @@ from socketio import AsyncClient
16
16
  from .data_classes import NodeStatus
17
17
  from .data_exchanger import DataExchanger
18
18
  from .helpers import log_conf
19
- from .helpers.misc import activate_asyncio_warnings, ensure_socket_response, read_or_create_uuid
19
+ from .helpers.misc import ensure_socket_response, read_or_create_uuid
20
20
  from .loop_communication import LoopCommunicator
21
21
 
22
22
 
@@ -1,8 +1,8 @@
1
- import shutil
2
1
  import asyncio
3
2
  import logging
4
3
  import multiprocessing
5
4
  import os
5
+ import shutil
6
6
  import socket
7
7
  from glob import glob
8
8
  from multiprocessing import Process, log_to_stderr
@@ -107,6 +107,62 @@ async def test_about_endpoint(test_detector_node: DetectorNode):
107
107
  assert any(c.name == 'purple point' for c in model_information.categories)
108
108
 
109
109
 
110
+ async def test_model_version_api(test_detector_node: DetectorNode):
111
+ await asyncio.sleep(3)
112
+
113
+ response = requests.get(f'http://localhost:{GLOBALS.detector_port}/model_version', timeout=30)
114
+ assert response.status_code == 200
115
+ response_dict = json.loads(response.content)
116
+ assert response_dict['current_version'] == '1.1'
117
+ assert response_dict['target_version'] == '1.1'
118
+ assert response_dict['loop_version'] == '1.1'
119
+ assert response_dict['local_versions'] == ['1.1']
120
+ assert response_dict['version_control'] == 'follow_loop'
121
+
122
+ response = requests.put(f'http://localhost:{GLOBALS.detector_port}/model_version', data='1.0', timeout=30)
123
+ response = requests.get(f'http://localhost:{GLOBALS.detector_port}/model_version', timeout=30)
124
+ assert response.status_code == 200
125
+ response_dict = json.loads(response.content)
126
+ assert response_dict['current_version'] == '1.1'
127
+ assert response_dict['target_version'] == '1.0'
128
+ assert response_dict['loop_version'] == '1.1'
129
+ assert response_dict['local_versions'] == ['1.1']
130
+ assert response_dict['version_control'] == 'specific_version'
131
+
132
+ await asyncio.sleep(11)
133
+
134
+ response = requests.get(f'http://localhost:{GLOBALS.detector_port}/model_version', timeout=30)
135
+ assert response.status_code == 200
136
+ response_dict = json.loads(response.content)
137
+ assert response_dict['current_version'] == '1.0'
138
+ assert response_dict['target_version'] == '1.0'
139
+ assert response_dict['loop_version'] == '1.1'
140
+ assert set(response_dict['local_versions']) == set(['1.1', '1.0'])
141
+ assert response_dict['version_control'] == 'specific_version'
142
+
143
+ response = requests.put(f'http://localhost:{GLOBALS.detector_port}/model_version', data='pause', timeout=30)
144
+ await asyncio.sleep(11)
145
+ response = requests.get(f'http://localhost:{GLOBALS.detector_port}/model_version', timeout=30)
146
+ assert response.status_code == 200
147
+ response_dict = json.loads(response.content)
148
+ assert response_dict['current_version'] == '1.0'
149
+ assert response_dict['target_version'] == '1.0'
150
+ assert response_dict['loop_version'] == '1.1'
151
+ assert set(response_dict['local_versions']) == set(['1.1', '1.0'])
152
+ assert response_dict['version_control'] == 'pause'
153
+
154
+ response = requests.put(f'http://localhost:{GLOBALS.detector_port}/model_version', data='follow_loop', timeout=30)
155
+ await asyncio.sleep(11)
156
+ response = requests.get(f'http://localhost:{GLOBALS.detector_port}/model_version', timeout=30)
157
+ assert response.status_code == 200
158
+ response_dict = json.loads(response.content)
159
+ assert response_dict['current_version'] == '1.1'
160
+ assert response_dict['target_version'] == '1.1'
161
+ assert response_dict['loop_version'] == '1.1'
162
+ assert set(response_dict['local_versions']) == set(['1.1', '1.0'])
163
+ assert response_dict['version_control'] == 'follow_loop'
164
+
165
+
110
166
  async def test_rest_outbox_mode(test_detector_node: DetectorNode):
111
167
  await asyncio.sleep(3)
112
168
 
@@ -17,8 +17,8 @@ class TrainingsDownloader():
17
17
  return (image_data, skipped_image_count)
18
18
 
19
19
  async def download_images_and_annotations(self, image_ids: List[str], image_folder: str) -> Tuple[List[Dict], int]:
20
- await self.data_exchanger.download_images(image_ids, image_folder)
21
20
  image_data = await self.data_exchanger.download_images_data(image_ids)
21
+ await self.data_exchanger.download_images(image_ids, image_folder)
22
22
  logging.info('filtering corrupt images') # download only saves valid images
23
23
  valid_image_data: List[Dict] = []
24
24
  skipped_image_count = 0
@@ -1,3 +1,6 @@
1
+ import os
2
+ import sys
3
+ import time
1
4
  from dataclasses import asdict
2
5
  from typing import Dict, Optional
3
6
 
@@ -7,7 +10,7 @@ from socketio import AsyncClient, exceptions
7
10
  from ..data_classes import TrainingStatus
8
11
  from ..node import Node
9
12
  from .io_helpers import LastTrainingIO
10
- from .rest import backdoor_controls, controls
13
+ from .rest import backdoor_controls
11
14
  from .trainer_logic_generic import TrainerLogicGeneric
12
15
 
13
16
 
@@ -20,7 +23,15 @@ class TrainerNode(Node):
20
23
  self.last_training_io = LastTrainingIO(self.uuid)
21
24
  self.trainer_logic._last_training_io = self.last_training_io
22
25
 
23
- self.include_router(controls.router, tags=["controls"])
26
+ self.first_idle_time: float | None = None
27
+ if os.environ.get('TRAINER_IDLE_TIMEOUT_SEC', 0.0):
28
+ self.idle_timeout = float(os.environ.get('TRAINER_IDLE_TIMEOUT_SEC', 0.0))
29
+ else:
30
+ self.idle_timeout = 0.0
31
+ if self.idle_timeout:
32
+ self.log.info(
33
+ f'Trainer started with an idle_timeout of {self.idle_timeout} seconds. Note that shutdown does not work if docker container has the restart policy set to always')
34
+
24
35
  if use_backdoor_controls:
25
36
  self.include_router(backdoor_controls.router, tags=["controls"])
26
37
 
@@ -38,6 +49,7 @@ class TrainerNode(Node):
38
49
  if await self.trainer_logic.try_continue_run_if_incomplete():
39
50
  return # NOTE: we prevent sending idle status after starting a continuation
40
51
  await self.send_status()
52
+ self.check_idle_timeout()
41
53
  except exceptions.TimeoutError:
42
54
  self.log.warning('timeout when sending status to learning loop, reconnecting sio_client')
43
55
  await self.sio_client.disconnect() # NOTE: reconnect happens in node._on_repeat
@@ -90,3 +102,19 @@ class TrainerNode(Node):
90
102
  result = await self.sio_client.call('update_trainer', jsonable_encoder(asdict(status)), timeout=30)
91
103
  if isinstance(result, Dict) and not result['success']:
92
104
  self.log.error(f'Error when sending status update: Response from loop was:\n {result}')
105
+
106
+ def check_idle_timeout(self):
107
+ if not self.idle_timeout:
108
+ return
109
+
110
+ if self.trainer_logic.state == 'idle':
111
+ if self.first_idle_time is None:
112
+ self.first_idle_time = time.time()
113
+ idle_time = time.time() - self.first_idle_time
114
+ if idle_time > self.idle_timeout:
115
+ self.log.info('Trainer has been idle for %.2f s (with timeout %.2f s). Shutting down.',
116
+ idle_time, self.idle_timeout)
117
+ sys.exit(0)
118
+ self.log.debug('idle time: %.2f s / %.2f s', idle_time, self.idle_timeout)
119
+ else:
120
+ self.first_idle_time = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: learning-loop-node
3
- Version: 0.10.10
3
+ Version: 0.10.11
4
4
  Summary: Python Library for Nodes which connect to the Zauberzeug Learning Loop
5
5
  Home-page: https://github.com/zauberzeug/learning_loop_node
6
6
  License: MIT
@@ -57,19 +57,20 @@ To start a node you have to implement the logic by inheriting from the correspon
57
57
 
58
58
  You can configure connection to our Learning Loop by specifying the following environment variables before starting:
59
59
 
60
- | Name | Alias | Purpose | Required by |
61
- | ----------------------- | ------------ | ------------------------------------------------------------ | -------------------- |
62
- | LOOP_HOST | HOST | Learning Loop address (e.g. learning-loop.ai) | all |
63
- | LOOP_USERNAME | USERNAME | Learning Loop user name | all besides Detector |
64
- | LOOP_PASSWORD | PASSWORD | Learning Loop password | all besides Detector |
65
- | LOOP_SSL_CERT_PATH | - | Path to the SSL certificate | all (opt.) |
66
- | LOOP_ORGANIZATION | ORGANIZATION | Organization name | Detector |
67
- | LOOP_PROJECT | PROJECT | Project name | Detector |
68
- | MIN_UNCERTAIN_THRESHOLD | PROJECT | smallest confidence (float) at which auto-upload will happen | Detector |
69
- | MAX_UNCERTAIN_THRESHOLD | PROJECT | largest confidence (float) at which auto-upload will happen | Detector |
70
- | INFERENCE_BATCH_SIZE | - | Batch size of trainer when calculating detections | Trainer (opt.) |
71
- | RESTART_AFTER_TRAINING | - | Restart the trainer after training (set to 1) | Trainer (opt.) |
72
- | KEEP_OLD_TRAININGS | - | Do not delete old trainings (set to 1) | Trainer (opt.) |
60
+ | Name | Alias | Purpose | Required by |
61
+ | ------------------------ | ------------ | ------------------------------------------------------------ | -------------------- |
62
+ | LOOP_HOST | HOST | Learning Loop address (e.g. learning-loop.ai) | all |
63
+ | LOOP_USERNAME | USERNAME | Learning Loop user name | all besides Detector |
64
+ | LOOP_PASSWORD | PASSWORD | Learning Loop password | all besides Detector |
65
+ | LOOP_SSL_CERT_PATH | - | Path to the SSL certificate | all (opt.) |
66
+ | LOOP_ORGANIZATION | ORGANIZATION | Organization name | Detector |
67
+ | LOOP_PROJECT | PROJECT | Project name | Detector |
68
+ | MIN_UNCERTAIN_THRESHOLD | PROJECT | smallest confidence (float) at which auto-upload will happen | Detector |
69
+ | MAX_UNCERTAIN_THRESHOLD | PROJECT | largest confidence (float) at which auto-upload will happen | Detector |
70
+ | INFERENCE_BATCH_SIZE | - | Batch size of trainer when calculating detections | Trainer (opt.) |
71
+ | RESTART_AFTER_TRAINING | - | Restart the trainer after training (set to 1) | Trainer (opt.) |
72
+ | KEEP_OLD_TRAININGS | - | Do not delete old trainings (set to 1) | Trainer (opt.) |
73
+ | TRAINER_IDLE_TIMEOUT_SEC | - | Automatically shutdown trainer after timeout (in seconds) | Trainer (opt.) |
73
74
 
74
75
  #### Testing
75
76
 
@@ -104,6 +105,24 @@ The detector also has a sio **upload endpoint** that can be used to upload image
104
105
 
105
106
  The endpoint returns None if the upload was successful and an error message otherwise.
106
107
 
108
+ ### Changing the model version
109
+
110
+ The detector can be configured to one of the following behaviors:
111
+
112
+ - download and use a specific model version
113
+ - automatically update the model version according to the learning loop deployment target
114
+ - pause the model updates and use the version that was last loaded
115
+
116
+ The model versioning configuration can be accessed/changed via a REST endpoint. Example Usage:
117
+
118
+ - Fetch the current model versioning configuration: `curl http://localhost/model_version`
119
+ - Configure the detector to use a specific model version: `curl -X PUT -d "1.0" http://localhost/model_version`
120
+ - Configure the detector to automatically update the model version: `curl -X PUT -d "follow_loop" http://localhost/model_version`
121
+ - Pause the model updates: `curl -X PUT -d "pause" http://localhost/model_version`
122
+
123
+ Note that the configuration is not persistent; however, the default behavior on startup can be configured via the environment variable `VERSION_CONTROL_DEFAULT`.
124
+ If the environment variable is set to `VERSION_CONTROL_DEFAULT=PAUSE`, the detector will pause the model updates on startup. Otherwise, the detector will automatically follow the loop deployment target.
125
+
107
126
  ### Changing the outbox mode
108
127
 
109
128
  If the autoupload is set to `all` or `filtered` (selected), images and the corresponding detections are saved on HDD (the outbox). A background thread will upload the images and detections to the Learning Loop. The outbox is located in the `outbox` folder in the root directory of the node. The outbox can be cleared by deleting the files in the folder.
@@ -5,21 +5,22 @@ learning_loop_node/annotation/annotator_node.py,sha256=wk11CQtM3A0Dr7efCn_Mw2X7q
5
5
  learning_loop_node/data_classes/__init__.py,sha256=wCX88lDgbb8V-gtVCVe9i-NvvZuMe5FX7eD_UJgYYXw,1305
6
6
  learning_loop_node/data_classes/annotations.py,sha256=iInU0Nuy_oYT_sj4k_n-W0UShCBI2cHQYrt8imymbtM,1211
7
7
  learning_loop_node/data_classes/detections.py,sha256=1BcU5PNzIbryWcj2xJ6ysLBTBwGOdv9SxSJiUG8WEmw,4349
8
- learning_loop_node/data_classes/general.py,sha256=44GJrJvGfPwDUmRsS7If9uSlE6KPP50LGUX91VzesLw,4664
8
+ learning_loop_node/data_classes/general.py,sha256=Bd0ngYhYvS_9OYOO6lAKEnDzLuSdPmR4I2YV-0DRsxs,4694
9
9
  learning_loop_node/data_classes/socket_response.py,sha256=tIdt-oYf6ULoJIDYQCecNM9OtWR6_wJ9tL0Ksu83Vko,655
10
10
  learning_loop_node/data_classes/training.py,sha256=hnMHZMk-WNRERyo7U97qL09v1tIdhnzPfTH-JgifLwU,6164
11
- learning_loop_node/data_exchanger.py,sha256=BTrXwjNkG9KgtUxil_ijMggql8sZDKXQm26xdKQr8_0,8459
11
+ learning_loop_node/data_exchanger.py,sha256=bAozCSGcUGx9VedGI3KP0s2w5S1gFIYm58gAS7cMebA,8661
12
12
  learning_loop_node/detector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  learning_loop_node/detector/detector_logic.py,sha256=se0jRFbV7BfTvCuCI3gcUllSYIZ5dxTkvdISe6pPTRg,1660
14
- learning_loop_node/detector/detector_node.py,sha256=jaz4TiHNVFd8p7NQ6Zcrsro9c-X9EHmmWXpFXAiO4G4,16695
14
+ learning_loop_node/detector/detector_node.py,sha256=v7hX06HuBtS4LiqlqOxDBaJlqkJanHQ4mrdjc4fVwDQ,18282
15
15
  learning_loop_node/detector/inbox_filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  learning_loop_node/detector/inbox_filter/cam_observation_history.py,sha256=TD346I9ymtIP0_CJXCIKMRuiXbfVVanXNu_iHAwDd7Q,3318
17
17
  learning_loop_node/detector/inbox_filter/relevance_filter.py,sha256=s2FuwZ-tD_5obkSutstjc8pE_hLGbrv9WjrEO9t8rJ8,1011
18
18
  learning_loop_node/detector/outbox.py,sha256=MHHP4rnGaV8JxDSig96KZN4hSQ3i9z6-7WYhTTrMtp0,8082
19
19
  learning_loop_node/detector/rest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- learning_loop_node/detector/rest/about.py,sha256=-PNqlQI_tzRvoSI_UR9rX8-5GeiENNpRDQ4Ylw3wYVs,607
20
+ learning_loop_node/detector/rest/about.py,sha256=pNnIllLBJYc_edFCJGFTCBT55pIj7GI-zYsCZyjQ8EA,623
21
21
  learning_loop_node/detector/rest/backdoor_controls.py,sha256=38axRG66Z3_Q6bYKa7Hw-ldChEAu-dJcBM_Sl_17Ozo,1725
22
22
  learning_loop_node/detector/rest/detect.py,sha256=8Rl1swANKgHc42P1z75t_PErQxpCKKPdAsKqDIZgdNU,1873
23
+ learning_loop_node/detector/rest/model_version_control.py,sha256=PKG7foFyNSvjoMhWCDb7w3mq-2e0bx5gq3ov7Rao8HU,3703
23
24
  learning_loop_node/detector/rest/operation_mode.py,sha256=eIo6_56qyZECftf4AEN8wJMABIojC0TRazvWeg0Uj_s,1664
24
25
  learning_loop_node/detector/rest/outbox_mode.py,sha256=anSZHB6jliz1t3fxrmEzgwNB62UHNdWNc9ZYOc5Nn9s,1018
25
26
  learning_loop_node/detector/rest/upload.py,sha256=IPzxJPayD7_Gx5uYC1lVJwWxdnQgM8MYGa5NugXVosY,544
@@ -31,7 +32,7 @@ learning_loop_node/helpers/gdrive_downloader.py,sha256=zeYJciTAJVRpu_eFjwgYLCpIa
31
32
  learning_loop_node/helpers/log_conf.py,sha256=3yd-jaMOeD5cRIgA5w_BH2L5odf8c4-ZjD89Bdqwe44,824
32
33
  learning_loop_node/helpers/misc.py,sha256=j4is8Rv0ttnCqF-R-wP3xwEi67OI6IBJav5Woo5lyDk,7701
33
34
  learning_loop_node/loop_communication.py,sha256=rG5MdavSTaREZ6OWfAUIT_qkkYPw3is2_FujLmHQeIc,6576
34
- learning_loop_node/node.py,sha256=pJg3mO7Egwtu7ewzWWgEXMtCG17u7yZjFt-KeN9n7rM,8010
35
+ learning_loop_node/node.py,sha256=mohvJGGfrMSGPTDvQOlX7TWs4QZ8YZHqr1YkyyGQphc,7983
35
36
  learning_loop_node/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
37
  learning_loop_node/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
38
  learning_loop_node/tests/annotator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -39,14 +40,14 @@ learning_loop_node/tests/annotator/conftest.py,sha256=G4ZvdZUdvPp9bYCzg3eEVkGCeX
39
40
  learning_loop_node/tests/annotator/pytest.ini,sha256=8QdjmawLy1zAzXrJ88or1kpFDhJw0W5UOnDfGGs_igU,262
40
41
  learning_loop_node/tests/annotator/test_annotator_node.py,sha256=TPNPPrQAxQ_zEecQcH7hlczgD3ABtTCNtUvWD1_oApk,1985
41
42
  learning_loop_node/tests/detector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- learning_loop_node/tests/detector/conftest.py,sha256=noWJWPaM9T2Shgs1I5KRDzszjPz2qTRmE1kqLaGMIdY,4161
43
+ learning_loop_node/tests/detector/conftest.py,sha256=_zNEYIuiRmXj4qPsQli82JbqRR5CzhChkyo3dP8WWaU,4161
43
44
  learning_loop_node/tests/detector/inbox_filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
45
  learning_loop_node/tests/detector/inbox_filter/test_observation.py,sha256=k4WYdvnuV7d_r7zI4M2aA8WuBjm0aycQ0vj1rGE2q4w,1370
45
46
  learning_loop_node/tests/detector/inbox_filter/test_relevance_group.py,sha256=XjiMsS0LgvM0OkPf5-s2rjFbG7C42LTmz_rDVMGHKoY,7603
46
47
  learning_loop_node/tests/detector/inbox_filter/test_unexpected_observations_count.py,sha256=MWC7PbaCy14jjRw0_oilkXj6gymAsUZXHJdzNW5m2D4,1639
47
48
  learning_loop_node/tests/detector/pytest.ini,sha256=8QdjmawLy1zAzXrJ88or1kpFDhJw0W5UOnDfGGs_igU,262
48
49
  learning_loop_node/tests/detector/test.jpg,sha256=msA-vHPmvPiro_D102Qmn1fn4vNfooqYYEXPxZUmYpk,161390
49
- learning_loop_node/tests/detector/test_client_communication.py,sha256=9r1LULGmRqKihRgG0v7-CkQryRCsyyahoLy8QyjsOTU,6128
50
+ learning_loop_node/tests/detector/test_client_communication.py,sha256=2gJARodJSDuJHgeN1_xLMbvDcPQkXpBXEefu7MOyePk,8998
50
51
  learning_loop_node/tests/detector/test_outbox.py,sha256=5RMKQfuu1-rvpVCpEtt_D70bYgma-sIrTHWxHdTdU9Y,3001
51
52
  learning_loop_node/tests/detector/test_relevance_filter.py,sha256=3VLhHKaxPzLYmiNZagvgg9ZHkPhWk4_-qpmkJw36wBU,2046
52
53
  learning_loop_node/tests/detector/testing_detector.py,sha256=FeQroV85IvsT8dmalQBqf1FLNt_buCtZK3-lgtmbrBI,542
@@ -77,17 +78,16 @@ learning_loop_node/tests/trainer/test_errors.py,sha256=Z3BWvUkVKxMGe_RNYeVbrhPps
77
78
  learning_loop_node/tests/trainer/test_trainer_states.py,sha256=djYCs5ieajQHRjk8QcUVBUkQEG8UGYFoNGwSX0z2oGk,1067
78
79
  learning_loop_node/tests/trainer/testing_trainer_logic.py,sha256=KslqDJDntkgH4Yd_z-guiVPvzi5Q-l-Bqc3fUjT5N7U,3883
79
80
  learning_loop_node/trainer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
80
- learning_loop_node/trainer/downloader.py,sha256=qzx7zzObcFEvRVQFe8gi8KJNIapASi1_XssbspXD1Rw,1469
81
+ learning_loop_node/trainer/downloader.py,sha256=Qk-oBcrGCVuWTVs3hvAJzQSqCIHPGZ7NXLJ_fAqvCoY,1469
81
82
  learning_loop_node/trainer/exceptions.py,sha256=hLLDGncC6PLZjKg4lZBpu-QA8itQIxiuxExz1uptgnw,40
82
83
  learning_loop_node/trainer/executor.py,sha256=-0BxDqmAI1NCiISi7Rw8McJQfgxxVy1gSa1epYuL3U0,3942
83
84
  learning_loop_node/trainer/io_helpers.py,sha256=Ylxz8HAId0Jlz95So5kXdJEp1yKQuwroDKIhbTUscF4,7257
84
85
  learning_loop_node/trainer/rest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
86
  learning_loop_node/trainer/rest/backdoor_controls.py,sha256=YQcG0KwxzKDNYeMtHrSwr26q__N7ty0o6Kar6CLWAd0,5869
86
- learning_loop_node/trainer/rest/controls.py,sha256=XF37i2edeMHKdSXyJc4ZqaTZ38u6d3u3Sb3C-Mwyfko,934
87
87
  learning_loop_node/trainer/test_executor.py,sha256=6BVGDN_6f5GEMMEvDLSG1yzMybSvgXaP5uYpSfsVPP0,2224
88
88
  learning_loop_node/trainer/trainer_logic.py,sha256=PJxiO1chPdvpq8UTtzv_nVam9CouCswX9b1FnRwT2Tw,8411
89
89
  learning_loop_node/trainer/trainer_logic_generic.py,sha256=AzllMMiUPP_CMkjIVqse8wY50Cg5RDnk5y5ERVUjtZg,25801
90
- learning_loop_node/trainer/trainer_node.py,sha256=dV-kcTIxhHsep_xIXdGc_AaeJM1mFlQNnwUpTkG4btg,4110
91
- learning_loop_node-0.10.10.dist-info/METADATA,sha256=d4e08fjXLZ4nieGSIE97nLYl1sapDta_W4mA-Uh05oM,10384
92
- learning_loop_node-0.10.10.dist-info/WHEEL,sha256=WGfLGfLX43Ei_YORXSnT54hxFygu34kMpcQdmgmEwCQ,88
93
- learning_loop_node-0.10.10.dist-info/RECORD,,
90
+ learning_loop_node/trainer/trainer_node.py,sha256=dIptgXTwBC_bnPo-U2sG_zDaaqZixY7FA0dPt7P2Lps,5261
91
+ learning_loop_node-0.10.11.dist-info/METADATA,sha256=4IVvGgmNH8Iu_f4EIEpZKc0xGwKTzL-st8GNjM2YDPk,11702
92
+ learning_loop_node-0.10.11.dist-info/WHEEL,sha256=WGfLGfLX43Ei_YORXSnT54hxFygu34kMpcQdmgmEwCQ,88
93
+ learning_loop_node-0.10.11.dist-info/RECORD,,
@@ -1,28 +0,0 @@
1
-
2
- import logging
3
-
4
- from fastapi import APIRouter, HTTPException, Request
5
-
6
- from learning_loop_node.trainer.trainer_logic import TrainerLogic
7
-
8
- router = APIRouter()
9
-
10
- # pylint: disable=protected-access
11
-
12
-
13
- @router.post("/controls/detect/{organization}/{project}/{version}")
14
- async def operation_mode(organization: str, project: str, version: str, request: Request):
15
- '''
16
- Example Usage
17
- curl -X POST localhost/controls/detect/<organization>/<project>/<model_version>
18
- '''
19
- path = f'/{organization}/projects/{project}/models'
20
- response = await request.app.loop_communication.get(path)
21
- if response.status_code != 200:
22
- raise HTTPException(404, 'could not load latest model')
23
- models = response.json()['models']
24
- model_id = next(m for m in models if m['version'] == version)['id']
25
- logging.info(model_id)
26
- trainer: TrainerLogic = request.app.trainer
27
- await trainer._do_detections()
28
- return "OK"