learning-loop-node 0.11.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of learning-loop-node might be problematic.
- learning_loop_node/data_classes/__init__.py +3 -3
- learning_loop_node/data_classes/general.py +1 -1
- learning_loop_node/data_classes/image_metadata.py +1 -1
- learning_loop_node/data_classes/training.py +62 -67
- learning_loop_node/data_exchanger.py +11 -9
- learning_loop_node/detector/detector_node.py +3 -2
- learning_loop_node/detector/outbox.py +8 -5
- learning_loop_node/helpers/environment_reader.py +2 -2
- learning_loop_node/helpers/log_conf.py +4 -1
- learning_loop_node/helpers/misc.py +7 -17
- learning_loop_node/loop_communication.py +7 -11
- learning_loop_node/node.py +10 -4
- learning_loop_node/rest.py +4 -2
- learning_loop_node/tests/detector/conftest.py +17 -21
- learning_loop_node/tests/trainer/conftest.py +18 -12
- learning_loop_node/tests/trainer/states/test_state_download_train_model.py +7 -3
- learning_loop_node/tests/trainer/states/test_state_prepare.py +0 -1
- learning_loop_node/tests/trainer/states/test_state_sync_confusion_matrix.py +2 -1
- learning_loop_node/tests/trainer/states/test_state_train.py +0 -2
- learning_loop_node/tests/trainer/test_trainer_states.py +6 -1
- learning_loop_node/tests/trainer/testing_trainer_logic.py +3 -3
- learning_loop_node/trainer/downloader.py +1 -1
- learning_loop_node/trainer/executor.py +2 -2
- learning_loop_node/trainer/rest/backdoor_controls.py +6 -6
- learning_loop_node/trainer/trainer_logic.py +7 -3
- learning_loop_node/trainer/trainer_logic_generic.py +59 -41
- learning_loop_node/trainer/trainer_node.py +18 -35
- {learning_loop_node-0.11.0.dist-info → learning_loop_node-0.12.0.dist-info}/METADATA +1 -1
- {learning_loop_node-0.11.0.dist-info → learning_loop_node-0.12.0.dist-info}/RECORD +30 -30
- {learning_loop_node-0.11.0.dist-info → learning_loop_node-0.12.0.dist-info}/WHEEL +0 -0
learning_loop_node/tests/trainer/conftest.py

@@ -30,12 +30,15 @@ async def test_initialized_trainer_node():
     node = TrainerNode(name='test', trainer_logic=trainer, uuid='NOD30000-0000-0000-0000-000000000000')
     trainer._node = node
     trainer._init_new_training(context=Context(organization='zauberzeug', project='demo'),
-
-
-
-
-
-
+                               training_config={'categories': [],
+                                                'id': '00000000-0000-0000-0000-000000000012',  # version 1.2 of demo project
+                                                'training_number': 0,
+                                                'model_variant': '',
+                                                'hyperparameters': {
+                                                    'resolution': 800,
+                                                    'flip_rl': False,
+                                                    'flip_ud': False}
+                                                })
     await node._on_startup()
     yield node
     await node._on_shutdown()

@@ -50,12 +53,15 @@ async def test_initialized_trainer():
     await node._on_startup()
     trainer._node = node
     trainer._init_new_training(context=Context(organization='zauberzeug', project='demo'),
-
-
-
-
-
-
+                               training_config={'categories': [],
+                                                'id': '00000000-0000-0000-0000-000000000012',  # version 1.2 of demo project
+                                                'training_number': 0,
+                                                'model_variant': '',
+                                                'hyperparameters': {
+                                                    'resolution': 800,
+                                                    'flip_rl': False,
+                                                    'flip_ud': False}
+                                                })
     yield trainer
     try:
         await node._on_shutdown()
learning_loop_node/tests/trainer/states/test_state_download_train_model.py

@@ -3,6 +3,7 @@ import asyncio
 import os
 
 from ....data_classes import TrainerState
+from ... import test_helper
 from ..state_helper import assert_training_state, create_active_training_file
 from ..testing_trainer_logic import TestingTrainerLogic
 

@@ -11,9 +12,12 @@ from ..testing_trainer_logic import TestingTrainerLogic
 
 async def test_downloading_is_successful(test_initialized_trainer: TestingTrainerLogic):
     trainer = test_initialized_trainer
-    create_active_training_file(trainer, training_state=TrainerState.DataDownloaded)
 
-
+    model_id = await test_helper.get_latest_model_id(project='demo')
+    create_active_training_file(trainer,
+                                base_model_uuid=model_id,
+                                training_state=TrainerState.DataDownloaded)
+
     trainer._init_from_last_training()
 
     asyncio.get_running_loop().create_task(

@@ -50,7 +54,7 @@ async def test_abort_download_model(test_initialized_trainer: TestingTrainerLogic):
 async def test_downloading_failed(test_initialized_trainer: TestingTrainerLogic):
     trainer = test_initialized_trainer
     create_active_training_file(trainer, training_state=TrainerState.DataDownloaded,
-
+                                base_model_uuid='00000000-0000-0000-0000-000000000000')  # bad model id)
     trainer._init_from_last_training()
 
     trainer._begin_training_task()
learning_loop_node/tests/trainer/states/test_state_prepare.py

@@ -20,7 +20,6 @@ async def test_preparing_is_successful(test_initialized_trainer: TestingTrainerLogic):
     await trainer._perform_state('prepare', TrainerState.DataDownloading, TrainerState.DataDownloaded, trainer._prepare)
     assert trainer_has_prepare_error(trainer) is False
     assert trainer.training.training_state == TrainerState.DataDownloaded
-    assert trainer.training.data is not None
     assert trainer.node.last_training_io.load() == trainer.training
 
 
learning_loop_node/tests/trainer/states/test_state_sync_confusion_matrix.py

@@ -19,7 +19,7 @@ def trainer_has_sync_confusion_matrix_error(trainer: TrainerLogic):
 async def test_nothing_to_sync(test_initialized_trainer: TestingTrainerLogic):
     trainer = test_initialized_trainer
 
-    #
+    # NOTE: this requires trainer to have _training
     # trainer.load_active_training()
     create_active_training_file(trainer, training_state=TrainerState.TrainingFinished)
     trainer._init_from_last_training()

@@ -40,6 +40,7 @@ async def test_unsynced_model_available__sync_successful(test_initialized_trainer: TestingTrainerLogic):
     create_active_training_file(trainer, training_state=TrainerState.TrainingFinished)
 
     trainer._init_from_last_training()
+    trainer.training.image_data = []
     trainer.has_new_model = True
 
     trainer._begin_training_task()
learning_loop_node/tests/trainer/test_trainer_states.py

@@ -14,7 +14,12 @@ def create_training() -> Training:
         context=context,
         project_folder='',
         images_folder='',
-        training_folder=''
+        training_folder='',
+        categories=[],
+        hyperparameters={},
+        model_variant='',
+        training_number=0,
+        training_state=TrainerState.Preparing)
     return training
 
 
learning_loop_node/tests/trainer/testing_trainer_logic.py

@@ -30,13 +30,13 @@ class TestingTrainerLogic(TrainerLogic):
                             PretrainedModel(name='large', label='Large', description='a large model')]
 
     # pylint: disable=unused-argument
-    async def _start_training_from_base_model(self
+    async def _start_training_from_base_model(self) -> None:
         assert self._executor is not None
         await self._executor.start('/bin/bash -c "while true; do sleep 1; done"')
 
     async def _start_training_from_scratch(self) -> None:
-        assert self.
-        await self.
+        assert self._executor is not None
+        await self._executor.start('/bin/bash -c "while true; do sleep 1; done"')
 
     def _get_new_best_training_state(self) -> Optional[TrainingStateData]:
         if self.has_new_model:
learning_loop_node/trainer/downloader.py

@@ -27,5 +27,5 @@ class TrainingsDownloader():
                 valid_image_data.append(i)
             else:
                 skipped_image_count += 1
-        logging.info(
+        logging.info('Done downloading image data for %s images.', len(image_data))
         return (valid_image_data, skipped_image_count)
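The old call bodies are truncated in this view, so the exact 0.11.0 text is unknown; what this and several later hunks consistently use is %-style placeholder arguments for log calls. A minimal, self-contained sketch of that pattern: the placeholder form hands the arguments to the logging framework, which only builds the final string if the record is actually emitted, whereas an f-string is always formatted.

import logging

logging.basicConfig(level=logging.INFO)
image_data = ['one.jpg', 'two.jpg']

# Placeholder style: formatting is deferred to the logging framework and only
# happens if the record is actually emitted.
logging.info('Done downloading image data for %s images.', len(image_data))

# Eager alternative: the message is built even when the log level filters it out.
logging.info(f'Done downloading image data for {len(image_data)} images.')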
learning_loop_node/trainer/executor.py

@@ -3,7 +3,7 @@ import logging
 import os
 import shlex
 from io import BufferedWriter
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 
 class Executor:

@@ -33,7 +33,7 @@ class Executor:
         if env is not None:
             full_env.update(env)
 
-        logging.info(
+        logging.info('Starting executor with command: %s in %s - logging to %s', cmd, self.path, self.log_file_path)
         self.log_file = open(self.log_file_path, 'ab')
 
         self._process = await asyncio.create_subprocess_exec(
learning_loop_node/trainer/rest/backdoor_controls.py

@@ -29,7 +29,7 @@ async def provide_new_model(request: Request):
     if value == 'on':
         trainer_node.trainer_logic.provide_new_model = True  # type: ignore
 
-    logging.debug(
+    logging.debug('turning automatically provide_new_model %s', value)
 
 
 @router.post("/reset")

@@ -64,7 +64,7 @@ def set_error_configuration(msg: Dict, request: Request):
         get_new_model=msg.get('get_new_model', None),
         save_model=msg.get('save_model', None), )
 
-    logging.info(
+    logging.info('setting error configuration to: %s', asdict(error_configuration))
     trainer_logic = request.app.trainer_logic
 
     # NOTE: trainer_logic is MockTrainerLogic which has a property error_configuration

@@ -82,23 +82,23 @@ async def add_steps(request: Request):
 
     if not trainer_logic._executor or not trainer_logic._executor.is_running():  # pylint: disable=protected-access
         training = trainer_logic._training  # pylint: disable=protected-access
-        logging.error(
+        logging.error('cannot add steps when not running, state: %s', training.training_state if training else 'None')
         raise HTTPException(status_code=409, detail="trainer is not running")
 
     steps = int(str(await request.body(), 'utf-8'))
 
     previous_state = trainer_logic.provide_new_model  # type: ignore
     trainer_logic.provide_new_model = True  # type: ignore
-    logging.warning(
+    logging.warning('simulating newly completed models by moving %s forward', steps)
 
     for _ in range(steps):
         try:
             logging.warning('calling sync_confusion_matrix')
-            await trainer_logic.
+            await trainer_logic._sync_training()  # pylint: disable=protected-access
         except Exception:
            pass  # Tests can force synchroniation to fail, error state is reported to backend
     trainer_logic.provide_new_model = previous_state  # type: ignore
-    logging.warning(
+    logging.warning('progress increased to %s', trainer_logic.current_iteration)  # type: ignore
     await trainer_node.send_status()
 
 
learning_loop_node/trainer/trainer_logic.py

@@ -62,7 +62,7 @@ class TrainerLogic(TrainerLogicGeneric):
                 break
             self.errors.reset(error_key)
             try:
-                await self.
+                await self._sync_training()
             except asyncio.CancelledError:
                 logging.warning('CancelledError in run_training')
                 raise

@@ -130,8 +130,12 @@ class TrainerLogic(TrainerLogicGeneric):
         if self._can_resume():
             self.start_training_task = self._resume()
         else:
-
-
+            base_model_uuid_is_none = self.training.base_model_uuid is None
+            base_model_uuid_is_valid = is_valid_uuid4(self.training.base_model_uuid)
+            if not base_model_uuid_is_none and not base_model_uuid_is_valid:
+                logging.warning('base_model_uuid is not a valid uuid4: %s\n Starting training from scratch.',
+                                self.training.base_model_uuid)
+            if not base_model_uuid_is_valid:
                 self.start_training_task = self._start_training_from_scratch()
             else:
                 self.start_training_task = self._start_training_from_base_model()
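The new branch relies on is_valid_uuid4 from learning_loop_node/helpers/misc.py (imported in the trainer_logic_generic.py hunks below); its body is not part of this diff. A minimal sketch of what such a check typically looks like, under the assumption that it only validates the UUID string format, not the package's actual implementation:

import uuid


def is_valid_uuid4(value) -> bool:
    # NOTE: uuid.UUID(..., version=4) accepts any well-formed UUID string and simply
    # normalises the version bits, so this effectively checks the general UUID format.
    if value is None:
        return False
    try:
        uuid.UUID(str(value), version=4)
        return True
    except ValueError:
        return False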
learning_loop_node/trainer/trainer_logic_generic.py

@@ -10,9 +10,9 @@ from typing import TYPE_CHECKING, Callable, Coroutine, Dict, List, Optional
 
 from fastapi.encoders import jsonable_encoder
 
-from ..data_classes import (Context, Errors,
-
-from ..helpers.misc import create_project_folder, delete_all_training_folders,
+from ..data_classes import (Context, Errors, PretrainedModel, TrainerState, Training, TrainingOut, TrainingStateData,
+                            TrainingStatus)
+from ..helpers.misc import create_project_folder, delete_all_training_folders, is_valid_uuid4
 from .downloader import TrainingsDownloader
 from .exceptions import CriticalError, NodeNeedsRestartError
 from .io_helpers import ActiveTrainingIO, EnvironmentVars, LastTrainingIO

@@ -66,19 +66,12 @@ class TrainerLogicGeneric(ABC):
         return self._training
 
     @property
-    def
-        assert self.
-
-        return self.training_data.hyperparameter
+    def hyperparameters(self) -> dict:
+        assert self._training is not None, 'Training should have data'
+        return self._training.hyperparameters
 
     # ---------------------------------------- PROPERTIES ----------------------------------------
 
-    @property
-    def training_data(self) -> Optional[TrainingData]:
-        if self.training_active and self.training.data:
-            return self.training.data
-        return None
-
     @property
     def training_context(self) -> Optional[Context]:
         if self.training_active:

@@ -111,12 +104,8 @@ class TrainerLogicGeneric(ABC):
     def hyperparameters_for_state_sync(self) -> Optional[Dict]:
         """Used in sync_confusion_matrix and send_status to provide information about the training configuration.
         """
-        if self._training
-
-            information['resolution'] = self._training.data.hyperparameter.resolution
-            information['flipRl'] = self._training.data.hyperparameter.flip_rl
-            information['flipUd'] = self._training.data.hyperparameter.flip_ud
-            return information
+        if self._training:
+            return self._training.hyperparameters
         return None
 
     @property

@@ -173,6 +162,24 @@ class TrainerLogicGeneric(ABC):
     # Initializing a new training object will create the folder structure for the training.
     # The training loop will then run through the states of the training.
 
+    def generate_status_for_loop(self, trainer_uuid: str, trainer_name: str) -> TrainingStatus:
+
+        status = TrainingStatus(id=trainer_uuid,
+                                name=trainer_name,
+                                state=self.state,
+                                errors={},
+                                uptime=self.training_uptime,
+                                progress=self.general_progress)
+
+        status.pretrained_models = self.provided_pretrained_models
+        status.architecture = self.model_architecture
+
+        if self._training:
+            status.errors = self.errors.errors
+            status.context = self.training_context
+
+        return status
+
     async def try_continue_run_if_incomplete(self) -> bool:
         """Tries to continue a training if the last training was not finished.
         """

@@ -188,29 +195,30 @@ class TrainerLogicGeneric(ABC):
         """
         self._training = self.last_training_io.load()
         assert self._training is not None and self._training.training_folder is not None, 'could not restore training folder'
+        logger.info('restored training: \n%s', self._training)
         self._active_training_io = ActiveTrainingIO(
             self._training.training_folder, self.node.loop_communicator, self._training.context)
 
-    async def begin_training(self, organization: str, project: str,
+    async def begin_training(self, organization: str, project: str, training_config: Dict) -> None:
         """Called on `begin_training` event from the Learning Loop.
         """
-        self._init_new_training(Context(organization=organization, project=project),
+        self._init_new_training(Context(organization=organization, project=project), training_config)
         self._begin_training_task()
 
     def _begin_training_task(self) -> None:
         # NOTE: Task object is used to potentially cancel the task
         self.training_task = asyncio.get_event_loop().create_task(self._run())
 
-    def _init_new_training(self, context: Context,
+    def _init_new_training(self, context: Context, training_config: Dict) -> None:
         """Called on `begin_training` event from the Learning Loop.
-        Note that
+        Note that training_config needs the entries 'categories', 'model_variant' and 'training_number',
        but also the hyperparameter entries.
+        'base_model_uuid' is optional if the training is continued from a previous training.
        """
        project_folder = create_project_folder(context)
        if not self._environment_vars.keep_old_trainings:
            delete_all_training_folders(project_folder)
-        self._training = generate_training(project_folder, context)
-        self._training.set_values_from_data(details)
+        self._training = Training.generate_training(project_folder, context, training_config)
 
        self._active_training_io = ActiveTrainingIO(
            self._training.training_folder, self.node.loop_communicator, context)
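Per the docstring above, the new flat training_config dict must carry 'categories', 'model_variant', 'training_number' and the hyperparameter entries, with 'base_model_uuid' optional. A minimal caller sketch, mirroring the values from the test fixtures at the top of this diff; the exact set of accepted keys is decided by Training.generate_training, which is not shown here, and the surrounding setup is hypothetical:

async def start_demo_training(trainer) -> None:
    # `trainer` is assumed to be an instance of a TrainerLogicGeneric subclass.
    training_config = {
        'categories': [],
        'model_variant': '',
        'training_number': 0,
        'hyperparameters': {'resolution': 800, 'flip_rl': False, 'flip_ud': False},
        # 'base_model_uuid': '...',  # optional: continue from an existing model
    }
    await trainer.begin_training(organization='zauberzeug', project='demo',
                                 training_config=training_config)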
@@ -254,7 +262,7 @@ class TrainerLogicGeneric(ABC):
         elif tstate == TrainerState.TrainModelDownloaded:  # -> TrainingRunning -> TrainingFinished
             await self._perform_state('run_training', TrainerState.TrainingRunning, TrainerState.TrainingFinished, self._train)
         elif tstate == TrainerState.TrainingFinished:  # -> ConfusionMatrixSyncing -> ConfusionMatrixSynced
-            await self._perform_state('sync_confusion_matrix', TrainerState.ConfusionMatrixSyncing, TrainerState.ConfusionMatrixSynced, self.
+            await self._perform_state('sync_confusion_matrix', TrainerState.ConfusionMatrixSyncing, TrainerState.ConfusionMatrixSynced, self._sync_training)
         elif tstate == TrainerState.ConfusionMatrixSynced:  # -> TrainModelUploading -> TrainModelUploaded
             await self._perform_state('upload_model', TrainerState.TrainModelUploading, TrainerState.TrainModelUploaded, self._upload_model)
         elif tstate == TrainerState.TrainModelUploaded:  # -> Detecting -> Detected

@@ -298,6 +306,7 @@ class TrainerLogicGeneric(ABC):
                 logger.error('Node Restart Requested')
                 sys.exit(0)
             except Exception as e:
+                print('Error in %s - Exception: %s', state_during, e, flush=True)
                 self.errors.set(error_key, str(e))
                 logger.exception('Error in %s - Exception: %s', state_during, e)
                 self.training.training_state = previous_state

@@ -316,19 +325,25 @@ class TrainerLogicGeneric(ABC):
         self.node.data_exchanger.set_context(self.training.context)
         downloader = TrainingsDownloader(self.node.data_exchanger)
         image_data, skipped_image_count = await downloader.download_training_data(self.training.images_folder)
-
-        self.training.
-        self.training.
+
+        self.training.image_data = image_data
+        self.training.skipped_image_count = skipped_image_count
 
     async def _download_model(self) -> None:
         """If training is continued, the model is downloaded from the Learning Loop to the training_folder.
         The downloaded model.json file is renamed to base_model.json because a new model.json will be created during training.
         """
-        base_model_uuid = self.training.
+        base_model_uuid = self.training.base_model_uuid
+        base_model_uuid_is_none = base_model_uuid is None
+        base_model_uuid_is_valid = is_valid_uuid4(base_model_uuid)
+
+        if not base_model_uuid_is_none and not base_model_uuid_is_valid:
+            logger.warning(
+                'base model uuid was provided but was not valid (base_model_uuid: %s).\nSkipping download and starting training from scratch.', base_model_uuid)
+            return
 
-
-
-            logger.info('skipping model download. No base model provided (in form of uuid): %s', base_model_uuid)
+        if base_model_uuid_is_none:
+            logger.info('No base model provided (base_model_uuid: %s).\nStarting training from scratch.', base_model_uuid)
             return
 
         logger.info('loading model from Learning Loop')

@@ -337,19 +352,21 @@ class TrainerLogicGeneric(ABC):
         shutil.move(f'{self.training.training_folder}/model.json',
                     f'{self.training.training_folder}/base_model.json')
 
-    async def
-        """Syncronizes the
+    async def _sync_training(self) -> None:
+        """Syncronizes the training with the Learning Loop via the update_training endpoint.
         NOTE: This stage sets the errors explicitly because it may be used inside the training stage.
         """
         error_key = 'sync_confusion_matrix'
         try:
             new_best_model = self._get_new_best_training_state()
-            if new_best_model
+            if new_best_model:
                 new_training = TrainingOut(trainer_id=self.node.uuid,
+                                           trainer_name=self.node.name,
                                            confusion_matrix=new_best_model.confusion_matrix,
-                                           train_image_count=self.training.
-                                           test_image_count=self.training.
-                                           hyperparameters=self.hyperparameters_for_state_sync
+                                           train_image_count=self.training.train_image_count(),
+                                           test_image_count=self.training.test_image_count(),
+                                           hyperparameters=self.hyperparameters_for_state_sync,
+                                           best_epoch=new_best_model.epoch)
                 await asyncio.sleep(0.1)  # NOTE needed for tests.
 
                 result = await self.node.sio_client.call('update_training', (

@@ -411,7 +428,7 @@ class TrainerLogicGeneric(ABC):
     def _dump_categories_to_json(self) -> str:
         """Dumps the categories to a json file and returns the path to the file.
         """
-        content = {'categories': [asdict(c) for c in self.
+        content = {'categories': [asdict(c) for c in self._training.categories], } if self._training else None
         json_path = '/tmp/model.json'
         with open(json_path, 'w') as f:
             json.dump(content, f)

@@ -481,12 +498,13 @@ class TrainerLogicGeneric(ABC):
 
     @abstractmethod
     def _get_new_best_training_state(self) -> Optional[TrainingStateData]:
-        """Is called frequently by `
+        """Is called frequently by `_sync_training` during training to check if a new "best" model is availabe.
        Returns None if no new model could be found. Otherwise TrainingStateData(confusion_matrix, meta_information).
        `confusion_matrix` contains a dict of all classes:
            - The classes must be identified by their uuid, not their name.
            - For each class a dict with tp, fp, fn is provided (true positives, false positives, false negatives).
        `meta_information` can hold any data which is helpful for self._on_metrics_published to store weight file etc for later upload via self.get_model_files
+        `epoch` is the epoch number of the best model.
        """
        raise NotImplementedError
 
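A trainer could satisfy the abstract contract above roughly as in the sketch below: a per-class confusion matrix keyed by the category uuid with tp/fp/fn counts, optional meta_information, and the newly documented epoch. The keyword names follow the docstring and the way _sync_training reads the result; whether they are all constructor arguments of the TrainingStateData dataclass is an assumption, and the class and flag are hypothetical.

from typing import Dict, Optional

from learning_loop_node.data_classes import TrainingStateData


class MyTrainerLogic:
    """Sketch of the relevant method only; a real trainer derives from TrainerLogicGeneric."""

    def __init__(self) -> None:
        self.has_new_model = False  # hypothetical flag set by the training process

    def _get_new_best_training_state(self) -> Optional[TrainingStateData]:
        if not self.has_new_model:
            return None
        confusion_matrix: Dict = {
            # one entry per class, keyed by the category uuid (not its name)
            '11111111-2222-3333-4444-555555555555': {'tp': 42, 'fp': 3, 'fn': 5},
        }
        return TrainingStateData(confusion_matrix=confusion_matrix,
                                 meta_information={'weightfile': 'best.pt'},
                                 epoch=7)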
learning_loop_node/trainer/trainer_node.py

@@ -7,7 +7,6 @@ from typing import Dict, Optional
 from fastapi.encoders import jsonable_encoder
 from socketio import AsyncClient, exceptions
 
-from ..data_classes import TrainingStatus
 from ..node import Node
 from .io_helpers import LastTrainingIO
 from .rest import backdoor_controls

@@ -23,14 +22,15 @@ class TrainerNode(Node):
         self.last_training_io = LastTrainingIO(self.uuid)
         self.trainer_logic._last_training_io = self.last_training_io
 
-        self.
+        self._first_idle_time: float | None = None
         if os.environ.get('TRAINER_IDLE_TIMEOUT_SEC', 0.0):
-            self.
+            self._idle_timeout = float(os.environ.get('TRAINER_IDLE_TIMEOUT_SEC', 0.0))
         else:
-            self.
-        if self.
+            self._idle_timeout = 0.0
+        if self._idle_timeout:
             self.log.info(
-
+                'Trainer started with an idle_timeout of %s seconds. Note that shutdown does not work if docker container has the restart policy set to always',
+                self._idle_timeout)
 
         if use_backdoor_controls or os.environ.get('USE_BACKDOOR_CONTROLS', '0').lower() in ('1', 'true'):
             self.include_router(backdoor_controls.router, tags=["controls"])

@@ -53,8 +53,8 @@ class TrainerNode(Node):
         except exceptions.TimeoutError:
             self.log.warning('timeout when sending status to learning loop, reconnecting sio_client')
             await self.sio_client.disconnect()  # NOTE: reconnect happens in node._on_repeat
-        except Exception
-            self.log.exception(
+        except Exception:
+            self.log.exception('could not send status. Exception:')
 
     # ---------------------------------------------- NODE METHODS ---------------------------------------------------
 

@@ -68,7 +68,7 @@ class TrainerNode(Node):
 
         @sio_client.event
         async def stop_training():
-            self.log.info(
+            self.log.info('stop_training received. Current state : %s', self.trainer_logic.state)
             try:
                 await self.trainer_logic.stop()
             except Exception:

@@ -80,24 +80,7 @@ class TrainerNode(Node):
             self.log.debug('cannot send status - not connected to the Learning Loop')
             return
 
-        status =
-                                name=self.name,
-                                state=self.trainer_logic.state,
-                                errors={},
-                                uptime=self.trainer_logic.training_uptime,
-                                progress=self.trainer_logic.general_progress)
-
-        status.pretrained_models = self.trainer_logic.provided_pretrained_models
-        status.architecture = self.trainer_logic.model_architecture
-
-        if data := self.trainer_logic.training_data:
-            status.train_image_count = data.train_image_count()
-            status.test_image_count = data.test_image_count()
-            status.skipped_image_count = data.skipped_image_count
-            status.hyperparameters = self.trainer_logic.hyperparameters_for_state_sync
-            status.errors = self.trainer_logic.errors.errors
-            status.context = self.trainer_logic.training_context
-
+        status = self.trainer_logic.generate_status_for_loop(self.uuid, self.name)
         self.log.debug('sending status: %s', status.short_str())
         result = await self.sio_client.call('update_trainer', jsonable_encoder(asdict(status)), timeout=30)
         if isinstance(result, Dict) and not result['success']:

@@ -105,17 +88,17 @@ class TrainerNode(Node):
             self.log.error('Error when sending status update: Response from loop was:\n %s', result)
 
     def check_idle_timeout(self):
-        if not self.
+        if not self._idle_timeout:
             return
 
         if self.trainer_logic.state == 'idle':
-            if self.
-                self.
-            idle_time = time.time() - self.
-            if idle_time > self.
+            if self._first_idle_time is None:
+                self._first_idle_time = time.time()
+            idle_time = time.time() - self._first_idle_time
+            if idle_time > self._idle_timeout:
                 self.log.info('Trainer has been idle for %.2f s (with timeout %.2f s). Shutting down.',
-
+                              idle_time, self._idle_timeout)
                 sys.exit(0)
-            self.log.debug('idle time: %.2f s / %.2f s', idle_time, self.
+            self.log.debug('idle time: %.2f s / %.2f s', idle_time, self._idle_timeout)
         else:
-            self.
+            self._first_idle_time = None
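The renamed private attributes are driven by the TRAINER_IDLE_TIMEOUT_SEC environment variable read in the constructor hunk above; an unset or empty value leaves _idle_timeout at 0.0, which makes check_idle_timeout a no-op. A small self-contained sketch that mirrors that resolution logic (not the package's own code):

import os


def resolve_idle_timeout() -> float:
    # Mirrors the TrainerNode constructor above: unset or empty means 0.0, i.e. the idle check is disabled.
    raw = os.environ.get('TRAINER_IDLE_TIMEOUT_SEC', 0.0)
    return float(raw) if raw else 0.0


os.environ['TRAINER_IDLE_TIMEOUT_SEC'] = '3600'  # e.g. shut the trainer down after one idle hour
assert resolve_idle_timeout() == 3600.0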