learning-loop-node 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of learning-loop-node might be problematic.
- learning_loop_node/data_classes/__init__.py +3 -3
- learning_loop_node/data_classes/general.py +1 -1
- learning_loop_node/data_classes/training.py +62 -67
- learning_loop_node/data_exchanger.py +11 -9
- learning_loop_node/helpers/environment_reader.py +2 -2
- learning_loop_node/helpers/log_conf.py +4 -1
- learning_loop_node/helpers/misc.py +7 -17
- learning_loop_node/loop_communication.py +4 -8
- learning_loop_node/node.py +10 -4
- learning_loop_node/rest.py +4 -2
- learning_loop_node/tests/detector/conftest.py +17 -21
- learning_loop_node/tests/trainer/conftest.py +18 -12
- learning_loop_node/tests/trainer/states/test_state_download_train_model.py +7 -3
- learning_loop_node/tests/trainer/states/test_state_prepare.py +0 -1
- learning_loop_node/tests/trainer/states/test_state_sync_confusion_matrix.py +2 -1
- learning_loop_node/tests/trainer/states/test_state_train.py +0 -2
- learning_loop_node/tests/trainer/test_trainer_states.py +6 -1
- learning_loop_node/tests/trainer/testing_trainer_logic.py +3 -3
- learning_loop_node/trainer/downloader.py +1 -1
- learning_loop_node/trainer/executor.py +2 -2
- learning_loop_node/trainer/rest/backdoor_controls.py +6 -6
- learning_loop_node/trainer/trainer_logic.py +7 -3
- learning_loop_node/trainer/trainer_logic_generic.py +59 -41
- learning_loop_node/trainer/trainer_node.py +18 -35
- {learning_loop_node-0.11.1.dist-info → learning_loop_node-0.12.0.dist-info}/METADATA +1 -1
- {learning_loop_node-0.11.1.dist-info → learning_loop_node-0.12.0.dist-info}/RECORD +27 -27
- {learning_loop_node-0.11.1.dist-info → learning_loop_node-0.12.0.dist-info}/WHEEL +0 -0
learning_loop_node/data_classes/__init__.py
CHANGED
@@ -5,8 +5,8 @@ from .general import (AnnotationNodeStatus, Category, CategoryType, Context, Det
                        ModelInformation, NodeState, NodeStatus)
 from .image_metadata import ImageMetadata
 from .socket_response import SocketResponse
-from .training import (Errors,
-
+from .training import (Errors, PretrainedModel, TrainerState, Training, TrainingError, TrainingOut, TrainingStateData,
+                       TrainingStatus)
 
 __all__ = [
     'AnnotationData', 'AnnotationEventType', 'SegmentationAnnotation', 'ToolOutput', 'UserInput',
@@ -15,6 +15,6 @@ __all__ = [
     'AnnotationNodeStatus', 'Category', 'CategoryType', 'Context', 'DetectionStatus', 'ErrorConfiguration',
     'ModelInformation', 'NodeState', 'NodeStatus',
     'SocketResponse',
-    'Errors', '
+    'Errors', 'PretrainedModel', 'TrainerState', 'Training',
     'TrainingError', 'TrainingOut', 'TrainingStateData', 'TrainingStatus',
 ]
learning_loop_node/data_classes/general.py
CHANGED
@@ -75,7 +75,7 @@ class ModelInformation():
         """
         model_info_file_path = f'{model_root_path}/model.json'
         if not os.path.exists(model_info_file_path):
-            logging.warning(
+            logging.warning('could not find model information file %s', model_info_file_path)
             return None
         with open(model_info_file_path, 'r') as f:
             try:
learning_loop_node/data_classes/training.py
CHANGED
@@ -4,46 +4,16 @@ import time
 from dataclasses import dataclass, field
 from enum import Enum
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
+from uuid import uuid4
 
+from ..helpers.misc import create_image_folder, create_training_folder
 # pylint: disable=no-name-in-module
 from .general import Category, Context
 
 KWONLY_SLOTS = {'kw_only': True, 'slots': True} if sys.version_info >= (3, 10) else {}
 
 
-@dataclass(**KWONLY_SLOTS)
-class Hyperparameter():
-    resolution: int
-    flip_rl: bool
-    flip_ud: bool
-
-    @staticmethod
-    def from_data(data: Dict):
-        return Hyperparameter(
-            resolution=data['resolution'],
-            flip_rl=data.get('flip_rl', False),
-            flip_ud=data.get('flip_ud', False)
-        )
-
-
-@dataclass(**KWONLY_SLOTS)
-class TrainingData():
-    image_data: List[Dict] = field(default_factory=list)
-    skipped_image_count: Optional[int] = 0
-    categories: List[Category] = field(default_factory=list)
-    hyperparameter: Optional[Hyperparameter] = None
-
-    def image_ids(self):
-        return [image['id'] for image in self.image_data]
-
-    def train_image_count(self):
-        return len([image for image in self.image_data if image['set'] == 'train'])
-
-    def test_image_count(self):
-        return len([image for image in self.image_data if image['set'] == 'test'])
-
-
 @dataclass(**KWONLY_SLOTS)
 class PretrainedModel():
     name: str
@@ -75,26 +45,21 @@ class TrainerState(str, Enum):
 class TrainingStatus():
     id: str  # NOTE this must not be changed, but tests wont detect a change -> update tests!
     name: str
+
     state: Optional[str]
-    errors: Optional[Dict]
     uptime: Optional[float]
+    errors: Optional[Dict[str, Any]]
     progress: Optional[float]
 
-    train_image_count: Optional[int] = None
-    test_image_count: Optional[int] = None
-    skipped_image_count: Optional[int] = None
     pretrained_models: List[PretrainedModel] = field(default_factory=list)
-    hyperparameters: Optional[Dict] = None
     architecture: Optional[str] = None
     context: Optional[Context] = None
 
     def short_str(self) -> str:
         prgr = f'{self.progress * 100:.0f}%' if self.progress else ''
-        trtesk = f'{self.train_image_count}/{self.test_image_count}/{self.skipped_image_count}' if self.train_image_count else 'n.a.'
         cntxt = f'{self.context.organization}/{self.context.project}' if self.context else ''
-        hyps = f'({self.hyperparameters})' if self.hyperparameters else ''
         arch = f'.{self.architecture} - ' if self.architecture else ''
-        return f'[{str(self.state).rsplit(".", maxsplit=1)[-1]} {prgr}. {self.name}({self.id}).
+        return f'[{str(self.state).rsplit(".", maxsplit=1)[-1]} {prgr}. {self.name}({self.id}). {cntxt}{arch}]'
 
 
 @dataclass(**KWONLY_SLOTS)
@@ -105,53 +70,83 @@ class Training():
     project_folder: str  # f'{GLOBALS.data_folder}/{context.organization}/{context.project}'
     images_folder: str  # f'{project_folder}/images'
     training_folder: str  # f'{project_folder}/trainings/{trainings_id}'
+
+    categories: List[Category]
+    hyperparameters: dict
+
+    training_number: int
+    training_state: str
+    model_variant: str  # from `provided_pretrained_models->name`
+
     start_time: float = field(default_factory=time.time)
 
-    # model uuid to
-    base_model_uuid_or_name: Optional[str] = None
+    base_model_uuid: Optional[str] = None  # model uuid to continue training (is loaded from loop)
 
-
-
-
-    model_uuid_for_detecting: Optional[str] = None
-    hyperparameters: Optional[Dict] = None
+    # NOTE: these are set later after the model has been uploaded
+    image_data: Optional[List[dict]] = None
+    skipped_image_count: Optional[int] = None
+    model_uuid_for_detecting: Optional[str] = None  # Model uuid to load from the loop after training and upload
 
     @property
     def training_folder_path(self) -> Path:
         return Path(self.training_folder)
 
-
-
-
-
-
-
+    @classmethod
+    def generate_training(cls, project_folder: str, context: Context, data: Dict[str, Any]) -> 'Training':
+        if 'hyperparameters' not in data or not isinstance(data['hyperparameters'], dict):
+            raise ValueError('hyperparameters missing or not a dict')
+        if 'categories' not in data or not isinstance(data['categories'], list):
+            raise ValueError('categories missing or not a list')
+        if 'training_number' not in data or not isinstance(data['training_number'], int):
+            raise ValueError('training_number missing or not an int')
+        if 'model_variant' not in data or not isinstance(data['model_variant'], str):
+            raise ValueError('model_variant missing or not a str')
+
+        training_uuid = str(uuid4())
+
+        return Training(
+            id=training_uuid,
+            context=context,
+            project_folder=project_folder,
+            images_folder=create_image_folder(project_folder),
+            training_folder=create_training_folder(project_folder, training_uuid),
+            categories=Category.from_list(data['categories']),
+            hyperparameters=data['hyperparameters'],
+            training_number=data['training_number'],
+            base_model_uuid=data.get('base_model_uuid', None),
+            model_variant=data['model_variant'],
+            training_state=TrainerState.Initialized.value
+        )
+
+    def image_ids(self) -> List[str]:
+        assert self.image_data is not None, 'Image data not set'
+        return [image['id'] for image in self.image_data]
+
+    def train_image_count(self) -> int:
+        assert self.image_data is not None, 'Image data not set'
+        return len([image for image in self.image_data if image['set'] == 'train'])
+
+    def test_image_count(self) -> int:
+        assert self.image_data is not None, 'Image data not set'
+        return len([image for image in self.image_data if image['set'] == 'test'])
 
 
 @dataclass(**KWONLY_SLOTS)
 class TrainingOut():
+    trainer_id: str
+    trainer_name: Optional[str] = None
     confusion_matrix: Optional[Dict] = None  # This is actually just class-wise metrics
     train_image_count: Optional[int] = None
     test_image_count: Optional[int] = None
-
-
+    hyperparameters: Optional[Dict[str, Any]] = None
+    best_epoch: Optional[int] = None
 
 
 @dataclass(**KWONLY_SLOTS)
 class TrainingStateData():
     confusion_matrix: Dict = field(default_factory=dict)
     meta_information: Dict = field(default_factory=dict)
-
-
-@dataclass(**KWONLY_SLOTS)
-class Model():
-    uuid: str
-    confusion_matrix: Optional[Dict] = None
-    parent_id: Optional[str] = None
-    train_image_count: Optional[int] = None
-    test_image_count: Optional[int] = None
-    trainer_id: Optional[str] = None
-    hyperparameters: Optional[str] = None
+    epoch: Optional[int] = None
 
 
 class Errors():
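The new `Training.generate_training` classmethod replaces the old `generate_training` helper from `helpers/misc.py` and validates its input. Below is a minimal sketch of how a trainer might build the `training_config` dict it expects; the field names come from the diff above, while the concrete path and hyperparameter values are made-up examples (the hyperparameter keys mirror the test fixtures later in this diff).

    from learning_loop_node.data_classes import Context, Training

    # hypothetical values for illustration only
    context = Context(organization='zauberzeug', project='demo')
    training_config = {
        'categories': [],                    # category dicts as delivered by the loop
        'training_number': 1,
        'model_variant': 'small',
        'hyperparameters': {'resolution': 800, 'flip_rl': False, 'flip_ud': False},
        # 'base_model_uuid': '...',          # optional: continue from an existing model
    }

    # creates the image and training folders below the (hypothetical) project folder
    training = Training.generate_training('/data/zauberzeug/demo', context, training_config)
    print(training.id, training.training_folder)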
learning_loop_node/data_exchanger.py
CHANGED
@@ -62,7 +62,7 @@ class DataExchanger():
 
     async def fetch_image_uuids(self, query_params: Optional[str] = '') -> List[str]:
         """Fetch image uuids from the learning loop data endpoint."""
-        logging.info(
+        logging.info('Fetching image uuids for %s/%s..', self.context.organization, self.context.project)
 
         response = await self.loop_communicator.get(f'/{self.context.organization}/projects/{self.context.project}/data?{query_params}')
         assert response.status_code == 200, response
@@ -70,7 +70,7 @@ class DataExchanger():
 
     async def download_images_data(self, image_uuids: List[str], chunk_size: int = 100) -> List[Dict]:
         """Download image annotations, tags, set and other information for the given image uuids."""
-        logging.info(
+        logging.info('Fetching annotations, tags, sets, etc. for %s images..', len(image_uuids))
 
         num_image_ids = len(image_uuids)
         if num_image_ids == 0:
@@ -84,7 +84,7 @@ class DataExchanger():
             chunk_ids = image_uuids[i:i+chunk_size]
             response = await self.loop_communicator.get(f'/{self.context.organization}/projects/{self.context.project}/images?ids={",".join(chunk_ids)}')
             if response.status_code != 200:
-                logging.error(
+                logging.error('Error %s during downloading image data. Continue with next batch..', response.status_code)
                 continue
             images_data += response.json()['images']
 
@@ -92,7 +92,7 @@ class DataExchanger():
 
     async def download_images(self, image_uuids: List[str], image_folder: str, chunk_size: int = 10) -> None:
         """Downloads images (actual image data). Will skip existing images"""
-        logging.info(
+        logging.info('Downloading %s images (actual image data).. skipping existing images.', len(image_uuids))
         if not image_uuids:
             return
 
@@ -106,7 +106,7 @@ class DataExchanger():
             self.progress = 1.0
             return
 
-        logging.info(
+        logging.info('Downloading %s new images to %s..', num_new_image_ids, image_folder)
         os.makedirs(image_folder, exist_ok=True)
 
         progress_factor = 0.5 / num_new_image_ids  # second 50% of progress is for downloading images
@@ -128,7 +128,7 @@ class DataExchanger():
             await asyncio.sleep(1)
             response = await self.loop_communicator.get(path)
             if response.status_code != HTTPStatus.OK:
-                logging.error(
+                logging.error('bad status code %s for %s. Details: %s', response.status_code, path, response.text)
                 return
             filename = f'{image_folder}/{image_id}.jpg'
             async with aiofiles.open(filename, 'wb') as f:
@@ -171,7 +171,7 @@ class DataExchanger():
             created_files.append(new_file)
 
         shutil.rmtree(tmp_path, ignore_errors=True)
-        logging.info(
+        logging.info('Downloaded model %s(%s) to %s.', model_uuid, model_format, target_folder)
         return created_files
 
     async def upload_model_get_uuid(self, context: Context, files: List[str], training_number: Optional[int], mformat: str) -> str:
@@ -182,10 +182,12 @@ class DataExchanger():
         """
         response = await self.loop_communicator.put(f'/{context.organization}/projects/{context.project}/trainings/{training_number}/models/latest/{mformat}/file', files=files)
        if response.status_code != 200:
-            logging.error(
+            logging.error('Could not upload model for training %s, format %s: %s',
+                          training_number, mformat, response.text)
             raise CriticalError(
                 f'Could not upload model for training {training_number}, format {mformat}: {response.text}')
 
         uploaded_model = response.json()
-        logging.info(
+        logging.info('Uploaded model for training %s, format %s. Response is: %s',
+                     training_number, mformat, uploaded_model)
         return uploaded_model['id']
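Throughout this release, truncated single-argument logging calls were rewritten to lazy %-style formatting, where the logging module only interpolates the arguments if the record is actually emitted. A small, generic illustration of the difference (not code from the package):

    import logging

    image_uuids = ['a1', 'b2', 'c3']

    # eager: the f-string is built even when INFO is disabled
    logging.info(f'Fetching annotations for {len(image_uuids)} images..')

    # lazy: arguments are only interpolated if the record is emitted
    logging.info('Fetching annotations for %s images..', len(image_uuids))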
learning_loop_node/helpers/environment_reader.py
CHANGED
@@ -11,14 +11,14 @@ def read_from_env(possible_names: List[str], ignore_errors: bool = True) -> Opti
     # Possible error: no values are set
     if not values:
         if ignore_errors:
-            logging.warning(
+            logging.warning('no environment variable set for %s', possible_names)
             return None
         raise ValueError(f'no environment variable set for {possible_names}')
 
     # Possible error: multiple values are not None and not equal
     if len(values) > 1 and len(set(values)) > 1:
         if ignore_errors:
-            logging.warning(
+            logging.warning('different environment variables set for %s: %s', possible_names, values)
             return None
         raise ValueError(f'different environment variables set for {possible_names}: {values}')
 
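For context, `read_from_env` resolves a value from a list of candidate environment variable names and warns (or raises) when nothing is set or the candidates disagree. A hedged usage sketch; the second variable name is hypothetical:

    import os
    from learning_loop_node.helpers.environment_reader import read_from_env

    os.environ['LOOP_ORGANIZATION'] = 'zauberzeug'   # example value

    # should yield 'zauberzeug'; with ignore_errors=True a missing or conflicting
    # set of variables only logs a warning and returns None
    organization = read_from_env(['LOOP_ORGANIZATION', 'ORGANIZATION'])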
learning_loop_node/helpers/log_conf.py
CHANGED
@@ -2,7 +2,7 @@ import logging.config
 
 LOGGING_CONF = {
     'version': 1,
-    'disable_existing_loggers':
+    'disable_existing_loggers': False,  # to make sure this config is used
     'formatters': {
         'default': {
             'format': '%(asctime)s,%(msecs)01d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
@@ -34,3 +34,6 @@ LOGGING_CONF = {
 
 def init():
     logging.config.dictConfig(LOGGING_CONF)
+
+
+init()
learning_loop_node/helpers/misc.py
CHANGED
@@ -14,7 +14,8 @@ from uuid import UUID, uuid4
 
 import pynvml
 
-from ..data_classes import Context
+from ..data_classes.general import Context
+from ..data_classes.socket_response import SocketResponse
 from ..globals import GLOBALS
 
 T = TypeVar('T')
@@ -81,11 +82,11 @@ async def delete_corrupt_images(image_folder: str, check_jpeg: bool = False) ->
     n_deleted = 0
     for image in glob(f'{image_folder}/*.jpg'):
         if not await is_valid_image(image, check_jpeg):
-            logging.debug(
+            logging.debug(' deleting image %s', image)
             os.remove(image)
             n_deleted += 1
 
-    logging.info(
+    logging.info('deleted %s images', n_deleted)
 
 
 def create_resource_paths(organization_name: str, project_name: str, image_ids: List[str]) -> Tuple[List[str], List[str]]:
@@ -144,7 +145,7 @@ def ensure_socket_response(func):
                 raise Exception(
                     f"Return type for sio must be str, bool, SocketResponse or None', but was {type(value)}'")
         except Exception as e:
-            logging.exception(
+            logging.exception('An error occured for %s', args[0])
 
             return asdict(SocketResponse.for_failure(str(e)))
 
@@ -183,26 +184,15 @@ def activate_asyncio_warnings() -> None:
 
 
 def images_for_ids(image_ids, image_folder) -> List[str]:
-    logging.info(
+    logging.info('### Going to get images for %s images ids', len(image_ids))
     start = perf_counter()
     images = [img for img in glob(f'{image_folder}/**/*.*', recursive=True)
              if os.path.splitext(os.path.basename(img))[0] in image_ids]
     end = perf_counter()
-    logging.info(
+    logging.info('found %s images for %s image ids, which took %.2f seconds', len(images), len(image_ids), end-start)
     return images
 
 
-def generate_training(project_folder: str, context: Context) -> Training:
-    training_uuid = str(uuid4())
-    return Training(
-        id=training_uuid,
-        context=context,
-        project_folder=project_folder,
-        images_folder=create_image_folder(project_folder),
-        training_folder=create_training_folder(project_folder, training_uuid)
-    )
-
-
 def delete_all_training_folders(project_folder: str):
     if not os.path.exists(f'{project_folder}/trainings'):
         return
learning_loop_node/loop_communication.py
CHANGED
@@ -94,8 +94,7 @@ class LoopCommunicator():
         if requires_login:
             await self.ensure_login()
             return await self.retry_on_401(self._get, path, api_prefix)
-
-        return await self._get(path, api_prefix)
+        return await self._get(path, api_prefix)
 
     async def _get(self, path: str, api_prefix: str) -> httpx.Response:
         return await self.async_client.get(api_prefix+path)
@@ -104,8 +103,7 @@ class LoopCommunicator():
         if requires_login:
             await self.ensure_login()
             return await self.retry_on_401(self._put, path, files, api_prefix, **kwargs)
-
-        return await self._put(path, files, api_prefix, **kwargs)
+        return await self._put(path, files, api_prefix, **kwargs)
 
     async def _put(self, path: str, files: Optional[List[str]], api_prefix: str, **kwargs) -> httpx.Response:
         if files is None:
@@ -133,8 +131,7 @@ class LoopCommunicator():
         if requires_login:
             await self.ensure_login()
             return await self.retry_on_401(self._post, path, api_prefix, **kwargs)
-
-        return await self._post(path, api_prefix, **kwargs)
+        return await self._post(path, api_prefix, **kwargs)
 
     async def _post(self, path, api_prefix='/api', **kwargs) -> httpx.Response:
         return await self.async_client.post(api_prefix+path, **kwargs)
@@ -143,8 +140,7 @@ class LoopCommunicator():
         if requires_login:
             await self.ensure_login()
             return await self.retry_on_401(self._delete, path, api_prefix, **kwargs)
-
-        return await self._delete(path, api_prefix, **kwargs)
+        return await self._delete(path, api_prefix, **kwargs)
 
     async def _delete(self, path, api_prefix, **kwargs) -> httpx.Response:
         return await self.async_client.delete(api_prefix+path, **kwargs)
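These hunks only remove dead code: each method already returns from the `requires_login` branch, so the trailing fall-through lines were never reached with the default arguments. `retry_on_401` itself is not part of this diff; the following is only a rough sketch of the kind of wrapper these methods rely on, with names and behaviour assumed rather than taken from the package:

    from typing import Any, Awaitable, Callable

    import httpx


    class LoopCommunicatorSketch:
        """Conceptual sketch only - not the package's actual implementation."""

        async def retry_on_401(self, func: Callable[..., Awaitable[httpx.Response]], *args: Any) -> httpx.Response:
            response = await func(*args)
            if response.status_code == 401:
                await self.ensure_login()   # re-authenticate, then retry the request once
                response = await func(*args)
            return response

        async def ensure_login(self) -> None:
            ...  # placeholder for the real login handling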
learning_loop_node/node.py
CHANGED
@@ -1,3 +1,10 @@
+
+# NOTE: log_conf is imported first to initialize the loggers before they are created
+from .helpers import log_conf  # pylint: disable=unused-import
+
+# isort: split
+# pylint: disable=wrong-import-order,ungrouped-imports
+
 import asyncio
 import logging
 import ssl
@@ -14,7 +21,6 @@ from socketio import AsyncClient
 
 from .data_classes import NodeStatus
 from .data_exchanger import DataExchanger
-from .helpers import log_conf
 from .helpers.misc import ensure_socket_response, read_or_create_uuid
 from .loop_communication import LoopCommunicator
 from .rest import router
@@ -39,7 +45,6 @@ class Node(FastAPI):
         """
 
         super().__init__(lifespan=self.lifespan)
-        log_conf.init()
 
         self.name = name
         self.uuid = uuid or read_or_create_uuid(self.name)
@@ -98,13 +103,14 @@ class Node(FastAPI):
         pass
 
     async def _on_startup(self):
-        self.log.info('received "startup" lifecycle-event')
+        self.log.info('received "startup" lifecycle-event - connecting to loop')
         try:
             await self.reconnect_to_loop()
         except Exception:
             self.log.warning('Could not establish sio connection to loop during startup')
-        self.log.info('
+        self.log.info('successfully connected to loop - calling on_startup')
         await self.on_startup()
+        self.log.info('successfully finished on_startup')
 
     async def _on_shutdown(self):
         self.log.info('received "shutdown" lifecycle-event')
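The logging setup moves from `Node.__init__` to import time: `log_conf.py` now calls `init()` at module level, and `node.py` imports it before anything else so that `logging.config.dictConfig` runs before other modules create their loggers (with `disable_existing_loggers` set to `False`, loggers created earlier keep working too). A minimal sketch of the same pattern, using a hypothetical module and a deliberately tiny config:

    # my_package/log_conf.py -- hypothetical module illustrating the pattern
    import logging.config

    LOGGING_CONF = {
        'version': 1,
        'disable_existing_loggers': False,  # keep loggers that were created before this ran
        'handlers': {'console': {'class': 'logging.StreamHandler'}},
        'root': {'handlers': ['console'], 'level': 'INFO'},
    }


    def init() -> None:
        logging.config.dictConfig(LOGGING_CONF)


    init()  # executed on first import, so importing this module first configures logging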
learning_loop_node/rest.py
CHANGED
@@ -1,4 +1,5 @@
 import logging
+from logging import Logger
 from typing import TYPE_CHECKING
 
 from fastapi import APIRouter, HTTPException, Request
@@ -20,14 +21,15 @@ async def _debug_logging(request: Request) -> str:
     '''
     state = str(await request.body(), 'utf-8')
     node: 'Node' = request.app
+    log: Logger = node.log  # type: ignore
 
     if state == 'off':
         logger.info('turning debug logging off')
-
+        log.setLevel('INFO')
         return 'off'
     if state == 'on':
         logger.info('turning debug logging on')
-
+        log.setLevel('DEBUG')
         return 'on'
     raise HTTPException(status_code=400, detail='Invalid state')
 
learning_loop_node/tests/detector/conftest.py
CHANGED
@@ -118,34 +118,30 @@ def get_outbox_files(outbox: Outbox):
     return [file for file in files if os.path.isfile(file)]
 
 
-
-def
-
-
-
-
-
-
-
-                                  model_name="mock",
-                                  )])
-
-    @property
-    def is_initialized(self):
-        return True
+class MockDetectorLogic(DetectorLogic):  # pylint: disable=abstract-method
+    def __init__(self):
+        super().__init__('mock')
+        self.image_metadata = ImageMetadata(
+            box_detections=[BoxDetection(category_name="test",
+                                         category_id="1",
+                                         confidence=0.9,
+                                         x=0, y=0, width=10, height=10,
+                                         model_name="mock",
+                                         )])
 
-
-
+    @property
+    def is_initialized(self):
+        return True
 
-
+    def evaluate_with_all_info(self, image: np.ndarray, tags: List[str], source: Optional[str] = None, creation_date: Optional[str] = None):
+        return self.image_metadata
 
 
 @pytest.fixture
-def detector_node(
+def detector_node():
     os.environ['LOOP_ORGANIZATION'] = 'test_organization'
     os.environ['LOOP_PROJECT'] = 'test_project'
-    return DetectorNode(name="test_node", detector=
+    return DetectorNode(name="test_node", detector=MockDetectorLogic())
 
 # ====================================== REDUNDANT FIXTURES IN ALL CONFTESTS ! ======================================
 
learning_loop_node/tests/trainer/conftest.py
CHANGED
@@ -30,12 +30,15 @@ async def test_initialized_trainer_node():
     node = TrainerNode(name='test', trainer_logic=trainer, uuid='NOD30000-0000-0000-0000-000000000000')
     trainer._node = node
     trainer._init_new_training(context=Context(organization='zauberzeug', project='demo'),
-
-
-
-
-
-
+                               training_config={'categories': [],
+                                                'id': '00000000-0000-0000-0000-000000000012',  # version 1.2 of demo project
+                                                'training_number': 0,
+                                                'model_variant': '',
+                                                'hyperparameters': {
+                                                    'resolution': 800,
+                                                    'flip_rl': False,
+                                                    'flip_ud': False}
+                                                })
     await node._on_startup()
     yield node
     await node._on_shutdown()
@@ -50,12 +53,15 @@ async def test_initialized_trainer():
     await node._on_startup()
     trainer._node = node
     trainer._init_new_training(context=Context(organization='zauberzeug', project='demo'),
-
-
-
-
-
-
+                               training_config={'categories': [],
+                                                'id': '00000000-0000-0000-0000-000000000012',  # version 1.2 of demo project
+                                                'training_number': 0,
+                                                'model_variant': '',
+                                                'hyperparameters': {
+                                                    'resolution': 800,
+                                                    'flip_rl': False,
+                                                    'flip_ud': False}
+                                                })
     yield trainer
     try:
         await node._on_shutdown()
learning_loop_node/tests/trainer/states/test_state_download_train_model.py
CHANGED
@@ -3,6 +3,7 @@ import asyncio
 import os
 
 from ....data_classes import TrainerState
+from ... import test_helper
 from ..state_helper import assert_training_state, create_active_training_file
 from ..testing_trainer_logic import TestingTrainerLogic
 
@@ -11,9 +12,12 @@ from ..testing_trainer_logic import TestingTrainerLogic
 
 async def test_downloading_is_successful(test_initialized_trainer: TestingTrainerLogic):
     trainer = test_initialized_trainer
-    create_active_training_file(trainer, training_state=TrainerState.DataDownloaded)
 
-
+    model_id = await test_helper.get_latest_model_id(project='demo')
+    create_active_training_file(trainer,
+                                base_model_uuid=model_id,
+                                training_state=TrainerState.DataDownloaded)
+
     trainer._init_from_last_training()
 
     asyncio.get_running_loop().create_task(
@@ -50,7 +54,7 @@ async def test_abort_download_model(test_initialized_trainer: TestingTrainerLogi
 async def test_downloading_failed(test_initialized_trainer: TestingTrainerLogic):
     trainer = test_initialized_trainer
     create_active_training_file(trainer, training_state=TrainerState.DataDownloaded,
-
+                                base_model_uuid='00000000-0000-0000-0000-000000000000')  # bad model id)
     trainer._init_from_last_training()
 
     trainer._begin_training_task()
learning_loop_node/tests/trainer/states/test_state_prepare.py
CHANGED
@@ -20,7 +20,6 @@ async def test_preparing_is_successful(test_initialized_trainer: TestingTrainerL
     await trainer._perform_state('prepare', TrainerState.DataDownloading, TrainerState.DataDownloaded, trainer._prepare)
     assert trainer_has_prepare_error(trainer) is False
     assert trainer.training.training_state == TrainerState.DataDownloaded
-    assert trainer.training.data is not None
     assert trainer.node.last_training_io.load() == trainer.training
 
 
learning_loop_node/tests/trainer/states/test_state_sync_confusion_matrix.py
CHANGED
@@ -19,7 +19,7 @@ def trainer_has_sync_confusion_matrix_error(trainer: TrainerLogic):
 async def test_nothing_to_sync(test_initialized_trainer: TestingTrainerLogic):
     trainer = test_initialized_trainer
 
-    #
+    # NOTE: this requires trainer to have _training
     # trainer.load_active_training()
     create_active_training_file(trainer, training_state=TrainerState.TrainingFinished)
     trainer._init_from_last_training()
@@ -40,6 +40,7 @@ async def test_unsynced_model_available__sync_successful(test_initialized_traine
     create_active_training_file(trainer, training_state=TrainerState.TrainingFinished)
 
     trainer._init_from_last_training()
+    trainer.training.image_data = []
     trainer.has_new_model = True
 
     trainer._begin_training_task()
learning_loop_node/tests/trainer/test_trainer_states.py
CHANGED
@@ -14,7 +14,12 @@ def create_training() -> Training:
         context=context,
         project_folder='',
         images_folder='',
-        training_folder=''
+        training_folder='',
+        categories=[],
+        hyperparameters={},
+        model_variant='',
+        training_number=0,
+        training_state=TrainerState.Preparing)
     return training
 
 
learning_loop_node/tests/trainer/testing_trainer_logic.py
CHANGED
@@ -30,13 +30,13 @@ class TestingTrainerLogic(TrainerLogic):
                          PretrainedModel(name='large', label='Large', description='a large model')]
 
     # pylint: disable=unused-argument
-    async def _start_training_from_base_model(self
+    async def _start_training_from_base_model(self) -> None:
         assert self._executor is not None
         await self._executor.start('/bin/bash -c "while true; do sleep 1; done"')
 
     async def _start_training_from_scratch(self) -> None:
-        assert self.
-        await self.
+        assert self._executor is not None
+        await self._executor.start('/bin/bash -c "while true; do sleep 1; done"')
 
     def _get_new_best_training_state(self) -> Optional[TrainingStateData]:
         if self.has_new_model:
learning_loop_node/trainer/downloader.py
CHANGED
@@ -27,5 +27,5 @@ class TrainingsDownloader():
                 valid_image_data.append(i)
             else:
                 skipped_image_count += 1
-        logging.info(
+        logging.info('Done downloading image data for %s images.', len(image_data))
         return (valid_image_data, skipped_image_count)
learning_loop_node/trainer/executor.py
CHANGED
@@ -3,7 +3,7 @@ import logging
 import os
 import shlex
 from io import BufferedWriter
-from typing import List, Optional
+from typing import Dict, List, Optional
 
 
 class Executor:
@@ -33,7 +33,7 @@ class Executor:
         if env is not None:
             full_env.update(env)
 
-        logging.info(
+        logging.info('Starting executor with command: %s in %s - logging to %s', cmd, self.path, self.log_file_path)
         self.log_file = open(self.log_file_path, 'ab')
 
         self._process = await asyncio.create_subprocess_exec(
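The added `Dict` import supports the `env` parameter of `Executor.start`, which is merged into the inherited environment before the subprocess is spawned. A hedged usage sketch; the constructor argument, command and environment variable are illustrative assumptions rather than documented API:

    import asyncio

    from learning_loop_node.trainer.executor import Executor


    async def main() -> None:
        executor = Executor('/tmp/training')                     # working directory (constructor args assumed)
        await executor.start('python train.py --epochs 10',      # command string, split internally via shlex
                             env={'CUDA_VISIBLE_DEVICES': '0'})  # merged into the inherited environment


    asyncio.run(main())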
learning_loop_node/trainer/rest/backdoor_controls.py
CHANGED
@@ -29,7 +29,7 @@ async def provide_new_model(request: Request):
     if value == 'on':
         trainer_node.trainer_logic.provide_new_model = True  # type: ignore
 
-    logging.debug(
+    logging.debug('turning automatically provide_new_model %s', value)
 
 
 @router.post("/reset")
@@ -64,7 +64,7 @@ def set_error_configuration(msg: Dict, request: Request):
                                              get_new_model=msg.get('get_new_model', None),
                                              save_model=msg.get('save_model', None), )
 
-    logging.info(
+    logging.info('setting error configuration to: %s', asdict(error_configuration))
     trainer_logic = request.app.trainer_logic
 
     # NOTE: trainer_logic is MockTrainerLogic which has a property error_configuration
@@ -82,23 +82,23 @@ async def add_steps(request: Request):
 
     if not trainer_logic._executor or not trainer_logic._executor.is_running():  # pylint: disable=protected-access
         training = trainer_logic._training  # pylint: disable=protected-access
-        logging.error(
+        logging.error('cannot add steps when not running, state: %s', training.training_state if training else 'None')
         raise HTTPException(status_code=409, detail="trainer is not running")
 
     steps = int(str(await request.body(), 'utf-8'))
 
     previous_state = trainer_logic.provide_new_model  # type: ignore
     trainer_logic.provide_new_model = True  # type: ignore
-    logging.warning(
+    logging.warning('simulating newly completed models by moving %s forward', steps)
 
     for _ in range(steps):
         try:
             logging.warning('calling sync_confusion_matrix')
-            await trainer_logic.
+            await trainer_logic._sync_training()  # pylint: disable=protected-access
         except Exception:
             pass  # Tests can force synchroniation to fail, error state is reported to backend
     trainer_logic.provide_new_model = previous_state  # type: ignore
-    logging.warning(
+    logging.warning('progress increased to %s', trainer_logic.current_iteration)  # type: ignore
     await trainer_node.send_status()
 
learning_loop_node/trainer/trainer_logic.py
CHANGED
@@ -62,7 +62,7 @@ class TrainerLogic(TrainerLogicGeneric):
                 break
             self.errors.reset(error_key)
             try:
-                await self.
+                await self._sync_training()
             except asyncio.CancelledError:
                 logging.warning('CancelledError in run_training')
                 raise
@@ -130,8 +130,12 @@ class TrainerLogic(TrainerLogicGeneric):
         if self._can_resume():
             self.start_training_task = self._resume()
         else:
-
-
+            base_model_uuid_is_none = self.training.base_model_uuid is None
+            base_model_uuid_is_valid = is_valid_uuid4(self.training.base_model_uuid)
+            if not base_model_uuid_is_none and not base_model_uuid_is_valid:
+                logging.warning('base_model_uuid is not a valid uuid4: %s\n Starting training from scratch.',
+                                self.training.base_model_uuid)
+            if not base_model_uuid_is_valid:
                 self.start_training_task = self._start_training_from_scratch()
             else:
                 self.start_training_task = self._start_training_from_base_model()
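`is_valid_uuid4` is imported from `helpers/misc.py`, but its body is not part of this diff. A typical implementation would look roughly like the following; this is an assumption for illustration, not the package's code:

    from typing import Optional
    from uuid import UUID


    def is_valid_uuid4(candidate: Optional[str]) -> bool:
        """Return True only if `candidate` parses as a version-4 UUID."""
        if not candidate:
            return False
        try:
            return UUID(candidate).version == 4
        except ValueError:
            return False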
learning_loop_node/trainer/trainer_logic_generic.py
CHANGED
@@ -10,9 +10,9 @@ from typing import TYPE_CHECKING, Callable, Coroutine, Dict, List, Optional
 
 from fastapi.encoders import jsonable_encoder
 
-from ..data_classes import (Context, Errors,
-
-from ..helpers.misc import create_project_folder, delete_all_training_folders,
+from ..data_classes import (Context, Errors, PretrainedModel, TrainerState, Training, TrainingOut, TrainingStateData,
+                            TrainingStatus)
+from ..helpers.misc import create_project_folder, delete_all_training_folders, is_valid_uuid4
 from .downloader import TrainingsDownloader
 from .exceptions import CriticalError, NodeNeedsRestartError
 from .io_helpers import ActiveTrainingIO, EnvironmentVars, LastTrainingIO
@@ -66,19 +66,12 @@ class TrainerLogicGeneric(ABC):
         return self._training
 
     @property
-    def
-        assert self.
-
-        return self.training_data.hyperparameter
+    def hyperparameters(self) -> dict:
+        assert self._training is not None, 'Training should have data'
+        return self._training.hyperparameters
 
     # ---------------------------------------- PROPERTIES ----------------------------------------
 
-    @property
-    def training_data(self) -> Optional[TrainingData]:
-        if self.training_active and self.training.data:
-            return self.training.data
-        return None
-
     @property
     def training_context(self) -> Optional[Context]:
         if self.training_active:
@@ -111,12 +104,8 @@ class TrainerLogicGeneric(ABC):
     def hyperparameters_for_state_sync(self) -> Optional[Dict]:
         """Used in sync_confusion_matrix and send_status to provide information about the training configuration.
         """
-        if self._training
-
-            information['resolution'] = self._training.data.hyperparameter.resolution
-            information['flipRl'] = self._training.data.hyperparameter.flip_rl
-            information['flipUd'] = self._training.data.hyperparameter.flip_ud
-            return information
+        if self._training:
+            return self._training.hyperparameters
         return None
 
     @property
@@ -173,6 +162,24 @@ class TrainerLogicGeneric(ABC):
         # Initializing a new training object will create the folder structure for the training.
         # The training loop will then run through the states of the training.
 
+    def generate_status_for_loop(self, trainer_uuid: str, trainer_name: str) -> TrainingStatus:
+
+        status = TrainingStatus(id=trainer_uuid,
+                                name=trainer_name,
+                                state=self.state,
+                                errors={},
+                                uptime=self.training_uptime,
+                                progress=self.general_progress)
+
+        status.pretrained_models = self.provided_pretrained_models
+        status.architecture = self.model_architecture
+
+        if self._training:
+            status.errors = self.errors.errors
+            status.context = self.training_context
+
+        return status
+
     async def try_continue_run_if_incomplete(self) -> bool:
         """Tries to continue a training if the last training was not finished.
         """
@@ -188,29 +195,30 @@ class TrainerLogicGeneric(ABC):
         """
         self._training = self.last_training_io.load()
         assert self._training is not None and self._training.training_folder is not None, 'could not restore training folder'
+        logger.info('restored training: \n%s', self._training)
         self._active_training_io = ActiveTrainingIO(
             self._training.training_folder, self.node.loop_communicator, self._training.context)
 
-    async def begin_training(self, organization: str, project: str,
+    async def begin_training(self, organization: str, project: str, training_config: Dict) -> None:
         """Called on `begin_training` event from the Learning Loop.
         """
-        self._init_new_training(Context(organization=organization, project=project),
+        self._init_new_training(Context(organization=organization, project=project), training_config)
         self._begin_training_task()
 
     def _begin_training_task(self) -> None:
         # NOTE: Task object is used to potentially cancel the task
         self.training_task = asyncio.get_event_loop().create_task(self._run())
 
-    def _init_new_training(self, context: Context,
+    def _init_new_training(self, context: Context, training_config: Dict) -> None:
         """Called on `begin_training` event from the Learning Loop.
-        Note that
+        Note that training_config needs the entries 'categories', 'model_variant' and 'training_number',
         but also the hyperparameter entries.
+        'base_model_uuid' is optional if the training is continued from a previous training.
         """
         project_folder = create_project_folder(context)
         if not self._environment_vars.keep_old_trainings:
             delete_all_training_folders(project_folder)
-        self._training = generate_training(project_folder, context)
-        self._training.set_values_from_data(details)
+        self._training = Training.generate_training(project_folder, context, training_config)
 
         self._active_training_io = ActiveTrainingIO(
             self._training.training_folder, self.node.loop_communicator, context)
@@ -254,7 +262,7 @@ class TrainerLogicGeneric(ABC):
         elif tstate == TrainerState.TrainModelDownloaded:  # -> TrainingRunning -> TrainingFinished
             await self._perform_state('run_training', TrainerState.TrainingRunning, TrainerState.TrainingFinished, self._train)
         elif tstate == TrainerState.TrainingFinished:  # -> ConfusionMatrixSyncing -> ConfusionMatrixSynced
-            await self._perform_state('sync_confusion_matrix', TrainerState.ConfusionMatrixSyncing, TrainerState.ConfusionMatrixSynced, self.
+            await self._perform_state('sync_confusion_matrix', TrainerState.ConfusionMatrixSyncing, TrainerState.ConfusionMatrixSynced, self._sync_training)
         elif tstate == TrainerState.ConfusionMatrixSynced:  # -> TrainModelUploading -> TrainModelUploaded
             await self._perform_state('upload_model', TrainerState.TrainModelUploading, TrainerState.TrainModelUploaded, self._upload_model)
         elif tstate == TrainerState.TrainModelUploaded:  # -> Detecting -> Detected
@@ -298,6 +306,7 @@ class TrainerLogicGeneric(ABC):
             logger.error('Node Restart Requested')
             sys.exit(0)
         except Exception as e:
+            print('Error in %s - Exception: %s', state_during, e, flush=True)
             self.errors.set(error_key, str(e))
             logger.exception('Error in %s - Exception: %s', state_during, e)
             self.training.training_state = previous_state
@@ -316,19 +325,25 @@ class TrainerLogicGeneric(ABC):
         self.node.data_exchanger.set_context(self.training.context)
         downloader = TrainingsDownloader(self.node.data_exchanger)
         image_data, skipped_image_count = await downloader.download_training_data(self.training.images_folder)
-
-        self.training.
-        self.training.
+
+        self.training.image_data = image_data
+        self.training.skipped_image_count = skipped_image_count
 
     async def _download_model(self) -> None:
         """If training is continued, the model is downloaded from the Learning Loop to the training_folder.
         The downloaded model.json file is renamed to base_model.json because a new model.json will be created during training.
         """
-        base_model_uuid = self.training.
+        base_model_uuid = self.training.base_model_uuid
+        base_model_uuid_is_none = base_model_uuid is None
+        base_model_uuid_is_valid = is_valid_uuid4(base_model_uuid)
+
+        if not base_model_uuid_is_none and not base_model_uuid_is_valid:
+            logger.warning(
+                'base model uuid was provided but was not valid (base_model_uuid: %s).\nSkipping download and starting training from scratch.', base_model_uuid)
+            return
 
-
-
-            logger.info('skipping model download. No base model provided (in form of uuid): %s', base_model_uuid)
+        if base_model_uuid_is_none:
+            logger.info('No base model provided (base_model_uuid: %s).\nStarting training from scratch.', base_model_uuid)
             return
 
         logger.info('loading model from Learning Loop')
@@ -337,19 +352,21 @@ class TrainerLogicGeneric(ABC):
         shutil.move(f'{self.training.training_folder}/model.json',
                     f'{self.training.training_folder}/base_model.json')
 
-    async def
-        """Syncronizes the
+    async def _sync_training(self) -> None:
+        """Syncronizes the training with the Learning Loop via the update_training endpoint.
        NOTE: This stage sets the errors explicitly because it may be used inside the training stage.
         """
         error_key = 'sync_confusion_matrix'
         try:
             new_best_model = self._get_new_best_training_state()
-            if new_best_model
+            if new_best_model:
                 new_training = TrainingOut(trainer_id=self.node.uuid,
+                                           trainer_name=self.node.name,
                                            confusion_matrix=new_best_model.confusion_matrix,
-                                           train_image_count=self.training.
-                                           test_image_count=self.training.
-                                           hyperparameters=self.hyperparameters_for_state_sync
+                                           train_image_count=self.training.train_image_count(),
+                                           test_image_count=self.training.test_image_count(),
+                                           hyperparameters=self.hyperparameters_for_state_sync,
+                                           best_epoch=new_best_model.epoch)
                 await asyncio.sleep(0.1)  # NOTE needed for tests.
 
                 result = await self.node.sio_client.call('update_training', (
@@ -411,7 +428,7 @@ class TrainerLogicGeneric(ABC):
     def _dump_categories_to_json(self) -> str:
         """Dumps the categories to a json file and returns the path to the file.
         """
-        content = {'categories': [asdict(c) for c in self.
+        content = {'categories': [asdict(c) for c in self._training.categories], } if self._training else None
         json_path = '/tmp/model.json'
         with open(json_path, 'w') as f:
             json.dump(content, f)
@@ -481,12 +498,13 @@ class TrainerLogicGeneric(ABC):
 
     @abstractmethod
     def _get_new_best_training_state(self) -> Optional[TrainingStateData]:
-        """Is called frequently by `
+        """Is called frequently by `_sync_training` during training to check if a new "best" model is availabe.
         Returns None if no new model could be found. Otherwise TrainingStateData(confusion_matrix, meta_information).
         `confusion_matrix` contains a dict of all classes:
            - The classes must be identified by their uuid, not their name.
            - For each class a dict with tp, fp, fn is provided (true positives, false positives, false negatives).
         `meta_information` can hold any data which is helpful for self._on_metrics_published to store weight file etc for later upload via self.get_model_files
+        `epoch` is the epoch number of the best model.
        """
        raise NotImplementedError
 
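To illustrate how the new `epoch` field of `TrainingStateData` feeds `TrainingOut.best_epoch`, here is a hedged sketch of how a concrete trainer might implement `_get_new_best_training_state`; the class name, the `has_new_checkpoint` flag and the metric values are made up, only the return type follows the diff:

    from typing import Optional

    from learning_loop_node.data_classes import TrainingStateData


    class MyTrainerLogic:  # in practice this would derive from TrainerLogic
        has_new_checkpoint = False  # hypothetical flag set by the training process

        def _get_new_best_training_state(self) -> Optional[TrainingStateData]:
            if not self.has_new_checkpoint:
                return None
            return TrainingStateData(
                confusion_matrix={'category-uuid-1': {'tp': 42, 'fp': 3, 'fn': 5}},
                meta_information={'weightfile': '/tmp/best.pt'},  # used later by _on_metrics_published
                epoch=17,                                         # reported to the loop as best_epoch
            )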
learning_loop_node/trainer/trainer_node.py
CHANGED
@@ -7,7 +7,6 @@ from typing import Dict, Optional
 from fastapi.encoders import jsonable_encoder
 from socketio import AsyncClient, exceptions
 
-from ..data_classes import TrainingStatus
 from ..node import Node
 from .io_helpers import LastTrainingIO
 from .rest import backdoor_controls
@@ -23,14 +22,15 @@ class TrainerNode(Node):
         self.last_training_io = LastTrainingIO(self.uuid)
         self.trainer_logic._last_training_io = self.last_training_io
 
-        self.
+        self._first_idle_time: float | None = None
         if os.environ.get('TRAINER_IDLE_TIMEOUT_SEC', 0.0):
-            self.
+            self._idle_timeout = float(os.environ.get('TRAINER_IDLE_TIMEOUT_SEC', 0.0))
         else:
-            self.
-        if self.
+            self._idle_timeout = 0.0
+        if self._idle_timeout:
             self.log.info(
-
+                'Trainer started with an idle_timeout of %s seconds. Note that shutdown does not work if docker container has the restart policy set to always',
+                self._idle_timeout)
 
         if use_backdoor_controls or os.environ.get('USE_BACKDOOR_CONTROLS', '0').lower() in ('1', 'true'):
             self.include_router(backdoor_controls.router, tags=["controls"])
@@ -53,8 +53,8 @@ class TrainerNode(Node):
         except exceptions.TimeoutError:
             self.log.warning('timeout when sending status to learning loop, reconnecting sio_client')
             await self.sio_client.disconnect()  # NOTE: reconnect happens in node._on_repeat
-        except Exception
-            self.log.exception(
+        except Exception:
+            self.log.exception('could not send status. Exception:')
 
     # ---------------------------------------------- NODE METHODS ---------------------------------------------------
 
@@ -68,7 +68,7 @@ class TrainerNode(Node):
 
         @sio_client.event
         async def stop_training():
-            self.log.info(
+            self.log.info('stop_training received. Current state : %s', self.trainer_logic.state)
             try:
                 await self.trainer_logic.stop()
             except Exception:
@@ -80,24 +80,7 @@ class TrainerNode(Node):
             self.log.debug('cannot send status - not connected to the Learning Loop')
             return
 
-        status =
-                                name=self.name,
-                                state=self.trainer_logic.state,
-                                errors={},
-                                uptime=self.trainer_logic.training_uptime,
-                                progress=self.trainer_logic.general_progress)
-
-        status.pretrained_models = self.trainer_logic.provided_pretrained_models
-        status.architecture = self.trainer_logic.model_architecture
-
-        if data := self.trainer_logic.training_data:
-            status.train_image_count = data.train_image_count()
-            status.test_image_count = data.test_image_count()
-            status.skipped_image_count = data.skipped_image_count
-            status.hyperparameters = self.trainer_logic.hyperparameters_for_state_sync
-        status.errors = self.trainer_logic.errors.errors
-        status.context = self.trainer_logic.training_context
-
+        status = self.trainer_logic.generate_status_for_loop(self.uuid, self.name)
         self.log.debug('sending status: %s', status.short_str())
         result = await self.sio_client.call('update_trainer', jsonable_encoder(asdict(status)), timeout=30)
         if isinstance(result, Dict) and not result['success']:
@@ -105,17 +88,17 @@ class TrainerNode(Node):
             self.log.error('Error when sending status update: Response from loop was:\n %s', result)
 
     def check_idle_timeout(self):
-        if not self.
+        if not self._idle_timeout:
            return
 
         if self.trainer_logic.state == 'idle':
-            if self.
-                self.
-            idle_time = time.time() - self.
-            if idle_time > self.
+            if self._first_idle_time is None:
+                self._first_idle_time = time.time()
+            idle_time = time.time() - self._first_idle_time
+            if idle_time > self._idle_timeout:
                 self.log.info('Trainer has been idle for %.2f s (with timeout %.2f s). Shutting down.',
-                              idle_time, self.
+                              idle_time, self._idle_timeout)
                 sys.exit(0)
-            self.log.debug('idle time: %.2f s / %.2f s', idle_time, self.
+            self.log.debug('idle time: %.2f s / %.2f s', idle_time, self._idle_timeout)
         else:
-            self.
+            self._first_idle_time = None
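The renamed private attributes are driven by the `TRAINER_IDLE_TIMEOUT_SEC` environment variable; when it is set, `check_idle_timeout` calls `sys.exit(0)` once the trainer has been idle for that many seconds. A short sketch of enabling the automatic shutdown; the trainer-logic class and its module are hypothetical placeholders:

    import os

    from learning_loop_node.trainer.trainer_node import TrainerNode
    from my_project.trainer import MyTrainerLogic  # hypothetical TrainerLogic subclass

    os.environ['TRAINER_IDLE_TIMEOUT_SEC'] = '3600'  # shut down after one hour in the 'idle' state

    node = TrainerNode(name='my-trainer', trainer_logic=MyTrainerLogic())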
{learning_loop_node-0.11.1.dist-info → learning_loop_node-0.12.0.dist-info}/RECORD
CHANGED
@@ -2,14 +2,14 @@ learning_loop_node/__init__.py,sha256=onN5s8-x_xBsCM6NLmJO0Ym1sJHeCFaGw8qb0oQZmz
 learning_loop_node/annotation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 learning_loop_node/annotation/annotator_logic.py,sha256=BTaopkJZkIf1CI5lfsVKsxbxoUIbDJrevavuQUT5e_c,1000
 learning_loop_node/annotation/annotator_node.py,sha256=UrJ8MpZ44UhsjmVuSHr2BhHyLC-kIMDi3IuBBMKzN1g,4117
-learning_loop_node/data_classes/__init__.py,sha256=
+learning_loop_node/data_classes/__init__.py,sha256=OZMvTALB_vRz1wnmIPy2mRXteZmot2HZJmhAKBWKrWg,1284
 learning_loop_node/data_classes/annotations.py,sha256=iInU0Nuy_oYT_sj4k_n-W0UShCBI2cHQYrt8imymbtM,1211
 learning_loop_node/data_classes/detections.py,sha256=7vqcS0EK8cmDjRDckHlpSZDZ9YO6qajRmYvx-oxatFc,5425
-learning_loop_node/data_classes/general.py,sha256=
+learning_loop_node/data_classes/general.py,sha256=DIJnmLmbnVgg8Xzg6CKIeQpM7EQhiGcOjeqzNn2NRIA,6194
 learning_loop_node/data_classes/image_metadata.py,sha256=56nNSf_7aMlvKsJOG8vKCzJHcqKGHVRoULp85pJ2imA,1598
 learning_loop_node/data_classes/socket_response.py,sha256=tIdt-oYf6ULoJIDYQCecNM9OtWR6_wJ9tL0Ksu83Vko,655
-learning_loop_node/data_classes/training.py,sha256=
-learning_loop_node/data_exchanger.py,sha256=
+learning_loop_node/data_classes/training.py,sha256=Lr1dhRyYpv3ctD7ifFMwhqbS3Mx9yi0zVOxEi-V7uC8,6438
+learning_loop_node/data_exchanger.py,sha256=Ux-dHItR_oLBd2HGMNn1WbWBk_KVTOwJn0Wco03rkgo,9192
 learning_loop_node/detector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 learning_loop_node/detector/detector_logic.py,sha256=fAaeLykvkuOeaQx-scuN1pkydK8cPdmNT75P8xqImY0,2130
 learning_loop_node/detector/detector_node.py,sha256=ryzPcv5wfNjA_Sk5YDcUkZoKEUGPT1s29rvFLGGPIZ8,19929
@@ -28,21 +28,21 @@ learning_loop_node/detector/rest/upload.py,sha256=5YWY0Ku4duZqKd6tjyJzq-Ga83o2UY
 learning_loop_node/examples/novelty_score_updater.py,sha256=1DRgM9lxjFV-q2JvGDDsNLz_ic_rhEZ9wc6ZdjcxwPE,2038
 learning_loop_node/globals.py,sha256=tgw_8RYOipPV9aYlyUhYtXfUxvJKRvfUk6u-qVAtZmY,174
 learning_loop_node/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-learning_loop_node/helpers/environment_reader.py,sha256=
+learning_loop_node/helpers/environment_reader.py,sha256=6DxDJecLHxiGczByhyVa_JssAwwft7vuNCGaEzoSY2I,1662
 learning_loop_node/helpers/gdrive_downloader.py,sha256=zeYJciTAJVRpu_eFjwgYLCpIa6hU1d71anqEBb564Rk,1145
-learning_loop_node/helpers/log_conf.py,sha256=
-learning_loop_node/helpers/misc.py,sha256=
-learning_loop_node/loop_communication.py,sha256=
-learning_loop_node/node.py,sha256=
+learning_loop_node/helpers/log_conf.py,sha256=hqVAa_9NnYEU6N0dcOKmph82p7MpgKqeF_eomTLYzWY,961
+learning_loop_node/helpers/misc.py,sha256=J29iBmsEUAraKKDN1m1NKiHQ3QrP5ub5HBU6cllSP2g,7384
+learning_loop_node/loop_communication.py,sha256=0Hm7cv3z_QqfnRa0oUH_S3uTTmfPFF7DXNGCd-lKAHk,6688
+learning_loop_node/node.py,sha256=z2CG4s_uvboY7pBQ8JlJr5s67Hf31ckfGjaRulsrqy0,10593
 learning_loop_node/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-learning_loop_node/rest.py,sha256=
+learning_loop_node/rest.py,sha256=omwlRHLnyG-kgCBVnZDk5_SAPobL9g7slWeX21wsPGw,1551
 learning_loop_node/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 learning_loop_node/tests/annotator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 learning_loop_node/tests/annotator/conftest.py,sha256=G4ZvdZUdvPp9bYCzg3eEVkGCeXn9INZ3AcN7d5CyLkU,1931
 learning_loop_node/tests/annotator/pytest.ini,sha256=8QdjmawLy1zAzXrJ88or1kpFDhJw0W5UOnDfGGs_igU,262
 learning_loop_node/tests/annotator/test_annotator_node.py,sha256=TPNPPrQAxQ_zEecQcH7hlczgD3ABtTCNtUvWD1_oApk,1985
 learning_loop_node/tests/detector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-learning_loop_node/tests/detector/conftest.py,sha256=
+learning_loop_node/tests/detector/conftest.py,sha256=4zNW8dnwj3CDKCkFNVCPbHgFTYtDvdaqnUM4s_I-cq4,5328
 learning_loop_node/tests/detector/inbox_filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 learning_loop_node/tests/detector/inbox_filter/test_observation.py,sha256=k4WYdvnuV7d_r7zI4M2aA8WuBjm0aycQ0vj1rGE2q4w,1370
 learning_loop_node/tests/detector/inbox_filter/test_relevance_group.py,sha256=r-wABFQVsTNTjv7vYGr8wbHfOWy43F_B14ZDWHfiZ-A,7613
@@ -65,32 +65,32 @@ learning_loop_node/tests/general/test_downloader.py,sha256=y4GcUyR0OAfrwltd6eyQg
 learning_loop_node/tests/general/test_learning_loop_node.py,sha256=SZd-VChpWnnsPN46pr4E_LL3ZevYx6psU-AWdVeOFpQ,770
 learning_loop_node/tests/test_helper.py,sha256=Xajn6BWJqeD36YAETwdcJd6awY2NPmaOis3gWgFc97k,2909
 learning_loop_node/tests/trainer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-learning_loop_node/tests/trainer/conftest.py,sha256=
+learning_loop_node/tests/trainer/conftest.py,sha256=X-Ib-ju-emKomgaPjddwAgMp0zt2Slu59Y3cPfmTipI,3672
 learning_loop_node/tests/trainer/pytest.ini,sha256=8QdjmawLy1zAzXrJ88or1kpFDhJw0W5UOnDfGGs_igU,262
 learning_loop_node/tests/trainer/state_helper.py,sha256=MDe9opeKruip74FoRFff8MSWGiQNFqDpPtIEIbgPnFc,919
 learning_loop_node/tests/trainer/states/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 learning_loop_node/tests/trainer/states/test_state_cleanup.py,sha256=gZNxSSwnj9f0esExNnQzqadM6-sE3IsF5sNbD0bZNu8,1250
 learning_loop_node/tests/trainer/states/test_state_detecting.py,sha256=KGXTR69J_1pJoT8S0ceC3vSyHLw52mIpjbawH4c-8JA,3696
-learning_loop_node/tests/trainer/states/test_state_download_train_model.py,sha256=
-learning_loop_node/tests/trainer/states/test_state_prepare.py,sha256=
-learning_loop_node/tests/trainer/states/test_state_sync_confusion_matrix.py,sha256=
-learning_loop_node/tests/trainer/states/test_state_train.py,sha256=
+learning_loop_node/tests/trainer/states/test_state_download_train_model.py,sha256=rgMcPtG2mu8ojT6ftcIzA1oWOLo0cubuaEofKXgoUYo,2999
+learning_loop_node/tests/trainer/states/test_state_prepare.py,sha256=N9A8UDVoiXvMd1htqigYfuRAqpq2VjnfKlP4EROJnX8,2270
+learning_loop_node/tests/trainer/states/test_state_sync_confusion_matrix.py,sha256=wMUhw6t2Hv7dx1_wh8W748oRhMvy_O3TB3lBzEWmPiE,5111
+learning_loop_node/tests/trainer/states/test_state_train.py,sha256=IBOtnkDApwfaoEavFWgydxXwRXOPfj8-U78r1tdJH_k,2900
 learning_loop_node/tests/trainer/states/test_state_upload_detections.py,sha256=0Qkavl4i2tZmCOxKkNsQUqa1JWhAgcOsbrW3_eYHfxo,7417
 learning_loop_node/tests/trainer/states/test_state_upload_model.py,sha256=y2o4WBo7kBG_JWSWmt4icjrwya5hQ30zCWC-YMVEwEk,3621
 learning_loop_node/tests/trainer/test_errors.py,sha256=khWCTzi-JW4nSz9QnsRh9wDPmiuE_zdxXukh59qixuY,2109
-learning_loop_node/tests/trainer/test_trainer_states.py,sha256=
-learning_loop_node/tests/trainer/testing_trainer_logic.py,sha256=
+learning_loop_node/tests/trainer/test_trainer_states.py,sha256=SJqbh6DiLn7_D62DTIxDcBnPV4iPOdJms_aTytw3INc,1218
+learning_loop_node/tests/trainer/testing_trainer_logic.py,sha256=vaz7EbsVRcSMyAfEGpXgNFj1yQc9dYLCYDrjjmYTZ1o,3765
 learning_loop_node/trainer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-learning_loop_node/trainer/downloader.py,sha256=
+learning_loop_node/trainer/downloader.py,sha256=AIyEKM4XYtd6VgmXrP3VayV9DpJzdURK1Brx81ePNSM,1470
 learning_loop_node/trainer/exceptions.py,sha256=vbuoE6kssLQuA8zd3LiDHmZglP6E2IJJwEi5AZtWXxY,420
-learning_loop_node/trainer/executor.py,sha256
+learning_loop_node/trainer/executor.py,sha256=XQ1TwgOF2pQv7bgzSaljDWG4I1ySjYXL8r9c_p9MX1Q,3947
 learning_loop_node/trainer/io_helpers.py,sha256=ZnAPVqhq8XCHe1NoiOQJ_w0B-estcc8CBQHnb423UDw,7226
 learning_loop_node/trainer/rest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-learning_loop_node/trainer/rest/backdoor_controls.py,sha256
+learning_loop_node/trainer/rest/backdoor_controls.py,sha256=ZnK8ypY5r_q0-YZbtaOxhQThzuZvMsQHM5gJGESd_dE,5131
 learning_loop_node/trainer/test_executor.py,sha256=6BVGDN_6f5GEMMEvDLSG1yzMybSvgXaP5uYpSfsVPP0,2224
-learning_loop_node/trainer/trainer_logic.py,sha256=
-learning_loop_node/trainer/trainer_logic_generic.py,sha256=
-learning_loop_node/trainer/trainer_node.py,sha256=
-learning_loop_node-0.
-learning_loop_node-0.
-learning_loop_node-0.
+learning_loop_node/trainer/trainer_logic.py,sha256=SHzoCb_hwI3zC6VCW_7jWi45Ng3etEo6WepFntS3pnA,8773
+learning_loop_node/trainer/trainer_logic_generic.py,sha256=WM0gaTln0wjNKemNpxhCHRQCTEj_TVPgN1oMNXkacSU,26795
+learning_loop_node/trainer/trainer_node.py,sha256=9nk_LH4jmuUzZ5ApOGvut1RAcyULU7DtiIgtjKzIrpU,4494
+learning_loop_node-0.12.0.dist-info/METADATA,sha256=w9nc4bwIZkzvTLOM9SjLxxnGDldz1-hdrbw0pouBsMo,11906
+learning_loop_node-0.12.0.dist-info/WHEEL,sha256=WGfLGfLX43Ei_YORXSnT54hxFygu34kMpcQdmgmEwCQ,88
+learning_loop_node-0.12.0.dist-info/RECORD,,

{learning_loop_node-0.11.1.dist-info → learning_loop_node-0.12.0.dist-info}/WHEEL
File without changes