code-loader 1.0.48__py3-none-any.whl → 1.0.49.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_loader/code_inegration_processes_manager.py +83 -0
- code_loader/contract/datasetclasses.py +12 -2
- code_loader/inner_leap_binder/leapbinder.py +26 -7
- code_loader/leaploader.py +26 -3
- {code_loader-1.0.48.dist-info → code_loader-1.0.49.dev2.dist-info}/METADATA +1 -1
- {code_loader-1.0.48.dist-info → code_loader-1.0.49.dev2.dist-info}/RECORD +8 -7
- {code_loader-1.0.48.dist-info → code_loader-1.0.49.dev2.dist-info}/LICENSE +0 -0
- {code_loader-1.0.48.dist-info → code_loader-1.0.49.dev2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,83 @@
|
|
1
|
+
# mypy: ignore-errors
|
2
|
+
import traceback
|
3
|
+
from dataclasses import dataclass
|
4
|
+
|
5
|
+
from typing import List, Tuple, Optional
|
6
|
+
|
7
|
+
from multiprocessing import Process, Queue
|
8
|
+
|
9
|
+
from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
|
10
|
+
from code_loader.leaploader import LeapLoader
|
11
|
+
from code_loader.contract.enums import DataStateEnum
|
12
|
+
from code_loader.metric_calculator_parallelized import MetricCalculatorParallelized
|
13
|
+
from code_loader.samples_generator_parallelized import SamplesGeneratorParallelized
|
14
|
+
|
15
|
+
|
16
|
+
@dataclass
class SampleSerializableError:
    """Plain-data record describing a failure while producing one sample.

    The worker loop in ``CodeIntegrationProcessesManager._process_func`` puts an
    instance of this on the ready-samples queue instead of a sample when
    ``get_sample`` raises.
    """

    # Dataset state that was requested for the failing sample.
    state: DataStateEnum
    # Index that was requested for the failing sample.
    index: int
    # Formatted traceback text following the last 'File "<string>"' marker.
    leap_script_trace: str
    # ``str()`` of the exception that was caught.
    exception_as_str: str
|
22
|
+
|
23
|
+
|
24
|
+
class CodeIntegrationProcessesManager:
    """Holds the parallel metric calculator and samples generator for one code integration.

    NOTE(review): several methods read attributes or call helpers this class never
    defines (``multiprocessing_context``, ``_inputs_waiting_to_be_process``,
    ``_ready_processed_results``, ``_start_process_inputs``,
    ``_get_next_ready_processed_result``, ``start_process_inputs``). They presumably
    were meant to come from ``LeapLoaderParallelizedBase`` or from the two wrapped
    objects -- confirm the intended design before relying on those methods.
    """

    def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
                 max_samples_in_queue: int = 128) -> None:
        """Create the metric calculator and samples generator for the given script.

        Args:
            code_path: forwarded to both wrapped objects and to worker processes.
            code_entry_name: forwarded to both wrapped objects and to worker processes.
            n_workers: stored on the instance; not otherwise used in this class.
            max_samples_in_queue: stored on the instance; not otherwise used in this class.
        """
        # _create_and_start_process reads these from self, so they must be kept.
        self.code_path = code_path
        self.code_entry_name = code_entry_name
        self.n_workers = n_workers
        self.max_samples_in_queue = max_samples_in_queue

        self.metric_calculator_parallelized = MetricCalculatorParallelized(code_path, code_entry_name)
        self.samples_generator_parallelized = SamplesGeneratorParallelized(code_path, code_entry_name)

    def _create_and_start_process(self) -> Process:
        """Start one daemon worker running ``_process_func`` and return it."""
        process = self.multiprocessing_context.Process(
            target=CodeIntegrationProcessesManager._process_func,
            args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
                  self._ready_processed_results))
        process.daemon = True
        process.start()
        return process

    def _run_and_warm_first_process(self):
        """Start the first worker and push one training sample through it."""
        process = self._create_and_start_process()
        self.processes = [process]

        # needed in order to make sure the preprocess func runs once in nonparallel
        self._start_process_inputs([(DataStateEnum.training, 0)])
        self._get_next_ready_processed_result()

    def _operation_decider(self):
        """Pick which kind of work has pending inputs and no ready results.

        Returns:
            ``'metric'`` or ``'dataset'``; ``None`` when neither side qualifies.
            Metrics are checked first.
        """
        metrics = self.metric_calculator_parallelized
        if metrics._ready_processed_results.empty() and not \
                metrics._inputs_waiting_to_be_process.empty():
            return 'metric'

        samples = self.samples_generator_parallelized
        if samples._ready_processed_results.empty() and not \
                samples._inputs_waiting_to_be_process.empty():
            return 'dataset'

        return None

    @staticmethod
    def _process_func(code_path: str, code_entry_name: str,
                      samples_to_process: Queue, ready_samples: Queue,
                      metrics_to_process: Optional[Queue] = None,
                      ready_metrics: Optional[Queue] = None) -> None:
        """Worker loop: take ``(state, idx)`` requests and emit samples or errors.

        Args:
            code_path: passed to ``LeapLoader``.
            code_entry_name: passed to ``LeapLoader``.
            samples_to_process: queue of ``(state, idx)`` tuples to load.
            ready_samples: queue receiving each sample, or a
                ``SampleSerializableError`` when loading it raised.
            metrics_to_process: currently unused by the loop. Optional so the
                four-argument spawn in ``_create_and_start_process`` matches
                this signature.
            ready_metrics: currently unused by the loop; optional for the same reason.
        """
        import os
        # Set before the loader runs; presumably to keep workers off the GPU -- confirm.
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        leap_loader = LeapLoader(code_path, code_entry_name)
        while True:

            # decide on sample or metric to process
            state, idx = samples_to_process.get(block=True)
            leap_loader._preprocess_result()
            try:
                sample = leap_loader.get_sample(state, idx)
            except Exception as e:
                # Keep only the traceback text after the last 'File "<string>"' marker.
                leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
                ready_samples.put(SampleSerializableError(state, idx, leap_script_trace, str(e)))
                continue

            ready_samples.put(sample)

    def generate_samples(self, sample_identities: List[Tuple[DataStateEnum, int]]):
        """Submit ``(state, index)`` pairs via ``start_process_inputs`` and return its result."""
        return self.start_process_inputs(sample_identities)
|
83
|
+
|
@@ -32,8 +32,18 @@ class PreprocessResponse:
|
|
32
32
|
}
|
33
33
|
response = PreprocessResponse(length=len(preprocessed_data), data=preprocessed_data)
|
34
34
|
"""
|
35
|
-
length: int
|
36
|
-
data: Any
|
35
|
+
length: Optional[int] = None # Deprecated. Please use sample_ids instead
|
36
|
+
data: Any = None
|
37
|
+
sample_ids: Optional[Union[List[str], List[int]]] = None
|
38
|
+
state: Optional[DataStateType] = None
|
39
|
+
|
40
|
+
def __post_init__(self):
    """Fill in whichever of ``length`` / ``sample_ids`` was not supplied.

    Exactly one of the two fields must be given:
    - only ``length``: ``sample_ids`` becomes ``[0, 1, ..., length - 1]``;
    - only ``sample_ids``: ``length`` becomes ``len(sample_ids)``.

    Raises:
        Exception: when both fields are given, or when neither is given.
    """
    has_length = self.length is not None
    has_sample_ids = self.sample_ids is not None

    if has_length and has_sample_ids:
        raise Exception("length is deprecated. Please use sample_ids instead.")
    if not has_length and not has_sample_ids:
        # This case used to report the deprecation message above, which is
        # misleading when the caller passed neither field.
        raise Exception("Either sample_ids or length (deprecated) must be provided.")

    if has_sample_ids:
        self.length = len(self.sample_ids)
    else:
        self.sample_ids = list(range(self.length))
|
37
47
|
|
38
48
|
|
39
49
|
SectionCallableInterface = Callable[[int, PreprocessResponse], npt.NDArray[np.float32]]
|
@@ -389,17 +389,36 @@ class LeapBinder:
|
|
389
389
|
if preprocess is None:
|
390
390
|
raise Exception("Please make sure you call the leap_binder.set_preprocess method")
|
391
391
|
preprocess_results = preprocess.function()
|
392
|
-
preprocess_result_dict = {
|
393
|
-
|
394
|
-
|
395
|
-
|
392
|
+
preprocess_result_dict = {}
|
393
|
+
for i, preprocess_result in enumerate(preprocess_results):
|
394
|
+
if preprocess_result.state is None:
|
395
|
+
state_enum = DataStateEnum(i)
|
396
|
+
preprocess_result.state = DataStateType(state_enum.name)
|
397
|
+
else:
|
398
|
+
state_enum = DataStateEnum[preprocess_result.state.name]
|
396
399
|
|
397
|
-
|
398
|
-
|
399
|
-
preprocess_result_dict[
|
400
|
+
if state_enum in preprocess_result_dict:
|
401
|
+
raise Exception(f"Duplicate state {state_enum.name} in preprocess results")
|
402
|
+
preprocess_result_dict[state_enum] = preprocess_result
|
403
|
+
|
404
|
+
if DataStateEnum.unlabeled not in preprocess_result_dict:
|
405
|
+
preprocess_unlabeled_result = self.get_preprocess_unlabeled_result()
|
406
|
+
if preprocess_unlabeled_result is not None:
|
407
|
+
preprocess_result_dict[DataStateEnum.unlabeled] = preprocess_unlabeled_result
|
408
|
+
|
409
|
+
if DataStateEnum.training not in preprocess_result_dict:
|
410
|
+
raise Exception("Training data is required")
|
411
|
+
if DataStateEnum.validation not in preprocess_result_dict:
|
412
|
+
raise Exception("Validation data is required")
|
400
413
|
|
401
414
|
return preprocess_result_dict
|
402
415
|
|
416
|
+
def get_preprocess_unlabeled_result(self) -> Optional[PreprocessResponse]:
    """Run the registered unlabeled-data preprocess, if there is one.

    Returns:
        The value returned by the registered unlabeled preprocess function, or
        ``None`` when ``setup_container.unlabeled_data_preprocess`` is ``None``.
    """
    handler = self.setup_container.unlabeled_data_preprocess
    if handler is None:
        return None
    return handler.function()
|
421
|
+
|
403
422
|
def _get_all_dataset_base_handlers(self) -> List[Union[DatasetBaseHandler, MetadataHandler]]:
|
404
423
|
all_dataset_base_handlers: List[Union[DatasetBaseHandler, MetadataHandler]] = []
|
405
424
|
all_dataset_base_handlers.extend(self.setup_container.inputs)
|
code_loader/leaploader.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
import importlib.util
|
3
3
|
import io
|
4
4
|
import sys
|
5
|
+
import time
|
5
6
|
from contextlib import redirect_stdout
|
6
7
|
from functools import lru_cache
|
7
8
|
from pathlib import Path
|
@@ -27,6 +28,8 @@ class LeapLoader:
|
|
27
28
|
self.code_entry_name = code_entry_name
|
28
29
|
self.code_path = code_path
|
29
30
|
|
31
|
+
self._preprocess_result_cached = None
|
32
|
+
|
30
33
|
@lru_cache()
|
31
34
|
def exec_script(self) -> None:
|
32
35
|
try:
|
@@ -105,6 +108,10 @@ class LeapLoader:
|
|
105
108
|
|
106
109
|
def get_sample(self, state: DataStateEnum, idx: int) -> DatasetSample:
|
107
110
|
self.exec_script()
|
111
|
+
preprocess_result = self._preprocess_result()
|
112
|
+
if state == DataStateEnum.unlabeled and idx not in preprocess_result[state].sample_ids:
|
113
|
+
self._preprocess_result(update_unlabeled_preprocess=True)
|
114
|
+
|
108
115
|
sample = DatasetSample(inputs=self._get_inputs(state, idx),
|
109
116
|
gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, idx),
|
110
117
|
metadata=self._get_metadata(state, idx),
|
@@ -279,10 +286,26 @@ class LeapLoader:
|
|
279
286
|
]
|
280
287
|
return ModelSetup(custom_layer_instances)
|
281
288
|
|
282
|
-
|
283
|
-
def _preprocess_result(self) -> Dict[DataStateEnum, PreprocessResponse]:
|
289
|
+
def _preprocess_result(self, update_unlabeled_preprocess: bool = False) -> Dict[DataStateEnum, PreprocessResponse]:
    """Return the per-state preprocess responses, computed once and cached on the instance.

    Args:
        update_unlabeled_preprocess: when true, re-run the unlabeled preprocess
            and replace the cached ``DataStateEnum.unlabeled`` entry with its result.

    Returns:
        The cached mapping; the same dict object is returned on every call.
    """
    self.exec_script()

    if self._preprocess_result_cached is None:
        self._preprocess_result_cached = global_leap_binder.get_preprocess_result()

    if update_unlabeled_preprocess:
        refreshed_unlabeled = global_leap_binder.get_preprocess_unlabeled_result()
        # The binder returns None when no unlabeled preprocess is registered.
        # Caching that None would make later ``.sample_ids`` lookups on the
        # entry fail, so the existing entry (if any) is left untouched.
        if refreshed_unlabeled is not None:
            self._preprocess_result_cached[DataStateEnum.unlabeled] = refreshed_unlabeled

    return self._preprocess_result_cached
|
300
|
+
|
301
|
+
def get_preprocess_sample_ids(self) -> Dict[DataStateEnum, Union[List[int], List[str]]]:
    """Map every state in the preprocess result to that response's ``sample_ids``."""
    return {
        data_state: response.sample_ids
        for data_state, response in self._preprocess_result().items()
    }
|
308
|
+
|
286
309
|
|
287
310
|
def _get_dataset_handlers(self, handlers: Iterable[DatasetBaseHandler],
|
288
311
|
state: DataStateEnum, idx: int) -> Dict[str, npt.NDArray[np.float32]]:
|
@@ -1,7 +1,8 @@
|
|
1
1
|
LICENSE,sha256=qIwWjdspQeSMTtnFZBC8MuT-95L02FPvzRUdWFxrwJY,1067
|
2
2
|
code_loader/__init__.py,sha256=6MMWr0ObOU7hkqQKgOqp4Zp3I28L7joGC9iCbQYtAJg,241
|
3
|
+
code_loader/code_inegration_processes_manager.py,sha256=XslWOPeNQk4RAFJ_f3tP5Oe3EgcIR7BE7Y8r9Ty73-o,3261
|
3
4
|
code_loader/contract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
code_loader/contract/datasetclasses.py,sha256=
|
5
|
+
code_loader/contract/datasetclasses.py,sha256=ooGKDBix0ZEmTqjgpYLedbu09NhiHOdhr3oJb-KRZG4,6237
|
5
6
|
code_loader/contract/enums.py,sha256=6Lo7p5CUog68Fd31bCozIuOgIp_IhSiPqWWph2k3OGU,1602
|
6
7
|
code_loader/contract/exceptions.py,sha256=jWqu5i7t-0IG0jGRsKF4DjJdrsdpJjIYpUkN1F4RiyQ,51
|
7
8
|
code_loader/contract/responsedataclasses.py,sha256=w7xVOv2S8Hyb5lqyomMGiKAWXDTSOG-FX1YW39bXD3A,3969
|
@@ -17,12 +18,12 @@ code_loader/experiment_api/types.py,sha256=MY8xFARHwdVA7p4dxyhD60ShmttgTvb4qdp1o
|
|
17
18
|
code_loader/experiment_api/utils.py,sha256=XZHtxge12TS4H4-8PjV3sKuhp8Ud6ojAiIzTZJEqBqc,3304
|
18
19
|
code_loader/experiment_api/workingspace_config_utils.py,sha256=DLzXQCg4dgTV_YgaSbeTVzq-2ja_SQw4zi7LXwKL9cY,990
|
19
20
|
code_loader/inner_leap_binder/__init__.py,sha256=koOlJyMNYzGbEsoIbXathSmQ-L38N_pEXH_HvL7beXU,99
|
20
|
-
code_loader/inner_leap_binder/leapbinder.py,sha256=
|
21
|
-
code_loader/leaploader.py,sha256=
|
21
|
+
code_loader/inner_leap_binder/leapbinder.py,sha256=m-9eNKYqK71irfEd4T1bNyM9t8mh_utdUDPYvbK34UY,24902
|
22
|
+
code_loader/leaploader.py,sha256=33NcO2Rdc9UnAY3gBMaoEIBOL53PPaYyMUVjR1jvqjQ,18461
|
22
23
|
code_loader/utils.py,sha256=TZAoUbA2pE8eK3Le3s5Xr4eRaYdeDMQtxotx6rh-5oE,2185
|
23
24
|
code_loader/visualizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
24
25
|
code_loader/visualizers/default_visualizers.py,sha256=VoqO9FN84yXyMjRjHjUTOt2GdTkJRMbHbXJ1cJkREkk,2230
|
25
|
-
code_loader-1.0.
|
26
|
-
code_loader-1.0.
|
27
|
-
code_loader-1.0.
|
28
|
-
code_loader-1.0.
|
26
|
+
code_loader-1.0.49.dev2.dist-info/LICENSE,sha256=qIwWjdspQeSMTtnFZBC8MuT-95L02FPvzRUdWFxrwJY,1067
|
27
|
+
code_loader-1.0.49.dev2.dist-info/METADATA,sha256=EEwk68bEgmlwBEKXlkr9LugpanKMRttVXYw1nvsGiuo,893
|
28
|
+
code_loader-1.0.49.dev2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
29
|
+
code_loader-1.0.49.dev2.dist-info/RECORD,,
|
File without changes
|
File without changes
|