code-loader 1.0.50__tar.gz → 1.0.52__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {code_loader-1.0.50 → code_loader-1.0.52}/PKG-INFO +2 -1
  2. code_loader-1.0.52/code_loader/code_inegration_processes_manager.py +83 -0
  3. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/contract/datasetclasses.py +32 -14
  4. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/inner_leap_binder/leapbinder.py +35 -8
  5. code_loader-1.0.52/code_loader/inner_leap_binder/leapbinder_decorators.py +380 -0
  6. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/leaploader.py +57 -17
  7. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/utils.py +1 -1
  8. {code_loader-1.0.50 → code_loader-1.0.52}/pyproject.toml +2 -1
  9. {code_loader-1.0.50 → code_loader-1.0.52}/LICENSE +0 -0
  10. {code_loader-1.0.50 → code_loader-1.0.52}/README.md +0 -0
  11. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/__init__.py +0 -0
  12. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/contract/__init__.py +0 -0
  13. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/contract/enums.py +0 -0
  14. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/contract/exceptions.py +0 -0
  15. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/contract/responsedataclasses.py +0 -0
  16. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/contract/visualizer_classes.py +0 -0
  17. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/experiment_api/__init__.py +0 -0
  18. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/experiment_api/api.py +0 -0
  19. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/experiment_api/cli_config_utils.py +0 -0
  20. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/experiment_api/client.py +0 -0
  21. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/experiment_api/epoch.py +0 -0
  22. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/experiment_api/experiment.py +0 -0
  23. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/experiment_api/experiment_context.py +0 -0
  24. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/experiment_api/types.py +0 -0
  25. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/experiment_api/utils.py +0 -0
  26. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/experiment_api/workingspace_config_utils.py +0 -0
  27. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/inner_leap_binder/__init__.py +0 -0
  28. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/visualizers/__init__.py +0 -0
  29. {code_loader-1.0.50 → code_loader-1.0.52}/code_loader/visualizers/default_visualizers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: code-loader
3
- Version: 1.0.50
3
+ Version: 1.0.52
4
4
  Summary:
5
5
  Home-page: https://github.com/tensorleap/code-loader
6
6
  License: MIT
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.8
13
13
  Classifier: Programming Language :: Python :: 3.9
14
14
  Classifier: Programming Language :: Python :: 3.10
15
15
  Classifier: Programming Language :: Python :: 3.11
16
+ Requires-Dist: matplotlib (>=3.3,<3.4)
16
17
  Requires-Dist: numpy (>=1.22.3,<2.0.0)
17
18
  Requires-Dist: psutil (>=5.9.5,<6.0.0)
18
19
  Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
@@ -0,0 +1,83 @@
1
+ # mypy: ignore-errors
2
+ import traceback
3
+ from dataclasses import dataclass
4
+
5
+ from typing import List, Tuple, Optional
6
+
7
+ from multiprocessing import Process, Queue
8
+
9
+ from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
10
+ from code_loader.leaploader import LeapLoader
11
+ from code_loader.contract.enums import DataStateEnum
12
+ from code_loader.metric_calculator_parallelized import MetricCalculatorParallelized
13
+ from code_loader.samples_generator_parallelized import SamplesGeneratorParallelized
14
+
15
+
16
+ @dataclass
17
+ class SampleSerializableError:
18
+ state: DataStateEnum
19
+ index: int
20
+ leap_script_trace: str
21
+ exception_as_str: str
22
+
23
+
24
+ class CodeIntegrationProcessesManager:
25
+ def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
26
+ max_samples_in_queue: int = 128) -> None:
27
+ self.metric_calculator_parallelized = MetricCalculatorParallelized(code_path, code_entry_name)
28
+ self.samples_generator_parallelized = SamplesGeneratorParallelized(code_path, code_entry_name)
29
+
30
+ def _create_and_start_process(self) -> Process:
31
+ process = self.multiprocessing_context.Process(
32
+ target=CodeIntegrationProcessesManager._process_func,
33
+ args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
34
+ self._ready_processed_results))
35
+ process.daemon = True
36
+ process.start()
37
+ return process
38
+
39
+ def _run_and_warm_first_process(self):
40
+ process = self._create_and_start_process()
41
+ self.processes = [process]
42
+
43
+ # needed in order to make sure the preprocess func runs once in nonparallel
44
+ self._start_process_inputs([(DataStateEnum.training, 0)])
45
+ self._get_next_ready_processed_result()
46
+
47
+ def _operation_decider(self):
48
+ if self.metric_calculator_parallelized._ready_processed_results.empty() and not \
49
+ self.metric_calculator_parallelized._inputs_waiting_to_be_process.empty():
50
+ return 'metric'
51
+
52
+ if self.samples_generator_parallelized._ready_processed_results.empty() and not \
53
+ self.samples_generator_parallelized._inputs_waiting_to_be_process.empty():
54
+ return 'dataset'
55
+
56
+
57
+
58
+
59
+ @staticmethod
60
+ def _process_func(code_path: str, code_entry_name: str,
61
+ samples_to_process: Queue, ready_samples: Queue,
62
+ metrics_to_process: Queue, ready_metrics: Queue) -> None:
63
+ import os
64
+ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
65
+
66
+ leap_loader = LeapLoader(code_path, code_entry_name)
67
+ while True:
68
+
69
+ # decide on sample or metric to process
70
+ state, idx = samples_to_process.get(block=True)
71
+ leap_loader._preprocess_result()
72
+ try:
73
+ sample = leap_loader.get_sample(state, idx)
74
+ except Exception as e:
75
+ leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
76
+ ready_samples.put(SampleSerializableError(state, idx, leap_script_trace, str(e)))
77
+ continue
78
+
79
+ ready_samples.put(sample)
80
+
81
+ def generate_samples(self, sample_identities: List[Tuple[DataStateEnum, int]]):
82
+ return self.start_process_inputs(sample_identities)
83
+
@@ -32,21 +32,39 @@ class PreprocessResponse:
32
32
  }
33
33
  response = PreprocessResponse(length=len(preprocessed_data), data=preprocessed_data)
34
34
  """
35
- length: int
36
- data: Any
37
-
38
-
39
- SectionCallableInterface = Callable[[int, PreprocessResponse], npt.NDArray[np.float32]]
35
+ length: Optional[int] = None # Deprecated. Please use sample_ids instead
36
+ data: Any = None
37
+ sample_ids: Optional[Union[List[str], List[int]]] = None
38
+ state: Optional[DataStateType] = None
39
+ sample_id_type: Optional[Union[Type[str], Type[int]]] = None
40
+
41
+ def __post_init__(self) -> None:
42
+ if self.length is not None and self.sample_ids is None:
43
+ self.sample_ids = [i for i in range(self.length)]
44
+ self.sample_id_type = int
45
+ elif self.length is None and self.sample_ids is not None:
46
+ self.length = len(self.sample_ids)
47
+ if self.sample_id_type is None:
48
+ self.sample_id_type = str
49
+ else:
50
+ raise Exception("length is deprecated.")
51
+
52
+ def __len__(self) -> int:
53
+ assert self.sample_ids is not None
54
+ return len(self.sample_ids)
55
+
56
+
57
+ SectionCallableInterface = Callable[[Union[int, str], PreprocessResponse], npt.NDArray[np.float32]]
40
58
 
41
59
  MetadataSectionCallableInterface = Union[
42
- Callable[[int, PreprocessResponse], int],
43
- Callable[[int, PreprocessResponse], Dict[str, int]],
44
- Callable[[int, PreprocessResponse], str],
45
- Callable[[int, PreprocessResponse], Dict[str, str]],
46
- Callable[[int, PreprocessResponse], bool],
47
- Callable[[int, PreprocessResponse], Dict[str, bool]],
48
- Callable[[int, PreprocessResponse], float],
49
- Callable[[int, PreprocessResponse], Dict[str, float]]
60
+ Callable[[Union[int, str], PreprocessResponse], int],
61
+ Callable[[Union[int, str], PreprocessResponse], Dict[str, int]],
62
+ Callable[[Union[int, str], PreprocessResponse], str],
63
+ Callable[[Union[int, str], PreprocessResponse], Dict[str, str]],
64
+ Callable[[Union[int, str], PreprocessResponse], bool],
65
+ Callable[[Union[int, str], PreprocessResponse], Dict[str, bool]],
66
+ Callable[[Union[int, str], PreprocessResponse], float],
67
+ Callable[[Union[int, str], PreprocessResponse], Dict[str, float]]
50
68
  ]
51
69
 
52
70
 
@@ -181,5 +199,5 @@ class DatasetSample:
181
199
  inputs: Dict[str, npt.NDArray[np.float32]]
182
200
  gt: Optional[Dict[str, npt.NDArray[np.float32]]]
183
201
  metadata: Dict[str, Union[str, int, bool, float]]
184
- index: int
202
+ index: Union[int, str]
185
203
  state: DataStateEnum
@@ -37,6 +37,8 @@ class LeapBinder:
37
37
  self._encoder_names: List[str] = list()
38
38
  self._extend_with_default_visualizers()
39
39
 
40
+ self.batch_size_to_validate: Optional[int] = None
41
+
40
42
  def _extend_with_default_visualizers(self) -> None:
41
43
  self.set_visualizer(function=default_image_visualizer, name=DefaultVisualizer.Image.value,
42
44
  visualizer_type=LeapDataType.Image)
@@ -389,17 +391,36 @@ class LeapBinder:
389
391
  if preprocess is None:
390
392
  raise Exception("Please make sure you call the leap_binder.set_preprocess method")
391
393
  preprocess_results = preprocess.function()
392
- preprocess_result_dict = {
393
- DataStateEnum(i): preprocess_result
394
- for i, preprocess_result in enumerate(preprocess_results)
395
- }
394
+ preprocess_result_dict = {}
395
+ for i, preprocess_result in enumerate(preprocess_results):
396
+ if preprocess_result.state is None:
397
+ state_enum = DataStateEnum(i)
398
+ preprocess_result.state = DataStateType(state_enum.name)
399
+ else:
400
+ state_enum = DataStateEnum[preprocess_result.state.name]
396
401
 
397
- unlabeled_preprocess = self.setup_container.unlabeled_data_preprocess
398
- if unlabeled_preprocess is not None:
399
- preprocess_result_dict[DataStateEnum.unlabeled] = unlabeled_preprocess.function()
402
+ if state_enum in preprocess_result_dict:
403
+ raise Exception(f"Duplicate state {state_enum.name} in preprocess results")
404
+ preprocess_result_dict[state_enum] = preprocess_result
405
+
406
+ if DataStateEnum.unlabeled not in preprocess_result_dict:
407
+ preprocess_unlabeled_result = self.get_preprocess_unlabeled_result()
408
+ if preprocess_unlabeled_result is not None:
409
+ preprocess_result_dict[DataStateEnum.unlabeled] = preprocess_unlabeled_result
410
+
411
+ if DataStateEnum.training not in preprocess_result_dict:
412
+ raise Exception("Training data is required")
413
+ if DataStateEnum.validation not in preprocess_result_dict:
414
+ raise Exception("Validation data is required")
400
415
 
401
416
  return preprocess_result_dict
402
417
 
418
+ def get_preprocess_unlabeled_result(self) -> Optional[PreprocessResponse]:
419
+ unlabeled_preprocess = self.setup_container.unlabeled_data_preprocess
420
+ if unlabeled_preprocess is not None:
421
+ return unlabeled_preprocess.function()
422
+ return None
423
+
403
424
  def _get_all_dataset_base_handlers(self) -> List[Union[DatasetBaseHandler, MetadataHandler]]:
404
425
  all_dataset_base_handlers: List[Union[DatasetBaseHandler, MetadataHandler]] = []
405
426
  all_dataset_base_handlers.extend(self.setup_container.inputs)
@@ -411,7 +432,8 @@ class LeapBinder:
411
432
  def check_handler(
412
433
  preprocess_response: PreprocessResponse, test_result: List[DatasetTestResultPayload],
413
434
  dataset_base_handler: Union[DatasetBaseHandler, MetadataHandler]) -> List[DatasetTestResultPayload]:
414
- raw_result = dataset_base_handler.function(0, preprocess_response)
435
+ assert preprocess_response.sample_ids is not None
436
+ raw_result = dataset_base_handler.function(preprocess_response.sample_ids[0], preprocess_response)
415
437
  handler_type = 'metadata' if isinstance(dataset_base_handler, MetadataHandler) else None
416
438
  if isinstance(dataset_base_handler, MetadataHandler) and isinstance(raw_result, dict):
417
439
  metadata_test_result_payloads = [
@@ -452,4 +474,9 @@ class LeapBinder:
452
474
  self.check_handlers(preprocess_result)
453
475
  print("Successful!")
454
476
 
477
+ def set_batch_size_to_validate(self, batch_size: int) -> None:
478
+ self.batch_size_to_validate = batch_size
479
+
480
+
481
+
455
482
 
@@ -0,0 +1,380 @@
1
+ # mypy: ignore-errors
2
+
3
+ from typing import Optional, Union, Callable, List
4
+
5
+ import numpy as np
6
+ import numpy.typing as npt
7
+
8
+ from code_loader.contract.datasetclasses import CustomCallableInterfaceMultiArgs, \
9
+ CustomMultipleReturnCallableInterfaceMultiArgs, ConfusionMatrixCallableInterfaceMultiArgs, CustomCallableInterface, \
10
+ VisualizerCallableInterface, MetadataSectionCallableInterface, PreprocessResponse, SectionCallableInterface, \
11
+ ConfusionMatrixElement
12
+ from code_loader.contract.enums import MetricDirection, LeapDataType
13
+ from code_loader import leap_binder
14
+ from code_loader.contract.visualizer_classes import LeapImage, LeapImageMask, LeapTextMask, LeapText, LeapGraph, \
15
+ LeapHorizontalBar, LeapImageWithBBox, LeapImageWithHeatmap
16
+
17
+
18
+ def tensorleap_custom_metric(name: str, direction: Optional[MetricDirection] = MetricDirection.Downward):
19
+ def decorating_function(
20
+ user_function: Union[CustomCallableInterfaceMultiArgs,
21
+ CustomMultipleReturnCallableInterfaceMultiArgs,
22
+ ConfusionMatrixCallableInterfaceMultiArgs]
23
+ ):
24
+ for metric_handler in leap_binder.setup_container.metrics:
25
+ if metric_handler.name == name:
26
+ raise Exception(f'Metric with name {name} already exists. '
27
+ f'Please choose another')
28
+
29
+ leap_binder.add_custom_metric(user_function, name, direction)
30
+
31
+ def _validate_input_args(*args, **kwargs) -> None:
32
+ for i, arg in enumerate(args):
33
+ assert isinstance(arg, np.ndarray), (f'tensorleap_custom_metric validation failed: '
34
+ f'Argument #{i} should be a numpy array. Got {type(arg)}.')
35
+ if leap_binder.batch_size_to_validate:
36
+ assert arg.shape[0] == leap_binder.batch_size_to_validate, \
37
+ (f'tensorleap_custom_metric validation failed: Argument #{i} '
38
+ f'first dim should be as the batch size. Got {arg.shape[0]} '
39
+ f'instead of {leap_binder.batch_size_to_validate}')
40
+
41
+ for _arg_name, arg in kwargs.items():
42
+ assert isinstance(arg, np.ndarray), (f'tensorleap_custom_metric validation failed: '
43
+ f'Argument {_arg_name} should be a numpy array. Got {type(arg)}.')
44
+ if leap_binder.batch_size_to_validate:
45
+ assert arg.shape[0] == leap_binder.batch_size_to_validate, \
46
+ (f'tensorleap_custom_metric validation failed: Argument {_arg_name} '
47
+ f'first dim should be as the batch size. Got {arg.shape[0]} '
48
+ f'instead of {leap_binder.batch_size_to_validate}')
49
+
50
+ def _validate_result(result) -> None:
51
+ supported_types_message = (f'tensorleap_custom_metric validation failed: '
52
+ f'Metric has returned unsupported type. Supported types are List[float], '
53
+ f'List[List[ConfusionMatrixElement]], NDArray[np.float32]. ')
54
+
55
+ if isinstance(result, list):
56
+ if isinstance(result[0], list):
57
+ assert isinstance(result[0][0], ConfusionMatrixElement), \
58
+ f'{supported_types_message}Got List[List[{type(result[0][0])}]].'
59
+ else:
60
+ assert isinstance(result[0], float), f'{supported_types_message}Got List[{type(result[0])}].'
61
+
62
+ else:
63
+ assert isinstance(result, np.ndarray), f'{supported_types_message}Got {type(result)}.'
64
+ assert len(result.shape) == 1, (f'tensorleap_custom_metric validation failed: '
65
+ f'The return shape should be 1D. Got {len(result.shape)}D.')
66
+ if leap_binder.batch_size_to_validate:
67
+ assert len(result) == leap_binder.batch_size_to_validate, \
68
+ f'tensorleap_custom_metrix validation failed: The return len should be as the batch size.'
69
+
70
+ def inner(*args, **kwargs):
71
+ _validate_input_args(*args, **kwargs)
72
+ result = user_function(*args, **kwargs)
73
+ _validate_result(result)
74
+ return result
75
+
76
+ return inner
77
+
78
+ return decorating_function
79
+
80
+
81
+ def tensorleap_custom_visualizer(name: str, visualizer_type: LeapDataType,
82
+ heatmap_function: Optional[Callable[..., npt.NDArray[np.float32]]] = None):
83
+ def decorating_function(user_function: VisualizerCallableInterface):
84
+ for viz_handler in leap_binder.setup_container.visualizers:
85
+ if viz_handler.name == name:
86
+ raise Exception(f'Visualizer with name {name} already exists. '
87
+ f'Please choose another')
88
+
89
+ leap_binder.set_visualizer(user_function, name, visualizer_type, heatmap_function)
90
+
91
+ def _validate_input_args(*args, **kwargs):
92
+ for i, arg in enumerate(args):
93
+ assert isinstance(arg, np.ndarray), (f'tensorleap_custom_visualizer validation failed: '
94
+ f'Argument #{i} should be a numpy array. Got {type(arg)}.')
95
+ if leap_binder.batch_size_to_validate:
96
+ assert arg.shape[0] == leap_binder.batch_size_to_validate, \
97
+ (f'tensorleap_custom_visualizer validation failed: Argument #{i} '
98
+ f'first dim should be 1. The visualizers will always run with batch size 1. Got {arg.shape[0]}')
99
+
100
+ for _arg_name, arg in kwargs.items():
101
+ assert isinstance(arg, np.ndarray), (f'tensorleap_custom_visualizer validation failed: '
102
+ f'Argument {_arg_name} should be a numpy array. Got {type(arg)}.')
103
+ if leap_binder.batch_size_to_validate:
104
+ assert arg.shape[0] == leap_binder.batch_size_to_validate, \
105
+ (f'tensorleap_custom_visualizer validation failed: Argument {_arg_name} '
106
+ f'first dim should be 1. The visualizers will always run with batch size 1. Got {arg.shape[0]}')
107
+
108
+ def _validate_result(result):
109
+ result_type_map = {
110
+ LeapDataType.Image: LeapImage,
111
+ LeapDataType.ImageMask: LeapImageMask,
112
+ LeapDataType.TextMask: LeapTextMask,
113
+ LeapDataType.Text: LeapText,
114
+ LeapDataType.Graph: LeapGraph,
115
+ LeapDataType.HorizontalBar: LeapHorizontalBar,
116
+ LeapDataType.ImageWithBBox: LeapImageWithBBox,
117
+ LeapDataType.ImageWithHeatmap: LeapImageWithHeatmap
118
+ }
119
+ assert isinstance(result, result_type_map[visualizer_type]), \
120
+ (f'tensorleap_custom_visualizer validation failed: '
121
+ f'The return type should be {result_type_map[visualizer_type]}. Got {type(result)}.')
122
+
123
+ def inner(*args, **kwargs):
124
+ _validate_input_args(*args, **kwargs)
125
+ result = user_function(*args, **kwargs)
126
+ _validate_result(result)
127
+ return result
128
+
129
+ return inner
130
+
131
+ return decorating_function
132
+
133
+
134
+ def tensorleap_metadata(name: str):
135
+ def decorating_function(user_function: MetadataSectionCallableInterface):
136
+ for metadata_handler in leap_binder.setup_container.metadata:
137
+ if metadata_handler.name == name:
138
+ raise Exception(f'Metadata with name {name} already exists. '
139
+ f'Please choose another')
140
+
141
+ leap_binder.set_metadata(user_function, name)
142
+
143
+ def _validate_input_args(sample_id: Union[int, str], preprocess_response: PreprocessResponse):
144
+ assert isinstance(sample_id, (int, str)), \
145
+ (f'tensorleap_metadata validation failed: '
146
+ f'Argument sample_id should be either int or str. Got {type(sample_id)}.')
147
+ assert isinstance(preprocess_response, PreprocessResponse), \
148
+ (f'tensorleap_metadata validation failed: '
149
+ f'Argument preprocess_response should be a PreprocessResponse. Got {type(preprocess_response)}.')
150
+ assert type(sample_id) == preprocess_response.sample_id_type, \
151
+ (f'tensorleap_metadata validation failed: '
152
+ f'Argument sample_id should be as the same type as defined in the preprocess response '
153
+ f'{preprocess_response.sample_id_type}. Got {type(sample_id)}.')
154
+
155
+ def _validate_result(result):
156
+ supported_result_types = (int, str, bool, float, dict, np.floating,
157
+ np.bool_, np.unsignedinteger, np.signedinteger, np.integer)
158
+ assert isinstance(result, supported_result_types), \
159
+ (f'tensorleap_metadata validation failed: '
160
+ f'Unsupported return type. Got {type(result)}. should be any of {str(supported_result_types)}')
161
+ if isinstance(result, dict):
162
+ for key, value in result.items():
163
+ assert isinstance(key, str), \
164
+ (f'tensorleap_metadata validation failed: '
165
+ f'Keys in the return dict should be of type str. Got {type(key)}.')
166
+ assert isinstance(value, supported_result_types), \
167
+ (f'tensorleap_metadata validation failed: '
168
+ f'Values in the return dict should be of type {str(supported_result_types)}. Got {type(value)}.')
169
+
170
+ def inner(sample_id, preprocess_response):
171
+ _validate_input_args(sample_id, preprocess_response)
172
+ result = user_function(sample_id, preprocess_response)
173
+ _validate_result(result)
174
+ return result
175
+
176
+ return inner
177
+
178
+ return decorating_function
179
+
180
+
181
+ def tensorleap_preprocess():
182
+ def decorating_function(user_function: Callable[[], List[PreprocessResponse]]):
183
+ leap_binder.set_preprocess(user_function)
184
+
185
+ def _validate_input_args(*args, **kwargs):
186
+ assert len(args) == 0 and len(kwargs) == 0, \
187
+ (f'tensorleap_preprocess validation failed: '
188
+ f'The function should not take any arguments. Got {args} and {kwargs}.')
189
+
190
+ def _validate_result(result):
191
+ assert isinstance(result, list), \
192
+ (f'tensorleap_preprocess validation failed: '
193
+ f'The return type should be a list. Got {type(result)}.')
194
+ for i, response in enumerate(result):
195
+ assert isinstance(response, PreprocessResponse), \
196
+ (f'tensorleap_preprocess validation failed: '
197
+ f'Element #{i} in the return list should be a PreprocessResponse. Got {type(response)}.')
198
+ assert len(set(result)) == len(result), \
199
+ (f'tensorleap_preprocess validation failed: '
200
+ f'The return list should not contain duplicate PreprocessResponse objects.')
201
+
202
+ def inner(*args, **kwargs):
203
+ _validate_input_args(*args, **kwargs)
204
+ result = user_function()
205
+ _validate_result(result)
206
+ return result
207
+
208
+ return inner
209
+
210
+ return decorating_function
211
+
212
+
213
+ def tensorleap_unlabeled_preprocess():
214
+ def decorating_function(user_function: Callable[[], PreprocessResponse]):
215
+ leap_binder.set_unlabeled_data_preprocess(user_function)
216
+
217
+ def _validate_input_args(*args, **kwargs):
218
+ assert len(args) == 0 and len(kwargs) == 0, \
219
+ (f'tensorleap_unlabeled_preprocess validation failed: '
220
+ f'The function should not take any arguments. Got {args} and {kwargs}.')
221
+
222
+ def _validate_result(result):
223
+ assert isinstance(result, PreprocessResponse), \
224
+ (f'tensorleap_unlabeled_preprocess validation failed: '
225
+ f'The return type should be a PreprocessResponse. Got {type(result)}.')
226
+
227
+ def inner(*args, **kwargs):
228
+ _validate_input_args(*args, **kwargs)
229
+ result = user_function()
230
+ _validate_result(result)
231
+ return result
232
+
233
+ return inner
234
+
235
+ return decorating_function
236
+
237
+
238
+ def tensorleap_input_encoder(name: str):
239
+ def decorating_function(user_function: SectionCallableInterface):
240
+ for input_handler in leap_binder.setup_container.inputs:
241
+ if input_handler.name == name:
242
+ raise Exception(f'Input with name {name} already exists. '
243
+ f'Please choose another')
244
+
245
+ leap_binder.set_input(user_function, name)
246
+
247
+ def _validate_input_args(sample_id: Union[int, str], preprocess_response: PreprocessResponse):
248
+ assert isinstance(sample_id, (int, str)), \
249
+ (f'tensorleap_input_encoder validation failed: '
250
+ f'Argument sample_id should be either int or str. Got {type(sample_id)}.')
251
+ assert isinstance(preprocess_response, PreprocessResponse), \
252
+ (f'tensorleap_input_encoder validation failed: '
253
+ f'Argument preprocess_response should be a PreprocessResponse. Got {type(preprocess_response)}.')
254
+ assert type(sample_id) == preprocess_response.sample_id_type, \
255
+ (f'tensorleap_input_encoder validation failed: '
256
+ f'Argument sample_id should be as the same type as defined in the preprocess response '
257
+ f'{preprocess_response.sample_id_type}. Got {type(sample_id)}.')
258
+
259
+ def _validate_result(result):
260
+ assert isinstance(result, np.ndarray), \
261
+ (f'tensorleap_input_encoder validation failed: '
262
+ f'Unsupported return type. Should be a numpy array. Got {type(result)}.')
263
+ assert result.dtype == np.float32, \
264
+ (f'tensorleap_input_encoder validation failed: '
265
+ f'The return type should be a numpy array of type float32. Got {result.dtype}.')
266
+
267
+ def inner(sample_id, preprocess_response):
268
+ _validate_input_args(sample_id, preprocess_response)
269
+ result = user_function(sample_id, preprocess_response)
270
+ _validate_result(result)
271
+ return result
272
+
273
+ return inner
274
+
275
+ return decorating_function
276
+
277
+
278
+ def tensorleap_gt_encoder(name: str):
279
+ def decorating_function(user_function: SectionCallableInterface):
280
+ for gt_handler in leap_binder.setup_container.ground_truths:
281
+ if gt_handler.name == name:
282
+ raise Exception(f'Input with name {name} already exists. '
283
+ f'Please choose another')
284
+
285
+ leap_binder.set_ground_truth(user_function, name)
286
+
287
+ def _validate_input_args(sample_id: Union[int, str], preprocess_response: PreprocessResponse):
288
+ assert isinstance(sample_id, (int, str)), \
289
+ (f'tensorleap_gt_encoder validation failed: '
290
+ f'Argument sample_id should be either int or str. Got {type(sample_id)}.')
291
+ assert isinstance(preprocess_response, PreprocessResponse), \
292
+ (f'tensorleap_gt_encoder validation failed: '
293
+ f'Argument preprocess_response should be a PreprocessResponse. Got {type(preprocess_response)}.')
294
+ assert type(sample_id) == preprocess_response.sample_id_type, \
295
+ (f'tensorleap_gt_encoder validation failed: '
296
+ f'Argument sample_id should be as the same type as defined in the preprocess response '
297
+ f'{preprocess_response.sample_id_type}. Got {type(sample_id)}.')
298
+
299
+ def _validate_result(result):
300
+ assert isinstance(result, np.ndarray), \
301
+ (f'tensorleap_gt_encoder validation failed: '
302
+ f'Unsupported return type. Should be a numpy array. Got {type(result)}.')
303
+ assert result.dtype == np.float32, \
304
+ (f'tensorleap_gt_encoder validation failed: '
305
+ f'The return type should be a numpy array of type float32. Got {result.dtype}.')
306
+
307
+ def inner(sample_id, preprocess_response):
308
+ _validate_input_args(sample_id, preprocess_response)
309
+ result = user_function(sample_id, preprocess_response)
310
+ _validate_result(result)
311
+ return result
312
+
313
+ return inner
314
+
315
+ return decorating_function
316
+
317
+
318
+ def tensorleap_custom_loss(name: str):
319
+ def decorating_function(user_function: CustomCallableInterface):
320
+ for loss_handler in leap_binder.setup_container.custom_loss_handlers:
321
+ if loss_handler.name == name:
322
+ raise Exception(f'Input with name {name} already exists. '
323
+ f'Please choose another')
324
+
325
+ leap_binder.add_custom_loss(user_function, name)
326
+
327
+ def _validate_input_args(*args, **kwargs):
328
+ try:
329
+ import tensorflow as tf
330
+ except ImportError:
331
+ raise Exception('the input arguments of the custom loss function should be tensorflow tensors')
332
+
333
+ for i, arg in enumerate(args):
334
+ assert isinstance(arg, tf.Tensor), (f'tensorleap_custom_loss validation failed: '
335
+ f'Argument #{i} should be a tensorflow tensor. Got {type(arg)}.')
336
+ for _arg_name, arg in kwargs.items():
337
+ assert isinstance(arg, tf.Tensor), (f'tensorleap_custom_loss validation failed: '
338
+ f'Argument {_arg_name} should be a tensorflow tensor. Got {type(arg)}.')
339
+
340
+ def _validate_result(result):
341
+ try:
342
+ import tensorflow as tf
343
+ except ImportError:
344
+ raise Exception('the input arguments of the custom loss function should be tensorflow tensors')
345
+
346
+ assert isinstance(result, (np.ndarray, tf.Tensor)), \
347
+ (f'tensorleap_custom_loss validation failed: '
348
+ f'The return type should be a numpy array or a tensorflow tensor. Got {type(result)}.')
349
+
350
+ def inner(sample_id, preprocess_response):
351
+ _validate_input_args(sample_id, preprocess_response)
352
+ result = user_function(sample_id, preprocess_response)
353
+ _validate_result(result)
354
+ return result
355
+
356
+ return inner
357
+
358
+ return decorating_function
359
+
360
+
361
+ def tensorleap_custom_layer(name: str):
362
+ def decorating_function(custom_layer):
363
+ for custom_layer_handler in leap_binder.setup_container.custom_layers.values():
364
+ if custom_layer_handler.name == name:
365
+ raise Exception(f'Custom Layer with name {name} already exists. '
366
+ f'Please choose another')
367
+
368
+ try:
369
+ import tensorflow as tf
370
+ except ImportError:
371
+ raise Exception('The custom layer should be inherited from tf.keras.layers.Layer')
372
+
373
+ if not issubclass(custom_layer, tf.keras.layers.Layer):
374
+ raise Exception('The custom layer should be inherited from tf.keras.layers.Layer')
375
+
376
+ leap_binder.set_custom_layer(custom_layer, name)
377
+
378
+ return custom_layer
379
+
380
+ return decorating_function
@@ -2,10 +2,11 @@
2
2
  import importlib.util
3
3
  import io
4
4
  import sys
5
+ import time
5
6
  from contextlib import redirect_stdout
6
7
  from functools import lru_cache
7
8
  from pathlib import Path
8
- from typing import Dict, List, Iterable, Union, Any
9
+ from typing import Dict, List, Iterable, Union, Any, Type
9
10
 
10
11
  import numpy as np
11
12
  import numpy.typing as npt
@@ -27,6 +28,8 @@ class LeapLoader:
27
28
  self.code_entry_name = code_entry_name
28
29
  self.code_path = code_path
29
30
 
31
+ self._preprocess_result_cached = None
32
+
30
33
  @lru_cache()
31
34
  def exec_script(self) -> None:
32
35
  try:
@@ -103,12 +106,16 @@ class LeapLoader:
103
106
  for prediction_type in setup.prediction_types
104
107
  }
105
108
 
106
- def get_sample(self, state: DataStateEnum, idx: int) -> DatasetSample:
109
+ def get_sample(self, state: DataStateEnum, sample_id: Union[int, str]) -> DatasetSample:
107
110
  self.exec_script()
108
- sample = DatasetSample(inputs=self._get_inputs(state, idx),
109
- gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, idx),
110
- metadata=self._get_metadata(state, idx),
111
- index=idx,
111
+ preprocess_result = self._preprocess_result()
112
+ if state == DataStateEnum.unlabeled and sample_id not in preprocess_result[state].sample_ids:
113
+ self._preprocess_result(update_unlabeled_preprocess=True)
114
+
115
+ sample = DatasetSample(inputs=self._get_inputs(state, sample_id),
116
+ gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, sample_id),
117
+ metadata=self._get_metadata(state, sample_id),
118
+ index=sample_id,
112
119
  state=state)
113
120
  return sample
114
121
 
@@ -148,6 +155,13 @@ class LeapLoader:
148
155
  test_result = DatasetTestResultPayload('preprocess')
149
156
  try:
150
157
  preprocess_result = self._preprocess_result()
158
+ if self.get_sample_id_type() is str:
159
+ max_allowed_item_size = np.dtype('<U256').itemsize
160
+ for state, preprocess_response in preprocess_result.items():
161
+ sample_ids_array = np.array(preprocess_response.sample_ids)
162
+ if sample_ids_array.dtype.itemsize > max_allowed_item_size:
163
+ raise Exception(f"Sample id are too long. Max allowed length is 256 charecters.")
164
+
151
165
  global_leap_binder.check_preprocess(preprocess_result)
152
166
  except Exception as e:
153
167
  line_number, file_name, stacktrace = get_root_exception_file_and_line_number()
@@ -279,27 +293,42 @@ class LeapLoader:
279
293
  ]
280
294
  return ModelSetup(custom_layer_instances)
281
295
 
282
def _preprocess_result(self, update_unlabeled_preprocess=False) -> Dict[DataStateEnum, PreprocessResponse]:
    """Return the preprocess responses, cached on this instance.

    The integration script is executed first. On the first call the result of
    ``global_leap_binder.get_preprocess_result()`` is stored in
    ``self._preprocess_result_cached``; later calls return that same dict.

    :param update_unlabeled_preprocess: when true, the ``DataStateEnum.unlabeled``
        entry of the cached dict is replaced with
        ``global_leap_binder.get_preprocess_unlabeled_result()``.
    :return: the cached mapping from data state to preprocess response.
    """
    self.exec_script()

    cached = self._preprocess_result_cached
    if cached is None:
        cached = global_leap_binder.get_preprocess_result()
        self._preprocess_result_cached = cached

    if update_unlabeled_preprocess:
        # Mutates the cached dict in place, so every holder of it sees the new entry.
        cached[DataStateEnum.unlabeled] = global_leap_binder.get_preprocess_unlabeled_result()

    return cached
307
+
308
def get_preprocess_sample_ids(self, update_unlabeled_preprocess=False) -> Dict[DataStateEnum, Union[List[int], List[str]]]:
    """Return the sample ids of every preprocess response, keyed by data state.

    :param update_unlabeled_preprocess: forwarded to ``_preprocess_result``.
    :return: a new dict mapping each state to that response's ``sample_ids``.
    """
    responses_by_state = self._preprocess_result(update_unlabeled_preprocess)
    return {
        data_state: response.sample_ids
        for data_state, response in responses_by_state.items()
    }
286
315
 
287
316
def _get_dataset_handlers(self, handlers: Iterable[DatasetBaseHandler],
                          state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, npt.NDArray[np.float32]]:
    """Run every handler on one sample and collect the results by handler name.

    :param handlers: handlers exposing ``function`` and ``name``.
    :param state: data state whose preprocess response is passed to each handler.
    :param sample_id: identifier of the sample passed to each handler.
    :return: dict mapping ``handler.name`` to the value returned by
        ``handler.function(sample_id, <preprocess response of state>)``.
    """
    outputs = {}
    state_response = self._preprocess_result()[state]
    for dataset_handler in handlers:
        outputs[dataset_handler.name] = dataset_handler.function(sample_id, state_response)
    return outputs
297
326
 
298
def _get_inputs(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, npt.NDArray[np.float32]]:
    """Return the results of the registered input handlers for one sample, keyed by handler name."""
    input_handlers = global_leap_binder.setup_container.inputs
    return self._get_dataset_handlers(input_handlers, state, sample_id)
300
329
 
301
def _get_gt(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, npt.NDArray[np.float32]]:
    """Return the results of the registered ground-truth handlers for one sample, keyed by handler name."""
    ground_truth_handlers = global_leap_binder.setup_container.ground_truths
    return self._get_dataset_handlers(ground_truth_handlers, state, sample_id)
303
332
 
304
333
  @lru_cache()
305
334
  def _metadata_name_to_type(self) -> Dict[str, DatasetMetadataType]:
@@ -334,12 +363,12 @@ class LeapLoader:
334
363
 
335
364
  return converted_value
336
365
 
337
- def _get_metadata(self, state: DataStateEnum, idx: int) -> Dict[str, Union[str, int, bool, float]]:
366
+ def _get_metadata(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, Union[str, int, bool, float]]:
338
367
  result_agg = {}
339
368
  preprocess_result = self._preprocess_result()
340
369
  preprocess_state = preprocess_result[state]
341
370
  for handler in global_leap_binder.setup_container.metadata:
342
- handler_result = handler.function(idx, preprocess_state)
371
+ handler_result = handler.function(sample_id, preprocess_state)
343
372
  if isinstance(handler_result, dict):
344
373
  for single_metadata_name, single_metadata_result in handler_result.items():
345
374
  handler_name = f'{handler.name}_{single_metadata_name}'
@@ -349,3 +378,14 @@ class LeapLoader:
349
378
  result_agg[handler_name] = self._convert_metadata_to_correct_type(handler_name, handler_result)
350
379
 
351
380
  return result_agg
381
+
382
@lru_cache()
def get_sample_id_type(self) -> Type:
    """Return the sample id type shared by all preprocess responses.

    The value is memoized by ``lru_cache`` once it has been computed
    successfully; raised exceptions are not cached.

    :return: the ``sample_id_type`` common to every preprocess response.
    :raises Exception: if there are no preprocess responses at all, or if the
        responses do not all report the same ``sample_id_type``.
    """
    preprocess_results = list(self._preprocess_result().values())
    if not preprocess_results:
        # Without this guard the lookup below fails with a bare IndexError.
        raise Exception("No preprocess results found, cannot determine the sample id type")

    id_type = preprocess_results[0].sample_id_type
    if any(response.sample_id_type != id_type for response in preprocess_results[1:]):
        raise Exception("Different id types in preprocess results")

    return id_type
391
+
@@ -10,7 +10,7 @@ from code_loader.contract.datasetclasses import SectionCallableInterface, Prepro
10
10
 
11
11
 
12
12
  def to_numpy_return_wrapper(encoder_function: SectionCallableInterface) -> SectionCallableInterface:
13
- def numpy_encoder_function(idx: int, samples: PreprocessResponse) -> npt.NDArray[np.float32]:
13
+ def numpy_encoder_function(idx: Union[int, str], samples: PreprocessResponse) -> npt.NDArray[np.float32]:
14
14
  result = encoder_function(idx, samples)
15
15
  numpy_result: npt.NDArray[np.float32] = np.array(result)
16
16
  return numpy_result
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "code-loader"
3
- version = "1.0.50"
3
+ version = "1.0.52"
4
4
  description = ""
5
5
  authors = ["dorhar <doron.harnoy@tensorleap.ai>"]
6
6
  license = "MIT"
@@ -15,6 +15,7 @@ include = [
15
15
  python = ">=3.8,<3.12"
16
16
  numpy = "^1.22.3"
17
17
  psutil = "^5.9.5"
18
+ matplotlib = ">=3.3,<3.4"
18
19
  requests = "^2.32.3"
19
20
  pyyaml = "^6.0.2"
20
21
 
File without changes
File without changes