code-loader 1.0.50__py3-none-any.whl → 1.0.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
code_loader/code_inegration_processes_manager.py ADDED
@@ -0,0 +1,83 @@
+ # mypy: ignore-errors
+ import traceback
+ from dataclasses import dataclass
+
+ from typing import List, Tuple, Optional
+
+ from multiprocessing import Process, Queue
+
+ from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
+ from code_loader.leaploader import LeapLoader
+ from code_loader.contract.enums import DataStateEnum
+ from code_loader.metric_calculator_parallelized import MetricCalculatorParallelized
+ from code_loader.samples_generator_parallelized import SamplesGeneratorParallelized
+
+
+ @dataclass
+ class SampleSerializableError:
+     state: DataStateEnum
+     index: int
+     leap_script_trace: str
+     exception_as_str: str
+
+
+ class CodeIntegrationProcessesManager:
+     def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
+                  max_samples_in_queue: int = 128) -> None:
+         self.metric_calculator_parallelized = MetricCalculatorParallelized(code_path, code_entry_name)
+         self.samples_generator_parallelized = SamplesGeneratorParallelized(code_path, code_entry_name)
+
+     def _create_and_start_process(self) -> Process:
+         process = self.multiprocessing_context.Process(
+             target=CodeIntegrationProcessesManager._process_func,
+             args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
+                   self._ready_processed_results))
+         process.daemon = True
+         process.start()
+         return process
+
+     def _run_and_warm_first_process(self):
+         process = self._create_and_start_process()
+         self.processes = [process]
+
+         # needed in order to make sure the preprocess func runs once in nonparallel
+         self._start_process_inputs([(DataStateEnum.training, 0)])
+         self._get_next_ready_processed_result()
+
+     def _operation_decider(self):
+         if self.metric_calculator_parallelized._ready_processed_results.empty() and not \
+                 self.metric_calculator_parallelized._inputs_waiting_to_be_process.empty():
+             return 'metric'
+
+         if self.samples_generator_parallelized._ready_processed_results.empty() and not \
+                 self.samples_generator_parallelized._inputs_waiting_to_be_process.empty():
+             return 'dataset'
+
+
+
+
+     @staticmethod
+     def _process_func(code_path: str, code_entry_name: str,
+                       samples_to_process: Queue, ready_samples: Queue,
+                       metrics_to_process: Queue, ready_metrics: Queue) -> None:
+         import os
+         os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
+         leap_loader = LeapLoader(code_path, code_entry_name)
+         while True:
+
+             # decide on sample or metric to process
+             state, idx = samples_to_process.get(block=True)
+             leap_loader._preprocess_result()
+             try:
+                 sample = leap_loader.get_sample(state, idx)
+             except Exception as e:
+                 leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
+                 ready_samples.put(SampleSerializableError(state, idx, leap_script_trace, str(e)))
+                 continue
+
+             ready_samples.put(sample)
+
+     def generate_samples(self, sample_identities: List[Tuple[DataStateEnum, int]]):
+         return self.start_process_inputs(sample_identities)
+
code_loader/contract/datasetclasses.py CHANGED
@@ -32,21 +32,39 @@ class PreprocessResponse:
      }
      response = PreprocessResponse(length=len(preprocessed_data), data=preprocessed_data)
      """
-     length: int
-     data: Any
-
-
- SectionCallableInterface = Callable[[int, PreprocessResponse], npt.NDArray[np.float32]]
+     length: Optional[int] = None # Deprecated. Please use sample_ids instead
+     data: Any = None
+     sample_ids: Optional[Union[List[str], List[int]]] = None
+     state: Optional[DataStateType] = None
+     sample_id_type: Optional[Union[Type[str], Type[int]]] = None
+
+     def __post_init__(self) -> None:
+         if self.length is not None and self.sample_ids is None:
+             self.sample_ids = [i for i in range(self.length)]
+             self.sample_id_type = int
+         elif self.length is None and self.sample_ids is not None:
+             self.length = len(self.sample_ids)
+             if self.sample_id_type is None:
+                 self.sample_id_type = str
+         else:
+             raise Exception("length is deprecated.")
+
+     def __len__(self) -> int:
+         assert self.sample_ids is not None
+         return len(self.sample_ids)
+
+
+ SectionCallableInterface = Callable[[Union[int, str], PreprocessResponse], npt.NDArray[np.float32]]

  MetadataSectionCallableInterface = Union[
-     Callable[[int, PreprocessResponse], int],
-     Callable[[int, PreprocessResponse], Dict[str, int]],
-     Callable[[int, PreprocessResponse], str],
-     Callable[[int, PreprocessResponse], Dict[str, str]],
-     Callable[[int, PreprocessResponse], bool],
-     Callable[[int, PreprocessResponse], Dict[str, bool]],
-     Callable[[int, PreprocessResponse], float],
-     Callable[[int, PreprocessResponse], Dict[str, float]]
+     Callable[[Union[int, str], PreprocessResponse], int],
+     Callable[[Union[int, str], PreprocessResponse], Dict[str, int]],
+     Callable[[Union[int, str], PreprocessResponse], str],
+     Callable[[Union[int, str], PreprocessResponse], Dict[str, str]],
+     Callable[[Union[int, str], PreprocessResponse], bool],
+     Callable[[Union[int, str], PreprocessResponse], Dict[str, bool]],
+     Callable[[Union[int, str], PreprocessResponse], float],
+     Callable[[Union[int, str], PreprocessResponse], Dict[str, float]]
  ]

@@ -181,5 +199,5 @@ class DatasetSample:
      inputs: Dict[str, npt.NDArray[np.float32]]
      gt: Optional[Dict[str, npt.NDArray[np.float32]]]
      metadata: Dict[str, Union[str, int, bool, float]]
-     index: int
+     index: Union[int, str]
      state: DataStateEnum
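
Taken together, the two datasetclasses.py hunks above move sample addressing from a plain integer index to explicit sample ids that may be ints or strings. A minimal usage sketch, not part of the diff: only the import path and field names come from the hunks, while the data payload and the string id are made-up placeholders.

    from code_loader.contract.datasetclasses import PreprocessResponse

    # Deprecated style: __post_init__ derives integer ids 0..length-1.
    old_style = PreprocessResponse(length=3, data={"paths": ["a.png", "b.png", "c.png"]})
    assert old_style.sample_ids == [0, 1, 2] and old_style.sample_id_type is int

    # New style: explicit string ids; length and sample_id_type are filled in automatically.
    new_style = PreprocessResponse(data={"img_001": "a.png"}, sample_ids=["img_001"])
    assert len(new_style) == 1 and new_style.sample_id_type is str
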
code_loader/inner_leap_binder/leapbinder.py CHANGED
@@ -389,17 +389,36 @@ class LeapBinder:
          if preprocess is None:
              raise Exception("Please make sure you call the leap_binder.set_preprocess method")
          preprocess_results = preprocess.function()
-         preprocess_result_dict = {
-             DataStateEnum(i): preprocess_result
-             for i, preprocess_result in enumerate(preprocess_results)
-         }
+         preprocess_result_dict = {}
+         for i, preprocess_result in enumerate(preprocess_results):
+             if preprocess_result.state is None:
+                 state_enum = DataStateEnum(i)
+                 preprocess_result.state = DataStateType(state_enum.name)
+             else:
+                 state_enum = DataStateEnum[preprocess_result.state.name]

-         unlabeled_preprocess = self.setup_container.unlabeled_data_preprocess
-         if unlabeled_preprocess is not None:
-             preprocess_result_dict[DataStateEnum.unlabeled] = unlabeled_preprocess.function()
+             if state_enum in preprocess_result_dict:
+                 raise Exception(f"Duplicate state {state_enum.name} in preprocess results")
+             preprocess_result_dict[state_enum] = preprocess_result
+
+         if DataStateEnum.unlabeled not in preprocess_result_dict:
+             preprocess_unlabeled_result = self.get_preprocess_unlabeled_result()
+             if preprocess_unlabeled_result is not None:
+                 preprocess_result_dict[DataStateEnum.unlabeled] = preprocess_unlabeled_result
+
+         if DataStateEnum.training not in preprocess_result_dict:
+             raise Exception("Training data is required")
+         if DataStateEnum.validation not in preprocess_result_dict:
+             raise Exception("Validation data is required")

          return preprocess_result_dict

+     def get_preprocess_unlabeled_result(self) -> Optional[PreprocessResponse]:
+         unlabeled_preprocess = self.setup_container.unlabeled_data_preprocess
+         if unlabeled_preprocess is not None:
+             return unlabeled_preprocess.function()
+         return None
+
      def _get_all_dataset_base_handlers(self) -> List[Union[DatasetBaseHandler, MetadataHandler]]:
          all_dataset_base_handlers: List[Union[DatasetBaseHandler, MetadataHandler]] = []
          all_dataset_base_handlers.extend(self.setup_container.inputs)
@@ -411,7 +430,8 @@ class LeapBinder:
      def check_handler(
              preprocess_response: PreprocessResponse, test_result: List[DatasetTestResultPayload],
              dataset_base_handler: Union[DatasetBaseHandler, MetadataHandler]) -> List[DatasetTestResultPayload]:
-         raw_result = dataset_base_handler.function(0, preprocess_response)
+         assert preprocess_response.sample_ids is not None
+         raw_result = dataset_base_handler.function(preprocess_response.sample_ids[0], preprocess_response)
          handler_type = 'metadata' if isinstance(dataset_base_handler, MetadataHandler) else None
          if isinstance(dataset_base_handler, MetadataHandler) and isinstance(raw_result, dict):
              metadata_test_result_payloads = [
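
The leapbinder.py hunks above let each PreprocessResponse carry an explicit state and reject result sets that lack training or validation splits. A rough sketch of a preprocess function written against that contract; it assumes DataStateType is importable from code_loader.contract.enums and has training/validation members, and the ids and data are placeholders.

    from code_loader.contract.datasetclasses import PreprocessResponse
    from code_loader.contract.enums import DataStateType

    def preprocess() -> list:
        # One response per split, each tagged with an explicit state.
        train = PreprocessResponse(sample_ids=["t_0", "t_1"], data={}, state=DataStateType.training)
        val = PreprocessResponse(sample_ids=["v_0"], data={}, state=DataStateType.validation)
        return [train, val]
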
code_loader/leaploader.py CHANGED
@@ -2,10 +2,11 @@
  import importlib.util
  import io
  import sys
+ import time
  from contextlib import redirect_stdout
  from functools import lru_cache
  from pathlib import Path
- from typing import Dict, List, Iterable, Union, Any
+ from typing import Dict, List, Iterable, Union, Any, Type

  import numpy as np
  import numpy.typing as npt
@@ -27,6 +28,8 @@ class LeapLoader:
          self.code_entry_name = code_entry_name
          self.code_path = code_path

+         self._preprocess_result_cached = None
+
      @lru_cache()
      def exec_script(self) -> None:
          try:
@@ -103,12 +106,16 @@ class LeapLoader:
              for prediction_type in setup.prediction_types
          }

-     def get_sample(self, state: DataStateEnum, idx: int) -> DatasetSample:
+     def get_sample(self, state: DataStateEnum, sample_id: Union[int, str]) -> DatasetSample:
          self.exec_script()
-         sample = DatasetSample(inputs=self._get_inputs(state, idx),
-                                gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, idx),
-                                metadata=self._get_metadata(state, idx),
-                                index=idx,
+         preprocess_result = self._preprocess_result()
+         if state == DataStateEnum.unlabeled and sample_id not in preprocess_result[state].sample_ids:
+             self._preprocess_result(update_unlabeled_preprocess=True)
+
+         sample = DatasetSample(inputs=self._get_inputs(state, sample_id),
+                                gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, sample_id),
+                                metadata=self._get_metadata(state, sample_id),
+                                index=sample_id,
                                 state=state)
          return sample

@@ -148,6 +155,13 @@ class LeapLoader:
          test_result = DatasetTestResultPayload('preprocess')
          try:
              preprocess_result = self._preprocess_result()
+             if self.get_sample_id_type() is str:
+                 max_allowed_item_size = np.dtype('<U256').itemsize
+                 for state, preprocess_response in preprocess_result.items():
+                     sample_ids_array = np.array(preprocess_response.sample_ids)
+                     if sample_ids_array.dtype.itemsize > max_allowed_item_size:
+                         raise Exception(f"Sample id are too long. Max allowed length is 256 charecters.")
+
              global_leap_binder.check_preprocess(preprocess_result)
          except Exception as e:
              line_number, file_name, stacktrace = get_root_exception_file_and_line_number()
@@ -279,27 +293,42 @@ class LeapLoader:
          ]
          return ModelSetup(custom_layer_instances)

-     @lru_cache()
-     def _preprocess_result(self) -> Dict[DataStateEnum, PreprocessResponse]:
+     def _preprocess_result(self, update_unlabeled_preprocess=False) -> Dict[DataStateEnum, PreprocessResponse]:
          self.exec_script()
-         return global_leap_binder.get_preprocess_result()
+
+         if self._preprocess_result_cached is None:
+             self._preprocess_result_cached = global_leap_binder.get_preprocess_result()
+
+         if update_unlabeled_preprocess:
+             self._preprocess_result_cached[
+                 DataStateEnum.unlabeled] = global_leap_binder.get_preprocess_unlabeled_result()
+
+         return self._preprocess_result_cached
+
+     def get_preprocess_sample_ids(self, update_unlabeled_preprocess=False) -> Dict[DataStateEnum, Union[List[int], List[str]]]:
+         preprocess_result = self._preprocess_result(update_unlabeled_preprocess)
+         sample_ids = {}
+         for state, preprocess_response in preprocess_result.items():
+             sample_ids[state] = preprocess_response.sample_ids
+
+         return sample_ids

      def _get_dataset_handlers(self, handlers: Iterable[DatasetBaseHandler],
-                               state: DataStateEnum, idx: int) -> Dict[str, npt.NDArray[np.float32]]:
+                               state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, npt.NDArray[np.float32]]:
          result_agg = {}
          preprocess_result = self._preprocess_result()
          preprocess_state = preprocess_result[state]
          for handler in handlers:
-             handler_result = handler.function(idx, preprocess_state)
+             handler_result = handler.function(sample_id, preprocess_state)
              handler_name = handler.name
              result_agg[handler_name] = handler_result
          return result_agg

-     def _get_inputs(self, state: DataStateEnum, idx: int) -> Dict[str, npt.NDArray[np.float32]]:
-         return self._get_dataset_handlers(global_leap_binder.setup_container.inputs, state, idx)
+     def _get_inputs(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, npt.NDArray[np.float32]]:
+         return self._get_dataset_handlers(global_leap_binder.setup_container.inputs, state, sample_id)

-     def _get_gt(self, state: DataStateEnum, idx: int) -> Dict[str, npt.NDArray[np.float32]]:
-         return self._get_dataset_handlers(global_leap_binder.setup_container.ground_truths, state, idx)
+     def _get_gt(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, npt.NDArray[np.float32]]:
+         return self._get_dataset_handlers(global_leap_binder.setup_container.ground_truths, state, sample_id)

      @lru_cache()
      def _metadata_name_to_type(self) -> Dict[str, DatasetMetadataType]:
@@ -334,12 +363,12 @@ class LeapLoader:

          return converted_value

-     def _get_metadata(self, state: DataStateEnum, idx: int) -> Dict[str, Union[str, int, bool, float]]:
+     def _get_metadata(self, state: DataStateEnum, sample_id: Union[int, str]) -> Dict[str, Union[str, int, bool, float]]:
          result_agg = {}
          preprocess_result = self._preprocess_result()
          preprocess_state = preprocess_result[state]
          for handler in global_leap_binder.setup_container.metadata:
-             handler_result = handler.function(idx, preprocess_state)
+             handler_result = handler.function(sample_id, preprocess_state)
              if isinstance(handler_result, dict):
                  for single_metadata_name, single_metadata_result in handler_result.items():
                      handler_name = f'{handler.name}_{single_metadata_name}'
@@ -349,3 +378,14 @@ class LeapLoader:
              result_agg[handler_name] = self._convert_metadata_to_correct_type(handler_name, handler_result)

          return result_agg
+
+     @lru_cache()
+     def get_sample_id_type(self) -> Type:
+         preprocess_results = list(self._preprocess_result().values())
+         id_type = preprocess_results[0].sample_id_type
+         for preprocess_result in preprocess_results:
+             if preprocess_result.sample_id_type != id_type:
+                 raise Exception("Different id types in preprocess results")
+
+         return id_type
+
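
With the leaploader.py changes above, samples are fetched by sample id rather than positional index, and the id list per split is exposed through get_preprocess_sample_ids. A loose illustration only: the script path and entry name are placeholders, while the class, methods, and enum appear in the diff.

    from code_loader.leaploader import LeapLoader
    from code_loader.contract.enums import DataStateEnum

    # Placeholder code path and entry name; a real call needs a user leap script.
    loader = LeapLoader(code_path="/path/to/leap_script.py", code_entry_name="leap_script")
    ids_per_state = loader.get_preprocess_sample_ids()
    first_id = ids_per_state[DataStateEnum.training][0]
    sample = loader.get_sample(DataStateEnum.training, first_id)
    print(sample.index, sample.state)
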
code_loader/utils.py CHANGED
@@ -10,7 +10,7 @@ from code_loader.contract.datasetclasses import SectionCallableInterface, Prepro


  def to_numpy_return_wrapper(encoder_function: SectionCallableInterface) -> SectionCallableInterface:
-     def numpy_encoder_function(idx: int, samples: PreprocessResponse) -> npt.NDArray[np.float32]:
+     def numpy_encoder_function(idx: Union[int, str], samples: PreprocessResponse) -> npt.NDArray[np.float32]:
          result = encoder_function(idx, samples)
          numpy_result: npt.NDArray[np.float32] = np.array(result)
          return numpy_result
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: code-loader
- Version: 1.0.50
+ Version: 1.0.51
  Summary:
  Home-page: https://github.com/tensorleap/code-loader
  License: MIT
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
+ Requires-Dist: matplotlib (>=3.3,<3.4)
  Requires-Dist: numpy (>=1.22.3,<2.0.0)
  Requires-Dist: psutil (>=5.9.5,<6.0.0)
  Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
@@ -1,7 +1,8 @@
  LICENSE,sha256=qIwWjdspQeSMTtnFZBC8MuT-95L02FPvzRUdWFxrwJY,1067
  code_loader/__init__.py,sha256=6MMWr0ObOU7hkqQKgOqp4Zp3I28L7joGC9iCbQYtAJg,241
+ code_loader/code_inegration_processes_manager.py,sha256=XslWOPeNQk4RAFJ_f3tP5Oe3EgcIR7BE7Y8r9Ty73-o,3261
  code_loader/contract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- code_loader/contract/datasetclasses.py,sha256=HPm-z82EbkIk_C_vkCpD8oBs5pgUpStzciMRV0auMlI,5679
+ code_loader/contract/datasetclasses.py,sha256=lOIY-h9t4k9NxNsC9GrJhltmhpqRju3AuLA3WVQcCMs,6614
  code_loader/contract/enums.py,sha256=6Lo7p5CUog68Fd31bCozIuOgIp_IhSiPqWWph2k3OGU,1602
  code_loader/contract/exceptions.py,sha256=jWqu5i7t-0IG0jGRsKF4DjJdrsdpJjIYpUkN1F4RiyQ,51
  code_loader/contract/responsedataclasses.py,sha256=w7xVOv2S8Hyb5lqyomMGiKAWXDTSOG-FX1YW39bXD3A,3969
@@ -17,12 +18,12 @@ code_loader/experiment_api/types.py,sha256=MY8xFARHwdVA7p4dxyhD60ShmttgTvb4qdp1o
  code_loader/experiment_api/utils.py,sha256=XZHtxge12TS4H4-8PjV3sKuhp8Ud6ojAiIzTZJEqBqc,3304
  code_loader/experiment_api/workingspace_config_utils.py,sha256=DLzXQCg4dgTV_YgaSbeTVzq-2ja_SQw4zi7LXwKL9cY,990
  code_loader/inner_leap_binder/__init__.py,sha256=koOlJyMNYzGbEsoIbXathSmQ-L38N_pEXH_HvL7beXU,99
- code_loader/inner_leap_binder/leapbinder.py,sha256=ALUtiRYBxxP1xjza8WWZvVt3jNmfevRnxPYIQ4wy3g4,23808
- code_loader/leaploader.py,sha256=_iB23STM_6PuedtRsI_tod3dUoe1j5YoNuuoASBLLPc,17481
- code_loader/utils.py,sha256=TZAoUbA2pE8eK3Le3s5Xr4eRaYdeDMQtxotx6rh-5oE,2185
+ code_loader/inner_leap_binder/leapbinder.py,sha256=4DaLjwwa0wR9qR6K5hKZNakd1oludBRRZPJcCzKsi78,24912
+ code_loader/leaploader.py,sha256=POUgD6x1GH_iF_eDGz-VLX4DsIl2kddufKVDdrA_K-U,19491
+ code_loader/utils.py,sha256=aw2i_fqW_ADjLB66FWZd9DfpCQ7mPdMyauROC5Nd51I,2197
  code_loader/visualizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  code_loader/visualizers/default_visualizers.py,sha256=VoqO9FN84yXyMjRjHjUTOt2GdTkJRMbHbXJ1cJkREkk,2230
- code_loader-1.0.50.dist-info/LICENSE,sha256=qIwWjdspQeSMTtnFZBC8MuT-95L02FPvzRUdWFxrwJY,1067
- code_loader-1.0.50.dist-info/METADATA,sha256=ThRDw4Frh9tilH0mroPmMI2bYeKQ-JXjheuLpRI0Bn8,849
- code_loader-1.0.50.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- code_loader-1.0.50.dist-info/RECORD,,
+ code_loader-1.0.51.dist-info/LICENSE,sha256=qIwWjdspQeSMTtnFZBC8MuT-95L02FPvzRUdWFxrwJY,1067
+ code_loader-1.0.51.dist-info/METADATA,sha256=_lsVhZ2UtU1jvL3Sb-3dUfIvlUieLMGrIWqtO2f5gU4,888
+ code_loader-1.0.51.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ code_loader-1.0.51.dist-info/RECORD,,