code-loader 1.0.48__tar.gz → 1.0.49.dev2__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (28)
  1. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/PKG-INFO +1 -1
  2. code_loader-1.0.49.dev2/code_loader/code_inegration_processes_manager.py +83 -0
  3. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/contract/datasetclasses.py +12 -2
  4. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/inner_leap_binder/leapbinder.py +26 -7
  5. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/leaploader.py +26 -3
  6. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/pyproject.toml +1 -1
  7. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/LICENSE +0 -0
  8. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/README.md +0 -0
  9. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/__init__.py +0 -0
  10. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/contract/__init__.py +0 -0
  11. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/contract/enums.py +0 -0
  12. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/contract/exceptions.py +0 -0
  13. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/contract/responsedataclasses.py +0 -0
  14. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/contract/visualizer_classes.py +0 -0
  15. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/experiment_api/__init__.py +0 -0
  16. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/experiment_api/api.py +0 -0
  17. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/experiment_api/cli_config_utils.py +0 -0
  18. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/experiment_api/client.py +0 -0
  19. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/experiment_api/epoch.py +0 -0
  20. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/experiment_api/experiment.py +0 -0
  21. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/experiment_api/experiment_context.py +0 -0
  22. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/experiment_api/types.py +0 -0
  23. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/experiment_api/utils.py +0 -0
  24. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/experiment_api/workingspace_config_utils.py +0 -0
  25. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/inner_leap_binder/__init__.py +0 -0
  26. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/utils.py +0 -0
  27. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/visualizers/__init__.py +0 -0
  28. {code_loader-1.0.48 → code_loader-1.0.49.dev2}/code_loader/visualizers/default_visualizers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: code-loader
3
- Version: 1.0.48
3
+ Version: 1.0.49.dev2
4
4
  Summary:
5
5
  Home-page: https://github.com/tensorleap/code-loader
6
6
  License: MIT
@@ -0,0 +1,83 @@
1
+ # mypy: ignore-errors
2
+ import traceback
3
+ from dataclasses import dataclass
4
+
5
+ from typing import List, Tuple, Optional
6
+
7
+ from multiprocessing import Process, Queue
8
+
9
+ from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
10
+ from code_loader.leaploader import LeapLoader
11
+ from code_loader.contract.enums import DataStateEnum
12
+ from code_loader.metric_calculator_parallelized import MetricCalculatorParallelized
13
+ from code_loader.samples_generator_parallelized import SamplesGeneratorParallelized
14
+
15
+
16
+ @dataclass
17
+ class SampleSerializableError:
18
+ state: DataStateEnum
19
+ index: int
20
+ leap_script_trace: str
21
+ exception_as_str: str
22
+
23
+
24
+ class CodeIntegrationProcessesManager:
25
+ def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
26
+ max_samples_in_queue: int = 128) -> None:
27
+ self.metric_calculator_parallelized = MetricCalculatorParallelized(code_path, code_entry_name)
28
+ self.samples_generator_parallelized = SamplesGeneratorParallelized(code_path, code_entry_name)
29
+
30
+ def _create_and_start_process(self) -> Process:
31
+ process = self.multiprocessing_context.Process(
32
+ target=CodeIntegrationProcessesManager._process_func,
33
+ args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
34
+ self._ready_processed_results))
35
+ process.daemon = True
36
+ process.start()
37
+ return process
38
+
39
+ def _run_and_warm_first_process(self):
40
+ process = self._create_and_start_process()
41
+ self.processes = [process]
42
+
43
+ # needed in order to make sure the preprocess func runs once in nonparallel
44
+ self._start_process_inputs([(DataStateEnum.training, 0)])
45
+ self._get_next_ready_processed_result()
46
+
47
+ def _operation_decider(self):
48
+ if self.metric_calculator_parallelized._ready_processed_results.empty() and not \
49
+ self.metric_calculator_parallelized._inputs_waiting_to_be_process.empty():
50
+ return 'metric'
51
+
52
+ if self.samples_generator_parallelized._ready_processed_results.empty() and not \
53
+ self.samples_generator_parallelized._inputs_waiting_to_be_process.empty():
54
+ return 'dataset'
55
+
56
+
57
+
58
+
59
+ @staticmethod
60
+ def _process_func(code_path: str, code_entry_name: str,
61
+ samples_to_process: Queue, ready_samples: Queue,
62
+ metrics_to_process: Queue, ready_metrics: Queue) -> None:
63
+ import os
64
+ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
65
+
66
+ leap_loader = LeapLoader(code_path, code_entry_name)
67
+ while True:
68
+
69
+ # decide on sample or metric to process
70
+ state, idx = samples_to_process.get(block=True)
71
+ leap_loader._preprocess_result()
72
+ try:
73
+ sample = leap_loader.get_sample(state, idx)
74
+ except Exception as e:
75
+ leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
76
+ ready_samples.put(SampleSerializableError(state, idx, leap_script_trace, str(e)))
77
+ continue
78
+
79
+ ready_samples.put(sample)
80
+
81
+ def generate_samples(self, sample_identities: List[Tuple[DataStateEnum, int]]):
82
+ return self.start_process_inputs(sample_identities)
83
+
@@ -32,8 +32,18 @@ class PreprocessResponse:
32
32
  }
33
33
  response = PreprocessResponse(length=len(preprocessed_data), data=preprocessed_data)
34
34
  """
35
- length: int
36
- data: Any
35
+ length: Optional[int] = None # Deprecated. Please use sample_ids instead
36
+ data: Any = None
37
+ sample_ids: Optional[Union[List[str], List[int]]] = None
38
+ state: Optional[DataStateType] = None
39
+
40
+ def __post_init__(self):
41
+ if self.length is not None and self.sample_ids is None:
42
+ self.sample_ids = [i for i in range(self.length)]
43
+ elif self.length is None and self.sample_ids is not None:
44
+ self.length = len(self.sample_ids)
45
+ else:
46
+ raise Exception("length is deprecated. Please use sample_ids instead.")
37
47
 
38
48
 
39
49
  SectionCallableInterface = Callable[[int, PreprocessResponse], npt.NDArray[np.float32]]
@@ -389,17 +389,36 @@ class LeapBinder:
389
389
  if preprocess is None:
390
390
  raise Exception("Please make sure you call the leap_binder.set_preprocess method")
391
391
  preprocess_results = preprocess.function()
392
- preprocess_result_dict = {
393
- DataStateEnum(i): preprocess_result
394
- for i, preprocess_result in enumerate(preprocess_results)
395
- }
392
+ preprocess_result_dict = {}
393
+ for i, preprocess_result in enumerate(preprocess_results):
394
+ if preprocess_result.state is None:
395
+ state_enum = DataStateEnum(i)
396
+ preprocess_result.state = DataStateType(state_enum.name)
397
+ else:
398
+ state_enum = DataStateEnum[preprocess_result.state.name]
396
399
 
397
- unlabeled_preprocess = self.setup_container.unlabeled_data_preprocess
398
- if unlabeled_preprocess is not None:
399
- preprocess_result_dict[DataStateEnum.unlabeled] = unlabeled_preprocess.function()
400
+ if state_enum in preprocess_result_dict:
401
+ raise Exception(f"Duplicate state {state_enum.name} in preprocess results")
402
+ preprocess_result_dict[state_enum] = preprocess_result
403
+
404
+ if DataStateEnum.unlabeled not in preprocess_result_dict:
405
+ preprocess_unlabeled_result = self.get_preprocess_unlabeled_result()
406
+ if preprocess_unlabeled_result is not None:
407
+ preprocess_result_dict[DataStateEnum.unlabeled] = preprocess_unlabeled_result
408
+
409
+ if DataStateEnum.training not in preprocess_result_dict:
410
+ raise Exception("Training data is required")
411
+ if DataStateEnum.validation not in preprocess_result_dict:
412
+ raise Exception("Validation data is required")
400
413
 
401
414
  return preprocess_result_dict
402
415
 
416
+ def get_preprocess_unlabeled_result(self) -> Optional[PreprocessResponse]:
417
+ unlabeled_preprocess = self.setup_container.unlabeled_data_preprocess
418
+ if unlabeled_preprocess is not None:
419
+ return unlabeled_preprocess.function()
420
+ return None
421
+
403
422
  def _get_all_dataset_base_handlers(self) -> List[Union[DatasetBaseHandler, MetadataHandler]]:
404
423
  all_dataset_base_handlers: List[Union[DatasetBaseHandler, MetadataHandler]] = []
405
424
  all_dataset_base_handlers.extend(self.setup_container.inputs)
@@ -2,6 +2,7 @@
2
2
  import importlib.util
3
3
  import io
4
4
  import sys
5
+ import time
5
6
  from contextlib import redirect_stdout
6
7
  from functools import lru_cache
7
8
  from pathlib import Path
@@ -27,6 +28,8 @@ class LeapLoader:
27
28
  self.code_entry_name = code_entry_name
28
29
  self.code_path = code_path
29
30
 
31
+ self._preprocess_result_cached = None
32
+
30
33
  @lru_cache()
31
34
  def exec_script(self) -> None:
32
35
  try:
@@ -105,6 +108,10 @@ class LeapLoader:
105
108
 
106
109
  def get_sample(self, state: DataStateEnum, idx: int) -> DatasetSample:
107
110
  self.exec_script()
111
+ preprocess_result = self._preprocess_result()
112
+ if state == DataStateEnum.unlabeled and idx not in preprocess_result[state].sample_ids:
113
+ self._preprocess_result(update_unlabeled_preprocess=True)
114
+
108
115
  sample = DatasetSample(inputs=self._get_inputs(state, idx),
109
116
  gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, idx),
110
117
  metadata=self._get_metadata(state, idx),
@@ -279,10 +286,26 @@ class LeapLoader:
279
286
  ]
280
287
  return ModelSetup(custom_layer_instances)
281
288
 
282
- @lru_cache()
283
- def _preprocess_result(self) -> Dict[DataStateEnum, PreprocessResponse]:
289
+ def _preprocess_result(self, update_unlabeled_preprocess=False) -> Dict[DataStateEnum, PreprocessResponse]:
284
290
  self.exec_script()
285
- return global_leap_binder.get_preprocess_result()
291
+
292
+ if self._preprocess_result_cached is None:
293
+ self._preprocess_result_cached = global_leap_binder.get_preprocess_result()
294
+
295
+ if update_unlabeled_preprocess:
296
+ self._preprocess_result_cached[
297
+ DataStateEnum.unlabeled] = global_leap_binder.get_preprocess_unlabeled_result()
298
+
299
+ return self._preprocess_result_cached
300
+
301
+ def get_preprocess_sample_ids(self) -> Dict[DataStateEnum, Union[List[int], List[str]]]:
302
+ preprocess_result = self._preprocess_result()
303
+ sample_ids = {}
304
+ for state, preprocess_response in preprocess_result.items():
305
+ sample_ids[state] = preprocess_response.sample_ids
306
+
307
+ return sample_ids
308
+
286
309
 
287
310
  def _get_dataset_handlers(self, handlers: Iterable[DatasetBaseHandler],
288
311
  state: DataStateEnum, idx: int) -> Dict[str, npt.NDArray[np.float32]]:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "code-loader"
3
- version = "1.0.48"
3
+ version = "1.0.49.dev2"
4
4
  description = ""
5
5
  authors = ["dorhar <doron.harnoy@tensorleap.ai>"]
6
6
  license = "MIT"
File without changes