code-loader 1.0.49.dev1__tar.gz → 1.0.49.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/PKG-INFO +1 -1
  2. code_loader-1.0.49.dev3/code_loader/code_inegration_processes_manager.py +83 -0
  3. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/leaploader.py +9 -15
  4. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/pyproject.toml +1 -1
  5. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/LICENSE +0 -0
  6. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/README.md +0 -0
  7. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/__init__.py +0 -0
  8. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/contract/__init__.py +0 -0
  9. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/contract/datasetclasses.py +0 -0
  10. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/contract/enums.py +0 -0
  11. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/contract/exceptions.py +0 -0
  12. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/contract/responsedataclasses.py +0 -0
  13. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/contract/visualizer_classes.py +0 -0
  14. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/experiment_api/__init__.py +0 -0
  15. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/experiment_api/api.py +0 -0
  16. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/experiment_api/cli_config_utils.py +0 -0
  17. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/experiment_api/client.py +0 -0
  18. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/experiment_api/epoch.py +0 -0
  19. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/experiment_api/experiment.py +0 -0
  20. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/experiment_api/experiment_context.py +0 -0
  21. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/experiment_api/types.py +0 -0
  22. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/experiment_api/utils.py +0 -0
  23. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/experiment_api/workingspace_config_utils.py +0 -0
  24. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/inner_leap_binder/__init__.py +0 -0
  25. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/inner_leap_binder/leapbinder.py +0 -0
  26. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/utils.py +0 -0
  27. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/visualizers/__init__.py +0 -0
  28. {code_loader-1.0.49.dev1 → code_loader-1.0.49.dev3}/code_loader/visualizers/default_visualizers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: code-loader
3
- Version: 1.0.49.dev1
3
+ Version: 1.0.49.dev3
4
4
  Summary:
5
5
  Home-page: https://github.com/tensorleap/code-loader
6
6
  License: MIT
@@ -0,0 +1,83 @@
1
+ # mypy: ignore-errors
2
+ import traceback
3
+ from dataclasses import dataclass
4
+
5
+ from typing import List, Tuple, Optional
6
+
7
+ from multiprocessing import Process, Queue
8
+
9
+ from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
10
+ from code_loader.leaploader import LeapLoader
11
+ from code_loader.contract.enums import DataStateEnum
12
+ from code_loader.metric_calculator_parallelized import MetricCalculatorParallelized
13
+ from code_loader.samples_generator_parallelized import SamplesGeneratorParallelized
14
+
15
+
16
+ @dataclass
17
+ class SampleSerializableError:
18
+ state: DataStateEnum
19
+ index: int
20
+ leap_script_trace: str
21
+ exception_as_str: str
22
+
23
+
24
+ class CodeIntegrationProcessesManager:
25
+ def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
26
+ max_samples_in_queue: int = 128) -> None:
27
+ self.metric_calculator_parallelized = MetricCalculatorParallelized(code_path, code_entry_name)
28
+ self.samples_generator_parallelized = SamplesGeneratorParallelized(code_path, code_entry_name)
29
+
30
+ def _create_and_start_process(self) -> Process:
31
+ process = self.multiprocessing_context.Process(
32
+ target=CodeIntegrationProcessesManager._process_func,
33
+ args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
34
+ self._ready_processed_results))
35
+ process.daemon = True
36
+ process.start()
37
+ return process
38
+
39
+ def _run_and_warm_first_process(self):
40
+ process = self._create_and_start_process()
41
+ self.processes = [process]
42
+
43
+ # needed in order to make sure the preprocess func runs once in nonparallel
44
+ self._start_process_inputs([(DataStateEnum.training, 0)])
45
+ self._get_next_ready_processed_result()
46
+
47
+ def _operation_decider(self):
48
+ if self.metric_calculator_parallelized._ready_processed_results.empty() and not \
49
+ self.metric_calculator_parallelized._inputs_waiting_to_be_process.empty():
50
+ return 'metric'
51
+
52
+ if self.samples_generator_parallelized._ready_processed_results.empty() and not \
53
+ self.samples_generator_parallelized._inputs_waiting_to_be_process.empty():
54
+ return 'dataset'
55
+
56
+
57
+
58
+
59
+ @staticmethod
60
+ def _process_func(code_path: str, code_entry_name: str,
61
+ samples_to_process: Queue, ready_samples: Queue,
62
+ metrics_to_process: Queue, ready_metrics: Queue) -> None:
63
+ import os
64
+ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
65
+
66
+ leap_loader = LeapLoader(code_path, code_entry_name)
67
+ while True:
68
+
69
+ # decide on sample or metric to process
70
+ state, idx = samples_to_process.get(block=True)
71
+ leap_loader._preprocess_result()
72
+ try:
73
+ sample = leap_loader.get_sample(state, idx)
74
+ except Exception as e:
75
+ leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
76
+ ready_samples.put(SampleSerializableError(state, idx, leap_script_trace, str(e)))
77
+ continue
78
+
79
+ ready_samples.put(sample)
80
+
81
+ def generate_samples(self, sample_identities: List[Tuple[DataStateEnum, int]]):
82
+ return self.start_process_inputs(sample_identities)
83
+
@@ -24,13 +24,11 @@ from code_loader.utils import get_root_exception_file_and_line_number
24
24
 
25
25
 
26
26
  class LeapLoader:
27
- def __init__(self, code_path: str, code_entry_name: str, streaming_mode: bool = False):
27
+ def __init__(self, code_path: str, code_entry_name: str):
28
28
  self.code_entry_name = code_entry_name
29
29
  self.code_path = code_path
30
30
 
31
- self.streaming_mode = streaming_mode
32
31
  self._preprocess_result_cached = None
33
- self._last_updated_unlabeled_preprocess = None
34
32
 
35
33
  @lru_cache()
36
34
  def exec_script(self) -> None:
@@ -110,6 +108,10 @@ class LeapLoader:
110
108
 
111
109
  def get_sample(self, state: DataStateEnum, idx: int) -> DatasetSample:
112
110
  self.exec_script()
111
+ preprocess_result = self._preprocess_result()
112
+ if state == DataStateEnum.unlabeled and idx not in preprocess_result[state].sample_ids:
113
+ self._preprocess_result(update_unlabeled_preprocess=True)
114
+
113
115
  sample = DatasetSample(inputs=self._get_inputs(state, idx),
114
116
  gt=None if state == DataStateEnum.unlabeled else self._get_gt(state, idx),
115
117
  metadata=self._get_metadata(state, idx),
@@ -284,28 +286,20 @@ class LeapLoader:
284
286
  ]
285
287
  return ModelSetup(custom_layer_instances)
286
288
 
287
- def _preprocess_result(self) -> Dict[DataStateEnum, PreprocessResponse]:
289
+ def _preprocess_result(self, update_unlabeled_preprocess=False) -> Dict[DataStateEnum, PreprocessResponse]:
288
290
  self.exec_script()
289
291
 
290
292
  if self._preprocess_result_cached is None:
291
293
  self._preprocess_result_cached = global_leap_binder.get_preprocess_result()
292
- self._last_updated_unlabeled_preprocess = time.time()
293
294
 
294
- if self.streaming_mode and global_leap_binder.setup_container.unlabeled_data_preprocess is None:
295
- raise Exception("unlabeled_data_preprocess is not defined in the dataset script and it "
296
- "most be defined in production monitoring.")
297
-
298
- update_unlabeled_preprocess_interval = 120
299
- if (self.streaming_mode and
300
- time.time() - self._last_updated_unlabeled__preprocess > update_unlabeled_preprocess_interval):
295
+ if update_unlabeled_preprocess:
301
296
  self._preprocess_result_cached[
302
297
  DataStateEnum.unlabeled] = global_leap_binder.get_preprocess_unlabeled_result()
303
- self._last_updated_unlabeled_preprocess = time.time()
304
298
 
305
299
  return self._preprocess_result_cached
306
300
 
307
- def get_preprocess_sample_ids(self) -> Dict[DataStateEnum, Union[List[int], List[str]]]:
308
- preprocess_result = self._preprocess_result()
301
+ def get_preprocess_sample_ids(self, update_unlabeled_preprocess=False) -> Dict[DataStateEnum, Union[List[int], List[str]]]:
302
+ preprocess_result = self._preprocess_result(update_unlabeled_preprocess)
309
303
  sample_ids = {}
310
304
  for state, preprocess_response in preprocess_result.items():
311
305
  sample_ids[state] = preprocess_response.sample_ids
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "code-loader"
3
- version = "1.0.49.dev1"
3
+ version = "1.0.49.dev3"
4
4
  description = ""
5
5
  authors = ["dorhar <doron.harnoy@tensorleap.ai>"]
6
6
  license = "MIT"