code-loader 1.0.14__py3-none-any.whl → 1.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: code-loader
3
- Version: 1.0.14
3
+ Version: 1.0.15
4
4
  Summary:
5
5
  Home-page: https://github.com/tensorleap/code-loader
6
6
  License: MIT
@@ -8,15 +8,11 @@ code_loader/contract/responsedataclasses.py,sha256=thIxKSx3mwoKHYICULGaeh2-S56L1
8
8
  code_loader/contract/visualizer_classes.py,sha256=1FjVO744J_EMuJfHWXGdvSz6vl3Vu7iS3CDfs8MzEEQ,5138
9
9
  code_loader/inner_leap_binder/__init__.py,sha256=koOlJyMNYzGbEsoIbXathSmQ-L38N_pEXH_HvL7beXU,99
10
10
  code_loader/inner_leap_binder/leapbinder.py,sha256=KKVyStkCb7K1wB4AP4c2fmJuSFiC0MxWy0VwJ7Id_J8,12510
11
- code_loader/leap_loader_parallelized_base.py,sha256=KuxlDECh4dimY7g8rNqYiqneurAtLmsAJ1hi2PvGo7o,4974
12
11
  code_loader/leaploader.py,sha256=YvOys16wyATiGDVsxP8Fi-mgrnhFCAw4_JOtXEGjlUg,15362
13
- code_loader/metric_calculator_parallelized.py,sha256=aCOw4NCrqTUQkaLrglYxWBK9XGWZ1M9cySti2HaS4dA,2181
14
- code_loader/samples_generator_parallelized.py,sha256=9uA1TMZcClLPtGqYfzFgcqMLahyzzQ_p7ESuN-F_qPU,2431
15
12
  code_loader/utils.py,sha256=WUcM97OuxrhfLCRPoH9EbXrxajNpYgX1CTMc3_PXtYU,1736
16
- code_loader/visualizer_calculator_parallelized.py,sha256=V3G9BOka5b_QIsyBCeK1NYofkbD5Wej01eGXP3dceK8,2523
17
13
  code_loader/visualizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
14
  code_loader/visualizers/default_visualizers.py,sha256=HqWx2qfTrroGl2n8Fpmr_4X-rk7tE2oGapjO3gzz4WY,2226
19
- code_loader-1.0.14.dist-info/LICENSE,sha256=qIwWjdspQeSMTtnFZBC8MuT-95L02FPvzRUdWFxrwJY,1067
20
- code_loader-1.0.14.dist-info/METADATA,sha256=sbANzs7i8Y4jkfiXKsvN3bQx-EhzZBryiV284G7SBug,811
21
- code_loader-1.0.14.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
22
- code_loader-1.0.14.dist-info/RECORD,,
15
+ code_loader-1.0.15.dist-info/LICENSE,sha256=qIwWjdspQeSMTtnFZBC8MuT-95L02FPvzRUdWFxrwJY,1067
16
+ code_loader-1.0.15.dist-info/METADATA,sha256=q5zVlORo2jsg6P-8fpA0m1P-NJlkJP0ro91MvQbFqfk,811
17
+ code_loader-1.0.15.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
18
+ code_loader-1.0.15.dist-info/RECORD,,
@@ -1,140 +0,0 @@
1
- # mypy: ignore-errors
2
- import multiprocessing
3
- from abc import ABC, abstractmethod
4
- from functools import lru_cache
5
- from queue import Empty
6
- from threading import Thread
7
- from typing import List, Optional, Any
8
- from multiprocessing import Process, Queue
9
- import psutil
10
-
11
-
12
class LeapLoaderParallelizedBase(ABC):
    """Base class that feeds work items to a pool of worker processes.

    A background thread pushes inputs onto a bounded input queue and callers
    pull finished results from a bounded result queue. Subclasses provide the
    actual worker through ``_create_and_start_process``.
    """

    def __init__(self, code_path: str, code_entry_name: str,
                 n_workers: Optional[int] = 2, max_ready_results_in_queue: int = 128,
                 multiprocessing_context: Optional[str] = None) -> None:
        """Store the configuration; no queue or process is created here.

        Args:
            code_path: Stored on the instance for use by subclasses.
            code_entry_name: Stored on the instance for use by subclasses.
            n_workers: Number of workers ``start`` creates. ``None`` means the
                count is derived from the hardware when ``start`` runs.
            max_ready_results_in_queue: Capacity of the result queue.
            multiprocessing_context: Name handed to
                ``multiprocessing.get_context``. ``None`` uses the
                ``multiprocessing`` module itself.

        Raises:
            Exception: If ``n_workers`` is given and is not positive.
        """
        self.multiprocessing_context = multiprocessing
        if multiprocessing_context is not None:
            self.multiprocessing_context = multiprocessing.get_context(multiprocessing_context)

        self.code_entry_name = code_entry_name
        self.code_path = code_path

        if n_workers is not None and n_workers <= 0:
            raise Exception("need at least one worker")
        self.n_workers = n_workers
        self.max_ready_results_in_queue = max_ready_results_in_queue

        # NOTE(review): this counter is incremented by the producer thread and
        # decremented by the consuming thread without a lock.
        self._n_inputs_waiting_to_be_process = 0
        self._inputs_waiting_to_be_process: Optional[Queue] = None
        self._ready_processed_results: Optional[Queue] = None
        self.processes: Optional[List[Process]] = None
        self._generate_inputs_thread: Optional[Thread] = None
        self._should_stop_thread = False
        # Explicit "already started" flag; replaces the former lru_cache on
        # start(), which kept every instance alive inside the cache.
        self._started = False

    def _calculate_n_workers_by_hardware(self) -> int:
        """Return a worker count bounded by CPU count and available memory.

        The memory bound is half of the total memory divided by the resident
        size of the first (warm-up) process. When no process exists yet, or
        its resident size is reported as zero, only the CPU bound applies.
        The result is always at least 1.
        """
        cpu_bound = int(multiprocessing.cpu_count())
        if not self.processes:
            # No warm-up process to measure (the base warm-up hook is a no-op).
            return max(cpu_bound, 1)

        memory_usage_in_bytes = psutil.Process(self.processes[0].pid).memory_info().rss
        if memory_usage_in_bytes <= 0:
            return max(cpu_bound, 1)

        total_memory_in_bytes = psutil.virtual_memory().total
        memory_bound = int(total_memory_in_bytes * 0.5 / memory_usage_in_bytes)
        return max(min(cpu_bound, memory_bound), 1)

    @abstractmethod
    def _create_and_start_process(self) -> Process:
        """Create one worker process, start it and return it."""
        pass

    def _run_and_warm_first_process(self):
        """Hook called by ``start`` before the workers are created; no-op here."""
        pass

    def start(self) -> None:
        """Create the queues and worker processes.

        Repeated calls are no-ops until ``release`` is called. If start-up
        raises, the instance is not marked as started and a later call tries
        again.
        """
        if getattr(self, "_started", False):
            return

        self._inputs_waiting_to_be_process = self.multiprocessing_context.Queue(5000)
        self._ready_processed_results = self.multiprocessing_context.Queue(self.max_ready_results_in_queue)

        self._run_and_warm_first_process()
        n_workers = self.n_workers
        if n_workers is None:
            n_workers = self._calculate_n_workers_by_hardware()

        # The warm-up hook may already have registered a process.
        if self.processes is None:
            self.processes = []
        for _ in range(n_workers):
            self.processes.append(self._create_and_start_process())

        self._started = True

    def _start_process_inputs(self, inputs: List[Any]):
        """Push ``inputs`` onto the input queue until done or asked to stop."""
        assert self._inputs_waiting_to_be_process is not None
        assert self._ready_processed_results is not None

        for _input in inputs:
            if self._should_stop_thread:
                break
            self._n_inputs_waiting_to_be_process += 1
            self._inputs_waiting_to_be_process.put(_input)

    def _clear_queues(self):
        """Stop the producer thread and discard all pending inputs and results."""
        if self._generate_inputs_thread is not None:
            self._should_stop_thread = True
            # Remove one item so a producer blocked on a full queue can
            # return from put() and observe the stop flag.
            try:
                self._inputs_waiting_to_be_process.get_nowait()
                self._n_inputs_waiting_to_be_process -= 1
            except Empty:
                pass
            self._generate_inputs_thread.join()

        while not self._inputs_waiting_to_be_process.empty():
            try:
                self._inputs_waiting_to_be_process.get_nowait()
                self._n_inputs_waiting_to_be_process -= 1
            except Empty:
                pass

        # Whatever is still counted was already taken by a worker; wait for
        # and drop each of those results.
        for _ in range(self._n_inputs_waiting_to_be_process):
            self._get_next_ready_processed_result()

        self._should_stop_thread = False

    def _get_next_ready_processed_result(self):
        """Block until a result is available, account for it and return it."""
        result = self._ready_processed_results.get()
        self._n_inputs_waiting_to_be_process -= 1
        return result

    def start_process_inputs(self, inputs: List[Any]):
        """Discard any previous work and begin processing ``inputs``.

        Returns:
            The bound ``_get_next_ready_processed_result`` method; each call
            to it blocks for and returns one result.
        """
        self.start()

        self._clear_queues()

        self._generate_inputs_thread = Thread(target=self._start_process_inputs, args=(inputs,))
        self._generate_inputs_thread.start()
        return self._get_next_ready_processed_result

    @staticmethod
    def _release_queue(queue: Queue):
        """Close ``queue`` and join its feeder thread."""
        assert queue is not None
        queue.close()
        queue.join_thread()

    def release(self) -> None:
        """Drain and close the queues and stop every worker process.

        Does nothing when no process was started, including when ``__init__``
        did not run to completion. Afterwards ``start`` may be called again.
        """
        if getattr(self, "processes", None) is None:
            return
        self._clear_queues()

        self._release_queue(self._inputs_waiting_to_be_process)
        self._release_queue(self._ready_processed_results)

        for process in self.processes:
            process.terminate()
            process.kill()
            process.join()
            process.close()

        self.processes = None
        # Drop the closed queues and the joined thread so a later start()
        # begins from a clean state.
        self._inputs_waiting_to_be_process = None
        self._ready_processed_results = None
        self._generate_inputs_thread = None
        self._started = False

    def __del__(self) -> None:
        self.release()

    def check_processes(self) -> None:
        """Raise if any started worker process is no longer alive.

        Raises:
            Exception: For the first dead process found; the message carries
                its exit code.
        """
        for process in self.processes or []:
            if not process.is_alive():
                raise Exception(f'Sub process unexpected failure. Exit code: {process.exitcode}')
@@ -1,52 +0,0 @@
1
- # mypy: ignore-errors
2
- from typing import Optional, List, Tuple, Dict
3
- from multiprocessing import Process, Queue
4
- from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
5
- import traceback
6
- from dataclasses import dataclass
7
- import numpy as np
8
- from code_loader.leaploader import LeapLoader
9
-
10
-
11
@dataclass
class MetricSerializableError:
    """Plain-data record describing a failed metric evaluation."""

    # Identifier of the metric request that failed.
    metric_id: str
    # Name under which the metric was looked up.
    metric_name: str
    # Part of the formatted traceback that follows the last 'File "<string>"' marker.
    leap_script_trace: str
    # str() of the exception that was raised.
    exception_as_str: str
17
-
18
-
19
class MetricCalculatorParallelized(LeapLoaderParallelizedBase):
    """Evaluates named metrics in worker processes created with the "spawn" context."""

    def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
                 max_samples_in_queue: int = 128) -> None:
        """Forward the configuration to the base class, forcing the "spawn" context.

        Args:
            code_path: Passed to ``LeapLoader`` inside each worker.
            code_entry_name: Passed to ``LeapLoader`` inside each worker.
            n_workers: Forwarded unchanged to the base class.
            max_samples_in_queue: Forwarded as the result-queue capacity.
        """
        super().__init__(code_path, code_entry_name, n_workers, max_samples_in_queue, "spawn")

    @staticmethod
    def _process_func(code_path: str, code_entry_name: str,
                      metrics_to_process: Queue, ready_samples: Queue) -> None:
        """Worker loop: take a metric request, evaluate it, publish the outcome.

        Each request is a ``(metric_id, metric_name, kwargs)`` tuple. A success
        is published as ``(metric_id, result)``; a failure is published as a
        ``MetricSerializableError``. The loop never returns on its own.
        """
        import os
        # NOTE(review): presumably hides CUDA devices from the worker - confirm.
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        leap_loader = LeapLoader(code_path, code_entry_name)
        while True:
            metric_id, metric_name, input_arg_name_to_tensor = metrics_to_process.get(block=True)
            try:
                metric_result = leap_loader.metric_by_name()[metric_name].function(**input_arg_name_to_tensor)
            except Exception as e:
                # Keep only the traceback text after the last 'File "<string>"' marker.
                leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
                ready_samples.put(MetricSerializableError(metric_id, metric_name, leap_script_trace, str(e)))
            else:
                ready_samples.put((metric_id, metric_result))

    def _create_and_start_process(self) -> Process:
        """Start one daemon worker running ``_process_func`` and return it."""
        process = self.multiprocessing_context.Process(
            target=MetricCalculatorParallelized._process_func,
            args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
                  self._ready_processed_results))
        process.daemon = True
        process.start()
        return process

    def calculate_metrics(self, input_arg_name_to_tensor_list: List[Tuple[str, str, Dict[str, np.ndarray]]]):
        """Queue metric requests for evaluation.

        Args:
            input_arg_name_to_tensor_list: ``(metric_id, metric_name, kwargs)``
                tuples, where ``kwargs`` maps argument names to arrays.

        Returns:
            Whatever ``start_process_inputs`` returns for these requests.
        """
        return self.start_process_inputs(input_arg_name_to_tensor_list)
@@ -1,65 +0,0 @@
1
- # mypy: ignore-errors
2
- import traceback
3
- from dataclasses import dataclass
4
-
5
- from typing import List, Tuple, Optional
6
-
7
- from multiprocessing import Process, Queue
8
-
9
- from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
10
- from code_loader.leaploader import LeapLoader
11
- from code_loader.contract.enums import DataStateEnum
12
-
13
-
14
@dataclass
class SampleSerializableError:
    """Plain-data record describing a failed sample generation."""

    # Data state of the sample request that failed.
    state: DataStateEnum
    # Index of the requested sample.
    index: int
    # Part of the formatted traceback that follows the last 'File "<string>"' marker.
    leap_script_trace: str
    # str() of the exception that was raised.
    exception_as_str: str
20
-
21
-
22
class SamplesGeneratorParallelized(LeapLoaderParallelizedBase):
    """Generates samples in worker processes using the default multiprocessing context."""

    def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
                 max_samples_in_queue: int = 128) -> None:
        """Forward the configuration to the base class unchanged."""
        super().__init__(code_path, code_entry_name, n_workers, max_samples_in_queue)

    def _create_and_start_process(self) -> Process:
        """Start one daemon worker running ``_process_func`` and return it."""
        worker_args = (
            self.code_path,
            self.code_entry_name,
            self._inputs_waiting_to_be_process,
            self._ready_processed_results,
        )
        worker = self.multiprocessing_context.Process(
            target=SamplesGeneratorParallelized._process_func, args=worker_args)
        worker.daemon = True
        worker.start()
        return worker

    def _run_and_warm_first_process(self):
        """Start a single worker and wait for it to produce one sample."""
        self.processes = [self._create_and_start_process()]

        # Needed so that the preprocess function runs once, non-parallel,
        # before the other workers are started.
        warm_up_request = [(DataStateEnum.training, 0)]
        self._start_process_inputs(warm_up_request)
        self._get_next_ready_processed_result()

    @staticmethod
    def _process_func(code_path: str, code_entry_name: str,
                      samples_to_process: Queue, ready_samples: Queue) -> None:
        """Worker loop: take a ``(state, idx)`` request and publish its sample.

        A failure is published as a ``SampleSerializableError`` in place of
        the sample. The loop never returns on its own.
        """
        import os
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        leap_loader = LeapLoader(code_path, code_entry_name)
        while True:
            state, idx = samples_to_process.get(block=True)
            try:
                sample = leap_loader.get_sample(state, idx)
            except Exception as e:
                # Keep only the traceback text after the last 'File "<string>"' marker.
                trace_tail = traceback.format_exc().split('File "<string>"')[-1]
                failure = SampleSerializableError(state, idx, trace_tail, str(e))
                ready_samples.put(failure)
            else:
                ready_samples.put(sample)

    def generate_samples(self, sample_identities: List[Tuple[DataStateEnum, int]]):
        """Queue ``(state, index)`` requests and return what ``start_process_inputs`` returns."""
        return self.start_process_inputs(sample_identities)
65
-
@@ -1,61 +0,0 @@
1
- # mypy: ignore-errors
2
- from typing import Optional, List, Tuple, Dict
3
- from multiprocessing import Process, Queue
4
-
5
- import numpy as np
6
-
7
- from code_loader.leap_loader_parallelized_base import LeapLoaderParallelizedBase
8
- from dataclasses import dataclass
9
- from code_loader.leaploader import LeapLoader
10
-
11
-
12
@dataclass
class VisualizerSerializableError:
    """Plain-data record describing a failed visualizer call."""

    # Identifier of the visualizer request that failed.
    visualizer_id: str
    # Name under which the visualizer was looked up.
    visualizer_name: str
    # Position of the failing item within its batch, as given in the request.
    index_in_batch: int
    # str() of the exception that was raised.
    exception_as_str: str
18
-
19
-
20
class VisualizerCalculatorParallelized(LeapLoaderParallelizedBase):
    """Runs named visualizers in worker processes created with the "spawn" context."""

    def __init__(self, code_path: str, code_entry_name: str, n_workers: Optional[int] = 2,
                 max_samples_in_queue: int = 128) -> None:
        """Forward the configuration to the base class, forcing the "spawn" context.

        Args:
            code_path: Passed to ``LeapLoader`` inside each worker.
            code_entry_name: Passed to ``LeapLoader`` inside each worker.
            n_workers: Forwarded unchanged to the base class.
            max_samples_in_queue: Forwarded as the result-queue capacity.
        """
        super().__init__(code_path, code_entry_name, n_workers, max_samples_in_queue, "spawn")

    @staticmethod
    def _process_func(code_path: str, code_entry_name: str,
                      visualizers_to_process: Queue, ready_visualizations: Queue) -> None:
        """Worker loop: take a visualizer request, run it, publish the outcome.

        Each request is an ``(index_in_batch, visualizer_id, visualizer_name,
        kwargs)`` tuple. A success is published as ``(index_in_batch,
        visualizer_id, result)``; a failure is published as a
        ``VisualizerSerializableError``. The loop never returns on its own.
        """
        import os
        # NOTE(review): presumably hides CUDA devices from the worker - confirm.
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        leap_loader = LeapLoader(code_path, code_entry_name)

        # Run preprocessing once in this worker to mirror the main process.
        # This matters when preprocess fills a global that a visualizer
        # reads. The cached result is then cleared.
        leap_loader._preprocess_result()
        leap_loader._preprocess_result.cache_clear()

        while True:
            index_in_batch, visualizer_id, visualizer_name, input_arg_name_to_tensor = \
                visualizers_to_process.get(block=True)
            try:
                visualizer_result = \
                    leap_loader.visualizer_by_name()[visualizer_name].function(**input_arg_name_to_tensor)
            except Exception as e:
                ready_visualizations.put(VisualizerSerializableError(
                    visualizer_id, visualizer_name, index_in_batch, str(e)))
            else:
                ready_visualizations.put((index_in_batch, visualizer_id, visualizer_result))

    def _create_and_start_process(self) -> Process:
        """Start one daemon worker running ``_process_func`` and return it."""
        process = self.multiprocessing_context.Process(
            target=VisualizerCalculatorParallelized._process_func,
            args=(self.code_path, self.code_entry_name, self._inputs_waiting_to_be_process,
                  self._ready_processed_results))
        process.daemon = True
        process.start()
        return process

    def calculate_visualizers(self, input_arg_name_to_tensor_list: List[Tuple[int, str, str, Dict[str, np.ndarray]]]):
        """Queue visualizer requests for execution.

        Args:
            input_arg_name_to_tensor_list: ``(index_in_batch, visualizer_id,
                visualizer_name, kwargs)`` tuples, where ``kwargs`` maps
                argument names to arrays.

        Returns:
            Whatever ``start_process_inputs`` returns for these requests.
        """
        return self.start_process_inputs(input_arg_name_to_tensor_list)