code-loader 0.2.70.dev1__py3-none-any.whl → 0.2.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_loader/leaploader.py +8 -7
- code_loader/samples_generator_parallelized.py +143 -0
- {code_loader-0.2.70.dev1.dist-info → code_loader-0.2.71.dist-info}/METADATA +3 -3
- {code_loader-0.2.70.dev1.dist-info → code_loader-0.2.71.dist-info}/RECORD +6 -5
- {code_loader-0.2.70.dev1.dist-info → code_loader-0.2.71.dist-info}/WHEEL +1 -1
- {code_loader-0.2.70.dev1.dist-info → code_loader-0.2.71.dist-info}/LICENSE +0 -0
code_loader/leaploader.py
CHANGED
@@ -98,13 +98,14 @@ class LeapLoader:
|
|
98
98
|
}
|
99
99
|
|
100
100
|
def get_sample(self, state: DataStateEnum, idx: int) -> DatasetSample:
    """Assemble the ``DatasetSample`` for index ``idx`` of subset ``state``.

    Everything runs inside a ``tf.device('/cpu:0')`` scope, and
    ``exec_script()`` is invoked before any of the per-sample getters.

    Args:
        state: Subset the sample belongs to. For ``DataStateEnum.unlabeled``
            no ground truth is fetched and ``gt`` is ``None``.
        idx: Index of the sample within that subset.

    Returns:
        A ``DatasetSample`` carrying the inputs, ground truth, metadata,
        index and state of the requested sample.
    """
    with tf.device('/cpu:0'):
        self.exec_script()

        # Fetch order (inputs, ground truth, metadata) is kept as-is.
        inputs = self._get_inputs(state, idx)
        if state == DataStateEnum.unlabeled:
            ground_truth = None
        else:
            ground_truth = self._get_gt(state, idx)
        metadata = self._get_metadata(state, idx)

        return DatasetSample(inputs=inputs,
                             gt=ground_truth,
                             metadata=metadata,
                             index=idx,
                             state=state)
|
108
109
|
|
109
110
|
def check_dataset(self) -> DatasetIntegParseResult:
|
110
111
|
test_payloads: List[DatasetTestResultPayload] = []
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# mypy: ignore-errors
|
2
|
+
|
3
|
+
import traceback
|
4
|
+
from dataclasses import dataclass
|
5
|
+
from functools import lru_cache
|
6
|
+
from queue import Empty
|
7
|
+
from threading import Thread
|
8
|
+
from typing import List, Tuple, Optional
|
9
|
+
from multiprocessing import Process, Queue
|
10
|
+
|
11
|
+
from code_loader.leaploader import LeapLoader
|
12
|
+
from code_loader.contract.enums import DataStateEnum
|
13
|
+
|
14
|
+
|
15
|
+
@dataclass
class GetSampleSerializableError:
    """Plain-data description of a failed ``get_sample`` call.

    Instances are put on the results queue by
    ``SamplesGeneratorParallelized._process_func`` in place of a sample when
    ``LeapLoader.get_sample`` raises, so every field is a simple value.
    """

    # Subset of the sample whose generation failed.
    state: DataStateEnum
    # Index of that sample within the subset.
    index: int
    # Tail of the formatted traceback following the last 'File "<string>"'.
    leap_script_trace: str
    # ``str()`` of the exception that was raised.
    exception_as_str: str
|
21
|
+
|
22
|
+
|
23
|
+
class SamplesGeneratorParallelized:
    """Generate dataset samples in parallel with a pool of worker processes.

    Every worker process builds its own ``LeapLoader`` for ``code_path`` /
    ``code_entry_name``, takes ``(state, index)`` pairs from a bounded work
    queue and puts the resulting sample on a bounded results queue. When
    ``get_sample`` raises, a ``GetSampleSerializableError`` is put instead.

    Typical use: ``next_sample = gen.generate_samples(identities)``, then call
    ``next_sample()`` once per requested identity, and finally
    ``gen.release()``.

    NOTE(review): ``_n_samples_to_process`` is incremented on the producer
    thread and decremented on the calling thread without a lock - confirm
    whether this needs synchronisation.
    """

    # Capacity of the queue holding (state, index) pairs awaiting a worker.
    _MAX_PENDING_SAMPLE_IDS = 5000
    # Seconds a worker gets to exit after terminate() before it is killed.
    _TERMINATE_TIMEOUT_SECONDS = 5.0

    def __init__(self, code_path: str, code_entry_name: str,
                 n_workers: int = 2, max_samples_in_queue: int = 128) -> None:
        """Store the configuration; no process is started until ``start()``.

        Args:
            code_path: Forwarded to ``LeapLoader`` in every worker.
            code_entry_name: Forwarded to ``LeapLoader`` in every worker.
            n_workers: Number of worker processes to spawn; must be positive.
            max_samples_in_queue: Capacity of the results queue.

        Raises:
            ValueError: If ``n_workers`` is not positive.
        """
        # Runtime state is initialised first so that ``__del__`` ->
        # ``release()`` is safe even when the validation below raises.
        self._n_samples_to_process = 0
        self._samples_to_process: Optional[Queue] = None
        self._ready_samples: Optional[Queue] = None
        self.processes: Optional[List[Process]] = None
        self._generate_samples_thread: Optional[Thread] = None
        self._should_stop_thread = False

        if n_workers <= 0:
            raise ValueError("need at least one worker")

        self.code_entry_name = code_entry_name
        self.code_path = code_path
        self.n_workers = n_workers
        self.max_samples_in_queue = max_samples_in_queue

    def start(self) -> None:
        """Create the queues and spawn the worker processes.

        Idempotent: returns immediately while workers exist. After
        ``release()`` it may be called again to spin up a fresh pool.
        """
        # An explicit guard replaces the former ``lru_cache`` decorator, which
        # kept every started instance alive in a global cache and turned
        # ``start()`` into a permanent no-op after ``release()``.
        if self.processes is not None:
            return

        self._samples_to_process = Queue(self._MAX_PENDING_SAMPLE_IDS)
        self._ready_samples = Queue(self.max_samples_in_queue)

        self.processes = [
            Process(target=SamplesGeneratorParallelized._process_func,
                    args=(self.code_path, self.code_entry_name,
                          self._samples_to_process, self._ready_samples))
            for _ in range(self.n_workers)]

        for proc in self.processes:
            proc.daemon = True
            proc.start()

        # needed in order to make sure the preprocess func runs once in nonparallel
        self._generate_samples([(DataStateEnum.training, 0)])
        self._get_next_sample()

    @staticmethod
    def _process_func(code_path: str, code_entry_name: str,
                      samples_to_process: Queue, ready_samples: Queue) -> None:
        """Worker loop: turn queued ``(state, idx)`` pairs into samples.

        Runs forever; the process is expected to be stopped from outside.
        Every request yields exactly one item on ``ready_samples``: the sample
        itself or a ``GetSampleSerializableError`` describing the failure.
        """
        leap_loader = LeapLoader(code_path, code_entry_name)
        leap_loader.exec_script()
        while True:
            state, idx = samples_to_process.get(block=True)
            try:
                sample = leap_loader.get_sample(state, idx)
            except Exception as e:
                # Keep only the traceback tail after the last 'File "<string>"'.
                leap_script_trace = traceback.format_exc().split('File "<string>"')[-1]
                ready_samples.put(GetSampleSerializableError(state, idx, leap_script_trace, str(e)))
                continue

            ready_samples.put(sample)

    def _generate_samples(self, sample_identities: 'List[Tuple[DataStateEnum, int]]'):
        """Enqueue ``sample_identities`` for the workers, counting each one.

        Stops early once ``_should_stop_thread`` is set. ``put`` blocks while
        the work queue is full.
        """
        assert self._samples_to_process is not None
        assert self._ready_samples is not None

        for sample in sample_identities:
            if self._should_stop_thread:
                break
            self._n_samples_to_process += 1
            self._samples_to_process.put(sample)

    def _clear_queues(self):
        """Stop the producer thread and discard all outstanding work.

        Pending identities are dropped from the work queue and the results of
        identities already taken by workers are read and thrown away, leaving
        ``_n_samples_to_process`` at zero. Does nothing before ``start()``.
        """
        pending = self._samples_to_process
        if pending is None:
            return

        producer = self._generate_samples_thread
        if producer is not None:
            self._should_stop_thread = True
            # Presumably frees one slot so that a producer blocked in put()
            # on a full queue can proceed, observe the stop flag and exit.
            try:
                pending.get_nowait()
                self._n_samples_to_process -= 1
            except Empty:
                pass
            producer.join()
            self._generate_samples_thread = None

        while not pending.empty():
            try:
                pending.get_nowait()
                self._n_samples_to_process -= 1
            except Empty:
                pass

        # Whatever is still counted was picked up by a worker: consume its
        # result so it cannot leak into the next generation request.
        for _ in range(self._n_samples_to_process):
            self._get_next_sample()

        self._should_stop_thread = False

    def _get_next_sample(self):
        """Block until a result is available, then return it.

        The result is whatever a worker put on the results queue: a sample or
        a ``GetSampleSerializableError``.
        """
        sample = self._ready_samples.get()
        self._n_samples_to_process -= 1
        return sample

    def generate_samples(self, sample_identities: 'List[Tuple[DataStateEnum, int]]'):
        """Begin generating ``sample_identities`` in the background.

        Starts the workers if needed, discards any work left over from a
        previous request and feeds the new identities from a helper thread.

        Returns:
            A zero-argument callable; each call blocks for and returns the
            next available result.
        """
        self.start()

        self._clear_queues()

        self._generate_samples_thread = Thread(target=self._generate_samples, args=(sample_identities,))
        self._generate_samples_thread.start()
        return self._get_next_sample

    @staticmethod
    def _release_queue(queue: Queue):
        """Close ``queue`` and wait for its background thread to finish."""
        assert queue is not None
        queue.close()
        queue.join_thread()

    def release(self) -> None:
        """Drain and close the queues and stop every worker process.

        Safe to call repeatedly, and on an instance whose construction
        failed. Afterwards ``start()`` may be called again.
        """
        # getattr: __del__ may run on an object whose __init__ never finished.
        processes = getattr(self, "processes", None)
        if processes is None:
            return
        self._clear_queues()

        self._release_queue(self._samples_to_process)
        self._release_queue(self._ready_samples)

        for process in processes:
            # Ask politely first; kill only if the worker does not exit.
            process.terminate()
            process.join(self._TERMINATE_TIMEOUT_SECONDS)
            if process.is_alive():
                process.kill()
                process.join()
            process.close()

        # Drop every handle so a later start() builds fresh resources.
        self.processes = None
        self._samples_to_process = None
        self._ready_samples = None
        self._generate_samples_thread = None

    def __del__(self) -> None:
        """Release worker processes and queues on garbage collection."""
        self.release()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: code-loader
|
3
|
-
Version: 0.2.70.dev1
|
3
|
+
Version: 0.2.71
|
4
4
|
Summary:
|
5
5
|
Home-page: https://github.com/tensorleap/code-loader
|
6
6
|
License: MIT
|
@@ -13,8 +13,8 @@ Classifier: Programming Language :: Python :: 3.8
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.9
|
14
14
|
Classifier: Programming Language :: Python :: 3.10
|
15
15
|
Requires-Dist: numpy (>=1.22.3,<2.0.0)
|
16
|
-
Requires-Dist: tensorflow (
|
17
|
-
Requires-Dist: tensorflow-macos (
|
16
|
+
Requires-Dist: tensorflow (>=2.11.0,<3.0.0) ; platform_machine == "x86_64"
|
17
|
+
Requires-Dist: tensorflow-macos (>=2.11.0,<3.0.0) ; platform_machine == "arm64"
|
18
18
|
Requires-Dist: typeguard (>=2.13.3,<3.0.0)
|
19
19
|
Project-URL: Repository, https://github.com/tensorleap/code-loader
|
20
20
|
Description-Content-Type: text/markdown
|
@@ -17,13 +17,14 @@ code_loader/helpers/detection/yolo/pytorch_utils.py,sha256=RuB9tt13fIKuuGXoaTLOm
|
|
17
17
|
code_loader/helpers/detection/yolo/utils.py,sha256=eV9-IZhglRNGud1UmSckIuA49WRKJitDQhPKK7iIaBc,6563
|
18
18
|
code_loader/leap_binder/__init__.py,sha256=Oe7DNj6RmgX-tMruOrI7GYvf3SyAnzOxugF2e117_Z8,93
|
19
19
|
code_loader/leap_binder/leapbinder.py,sha256=WjC-UA5xeY9UZtMqJQOxjidlFcWURC9mmy7OrQy1IY0,6692
|
20
|
-
code_loader/leaploader.py,sha256=
|
20
|
+
code_loader/leaploader.py,sha256=3vvzBzS0oev6kqXjtriz3G6z0tijGOJLIH8QoOvu1Ls,15044
|
21
21
|
code_loader/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
code_loader/metrics/default_metrics.py,sha256=xtvTskR7LC5pyKSlFcvlyBpFY9TSXNGVAi5taEPSI60,6727
|
23
|
+
code_loader/samples_generator_parallelized.py,sha256=3Q9EixQmZ7Pv1G0J3N_S0d3O2u1znVTpvxPi2iSp9gQ,4747
|
23
24
|
code_loader/utils.py,sha256=WUcM97OuxrhfLCRPoH9EbXrxajNpYgX1CTMc3_PXtYU,1736
|
24
25
|
code_loader/visualizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
25
26
|
code_loader/visualizers/default_visualizers.py,sha256=HqWx2qfTrroGl2n8Fpmr_4X-rk7tE2oGapjO3gzz4WY,2226
|
26
|
-
code_loader-0.2.
|
27
|
-
code_loader-0.2.
|
28
|
-
code_loader-0.2.
|
29
|
-
code_loader-0.2.
|
27
|
+
code_loader-0.2.71.dist-info/LICENSE,sha256=qIwWjdspQeSMTtnFZBC8MuT-95L02FPvzRUdWFxrwJY,1067
|
28
|
+
code_loader-0.2.71.dist-info/METADATA,sha256=JLp90ger6p7KRVnGmKZ1dUQht7euhaIQBNH9F5S50Z4,876
|
29
|
+
code_loader-0.2.71.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
30
|
+
code_loader-0.2.71.dist-info/RECORD,,
|
File without changes
|