pyerualjetwork 4.1.5__py3-none-any.whl → 4.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyerualjetwork/__init__.py +4 -3
- pyerualjetwork/activation_functions_cuda.py +1 -1
- pyerualjetwork/data_operations.py +41 -80
- pyerualjetwork/data_operations_cuda.py +66 -108
- pyerualjetwork/memory_operations.py +184 -78
- pyerualjetwork/metrics_cuda.py +3 -4
- pyerualjetwork/model_operations.py +4 -5
- pyerualjetwork/model_operations_cuda.py +7 -6
- pyerualjetwork/plan.py +35 -23
- pyerualjetwork/plan_cuda.py +113 -81
- pyerualjetwork/planeat.py +3 -3
- pyerualjetwork/planeat_cuda.py +3 -3
- pyerualjetwork/visualizations.py +147 -125
- pyerualjetwork/visualizations_cuda.py +160 -130
- {pyerualjetwork-4.1.5.dist-info → pyerualjetwork-4.1.7.dist-info}/METADATA +25 -9
- pyerualjetwork-4.1.7.dist-info/RECORD +24 -0
- pyerualjetwork-4.1.5.dist-info/RECORD +0 -24
- {pyerualjetwork-4.1.5.dist-info → pyerualjetwork-4.1.7.dist-info}/WHEEL +0 -0
- {pyerualjetwork-4.1.5.dist-info → pyerualjetwork-4.1.7.dist-info}/top_level.txt +0 -0
pyerualjetwork/__init__.py
CHANGED
```diff
@@ -14,7 +14,8 @@ package_names = [
     'numpy==1.26.4',
     'matplotlib==3.9.0',
     'colorama==0.4.6',
-    'psutil==6.1.1'
+    'psutil==6.1.1',
+    'cupy-cuda12x==13.3.0'
 ]
 
 installed_packages = pkg_resources.working_set
@@ -47,8 +48,8 @@ for package_name in package_names:
 
 print(f"PyerualJetwork is ready to use with {err} errors")
 
-__version__ = "4.1.5"
-__update__ = "*
+__version__ = "4.1.7"
+__update__ = "* Changes: https://github.com/HCB06/PyerualJetwork/blob/main/CHANGES\n* PyerualJetwork document: https://github.com/HCB06/PyerualJetwork/blob/main/Welcome_to_PyerualJetwork/PYERUALJETWORK_USER_MANUEL_AND_LEGAL_INFORMATION(EN).pdf\n* YouTube tutorials: https://www.youtube.com/@HasanCanBeydili"
 
 def print_version(__version__):
     print(f"PyerualJetwork Version {__version__}" + '\n')
```
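The surrounding context implies a startup dependency check: every pinned spec in package_names (now including cupy-cuda12x==13.3.0) is compared against pkg_resources.working_set and a count of mismatches is reported. A minimal sketch of such a check, assuming a loop body this diff does not show:

```python
# Hedged sketch of the dependency check implied by the context lines above;
# the actual loop in pyerualjetwork/__init__.py is not part of this diff.
import pkg_resources

package_names = ['numpy==1.26.4', 'matplotlib==3.9.0', 'colorama==0.4.6',
                 'psutil==6.1.1', 'cupy-cuda12x==13.3.0']
installed = {p.project_name.lower(): p.version for p in pkg_resources.working_set}

err = 0
for spec in package_names:
    name, version = spec.split('==')
    if installed.get(name.lower()) != version:
        err += 1  # missing package or version mismatch
print(f"PyerualJetwork is ready to use with {err} errors")
```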
pyerualjetwork/data_operations.py
CHANGED
```diff
@@ -16,26 +16,10 @@ def encode_one_hot(y_train, y_test=None, summary=False):
     Returns:
         tuple: One-hot encoded y_train and (if given) y_test.
     """
-    if len(y_train) < 256:
-        if y_train.dtype != np.uint8:
-            y_train = np.array(y_train, copy=False).astype(np.uint8, copy=False)
-    elif len(y_train) <= 32767:
-        if y_train.dtype != np.uint16:
-            y_train = np.array(y_train, copy=False).astype(np.uint16, copy=False)
-    else:
-        if y_train.dtype != np.uint32:
-            y_train = np.array(y_train, copy=False).astype(np.uint32, copy=False)
-
-    if y_test is not None:
-        if len(y_test) < 256:
-            if y_test.dtype != np.uint8:
-                y_test = np.array(y_test, copy=False).astype(np.uint8, copy=False)
-        elif len(y_test) <= 32767:
-            if y_test.dtype != np.uint16:
-                y_test = np.array(y_test, copy=False).astype(np.uint16, copy=False)
-        else:
-            if y_test.dtype != np.uint32:
-                y_test = np.array(y_test, copy=False).astype(np.uint32, copy=False)
+    from .memory_operations import optimize_labels
+
+    y_train = optimize_labels(y_train, one_hot_encoded=False, cuda=False)
+    y_test = optimize_labels(y_test, one_hot_encoded=False, cuda=False)
 
     classes = np.unique(y_train)
     class_count = len(classes)
@@ -47,12 +31,12 @@ def encode_one_hot(y_train, y_test=None, summary=False):
     for cls, idx in class_to_index.items():
         print(f"  {idx}: {cls}")
 
-    y_train_encoded = np.zeros((y_train.shape[0], class_count))
+    y_train_encoded = np.zeros((y_train.shape[0], class_count), dtype=y_train.dtype)
     for i, label in enumerate(y_train):
         y_train_encoded[i, class_to_index[label]] = 1
 
     if y_test is not None:
-        y_test_encoded = np.zeros((y_test.shape[0], class_count))
+        y_test_encoded = np.zeros((y_test.shape[0], class_count), dtype=y_test.dtype)
         for i, label in enumerate(y_test):
             y_test_encoded[i, class_to_index[label]] = 1
         return y_train_encoded, y_test_encoded
```
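Both dtype-selection ladders are replaced by a single helper, memory_operations.optimize_labels, which, judging from the removed code, casts labels to the smallest unsigned integer type that fits: uint8 below 256 values, uint16 up to 32767, uint32 otherwise. A sketch of what such a helper presumably does (the call signature comes from the diff; the body is an assumption):

```python
import numpy as np

def optimize_labels(y, one_hot_encoded=False, cuda=False):
    """Cast labels to the smallest unsigned dtype that fits.

    Reconstructed from the removed inline code; the real implementation
    lives in pyerualjetwork/memory_operations.py and may differ.
    """
    if y is None:           # encode_one_hot passes y_test=None through
        return None
    xp = np
    if cuda:
        import cupy as xp   # GPU path used by the *_cuda modules
    y = xp.asarray(y)
    # One-hot labels are sized by class count (row width); plain labels
    # by the number of label values, matching the removed len() checks.
    n = y.shape[1] if one_hot_encoded else len(y)
    if n < 256:
        return y.astype(xp.uint8, copy=False)
    elif n <= 32767:
        return y.astype(xp.uint16, copy=False)
    return y.astype(xp.uint32, copy=False)
```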
```diff
@@ -90,16 +74,10 @@ def split(X, y, test_size, random_state=42, dtype=np.float32):
     Returns:
         tuple: x_train, x_test, y_train, y_test as ordered training and testing data subsets.
     """
-
-    if len(y) < 256:
-        if y.dtype != np.uint8:
-            y = np.array(y, copy=False).astype(np.uint8, copy=False)
-    elif len(y) <= 32767:
-        if y.dtype != np.uint16:
-            y = np.array(y, copy=False).astype(np.uint16, copy=False)
-    else:
-        if y.dtype != np.uint32:
-            y = np.array(y, copy=False).astype(np.uint32, copy=False)
+    from .memory_operations import transfer_to_cpu, optimize_labels
+
+    X = transfer_to_cpu(X, dtype=dtype)
+    y = optimize_labels(y, one_hot_encoded=False, cuda=False)
 
     num_samples = X.shape[0]
 
@@ -124,6 +102,8 @@ def split(X, y, test_size, random_state=42, dtype=np.float32):
     x_train, x_test = X[train_indices], X[test_indices]
     y_train, y_test = y[train_indices], y[test_indices]
 
+    del X, y
+
     return x_train, x_test, y_train, y_test
 
 
```
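split now delegates device transfer to memory_operations.transfer_to_cpu, and the CUDA module further below relies on a transfer_to_gpu counterpart. Hypothetical sketches of the pair, inferred only from how the diff calls them:

```python
import numpy as np

def transfer_to_cpu(x, dtype=np.float32):
    # Assumption: CuPy arrays expose .get() -> numpy; everything else is
    # treated as host data. The real helper may manage memory differently.
    if hasattr(x, "get"):
        return x.get().astype(dtype, copy=False)
    return np.asarray(x).astype(dtype, copy=False)

def transfer_to_gpu(x, dtype=np.float32):
    import cupy as cp
    return cp.asarray(x, dtype=dtype)  # no copy if already on the device
```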
```diff
@@ -142,20 +122,11 @@ def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=np.float32
         y_balanced -- Balanced class labels (one-hot encoded, NumPy array format)
     """
     from .ui import loading_bars
-
-    bar_format = loading_bars()[0]
+    from .memory_operations import transfer_to_cpu
 
-    x_train = x_train.astype(dtype, copy=False)
-    if len(y_train[0]) < 256:
-        if y_train.dtype != np.uint8:
-            y_train = np.array(y_train, copy=False).astype(np.uint8, copy=False)
-    elif len(y_train[0]) <= 32767:
-        if y_train.dtype != np.uint16:
-            y_train = np.array(y_train, copy=False).astype(np.uint16, copy=False)
-    else:
-        if y_train.dtype != np.uint32:
-            y_train = np.array(y_train, copy=False).astype(np.uint32, copy=False)
+    x_train = transfer_to_cpu(x_train, dtype=dtype)
 
+    bar_format = loading_bars()[0]
     classes = np.arange(y_train.shape[1])
     class_count = len(classes)
@@ -181,8 +152,8 @@ def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=np.float32
         if num_samples < target_samples_per_class:
 
             samples_to_add = target_samples_per_class - num_samples
-            additional_samples = np.zeros((samples_to_add, x_train.shape[1]))
-            additional_labels = np.zeros((samples_to_add, y_train.shape[1]))
+            additional_samples = np.zeros((samples_to_add, x_train.shape[1]), dtype=x_train.dtype)
+            additional_labels = np.zeros((samples_to_add, y_train.shape[1]), dtype=y_train.dtype)
 
             for i in range(samples_to_add):
 
@@ -200,10 +171,12 @@ def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=np.float32
         x_balanced.append(additional_samples)
         y_balanced.append(additional_labels)
 
-    x_balanced = np.vstack(x_balanced)
-    y_balanced = np.vstack(y_balanced)
+    x_balanced = np.vstack(x_balanced, dtype=x_train.dtype)
+    y_balanced = np.vstack(y_balanced, dtype=y_train.dtype)
+
+    del x_train, y_train
 
-    return x_balanced
+    return x_balanced, y_balanced
 
 
 def auto_balancer(x_train, y_train, dtype=np.float32):
```
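The new dtype= argument to np.vstack casts while stacking instead of building a float64 result first; it needs NumPy 1.24 or newer, which the numpy==1.26.4 pin above satisfies. For example:

```python
import numpy as np

chunks = [np.ones((2, 3), dtype=np.float32), np.zeros((1, 3), dtype=np.float32)]
stacked = np.vstack(chunks, dtype=np.float32)  # cast applied during the stack
print(stacked.dtype, stacked.shape)            # float32 (3, 3)
```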
```diff
@@ -220,20 +193,11 @@ def auto_balancer(x_train, y_train, dtype=np.float32):
         tuple: A tuple containing balanced input data and labels.
     """
     from .ui import loading_bars
-
-    bar_format = loading_bars()[0]
+    from .memory_operations import transfer_to_cpu
 
-    x_train = x_train.astype(dtype, copy=False)
-    if len(y_train[0]) < 256:
-        if y_train.dtype != np.uint8:
-            y_train = np.array(y_train, copy=False).astype(np.uint8, copy=False)
-    elif len(y_train[0]) <= 32767:
-        if y_train.dtype != np.uint16:
-            y_train = np.array(y_train, copy=False).astype(np.uint16, copy=False)
-    else:
-        if y_train.dtype != np.uint32:
-            y_train = np.array(y_train, copy=False).astype(np.uint32, copy=False)
+    x_train = transfer_to_cpu(x_train, dtype=dtype)
 
+    bar_format = loading_bars()[0]
     classes = np.arange(y_train.shape[1])
     class_count = len(classes)
@@ -271,10 +235,15 @@ def auto_balancer(x_train, y_train, dtype=np.float32):
         print(Fore.RED + "ERROR: Inputs and labels must be same length check parameters")
         sys.exit()
 
-    return BalancedInputs, BalancedLabels
+    BalancedInputs = BalancedInputs.astype(dtype, copy=False)
+    BalancedLabels = BalancedLabels.astype(dtype=y_train.dtype, copy=False)
 
+    del x_train, y_train
 
-def synthetic_augmentation(x_train, y_train, dtype=np.float32):
+    return BalancedInputs, BalancedLabels
+
+
+def synthetic_augmentation(x, y, dtype=np.float32):
     """
     Generates synthetic examples to balance classes with fewer examples.
 
@@ -288,24 +257,12 @@ def synthetic_augmentation(x_train, y_train, dtype=np.float32):
         y_balanced -- Balanced class labels (one-hot encoded, array format)
     """
     from .ui import loading_bars
-
-    bar_format = loading_bars()[0]
+    from .memory_operations import transfer_to_cpu
 
-
-    if len(y_train[0]) < 256:
-        if y_train.dtype != np.uint8:
-            y_train = np.array(y_train, copy=False).astype(np.uint8, copy=False)
-    elif len(y_train[0]) <= 32767:
-        if y_train.dtype != np.uint16:
-            y_train = np.array(y_train, copy=False).astype(np.uint16, copy=False)
-    else:
-        if y_train.dtype != np.uint32:
-            y_train = np.array(y_train, copy=False).astype(np.uint32, copy=False)
-
-    x = x_train
-    y = y_train
+    x = transfer_to_cpu(x, dtype=dtype)
 
-    classes = np.arange(y_train.shape[1])
+    bar_format = loading_bars()[0]
+    classes = np.arange(y.shape[1])
     class_count = len(classes)
 
     class_distribution = {i: 0 for i in range(class_count)}
@@ -340,8 +297,12 @@ def synthetic_augmentation(x_train, y_train, dtype=np.float32):
 
             num_samples += 1
 
+    x_balanced = np.array(x_balanced).astype(dtype, copy=False)
+    y_balanced = np.array(y_balanced).astype(dtype=y.dtype, copy=False)
+
+    del x, y
 
-    return
+    return x_balanced, y_balanced
 
 
 def standard_scaler(x_train=None, x_test=None, scaler_params=None, dtype=np.float32):
```
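Throughout these balancers a synthetic sample is a random convex combination of two same-class samples, sample1 + (sample2 - sample1) * rand(). A standalone illustration of the idea:

```python
import numpy as np

rng = np.random.default_rng(42)

def interpolate_pair(sample1, sample2):
    # Pick a random point on the segment between two same-class samples,
    # mirroring the synthetic_sample line in the diffs above.
    t = rng.random()
    return sample1 + (sample2 - sample1) * t

a, b = np.array([0.0, 1.0]), np.array([1.0, 0.0])
print(interpolate_pair(a, b))  # lies between a and b
```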
pyerualjetwork/data_operations_cuda.py
CHANGED
```diff
@@ -17,29 +17,13 @@ def encode_one_hot(y_train, y_test=None, summary=False):
         tuple: One-hot encoded y_train and (if given) y_test.
     """
 
-    if len(y_train) < 256:
-        if y_train.dtype != cp.uint8:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint8, copy=False)
-    elif len(y_train) <= 32767:
-        if y_train.dtype != cp.uint16:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint16, copy=False)
-    else:
-        if y_train.dtype != cp.uint32:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint32, copy=False)
-
-    if y_test is not None:
-        if len(y_test) < 256:
-            if y_test.dtype != cp.uint8:
-                y_test = cp.array(y_test, copy=False).astype(cp.uint8, copy=False)
-        elif len(y_test) <= 32767:
-            if y_test.dtype != cp.uint16:
-                y_test = cp.array(y_test, copy=False).astype(cp.uint16, copy=False)
-        else:
-            if y_test.dtype != cp.uint32:
-                y_test = cp.array(y_test, copy=False).astype(cp.uint32, copy=False)
+    from .memory_operations import optimize_labels, transfer_to_cpu
+
+    y_train = optimize_labels(y_train, one_hot_encoded=False, cuda=True)
+    y_test = optimize_labels(y_test, one_hot_encoded=False, cuda=True)
 
-    y_train = y_train.get()
-    y_test = y_test.get()
+    y_train = transfer_to_cpu(y_train,dtype=y_train.dtype)
+    y_test = transfer_to_cpu(y_test,dtype=y_test.dtype)
 
     classes = np.unique(y_train)
     class_count = len(classes)
@@ -51,17 +35,17 @@ def encode_one_hot(y_train, y_test=None, summary=False):
     for cls, idx in class_to_index.items():
         print(f"  {idx}: {cls}")
 
-    y_train_encoded = np.zeros((y_train.shape[0], class_count))
+    y_train_encoded = np.zeros((y_train.shape[0], class_count), dtype=y_train.dtype)
     for i, label in enumerate(y_train):
         y_train_encoded[i, class_to_index[label]] = 1
 
     if y_test is not None:
-        y_test_encoded = np.zeros((y_test.shape[0], class_count))
+        y_test_encoded = np.zeros((y_test.shape[0], class_count), dtype=y_test.dtype)
         for i, label in enumerate(y_test):
             y_test_encoded[i, class_to_index[label]] = 1
-        return cp.array(y_train_encoded), cp.array(y_test_encoded)
+        return cp.array(y_train_encoded, dtype=y_train.dtype), cp.array(y_test_encoded, dtype=y_test.dtype)
 
-    return cp.array(y_train_encoded)
+    return cp.array(y_train_encoded, dtype=y_train.dtype)
 
 
 def decode_one_hot(encoded_data):
```
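Allocating the one-hot matrices with dtype=y_train.dtype keeps the encoding in the small integer type chosen by optimize_labels instead of NumPy's float64 default, for example:

```python
import numpy as np

labels = np.array([0, 2, 1], dtype=np.uint8)   # as optimize_labels would return
encoded = np.zeros((labels.shape[0], 3), dtype=labels.dtype)
encoded[np.arange(labels.shape[0]), labels] = 1
print(encoded.dtype, encoded.nbytes)           # uint8 9 (a float64 matrix would be 72)
```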
```diff
@@ -75,36 +59,34 @@ def decode_one_hot(encoded_data):
         cupy.ndarray: Decoded categorical labels with shape (n_samples,).
     """
 
-    decoded_labels = cp.argmax(encoded_data, axis=1)
+    if encoded_data.ndim == 1: return cp.argmax(encoded_data)
+    else: return cp.argmax(encoded_data, axis=1)
 
-    return decoded_labels
 
 
-def split(X, y, test_size, random_state=42, dtype=cp.float32, use_cpu=False):
+def split(X, y, test_size, random_state=42, dtype=cp.float32, shuffle_in_cpu=False):
     """
     Splits the given X (features) and y (labels) data into training and testing subsets.
 
     Args:
         X (cupy.ndarray): Features data.
+
         y (cupy.ndarray): Labels data.
+
         test_size (float or int): Proportion or number of samples for the test subset.
+
         random_state (int or None): Seed for random state. Default: 42.
+
         dtype (cupy.dtype): Data type for the arrays. np.float32 by default. Example: cp.float64 or cp.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not reccomended!] (optional)
-
+
+        shuffle_in_cpu (bool): If True, output will be same cpu's split function. Default: False. (Use this for direct comparison of cpu training.)
     Returns:
         tuple: x_train, x_test, y_train, y_test as ordered training and testing data subsets.
     """
-
-    if len(y) < 256:
-        if y.dtype != cp.uint8:
-            y = cp.array(y, copy=False).astype(cp.uint8, copy=False)
-    elif len(y) <= 32767:
-        if y.dtype != cp.uint16:
-            y = cp.array(y, copy=False).astype(cp.uint16, copy=False)
-    else:
-        if y.dtype != cp.uint32:
-            y = cp.array(y, copy=False).astype(cp.uint32, copy=False)
-
+    from .memory_operations import transfer_to_gpu, optimize_labels
+
+    X = transfer_to_gpu(X, dtype=dtype)
+    y = optimize_labels(y, one_hot_encoded=False, cuda=True)
 
     num_samples = X.shape[0]
 
```
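decode_one_hot now reduces to cp.argmax, with the new ndim == 1 branch covering a single unbatched vector:

```python
import cupy as cp

batch = cp.array([[0, 1, 0], [1, 0, 0]])
print(cp.argmax(batch, axis=1))  # [1 0], the batched path
single = cp.array([0, 0, 1])
print(cp.argmax(single))         # 2, the new ndim == 1 path
```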
```diff
@@ -117,7 +99,7 @@ def split(X, y, test_size, random_state=42, dtype=cp.float32, use_cpu=False):
     else:
         raise ValueError("test_size should be float or int.")
 
-    if use_cpu:
+    if shuffle_in_cpu:
         indices = np.arange(num_samples)
         np.random.seed(random_state)
         np.random.shuffle(indices)
@@ -134,15 +116,13 @@ def split(X, y, test_size, random_state=42, dtype=cp.float32, use_cpu=False):
 
     x_train, x_test = X[train_indices], X[test_indices]
     y_train, y_test = y[train_indices], y[test_indices]
-
     del X
     del y
-    cp.
-
+    cp.cuda.MemoryPool().free_all_blocks()
     return x_train, x_test, y_train, y_test
 
 
-def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=cp.float32, use_cpu=False):
+def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=cp.float32, shuffle_in_cpu=False):
     """
     Generates synthetic examples to balance classes to the specified number of examples per class.
 
```
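shuffle_in_cpu (renamed from use_cpu) routes shuffling through NumPy so a seeded GPU split can reproduce the CPU module's split exactly; the same seed fed to CuPy's generator generally yields a different permutation:

```python
import numpy as np
import cupy as cp

np.random.seed(42)
cp.random.seed(42)
print(np.random.permutation(5))        # what shuffle_in_cpu=True reproduces
print(cp.random.permutation(5).get())  # generally a different ordering
```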
```diff
@@ -156,28 +136,18 @@ def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=cp.float32
 
         dtype (cupy.dtype): Data type for the arrays. np.float32 by default. Example: cp.float64 or cp.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not reccomended!] (optional)
 
-
+        shuffle_in_cpu (bool): If True, output will be same cpu's manuel_balancer function. Default: False. (Use this for direct comparison of cpu training.)
 
     Returns:
         x_balanced -- Balanced input dataset (cupy array format)
         y_balanced -- Balanced class labels (one-hot encoded, cupy array format)
     """
     from .ui import loading_bars
-
-    bar_format = loading_bars()[0]
+    from .memory_operations import transfer_to_gpu
 
-
-    x_train = x_train.astype(dtype, copy=False)
-    if len(y_train[0]) < 256:
-        if y_train.dtype != cp.uint8:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint8, copy=False)
-    elif len(y_train[0]) <= 32767:
-        if y_train.dtype != cp.uint16:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint16, copy=False)
-    else:
-        if y_train.dtype != cp.uint32:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint32, copy=False)
-
+    bar_format = loading_bars()[0]
+    x_train = transfer_to_gpu(x_train, dtype=dtype)
+    y_train = transfer_to_gpu(y_train, dtype=y_train.dtype)
 
     classes = cp.arange(y_train.shape[1])
     class_count = len(classes)
@@ -192,7 +162,7 @@ def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=cp.float32
 
         if num_samples > target_samples_per_class:
 
-            if use_cpu:
+            if shuffle_in_cpu:
                 selected_indices = np.random.choice(
                     class_indices.get(), target_samples_per_class, replace=False)
             else:
@@ -209,12 +179,12 @@ def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=cp.float32
         if num_samples < target_samples_per_class:
 
             samples_to_add = target_samples_per_class - num_samples
-            additional_samples = cp.zeros((samples_to_add, x_train.shape[1]))
-            additional_labels = cp.zeros((samples_to_add, y_train.shape[1]))
+            additional_samples = cp.zeros((samples_to_add, x_train.shape[1]), dtype=x_train.dtype)
+            additional_labels = cp.zeros((samples_to_add, y_train.shape[1]), dtype=y_train.dtype)
 
             for i in range(samples_to_add):
 
-                if use_cpu:
+                if shuffle_in_cpu:
                     random_indices = np.random.choice(class_indices.get(), 2, replace=False)
                 else:
                     random_indices = cp.random.choice(class_indices, 2, replace=False)
@@ -222,7 +192,7 @@ def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=cp.float32
                 sample1 = x_train[random_indices[0]]
                 sample2 = x_train[random_indices[1]]
 
-                if use_cpu:
+                if shuffle_in_cpu:
                     synthetic_sample = sample1 + (sample2 - sample1) * np.random.rand()
                 else:
                     synthetic_sample = sample1 + (sample2 - sample1) * cp.random.rand()
@@ -234,13 +204,16 @@ def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=cp.float32
         x_balanced.append(additional_samples)
         y_balanced.append(additional_labels)
 
-    x_balanced = cp.vstack(x_balanced)
-    y_balanced = cp.vstack(y_balanced)
+    x_balanced = cp.vstack(x_balanced, dtype=x_train.dtype)
+    y_balanced = cp.vstack(y_balanced, dtype=y_train.dtype)
+
+    del x_train, y_train
+    cp.cuda.MemoryPool().free_all_blocks()
 
     return x_balanced, y_balanced
 
 
-def auto_balancer(x_train, y_train, dtype=cp.float32, use_cpu=False):
+def auto_balancer(x_train, y_train, dtype=cp.float32, shuffle_in_cpu=False):
 
     """
     Function to balance the training data across different classes.
```
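Each balancer now drops its input references and frees cached GPU memory. Note that cp.cuda.MemoryPool() constructs a fresh, empty pool; the conventional CuPy idiom releases the blocks cached in the default pool, as in this sketch:

```python
import cupy as cp

x = cp.zeros((1024, 1024), dtype=cp.float32)  # ~4 MiB on the device
pool = cp.get_default_memory_pool()

del x                   # drop the reference first, as the diff does
pool.free_all_blocks()  # then return cached blocks to the driver
print(pool.used_bytes(), pool.total_bytes())
```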
```diff
@@ -248,35 +221,27 @@ def auto_balancer(x_train, y_train, dtype=cp.float32, use_cpu=False):
     Arguments:
         x_train (list): Input data for training.
 
-        y_train (list): Labels corresponding to the input data.
+        y_train (list): Labels corresponding to the input data. (one-hot encoded)
 
         dtype (cupy.dtype): Data type for the arrays. np.float32 by default. Example: cp.float64 or cp.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not reccomended!] (optional)
 
-
+        shuffle_in_cpu (bool): If True, output will be same cpu's auto_balancer function. Default: False. (Use this for direct comparison of cpu training.)
     Returns:
         tuple: A tuple containing balanced input data and labels.
     """
     from .ui import loading_bars
-
-
-    x_train = x_train.astype(dtype, copy=False)
-    if len(y_train[0]) < 256:
-        if y_train.dtype != cp.uint8:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint8, copy=False)
-    elif len(y_train[0]) <= 32767:
-        if y_train.dtype != cp.uint16:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint16, copy=False)
-    else:
-        if y_train.dtype != cp.uint32:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint32, copy=False)
+    from .memory_operations import transfer_to_gpu
+
+    x_train = transfer_to_gpu(x_train, dtype=dtype)
+    y_train = transfer_to_gpu(y_train, dtype=y_train.dtype)
 
     bar_format = loading_bars()[0]
 
-    classes = cp.arange(y_train.shape[1])
+    classes = cp.arange(y_train.shape[1], dtype=y_train.dtype)
     class_count = len(classes)
 
-    ClassIndices = {i: cp.where(
+    ClassIndices = {i: cp.where(y_train[:, i] == 1)[
         0] for i in range(class_count)}
     classes = [len(ClassIndices[i]) for i in range(class_count)]
 
@@ -290,7 +255,7 @@ def auto_balancer(x_train, y_train, dtype=cp.float32, use_cpu=False):
     for i in tqdm(range(class_count),leave=False, ascii="▱▰",
             bar_format= bar_format, desc='Balancing Data',ncols=70):
         if len(ClassIndices[i]) > MinCount:
-            if use_cpu:
+            if shuffle_in_cpu:
                 SelectedIndices = np.random.choice(
                     ClassIndices[i].get(), MinCount, replace=False)
             else:
@@ -303,7 +268,7 @@ def auto_balancer(x_train, y_train, dtype=cp.float32, use_cpu=False):
     BalancedInputs = [x_train[idx] for idx in BalancedIndices]
     BalancedLabels = [y_train[idx] for idx in BalancedIndices]
 
-    if use_cpu:
+    if shuffle_in_cpu:
         permutation = np.random.permutation(len(BalancedInputs))
     else:
         permutation = cp.random.permutation(len(BalancedInputs))
@@ -313,12 +278,13 @@ def auto_balancer(x_train, y_train, dtype=cp.float32, use_cpu=False):
 
     print(Fore.GREEN + "Data Succesfully Balanced from: " + str(len(x_train)
         ) + " to: " + str(len(BalancedInputs)) + ". from: auto_balancer " + Style.RESET_ALL)
+    del x_train, y_train
+    cp.cuda.MemoryPool().free_all_blocks()
 
-
     return BalancedInputs, BalancedLabels
 
 
-def synthetic_augmentation(x_train, y_train, dtype=cp.float32, use_cpu=False):
+def synthetic_augmentation(x_train, y_train, dtype=cp.float32, shuffle_in_cpu=False):
     """
     Generates synthetic examples to balance classes with fewer examples using CuPy.
     Arguments:
@@ -329,30 +295,20 @@ def synthetic_augmentation(x_train, y_train, dtype=cp.float32, use_cpu=False):
 
         dtype (cupy.dtype): Data type for the arrays. np.float32 by default. Example: cp.float64 or cp.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not reccomended!] (optional)
 
-
+        shuffle_in_cpu (bool): If True, output will be same cpu's synthetic_augmentation function. Default: False. (Use this for direct comparison of cpu training.)
 
     Returns:
         x_train_balanced -- Balanced input dataset (cupy array format)
         y_train_balanced -- Balanced class labels (one-hot encoded, cupy array format)
     """
     from .ui import loading_bars
-
+    from .memory_operations import transfer_to_gpu
+
+    x = transfer_to_gpu(x_train, dtype=dtype)
+    y = transfer_to_gpu(y_train, dtype=y_train.dtype)
+
     bar_format = loading_bars()[0]
 
-    x = x_train.astype(dtype, copy=False)
-
-    if len(y_train[0]) < 256:
-        if y_train.dtype != cp.uint8:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint8, copy=False)
-    elif len(y_train[0]) <= 32767:
-        if y_train.dtype != cp.uint16:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint16, copy=False)
-    else:
-        if y_train.dtype != cp.uint32:
-            y_train = cp.array(y_train, copy=False).astype(cp.uint32, copy=False)
-
-    y = y_train
-
     classes = cp.arange(y_train.shape[1])
     class_count = len(classes)
     class_distribution = {i: 0 for i in range(class_count)}
@@ -371,7 +327,7 @@ def synthetic_augmentation(x_train, y_train, dtype=cp.float32, use_cpu=False):
 
         if num_samples < max_class_count:
             while num_samples < max_class_count:
-                if use_cpu:
+                if shuffle_in_cpu:
                     random_indices = np.random.choice(
                         class_indices, 2, replace=False)
                 else:
@@ -380,7 +336,7 @@ def synthetic_augmentation(x_train, y_train, dtype=cp.float32, use_cpu=False):
             sample1 = x[random_indices[0]]
             sample2 = x[random_indices[1]]
 
-            if use_cpu:
+            if shuffle_in_cpu:
                 synthetic_sample = sample1 + \
                     (sample2 - sample1) * np.random.rand()
             else:
@@ -394,6 +350,9 @@ def synthetic_augmentation(x_train, y_train, dtype=cp.float32, use_cpu=False):
     x_balanced = cp.array(x_balanced)
     y_balanced = cp.array(y_balanced)
 
+    del x_train, y_train, x, y
+    cp.cuda.MemoryPool().free_all_blocks()
+
     return x_balanced, y_balanced
 
 def standard_scaler(x_train=None, x_test=None, scaler_params=None, dtype=cp.float32):
@@ -462,7 +421,6 @@ def normalization(
     MaxAbs = cp.max(cp.abs(Input.astype(dtype, copy=False)))
     return (Input / MaxAbs)
 
-
 def find_closest_factors(a):
 
     root = int(math.sqrt(a))
```