pyerualjetwork 4.0.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,439 @@
1
+ from tqdm import tqdm
2
+ import numpy as np
3
+ from colorama import Fore, Style
4
+ import sys
5
+ import math
6
+
7
def encode_one_hot(y_train, y_test=None, summary=False):
    """
    Performs one-hot encoding on y_train and y_test data.

    The columns of the encoded arrays follow the sorted order of the unique
    labels found in y_train (the order returned by ``np.unique``).

    Args:
        y_train (numpy.ndarray): Train label data (one label per sample).
        y_test (numpy.ndarray): Test label data (optional). Every label must also occur in y_train.
        summary (bool): If True, prints the class-to-index mapping. Default: False

    Returns:
        numpy.ndarray or tuple: One-hot encoded y_train, or a tuple of
        one-hot encoded y_train and y_test when y_test is given.

    Raises:
        KeyError: If y_test contains a label that does not occur in y_train.
    """
    y_train = np.asarray(y_train)

    # Labels are deliberately NOT narrowed to a small unsigned integer type:
    # doing so based on the number of samples made distinct labels collide
    # (e.g. 44 and 300 are both 44 as uint8) and broke non-numeric labels.
    classes, train_positions = np.unique(y_train, return_inverse=True)
    train_positions = train_positions.reshape(-1)
    class_count = len(classes)

    if summary:
        print("Class-to-index mapping:")
        for idx, cls in enumerate(classes):
            print(f" {idx}: {cls}")

    train_rows = y_train.shape[0]
    y_train_encoded = np.zeros((train_rows, class_count))
    y_train_encoded[np.arange(train_rows), train_positions] = 1

    if y_test is None:
        return y_train_encoded

    y_test = np.asarray(y_test)
    flat_test = y_test.reshape(-1)

    if class_count:
        # classes is sorted, so a binary search gives each label's column.
        # Clamp so that labels larger than every class still index safely;
        # the equality check below then flags them as unknown.
        test_positions = np.minimum(
            np.searchsorted(classes, flat_test), class_count - 1)
        known = classes[test_positions] == flat_test
    else:
        test_positions = np.zeros(flat_test.shape, dtype=np.intp)
        known = np.zeros(flat_test.shape, dtype=bool)

    if not np.all(known):
        unknown_label = flat_test[~known][0]
        raise KeyError(
            f"y_test contains a label that is not present in y_train: {unknown_label!r}")

    test_rows = y_test.shape[0]
    y_test_encoded = np.zeros((test_rows, class_count))
    y_test_encoded[np.arange(test_rows), test_positions] = 1

    return y_train_encoded, y_test_encoded
61
+
62
+
63
def decode_one_hot(encoded_data):
    """
    Converts one-hot encoded rows back into class indices.

    Args:
        encoded_data (numpy.ndarray): One-hot encoded data with shape (n_samples, n_classes).

    Returns:
        numpy.ndarray: For every row, the column index of its largest
        entry, with shape (n_samples,).
    """
    # The position of the maximum along the class axis is the class index.
    return np.argmax(encoded_data, axis=1)
77
+
78
+
79
def split(X, y, test_size, random_state, dtype=np.float32):
    """
    Splits the given X (features) and y (labels) data into training and testing subsets.

    The samples are shuffled with NumPy's global random generator before
    being split; when random_state is not None the global generator is
    seeded with it first.

    Args:
        X (numpy.ndarray): Features data.
        y (numpy.ndarray): Labels data. Must have the same number of samples as X.
        test_size (float or int): Proportion (0.0 - 1.0) or number of samples for the test subset.
        random_state (int or None): Seed for random state.
        dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]

    Returns:
        tuple: x_train, x_test, y_train, y_test as ordered training and testing data subsets.

    Raises:
        ValueError: If X and y have different lengths, or test_size is not a
            float/int or lies outside the valid range.
    """
    X = np.asarray(X).astype(dtype, copy=False)
    y = np.asarray(y)

    # Store labels compactly, but choose the width from the label VALUES.
    # Choosing it from the number of samples silently wrapped labels >= 256
    # on small datasets and truncated float targets.
    if y.size and y.dtype.kind in "biu" and y.min() >= 0:
        largest_label = int(y.max())
        for candidate in (np.uint8, np.uint16, np.uint32):
            if largest_label <= np.iinfo(candidate).max:
                y = y.astype(candidate, copy=False)
                break

    num_samples = X.shape[0]

    if y.shape[0] != num_samples:
        raise ValueError("X and y must contain the same number of samples.")

    if isinstance(test_size, (float, np.floating)):
        if not 0.0 <= test_size <= 1.0:
            raise ValueError("test_size as a float must be between 0.0 and 1.0.")
        test_size = int(test_size * num_samples)
    elif isinstance(test_size, (int, np.integer)):
        if test_size > num_samples:
            raise ValueError(
                "test_size cannot be larger than the number of samples.")
        if test_size < 0:
            raise ValueError("test_size cannot be negative.")
        test_size = int(test_size)
    else:
        raise ValueError("test_size should be float or int.")

    if random_state is not None:
        np.random.seed(random_state)

    indices = np.arange(num_samples)
    np.random.shuffle(indices)

    # The first test_size shuffled samples form the test subset.
    test_indices = indices[:test_size]
    train_indices = indices[test_size:]

    x_train, x_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    return x_train, x_test, y_train, y_test
128
+
129
+
130
def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=np.float32):
    """
    Generates synthetic examples to balance classes to the specified number of examples per class.

    Classes with more samples than the target are randomly down-sampled
    without replacement. Classes with fewer samples keep all their samples
    and are topped up with synthetic ones, each a random linear
    interpolation between two distinct samples of that class. A class with
    a single sample is topped up with copies of that sample; a class with
    no samples cannot be synthesized and is left empty.

    Arguments:
        x_train -- Input dataset (examples) - NumPy array format
        y_train -- Class labels (one-hot encoded) - NumPy array format
        target_samples_per_class -- Desired number of samples per class
        dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]

    Returns:
        x_balanced -- Balanced input dataset (NumPy array format)
        y_balanced -- Balanced class labels (one-hot encoded, NumPy array format)
    """
    from .ui import loading_bars

    bar_format = loading_bars()[0]

    # np.asarray (rather than np.array(..., copy=False)) also accepts lists
    # and does not raise on NumPy 2 when a copy is required.
    x_train = np.asarray(x_train).astype(dtype, copy=False)
    y_train = np.asarray(y_train)

    class_count = y_train.shape[1]

    if class_count < 256:
        label_dtype = np.uint8
    elif class_count <= 32767:
        label_dtype = np.uint16
    else:
        label_dtype = np.uint32
    y_train = y_train.astype(label_dtype, copy=False)

    # Class index of every sample, computed once instead of once per class.
    sample_labels = np.argmax(y_train, axis=1)

    x_balanced = []
    y_balanced = []

    for class_label in tqdm(range(class_count), leave=False, ascii="▱▰",
                            bar_format=bar_format, desc='Augmenting Data', ncols=52):
        class_indices = np.where(sample_labels == class_label)[0]
        num_samples = len(class_indices)

        if num_samples > target_samples_per_class:
            selected_indices = np.random.choice(
                class_indices, target_samples_per_class, replace=False)
            x_balanced.append(x_train[selected_indices])
            y_balanced.append(y_train[selected_indices])
            continue

        x_balanced.append(x_train[class_indices])
        y_balanced.append(y_train[class_indices])

        samples_to_add = target_samples_per_class - num_samples
        if samples_to_add == 0 or num_samples == 0:
            # Already at the target, or nothing to interpolate from.
            continue

        additional_samples = np.zeros(
            (samples_to_add,) + x_train.shape[1:], dtype=x_train.dtype)

        if num_samples == 1:
            # Two distinct samples cannot be drawn; replicate the only one.
            additional_samples[:] = x_train[class_indices[0]]
        else:
            for i in range(samples_to_add):
                first, second = np.random.choice(class_indices, 2, replace=False)
                sample1 = x_train[first]
                sample2 = x_train[second]
                # Random point on the segment between the two samples.
                additional_samples[i] = sample1 + (sample2 - sample1) * np.random.rand()

        # Every synthetic sample carries the one-hot label of its class.
        additional_labels = np.repeat(
            y_train[class_indices[:1]], samples_to_add, axis=0)

        x_balanced.append(additional_samples)
        y_balanced.append(additional_labels)

    x_balanced = np.vstack(x_balanced)
    y_balanced = np.vstack(y_balanced)

    return x_balanced.astype(dtype), y_balanced.astype(dtype)
207
+
208
+
209
def auto_balancer(x_train, y_train, dtype=np.float32):
    """
    Function to balance the training data across different classes.

    Every class is randomly down-sampled (without replacement) to the size
    of the smallest class, and the result is shuffled. If all classes
    already have the same size the data is returned unchanged.

    Note: the target size is the smallest class count, so a class column
    with no samples at all reduces the result to zero samples.

    Arguments:
        x_train (list): Input data for training.
        y_train (list): Labels corresponding to the input data. one-hot encoded.
        dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]

    Returns:
        tuple: A tuple containing balanced input data and labels.

    Raises:
        SystemExit: If x_train and y_train do not have the same length
            (an error message is printed first).
    """
    from .ui import loading_bars

    bar_format = loading_bars()[0]

    # np.asarray also accepts the list inputs the docstring promises and
    # does not raise on NumPy 2 when a copy is required.
    x_train = np.asarray(x_train).astype(dtype, copy=False)
    y_train = np.asarray(y_train)

    # Check the one condition the error message describes explicitly. A
    # blanket try/except used to report every failure as a length mismatch
    # and exit with a success status.
    if len(x_train) != len(y_train):
        print(Fore.RED + "ERROR: Inputs and labels must be same length check parameters" + Style.RESET_ALL)
        sys.exit(1)

    class_count = y_train.shape[1]

    if class_count < 256:
        label_dtype = np.uint8
    elif class_count <= 32767:
        label_dtype = np.uint16
    else:
        label_dtype = np.uint32
    y_train = y_train.astype(label_dtype, copy=False)

    class_indices = [np.where(y_train[:, i] == 1)[0] for i in range(class_count)]
    class_sizes = [len(indices) for indices in class_indices]

    if len(set(class_sizes)) == 1:
        print(Fore.WHITE + "INFO: Data have already balanced. from: auto_balancer" + Style.RESET_ALL)
        # Labels are returned in `dtype`, like on the balancing path below.
        return x_train, y_train.astype(dtype, copy=False)

    min_count = min(class_sizes)

    balanced_indices = []
    for i in tqdm(range(class_count), leave=False, ascii="▱▰",
                  bar_format=bar_format, desc='Balancing Data', ncols=70):
        if class_sizes[i] > min_count:
            selected_indices = np.random.choice(
                class_indices[i], min_count, replace=False)
        else:
            selected_indices = class_indices[i]
        balanced_indices.append(selected_indices)

    balanced_indices = np.concatenate(balanced_indices)

    # Shuffle so the result is not grouped by class.
    permutation = np.random.permutation(len(balanced_indices))
    balanced_indices = balanced_indices[permutation]

    balanced_inputs = x_train[balanced_indices]
    balanced_labels = y_train[balanced_indices]

    print(Fore.GREEN + "Data Succesfully Balanced from: " + str(len(x_train)
                                                                ) + " to: " + str(len(balanced_inputs)) + ". from: auto_balancer " + Style.RESET_ALL)

    return balanced_inputs.astype(dtype, copy=False), balanced_labels.astype(dtype, copy=False)
275
+
276
+
277
def synthetic_augmentation(x_train, y_train, dtype=np.float32):
    """
    Generates synthetic examples to balance classes with fewer examples.

    Every class is topped up to the size of the largest class. Each
    synthetic sample is a random linear interpolation between two distinct
    samples of the class. A class with a single sample is topped up with
    copies of that sample; a class with no samples cannot be synthesized
    and is left empty. The original samples come first in the output,
    followed by the synthetic samples class by class.

    Arguments:
        x_train -- Input dataset (examples) - array format
        y_train -- Class labels (one-hot encoded) - array format
        dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]

    Returns:
        x_balanced -- Balanced input dataset (array format)
        y_balanced -- Balanced class labels (one-hot encoded, array format)
    """
    from .ui import loading_bars

    bar_format = loading_bars()[0]

    # np.asarray also accepts lists and does not raise on NumPy 2 when a
    # copy is required.
    x = np.asarray(x_train).astype(dtype, copy=False)
    y = np.asarray(y_train)

    class_count = y.shape[1]

    if class_count < 256:
        label_dtype = np.uint8
    elif class_count <= 32767:
        label_dtype = np.uint16
    else:
        label_dtype = np.uint32
    y = y.astype(label_dtype, copy=False)

    # Class index of every sample, computed once for all classes.
    sample_labels = np.argmax(y, axis=1)
    class_sizes = np.bincount(sample_labels, minlength=class_count)
    max_class_count = int(class_sizes.max()) if class_count else 0

    x_parts = [x]
    y_parts = [y]

    for class_label in tqdm(range(class_count), leave=False, ascii="▱▰",
                            bar_format=bar_format, desc='Augmenting Data', ncols=52):
        class_indices = np.flatnonzero(sample_labels == class_label)
        num_samples = len(class_indices)
        missing = max_class_count - num_samples

        if missing <= 0 or num_samples == 0:
            # Already the largest class, or nothing to interpolate from.
            continue

        synthetic = np.empty((missing,) + x.shape[1:], dtype=x.dtype)

        if num_samples == 1:
            # Two distinct samples cannot be drawn; replicate the only one.
            synthetic[:] = x[class_indices[0]]
        else:
            for row in range(missing):
                first, second = np.random.choice(class_indices, 2, replace=False)
                sample1 = x[first]
                sample2 = x[second]
                # Random point on the segment between the two samples.
                synthetic[row] = sample1 + (sample2 - sample1) * np.random.rand()

        x_parts.append(synthetic)
        # Every synthetic sample carries the one-hot label of its class.
        y_parts.append(np.repeat(y[class_indices[:1]], missing, axis=0))

    x_balanced = np.concatenate(x_parts)
    y_balanced = np.concatenate(y_parts)

    return x_balanced.astype(dtype, copy=False), y_balanced.astype(dtype, copy=False)
345
+
346
+
347
def _standardize(data, mean, std):
    # Applies (data - mean) / std; features whose std is 0 are mapped to 0.
    mean = np.asarray(mean)
    std = np.asarray(std)
    constant = std == 0
    # Divide by 1 where std is 0 so no inf/NaN is produced, then zero those
    # features: a constant training feature carries no information.
    scaled = (data - mean) / np.where(constant, 1, std)
    scaled = np.where(constant, 0, scaled)
    return np.nan_to_num(scaled, nan=0)


def standard_scaler(x_train=None, x_test=None, scaler_params=None, dtype=np.float32):
    """
    Standardizes training and test datasets. x_test may be None.

    Features with zero standard deviation are scaled to 0 instead of
    producing infinities.

    Args:
        x_train: numpy.ndarray (optional). Data the mean and std are computed from.

        x_test: numpy.ndarray (optional)

        scaler_params (optional for using model): [mean, std] from an earlier
            call. When given, only x_test is scaled with these parameters and
            x_train is ignored.

        dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]

    Returns:
        Depending on the arguments:
            x_train and x_test, no scaler_params:
                (scaler_params, scaled x_train, scaled x_test)
            x_train only:
                (scaler_params, scaled x_train)
            x_test only:
                x_test converted to dtype, not scaled
            scaler_params given:
                x_test scaled with scaler_params

    Raises:
        ValueError: If there is no data to scale (x_test missing while
            scaler_params is given, or no array given at all).
    """
    if scaler_params is not None:
        if x_test is None:
            raise ValueError("x_test is required when scaler_params is given.")
        x_test = np.asarray(x_test).astype(dtype, copy=False)
        return _standardize(x_test, scaler_params[0], scaler_params[1])  # sample data scaled

    if x_train is None:
        if x_test is None:
            raise ValueError("At least one of x_train or x_test must be given.")
        return np.asarray(x_test).astype(dtype, copy=False)  # sample data not scaled

    x_train = np.asarray(x_train).astype(dtype, copy=False)

    mean = np.mean(x_train, axis=0)
    std = np.std(x_train, axis=0)
    scaler_params = [mean, std]

    train_data_scaled = _standardize(x_train, mean, std)

    if x_test is None:
        return scaler_params, train_data_scaled

    x_test = np.asarray(x_test).astype(dtype, copy=False)
    test_data_scaled = _standardize(x_test, mean, std)

    return scaler_params, train_data_scaled, test_data_scaled
392
+
393
+
394
def normalization(
    Input,  # num: Input data to be normalized.
    dtype=np.float32):
    """
    Normalizes the input data using maximum absolute scaling.

    The data is converted to dtype and divided by its largest absolute
    value. Empty input and input whose largest absolute value is 0 are
    returned (converted to dtype) without dividing.

    Args:
        Input (num): Input data to be normalized.
        dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]

    Returns:
        (num) Scaled input data after normalization.
    """
    # Convert once and divide the converted data, so the result really has
    # the requested dtype.
    data = np.asarray(Input).astype(dtype, copy=False)

    if data.size == 0:
        return data.copy()

    max_abs = np.max(np.abs(data))

    if max_abs == 0:
        # Dividing by zero would turn every entry into NaN.
        return data.copy()

    return data / max_abs
410
+
411
+
412
def find_closest_factors(a):
    """
    Finds the pair of factors of a that are closest to each other.

    Args:
        a (int): Positive number to factorize.

    Returns:
        tuple: (i, j) with i * j == a and i <= j, where i is the largest
        divisor of a that does not exceed its square root.

    Raises:
        ValueError: If a is smaller than 1.
    """
    if a < 1:
        raise ValueError("a must be a positive number.")

    root = int(math.sqrt(a))

    # math.sqrt works in floating point and can be off by one for very
    # large integers; nudge root to the exact integer square root.
    while root * root > a:
        root -= 1
    while (root + 1) * (root + 1) <= a:
        root += 1

    # Search downwards from the square root: the first divisor found gives
    # the closest pair. i == 1 always divides, so the loop always returns.
    for i in range(root, 0, -1):
        if a % i == 0:
            j = a // i
            return i, j
420
+
421
+
422
def batcher(x_test, y_test, batch_size=1):
    """
    Draws a random, class-stratified subset of the given data.

    For every class present in y_test, ``int(class_size * batch_size)``
    samples are drawn at random without replacement. The result is grouped
    by class, in ascending class index order.

    Args:
        x_test (numpy.ndarray): Input data.
        y_test (numpy.ndarray): One-hot encoded labels with shape (n_samples, n_classes).
        batch_size (float): Fraction (0 - 1) of each class to keep. Default: 1

    Returns:
        tuple: Sampled inputs and their labels.

    Raises:
        ValueError: If batch_size is not between 0 and 1.
    """
    if not 0 <= batch_size <= 1:
        raise ValueError("batch_size must be a fraction between 0 and 1.")

    # Fancy indexing below needs arrays; this also accepts plain lists.
    x_test = np.asarray(x_test)
    y_test = np.asarray(y_test)

    if len(y_test) == 0:
        # Nothing to sample from (np.concatenate would fail on no parts).
        return x_test[:0], y_test[:0]

    y_labels = np.argmax(y_test, axis=1)

    sampled_x, sampled_y = [], []

    for class_label in np.unique(y_labels):
        class_indices = np.where(y_labels == class_label)[0]

        num_samples = int(len(class_indices) * batch_size)

        sampled_indices = np.random.choice(class_indices, num_samples, replace=False)

        sampled_x.append(x_test[sampled_indices])
        sampled_y.append(y_test[sampled_indices])

    return np.concatenate(sampled_x), np.concatenate(sampled_y)