pyerualjetwork 4.3.2.1__py3-none-any.whl → 4.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {pyerualjetwork-afterburner → pyerualjetwork}/__init__.py +1 -1
  2. pyerualjetwork/activation_functions.py +343 -0
  3. pyerualjetwork/activation_functions_cuda.py +340 -0
  4. pyerualjetwork/model_operations.py +408 -0
  5. pyerualjetwork/model_operations_cuda.py +421 -0
  6. pyerualjetwork/plan.py +627 -0
  7. pyerualjetwork/plan_cuda.py +651 -0
  8. pyerualjetwork/planeat.py +825 -0
  9. pyerualjetwork/planeat_cuda.py +834 -0
  10. {pyerualjetwork-4.3.2.1.dist-info → pyerualjetwork-4.3.3.dist-info}/METADATA +17 -4
  11. pyerualjetwork-4.3.3.dist-info/RECORD +44 -0
  12. pyerualjetwork-4.3.3.dist-info/top_level.txt +2 -0
  13. pyerualjetwork_afterburner/__init__.py +11 -0
  14. pyerualjetwork_afterburner/data_operations.py +406 -0
  15. pyerualjetwork_afterburner/data_operations_cuda.py +461 -0
  16. pyerualjetwork_afterburner/help.py +17 -0
  17. pyerualjetwork_afterburner/loss_functions.py +21 -0
  18. pyerualjetwork_afterburner/loss_functions_cuda.py +21 -0
  19. pyerualjetwork_afterburner/memory_operations.py +298 -0
  20. pyerualjetwork_afterburner/metrics.py +190 -0
  21. pyerualjetwork_afterburner/metrics_cuda.py +163 -0
  22. pyerualjetwork_afterburner/ui.py +22 -0
  23. pyerualjetwork_afterburner/visualizations.py +823 -0
  24. pyerualjetwork_afterburner/visualizations_cuda.py +825 -0
  25. pyerualjetwork-4.3.2.1.dist-info/RECORD +0 -24
  26. pyerualjetwork-4.3.2.1.dist-info/top_level.txt +0 -1
  27. {pyerualjetwork-afterburner → pyerualjetwork}/data_operations.py +0 -0
  28. {pyerualjetwork-afterburner → pyerualjetwork}/data_operations_cuda.py +0 -0
  29. {pyerualjetwork-afterburner → pyerualjetwork}/help.py +0 -0
  30. {pyerualjetwork-afterburner → pyerualjetwork}/loss_functions.py +0 -0
  31. {pyerualjetwork-afterburner → pyerualjetwork}/loss_functions_cuda.py +0 -0
  32. {pyerualjetwork-afterburner → pyerualjetwork}/memory_operations.py +0 -0
  33. {pyerualjetwork-afterburner → pyerualjetwork}/metrics.py +0 -0
  34. {pyerualjetwork-afterburner → pyerualjetwork}/metrics_cuda.py +0 -0
  35. {pyerualjetwork-afterburner → pyerualjetwork}/ui.py +0 -0
  36. {pyerualjetwork-afterburner → pyerualjetwork}/visualizations.py +0 -0
  37. {pyerualjetwork-afterburner → pyerualjetwork}/visualizations_cuda.py +0 -0
  38. {pyerualjetwork-4.3.2.1.dist-info → pyerualjetwork-4.3.3.dist-info}/WHEEL +0 -0
  39. {pyerualjetwork-afterburner → pyerualjetwork_afterburner}/activation_functions.py +0 -0
  40. {pyerualjetwork-afterburner → pyerualjetwork_afterburner}/activation_functions_cuda.py +0 -0
  41. {pyerualjetwork-afterburner → pyerualjetwork_afterburner}/model_operations.py +0 -0
  42. {pyerualjetwork-afterburner → pyerualjetwork_afterburner}/model_operations_cuda.py +0 -0
  43. {pyerualjetwork-afterburner → pyerualjetwork_afterburner}/plan.py +0 -0
  44. {pyerualjetwork-afterburner → pyerualjetwork_afterburner}/plan_cuda.py +0 -0
  45. {pyerualjetwork-afterburner → pyerualjetwork_afterburner}/planeat.py +0 -0
  46. {pyerualjetwork-afterburner → pyerualjetwork_afterburner}/planeat_cuda.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: pyerualjetwork
- Version: 4.3.2.1
+ Version: 4.3.3
  Summary: PyerualJetwork is a machine learning library with GPU (CUDA) acceleration, written in Python for professionals and researchers. It includes the PLAN algorithm and the PLANEAT algorithm (genetic optimization), plus data pre-processing and memory management utilities.
  Author: Hasan Can Beydili
  Author-email: tchasancan@gmail.com
@@ -23,19 +23,32 @@ PyPi Page: https://pypi.org/project/pyerualjetwork/
  GitHub Page: https://github.com/HCB06/PyerualJetwork
 
 
- pip install pyerualjetwork==x.x.x (means it is a normal package)
- pip install pyerualjetwork==x.x.x.x (last x means it is a afterburner package)
+ pip install pyerualjetwork
 
+ 'Use if your data is small, medium, or large:'
+
  from pyerualjetwork import plan
  from pyerualjetwork import planeat
  from pyerualjetwork import data_operations
  from pyerualjetwork import model_operations
-
+
  from pyerualjetwork import plan_cuda
  from pyerualjetwork import planeat_cuda
  from pyerualjetwork import data_operations_cuda
  from pyerualjetwork import model_operations_cuda
 
+ 'Use if your data is huge: the _afterburner package (afterburner comes with powerful parallelism; the afterburner CUDA modules offer super-fast training, but some memory management and visualization features are discarded. Specially designed for LLM training and other massive model training.)'
+
+ from pyerualjetwork_afterburner import plan
+ from pyerualjetwork_afterburner import planeat
+ from pyerualjetwork_afterburner import data_operations
+ from pyerualjetwork_afterburner import model_operations
+
+ from pyerualjetwork_afterburner import plan_cuda
+ from pyerualjetwork_afterburner import planeat_cuda
+ from pyerualjetwork_afterburner import data_operations_cuda
+ from pyerualjetwork_afterburner import model_operations_cuda
+
  Optimized for Visual Studio Code
 
  requires=[
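
The selection the metadata describes can be sketched in a few lines. A minimal sketch, assuming both package trees are installed as published; the USE_AFTERBURNER flag is a hypothetical name used only for illustration (both trees expose the same submodule names, as the RECORD below confirms):

    # Hypothetical switch between the two module trees described above.
    USE_AFTERBURNER = True  # set True for huge datasets, per the metadata

    if USE_AFTERBURNER:
        from pyerualjetwork_afterburner import plan, planeat
        from pyerualjetwork_afterburner import data_operations, model_operations
    else:
        from pyerualjetwork import plan, planeat
        from pyerualjetwork import data_operations, model_operations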
@@ -0,0 +1,44 @@
+ pyerualjetwork/__init__.py,sha256=aKQcQmw61Ctm9v2v1JO5ZYp4F3k7s9KkeId4a_B31ac,639
+ pyerualjetwork/activation_functions.py,sha256=eLEesmMgDvkI1TqaLTpqtOgTaLbHEAyw-D57KIKd9G4,11775
+ pyerualjetwork/activation_functions_cuda.py,sha256=ztIw6rMR4t1289_TPIGYwE6qarl_YbSOGj5Ep3rUMqs,11803
+ pyerualjetwork/data_operations.py,sha256=Flteouu6rfSo2uHMqBHuzO02dXmbNa-I5qWmUpGTZ5Y,14760
+ pyerualjetwork/data_operations_cuda.py,sha256=UpoJoFhIwTU4xg9dVuLAxLAT4CkRaGsxvtJG9j1xrNo,17629
+ pyerualjetwork/help.py,sha256=nQ_YbYA2RtuafhuvkreNpX0WWL1I_nzlelwCtvei0_Y,775
+ pyerualjetwork/loss_functions.py,sha256=6PyBI232SQRGuFnG3LDGvnv_PUdWzT2_2mUODJiejGI,618
+ pyerualjetwork/loss_functions_cuda.py,sha256=C93IZJcrOpT6HMK9x1O4AHJWXYTkN5WZiqdssPbvAPk,617
+ pyerualjetwork/memory_operations.py,sha256=I7QiZ--xSyRkFF0wcckPwZV7K9emEvyx5aJ3DiRHZFI,13468
+ pyerualjetwork/metrics.py,sha256=q7MkhnZDRbCjFBDDfUgrl8lBYnUT_1ro1LxeBq105pI,6077
+ pyerualjetwork/metrics_cuda.py,sha256=73h9GC7XwmnFCVzFEEiPQfF8CwHIz2wsCbxpZrJtYgw,5061
+ pyerualjetwork/model_operations.py,sha256=RKqnh7-MByFosxqme4q4jC1lOndX26O-OVXYV6ZxoEE,12965
+ pyerualjetwork/model_operations_cuda.py,sha256=XnKKq54ZLaqCm-NaJ6d8IToACKcKg2Ttq6moowVRRWo,13365
+ pyerualjetwork/plan.py,sha256=ApMQC46_I8qtMqO4lLYLme--SGcMRg-GRo1-gSb3A3I,31894
+ pyerualjetwork/plan_cuda.py,sha256=H_EuNNyxrY6-AiuRkOYC8J_UmbzoqJ9aeO0i9pgUDZI,33277
+ pyerualjetwork/planeat.py,sha256=e-J-u5gJYijKznN6gn2DZoaCJJro84DOBYTy1rR5-y4,39470
+ pyerualjetwork/planeat_cuda.py,sha256=QNHCQLkR0MNFqyN2iHAtC7cbf8qZiD3p_54YH3lnMFA,39529
+ pyerualjetwork/ui.py,sha256=wu2BhU1k-w3Kcho5Jtq4SEKe68ftaUeRGneUOSCVDjU,575
+ pyerualjetwork/visualizations.py,sha256=1SKMZaJ80OD2qHUyMxW1IOv8zwmxzMPxclfbeq1Xr4g,28772
+ pyerualjetwork/visualizations_cuda.py,sha256=KbMhfsLlxujy_i3QrwCf734Q-k6d7Zn_7CEbm3gzK9w,29186
+ pyerualjetwork_afterburner/__init__.py,sha256=PWlgYDHv0-7II5khz9y5meQi0PdWYwsuQ7-pEcCijqM,655
+ pyerualjetwork_afterburner/activation_functions.py,sha256=2bv7o4EPEFr8cSKq7KI04HhMUyxgBpe8soGvN98Mazg,7740
+ pyerualjetwork_afterburner/activation_functions_cuda.py,sha256=Ua606lsj9LQahfLi6oZMkSyzyPT7ySrvC6qfACNCbL8,7781
+ pyerualjetwork_afterburner/data_operations.py,sha256=Flteouu6rfSo2uHMqBHuzO02dXmbNa-I5qWmUpGTZ5Y,14760
+ pyerualjetwork_afterburner/data_operations_cuda.py,sha256=UpoJoFhIwTU4xg9dVuLAxLAT4CkRaGsxvtJG9j1xrNo,17629
+ pyerualjetwork_afterburner/help.py,sha256=nQ_YbYA2RtuafhuvkreNpX0WWL1I_nzlelwCtvei0_Y,775
+ pyerualjetwork_afterburner/loss_functions.py,sha256=6PyBI232SQRGuFnG3LDGvnv_PUdWzT2_2mUODJiejGI,618
+ pyerualjetwork_afterburner/loss_functions_cuda.py,sha256=C93IZJcrOpT6HMK9x1O4AHJWXYTkN5WZiqdssPbvAPk,617
+ pyerualjetwork_afterburner/memory_operations.py,sha256=I7QiZ--xSyRkFF0wcckPwZV7K9emEvyx5aJ3DiRHZFI,13468
+ pyerualjetwork_afterburner/metrics.py,sha256=q7MkhnZDRbCjFBDDfUgrl8lBYnUT_1ro1LxeBq105pI,6077
+ pyerualjetwork_afterburner/metrics_cuda.py,sha256=73h9GC7XwmnFCVzFEEiPQfF8CwHIz2wsCbxpZrJtYgw,5061
+ pyerualjetwork_afterburner/model_operations.py,sha256=MCSCNYiiICRVZITobtS3ZIWmH5Q9gjyELuH32sAdgg4,12649
+ pyerualjetwork_afterburner/model_operations_cuda.py,sha256=NT01BK5nrDYE7H1x3KnSI8gmx0QTGGB0mP_LqEb1uuU,13157
+ pyerualjetwork_afterburner/plan.py,sha256=Gxv8ii4brTYMzzFZBP-X6kkwc6w6vtTPiMmqVOAqoq8,21972
+ pyerualjetwork_afterburner/plan_cuda.py,sha256=usyL-rWfczko8MQ-tmgMyt7UrKoH7IG3FX3edBiq-vc,22716
+ pyerualjetwork_afterburner/planeat.py,sha256=Lq5R0aMS4UIdZdbUKsKDv5g0WLwYryomR3IQYb8vAa4,37573
+ pyerualjetwork_afterburner/planeat_cuda.py,sha256=dZdKrrhdnoTjcF8Uv23Y4UvlOfizazNyx9v6QsdpIoo,37621
+ pyerualjetwork_afterburner/ui.py,sha256=wu2BhU1k-w3Kcho5Jtq4SEKe68ftaUeRGneUOSCVDjU,575
+ pyerualjetwork_afterburner/visualizations.py,sha256=1SKMZaJ80OD2qHUyMxW1IOv8zwmxzMPxclfbeq1Xr4g,28772
+ pyerualjetwork_afterburner/visualizations_cuda.py,sha256=KbMhfsLlxujy_i3QrwCf734Q-k6d7Zn_7CEbm3gzK9w,29186
+ pyerualjetwork-4.3.3.dist-info/METADATA,sha256=hfWfhq4gsongGVfzIxIzRuSzE44rsnHDUmVSbD4pXlo,8304
+ pyerualjetwork-4.3.3.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+ pyerualjetwork-4.3.3.dist-info/top_level.txt,sha256=uK64ge08QQoPuXM3aiRVPgiQQtl8Fxm2-HieIut5Lwo,42
+ pyerualjetwork-4.3.3.dist-info/RECORD,,
@@ -0,0 +1,2 @@
+ pyerualjetwork
+ pyerualjetwork_afterburner
@@ -0,0 +1,11 @@
+ __version__ = "4.3.2.2b0-afterburner"
+ __update__ = "* Changes: https://github.com/HCB06/PyerualJetwork/blob/main/CHANGES\n* PyerualJetwork Homepage: https://github.com/HCB06/PyerualJetwork/tree/main\n* PyerualJetwork document: https://github.com/HCB06/PyerualJetwork/blob/main/Welcome_to_PyerualJetwork/PYERUALJETWORK_USER_MANUEL_AND_LEGAL_INFORMATION(EN).pdf\n* YouTube tutorials: https://www.youtube.com/@HasanCanBeydili"
+
+ def print_version(__version__):
+     print(f"PyerualJetwork Version {__version__}" + '\n')
+
+ def print_update_notes(__update__):
+     print(f"Notes:\n{__update__}")
+
+ print_version(__version__)
+ print_update_notes(__update__)
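
Because the two calls at the bottom of this __init__.py run at import time, a bare import of the afterburner package prints the banner. A minimal sketch of that behavior (output abbreviated; the exact text follows from the strings above):

    import pyerualjetwork_afterburner
    # PyerualJetwork Version 4.3.2.2b0-afterburner
    #
    # Notes:
    # * Changes: https://github.com/HCB06/PyerualJetwork/blob/main/CHANGES
    # ...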
@@ -0,0 +1,406 @@
+ from tqdm import tqdm
+ import numpy as np
+ from colorama import Fore, Style
+ import sys
+ import math
+
+ def encode_one_hot(y_train, y_test=None, summary=False):
+     """
+     Performs one-hot encoding on y_train and y_test data.
+
+     Args:
+         y_train (numpy.ndarray): Train label data.
+         y_test (numpy.ndarray, optional): Test label data to be one-hot encoded.
+         summary (bool, optional): If True, prints the class-to-index mapping. Default: False
+
+     Returns:
+         tuple: One-hot encoded y_train and (if given) y_test.
+     """
+     from .memory_operations import optimize_labels
+
+     y_train = optimize_labels(y_train, one_hot_encoded=False, cuda=False)
+     y_test = optimize_labels(y_test, one_hot_encoded=False, cuda=False)
+
+     classes = np.unique(y_train)
+     class_count = len(classes)
+
+     class_to_index = {cls: idx for idx, cls in enumerate(classes)}
+
+     if summary:
+         print("Class-to-index mapping:")
+         for cls, idx in class_to_index.items():
+             print(f"  {idx}: {cls}")
+
+     y_train_encoded = np.zeros((y_train.shape[0], class_count), dtype=y_train.dtype)
+     for i, label in enumerate(y_train):
+         y_train_encoded[i, class_to_index[label]] = 1
+
+     if y_test is not None:
+         y_test_encoded = np.zeros((y_test.shape[0], class_count), dtype=y_test.dtype)
+         for i, label in enumerate(y_test):
+             y_test_encoded[i, class_to_index[label]] = 1
+         return y_train_encoded, y_test_encoded
+
+     return y_train_encoded
+
+
+ def decode_one_hot(encoded_data):
+     """
+     Decodes one-hot encoded data to original categorical labels.
+
+     Args:
+         encoded_data (numpy.ndarray): One-hot encoded data with shape (n_samples, n_classes).
+
+     Returns:
+         numpy.ndarray: Decoded categorical labels with shape (n_samples,).
+     """
+
+     if encoded_data.ndim == 1: return np.argmax(encoded_data)
+     else: return np.argmax(encoded_data, axis=1)
+
+
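A minimal usage sketch for the pair above, assuming the wheel is installed as published (the same file also ships as pyerualjetwork.data_operations, per the identical sha256 in the RECORD; the toy labels are illustrative, already 0-indexed integers):

    import numpy as np
    from pyerualjetwork_afterburner.data_operations import encode_one_hot, decode_one_hot

    y_train = np.array([0, 2, 1, 2, 0])   # three classes: 0, 1, 2
    y_test = np.array([1, 0, 2])

    y_train_oh, y_test_oh = encode_one_hot(y_train, y_test, summary=True)
    print(y_train_oh.shape)               # (5, 3)
    print(decode_one_hot(y_train_oh))     # [0 2 1 2 0], a clean round trip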
+ def split(X, y, test_size, random_state=42, dtype=np.float32):
+     """
+     Splits the given X (features) and y (labels) data into training and testing subsets.
+
+     Args:
+         X (numpy.ndarray): Features data.
+
+         y (numpy.ndarray): Labels data.
+
+         test_size (float or int): Proportion or number of samples for the test subset.
+
+         random_state (int or None): Seed for the random state. Default: 42.
+
+         dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]
+
+     Returns:
+         tuple: x_train, x_test, y_train, y_test as ordered training and testing data subsets.
+     """
+     from .memory_operations import transfer_to_cpu, optimize_labels
+
+     X = transfer_to_cpu(X, dtype=dtype)
+     y = optimize_labels(y, one_hot_encoded=False, cuda=False)
+
+     num_samples = X.shape[0]
+
+     if isinstance(test_size, float):
+         test_size = int(test_size * num_samples)
+     elif isinstance(test_size, int):
+         if test_size > num_samples:
+             raise ValueError(
+                 "test_size cannot be larger than the number of samples.")
+     else:
+         raise ValueError("test_size should be float or int.")
+
+     if random_state is not None:
+         np.random.seed(random_state)
+
+     indices = np.arange(num_samples)
+     np.random.shuffle(indices)
+
+     test_indices = indices[:test_size]
+     train_indices = indices[test_size:]
+
+     x_train, x_test = X[train_indices], X[test_indices]
+     y_train, y_test = y[train_indices], y[test_indices]
+
+     del X, y
+
+     return x_train, x_test, y_train, y_test
+
+
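A quick sketch of split on toy data, assuming the installed package (the shapes follow directly from the code above: test_size=0.2 of 10 samples is 2):

    import numpy as np
    from pyerualjetwork_afterburner.data_operations import split

    X = np.random.rand(10, 4)
    y = np.array([0, 1] * 5)
    x_train, x_test, y_train, y_test = split(X, y, test_size=0.2, random_state=42)
    print(x_train.shape, x_test.shape)    # (8, 4) (2, 4)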
+ def manuel_balancer(x_train, y_train, target_samples_per_class, dtype=np.float32):
+     """
+     Generates synthetic examples to balance classes to the specified number of examples per class.
+
+     Arguments:
+         x_train -- Input dataset (examples) - NumPy array format
+         y_train -- Class labels (one-hot encoded) - NumPy array format
+         target_samples_per_class -- Desired number of samples per class
+         dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]
+
+     Returns:
+         x_balanced -- Balanced input dataset (NumPy array format)
+         y_balanced -- Balanced class labels (one-hot encoded, NumPy array format)
+     """
+     from .ui import loading_bars
+     from .memory_operations import transfer_to_cpu
+
+     x_train = transfer_to_cpu(x_train, dtype=dtype)
+
+     bar_format = loading_bars()[0]
+     classes = np.arange(y_train.shape[1])
+     class_count = len(classes)
+
+     x_balanced = []
+     y_balanced = []
+
+     for class_label in tqdm(range(class_count), leave=False, ascii="▱▰",
+                             bar_format=bar_format, desc='Augmenting Data', ncols=52):
+         class_indices = np.where(np.argmax(y_train, axis=1) == class_label)[0]
+         num_samples = len(class_indices)
+
+         if num_samples > target_samples_per_class:
+
+             selected_indices = np.random.choice(class_indices, target_samples_per_class, replace=False)
+             x_balanced.append(x_train[selected_indices])
+             y_balanced.append(y_train[selected_indices])
+
+         else:
+
+             x_balanced.append(x_train[class_indices])
+             y_balanced.append(y_train[class_indices])
+
+             if num_samples < target_samples_per_class:
+
+                 samples_to_add = target_samples_per_class - num_samples
+                 additional_samples = np.zeros((samples_to_add, x_train.shape[1]), dtype=x_train.dtype)
+                 additional_labels = np.zeros((samples_to_add, y_train.shape[1]), dtype=y_train.dtype)
+
+                 for i in range(samples_to_add):
+
+                     random_indices = np.random.choice(class_indices, 2, replace=False)
+                     sample1 = x_train[random_indices[0]]
+                     sample2 = x_train[random_indices[1]]
+
+                     # Linear interpolation between two random same-class samples
+                     synthetic_sample = sample1 + (sample2 - sample1) * np.random.rand()
+
+                     additional_samples[i] = synthetic_sample
+                     additional_labels[i] = y_train[class_indices[0]]
+
+                 x_balanced.append(additional_samples)
+                 y_balanced.append(additional_labels)
+
+     x_balanced = np.vstack(x_balanced, dtype=x_train.dtype)
+     y_balanced = np.vstack(y_balanced, dtype=y_train.dtype)
+
+     del x_train, y_train
+
+     return x_balanced, y_balanced
+
+
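A sketch of manuel_balancer on tiny one-hot toy data, assuming the installed package (class 0 keeps its 4 samples; class 1 gets 2 interpolated synthetic samples to reach the target of 4):

    import numpy as np
    from pyerualjetwork_afterburner.data_operations import manuel_balancer

    x = np.random.rand(6, 3)
    y = np.eye(2)[[0, 0, 0, 0, 1, 1]]     # 4 vs. 2 samples, one-hot
    x_bal, y_bal = manuel_balancer(x, y, target_samples_per_class=4)
    print(x_bal.shape)                    # (8, 3): both classes now have 4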
+ def auto_balancer(x_train, y_train, dtype=np.float32):
+
+     """
+     Function to balance the training data across different classes.
+
+     Arguments:
+         x_train (list): Input data for training.
+         y_train (list): Labels corresponding to the input data (one-hot encoded).
+         dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]
+
+     Returns:
+         tuple: A tuple containing balanced input data and labels.
+     """
+     from .ui import loading_bars
+     from .memory_operations import transfer_to_cpu
+
+     x_train = transfer_to_cpu(x_train, dtype=dtype)
+
+     bar_format = loading_bars()[0]
+     classes = np.arange(y_train.shape[1])
+     class_count = len(classes)
+
+     try:
+         ClassIndices = {i: np.where(y_train[:, i] == 1)[0] for i in range(class_count)}
+         classes = [len(ClassIndices[i]) for i in range(class_count)]
+
+         if len(set(classes)) == 1:
+             print(Fore.WHITE + "INFO: Data is already balanced. from: auto_balancer" + Style.RESET_ALL)
+             return x_train, y_train
+
+         MinCount = min(classes)
+
+         BalancedIndices = []
+         for i in tqdm(range(class_count), leave=False, ascii="▱▰",
+                       bar_format=bar_format, desc='Balancing Data', ncols=70):
+             if len(ClassIndices[i]) > MinCount:
+                 SelectedIndices = np.random.choice(
+                     ClassIndices[i], MinCount, replace=False)
+             else:
+                 SelectedIndices = ClassIndices[i]
+             BalancedIndices.extend(SelectedIndices)
+
+         BalancedInputs = [x_train[idx] for idx in BalancedIndices]
+         BalancedLabels = [y_train[idx] for idx in BalancedIndices]
+
+         permutation = np.random.permutation(len(BalancedInputs))
+         BalancedInputs = np.array(BalancedInputs)[permutation]
+         BalancedLabels = np.array(BalancedLabels)[permutation]
+
+         print(Fore.GREEN + "Data successfully balanced from: " + str(len(x_train)
+               ) + " to: " + str(len(BalancedInputs)) + ". from: auto_balancer " + Style.RESET_ALL)
+     except Exception:
+         print(Fore.RED + "ERROR: Inputs and labels must be the same length. Check parameters." + Style.RESET_ALL)
+         sys.exit()
+
+     BalancedInputs = BalancedInputs.astype(dtype, copy=False)
+     BalancedLabels = BalancedLabels.astype(dtype=y_train.dtype, copy=False)
+
+     del x_train, y_train
+
+     return BalancedInputs, BalancedLabels
+
+
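Unlike manuel_balancer, auto_balancer downsamples every class to the size of the smallest one. A minimal sketch, assuming the installed package:

    import numpy as np
    from pyerualjetwork_afterburner.data_operations import auto_balancer

    x = np.random.rand(9, 3)
    y = np.eye(2)[[0] * 6 + [1] * 3]      # 6 vs. 3 samples, one-hot
    x_bal, y_bal = auto_balancer(x, y)    # downsamples class 0 to 3
    print(x_bal.shape)                    # (6, 3)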
+ def synthetic_augmentation(x, y, dtype=np.float32):
+     """
+     Generates synthetic examples to balance classes with fewer examples.
+
+     Arguments:
+         x -- Input dataset (examples) - array format
+         y -- Class labels (one-hot encoded) - array format
+         dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]
+
+     Returns:
+         x_balanced -- Balanced input dataset (array format)
+         y_balanced -- Balanced class labels (one-hot encoded, array format)
+     """
+     from .ui import loading_bars
+     from .memory_operations import transfer_to_cpu
+
+     x = transfer_to_cpu(x, dtype=dtype)
+
+     bar_format = loading_bars()[0]
+     classes = np.arange(y.shape[1])
+     class_count = len(classes)
+
+     class_distribution = {i: 0 for i in range(class_count)}
+     for label in y:
+         class_distribution[np.argmax(label)] += 1
+
+     max_class_count = max(class_distribution.values())
+
+     x_balanced = list(x)
+     y_balanced = list(y)
+
+     for class_label in tqdm(range(class_count), leave=False, ascii="▱▰",
+                             bar_format=bar_format, desc='Augmenting Data', ncols=52):
+         class_indices = [i for i, label in enumerate(
+             y) if np.argmax(label) == class_label]
+         num_samples = len(class_indices)
+
+         if num_samples < max_class_count:
+             while num_samples < max_class_count:
+
+                 random_indices = np.random.choice(
+                     class_indices, 2, replace=False)
+                 sample1 = x[random_indices[0]]
+                 sample2 = x[random_indices[1]]
+
+                 # Linear interpolation between two random same-class samples
+                 synthetic_sample = sample1 + \
+                     (np.array(sample2) - np.array(sample1)) * np.random.rand()
+
+                 x_balanced.append(synthetic_sample.tolist())
+                 y_balanced.append(y[class_indices[0]])
+
+                 num_samples += 1
+
+     x_balanced = np.array(x_balanced).astype(dtype, copy=False)
+     y_balanced = np.array(y_balanced).astype(dtype=y.dtype, copy=False)
+
+     del x, y
+
+     return x_balanced, y_balanced
+
+
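synthetic_augmentation is the upsampling counterpart of auto_balancer: every minority class is grown to the size of the largest one by interpolation. A minimal sketch, assuming the installed package:

    import numpy as np
    from pyerualjetwork_afterburner.data_operations import synthetic_augmentation

    x = np.random.rand(7, 3)
    y = np.eye(2)[[0] * 5 + [1] * 2]             # 5 vs. 2 samples, one-hot
    x_bal, y_bal = synthetic_augmentation(x, y)  # upsamples class 1 to 5
    print(x_bal.shape)                           # (10, 3)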
+ def standard_scaler(x_train=None, x_test=None, scaler_params=None, dtype=np.float32):
+     """
+     Standardizes training and test datasets. x_test may be None.
+
+     Args:
+         x_train: numpy.ndarray
+
+         x_test: numpy.ndarray (optional)
+
+         scaler_params (list, optional): Precomputed [mean, std] from a previous call, e.g. when running a saved model.
+
+         dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]
+
+     Returns:
+         list:
+             Scaler parameters: mean and std
+         tuple:
+             Standardized training and test datasets
+     """
+     if x_train is not None and scaler_params is None and x_test is not None:
+         x_train = x_train.astype(dtype, copy=False)
+         x_test = x_test.astype(dtype, copy=False)
+
+         mean = np.mean(x_train, axis=0)
+         std = np.std(x_train, axis=0)
+
+         train_data_scaled = (x_train - mean) / std
+         test_data_scaled = (x_test - mean) / std
+
+         train_data_scaled = np.nan_to_num(train_data_scaled, nan=0)
+         test_data_scaled = np.nan_to_num(test_data_scaled, nan=0)
+
+         scaler_params = [mean, std]
+
+         return scaler_params, train_data_scaled, test_data_scaled
+
+     if scaler_params is None and x_train is None and x_test is not None:
+         return x_test.astype(dtype, copy=False)  # sample data not scaled
+
+     if scaler_params is not None:
+         x_test = x_test.astype(dtype, copy=False)
+         scaled_data = (x_test - scaler_params[0]) / scaler_params[1]
+         scaled_data = np.nan_to_num(scaled_data, nan=0)
+
+         return scaled_data  # sample data scaled
+
+
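The two call patterns above (fit-and-transform, then transform-only with saved parameters) look like this in practice, assuming the installed package:

    import numpy as np
    from pyerualjetwork_afterburner.data_operations import standard_scaler

    x_train = np.random.rand(8, 3)
    x_test = np.random.rand(2, 3)

    params, x_train_s, x_test_s = standard_scaler(x_train, x_test)
    # later, e.g. at inference time, reuse the fitted [mean, std]:
    x_new_s = standard_scaler(x_test=x_test, scaler_params=params)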
+ def normalization(
+     Input,  # num: Input data to be normalized.
+     dtype=np.float32):
+     """
+     Normalizes the input data using maximum absolute scaling.
+
+     Args:
+         Input (num): Input data to be normalized.
+         dtype (numpy.dtype): Data type for the arrays. np.float32 by default. Example: np.float64 or np.float16. [fp32 for balanced devices, fp64 for strong devices, fp16 for weak devices: not recommended!]
+
+     Returns:
+         (num) Scaled input data after normalization.
+     """
+
+     MaxAbs = np.max(np.abs(Input.astype(dtype, copy=False)))
+     return (Input / MaxAbs)
+
+
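Max-abs scaling divides everything by the largest absolute value, so the output lands in [-1, 1]. A one-line check, assuming the installed package:

    import numpy as np
    from pyerualjetwork_afterburner.data_operations import normalization

    print(normalization(np.array([-4.0, 2.0, 1.0])))  # [-1.    0.5   0.25]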
+ def find_closest_factors(a):
+     # Returns the factor pair (i, j) with i * j == a and i <= j, where i is
+     # the largest factor of a not exceeding sqrt(a).
+     root = int(math.sqrt(a))
+
+     for i in range(root, 0, -1):
+         if a % i == 0:
+             j = a // i
+             return i, j
+
+
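Two quick checks of the helper, assuming the installed package:

    from pyerualjetwork_afterburner.data_operations import find_closest_factors

    print(find_closest_factors(12))  # (3, 4)
    print(find_closest_factors(7))   # (1, 7): primes fall back to 1 x n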
386
+ def batcher(x_test, y_test, batch_size=1):
387
+
388
+ if batch_size == 1:
389
+ return x_test, y_test
390
+
391
+ y_labels = np.argmax(y_test, axis=1)
392
+
393
+ sampled_x, sampled_y = [], []
394
+
395
+ for class_label in np.unique(y_labels):
396
+
397
+ class_indices = np.where(y_labels == class_label)[0]
398
+
399
+ num_samples = int(len(class_indices) * batch_size)
400
+
401
+ sampled_indices = np.random.choice(class_indices, num_samples, replace=False)
402
+
403
+ sampled_x.append(x_test[sampled_indices])
404
+ sampled_y.append(y_test[sampled_indices])
405
+
406
+ return np.concatenate(sampled_x), np.concatenate(sampled_y)
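
A minimal sketch of batcher with a fractional batch_size, assuming the installed package (6 and 4 samples per class at 0.5 give 3 + 2 = 5 rows):

    import numpy as np
    from pyerualjetwork_afterburner.data_operations import batcher

    x = np.random.rand(10, 4)
    y = np.eye(2)[[0] * 6 + [1] * 4]          # one-hot labels
    x_b, y_b = batcher(x, y, batch_size=0.5)  # half of each class
    print(x_b.shape)                          # (5, 4)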