pyerualjetwork 3.3.3__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyerualjetwork/__init__.py +80 -0
- pyerualjetwork/activation_functions.py +367 -0
- pyerualjetwork/data_operations.py +401 -0
- pyerualjetwork/help.py +16 -0
- pyerualjetwork/loss_functions.py +21 -0
- pyerualjetwork/metrics.py +190 -0
- pyerualjetwork/model_operations.py +350 -0
- pyerualjetwork/plan.py +645 -0
- pyerualjetwork/planeat.py +726 -0
- pyerualjetwork/ui.py +22 -0
- pyerualjetwork/visualizations.py +799 -0
- pyerualjetwork-4.0.0.dist-info/METADATA +90 -0
- pyerualjetwork-4.0.0.dist-info/RECORD +15 -0
- pyerualjetwork-4.0.0.dist-info/top_level.txt +1 -0
- plan/__init__.py +0 -5
- plan/plan.py +0 -2173
- pyerualjetwork-3.3.3.dist-info/METADATA +0 -8
- pyerualjetwork-3.3.3.dist-info/RECORD +0 -6
- pyerualjetwork-3.3.3.dist-info/top_level.txt +0 -1
- {pyerualjetwork-3.3.3.dist-info → pyerualjetwork-4.0.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,401 @@
|
|
1
|
+
from tqdm import tqdm
|
2
|
+
import numpy as np
|
3
|
+
from colorama import Fore, Style
|
4
|
+
import sys
|
5
|
+
import math
|
6
|
+
|
7
|
+
def encode_one_hot(y_train, y_test=None, summary=False):
    """
    One-hot encode training labels and, optionally, test labels.

    Columns follow the order of the unique values returned by
    ``np.unique(y_train)``; test labels are encoded with that same mapping.

    Args:
        y_train (numpy.ndarray): Train label data.
        y_test (numpy.ndarray): Test label data (optional).
        summary (bool): If True, prints the class-to-index mapping. Default: False

    Returns:
        numpy.ndarray or tuple: The encoded ``y_train`` alone when ``y_test``
        is None, otherwise ``(y_train_encoded, y_test_encoded)``.

    Raises:
        KeyError: If a label being encoded does not occur in ``y_train``.
    """
    unique_labels = np.unique(y_train)

    column_of = {}
    for column, value in enumerate(unique_labels):
        column_of[value] = column

    if summary:
        print("Class-to-index mapping:")
        for value, column in column_of.items():
            print(f" {column}: {value}")

    def _to_one_hot(labels):
        # One row per label, with a single 1 in the column of its class.
        encoded = np.zeros((labels.shape[0], len(unique_labels)))
        for row, value in enumerate(labels):
            encoded[row, column_of[value]] = 1
        return encoded

    y_train_encoded = _to_one_hot(y_train)
    if y_test is None:
        return y_train_encoded

    return y_train_encoded, _to_one_hot(y_test)
|
40
|
+
|
41
|
+
|
42
|
+
def decode_one_hot(encoded_data):
    """
    Turn one-hot encoded rows back into class indices.

    Args:
        encoded_data (numpy.ndarray): One-hot encoded data with shape (n_samples, n_classes).

    Returns:
        numpy.ndarray: Index of the largest entry of each row, shape (n_samples,).
    """
    return np.argmax(encoded_data, axis=1)
|
56
|
+
|
57
|
+
|
58
|
+
def split(X, y, test_size, random_state):
    """
    Splits the given X (features) and y (labels) data into training and testing subsets.

    The samples are shuffled before splitting. When ``random_state`` is not
    None it is used to seed NumPy's global random generator, so the split is
    reproducible.

    Args:
        X (numpy.ndarray): Features data.
        y (numpy.ndarray): Labels data.
        test_size (float or int): Proportion (float in [0.0, 1.0]) or number
            (non-negative int) of samples for the test subset.
        random_state (int or None): Seed for random state.

    Returns:
        tuple: x_train, x_test, y_train, y_test as ordered training and testing data subsets.

    Raises:
        ValueError: If ``test_size`` is neither float nor int, is negative,
            is a float above 1.0, or is an int larger than the number of samples.
    """
    num_samples = X.shape[0]

    if isinstance(test_size, float):
        # Out-of-range fractions would otherwise be swallowed by slicing.
        if not 0.0 <= test_size <= 1.0:
            raise ValueError("test_size as a float must be between 0.0 and 1.0.")
        test_size = int(test_size * num_samples)
    elif isinstance(test_size, int):
        # A negative count would slice from the end and give a wrong split.
        if test_size < 0:
            raise ValueError("test_size cannot be negative.")
        if test_size > num_samples:
            raise ValueError(
                "test_size cannot be larger than the number of samples.")
    else:
        raise ValueError("test_size should be float or int.")

    if random_state is not None:
        np.random.seed(random_state)

    indices = np.arange(num_samples)
    np.random.shuffle(indices)

    test_indices = indices[:test_size]
    train_indices = indices[test_size:]

    x_train, x_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    return x_train, x_test, y_train, y_test
|
96
|
+
|
97
|
+
|
98
|
+
def manuel_balancer(x_train, y_train, target_samples_per_class):
    """
    Bring every class to exactly ``target_samples_per_class`` examples.

    Classes with more examples than the target are randomly down-sampled
    without replacement. Classes with fewer keep all their examples and are
    topped up with synthetic ones, each a random point on the segment between
    two distinct existing examples of that class.

    Arguments:
    x_train -- Input dataset (examples) - NumPy array format (indexed as 2-D: samples x features)
    y_train -- Class labels (one-hot encoded) - NumPy array format
    target_samples_per_class -- Desired number of samples per class

    Returns:
    x_balanced -- Balanced input dataset (NumPy array format)
    y_balanced -- Balanced class labels (one-hot encoded, NumPy array format)

    Raises:
    ValueError -- if a class that needs synthetic examples has fewer than
                  two real examples to interpolate between.
    """
    from .ui import loading_bars

    bar_format = loading_bars()[0]

    x_train = np.asarray(x_train)
    y_train = np.asarray(y_train)

    class_count = y_train.shape[1]
    # Decode the labels once instead of once per class.
    sample_labels = np.argmax(y_train, axis=1)

    x_balanced = []
    y_balanced = []

    for class_label in tqdm(range(class_count), leave=False, ascii="▱▰",
                            bar_format=bar_format, desc='Augmenting Data', ncols=52):
        class_indices = np.where(sample_labels == class_label)[0]
        num_samples = len(class_indices)

        if num_samples > target_samples_per_class:
            selected_indices = np.random.choice(
                class_indices, target_samples_per_class, replace=False)
            x_balanced.append(x_train[selected_indices])
            y_balanced.append(y_train[selected_indices])
            continue

        x_balanced.append(x_train[class_indices])
        y_balanced.append(y_train[class_indices])

        samples_to_add = target_samples_per_class - num_samples
        if samples_to_add == 0:
            continue

        if num_samples < 2:
            # Interpolation needs two distinct examples; fail with a clear
            # message instead of an opaque error from np.random.choice.
            raise ValueError(
                f"Class {class_label} has {num_samples} sample(s); at least 2 "
                "are required to generate synthetic examples.")

        additional_samples = np.zeros((samples_to_add, x_train.shape[1]))
        additional_labels = np.zeros((samples_to_add, y_train.shape[1]))
        # Every synthetic example carries the label of its class.
        additional_labels[:] = y_train[class_indices[0]]

        for i in range(samples_to_add):
            random_indices = np.random.choice(class_indices, 2, replace=False)
            sample1 = x_train[random_indices[0]]
            sample2 = x_train[random_indices[1]]

            additional_samples[i] = sample1 + (sample2 - sample1) * np.random.rand()

        x_balanced.append(additional_samples)
        y_balanced.append(additional_labels)

    x_balanced = np.vstack(x_balanced)
    y_balanced = np.vstack(y_balanced)

    return x_balanced, y_balanced
|
169
|
+
|
170
|
+
|
171
|
+
def auto_balancer(x_train, y_train):
    """
    Balance the training data by down-sampling every class to the size of
    the smallest class.

    If all classes already have the same number of examples the inputs are
    returned unchanged. Otherwise the kept examples are shuffled before
    being returned.

    Arguments:
        x_train (list): Input data for training.
        y_train (numpy.ndarray): One-hot labels corresponding to the input
            data (``y_train.shape[1]`` is read as the class count).

    Returns:
        tuple: A tuple containing balanced input data and labels.

    Note:
        If any error occurs while balancing, an error message is printed and
        the process is terminated with ``sys.exit()``.
    """
    from .ui import loading_bars

    bar_format = loading_bars()[0]

    class_count = y_train.shape[1]

    try:
        # Convert once; the original rebuilt this array for every class.
        label_matrix = np.array(y_train)
        class_indices = {i: np.where(label_matrix[:, i] == 1)[0]
                         for i in range(class_count)}
        class_sizes = [len(class_indices[i]) for i in range(class_count)]

        if len(set(class_sizes)) == 1:
            print(Fore.WHITE + "INFO: Data have already balanced. from: auto_balancer" + Style.RESET_ALL)
            return x_train, y_train

        min_count = min(class_sizes)

        balanced_indices = []
        for i in tqdm(range(class_count), leave=False, ascii="▱▰",
                      bar_format=bar_format, desc='Balancing Data', ncols=70):
            if len(class_indices[i]) > min_count:
                selected_indices = np.random.choice(
                    class_indices[i], min_count, replace=False)
            else:
                selected_indices = class_indices[i]
            balanced_indices.extend(selected_indices)

        balanced_inputs = [x_train[idx] for idx in balanced_indices]
        balanced_labels = [y_train[idx] for idx in balanced_indices]

        permutation = np.random.permutation(len(balanced_inputs))
        balanced_inputs = np.array(balanced_inputs)[permutation]
        balanced_labels = np.array(balanced_labels)[permutation]

        print(Fore.GREEN + "Data Succesfully Balanced from: " + str(len(x_train)
              ) + " to: " + str(len(balanced_inputs)) + ". from: auto_balancer " + Style.RESET_ALL)
    except Exception:
        # Only ordinary errors are handled here, so Ctrl-C and interpreter
        # shutdown are no longer reported as a length mismatch.
        print(Fore.RED + "ERROR: Inputs and labels must be same length check parameters")
        sys.exit()

    return np.array(balanced_inputs), np.array(balanced_labels)
|
225
|
+
|
226
|
+
|
227
|
+
def synthetic_augmentation(x_train, y_train):
    """
    Over-sample the smaller classes until every class is as large as the
    largest one.

    All original examples are kept. Each added example is a random point on
    the segment between two distinct existing examples of the same class and
    receives that class's one-hot label.

    Arguments:
    x_train -- Input dataset (examples) - array format
    y_train -- Class labels (one-hot encoded) - array format

    Returns:
    x_balanced -- Balanced input dataset (array format)
    y_balanced -- Balanced class labels (one-hot encoded, array format)
    """
    from .ui import loading_bars

    bar_format = loading_bars()[0]

    x = x_train
    y = y_train
    class_count = len(np.arange(y_train.shape[1]))

    # Class index of every row, decoded once.
    row_labels = [np.argmax(label) for label in y]

    per_class_total = {i: 0 for i in range(class_count)}
    for decoded in row_labels:
        per_class_total[decoded] += 1

    max_class_count = max(per_class_total.values())

    x_balanced = list(x)
    y_balanced = list(y)

    for class_label in tqdm(range(class_count), leave=False, ascii="▱▰",
                            bar_format=bar_format, desc='Augmenting Data', ncols=52):
        members = [i for i, decoded in enumerate(row_labels)
                   if decoded == class_label]

        # One synthetic example per missing sample; nothing to do for the
        # largest class(es).
        for _ in range(max_class_count - len(members)):
            pair = np.random.choice(members, 2, replace=False)
            first = x[pair[0]]
            second = x[pair[1]]

            blended = first + (np.array(second) - np.array(first)) * np.random.rand()

            x_balanced.append(blended.tolist())
            y_balanced.append(y[members[0]])

    return np.array(x_balanced), np.array(y_balanced)
|
282
|
+
|
283
|
+
|
284
|
+
def standard_scaler(x_train=None, x_test=None, scaler_params=None):
    """
    Standardizes training and test datasets column by column
    (subtract the mean, divide by the standard deviation).

    Supported call patterns:

    * ``x_train`` given, no ``scaler_params``: mean and std are computed
      from ``x_train`` and applied to ``x_train`` and, if given, ``x_test``.
    * ``scaler_params`` given: ``x_test`` is scaled with those parameters;
      ``x_train`` is not used.
    * neither ``x_train`` nor ``scaler_params`` given: nothing can be
      fitted, so ``x_test`` is returned unchanged.

    NaN values produced by the division (zero-variance columns) are
    replaced with 0.

    Args:
        x_train: numpy.ndarray or list (optional)
        x_test: numpy.ndarray or list (optional)
        scaler_params: sequence ``[mean, std]`` as returned by a previous
            call (optional, for using a saved model)

    Returns:
        ``(scaler_params, train_scaled, test_scaled)`` when fitting with
        both datasets, ``(scaler_params, train_scaled)`` when fitting with
        ``x_train`` only, the scaled test data when ``scaler_params`` is
        given, or ``x_test`` itself when there is nothing to fit.

    Raises:
        TypeError: If ``scaler_params`` is given without ``x_test``.
    """

    def _scale(data, mean, std):
        # 0/0 on zero-variance columns yields NaN; map those to 0.
        return np.nan_to_num((data - mean) / std, nan=0)

    # Identity checks on purpose: ``== None`` on an ndarray is element-wise
    # and cannot be used as a plain truth value.
    if scaler_params is not None:
        if x_test is None:
            raise TypeError("x_test is required when scaler_params is given.")
        return _scale(np.asarray(x_test), scaler_params[0], scaler_params[1])

    if x_train is None:
        # This model is not scaled: hand the data back untouched.
        return x_test

    train = np.asarray(x_train, dtype=float)
    mean = np.mean(train, axis=0)
    std = np.std(train, axis=0)
    scaler_params = [mean, std]

    train_data_scaled = _scale(train, mean, std)

    if x_test is None:
        return scaler_params, train_data_scaled

    test_data_scaled = _scale(np.asarray(x_test, dtype=float), mean, std)

    return scaler_params, train_data_scaled, test_data_scaled
|
355
|
+
|
356
|
+
|
357
|
+
def normalization(
    Input  # num: Input data to be normalized.
):
    """
    Normalizes the input data using maximum absolute scaling.

    Every value is divided by the largest absolute value found in the
    input. If that maximum is zero (all-zero input) the data is divided by
    one instead, so the result is all zeros rather than NaN.

    Args:
        Input (num): Input data to be normalized.

    Returns:
        (num) Scaled input data after normalization.
    """
    max_abs = np.max(np.abs(Input))

    if max_abs == 0:
        # Avoid 0/0. The divisor stays a NumPy value so that plain Python
        # lists can still be divided by it.
        max_abs = np.ones_like(max_abs)

    return Input / max_abs
|
372
|
+
|
373
|
+
|
374
|
+
def find_closest_factors(a):
    """
    Find the pair of factors of ``a`` that are closest to each other.

    The search starts at ``int(sqrt(a))`` and walks downwards, so the first
    divisor found gives the most balanced pair.

    Args:
        a: Number to factor.

    Returns:
        tuple: ``(i, j)`` with ``i * j == a`` and ``i <= j``. Nothing is
        returned (None) when ``int(sqrt(a))`` is 0.
    """
    candidate = int(math.sqrt(a))

    while candidate > 0:
        if a % candidate == 0:
            return candidate, a // candidate
        candidate -= 1
|
382
|
+
|
383
|
+
|
384
|
+
def batcher(x_test, y_test, batch_size=1):
    """
    Draw a random, class-stratified subset of a dataset.

    For every class present in ``y_test`` a fraction ``batch_size`` of its
    samples (rounded down) is picked at random without replacement. The
    picks are returned grouped by class, in ascending class order.

    Args:
        x_test (numpy.ndarray): Input data.
        y_test (numpy.ndarray): One-hot encoded labels for ``x_test``.
        batch_size (float): Fraction of each class to keep. Default: 1

    Returns:
        tuple: ``(sampled_x, sampled_y)``.
    """
    labels = np.argmax(y_test, axis=1)

    picks = []
    for current_class in np.unique(labels):
        members = np.flatnonzero(labels == current_class)
        keep = int(len(members) * batch_size)
        picks.append(np.random.choice(members, keep, replace=False))

    x_parts = [x_test[chosen] for chosen in picks]
    y_parts = [y_test[chosen] for chosen in picks]

    return np.concatenate(x_parts), np.concatenate(y_parts)
|
pyerualjetwork/help.py
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
from .activation_functions import all_activations
|
2
|
+
|
3
|
+
def activation_potentiation():
    """
    Print every available activation, together with a hint on combining
    them, and return the list.

    Returns:
        The value returned by ``all_activations()``.
    """
    available = all_activations()
    hint = "\n\nYOU CAN COMBINE EVERY ACTIVATION. EXAMPLE: ['linear', 'tanh'] or ['waveakt', 'linear', 'sine']."

    print('All available activations: ', available, hint)

    return available
|
10
|
+
|
11
|
+
def docs_and_examples():
    """Print the links to the PLAN, PLANEAT and PyerualJetwork documents and examples."""
    links = (
        'PLAN document: https://github.com/HCB06/PyerualJetwork/tree/main/Welcome_to_PLAN\n',
        'PLAN examples: https://github.com/HCB06/PyerualJetwork/tree/main/Welcome_to_PyerualJetwork/ExampleCodes\n',
        'PLANEAT examples: https://github.com/HCB06/PyerualJetwork/tree/main/Welcome_to_Anaplan/ExampleCodes/PLANEAT\n',
        'PyerualJetwork document and examples: https://github.com/HCB06/PyerualJetwork/tree/main/Welcome_to_PyerualJetwork',
    )

    for link in links:
        print(link)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
|
2
|
+
import numpy as np
|
3
|
+
|
4
|
+
def categorical_crossentropy(y_true_batch, y_pred_batch):
    """
    Mean categorical cross-entropy over a batch.

    Predictions are clipped to [1e-7, 1 - 1e-7] before the logarithm is
    taken, so a predicted probability of exactly 0 cannot produce an
    infinite loss.

    Args:
        y_true_batch: True labels, one row per sample.
        y_pred_batch: Predicted probabilities, same layout.

    Returns:
        The loss averaged over the rows.
    """
    tiny = 1e-7
    clipped = np.clip(y_pred_batch, tiny, 1. - tiny)

    per_sample = -np.sum(y_true_batch * np.log(clipped), axis=1)

    return np.mean(per_sample)
|
12
|
+
|
13
|
+
|
14
|
+
def binary_crossentropy(y_true_batch, y_pred_batch):
    """
    Mean binary cross-entropy over a batch.

    Predictions are clipped to [1e-7, 1 - 1e-7] before the logarithms are
    taken. The loss is averaged first over the entries of each row and then
    over the rows.

    Args:
        y_true_batch: True labels, one row per sample.
        y_pred_batch: Predicted probabilities, same layout.

    Returns:
        The averaged loss.
    """
    tiny = 1e-7
    clipped = np.clip(y_pred_batch, tiny, 1. - tiny)

    log_hit = np.log(clipped)
    log_miss = np.log(1 - clipped)
    per_sample = -np.mean(y_true_batch * log_hit + (1 - y_true_batch) * log_miss, axis=1)

    return np.mean(per_sample)
|
@@ -0,0 +1,190 @@
|
|
1
|
+
import numpy as np
|
2
|
+
|
3
|
+
def metrics(y_ts, test_preds, average='weighted'):
    """
    Calculates precision, recall and F1 score for a classification task.

    Args:
        y_ts (list or numpy.ndarray): True labels, one-hot encoded (they are
            decoded with ``decode_one_hot`` before use).
        test_preds (list or numpy.ndarray): Predicted labels.
        average (str): Type of averaging ('micro', 'macro', 'weighted').

    Returns:
        tuple: Precision, recall, F1 score.

    Raises:
        ValueError: If ``average`` is not one of the three supported values.
    """

    from .data_operations import decode_one_hot

    truth = np.array(decode_one_hot(y_ts))
    predicted = np.array(test_preds)

    if truth.ndim > 1:
        truth = truth.reshape(-1)
    if predicted.ndim > 1:
        predicted = predicted.reshape(-1)

    classes = np.unique(np.concatenate((truth, predicted)))

    # Samples are compared pairwise, up to the length of the shorter array.
    pair_count = min(len(truth), len(predicted))
    paired_truth = truth[:pair_count]
    paired_pred = predicted[:pair_count]

    true_pos = {}
    false_pos = {}
    false_neg = {}

    for c in classes:
        is_c_true = paired_truth == c
        is_c_pred = paired_pred == c
        true_pos[c] = int(np.count_nonzero(is_c_true & is_c_pred))
        false_pos[c] = int(np.count_nonzero(~is_c_true & is_c_pred))
        false_neg[c] = int(np.count_nonzero(is_c_true & ~is_c_pred))

    def _ratio(numerator, denominator):
        # Undefined ratios (nothing predicted / nothing present) count as 0.
        if denominator > 0:
            return numerator / denominator
        return 0

    def _harmonic(p, r):
        if (p + r) > 0:
            return 2 * (p * r) / (p + r)
        return 0

    precision = {}
    recall = {}
    f1 = {}

    for c in classes:
        precision[c] = _ratio(true_pos[c], true_pos[c] + false_pos[c])
        recall[c] = _ratio(true_pos[c], true_pos[c] + false_neg[c])
        f1[c] = _harmonic(precision[c], recall[c])

    if average == 'micro':
        tp_total = np.sum(list(true_pos.values()))
        fp_total = np.sum(list(false_pos.values()))
        fn_total = np.sum(list(false_neg.values()))

        precision_val = _ratio(tp_total, tp_total + fp_total)
        recall_val = _ratio(tp_total, tp_total + fn_total)
        f1_val = _harmonic(precision_val, recall_val)

    elif average == 'macro':
        precision_val = np.mean(list(precision.values()))
        recall_val = np.mean(list(recall.values()))
        f1_val = np.mean(list(f1.values()))

    elif average == 'weighted':
        # Each class is weighted by its share of the true labels.
        weights = np.array([np.sum(truth == c) for c in classes])
        weights = weights / np.sum(weights)

        precision_val = np.sum([w * precision[c] for w, c in zip(weights, classes)])
        recall_val = np.sum([w * recall[c] for w, c in zip(weights, classes)])
        f1_val = np.sum([w * f1[c] for w, c in zip(weights, classes)])

    else:
        raise ValueError("Invalid value for 'average'. Choose from 'micro', 'macro', 'weighted'.")

    return precision_val, recall_val, f1_val
|
77
|
+
|
78
|
+
|
79
|
+
def roc_curve(y_true, y_score):
    """
    Compute Receiver Operating Characteristic (ROC) curve.

    Parameters:
    y_true : array, shape = [n_samples]
        True binary labels in range {0, 1} or {-1, 1}. The label 1 is the
        positive class; the other label is the negative class.
    y_score : array, shape = [n_samples]
        Target scores, can either be probability estimates of the positive class,
        confidence values, or non-thresholded measure of decisions.

    Returns:
    fpr : array, shape = [n]
        Increasing false positive rates. Element i is the false positive
        rate of the samples whose score is strictly greater than
        thresholds[i]; the last element covers all samples.
    tpr : array, shape = [n]
        Increasing true positive rates, laid out like ``fpr``.
    thresholds : array, shape = [n]
        Decreasing distinct score values; the last value is repeated once
        for the final point that covers all samples.

    Raises:
    ValueError : if ``y_true`` does not contain exactly two distinct values.
    """

    y_true = np.asarray(y_true)
    y_score = np.asarray(y_score)

    if len(np.unique(y_true)) != 2:
        raise ValueError("Only binary classification is supported.")

    # Stable sort, then reverse, to walk the samples from highest score down.
    desc_score_indices = np.argsort(y_score, kind="mergesort")[::-1]
    y_score = y_score[desc_score_indices]
    y_true = y_true[desc_score_indices]

    fpr = []
    tpr = []
    thresholds = []

    # Count the positives explicitly: summing the labels is only a count
    # for {0, 1} and is wrong for {-1, 1}.
    n_pos = np.sum(y_true == 1)
    n_neg = len(y_true) - n_pos

    tp = 0
    fp = 0
    prev_score = None

    for i, score in enumerate(y_score):
        # Emit a point each time the score changes, before counting the
        # samples that carry the new score.
        if score != prev_score:
            fpr.append(fp / n_neg)
            tpr.append(tp / n_pos)
            thresholds.append(score)
            prev_score = score

        if y_true[i] == 1:
            tp += 1
        else:
            fp += 1

    # Closing point: every sample has been counted.
    fpr.append(fp / n_neg)
    tpr.append(tp / n_pos)
    thresholds.append(score)

    return np.array(fpr), np.array(tpr), np.array(thresholds)
|
142
|
+
|
143
|
+
|
144
|
+
def confusion_matrix(y_true, y_pred, class_count):
    """
    Computes confusion matrix.

    Args:
        y_true (numpy.ndarray): True class labels (1D array).
        y_pred (numpy.ndarray): Predicted class labels (1D array).
        class_count (int): Number of classes.

    Returns:
        numpy.ndarray: Integer matrix of shape (class_count, class_count)
        where entry [t, p] counts the samples with true label t that were
        predicted as p.
    """
    counts = np.zeros((class_count, class_count), dtype=int)

    for position, actual in enumerate(y_true):
        counts[actual, y_pred[position]] += 1

    return counts
|
164
|
+
|
165
|
+
|
166
|
+
def pca(X, n_components):
    """
    Project data onto its leading principal components.

    The data is centred, the covariance matrix of the features is
    eigen-decomposed, and the centred data is projected onto the
    eigenvectors with the largest eigenvalues.

    Parameters:
    X (numpy array): (n_samples, n_features)
    n_components (int): number of components to keep

    Returns:
    X_reduced (numpy array): (n_samples, n_components)
    """
    centred = X - np.mean(X, axis=0)

    eigenvalues, eigenvectors = np.linalg.eigh(np.cov(centred, rowvar=False))

    # eigh gives no ordering guarantee we rely on; rank by eigenvalue,
    # largest first, and keep the leading columns.
    ranking = np.argsort(eigenvalues)[::-1]
    leading = eigenvectors[:, ranking][:, :n_components]

    return np.dot(centred, leading)
|