pyerualjetwork 2.1.2__py3-none-any.whl → 2.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plan_bi/__init__.py +1 -1
- plan_bi/plan_bi.py +396 -109
- plan_di/__init__.py +1 -1
- plan_di/plan_di.py +776 -490
- pyerualjetwork-2.1.4.dist-info/METADATA +8 -0
- pyerualjetwork-2.1.4.dist-info/RECORD +8 -0
- pyerualjetwork-2.1.2.dist-info/METADATA +0 -8
- pyerualjetwork-2.1.2.dist-info/RECORD +0 -8
- {pyerualjetwork-2.1.2.dist-info → pyerualjetwork-2.1.4.dist-info}/WHEEL +0 -0
- {pyerualjetwork-2.1.2.dist-info → pyerualjetwork-2.1.4.dist-info}/top_level.txt +0 -0
plan_bi/plan_bi.py
CHANGED
@@ -426,8 +426,8 @@ def Relu(
|
|
426
426
|
def evaluate(
|
427
427
|
x_test, # list[num]: Test input data.
|
428
428
|
y_test, # list[num]: Test labels.
|
429
|
-
activation_potential, # float:
|
430
|
-
|
429
|
+
activation_potential, # float: Input activation potential
|
430
|
+
show_metrices, # (bool): (True or False)
|
431
431
|
W # list[num]: Weight matrix of the neural network.
|
432
432
|
) -> tuple:
|
433
433
|
infoTestModel = """
|
@@ -437,7 +437,7 @@ def evaluate(
|
|
437
437
|
x_test (list[num]): Test input data.
|
438
438
|
y_test (list[num]): Test labels.
|
439
439
|
activation_potential (float): Input activation potential
|
440
|
-
|
440
|
+
show_metrices (bool): (True or False)
|
441
441
|
W (list[num]): Weight matrix list of the neural network.
|
442
442
|
|
443
443
|
Returns:
|
@@ -450,7 +450,8 @@ def evaluate(
|
|
450
450
|
try:
|
451
451
|
Wc = [0] * len(W) # Wc = weight copy
|
452
452
|
true = 0
|
453
|
-
|
453
|
+
y_preds = [-1] * len(y_test)
|
454
|
+
acc_list = []
|
454
455
|
for i, w in enumerate(W):
|
455
456
|
Wc[i] = np.copy(w)
|
456
457
|
print('\rCopying weights.....',i+1,'/',len(W),end = "")
|
@@ -479,25 +480,9 @@ def evaluate(
|
|
479
480
|
if RealOutput == PredictedOutput:
|
480
481
|
true += 1
|
481
482
|
acc = true / len(y_test)
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
y_testVisual = np.copy(y_test)
|
487
|
-
y_testVisual = np.argmax(y_testVisual, axis=1)
|
488
|
-
|
489
|
-
plt.figure(figsize=(12, 6))
|
490
|
-
sns.kdeplot(y_testVisual, label='Real Outputs', fill=True)
|
491
|
-
sns.kdeplot(TestPredictions, label='Predictions', fill=True)
|
492
|
-
plt.legend()
|
493
|
-
plt.xlabel('Class')
|
494
|
-
plt.ylabel('Data size')
|
495
|
-
plt.title('Predictions and Real Outputs for Testing KDE Plot')
|
496
|
-
plt.show()
|
497
|
-
|
498
|
-
if inpIndex + 1 != len(x_test):
|
499
|
-
|
500
|
-
plt.close('all')
|
483
|
+
if show_metrices == True:
|
484
|
+
acc_list.append(acc)
|
485
|
+
y_preds[inpIndex] = PredictedOutput
|
501
486
|
|
502
487
|
uni_end_time = time.time()
|
503
488
|
|
@@ -514,7 +499,9 @@ def evaluate(
|
|
514
499
|
elif calculating_est > 3600:
|
515
500
|
print('\rest......(h):',calculating_est/3600,'\n',end= "")
|
516
501
|
print('\rTest accuracy: ' ,acc ,"\n", end="")
|
517
|
-
|
502
|
+
if show_metrices == True:
|
503
|
+
plot_evaluate(y_test, y_preds, acc_list)
|
504
|
+
|
518
505
|
EndTime = time.time()
|
519
506
|
for i, w in enumerate(Wc):
|
520
507
|
W[i] = np.copy(w)
|
@@ -550,14 +537,14 @@ def evaluate(
|
|
550
537
|
|
551
538
|
|
552
539
|
|
553
|
-
return W,
|
540
|
+
return W,y_preds,acc
|
554
541
|
|
555
542
|
|
556
543
|
def multiple_evaluate(
|
557
544
|
x_test, # list[num]: Test input data.
|
558
545
|
y_test, # list[num]: Test labels.
|
559
|
-
activation_potentials, # float:
|
560
|
-
|
546
|
+
activation_potentials, # float: Input activation potential
|
547
|
+
show_metrices, # (bool): (True or False)
|
561
548
|
MW # list[list[num]]: Weight matrix of the neural network.
|
562
549
|
) -> tuple:
|
563
550
|
infoTestModel = """
|
@@ -567,7 +554,7 @@ def multiple_evaluate(
|
|
567
554
|
x_test (list[num]): Test input data.
|
568
555
|
y_test (list[num]): Test labels.
|
569
556
|
activation_potential (float): Input activation potential
|
570
|
-
|
557
|
+
show_metrices (bool): (True or False)
|
571
558
|
MW (list(list[num])): Multiple Weight matrix list of the neural network. (Multiple model testing)
|
572
559
|
|
573
560
|
Returns:
|
@@ -576,7 +563,8 @@ def multiple_evaluate(
|
|
576
563
|
|
577
564
|
layers = ['fex','cat']
|
578
565
|
|
579
|
-
try:
|
566
|
+
try:
|
567
|
+
acc_list = []
|
580
568
|
print(Fore.GREEN + "\n\nTest Started with 0 ERROR\n" + Style.RESET_ALL)
|
581
569
|
start_time = time.time()
|
582
570
|
true = 0
|
@@ -590,7 +578,7 @@ def multiple_evaluate(
|
|
590
578
|
|
591
579
|
Wc = [0] * len(W) # Wc = weight copy
|
592
580
|
|
593
|
-
|
581
|
+
y_preds = [None] * len(y_test)
|
594
582
|
for i, w in enumerate(W):
|
595
583
|
Wc[i] = np.copy(w)
|
596
584
|
|
@@ -619,25 +607,11 @@ def multiple_evaluate(
|
|
619
607
|
if RealOutput == PredictedOutput:
|
620
608
|
true += 1
|
621
609
|
acc = true / len(y_test)
|
622
|
-
|
623
|
-
|
624
|
-
if visualize == 'y':
|
625
|
-
|
626
|
-
y_testVisual = np.copy(y_test)
|
627
|
-
y_testVisual = np.argmax(y_testVisual, axis=1)
|
628
|
-
|
629
|
-
plt.figure(figsize=(12, 6))
|
630
|
-
sns.kdeplot(y_testVisual, label='Real Outputs', fill=True)
|
631
|
-
sns.kdeplot(TestPredictions, label='Predictions', fill=True)
|
632
|
-
plt.legend()
|
633
|
-
plt.xlabel('Class')
|
634
|
-
plt.ylabel('Data size')
|
635
|
-
plt.title('Predictions and Real Outputs for Testing KDE Plot')
|
636
|
-
plt.show()
|
610
|
+
if show_metrices == True:
|
637
611
|
|
638
|
-
|
612
|
+
acc_list.append(acc)
|
639
613
|
|
640
|
-
|
614
|
+
y_preds[inpIndex] = PredictedOutput
|
641
615
|
|
642
616
|
uni_end_time = time.time()
|
643
617
|
|
@@ -654,6 +628,10 @@ def multiple_evaluate(
|
|
654
628
|
elif calculating_est > 3600:
|
655
629
|
print('\rest......(h):',calculating_est/3600,'\n',end= "")
|
656
630
|
print('\rTest accuracy: ' ,acc ,"\n", end="")
|
631
|
+
|
632
|
+
if show_metrices == True:
|
633
|
+
|
634
|
+
plot_evaluate(y_test, y_preds, acc_list)
|
657
635
|
|
658
636
|
EndTime = time.time()
|
659
637
|
for i, w in enumerate(Wc):
|
@@ -690,7 +668,7 @@ def multiple_evaluate(
|
|
690
668
|
|
691
669
|
|
692
670
|
|
693
|
-
return W,
|
671
|
+
return W,y_preds,acc
|
694
672
|
|
695
673
|
def save_model(model_name,
|
696
674
|
model_type,
|
@@ -700,6 +678,7 @@ def save_model(model_name,
|
|
700
678
|
weights_type,
|
701
679
|
weights_format,
|
702
680
|
model_path,
|
681
|
+
scaler,
|
703
682
|
W
|
704
683
|
):
|
705
684
|
|
@@ -715,6 +694,7 @@ def save_model(model_name,
|
|
715
694
|
weights_type (str): Type of weights to save (options: 'txt', 'npy', 'mat').
|
716
695
|
WeightFormat (str): Format of the weights (options: 'd', 'f', 'raw').
|
717
696
|
model_path (str): Path where the model will be saved. For example: C:/Users/beydili/Desktop/denemePLAN/
|
697
|
+
scaler (bool): trained data it used standard_scaler ? (True or False)
|
718
698
|
W: Weights list of the model.
|
719
699
|
|
720
700
|
Returns:
|
@@ -724,7 +704,7 @@ def save_model(model_name,
|
|
724
704
|
# Operations to be performed by the function will be written here
|
725
705
|
pass
|
726
706
|
|
727
|
-
layers = ['fex','cat']
|
707
|
+
layers = ['fex','cat']
|
728
708
|
|
729
709
|
if weights_type != 'txt' and weights_type != 'npy' and weights_type != 'mat':
|
730
710
|
print(Fore.RED + "ERROR110: Save Weight type (File Extension) Type must be 'txt' or 'npy' or 'mat' from: save_model" + infosave_model + Style.RESET_ALL)
|
@@ -761,7 +741,8 @@ def save_model(model_name,
|
|
761
741
|
'SAVE DATE': datetime.now(),
|
762
742
|
'WEIGHTS TYPE': weights_type,
|
763
743
|
'WEIGHTS FORMAT': weights_format,
|
764
|
-
'MODEL PATH': model_path
|
744
|
+
'MODEL PATH': model_path,
|
745
|
+
'STANDARD SCALER': scaler
|
765
746
|
}
|
766
747
|
try:
|
767
748
|
|
@@ -875,17 +856,11 @@ def load_model(model_name,
|
|
875
856
|
print(Fore.RED + "ERROR: Model Path error. accaptable form: 'C:/Users/hasancanbeydili/Desktop/denemePLAN/' from: load_model" + infoload_model + Style.RESET_ALL)
|
876
857
|
|
877
858
|
model_name = str(df['MODEL NAME'].iloc[0])
|
878
|
-
layers = df['LAYERS'].tolist()
|
879
859
|
layer_count = int(df['LAYER COUNT'].iloc[0])
|
880
|
-
class_count = int(df['CLASS COUNT'].iloc[0])
|
881
860
|
activation_potential = int(df['ACTIVATION POTENTIAL'].iloc[0])
|
882
|
-
NeuronCount = int(df['NEURON COUNT'].iloc[0])
|
883
|
-
SynapseCount = int(df['SYNAPSE COUNT'].iloc[0])
|
884
|
-
test_acc = int(df['TEST ACCURACY'].iloc[0])
|
885
|
-
model_type = str(df['MODEL TYPE'].iloc[0])
|
886
861
|
WeightType = str(df['WEIGHTS TYPE'].iloc[0])
|
887
|
-
WeightFormat = str(df['WEIGHTS FORMAT'].iloc[0])
|
888
862
|
model_path = str(df['MODEL PATH'].iloc[0])
|
863
|
+
|
889
864
|
|
890
865
|
W = [0] * layer_count
|
891
866
|
|
@@ -901,9 +876,9 @@ def load_model(model_name,
|
|
901
876
|
else:
|
902
877
|
raise ValueError(Fore.RED + "Incorrect weight type value. Value must be 'txt', 'npy' or 'mat' from: load_model." + infoload_model + Style.RESET_ALL)
|
903
878
|
print(Fore.GREEN + "Model loaded succesfully" + Style.RESET_ALL)
|
904
|
-
return W,activation_potential,df
|
879
|
+
return W, activation_potential, df
|
905
880
|
|
906
|
-
def predict_model_ssd(Input,model_name,model_path):
|
881
|
+
def predict_model_ssd(Input, model_name, model_path):
|
907
882
|
|
908
883
|
infopredict_model_ssd = """
|
909
884
|
Function to make a prediction using a divided pruning learning artificial neural network (PLAN).
|
@@ -915,7 +890,13 @@ def predict_model_ssd(Input,model_name,model_path):
|
|
915
890
|
Returns:
|
916
891
|
ndarray: Output from the model.
|
917
892
|
"""
|
918
|
-
W,activation_potential = load_model(model_name,model_path)
|
893
|
+
W, activation_potential, df = load_model(model_name,model_path)
|
894
|
+
|
895
|
+
scaler = str(df['STANDARD SCALER'].iloc[0])
|
896
|
+
|
897
|
+
if scaler == 'True':
|
898
|
+
|
899
|
+
Input = standard_scaler(Input, None)
|
919
900
|
|
920
901
|
layers = ['fex','cat']
|
921
902
|
|
@@ -942,7 +923,7 @@ def predict_model_ssd(Input,model_name,model_path):
|
|
942
923
|
return neural_layer
|
943
924
|
|
944
925
|
|
945
|
-
def predict_model_ram(Input,activation_potential,W):
|
926
|
+
def predict_model_ram(Input, activation_potential, scaler, W):
|
946
927
|
|
947
928
|
infopredict_model_ram = """
|
948
929
|
Function to make a prediction using a divided pruning learning artificial neural network (PLAN).
|
@@ -951,11 +932,15 @@ def predict_model_ram(Input,activation_potential,W):
|
|
951
932
|
Arguments:
|
952
933
|
Input (list or ndarray): Input data for the model (single vector or single matrix).
|
953
934
|
activation_potential (float): Activation potential.
|
935
|
+
scaler (bool): trained data it used standard_scaler ? (True or False)
|
954
936
|
W (list of ndarrays): Weights of the model.
|
955
937
|
|
956
938
|
Returns:
|
957
939
|
ndarray: Output from the model.
|
958
940
|
"""
|
941
|
+
if scaler == True:
|
942
|
+
|
943
|
+
Input = standard_scaler(Input, None)
|
959
944
|
|
960
945
|
layers = ['fex','cat']
|
961
946
|
|
@@ -983,117 +968,419 @@ def predict_model_ram(Input,activation_potential,W):
|
|
983
968
|
return neural_layer
|
984
969
|
|
985
970
|
|
986
|
-
|
987
|
-
|
988
|
-
|
971
|
+
|
972
|
+
def auto_balancer(x_train, y_train):
    """Undersample the training data so every class has the same sample count.

    Each class is reduced (by random selection without replacement) to the
    size of the smallest class. If all classes already have the same size the
    inputs are returned untouched.

    Args:
        x_train: Indexable collection of training samples.
        y_train: One-hot encoded labels, one row per sample (list of rows or
            2-D numpy array).

    Returns:
        tuple: ``(balanced_inputs, balanced_labels)`` as Python lists, or the
        original ``(x_train, y_train)`` when already balanced. On any error
        the help text is printed and the string ``'e'`` is returned.
    """
    infoauto_balancer = """
    Function to balance the training data across different classes.

    Arguments:
    x_train (list): Input data for training.
    y_train (list): Labels corresponding to the input data.

    Returns:
    tuple: A tuple containing balanced input data and labels.
    """

    try:
        # Convert once so that plain nested lists (as documented) work too,
        # and do it inside the try so malformed labels hit the error path
        # below instead of escaping as an unrelated AttributeError.
        labels = np.asarray(y_train)
        class_count = labels.shape[1]

        # Row indices belonging to each class.
        ClassIndices = {i: np.where(labels[:, i] == 1)[0]
                        for i in range(class_count)}
        classes = [len(ClassIndices[i]) for i in range(class_count)]

        if len(set(classes)) == 1:
            print(Fore.WHITE + "INFO: All training data have already balanced. from: auto_balancer" + Style.RESET_ALL)
            return x_train, y_train

        MinCount = min(classes)

        BalancedIndices = []
        for i in range(class_count):
            if len(ClassIndices[i]) > MinCount:
                # Larger classes are randomly cut down to the smallest size.
                SelectedIndices = np.random.choice(
                    ClassIndices[i], MinCount, replace=False)
            else:
                SelectedIndices = ClassIndices[i]
            BalancedIndices.extend(SelectedIndices)

        BalancedInputs = [x_train[idx] for idx in BalancedIndices]
        BalancedLabels = [y_train[idx] for idx in BalancedIndices]

        print(Fore.GREEN + "All Training Data Succesfully Balanced from: " + str(len(x_train)
                                                                                 ) + " to: " + str(len(BalancedInputs)) + ". from: auto_balancer " + Style.RESET_ALL)
    except Exception:
        # Keep the module's existing error convention (message + 'e'), but do
        # not swallow KeyboardInterrupt/SystemExit like a bare except would.
        print(Fore.RED + "ERROR: Inputs and labels must be same length check parameters" + infoauto_balancer)
        return 'e'

    return BalancedInputs, BalancedLabels
|
1017
|
+
|
1018
|
+
|
1019
|
+
def synthetic_augmentation(x_train, y_train):
    """Oversample minority classes with interpolated synthetic examples.

    For every class that has fewer samples than the largest class, new
    samples are created by linear interpolation between two randomly chosen
    samples of that class until the class reaches the largest class size.

    Args:
        x_train: Input dataset (list of samples or 2-D numpy array).
        y_train: One-hot encoded class labels (list of rows or 2-D array).

    Returns:
        tuple: ``(x_balanced, y_balanced)`` as numpy arrays; the original
        samples come first, followed by the synthetic ones.
    """
    x = x_train
    y = y_train
    # np.asarray lets list-format labels (as documented) through as well.
    class_count = np.asarray(y_train).shape[1]

    # Calculate class distribution
    class_distribution = {i: 0 for i in range(class_count)}
    for label in y:
        class_distribution[np.argmax(label)] += 1

    max_class_count = max(class_distribution.values())

    x_balanced = list(x)
    y_balanced = list(y)

    for class_label in range(class_count):
        class_indices = [i for i, label in enumerate(y)
                         if np.argmax(label) == class_label]
        num_samples = len(class_indices)

        if num_samples == 0:
            # Nothing to interpolate from; np.random.choice would raise.
            continue

        # Two distinct samples can only be drawn when the class has at least
        # two; a single-sample class falls back to duplicating its sample.
        distinct_pair = num_samples >= 2

        while num_samples < max_class_count:
            random_indices = np.random.choice(
                class_indices, 2, replace=not distinct_pair)
            sample1 = np.asarray(x[random_indices[0]])
            sample2 = np.asarray(x[random_indices[1]])

            # Random point on the segment between the two samples.
            synthetic_sample = sample1 + (sample2 - sample1) * np.random.rand()

            x_balanced.append(synthetic_sample.tolist())
            y_balanced.append(y[class_indices[0]])

            num_samples += 1

    return np.array(x_balanced), np.array(y_balanced)
|
1070
1068
|
|
1069
|
+
|
1071
1070
|
def standard_scaler(x_train, x_test):
    """Standardize data to zero mean and unit variance per feature.

    Mean and standard deviation are computed from ``x_train`` (along axis 0)
    and applied to both datasets.

    Args:
        x_train: Training data (numpy array or array-like).
        x_test: Test data, or ``None`` to scale only ``x_train``.

    Returns:
        The scaled training data when ``x_test`` is ``None``; otherwise a
        tuple ``(train_data_scaled, test_data_scaled)``. On error the help
        text is printed and ``None`` is returned.
    """
    info_standard_scaler = """
    Standardizes training and test datasets. x_test may be None.

    Args:
        train_data: numpy.ndarray
            Training data
        test_data: numpy.ndarray
            Test data

    Returns:
        tuple
            Standardized training and test datasets
    """
    try:
        x_train = np.asarray(x_train)

        mean = np.mean(x_train, axis=0)
        std = np.std(x_train, axis=0)
        # A constant feature has std == 0; dividing by it would fill the
        # column with NaN/inf. Scale such features by 1 instead (they end up
        # as zeros after centering).
        std = np.where(std == 0, 1.0, std)

        train_data_scaled = (x_train - mean) / std

        # Identity check: `x_test == None` on an ndarray is an element-wise
        # comparison whose truth value is ambiguous and raises.
        if x_test is None:
            return train_data_scaled

        test_data_scaled = (np.asarray(x_test) - mean) / std
        return train_data_scaled, test_data_scaled

    except Exception:
        print(
            Fore.RED + "ERROR: x_train and x_test must be list[numpyarray] from standard_scaler" + info_standard_scaler)
|
1102
|
+
|
1103
|
+
|
1104
|
+
def encode_one_hot(y_train, y_test):
    """One-hot encode training and test labels.

    The set of classes (and therefore the column order) is taken from the
    sorted unique values of ``y_train``; ``y_test`` is encoded with the same
    mapping.

    Args:
        y_train: 1-D training labels (numpy array or array-like).
        y_test: 1-D test labels (numpy array or array-like).

    Returns:
        tuple: ``(y_train_encoded, y_test_encoded)`` float arrays of shape
        ``(n_samples, n_classes)``.

    Raises:
        Exception: Whatever error occurred while encoding (for example a
            ``KeyError`` for a test label that is absent from ``y_train``);
            the help text is printed before the error is re-raised.
    """
    info_one_hot_encode = """
    Performs one-hot encoding on y_train and y_test data..

    Args:
        y_train (numpy.ndarray): Eğitim etiketi verisi.
        y_test (numpy.ndarray): Test etiketi verisi.

    Returns:
        tuple: One-hot encoded y_train ve y_test verileri.
    """
    try:
        y_train = np.asarray(y_train)
        y_test = np.asarray(y_test)

        classes = np.unique(y_train)
        class_count = len(classes)

        class_to_index = {cls: idx for idx, cls in enumerate(classes)}

        y_train_encoded = np.zeros((y_train.shape[0], class_count))
        for i, label in enumerate(y_train):
            y_train_encoded[i, class_to_index[label]] = 1

        y_test_encoded = np.zeros((y_test.shape[0], class_count))
        for i, label in enumerate(y_test):
            y_test_encoded[i, class_to_index[label]] = 1
    except Exception:
        print(Fore.RED + 'ERROR: y_train and y_test must be numpy array. from: one_hot_encode' + info_one_hot_encode)
        # Re-raise the real error: falling through to the return below would
        # replace it with an UnboundLocalError on the result names.
        raise

    return y_train_encoded, y_test_encoded
|
1132
|
+
|
1133
|
+
|
1134
|
+
def split(X, y, test_size, random_state):
    """Randomly split features and labels into training and testing subsets.

    Args:
        X: Features (numpy array or array-like), one sample per row.
        y: Labels (numpy array or array-like), aligned with ``X``.
        test_size (float or int): A float in ``[0, 1]`` is the fraction of
            samples placed in the test subset; an int is the absolute number
            of test samples.
        random_state (int or None): When not ``None``, seeds numpy's global
            random generator before shuffling so the split is reproducible.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)``.

    Raises:
        ValueError: If ``test_size`` is neither float nor int, is negative,
            is a float above 1, or is an int larger than the sample count.
    """
    # Accept plain lists as well; index arrays below need ndarray operands.
    X = np.asarray(X)
    y = np.asarray(y)

    # Size of the dataset
    num_samples = X.shape[0]

    if isinstance(test_size, float):
        if not 0.0 <= test_size <= 1.0:
            raise ValueError(
                "test_size as a float must be between 0 and 1.")
        test_size = int(test_size * num_samples)
    elif isinstance(test_size, int):
        if test_size > num_samples:
            raise ValueError(
                "test_size cannot be larger than the number of samples.")
        if test_size < 0:
            # A negative count would be interpreted as a slice from the end
            # and silently produce a wrong split.
            raise ValueError("test_size cannot be negative.")
    else:
        raise ValueError("test_size should be float or int.")

    if random_state is not None:
        np.random.seed(random_state)

    indices = np.arange(num_samples)
    np.random.shuffle(indices)

    # First `test_size` shuffled indices form the test set, the rest train.
    test_indices = indices[:test_size]
    train_indices = indices[test_size:]

    x_train, x_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    return x_train, x_test, y_train, y_test
|
1172
|
+
|
1173
|
+
|
1174
|
+
def metrics(y_ts, test_preds):
    """Calculate micro-averaged precision, recall and F1 score.

    Args:
        y_ts (list or numpy.ndarray): True labels, one-hot encoded (decoded
            here with ``decode_one_hot``).
        test_preds (list or numpy.ndarray): Predicted class labels.

    Returns:
        tuple: ``(micro_precision, micro_recall, micro_f1)``. Each value is
        ``0`` when its denominator is zero.
    """
    y_test_d = np.array(decode_one_hot(y_ts))
    y_pred = np.array(test_preds)

    if y_test_d.ndim > 1:
        y_test_d = y_test_d.reshape(-1)
    if y_pred.ndim > 1:
        y_pred = y_pred.reshape(-1)

    # Only aligned (true, predicted) pairs are scored; surplus entries of the
    # longer sequence are ignored.
    pair_count = min(len(y_test_d), len(y_pred))

    # Summed over all classes, every correct pair is exactly one true
    # positive, and every wrong pair is exactly one false positive (for the
    # predicted class) and one false negative (for the true class). The
    # totals therefore come from a single vectorized comparison instead of a
    # per-class pass over the data.
    tp = np.sum(y_test_d[:pair_count] == y_pred[:pair_count])
    fp = pair_count - tp
    fn = pair_count - tp

    micro_precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    micro_recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    micro_f1 = 2 * (micro_precision * micro_recall) / (micro_precision +
                                                       micro_recall) if (micro_precision + micro_recall) > 0 else 0

    return micro_precision, micro_recall, micro_f1
|
1232
|
+
|
1233
|
+
|
1234
|
+
def decode_one_hot(encoded_data):
    """Turn one-hot encoded rows back into class indices.

    Args:
        encoded_data (numpy.ndarray): One-hot encoded data with shape
            (n_samples, n_classes).

    Returns:
        numpy.ndarray: For each row, the column index of its largest entry;
        shape (n_samples,).
    """
    return np.argmax(encoded_data, axis=1)
|
1248
|
+
|
1249
|
+
|
1250
|
+
def roc_curve(y_true, y_score):
    """Compute ROC curve points on a fixed grid of 100 thresholds in [0, 1].

    Args:
        y_true (array-like): True class labels (binary: 0 or 1).
        y_score (array-like): Predicted scores for the positive class; a
            sample is predicted positive when its score is >= the threshold.

    Returns:
        tuple: ``(fpr, tpr, thresholds)`` where ``fpr`` and ``tpr`` are lists
        with one value per threshold and ``thresholds`` is the numpy array
        ``np.linspace(0, 1, 100)``. A rate is ``0.0`` when its denominator
        (number of negatives / positives) is zero.
    """
    # np.asarray makes plain lists usable; no sorting is needed because every
    # count below is order-independent.
    labels = np.asarray(y_true)
    scores = np.asarray(y_score)

    positives = labels == 1
    negatives = labels == 0
    # tp + fn and fp + tn do not depend on the threshold, so hoist them.
    positive_total = np.sum(positives)
    negative_total = np.sum(negatives)

    tpr = []
    fpr = []
    thresholds = np.linspace(0, 1, 100)

    for threshold in thresholds:
        predicted_positive = scores >= threshold

        tp = np.sum(positives & predicted_positive)
        fp = np.sum(negatives & predicted_positive)

        # Guard against division by zero when a class is absent.
        tpr.append(tp / positive_total if positive_total else 0.0)
        fpr.append(fp / negative_total if negative_total else 0.0)

    return fpr, tpr, thresholds
|
1293
|
+
|
1294
|
+
|
1295
|
+
def confusion_matrix(y_true, y_pred, class_count):
    """Compute a confusion matrix.

    Args:
        y_true (array-like): True class labels (1D, values usable as indices).
        y_pred (array-like): Predicted class labels (1D); entries beyond
            ``len(y_true)`` are ignored.
        class_count (int): Number of classes.

    Returns:
        numpy.ndarray: Integer matrix of shape (class_count, class_count)
        where entry ``[t, p]`` counts samples with true label ``t`` that were
        predicted as ``p``.

    Raises:
        IndexError: If ``y_pred`` is shorter than ``y_true`` or a label is
            out of range for ``class_count``.
    """
    confusion = np.zeros((class_count, class_count), dtype=int)

    # Cast to int so labels stored as floats (e.g. 1.0) can be used as
    # indices instead of raising.
    true_labels = np.asarray(y_true, dtype=int)
    pred_labels = np.asarray(y_pred, dtype=int)

    if pred_labels.shape[0] < true_labels.shape[0]:
        raise IndexError("y_pred has fewer entries than y_true.")

    # np.add.at accumulates correctly when the same (true, pred) cell occurs
    # more than once, which plain fancy-index `+=` would not.
    np.add.at(confusion,
              (true_labels, pred_labels[:true_labels.shape[0]]), 1)

    return confusion
|
1315
|
+
|
1316
|
+
|
1317
|
+
def plot_evaluate(y_test, y_preds, acc_list):
    """Show a 2x2 figure summarizing an evaluation run.

    Panels: confusion matrix (top-left), precision/recall/F1/accuracy bars
    (top-right), ROC curve (bottom-left) and accuracy history (bottom-right).

    Args:
        y_test: One-hot encoded true labels, shape (n_samples, n_classes).
        y_preds: Predicted class labels, one per sample.
        acc_list: Accuracy values recorded during evaluation; the last entry
            is shown as the final accuracy. Must not be empty.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    acc = acc_list[-1]
    y_true = np.array(decode_one_hot(y_test))
    y_preds = np.array(y_preds)

    # NOTE(review): roc_curve documents binary labels and positive-class
    # scores, but it receives predicted class labels here; the curve is
    # presumably only meaningful for binary problems - confirm.
    fpr, tpr, thresholds = roc_curve(y_true, y_preds)
    precision, recall, f1 = metrics(y_test, y_preds)

    # The number of classes is the width of the one-hot matrix. Counting the
    # unique values of a one-hot matrix yields at most 2 (just 0 and 1),
    # which mis-sizes the confusion matrix for three or more classes.
    class_count = np.asarray(y_test).shape[1]

    cm = confusion_matrix(y_true, y_preds, class_count)

    fig, axs = plt.subplots(2, 2, figsize=(16, 12))

    # Confusion Matrix
    sns.heatmap(cm, annot=True, fmt='d', ax=axs[0, 0])
    axs[0, 0].set_title("Confusion Matrix")
    axs[0, 0].set_xlabel("Predicted Class")
    axs[0, 0].set_ylabel("Actual Class")

    # ROC Curve
    axs[1, 0].plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve')
    axs[1, 0].plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    axs[1, 0].set_xlim([0.0, 1.0])
    axs[1, 0].set_ylim([0.0, 1.05])
    axs[1, 0].set_xlabel('False Positive Rate')
    axs[1, 0].set_ylabel('True Positive Rate')
    axs[1, 0].set_title('Receiver Operating Characteristic (ROC) Curve')
    axs[1, 0].legend(loc="lower right")

    # Precision, Recall, F1 Score, Accuracy
    metric = ['Precision', 'Recall', 'F1 Score', 'Accuracy']
    values = [precision, recall, f1, acc]
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

    bars = axs[0, 1].bar(metric, values, color=colors)

    # Print each value just below the top of its bar.
    for bar, value in zip(bars, values):
        axs[0, 1].text(bar.get_x() + bar.get_width() / 2, bar.get_height() - 0.05, f'{value:.2f}',
                       ha='center', va='bottom', fontsize=12, color='white', weight='bold')

    axs[0, 1].set_ylim(0, 1)
    axs[0, 1].set_xlabel('Metrics')
    axs[0, 1].set_ylabel('Score')
    axs[0, 1].set_title('Precision, Recall, F1 Score, and Accuracy')
    axs[0, 1].grid(True, axis='y', linestyle='--', alpha=0.7)

    # Accuracy history: target the remaining panel explicitly rather than
    # relying on pyplot's implicit "current axes".
    axs[1, 1].plot(acc_list, marker='o', linestyle='-',
                   color='r', label='Accuracy')
    axs[1, 1].axhline(y=1, color='g', linestyle='--', label='Maximum Accuracy')
    axs[1, 1].set_xlabel('Samples')
    axs[1, 1].set_ylabel('Accuracy')
    axs[1, 1].set_title('Accuracy History')
    axs[1, 1].legend()

    plt.tight_layout()
    plt.show()
|
1097
1384
|
|
1098
1385
|
def get_weights():
|
1099
1386
|
|