noshot 4.0.0__py3-none-any.whl → 6.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/data/ML TS XAI/XAI/Q1.ipynb +377 -0
- noshot/data/ML TS XAI/XAI/Q2.ipynb +362 -0
- noshot/data/ML TS XAI/XAI/Q3.ipynb +637 -0
- noshot/data/ML TS XAI/XAI/Q4.ipynb +206 -0
- noshot/data/ML TS XAI/XAI/Q5.ipynb +1018 -0
- {noshot-4.0.0.dist-info → noshot-6.0.0.dist-info}/METADATA +1 -1
- noshot-6.0.0.dist-info/RECORD +14 -0
- noshot/data/ML TS XAI/ML/ML Lab CIA 2 (I Found Only This Check)/Copy_of_Pistachio_csv.ipynb +0 -269
- noshot/data/ML TS XAI/ML/ML Lab CIA 2 (I Found Only This Check)/weatherAUS.ipynb +0 -155
- noshot/data/ML TS XAI/ML/Main/1. EDA-PCA (Balance Scale Dataset).ipynb +0 -139
- noshot/data/ML TS XAI/ML/Main/1. EDA-PCA (Rice Dataset).ipynb +0 -181
- noshot/data/ML TS XAI/ML/Main/10. HMM Veterbi.ipynb +0 -228
- noshot/data/ML TS XAI/ML/Main/2. KNN (Balance Scale Dataset).ipynb +0 -117
- noshot/data/ML TS XAI/ML/Main/2. KNN (Iris Dataset).ipynb +0 -165
- noshot/data/ML TS XAI/ML/Main/2. KNN (Sobar-72 Dataset).ipynb +0 -251
- noshot/data/ML TS XAI/ML/Main/3. LDA (Balance Scale Dataset).ipynb +0 -78
- noshot/data/ML TS XAI/ML/Main/3. LDA (NPHA Doctor Visits Dataset).ipynb +0 -114
- noshot/data/ML TS XAI/ML/Main/4. Linear Regression (Machine Dataset).ipynb +0 -115
- noshot/data/ML TS XAI/ML/Main/4. Linear Regression (Real Estate Dataset).ipynb +0 -159
- noshot/data/ML TS XAI/ML/Main/5. Logistic Regression (Magic04 Dataset).ipynb +0 -200
- noshot/data/ML TS XAI/ML/Main/5. Logistic Regression (Wine Dataset).ipynb +0 -112
- noshot/data/ML TS XAI/ML/Main/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb +0 -153
- noshot/data/ML TS XAI/ML/Main/6. Naive Bayes Classifier (Wine Dataset).ipynb +0 -89
- noshot/data/ML TS XAI/ML/Main/7. SVM (Rice Dataset).ipynb +0 -208
- noshot/data/ML TS XAI/ML/Main/8. FeedForward NN (Sobar72 Dataset).ipynb +0 -260
- noshot/data/ML TS XAI/ML/Main/9. CNN (Cifar10 Dataset).ipynb +0 -238
- noshot/data/ML TS XAI/ML/Main/data/agaricus-lepiota.data +0 -8124
- noshot/data/ML TS XAI/ML/Main/data/balance-scale.txt +0 -625
- noshot/data/ML TS XAI/ML/Main/data/doctor-visits.csv +0 -715
- noshot/data/ML TS XAI/ML/Main/data/iris.csv +0 -151
- noshot/data/ML TS XAI/ML/Main/data/machine-data.csv +0 -210
- noshot/data/ML TS XAI/ML/Main/data/magic04.data +0 -19020
- noshot/data/ML TS XAI/ML/Main/data/real-estate.xlsx +0 -0
- noshot/data/ML TS XAI/ML/Main/data/rice.arff +0 -3826
- noshot/data/ML TS XAI/ML/Main/data/sobar-72.csv +0 -73
- noshot/data/ML TS XAI/ML/Main/data/wine-dataset.csv +0 -179
- noshot/data/ML TS XAI/ML/Other Codes.ipynb +0 -158
- noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb +0 -691
- noshot-4.0.0.dist-info/RECORD +0 -40
- {noshot-4.0.0.dist-info → noshot-6.0.0.dist-info}/WHEEL +0 -0
- {noshot-4.0.0.dist-info → noshot-6.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {noshot-4.0.0.dist-info → noshot-6.0.0.dist-info}/top_level.txt +0 -0
@@ -1,159 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"cells": [
|
3
|
-
{
|
4
|
-
"cell_type": "code",
|
5
|
-
"execution_count": null,
|
6
|
-
"id": "4e998aac",
|
7
|
-
"metadata": {},
|
8
|
-
"outputs": [],
|
9
|
-
"source": [
|
10
|
-
"import numpy as np\n",
|
11
|
-
"import pandas as pd\n",
|
12
|
-
"import seaborn as sns\n",
|
13
|
-
"import matplotlib.pyplot as plt\n",
|
14
|
-
"from sklearn.model_selection import train_test_split"
|
15
|
-
]
|
16
|
-
},
|
17
|
-
{
|
18
|
-
"cell_type": "code",
|
19
|
-
"execution_count": null,
|
20
|
-
"id": "7fef393a",
|
21
|
-
"metadata": {},
|
22
|
-
"outputs": [],
|
23
|
-
"source": [
|
24
|
-
"df=pd.read_excel(\"data/real-estate.xlsx\")\n",
|
25
|
-
"print(\"Shape:\", df.shape)\n",
|
26
|
-
"df.head()"
|
27
|
-
]
|
28
|
-
},
|
29
|
-
{
|
30
|
-
"cell_type": "code",
|
31
|
-
"execution_count": null,
|
32
|
-
"id": "18f866b3",
|
33
|
-
"metadata": {},
|
34
|
-
"outputs": [],
|
35
|
-
"source": [
|
36
|
-
"df.isnull().sum()"
|
37
|
-
]
|
38
|
-
},
|
39
|
-
{
|
40
|
-
"cell_type": "code",
|
41
|
-
"execution_count": null,
|
42
|
-
"id": "c238436e",
|
43
|
-
"metadata": {},
|
44
|
-
"outputs": [],
|
45
|
-
"source": [
|
46
|
-
"sns.pairplot(df)"
|
47
|
-
]
|
48
|
-
},
|
49
|
-
{
|
50
|
-
"cell_type": "code",
|
51
|
-
"execution_count": null,
|
52
|
-
"id": "480aa3b6",
|
53
|
-
"metadata": {},
|
54
|
-
"outputs": [],
|
55
|
-
"source": [
|
56
|
-
"corr = df.corr()\n",
|
57
|
-
"sns.heatmap(corr)"
|
58
|
-
]
|
59
|
-
},
|
60
|
-
{
|
61
|
-
"cell_type": "code",
|
62
|
-
"execution_count": null,
|
63
|
-
"id": "7fe55d8d",
|
64
|
-
"metadata": {},
|
65
|
-
"outputs": [],
|
66
|
-
"source": [
|
67
|
-
"def lin_reg(colX):\n",
|
68
|
-
" X = df[colX].values\n",
|
69
|
-
" y = df['Y house price of unit area'].values\n",
|
70
|
-
"\n",
|
71
|
-
" X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
72
|
-
"\n",
|
73
|
-
" N = len(X_train)\n",
|
74
|
-
" sum_x = np.sum(X_train)\n",
|
75
|
-
" sum_y = np.sum(y_train)\n",
|
76
|
-
" sum_xy = np.sum(X_train * y_train)\n",
|
77
|
-
" sum_x2 = np.sum(X_train ** 2)\n",
|
78
|
-
"\n",
|
79
|
-
" beta1 = (N * sum_xy - sum_x * sum_y) / (N * sum_x2 - sum_x ** 2)\n",
|
80
|
-
" beta0 = (sum_y - beta1 * sum_x) / N\n",
|
81
|
-
"\n",
|
82
|
-
" y_pred_train = beta1 * X_train + beta0\n",
|
83
|
-
" y_pred_test = beta1 * X_test + beta0\n",
|
84
|
-
"\n",
|
85
|
-
" plt.scatter(X_train, y_train, color='blue', label='Training Data Points')\n",
|
86
|
-
" plt.plot(X_train, y_pred_train, color='red', label='Regression Line (Training)')\n",
|
87
|
-
" plt.xlabel(colX)\n",
|
88
|
-
" plt.ylabel('Y house price of unit area')\n",
|
89
|
-
" plt.title(f'Linear Regression: {colX} vs Y (Training Set)')\n",
|
90
|
-
" plt.legend()\n",
|
91
|
-
" plt.show()\n",
|
92
|
-
"\n",
|
93
|
-
" plt.scatter(X_test, y_test, color='blue', label='Test Data Points')\n",
|
94
|
-
" plt.plot(X_test, y_pred_test, color='red', label='Regression Line (Test)')\n",
|
95
|
-
" plt.xlabel(colX)\n",
|
96
|
-
" plt.ylabel('Y house price of unit area')\n",
|
97
|
-
" plt.title(f'Linear Regression: {colX} vs Y (Test Set)')\n",
|
98
|
-
" plt.legend()\n",
|
99
|
-
" plt.show()\n",
|
100
|
-
"\n",
|
101
|
-
" mse_train = np.mean((y_train - y_pred_train) ** 2)\n",
|
102
|
-
" mse_test = np.mean((y_test - y_pred_test) ** 2)\n",
|
103
|
-
" print(f\"Mean Squared Error (MSE) for Training Set: {mse_train}\")\n",
|
104
|
-
" print(f\"Mean Squared Error (MSE) for Test Set: {mse_test}\")"
|
105
|
-
]
|
106
|
-
},
|
107
|
-
{
|
108
|
-
"cell_type": "code",
|
109
|
-
"execution_count": null,
|
110
|
-
"id": "8d80579a",
|
111
|
-
"metadata": {},
|
112
|
-
"outputs": [],
|
113
|
-
"source": [
|
114
|
-
"lin_reg('X3 distance to the nearest MRT station')"
|
115
|
-
]
|
116
|
-
},
|
117
|
-
{
|
118
|
-
"cell_type": "code",
|
119
|
-
"execution_count": null,
|
120
|
-
"id": "6b162be8",
|
121
|
-
"metadata": {},
|
122
|
-
"outputs": [],
|
123
|
-
"source": [
|
124
|
-
"lin_reg('X5 latitude')"
|
125
|
-
]
|
126
|
-
},
|
127
|
-
{
|
128
|
-
"cell_type": "code",
|
129
|
-
"execution_count": null,
|
130
|
-
"id": "7b576d0a",
|
131
|
-
"metadata": {},
|
132
|
-
"outputs": [],
|
133
|
-
"source": [
|
134
|
-
"lin_reg('X6 longitude')"
|
135
|
-
]
|
136
|
-
}
|
137
|
-
],
|
138
|
-
"metadata": {
|
139
|
-
"kernelspec": {
|
140
|
-
"display_name": "Python 3 (ipykernel)",
|
141
|
-
"language": "python",
|
142
|
-
"name": "python3"
|
143
|
-
},
|
144
|
-
"language_info": {
|
145
|
-
"codemirror_mode": {
|
146
|
-
"name": "ipython",
|
147
|
-
"version": 3
|
148
|
-
},
|
149
|
-
"file_extension": ".py",
|
150
|
-
"mimetype": "text/x-python",
|
151
|
-
"name": "python",
|
152
|
-
"nbconvert_exporter": "python",
|
153
|
-
"pygments_lexer": "ipython3",
|
154
|
-
"version": "3.12.4"
|
155
|
-
}
|
156
|
-
},
|
157
|
-
"nbformat": 4,
|
158
|
-
"nbformat_minor": 5
|
159
|
-
}
|
@@ -1,200 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"cells": [
|
3
|
-
{
|
4
|
-
"cell_type": "code",
|
5
|
-
"execution_count": null,
|
6
|
-
"id": "5bafc01f",
|
7
|
-
"metadata": {},
|
8
|
-
"outputs": [],
|
9
|
-
"source": [
|
10
|
-
"import pandas as pd\n",
|
11
|
-
"import matplotlib.pyplot as plt\n",
|
12
|
-
"import seaborn as sns\n",
|
13
|
-
"from sklearn.model_selection import train_test_split\n",
|
14
|
-
"from sklearn.linear_model import LogisticRegression\n",
|
15
|
-
"from sklearn.metrics import accuracy_score, roc_curve, auc\n",
|
16
|
-
"from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay\n",
|
17
|
-
"from sklearn.preprocessing import MinMaxScaler\n",
|
18
|
-
"from sklearn.decomposition import PCA"
|
19
|
-
]
|
20
|
-
},
|
21
|
-
{
|
22
|
-
"cell_type": "code",
|
23
|
-
"execution_count": null,
|
24
|
-
"id": "6ef7990c",
|
25
|
-
"metadata": {},
|
26
|
-
"outputs": [],
|
27
|
-
"source": [
|
28
|
-
"df = pd.read_csv('data/magic04.data', header=None)\n",
|
29
|
-
"df.head()"
|
30
|
-
]
|
31
|
-
},
|
32
|
-
{
|
33
|
-
"cell_type": "code",
|
34
|
-
"execution_count": null,
|
35
|
-
"id": "1f1065b5",
|
36
|
-
"metadata": {},
|
37
|
-
"outputs": [],
|
38
|
-
"source": [
|
39
|
-
"df[10] = df[10].map({'g':0,'h':1})"
|
40
|
-
]
|
41
|
-
},
|
42
|
-
{
|
43
|
-
"cell_type": "code",
|
44
|
-
"execution_count": null,
|
45
|
-
"id": "5f228790",
|
46
|
-
"metadata": {},
|
47
|
-
"outputs": [],
|
48
|
-
"source": [
|
49
|
-
"sns.pairplot(df, hue=10)"
|
50
|
-
]
|
51
|
-
},
|
52
|
-
{
|
53
|
-
"cell_type": "code",
|
54
|
-
"execution_count": null,
|
55
|
-
"id": "a5e91b36",
|
56
|
-
"metadata": {},
|
57
|
-
"outputs": [],
|
58
|
-
"source": [
|
59
|
-
"X = df.drop(columns=[10])\n",
|
60
|
-
"X"
|
61
|
-
]
|
62
|
-
},
|
63
|
-
{
|
64
|
-
"cell_type": "code",
|
65
|
-
"execution_count": null,
|
66
|
-
"id": "dcbc168e",
|
67
|
-
"metadata": {},
|
68
|
-
"outputs": [],
|
69
|
-
"source": [
|
70
|
-
"X = MinMaxScaler().fit_transform(X)\n",
|
71
|
-
"X"
|
72
|
-
]
|
73
|
-
},
|
74
|
-
{
|
75
|
-
"cell_type": "code",
|
76
|
-
"execution_count": null,
|
77
|
-
"id": "a4a1e805",
|
78
|
-
"metadata": {},
|
79
|
-
"outputs": [],
|
80
|
-
"source": [
|
81
|
-
"y = df[10]\n",
|
82
|
-
"y"
|
83
|
-
]
|
84
|
-
},
|
85
|
-
{
|
86
|
-
"cell_type": "code",
|
87
|
-
"execution_count": null,
|
88
|
-
"id": "7edc3c7f",
|
89
|
-
"metadata": {},
|
90
|
-
"outputs": [],
|
91
|
-
"source": [
|
92
|
-
"y.value_counts()"
|
93
|
-
]
|
94
|
-
},
|
95
|
-
{
|
96
|
-
"cell_type": "code",
|
97
|
-
"execution_count": null,
|
98
|
-
"id": "243c084d",
|
99
|
-
"metadata": {},
|
100
|
-
"outputs": [],
|
101
|
-
"source": [
|
102
|
-
"lr = LogisticRegression(max_iter=10000, random_state=0)"
|
103
|
-
]
|
104
|
-
},
|
105
|
-
{
|
106
|
-
"cell_type": "code",
|
107
|
-
"execution_count": null,
|
108
|
-
"id": "92615b1f",
|
109
|
-
"metadata": {},
|
110
|
-
"outputs": [],
|
111
|
-
"source": [
|
112
|
-
"X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
|
113
|
-
]
|
114
|
-
},
|
115
|
-
{
|
116
|
-
"cell_type": "code",
|
117
|
-
"execution_count": null,
|
118
|
-
"id": "114a44f5",
|
119
|
-
"metadata": {},
|
120
|
-
"outputs": [],
|
121
|
-
"source": [
|
122
|
-
"lr.fit(X_train,y_train)"
|
123
|
-
]
|
124
|
-
},
|
125
|
-
{
|
126
|
-
"cell_type": "code",
|
127
|
-
"execution_count": null,
|
128
|
-
"id": "79e3ed49",
|
129
|
-
"metadata": {},
|
130
|
-
"outputs": [],
|
131
|
-
"source": [
|
132
|
-
"y_pred = lr.predict(X_test)\n",
|
133
|
-
"print(f\"Accuracy : {accuracy_score(y_test,y_pred)}\")"
|
134
|
-
]
|
135
|
-
},
|
136
|
-
{
|
137
|
-
"cell_type": "code",
|
138
|
-
"execution_count": null,
|
139
|
-
"id": "f377d0cc",
|
140
|
-
"metadata": {},
|
141
|
-
"outputs": [],
|
142
|
-
"source": [
|
143
|
-
"report = classification_report(y_test,y_pred)\n",
|
144
|
-
"print(report)"
|
145
|
-
]
|
146
|
-
},
|
147
|
-
{
|
148
|
-
"cell_type": "code",
|
149
|
-
"execution_count": null,
|
150
|
-
"id": "e42690dc",
|
151
|
-
"metadata": {},
|
152
|
-
"outputs": [],
|
153
|
-
"source": [
|
154
|
-
"cm = confusion_matrix(y_test,y_pred)\n",
|
155
|
-
"ConfusionMatrixDisplay(cm).plot()"
|
156
|
-
]
|
157
|
-
},
|
158
|
-
{
|
159
|
-
"cell_type": "code",
|
160
|
-
"execution_count": null,
|
161
|
-
"id": "bd35e7f4",
|
162
|
-
"metadata": {},
|
163
|
-
"outputs": [],
|
164
|
-
"source": [
|
165
|
-
"y_pred_proba = lr.predict_proba(X_test)[:,1]\n",
|
166
|
-
"fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba) \n",
|
167
|
-
"roc_auc = auc(fpr, tpr)\n",
|
168
|
-
"\n",
|
169
|
-
"plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)\n",
|
170
|
-
"plt.plot([0, 1], [0, 1], 'k--', label='No Skill')\n",
|
171
|
-
"plt.xlabel('False Positive Rate')\n",
|
172
|
-
"plt.ylabel('True Positive Rate')\n",
|
173
|
-
"plt.title('ROC Curve for Breast Cancer Classification')\n",
|
174
|
-
"plt.legend()\n",
|
175
|
-
"plt.show()"
|
176
|
-
]
|
177
|
-
}
|
178
|
-
],
|
179
|
-
"metadata": {
|
180
|
-
"kernelspec": {
|
181
|
-
"display_name": "Python 3 (ipykernel)",
|
182
|
-
"language": "python",
|
183
|
-
"name": "python3"
|
184
|
-
},
|
185
|
-
"language_info": {
|
186
|
-
"codemirror_mode": {
|
187
|
-
"name": "ipython",
|
188
|
-
"version": 3
|
189
|
-
},
|
190
|
-
"file_extension": ".py",
|
191
|
-
"mimetype": "text/x-python",
|
192
|
-
"name": "python",
|
193
|
-
"nbconvert_exporter": "python",
|
194
|
-
"pygments_lexer": "ipython3",
|
195
|
-
"version": "3.12.4"
|
196
|
-
}
|
197
|
-
},
|
198
|
-
"nbformat": 4,
|
199
|
-
"nbformat_minor": 5
|
200
|
-
}
|
@@ -1,112 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"cells": [
|
3
|
-
{
|
4
|
-
"cell_type": "code",
|
5
|
-
"execution_count": null,
|
6
|
-
"id": "0fcc8bb7-4d22-4d3b-b58a-302bb24f8f2e",
|
7
|
-
"metadata": {},
|
8
|
-
"outputs": [],
|
9
|
-
"source": [
|
10
|
-
"import itertools\n",
|
11
|
-
"import numpy as np\n",
|
12
|
-
"import pandas as pd\n",
|
13
|
-
"import matplotlib.pyplot as plt\n",
|
14
|
-
"from sklearn import linear_model,datasets\n",
|
15
|
-
"from sklearn.model_selection import train_test_split\n",
|
16
|
-
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
|
17
|
-
"\n",
|
18
|
-
"import warnings\n",
|
19
|
-
"warnings.filterwarnings('ignore')"
|
20
|
-
]
|
21
|
-
},
|
22
|
-
{
|
23
|
-
"cell_type": "code",
|
24
|
-
"execution_count": null,
|
25
|
-
"id": "d28e507b-fb15-4058-a161-656859a27c65",
|
26
|
-
"metadata": {},
|
27
|
-
"outputs": [],
|
28
|
-
"source": [
|
29
|
-
"wine = pd.read_csv('data/wine-dataset.csv')\n",
|
30
|
-
"print(\"Shape:\", wine.shape)\n",
|
31
|
-
"wine.head()"
|
32
|
-
]
|
33
|
-
},
|
34
|
-
{
|
35
|
-
"cell_type": "code",
|
36
|
-
"execution_count": null,
|
37
|
-
"id": "c4e953da-6941-43f2-a9ce-aab907876d45",
|
38
|
-
"metadata": {},
|
39
|
-
"outputs": [],
|
40
|
-
"source": [
|
41
|
-
"wine.columns"
|
42
|
-
]
|
43
|
-
},
|
44
|
-
{
|
45
|
-
"cell_type": "code",
|
46
|
-
"execution_count": null,
|
47
|
-
"id": "9ee44a66-dc4a-4c79-9dab-eec60669dd8b",
|
48
|
-
"metadata": {},
|
49
|
-
"outputs": [],
|
50
|
-
"source": [
|
51
|
-
"X = wine.iloc[:, :13]\n",
|
52
|
-
"y = wine.iloc[:, 13]"
|
53
|
-
]
|
54
|
-
},
|
55
|
-
{
|
56
|
-
"cell_type": "code",
|
57
|
-
"execution_count": null,
|
58
|
-
"id": "bd9d60dd-8272-46b4-8335-69d9751ed0c7",
|
59
|
-
"metadata": {},
|
60
|
-
"outputs": [],
|
61
|
-
"source": [
|
62
|
-
"X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.30, random_state=7)\n",
|
63
|
-
"\n",
|
64
|
-
"log_reg_model = linear_model.LogisticRegression()\n",
|
65
|
-
"log_reg_model.fit(X_train,y_train)"
|
66
|
-
]
|
67
|
-
},
|
68
|
-
{
|
69
|
-
"cell_type": "code",
|
70
|
-
"execution_count": null,
|
71
|
-
"id": "7c8fca42-c8d8-4334-9cc4-da4f5e1b0a1e",
|
72
|
-
"metadata": {},
|
73
|
-
"outputs": [],
|
74
|
-
"source": [
|
75
|
-
"log_reg_base_score = log_reg_model.score(X_test,y_test)\n",
|
76
|
-
"print(\"The score for the Logistic Regression Model is : \", log_reg_base_score)"
|
77
|
-
]
|
78
|
-
},
|
79
|
-
{
|
80
|
-
"cell_type": "code",
|
81
|
-
"execution_count": null,
|
82
|
-
"id": "61bbb23e-cb29-41ae-9ea3-82e8d465c7f2",
|
83
|
-
"metadata": {},
|
84
|
-
"outputs": [],
|
85
|
-
"source": [
|
86
|
-
"cm = confusion_matrix(y_test, log_reg_model.predict(X_test))\n",
|
87
|
-
"ConfusionMatrixDisplay(cm).plot()"
|
88
|
-
]
|
89
|
-
}
|
90
|
-
],
|
91
|
-
"metadata": {
|
92
|
-
"kernelspec": {
|
93
|
-
"display_name": "Python 3 (ipykernel)",
|
94
|
-
"language": "python",
|
95
|
-
"name": "python3"
|
96
|
-
},
|
97
|
-
"language_info": {
|
98
|
-
"codemirror_mode": {
|
99
|
-
"name": "ipython",
|
100
|
-
"version": 3
|
101
|
-
},
|
102
|
-
"file_extension": ".py",
|
103
|
-
"mimetype": "text/x-python",
|
104
|
-
"name": "python",
|
105
|
-
"nbconvert_exporter": "python",
|
106
|
-
"pygments_lexer": "ipython3",
|
107
|
-
"version": "3.12.4"
|
108
|
-
}
|
109
|
-
},
|
110
|
-
"nbformat": 4,
|
111
|
-
"nbformat_minor": 5
|
112
|
-
}
|
@@ -1,153 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"cells": [
|
3
|
-
{
|
4
|
-
"cell_type": "code",
|
5
|
-
"execution_count": null,
|
6
|
-
"id": "5bafc01f",
|
7
|
-
"metadata": {},
|
8
|
-
"outputs": [],
|
9
|
-
"source": [
|
10
|
-
"import pandas as pd\n",
|
11
|
-
"import matplotlib.pyplot as plt\n",
|
12
|
-
"import seaborn as sns\n",
|
13
|
-
"from sklearn.model_selection import train_test_split\n",
|
14
|
-
"from sklearn.naive_bayes import GaussianNB\n",
|
15
|
-
"from sklearn.metrics import accuracy_score, roc_curve, auc\n",
|
16
|
-
"from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay\n",
|
17
|
-
"from sklearn.preprocessing import LabelEncoder"
|
18
|
-
]
|
19
|
-
},
|
20
|
-
{
|
21
|
-
"cell_type": "code",
|
22
|
-
"execution_count": null,
|
23
|
-
"id": "6ef7990c",
|
24
|
-
"metadata": {},
|
25
|
-
"outputs": [],
|
26
|
-
"source": [
|
27
|
-
"df = pd.read_csv(\"data/agaricus-lepiota.data\", header=None)\n",
|
28
|
-
"df.head()"
|
29
|
-
]
|
30
|
-
},
|
31
|
-
{
|
32
|
-
"cell_type": "code",
|
33
|
-
"execution_count": null,
|
34
|
-
"id": "a5e91b36",
|
35
|
-
"metadata": {},
|
36
|
-
"outputs": [],
|
37
|
-
"source": [
|
38
|
-
"X = df.drop(columns=[0])\n",
|
39
|
-
"X"
|
40
|
-
]
|
41
|
-
},
|
42
|
-
{
|
43
|
-
"cell_type": "code",
|
44
|
-
"execution_count": null,
|
45
|
-
"id": "eb58da4a",
|
46
|
-
"metadata": {},
|
47
|
-
"outputs": [],
|
48
|
-
"source": [
|
49
|
-
"for col in X.columns: \n",
|
50
|
-
" X[col] = LabelEncoder().fit_transform(X[col])\n",
|
51
|
-
"X"
|
52
|
-
]
|
53
|
-
},
|
54
|
-
{
|
55
|
-
"cell_type": "code",
|
56
|
-
"execution_count": null,
|
57
|
-
"id": "a4a1e805",
|
58
|
-
"metadata": {},
|
59
|
-
"outputs": [],
|
60
|
-
"source": [
|
61
|
-
"y = df[0]\n",
|
62
|
-
"y = LabelEncoder().fit_transform(y)\n",
|
63
|
-
"y"
|
64
|
-
]
|
65
|
-
},
|
66
|
-
{
|
67
|
-
"cell_type": "code",
|
68
|
-
"execution_count": null,
|
69
|
-
"id": "114a44f5",
|
70
|
-
"metadata": {},
|
71
|
-
"outputs": [],
|
72
|
-
"source": [
|
73
|
-
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)\n",
|
74
|
-
"nb = GaussianNB()\n",
|
75
|
-
"nb.fit(X_train, y_train)"
|
76
|
-
]
|
77
|
-
},
|
78
|
-
{
|
79
|
-
"cell_type": "code",
|
80
|
-
"execution_count": null,
|
81
|
-
"id": "79e3ed49",
|
82
|
-
"metadata": {},
|
83
|
-
"outputs": [],
|
84
|
-
"source": [
|
85
|
-
"y_pred = nb.predict(X_test)\n",
|
86
|
-
"print(f\"Accuracy : {accuracy_score(y_test,y_pred)}\")"
|
87
|
-
]
|
88
|
-
},
|
89
|
-
{
|
90
|
-
"cell_type": "code",
|
91
|
-
"execution_count": null,
|
92
|
-
"id": "f377d0cc",
|
93
|
-
"metadata": {},
|
94
|
-
"outputs": [],
|
95
|
-
"source": [
|
96
|
-
"report = classification_report(y_test,y_pred)\n",
|
97
|
-
"print(report)"
|
98
|
-
]
|
99
|
-
},
|
100
|
-
{
|
101
|
-
"cell_type": "code",
|
102
|
-
"execution_count": null,
|
103
|
-
"id": "e42690dc",
|
104
|
-
"metadata": {},
|
105
|
-
"outputs": [],
|
106
|
-
"source": [
|
107
|
-
"cm = confusion_matrix(y_test,y_pred)\n",
|
108
|
-
"ConfusionMatrixDisplay(cm).plot()"
|
109
|
-
]
|
110
|
-
},
|
111
|
-
{
|
112
|
-
"cell_type": "code",
|
113
|
-
"execution_count": null,
|
114
|
-
"id": "bd35e7f4",
|
115
|
-
"metadata": {},
|
116
|
-
"outputs": [],
|
117
|
-
"source": [
|
118
|
-
"y_pred_proba = nb.predict_proba(X_test)[:,1]\n",
|
119
|
-
"fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba) \n",
|
120
|
-
"roc_auc = auc(fpr, tpr)\n",
|
121
|
-
"\n",
|
122
|
-
"plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)\n",
|
123
|
-
"plt.plot([0, 1], [0, 1], 'k--', label='No Skill')\n",
|
124
|
-
"plt.xlabel('False Positive Rate')\n",
|
125
|
-
"plt.ylabel('True Positive Rate')\n",
|
126
|
-
"plt.title('ROC Curve for Breast Cancer Classification')\n",
|
127
|
-
"plt.legend()\n",
|
128
|
-
"plt.show()"
|
129
|
-
]
|
130
|
-
}
|
131
|
-
],
|
132
|
-
"metadata": {
|
133
|
-
"kernelspec": {
|
134
|
-
"display_name": "Python 3 (ipykernel)",
|
135
|
-
"language": "python",
|
136
|
-
"name": "python3"
|
137
|
-
},
|
138
|
-
"language_info": {
|
139
|
-
"codemirror_mode": {
|
140
|
-
"name": "ipython",
|
141
|
-
"version": 3
|
142
|
-
},
|
143
|
-
"file_extension": ".py",
|
144
|
-
"mimetype": "text/x-python",
|
145
|
-
"name": "python",
|
146
|
-
"nbconvert_exporter": "python",
|
147
|
-
"pygments_lexer": "ipython3",
|
148
|
-
"version": "3.12.4"
|
149
|
-
}
|
150
|
-
},
|
151
|
-
"nbformat": 4,
|
152
|
-
"nbformat_minor": 5
|
153
|
-
}
|