noshot 9.0.0__py3-none-any.whl → 11.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/data/ML TS XAI/ML/ML Lab AllinOne.ipynb +961 -0
- {noshot-9.0.0.dist-info → noshot-11.0.0.dist-info}/METADATA +1 -1
- {noshot-9.0.0.dist-info → noshot-11.0.0.dist-info}/RECORD +6 -5
- {noshot-9.0.0.dist-info → noshot-11.0.0.dist-info}/WHEEL +1 -1
- {noshot-9.0.0.dist-info → noshot-11.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {noshot-9.0.0.dist-info → noshot-11.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,961 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "markdown",
|
5
|
+
"id": "007cdc19-d98f-4533-a27b-2ea68643fd09",
|
6
|
+
"metadata": {},
|
7
|
+
"source": [
|
8
|
+
"#### __Most plots for datasets with a categorical output variable are shown in the KNN experiment__\n",
|
9
|
+
"#### __They should also be used in other relevant experiments, such as Naive Bayes and Logistic Regression__"
|
10
|
+
]
|
11
|
+
},
|
12
|
+
{
|
13
|
+
"cell_type": "markdown",
|
14
|
+
"id": "c73ca8b2-5a11-488d-97ca-160ad0ff4f18",
|
15
|
+
"metadata": {},
|
16
|
+
"source": [
|
17
|
+
"### __***PCA***__"
|
18
|
+
]
|
19
|
+
},
|
20
|
+
{
|
21
|
+
"cell_type": "code",
|
22
|
+
"execution_count": null,
|
23
|
+
"id": "e4a62599-bb3d-4b3d-8bcd-ec26893e3921",
|
24
|
+
"metadata": {},
|
25
|
+
"outputs": [],
|
26
|
+
"source": [
|
27
|
+
"import pandas as pd\n",
|
28
|
+
"import numpy as np\n",
|
29
|
+
"import seaborn as sns\n",
|
30
|
+
"import matplotlib.pyplot as plt\n",
|
31
|
+
"from sklearn.preprocessing import StandardScaler\n",
|
32
|
+
"from sklearn.decomposition import PCA\n",
|
33
|
+
"\n",
|
34
|
+
"df = pd.read_table('data/balance-scale.csv', delimiter=',')\n",
|
35
|
+
"print(\"Shape:\", df.shape)\n",
|
36
|
+
"display(df.head())\n",
|
37
|
+
"\n",
|
38
|
+
"X = df.drop(columns='class name')\n",
|
39
|
+
"y = df['class name']\n",
|
40
|
+
"\n",
|
41
|
+
"scaled = StandardScaler().fit_transform(X)\n",
|
42
|
+
"pca = PCA(n_components=2).fit_transform(scaled)\n",
|
43
|
+
"\n",
|
44
|
+
"final = pd.DataFrame(pca, columns=['PC1', 'PC2'])\n",
|
45
|
+
"final['target'] = df['class name']\n",
|
46
|
+
"final.head()\n",
|
47
|
+
"\n",
|
48
|
+
"sns.countplot(df, x='class name', hue='class name')\n",
|
49
|
+
"plt.show()\n",
|
50
|
+
"sns.heatmap(X.corr(), cmap='Blues')\n",
|
51
|
+
"plt.show()\n",
|
52
|
+
"sns.pairplot(X)\n",
|
53
|
+
"plt.show()\n",
|
54
|
+
"sns.scatterplot(final, x='PC1', y='PC2', hue='target')\n",
|
55
|
+
"plt.show()"
|
56
|
+
]
|
57
|
+
},
|
58
|
+
{
|
59
|
+
"cell_type": "markdown",
|
60
|
+
"id": "9c779383-f645-4ee2-8bd0-4d9ce2f900fb",
|
61
|
+
"metadata": {},
|
62
|
+
"source": [
|
63
|
+
"### __***KNN***__"
|
64
|
+
]
|
65
|
+
},
|
66
|
+
{
|
67
|
+
"cell_type": "code",
|
68
|
+
"execution_count": null,
|
69
|
+
"id": "4b52d350-cc91-4505-96b5-8fad7f4eb6f7",
|
70
|
+
"metadata": {},
|
71
|
+
"outputs": [],
|
72
|
+
"source": [
|
73
|
+
"import numpy as np\n",
|
74
|
+
"import pandas as pd\n",
|
75
|
+
"import seaborn as sns\n",
|
76
|
+
"import matplotlib.pyplot as plt\n",
|
77
|
+
"from sklearn.preprocessing import StandardScaler\n",
|
78
|
+
"from sklearn.model_selection import train_test_split, cross_val_score\n",
|
79
|
+
"from sklearn.decomposition import PCA\n",
|
80
|
+
"from sklearn.neighbors import KNeighborsClassifier\n",
|
81
|
+
"from sklearn.metrics import confusion_matrix, classification_report\n",
|
82
|
+
"from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay\n",
|
83
|
+
"from mlxtend.plotting import plot_decision_regions\n",
|
84
|
+
"from scipy.cluster.hierarchy import dendrogram, linkage\n",
|
85
|
+
"from scipy.spatial import Voronoi, voronoi_plot_2d\n",
|
86
|
+
"\n",
|
87
|
+
"df = pd.read_csv(\"data/sobar-72.csv\")\n",
|
88
|
+
"print(\"Shape:\", df.shape)\n",
|
89
|
+
"display(df.head())\n",
|
90
|
+
"\n",
|
91
|
+
"X = df.drop(columns='ca_cervix')\n",
|
92
|
+
"y = df['ca_cervix']\n",
|
93
|
+
"\n",
|
94
|
+
"X_scaled = StandardScaler().fit_transform(X)\n",
|
95
|
+
"X_pca = PCA(n_components=2).fit_transform(X_scaled)\n",
|
96
|
+
"X_train, X_test, y_train, y_test = train_test_split(X_pca, y, \n",
|
97
|
+
" test_size=0.4,\n",
|
98
|
+
" random_state=4)\n",
|
99
|
+
"\n",
|
100
|
+
"knn = KNeighborsClassifier(n_neighbors=15, metric='euclidean') \n",
|
101
|
+
"#can be ['euclidean', 'manhattan', 'minkowski']\n",
|
102
|
+
"knn.fit(X_train, y_train)\n",
|
103
|
+
"y_pred = knn.predict(X_test)\n",
|
104
|
+
"\n",
|
105
|
+
"sns.countplot(df, x='ca_cervix', hue='ca_cervix')\n",
|
106
|
+
"plt.show()\n",
|
107
|
+
"\n",
|
108
|
+
"sns.pairplot(df.iloc[:,:4])\n",
|
109
|
+
"plt.show()\n",
|
110
|
+
"\n",
|
111
|
+
"# don't rely on this block: the mlxtend module may not be installed in the lab\n",
|
112
|
+
"# ========================================================\n",
|
113
|
+
"plot_decision_regions(X_train, y_train.values, clf=knn, legend=2)\n",
|
114
|
+
"plt.xlabel('X')\n",
|
115
|
+
"plt.ylabel('Y')\n",
|
116
|
+
"plt.title(f'KNN with K=5 using Euclidean Distance')\n",
|
117
|
+
"plt.show()\n",
|
118
|
+
"# ========================================================\n",
|
119
|
+
"print(classification_report(y_test, y_pred))\n",
|
120
|
+
"print(\"Accuracy\", accuracy_score(y_test, y_pred))\n",
|
121
|
+
"ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred)).plot()\n",
|
122
|
+
"plt.show()\n",
|
123
|
+
"\n",
|
124
|
+
"errors = [1 - cross_val_score(KNeighborsClassifier(n_neighbors=k), \n",
|
125
|
+
" X_train, y_train).mean() for k in range(1, 21)]\n",
|
126
|
+
"plt.plot(range(1, 21), errors, marker='o')\n",
|
127
|
+
"plt.title(\"Elbow Method for Optimal k\")\n",
|
128
|
+
"plt.xlabel(\"k\")\n",
|
129
|
+
"plt.ylabel(\"Error\")\n",
|
130
|
+
"plt.show()\n",
|
131
|
+
"\n",
|
132
|
+
"vor = Voronoi(X_pca)\n",
|
133
|
+
"voronoi_plot_2d(vor, show_vertices=False)\n",
|
134
|
+
"plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)\n",
|
135
|
+
"plt.show()\n",
|
136
|
+
"\n",
|
137
|
+
"# use this instead for decision boundary graph\n",
|
138
|
+
"x_min, x_max = X_pca[:,0].min() - 1, X_pca[:,0].max() + 1\n",
|
139
|
+
"y_min, y_max = X_pca[:,1].min() - 1, X_pca[:,1].max() + 1\n",
|
140
|
+
"xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02), \n",
|
141
|
+
" np.arange(y_min, y_max, 0.02))\n",
|
142
|
+
"\n",
|
143
|
+
"Z = knn.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)\n",
|
144
|
+
"\n",
|
145
|
+
"plt.figure()\n",
|
146
|
+
"plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)\n",
|
147
|
+
"plt.scatter(X_pca[:,0], X_pca[:,1], c=y, cmap=plt.cm.coolwarm, s=20, \n",
|
148
|
+
" edgecolors='k')\n",
|
149
|
+
"plt.title('Decision surface')\n",
|
150
|
+
"plt.show()"
|
151
|
+
]
|
152
|
+
},
|
153
|
+
{
|
154
|
+
"cell_type": "markdown",
|
155
|
+
"id": "b9a6637d-95a0-45c0-8527-0febe5ce29b4",
|
156
|
+
"metadata": {},
|
157
|
+
"source": [
|
158
|
+
"### __***LDA***__"
|
159
|
+
]
|
160
|
+
},
|
161
|
+
{
|
162
|
+
"cell_type": "code",
|
163
|
+
"execution_count": null,
|
164
|
+
"id": "6fcb1964-95eb-4d1a-9961-8cf3fe9961e4",
|
165
|
+
"metadata": {},
|
166
|
+
"outputs": [],
|
167
|
+
"source": [
|
168
|
+
"import pandas as pd\n",
|
169
|
+
"import matplotlib.pyplot as plt\n",
|
170
|
+
"from sklearn.preprocessing import StandardScaler\n",
|
171
|
+
"from sklearn.model_selection import train_test_split\n",
|
172
|
+
"from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA\n",
|
173
|
+
"from sklearn.metrics import accuracy_score\n",
|
174
|
+
"\n",
|
175
|
+
"df = pd.read_csv(r\"data\\doctor-visits.csv\")\n",
|
176
|
+
"print(\"Shape:\", df.shape)\n",
|
177
|
+
"display(df.head())\n",
|
178
|
+
"\n",
|
179
|
+
"X = df.drop(columns=['Number of Doctors Visited'])\n",
|
180
|
+
"y = df['Number of Doctors Visited']\n",
|
181
|
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, \n",
|
182
|
+
" random_state=4)\n",
|
183
|
+
"\n",
|
184
|
+
"lda = LDA(n_components=2)\n",
|
185
|
+
"X_train = lda.fit_transform(X_train, y_train)\n",
|
186
|
+
"X_test = lda.fit_transform(X_test, y_test)\n",
|
187
|
+
"\n",
|
188
|
+
"lda.fit(X_train,y_train)\n",
|
189
|
+
"y_pred = lda.predict(X_test)\n",
|
190
|
+
"print (\"Accuracy:\", accuracy_score(y_test, y_pred))\n",
|
191
|
+
"\n",
|
192
|
+
"plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap='viridis')\n",
|
193
|
+
"plt.title('LDA Dimensionality Reduction')\n",
|
194
|
+
"plt.xlabel('LDA Component 1')\n",
|
195
|
+
"plt.ylabel('LDA Component 2')\n",
|
196
|
+
"plt.show()"
|
197
|
+
]
|
198
|
+
},
|
199
|
+
{
|
200
|
+
"cell_type": "markdown",
|
201
|
+
"id": "468327ab-beff-425b-ba7a-3e2b373ba9ae",
|
202
|
+
"metadata": {},
|
203
|
+
"source": [
|
204
|
+
"### __***Linear Regression***__"
|
205
|
+
]
|
206
|
+
},
|
207
|
+
{
|
208
|
+
"cell_type": "code",
|
209
|
+
"execution_count": null,
|
210
|
+
"id": "2511f5c4-b720-4889-b1f3-ca92266bf09b",
|
211
|
+
"metadata": {},
|
212
|
+
"outputs": [],
|
213
|
+
"source": [
|
214
|
+
"import pandas as pd\n",
|
215
|
+
"import matplotlib.pyplot as plt\n",
|
216
|
+
"from sklearn.model_selection import train_test_split\n",
|
217
|
+
"from sklearn.linear_model import LinearRegression\n",
|
218
|
+
"from sklearn.metrics import r2_score, mean_squared_error\n",
|
219
|
+
"\n",
|
220
|
+
"df = pd.read_excel(\"data/real-estate.xlsx\")\n",
|
221
|
+
"print(\"Shape:\", df.shape)\n",
|
222
|
+
"display(df.head())\n",
|
223
|
+
"\n",
|
224
|
+
"X = df[['X5 latitude']].values\n",
|
225
|
+
"y = df['Y house price of unit area'].values\n",
|
226
|
+
"\n",
|
227
|
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
|
228
|
+
"\n",
|
229
|
+
"model = LinearRegression().fit(X_train, y_train)\n",
|
230
|
+
"y_train_pred = model.predict(X_train)\n",
|
231
|
+
"y_test_pred = model.predict(X_test)\n",
|
232
|
+
"\n",
|
233
|
+
"sns.heatmap(df.corr(), annot=True)\n",
|
234
|
+
"plt.show()\n",
|
235
|
+
"\n",
|
236
|
+
"plt.figure(figsize=(10,5))\n",
|
237
|
+
"plt.scatter(X_train, y_train, label='Train Data Points', edgecolor='k')\n",
|
238
|
+
"plt.scatter(X_test, y_test, label='Test Data Points', edgecolor='k')\n",
|
239
|
+
"plt.plot(X_train, model.predict(X_train), color='red', \n",
|
240
|
+
" label='Linear Regression Line')\n",
|
241
|
+
"plt.legend()\n",
|
242
|
+
"plt.grid()\n",
|
243
|
+
"plt.show()\n",
|
244
|
+
"\n",
|
245
|
+
"plt.figure(figsize=(10,5))\n",
|
246
|
+
"plt.scatter(X_test, y_test, label='Test Data Points', edgecolor='k')\n",
|
247
|
+
"plt.plot(X_test, model.predict(X_test), color='red', \n",
|
248
|
+
" label='Linear Regression Line')\n",
|
249
|
+
"for i in range(len(X_test)):\n",
|
250
|
+
" plt.plot((X_test[i], X_test[i]), (y_test[i], y_test_pred[i]), \n",
|
251
|
+
" color='blue', linestyle='--')\n",
|
252
|
+
"plt.legend()\n",
|
253
|
+
"plt.grid()\n",
|
254
|
+
"plt.show()\n",
|
255
|
+
"\n",
|
256
|
+
"print(\"Train MSE:\", mean_squared_error(y_train, y_train_pred))\n",
|
257
|
+
"print(\"Train R2 Score:\", r2_score(y_train, y_train_pred))\n",
|
258
|
+
"print(\"Test MSE:\", mean_squared_error(y_test, y_test_pred))\n",
|
259
|
+
"print(\"Test R2 Score:\", r2_score(y_test, y_test_pred))"
|
260
|
+
]
|
261
|
+
},
|
262
|
+
{
|
263
|
+
"cell_type": "markdown",
|
264
|
+
"id": "c4a1c9ec-ca2e-483c-961b-2214c6f966d2",
|
265
|
+
"metadata": {},
|
266
|
+
"source": [
|
267
|
+
"### __***Logistic Regression***__"
|
268
|
+
]
|
269
|
+
},
|
270
|
+
{
|
271
|
+
"cell_type": "code",
|
272
|
+
"execution_count": null,
|
273
|
+
"id": "39fd591b-1bf3-48ad-83df-7a0832fe5742",
|
274
|
+
"metadata": {},
|
275
|
+
"outputs": [],
|
276
|
+
"source": [
|
277
|
+
"import pandas as pd\n",
|
278
|
+
"import numpy as np\n",
|
279
|
+
"import seaborn as sns\n",
|
280
|
+
"import matplotlib.pyplot as plt\n",
|
281
|
+
"from sklearn.preprocessing import MinMaxScaler, LabelEncoder\n",
|
282
|
+
"from sklearn.model_selection import train_test_split\n",
|
283
|
+
"from sklearn.decomposition import PCA\n",
|
284
|
+
"from sklearn.linear_model import LogisticRegression\n",
|
285
|
+
"from sklearn.metrics import accuracy_score, classification_report, auc\n",
|
286
|
+
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve\n",
|
287
|
+
"\n",
|
288
|
+
"df = pd.read_csv('data/magic04.data', header=None)\n",
|
289
|
+
"display(df.head())\n",
|
290
|
+
"\n",
|
291
|
+
"X = MinMaxScaler().fit_transform(df.drop(columns=[10]))\n",
|
292
|
+
"X = PCA(n_components=1).fit_transform(X)\n",
|
293
|
+
"y = LabelEncoder().fit_transform(df[10]) # Convert 'g'/'h' to 0/1\n",
|
294
|
+
"\n",
|
295
|
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, \n",
|
296
|
+
" stratify=y)\n",
|
297
|
+
"lr = LogisticRegression(max_iter=5000, random_state=0)\n",
|
298
|
+
"lr.fit(X_train, y_train)\n",
|
299
|
+
"y_pred = lr.predict(X_test)\n",
|
300
|
+
"\n",
|
301
|
+
"print(f\"Accuracy: {accuracy_score(y_test, y_pred)}\")\n",
|
302
|
+
"print(classification_report(y_test, y_pred))\n",
|
303
|
+
"cm = confusion_matrix(y_test, y_pred)\n",
|
304
|
+
"ConfusionMatrixDisplay(cm, display_labels=['g', 'h']).plot()\n",
|
305
|
+
"plt.show()\n",
|
306
|
+
"\n",
|
307
|
+
"y_pred_proba = lr.predict_proba(X_test)[:, 1]\n",
|
308
|
+
"fpr, tpr, _ = roc_curve(y_test, y_pred_proba, pos_label=1)\n",
|
309
|
+
"roc_auc = auc(fpr, tpr)\n",
|
310
|
+
"plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')\n",
|
311
|
+
"plt.plot([0, 1], [0, 1], 'k--', label='No Skill')\n",
|
312
|
+
"plt.xlabel('False Positive Rate')\n",
|
313
|
+
"plt.ylabel('True Positive Rate')\n",
|
314
|
+
"plt.title('ROC Curve')\n",
|
315
|
+
"plt.legend()\n",
|
316
|
+
"plt.show()\n",
|
317
|
+
"\n",
|
318
|
+
"# may not be a proper S-curve because of low model accuracy\n",
|
319
|
+
"# ====================================================\n",
|
320
|
+
"plt.figure(figsize=(10, 6))\n",
|
321
|
+
"plt.scatter(X, y, color='red', label='Data points (g = 1, h = 0)')\n",
|
322
|
+
"x_values = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)\n",
|
323
|
+
"y_values = lr.predict_proba(x_values)[:, 1] \n",
|
324
|
+
"plt.plot(x_values, y_values, color='blue', label='Logistic Regression S-Curve')\n",
|
325
|
+
"plt.axhline(0.5, color='green', linestyle='--', label='Threshold (0.5)')\n",
|
326
|
+
"plt.title('Logistic Regression: X(PCA Transformed) vs Probability of g')\n",
|
327
|
+
"plt.legend()\n",
|
328
|
+
"plt.grid()\n",
|
329
|
+
"plt.show()"
|
330
|
+
]
|
331
|
+
},
|
332
|
+
{
|
333
|
+
"cell_type": "markdown",
|
334
|
+
"id": "9a251e04-2e7b-4a94-921e-9fd3f18984e7",
|
335
|
+
"metadata": {},
|
336
|
+
"source": [
|
337
|
+
"### __***Naive Bayes***__"
|
338
|
+
]
|
339
|
+
},
|
340
|
+
{
|
341
|
+
"cell_type": "code",
|
342
|
+
"execution_count": null,
|
343
|
+
"id": "985bcacf-bcee-49ef-9c59-ede381fef350",
|
344
|
+
"metadata": {},
|
345
|
+
"outputs": [],
|
346
|
+
"source": [
|
347
|
+
"import pandas as pd\n",
|
348
|
+
"import matplotlib.pyplot as plt\n",
|
349
|
+
"from sklearn.preprocessing import LabelEncoder\n",
|
350
|
+
"from sklearn.model_selection import train_test_split\n",
|
351
|
+
"from sklearn.naive_bayes import GaussianNB\n",
|
352
|
+
"from sklearn.metrics import accuracy_score, classification_report, roc_curve\n",
|
353
|
+
"from sklearn.metrics import ConfusionMatrixDisplay, auc, confusion_matrix\n",
|
354
|
+
"\n",
|
355
|
+
"df = pd.read_csv(\"data/agaricus-lepiota.data\", header=None)\n",
|
356
|
+
"display(df.head())\n",
|
357
|
+
"\n",
|
358
|
+
"df = df.apply(LabelEncoder().fit_transform)\n",
|
359
|
+
"X = df.drop(columns=[0])\n",
|
360
|
+
"y = df[0]\n",
|
361
|
+
"\n",
|
362
|
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, \n",
|
363
|
+
" stratify=y)\n",
|
364
|
+
"\n",
|
365
|
+
"nb = GaussianNB()\n",
|
366
|
+
"nb.fit(X_train, y_train)\n",
|
367
|
+
"y_pred = nb.predict(X_test)\n",
|
368
|
+
"\n",
|
369
|
+
"print(f\"Accuracy: {accuracy_score(y_test, y_pred)}\")\n",
|
370
|
+
"print(classification_report(y_test, y_pred))\n",
|
371
|
+
"ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred)).plot()\n",
|
372
|
+
"plt.show()\n",
|
373
|
+
"\n",
|
374
|
+
"y_pred_proba = nb.predict_proba(X_test)[:, 1]\n",
|
375
|
+
"fpr, tpr, _ = roc_curve(y_test, y_pred_proba)\n",
|
376
|
+
"roc_auc = auc(fpr, tpr)\n",
|
377
|
+
"plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')\n",
|
378
|
+
"plt.plot([0, 1], [0, 1], 'k--', label='No Skill')\n",
|
379
|
+
"plt.xlabel('False Positive Rate')\n",
|
380
|
+
"plt.ylabel('True Positive Rate')\n",
|
381
|
+
"plt.title('ROC Curve for Agaricus-Lepiota Classification')\n",
|
382
|
+
"plt.legend()\n",
|
383
|
+
"plt.show()"
|
384
|
+
]
|
385
|
+
},
|
386
|
+
{
|
387
|
+
"cell_type": "markdown",
|
388
|
+
"id": "77d7fd14-037a-4e11-b248-c3a7f136a9fc",
|
389
|
+
"metadata": {},
|
390
|
+
"source": [
|
391
|
+
"### __***SVM (Linear & Non-Linear)***__"
|
392
|
+
]
|
393
|
+
},
|
394
|
+
{
|
395
|
+
"cell_type": "code",
|
396
|
+
"execution_count": null,
|
397
|
+
"id": "17f1e639-d8ae-41f9-9ba7-1c489e122ff0",
|
398
|
+
"metadata": {},
|
399
|
+
"outputs": [],
|
400
|
+
"source": [
|
401
|
+
"import pandas as pd\n",
|
402
|
+
"import matplotlib.pyplot as plt\n",
|
403
|
+
"from sklearn.model_selection import train_test_split\n",
|
404
|
+
"from sklearn.metrics import accuracy_score, classification_report\n",
|
405
|
+
"from sklearn.metrics import ConfusionMatrixDisplay, roc_curve, auc\n",
|
406
|
+
"from sklearn.preprocessing import StandardScaler\n",
|
407
|
+
"from sklearn.decomposition import PCA\n",
|
408
|
+
"from sklearn.svm import SVC\n",
|
409
|
+
"from mlxtend.plotting import plot_decision_regions\n",
|
410
|
+
"from scipy.io import arff\n",
|
411
|
+
"\n",
|
412
|
+
"data = pd.DataFrame(arff.loadarff(\"data/rice.arff\")[0])\n",
|
413
|
+
"data['Class'] = data['Class'].map({b'Cammeo': 0, b'Osmancik': 1})\n",
|
414
|
+
"\n",
|
415
|
+
"X = PCA(n_components=2).fit_transform(data.drop('Class', axis=1))\n",
|
416
|
+
"y = data['Class']\n",
|
417
|
+
"\n",
|
418
|
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, \n",
|
419
|
+
" random_state=42)\n",
|
420
|
+
"\n",
|
421
|
+
"scaler = StandardScaler()\n",
|
422
|
+
"X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)\n",
|
423
|
+
"\n",
|
424
|
+
"# kernel options: 'linear', 'rbf', 'poly', 'sigmoid'\n",
|
425
|
+
"kernel = 'linear'\n",
|
426
|
+
"\n",
|
427
|
+
"# gamma should be:\n",
|
428
|
+
"# - 'auto' for linear kernel (placeholder only: the linear kernel ignores gamma)\n",
|
429
|
+
"# - 'scale' or 'auto' for rbf, poly, sigmoid kernels\n",
|
430
|
+
"model = SVC(kernel=kernel, C=1, degree=5, \n",
|
431
|
+
" gamma='auto' if kernel == 'linear' else 'scale', probability=True)\n",
|
432
|
+
"model.fit(X_train, y_train)\n",
|
433
|
+
"y_pred = model.predict(X_test)\n",
|
434
|
+
"\n",
|
435
|
+
"print(f\"\\nSVM ({kernel}) Accuracy: {accuracy_score(y_test, y_pred):.2f}\")\n",
|
436
|
+
"print(classification_report(y_test, y_pred))\n",
|
437
|
+
"\n",
|
438
|
+
"plot_decision_regions(X_train, y_train.values, clf=model, legend=2)\n",
|
439
|
+
"plt.title(f'Decision Boundary ({kernel})')\n",
|
440
|
+
"plt.show()\n",
|
441
|
+
"\n",
|
442
|
+
"ConfusionMatrixDisplay.from_predictions(y_test, y_pred, display_labels=[0, 1])\n",
|
443
|
+
"plt.title(f'Confusion Matrix ({kernel})')\n",
|
444
|
+
"plt.show()\n",
|
445
|
+
"\n",
|
446
|
+
"proba = model.predict_proba(X_test)[:, 1]\n",
|
447
|
+
"fpr, tpr, _ = roc_curve(y_test, proba)\n",
|
448
|
+
"plt.plot(fpr, tpr, label=f'{kernel} (AUC = {auc(fpr, tpr):.2f})')\n",
|
449
|
+
"plt.plot([0, 1], [0, 1], 'k--')\n",
|
450
|
+
"plt.xlabel('False Positive Rate')\n",
|
451
|
+
"plt.ylabel('True Positive Rate')\n",
|
452
|
+
"plt.title(f'ROC Curve ({kernel})')\n",
|
453
|
+
"plt.legend()\n",
|
454
|
+
"plt.show()"
|
455
|
+
]
|
456
|
+
},
|
457
|
+
{
|
458
|
+
"cell_type": "markdown",
|
459
|
+
"id": "3047d014-1588-4ba3-b122-8a1e009db945",
|
460
|
+
"metadata": {},
|
461
|
+
"source": [
|
462
|
+
"### __***Feed Forward - Classification (Output Categorical)***__"
|
463
|
+
]
|
464
|
+
},
|
465
|
+
{
|
466
|
+
"cell_type": "code",
|
467
|
+
"execution_count": null,
|
468
|
+
"id": "02cd39c1-edaa-4f18-9298-68e111cf45ce",
|
469
|
+
"metadata": {},
|
470
|
+
"outputs": [],
|
471
|
+
"source": [
|
472
|
+
"import warnings\n",
|
473
|
+
"warnings.filterwarnings('ignore')"
|
474
|
+
]
|
475
|
+
},
|
476
|
+
{
|
477
|
+
"cell_type": "code",
|
478
|
+
"execution_count": null,
|
479
|
+
"id": "8d6b8e4d-e851-4874-af1f-2da2346a9aa3",
|
480
|
+
"metadata": {},
|
481
|
+
"outputs": [],
|
482
|
+
"source": [
|
483
|
+
"import numpy as np\n",
|
484
|
+
"import pandas as pd\n",
|
485
|
+
"import matplotlib.pyplot as plt\n",
|
486
|
+
"import tensorflow as tf\n",
|
487
|
+
"from sklearn.model_selection import train_test_split\n",
|
488
|
+
"from sklearn.metrics import classification_report, accuracy_score \n",
|
489
|
+
"from sklearn.metrics import ConfusionMatrixDisplay, roc_curve, auc\n",
|
490
|
+
"from tensorflow.keras.models import Sequential\n",
|
491
|
+
"from tensorflow.keras.layers import Dense, Dropout\n",
|
492
|
+
"from tensorflow.keras.optimizers import Adam\n",
|
493
|
+
"from tensorflow.keras import regularizers\n",
|
494
|
+
"\n",
|
495
|
+
"df = pd.read_csv(\"data/sobar-72.csv\")\n",
|
496
|
+
"print(\"Shape:\",df.shape)\n",
|
497
|
+
"display(df.head())\n",
|
498
|
+
"\n",
|
499
|
+
"X = df.drop(columns=['ca_cervix'])\n",
|
500
|
+
"y = tf.keras.utils.to_categorical(df['ca_cervix'], num_classes=2)\n",
|
501
|
+
"\n",
|
502
|
+
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, \n",
|
503
|
+
" random_state=42)\n",
|
504
|
+
"\n",
|
505
|
+
"# change l2 (Ridge) to l1 for Lasso regularization\n",
|
506
|
+
"model = Sequential([\n",
|
507
|
+
" Dense(128, activation='relu', input_shape=(19,), \n",
|
508
|
+
" kernel_regularizer=regularizers.l2(0.001)), # if no regularizer remove this\n",
|
509
|
+
" Dropout(0.3),\n",
|
510
|
+
" Dense(64, activation='relu', \n",
|
511
|
+
" kernel_regularizer=regularizers.l2(0.001)), # if no regularizer remove this\n",
|
512
|
+
" Dropout(0.3),\n",
|
513
|
+
" Dense(2, activation='softmax') # makes it classification\n",
|
514
|
+
"])\n",
|
515
|
+
"\n",
|
516
|
+
"model.compile(optimizer=Adam(), loss='categorical_crossentropy', \n",
|
517
|
+
" metrics=['accuracy'])\n",
|
518
|
+
"\n",
|
519
|
+
"history = model.fit(x_train, y_train, epochs=50, batch_size=32, \n",
|
520
|
+
" validation_split=0.2, verbose=1)\n",
|
521
|
+
"\n",
|
522
|
+
"def plot_history(history):\n",
|
523
|
+
" plt.figure(figsize=(12, 5))\n",
|
524
|
+
" plt.subplot(1, 2, 1)\n",
|
525
|
+
" plt.plot(history.history['loss'], label='Training Loss')\n",
|
526
|
+
" plt.plot(history.history['val_loss'], label='Validation Loss')\n",
|
527
|
+
" plt.title('Loss')\n",
|
528
|
+
" plt.subplot(1, 2, 2)\n",
|
529
|
+
" plt.plot(history.history['accuracy'], label='Training Accuracy')\n",
|
530
|
+
" plt.plot(history.history['val_accuracy'], label='Validation Accuracy')\n",
|
531
|
+
" plt.title('Accuracy')\n",
|
532
|
+
" plt.tight_layout()\n",
|
533
|
+
" plt.show()\n",
|
534
|
+
"\n",
|
535
|
+
"plot_history(history)\n",
|
536
|
+
"\n",
|
537
|
+
"loss, accuracy = model.evaluate(x_test, y_test)\n",
|
538
|
+
"print(f'Test accuracy: {accuracy:.4f}, Test loss: {loss:.4f}')\n",
|
539
|
+
"\n",
|
540
|
+
"y_pred = np.argmax(model.predict(x_test), axis=1)\n",
|
541
|
+
"y_test = np.argmax(y_test, axis=1)\n",
|
542
|
+
"\n",
|
543
|
+
"print(\"Classification Report:\")\n",
|
544
|
+
"print(classification_report(y_test, y_pred))\n",
|
545
|
+
"print(f\"Accuracy: {accuracy_score(y_test, y_pred):.2f}\")\n",
|
546
|
+
"ConfusionMatrixDisplay.from_predictions(y_test, y_pred, cmap='Blues')\n",
|
547
|
+
"plt.show()\n",
|
548
|
+
"\n",
|
549
|
+
"y_pred_proba = model.predict(x_test)[:, 1]\n",
|
550
|
+
"fpr, tpr, _ = roc_curve(y_test, y_pred_proba)\n",
|
551
|
+
"plt.plot(fpr, tpr, label=f'ROC curve (area = {auc(fpr, tpr):.2f})')\n",
|
552
|
+
"plt.plot([0, 1], [0, 1], color='navy', linestyle='--')\n",
|
553
|
+
"plt.xlabel('False Positive Rate')\n",
|
554
|
+
"plt.ylabel('True Positive Rate')\n",
|
555
|
+
"plt.title('Receiver Operating Characteristic')\n",
|
556
|
+
"plt.legend(loc=\"lower right\")\n",
|
557
|
+
"plt.show()"
|
558
|
+
]
|
559
|
+
},
|
560
|
+
{
|
561
|
+
"cell_type": "markdown",
|
562
|
+
"id": "86c71922-b7d4-4ae8-ae4e-0f19b4b74af7",
|
563
|
+
"metadata": {},
|
564
|
+
"source": [
|
565
|
+
"### __***Feed Forward - Regression (Output Numerical)***__"
|
566
|
+
]
|
567
|
+
},
|
568
|
+
{
|
569
|
+
"cell_type": "code",
|
570
|
+
"execution_count": null,
|
571
|
+
"id": "ac35d507-57d3-4ea6-a535-3df21c0b4dcd",
|
572
|
+
"metadata": {},
|
573
|
+
"outputs": [],
|
574
|
+
"source": [
|
575
|
+
"import warnings\n",
|
576
|
+
"warnings.filterwarnings('ignore')"
|
577
|
+
]
|
578
|
+
},
|
579
|
+
{
|
580
|
+
"cell_type": "code",
|
581
|
+
"execution_count": null,
|
582
|
+
"id": "02e338ba-2fa5-4187-8337-a4db19af2faf",
|
583
|
+
"metadata": {},
|
584
|
+
"outputs": [],
|
585
|
+
"source": [
|
586
|
+
"import numpy as np\n",
|
587
|
+
"import matplotlib.pyplot as plt\n",
|
588
|
+
"from sklearn.model_selection import train_test_split\n",
|
589
|
+
"from sklearn.preprocessing import StandardScaler\n",
|
590
|
+
"from sklearn.metrics import accuracy_score, mean_squared_error\n",
|
591
|
+
"import tensorflow as tf\n",
|
592
|
+
"from tensorflow.keras.models import Sequential\n",
|
593
|
+
"from tensorflow.keras.layers import Dense\n",
|
594
|
+
"\n",
|
595
|
+
"df = pd.read_csv('data/california.csv')\n",
|
596
|
+
"display(df.head())\n",
|
597
|
+
"\n",
|
598
|
+
"X = df.drop(columns='target')\n",
|
599
|
+
"y = df['target']\n",
|
600
|
+
"\n",
|
601
|
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, \n",
|
602
|
+
" random_state=42)\n",
|
603
|
+
"\n",
|
604
|
+
"scaler = StandardScaler()\n",
|
605
|
+
"X_train = scaler.fit_transform(X_train)\n",
|
606
|
+
"X_test = scaler.transform(X_test)\n",
|
607
|
+
"\n",
|
608
|
+
"# Simple FNN/MLP model\n",
|
609
|
+
"model = Sequential([\n",
|
610
|
+
" Dense(32, activation='relu', input_shape=(X_train.shape[1],)),\n",
|
611
|
+
" Dense(16, activation='relu'),\n",
|
612
|
+
" Dense(1) # makes its regression\n",
|
613
|
+
"])\n",
|
614
|
+
"\n",
|
615
|
+
"model.compile(optimizer='adam', loss='mse')\n",
|
616
|
+
"\n",
|
617
|
+
"history = model.fit(X_train, y_train, epochs=25, validation_split=0.2, verbose=1)\n",
|
618
|
+
"\n",
|
619
|
+
"y_pred = model.predict(X_test).flatten()\n",
|
620
|
+
"mse = mean_squared_error(y_test, y_pred)\n",
|
621
|
+
"print(f\"Regression MSE on California Housing dataset: {mse:.4f}\")\n",
|
622
|
+
"\n",
|
623
|
+
"plt.plot(history.history['loss'], label='Train Loss')\n",
|
624
|
+
"plt.plot(history.history['val_loss'], label='Val Loss')\n",
|
625
|
+
"plt.title('Regression Loss')\n",
|
626
|
+
"plt.xlabel('Epochs')\n",
|
627
|
+
"plt.ylabel('MSE Loss')\n",
|
628
|
+
"plt.legend()\n",
|
629
|
+
"\n",
|
630
|
+
"plt.tight_layout()\n",
|
631
|
+
"plt.show()"
|
632
|
+
]
|
633
|
+
},
|
634
|
+
{
|
635
|
+
"cell_type": "markdown",
|
636
|
+
"id": "5253df17-9602-4e1e-adec-df37b95a87f0",
|
637
|
+
"metadata": {},
|
638
|
+
"source": [
|
639
|
+
"### __***MLP (FNN from scratch above code or learn this)***__"
|
640
|
+
]
|
641
|
+
},
|
642
|
+
{
|
643
|
+
"cell_type": "code",
|
644
|
+
"execution_count": null,
|
645
|
+
"id": "886ac28f-4952-4a26-bd2f-912cac7e8107",
|
646
|
+
"metadata": {},
|
647
|
+
"outputs": [],
|
648
|
+
"source": [
|
649
|
+
"import pandas as pd\n",
|
650
|
+
"from sklearn.preprocessing import LabelEncoder\n",
|
651
|
+
"from sklearn.model_selection import train_test_split\n",
|
652
|
+
"from sklearn.metrics import classification_report, confusion_matrix\n",
|
653
|
+
"from sklearn.metrics import ConfusionMatrixDisplay\n",
|
654
|
+
"from sklearn.neural_network import MLPClassifier\n",
|
655
|
+
"\n",
|
656
|
+
"df = pd.read_csv('data/HeartDiseaseTrain-Test.csv')\n",
|
657
|
+
"display(df.head())\n",
|
658
|
+
"\n",
|
659
|
+
"X = df.drop('target', axis=1)\n",
|
660
|
+
"X = X.apply(LabelEncoder().fit_transform)\n",
|
661
|
+
"\n",
|
662
|
+
"X = X / X.max() #normalize features\n",
|
663
|
+
"y = df['target']\n",
|
664
|
+
"\n",
|
665
|
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, \n",
|
666
|
+
" random_state=1)\n",
|
667
|
+
"\n",
|
668
|
+
"# Using 'relu' activation; others to try: 'identity', 'tanh', 'logistic'\n",
|
669
|
+
"activation = 'relu'\n",
|
670
|
+
"hidden_layers = (8, 8, 8)\n",
|
671
|
+
"\n",
|
672
|
+
"print(f\"\\nActivation: {activation}, Hidden Layers: {hidden_layers}\")\n",
|
673
|
+
"model = MLPClassifier(hidden_layer_sizes=hidden_layers, activation=activation,\n",
|
674
|
+
" solver='adam', max_iter=500, random_state=42)\n",
|
675
|
+
"model.fit(X_train, y_train)\n",
|
676
|
+
"\n",
|
677
|
+
"preds_train = model.predict(X_train)\n",
|
678
|
+
"print(\"\\nTrain Results:\")\n",
|
679
|
+
"print(classification_report(y_train, preds_train, zero_division=0))\n",
|
680
|
+
"ConfusionMatrixDisplay.from_predictions(y_train, preds_train)\n",
|
681
|
+
"plt.show()\n",
|
682
|
+
"\n",
|
683
|
+
"preds_test = model.predict(X_test)\n",
|
684
|
+
"print(\"\\nTest Results:\")\n",
|
685
|
+
"print(classification_report(y_test, preds_test, zero_division=0))\n",
|
686
|
+
"ConfusionMatrixDisplay.from_predictions(y_test, preds_test)\n",
|
687
|
+
"plt.show()"
|
688
|
+
]
|
689
|
+
},
|
690
|
+
{
|
691
|
+
"cell_type": "markdown",
|
692
|
+
"id": "625ac47b-8d21-4e64-9729-791067f4edca",
|
693
|
+
"metadata": {},
|
694
|
+
"source": [
|
695
|
+
"### __***CNN***__"
|
696
|
+
]
|
697
|
+
},
|
698
|
+
{
|
699
|
+
"cell_type": "code",
|
700
|
+
"execution_count": null,
|
701
|
+
"id": "6e34cf42-1b2b-49a9-81c0-c66e380122b0",
|
702
|
+
"metadata": {},
|
703
|
+
"outputs": [],
|
704
|
+
"source": [
|
705
|
+
"import warnings\n",
|
706
|
+
"warnings.filterwarnings('ignore')"
|
707
|
+
]
|
708
|
+
},
|
709
|
+
{
 "cell_type": "code",
 "execution_count": null,
 "id": "9d1d0dc8-af7c-4052-8834-936143fb490c",
 "metadata": {},
 "outputs": [],
 "source": [
  "# CIFAR-10 classification with a small CNN: train for a few epochs, then\n",
  "# plot the test confusion matrix and the training/validation curves.\n",
  "import tensorflow as tf\n",
  "from tensorflow.keras import layers, models\n",
  "# Take the dataset and utilities from tensorflow.keras as well, so the whole\n",
  "# cell uses one Keras implementation instead of mixing in standalone keras.\n",
  "from tensorflow.keras.datasets import cifar10\n",
  "from tensorflow.keras.utils import to_categorical\n",
  "import matplotlib.pyplot as plt\n",
  "from sklearn.metrics import roc_curve, auc, confusion_matrix\n",
  "from sklearn.metrics import ConfusionMatrixDisplay\n",
  "from sklearn.preprocessing import label_binarize\n",
  "\n",
  "# Fix the Python, NumPy and TensorFlow seeds so reruns are comparable.\n",
  "tf.keras.utils.set_random_seed(42)\n",
  "\n",
  "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n",
  "\n",
  "# Scale the pixel values by 1/255 before feeding them to the network.\n",
  "X_train = X_train.astype('float32') / 255.0\n",
  "X_test = X_test.astype('float32') / 255.0\n",
  "\n",
  "# One-hot encode the 10 classes, as required by categorical_crossentropy.\n",
  "y_train = to_categorical(y_train, 10)\n",
  "y_test = to_categorical(y_test, 10)\n",
  "\n",
  "model = models.Sequential([\n",
  "    # Explicit Input layer instead of passing input_shape to the first Conv2D.\n",
  "    layers.Input(shape=(32, 32, 3)),\n",
  "    layers.Conv2D(32, kernel_size=(3, 3), activation=\"relu\"),\n",
  "    layers.MaxPooling2D(),\n",
  "    layers.Conv2D(64, kernel_size=(3, 3), activation=\"relu\"),\n",
  "    layers.MaxPooling2D(),\n",
  "    layers.Flatten(),\n",
  "    layers.Dense(10, activation=\"softmax\")\n",
  "])\n",
  "\n",
  "model.compile(loss=\"categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n",
  "\n",
  "# 20% of the training images are held out to produce the validation curves.\n",
  "history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)\n",
  "\n",
  "y_pred = model.predict(X_test)\n",
  "# Convert to NumPy arrays so scikit-learn receives plain integer labels\n",
  "# rather than TensorFlow tensors.\n",
  "y_pred_classes = tf.argmax(y_pred, axis=1).numpy()\n",
  "y_test_classes = tf.argmax(y_test, axis=1).numpy()\n",
  "\n",
  "conf_matrix = confusion_matrix(y_test_classes, y_pred_classes)\n",
  "ConfusionMatrixDisplay(conf_matrix).plot(cmap='Blues')\n",
  "plt.title('Confusion Matrix')\n",
  "plt.show()\n",
  "\n",
  "# Accuracy and loss side by side, one axis each.\n",
  "fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))\n",
  "\n",
  "ax_acc.plot(history.history['accuracy'], label='Training Accuracy')\n",
  "ax_acc.plot(history.history['val_accuracy'], label='Validation Accuracy')\n",
  "ax_acc.set(title='Accuracy Curve', xlabel='Epochs', ylabel='Accuracy')\n",
  "ax_acc.legend()\n",
  "\n",
  "ax_loss.plot(history.history['loss'], label='Training Loss')\n",
  "ax_loss.plot(history.history['val_loss'], label='Validation Loss')\n",
  "ax_loss.set(title='Loss Curve', xlabel='Epochs', ylabel='Loss')\n",
  "ax_loss.legend()\n",
  "\n",
  "fig.tight_layout()\n",
  "plt.show()"
 ]
},
|
777
|
+
{
|
778
|
+
"cell_type": "markdown",
|
779
|
+
"id": "af7093fd-ea5c-4058-b4a3-3692a5e61eff",
|
780
|
+
"metadata": {},
|
781
|
+
"source": [
|
782
|
+
"### __***CNN Another Example***__"
|
783
|
+
]
|
784
|
+
},
|
785
|
+
{
 "cell_type": "code",
 "execution_count": null,
 "id": "381a98d6-7828-43c8-b0a4-d1c24b229e66",
 "metadata": {},
 "outputs": [],
 "source": [
  "# CNN trained on a folder-per-class image dataset, followed by a prediction\n",
  "# on a single image from that dataset.\n",
  "import numpy as np\n",
  "import tensorflow as tf\n",
  "# Imported here so the cell does not depend on an earlier cell having defined plt.\n",
  "import matplotlib.pyplot as plt\n",
  "from tensorflow.keras import layers, models\n",
  "from tensorflow.keras.preprocessing import image\n",
  "import os\n",
  "\n",
  "dataset_path = 'Pistachio_Image_Dataset'\n",
  "\n",
  "img_height, img_width = 180, 180\n",
  "batch_size = 32\n",
  "\n",
  "# Fix the Python, NumPy and TensorFlow seeds so reruns are comparable.\n",
  "tf.keras.utils.set_random_seed(123)\n",
  "\n",
  "# The same seed and validation_split in both calls keep the two subsets disjoint.\n",
  "train_ds = tf.keras.utils.image_dataset_from_directory(\n",
  "    dataset_path,\n",
  "    validation_split=0.2,\n",
  "    subset=\"training\",\n",
  "    seed=123,\n",
  "    image_size=(img_height, img_width),\n",
  "    batch_size=batch_size)\n",
  "\n",
  "val_ds = tf.keras.utils.image_dataset_from_directory(\n",
  "    dataset_path,\n",
  "    validation_split=0.2,\n",
  "    subset=\"validation\",\n",
  "    seed=123,\n",
  "    image_size=(img_height, img_width),\n",
  "    batch_size=batch_size)\n",
  "\n",
  "# Read the class names from the dataset (sub-directory names in alphanumeric\n",
  "# order) so the label indices and the names used below cannot drift apart.\n",
  "class_names = train_ds.class_names\n",
  "\n",
  "model = models.Sequential([\n",
  "    layers.Input(shape=(img_height, img_width, 3)),\n",
  "    layers.Rescaling(1./255),\n",
  "    layers.Conv2D(32, 3, activation='relu'),\n",
  "    layers.MaxPooling2D(),\n",
  "    layers.Conv2D(64, 3, activation='relu'),\n",
  "    layers.MaxPooling2D(),\n",
  "    layers.Conv2D(128, 3, activation='relu'),\n",
  "    layers.MaxPooling2D(),\n",
  "    layers.Flatten(),\n",
  "    layers.Dense(128, activation='relu'),\n",
  "    # One raw logit per class; softmax is applied by the loss (from_logits=True).\n",
  "    layers.Dense(len(class_names))\n",
  "])\n",
  "\n",
  "model.compile(optimizer='adam',\n",
  "              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n",
  "              metrics=['accuracy'])\n",
  "\n",
  "epochs = 5\n",
  "history = model.fit(\n",
  "    train_ds,\n",
  "    validation_data=val_ds,\n",
  "    epochs=epochs\n",
  ")\n",
  "\n",
  "img_path = os.path.join(dataset_path, 'Siirt_Pistachio', 'siirt (11).jpg')\n",
  "# Resize exactly like the training pipeline: same target size, and bilinear\n",
  "# interpolation, which is the image_dataset_from_directory default.\n",
  "img = image.load_img(img_path, target_size=(img_height, img_width), interpolation='bilinear')\n",
  "\n",
  "plt.imshow(img)\n",
  "plt.title(\"Input Image\")\n",
  "plt.axis(\"off\")\n",
  "plt.show()\n",
  "\n",
  "img_array = image.img_to_array(img)\n",
  "# Add a leading batch axis: the model expects a batch of images.\n",
  "img_array = tf.expand_dims(img_array, 0)\n",
  "\n",
  "predictions = model.predict(img_array)\n",
  "# The model outputs logits, so convert them to probabilities here.\n",
  "score = tf.nn.softmax(predictions[0])\n",
  "\n",
  "predicted_class = class_names[np.argmax(score)]\n",
  "confidence = 100 * np.max(score)\n",
  "\n",
  "print(f\"Image most likely belongs to '{predicted_class}' with {confidence:.2f}% confidence.\")"
 ]
},
|
864
|
+
{
|
865
|
+
"cell_type": "markdown",
|
866
|
+
"id": "770b6523-d178-45f6-b6a7-a04d780f5a9a",
|
867
|
+
"metadata": {},
|
868
|
+
"source": [
|
869
|
+
"### __***HMM - Viterbi***__"
|
870
|
+
]
|
871
|
+
},
|
872
|
+
{
 "cell_type": "code",
 "execution_count": null,
 "id": "eebdc5a0-8aef-41ff-ad89-d9424aa8a43f",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Hidden Markov Model with fixed parameters: draw the state/emission graph\n",
  "# and decode the most likely hidden-state path with the Viterbi algorithm.\n",
  "import numpy as np\n",
  "import pandas as pd\n",
  "import networkx as nx\n",
  "import matplotlib.pyplot as plt\n",
  "from hmmlearn.hmm import CategoricalHMM\n",
  "\n",
  "visibleStates = ['early', 'mid', 'late']\n",
  "hiddenStates = ['Genuine User', 'Intruder']\n",
  "hiddenInitial = [0.9, 0.1]\n",
  "\n",
  "# Rows: current hidden state, columns: next hidden state.\n",
  "hiddenTransition = np.array([\n",
  "    [0.7, 0.3],\n",
  "    [0.4, 0.6]\n",
  "])\n",
  "\n",
  "# Rows: hidden state, columns: observation, in the order of visibleStates.\n",
  "emissionMatrix = np.array([\n",
  "    [0.8, 0.1, 0.1],\n",
  "    [0.1, 0.3, 0.6]\n",
  "])\n",
  "\n",
  "# Each distribution must sum to 1; fail early if a probability was mistyped.\n",
  "assert np.isclose(np.sum(hiddenInitial), 1.0)\n",
  "assert np.allclose(hiddenTransition.sum(axis=1), 1.0)\n",
  "assert np.allclose(emissionMatrix.sum(axis=1), 1.0)\n",
  "\n",
  "graph = nx.DiGraph()\n",
  "graph.add_nodes_from(visibleStates + hiddenStates)\n",
  "\n",
  "# Hidden state -> hidden state edges, weighted by transition probability.\n",
  "for i, x in enumerate(hiddenStates):\n",
  "    for j, y in enumerate(hiddenStates):\n",
  "        graph.add_edge(x, y, weight=hiddenTransition[i, j])\n",
  "\n",
  "# Hidden state -> observation edges, weighted by emission probability.\n",
  "for i, x in enumerate(hiddenStates):\n",
  "    for j, y in enumerate(visibleStates):\n",
  "        graph.add_edge(x, y, weight=emissionMatrix[i, j])\n",
  "\n",
  "pos = nx.circular_layout(graph)\n",
  "nx.draw(graph, pos, with_labels=True, node_size=1500)\n",
  "# The two opposite transitions between the hidden states are drawn on the same\n",
  "# line; label_pos moves each label off the midpoint so they do not overlap.\n",
  "nx.draw_networkx_edge_labels(\n",
  "    graph, pos,\n",
  "    edge_labels=nx.get_edge_attributes(graph, 'weight'),\n",
  "    label_pos=0.3)\n",
  "plt.show()\n",
  "\n",
  "observations = ['early', 'early', 'late', 'mid', 'early', 'late']\n",
  "# Derive the symbol indices from visibleStates so they always match the\n",
  "# column order of emissionMatrix.\n",
  "observationMap = {name: index for index, name in enumerate(visibleStates)}\n",
  "# decode() is given one observation per row, hence the single-column reshape.\n",
  "mappedSequence = np.array([observationMap[o] for o in observations]).reshape(-1, 1)\n",
  "\n",
  "# The parameters are assigned directly; the model is not fitted to data.\n",
  "model = CategoricalHMM(n_components=len(hiddenStates))\n",
  "model.startprob_ = np.asarray(hiddenInitial)\n",
  "model.transmat_ = hiddenTransition\n",
  "model.emissionprob_ = emissionMatrix\n",
  "\n",
  "logValue, bestPath = model.decode(mappedSequence, algorithm=\"viterbi\")\n",
  "\n",
  "decodedPath = [hiddenStates[state] for state in bestPath]\n",
  "\n",
  "result = pd.DataFrame({\n",
  "    'Observation': observations,\n",
  "    'Predicted State': decodedPath\n",
  "})\n",
  "\n",
  "print(\"\\nDecoded Path with Observations:\")\n",
  "display(result)\n",
  "\n",
  "print(\"\\nLog Probability of Best Path:\", logValue)"
 ]
}
|
939
|
+
],
|
940
|
+
"metadata": {
|
941
|
+
"kernelspec": {
|
942
|
+
"display_name": "NEW-VENV-1",
|
943
|
+
"language": "python",
|
944
|
+
"name": "new-venv-1"
|
945
|
+
},
|
946
|
+
"language_info": {
|
947
|
+
"codemirror_mode": {
|
948
|
+
"name": "ipython",
|
949
|
+
"version": 3
|
950
|
+
},
|
951
|
+
"file_extension": ".py",
|
952
|
+
"mimetype": "text/x-python",
|
953
|
+
"name": "python",
|
954
|
+
"nbconvert_exporter": "python",
|
955
|
+
"pygments_lexer": "ipython3",
|
956
|
+
"version": "3.11.5"
|
957
|
+
}
|
958
|
+
},
|
959
|
+
"nbformat": 4,
|
960
|
+
"nbformat_minor": 5
|
961
|
+
}
|
@@ -2,6 +2,7 @@ noshot/__init__.py,sha256=000R40tii8lDFU8C1fBaD3SOnxD0PWRNWZU-km49YrU,21
|
|
2
2
|
noshot/main.py,sha256=zXegIqjJPARlPnQMS-B2dAENcvyaZkNwmue63Gm8lHU,663
|
3
3
|
noshot/data/ML TS XAI/ML/CNN(Image_for_Folders_5).ipynb,sha256=VAk1gwoDTBMSdXJxiOLJRvWnzJs84kdNr8Tn_1LaGZw,8802
|
4
4
|
noshot/data/ML TS XAI/ML/CNN(Image_form_Folder_2).ipynb,sha256=o3Ho3f1CcYhzNW5yB8PEt5WuxFvgc04_bT73wMmpx14,8772
|
5
|
+
noshot/data/ML TS XAI/ML/ML Lab AllinOne.ipynb,sha256=4E6ddpzBISsp-3aEm_mzMPKW1gSHBrMdhatEUiL59wQ,34015
|
5
6
|
noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb,sha256=dQ3HgLix6HLqPltFiPrElmEdYAsvR6flDpHEIjcngp4,24774
|
6
7
|
noshot/data/ML TS XAI/ML/Json Codes/ML LAB CIA 2.ipynb,sha256=mJFkK3jsrr1I967c3Ovm8jpMnO1wjAfb--pNeYnWZ7I,14767
|
7
8
|
noshot/data/ML TS XAI/ML/ML 1/1. EDA-PCA (Balance Scale Dataset).ipynb,sha256=1QYmUb1QZ4FtmdwoWhTbF9divKNMOxS8AMOy56At0xg,3625
|
@@ -64,8 +65,8 @@ noshot/data/ML TS XAI/ML/ML Lab H Sec/student-mat.csv,sha256=dnzX-s54dAWJJsKLcXC
|
|
64
65
|
noshot/data/ML TS XAI/ML/ML Lab H Sec/student-por.csv,sha256=6nnPKJRApzZilddcT1eTL_i0dSqvIvgmhM4pi3R83bs,117234
|
65
66
|
noshot/utils/__init__.py,sha256=QVrN1ZpzPXxZqDOqot5-t_ulFjZXVx7Cvr-Is9AK0po,110
|
66
67
|
noshot/utils/shell_utils.py,sha256=-XfgYlNQlULa_rRJ3vsfTns4m_jiueGEj396J_y0Gus,2611
|
67
|
-
noshot-
|
68
|
-
noshot-
|
69
|
-
noshot-
|
70
|
-
noshot-
|
71
|
-
noshot-
|
68
|
+
noshot-11.0.0.dist-info/licenses/LICENSE.txt,sha256=fgCruaVm5cUjFGOeEoGIimT6nnUunBqcNZHpGzK8TSw,1086
|
69
|
+
noshot-11.0.0.dist-info/METADATA,sha256=XK3d0s_DAtwG-YPjhwU2UiaSnoLvmVbhovIR1ilNZU0,2574
|
70
|
+
noshot-11.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
71
|
+
noshot-11.0.0.dist-info/top_level.txt,sha256=UL-c0HffdRwohz-y9icY_rnY48pQDdxGcBsgyCKh2Q8,7
|
72
|
+
noshot-11.0.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|