noshot 6.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. noshot/data/ML TS XAI/ML/CNN(Image_for_Folders_5).ipynb +201 -0
  2. noshot/data/ML TS XAI/ML/CNN(Image_form_Folder_2).ipynb +201 -0
  3. noshot/data/ML TS XAI/ML/ML 1/1. EDA-PCA (Balance Scale Dataset).ipynb +147 -0
  4. noshot/data/ML TS XAI/ML/ML 1/1. EDA-PCA (Rice Dataset).ipynb +181 -0
  5. noshot/data/ML TS XAI/ML/ML 1/10. HMM Veterbi.ipynb +152 -0
  6. noshot/data/ML TS XAI/ML/ML 1/2. KNN (Balance Scale Dataset).ipynb +117 -0
  7. noshot/data/ML TS XAI/ML/ML 1/2. KNN (Iris Dataset).ipynb +156 -0
  8. noshot/data/ML TS XAI/ML/ML 1/2. KNN (Sobar-72 Dataset).ipynb +215 -0
  9. noshot/data/ML TS XAI/ML/ML 1/3. LDA (Balance Scale Dataset).ipynb +78 -0
  10. noshot/data/ML TS XAI/ML/ML 1/3. LDA (NPHA Doctor Visits Dataset).ipynb +114 -0
  11. noshot/data/ML TS XAI/ML/ML 1/4. Linear Regression (Machine Dataset).ipynb +115 -0
  12. noshot/data/ML TS XAI/ML/ML 1/4. Linear Regression (Real Estate Dataset).ipynb +146 -0
  13. noshot/data/ML TS XAI/ML/ML 1/5. Logistic Regression (Magic04 Dataset).ipynb +130 -0
  14. noshot/data/ML TS XAI/ML/ML 1/5. Logistic Regression (Wine Dataset).ipynb +112 -0
  15. noshot/data/ML TS XAI/ML/ML 1/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb +118 -0
  16. noshot/data/ML TS XAI/ML/ML 1/6. Naive Bayes Classifier (Wine Dataset).ipynb +89 -0
  17. noshot/data/ML TS XAI/ML/ML 1/7. SVM (Rice Dataset).ipynb +120 -0
  18. noshot/data/ML TS XAI/ML/ML 1/8. FeedForward NN (Sobar72 Dataset).ipynb +262 -0
  19. noshot/data/ML TS XAI/ML/ML 1/9. CNN (Cifar10 Dataset).ipynb +156 -0
  20. noshot/data/ML TS XAI/ML/ML 2/1. PCA.ipynb +162 -0
  21. noshot/data/ML TS XAI/ML/ML 2/10. CNN.ipynb +100 -0
  22. noshot/data/ML TS XAI/ML/ML 2/11. HMM.ipynb +336 -0
  23. noshot/data/ML TS XAI/ML/ML 2/2. KNN.ipynb +149 -0
  24. noshot/data/ML TS XAI/ML/ML 2/3. LDA.ipynb +132 -0
  25. noshot/data/ML TS XAI/ML/ML 2/4. Linear Regression.ipynb +86 -0
  26. noshot/data/ML TS XAI/ML/ML 2/5. Logistic Regression.ipynb +115 -0
  27. noshot/data/ML TS XAI/ML/ML 2/6. Naive Bayes (Titanic).ipynb +196 -0
  28. noshot/data/ML TS XAI/ML/ML 2/6. Naive Bayes (Wine).ipynb +98 -0
  29. noshot/data/ML TS XAI/ML/ML 2/7. SVM Linear.ipynb +109 -0
  30. noshot/data/ML TS XAI/ML/ML 2/8. SVM Non-Linear.ipynb +195 -0
  31. noshot/data/ML TS XAI/ML/ML 2/9. FNN With Regularization.ipynb +189 -0
  32. noshot/data/ML TS XAI/ML/ML 2/9. FNN Without Regularization.ipynb +197 -0
  33. noshot/data/ML TS XAI/ML/ML 2/All in One Lab CIA 1 Q.ipynb +1087 -0
  34. noshot/data/ML TS XAI/ML/ML 3 (Latest)/1. PCA EDA.ipynb +274 -0
  35. noshot/data/ML TS XAI/ML/ML 3 (Latest)/10. CNN.ipynb +170 -0
  36. noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 2.ipynb +1087 -0
  37. noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 3.ipynb +178 -0
  38. noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 4.ipynb +185 -0
  39. noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM.ipynb +106 -0
  40. noshot/data/ML TS XAI/ML/ML 3 (Latest)/2. KNN.ipynb +177 -0
  41. noshot/data/ML TS XAI/ML/ML 3 (Latest)/3. LDA.ipynb +195 -0
  42. noshot/data/ML TS XAI/ML/ML 3 (Latest)/4. Linear Regression.ipynb +267 -0
  43. noshot/data/ML TS XAI/ML/ML 3 (Latest)/5. Logistic Regression.ipynb +104 -0
  44. noshot/data/ML TS XAI/ML/ML 3 (Latest)/6. Bayesian Classifier.ipynb +109 -0
  45. noshot/data/ML TS XAI/ML/ML 3 (Latest)/7. SVM.ipynb +220 -0
  46. noshot/data/ML TS XAI/ML/ML 3 (Latest)/8. MLP.ipynb +99 -0
  47. noshot/data/ML TS XAI/ML/ML 3 (Latest)/9. Ridge - Lasso.ipynb +211 -0
  48. noshot/data/ML TS XAI/ML/ML 3 (Latest)/9. Ridge Lasso 2.ipynb +99 -0
  49. noshot/data/ML TS XAI/ML/ML 3 (Latest)/Image Load Example.ipynb +118 -0
  50. noshot/data/ML TS XAI/ML/ML 3 (Latest)/Updated_Untitled.ipynb +603 -0
  51. noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb +691 -0
  52. {noshot-6.0.0.dist-info → noshot-8.0.0.dist-info}/METADATA +1 -1
  53. noshot-8.0.0.dist-info/RECORD +60 -0
  54. {noshot-6.0.0.dist-info → noshot-8.0.0.dist-info}/WHEEL +1 -1
  55. noshot/data/ML TS XAI/XAI/Q1.ipynb +0 -377
  56. noshot/data/ML TS XAI/XAI/Q2.ipynb +0 -362
  57. noshot/data/ML TS XAI/XAI/Q3.ipynb +0 -637
  58. noshot/data/ML TS XAI/XAI/Q4.ipynb +0 -206
  59. noshot/data/ML TS XAI/XAI/Q5.ipynb +0 -1018
  60. noshot-6.0.0.dist-info/RECORD +0 -14
  61. {noshot-6.0.0.dist-info → noshot-8.0.0.dist-info}/licenses/LICENSE.txt +0 -0
  62. {noshot-6.0.0.dist-info → noshot-8.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1087 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "R7euuRFaCdIZ"
7
+ },
8
+ "source": [
9
+ "### ***Required Packages***"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": null,
15
+ "metadata": {
16
+ "executionInfo": {
17
+ "elapsed": 6,
18
+ "status": "ok",
19
+ "timestamp": 1741051571358,
20
+ "user": {
21
+ "displayName": "Jaison A",
22
+ "userId": "07006398627763032071"
23
+ },
24
+ "user_tz": -330
25
+ },
26
+ "id": "c5Fdgw1I3gJP"
27
+ },
28
+ "outputs": [],
29
+ "source": [
30
+ "import pandas as pd\n",
31
+ "import numpy as np\n",
32
+ "import matplotlib.pyplot as plt\n",
33
+ "from sklearn.neighbors import KNeighborsClassifier\n",
34
+ "from sklearn.linear_model import LogisticRegression,LinearRegression\n",
35
+ "from sklearn.cluster import KMeans\n",
36
+ "from sklearn.model_selection import train_test_split\n",
37
+ "from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,f1_score,r2_score,adjusted_rand_score\n",
38
+ "from sklearn.decomposition import PCA\n",
39
+ "from sklearn.preprocessing import LabelEncoder,MinMaxScaler,StandardScaler\n",
40
+ "from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
41
+ "import seaborn as sns"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": null,
47
+ "metadata": {
48
+ "colab": {
49
+ "base_uri": "https://localhost:8080/"
50
+ },
51
+ "executionInfo": {
52
+ "elapsed": 2335,
53
+ "status": "ok",
54
+ "timestamp": 1741051573697,
55
+ "user": {
56
+ "displayName": "Jaison A",
57
+ "userId": "07006398627763032071"
58
+ },
59
+ "user_tz": -330
60
+ },
61
+ "id": "CZiiMp0u4QC1",
62
+ "outputId": "a0067b93-e036-4961-e8c6-31a318689332"
63
+ },
64
+ "outputs": [],
65
+ "source": [
66
+ "from google.colab import drive\n",
67
+ "drive.mount('/content/drive')"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "markdown",
72
+ "metadata": {
73
+ "id": "qCogj3nw4UUy"
74
+ },
75
+ "source": [
76
+ "### ***Question 1***"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": null,
82
+ "metadata": {
83
+ "colab": {
84
+ "base_uri": "https://localhost:8080/",
85
+ "height": 0
86
+ },
87
+ "executionInfo": {
88
+ "elapsed": 9,
89
+ "status": "ok",
90
+ "timestamp": 1741051573708,
91
+ "user": {
92
+ "displayName": "Jaison A",
93
+ "userId": "07006398627763032071"
94
+ },
95
+ "user_tz": -330
96
+ },
97
+ "id": "G9xT9oSK4R7q",
98
+ "outputId": "e95932f3-44ae-4401-8723-860a9e41dd21"
99
+ },
100
+ "outputs": [],
101
+ "source": [
102
+ "df=pd.read_csv('/content/drive/MyDrive/sem 6/Lab/ML Lab/SOC_LAB1/dataset/KNN/heart_disease_uci.csv')\n",
103
+ "display(df.head())"
104
+ ]
105
+ },
106
+ {
107
+ "cell_type": "markdown",
108
+ "metadata": {
109
+ "id": "xDAWQGs54t31"
110
+ },
111
+ "source": [
112
+ "**Encode Categorical Columns and Handle Missing Values**"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": null,
118
+ "metadata": {
119
+ "colab": {
120
+ "base_uri": "https://localhost:8080/"
121
+ },
122
+ "executionInfo": {
123
+ "elapsed": 16,
124
+ "status": "ok",
125
+ "timestamp": 1741051573727,
126
+ "user": {
127
+ "displayName": "Jaison A",
128
+ "userId": "07006398627763032071"
129
+ },
130
+ "user_tz": -330
131
+ },
132
+ "id": "8w3eecNs4xU8",
133
+ "outputId": "24fdd56c-0327-4233-ae56-3e6a7eb41849"
134
+ },
135
+ "outputs": [],
136
+ "source": [
137
+ "features=['id','age','sex','dataset','cp','trestbps','chol','fbs','restecg','thalch','exang','oldpeak','slope','ca','thal']\n",
138
+ "target=['num']\n",
139
+ "\n",
140
+ "le=LabelEncoder()\n",
141
+ "df['sex']=le.fit_transform(df['sex'])\n",
142
+ "df['dataset']=le.fit_transform(df['dataset'])\n",
143
+ "df['cp']=le.fit_transform(df['cp'])\n",
144
+ "df['fbs']=le.fit_transform(df['fbs'])\n",
145
+ "df['restecg']=le.fit_transform(df['restecg'])\n",
146
+ "df['exang']=le.fit_transform(df['exang'])\n",
147
+ "df['slope']=le.fit_transform(df['slope'])\n",
148
+ "df['thal']=le.fit_transform(df['thal'])\n",
149
+ "\n",
150
+ "\n",
151
+ "\n",
152
+ "df['trestbps']=df['trestbps'].fillna(df['trestbps'].mean())\n",
153
+ "df['chol']=df['chol'].fillna(df['chol'].mean())\n",
154
+ "df['fbs']=df['fbs'].fillna(df['fbs'])\n",
155
+ "df['restecg']=df['restecg'].fillna(df['restecg'])\n",
156
+ "df['thalch']=df['thalch'].fillna(df['thalch'].mean())\n",
157
+ "df['exang']=df['exang'].fillna(df['exang'])\n",
158
+ "df['oldpeak']=df['oldpeak'].fillna(df['oldpeak'].mean())\n",
159
+ "df['thal']=df['thal'].fillna(df['thal'].mean())\n",
160
+ "df['ca']=df['ca'].fillna(df['ca'].mean())\n",
161
+ "df['slope']=df['slope'].fillna(df['slope'].mean())\n",
162
+ "\n",
163
+ "print(df.isnull().sum())"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "markdown",
168
+ "metadata": {
169
+ "id": "zCvcMIjt7jaL"
170
+ },
171
+ "source": [
172
+ "**Data Split, Scaling, KNN Model and Metrics**"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": null,
178
+ "metadata": {
179
+ "colab": {
180
+ "base_uri": "https://localhost:8080/"
181
+ },
182
+ "executionInfo": {
183
+ "elapsed": 285,
184
+ "status": "ok",
185
+ "timestamp": 1741051574014,
186
+ "user": {
187
+ "displayName": "Jaison A",
188
+ "userId": "07006398627763032071"
189
+ },
190
+ "user_tz": -330
191
+ },
192
+ "id": "8e2BPGlM7fGJ",
193
+ "outputId": "a135b7ff-b7d2-4872-b54f-c6910e75c2d8"
194
+ },
195
+ "outputs": [],
196
+ "source": [
197
+ "x=df[features]\n",
198
+ "y=df[target]\n",
199
+ "\n",
200
+ "X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)\n",
201
+ "\n",
202
+ "\n",
203
+ "scalers={'MinMax':MinMaxScaler(),'Standard':StandardScaler()}\n",
204
+ "\n",
205
+ "for name,scaler in scalers.items():\n",
206
+ " print(f'Applying {name} Scaling : ')\n",
207
+ " scaled_x_train=scaler.fit_transform(X_train)\n",
208
+ " scaled_x_test=scaler.transform(X_test)\n",
209
+ " for k in [3,5,7,9]:\n",
210
+ " knn=KNeighborsClassifier(n_neighbors=k)\n",
211
+ " knn.fit(scaled_x_train,y_train)\n",
212
+ " y_pred=knn.predict(scaled_x_test)\n",
213
+ " print(f\"K : {k}\")\n",
214
+ " print(f'Accuracy Score : {accuracy_score(y_test,y_pred)}')\n",
215
+ " print(f'Confusion Matrix : \\n{confusion_matrix(y_test,y_pred)}')\n",
216
+ " print(f'Classification Report : \\n{classification_report(y_test,y_pred)}')"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "markdown",
221
+ "metadata": {
222
+ "id": "ytnBNsap7xZZ"
223
+ },
224
+ "source": [
225
+ "### ***Question 2***"
226
+ ]
227
+ },
228
+ {
229
+ "cell_type": "code",
230
+ "execution_count": null,
231
+ "metadata": {
232
+ "colab": {
233
+ "base_uri": "https://localhost:8080/",
234
+ "height": 0
235
+ },
236
+ "executionInfo": {
237
+ "elapsed": 3491,
238
+ "status": "ok",
239
+ "timestamp": 1741051577507,
240
+ "user": {
241
+ "displayName": "Jaison A",
242
+ "userId": "07006398627763032071"
243
+ },
244
+ "user_tz": -330
245
+ },
246
+ "id": "wdzmD1O-7wm7",
247
+ "outputId": "b3424de8-6d74-4f0c-f0e8-eaad63772df9"
248
+ },
249
+ "outputs": [],
250
+ "source": [
251
+ "df=pd.read_excel('/content/drive/MyDrive/sem 6/Lab/ML Lab/SOC_LAB1/dataset/KNN/Telco_customer_churn.xlsx')\n",
252
+ "display(df.head())"
253
+ ]
254
+ },
255
+ {
256
+ "cell_type": "markdown",
257
+ "metadata": {
258
+ "id": "AjA-tcxd-12r"
259
+ },
260
+ "source": [
261
+ "**Handle Missing Values**"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "code",
266
+ "execution_count": null,
267
+ "metadata": {
268
+ "colab": {
269
+ "base_uri": "https://localhost:8080/"
270
+ },
271
+ "executionInfo": {
272
+ "elapsed": 9,
273
+ "status": "ok",
274
+ "timestamp": 1741051577509,
275
+ "user": {
276
+ "displayName": "Jaison A",
277
+ "userId": "07006398627763032071"
278
+ },
279
+ "user_tz": -330
280
+ },
281
+ "id": "YWaaBkbf85Qp",
282
+ "outputId": "2a526a88-5be8-4e91-b6a5-80f36f48f8de"
283
+ },
284
+ "outputs": [],
285
+ "source": [
286
+ "df.drop(columns=['Churn Reason','CustomerID'],inplace=True)\n",
287
+ "print(df.isnull().sum())"
288
+ ]
289
+ },
290
+ {
291
+ "cell_type": "markdown",
292
+ "metadata": {
293
+ "id": "S-OTOJ83-twv"
294
+ },
295
+ "source": [
296
+ "**Encode Data**"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": null,
302
+ "metadata": {
303
+ "colab": {
304
+ "base_uri": "https://localhost:8080/",
305
+ "height": 0
306
+ },
307
+ "executionInfo": {
308
+ "elapsed": 61,
309
+ "status": "ok",
310
+ "timestamp": 1741051577564,
311
+ "user": {
312
+ "displayName": "Jaison A",
313
+ "userId": "07006398627763032071"
314
+ },
315
+ "user_tz": -330
316
+ },
317
+ "id": "gcWGzUiV9g1l",
318
+ "outputId": "5d78e4e4-d3dc-4609-ccfc-df783d1de7ed"
319
+ },
320
+ "outputs": [],
321
+ "source": [
322
+ "le=LabelEncoder()\n",
323
+ "for col in df.select_dtypes(include=['object']).columns:\n",
324
+ " df[col]=df[col].astype('str')\n",
325
+ " df[col]=le.fit_transform(df[col])\n",
326
+ "\n",
327
+ "display(df.head())"
328
+ ]
329
+ },
330
+ {
331
+ "cell_type": "markdown",
332
+ "metadata": {
333
+ "id": "hO_W1tgT-3MV"
334
+ },
335
+ "source": [
336
+ "**Data Split, Model Training and Metrics**"
337
+ ]
338
+ },
339
+ {
340
+ "cell_type": "code",
341
+ "execution_count": null,
342
+ "metadata": {
343
+ "colab": {
344
+ "base_uri": "https://localhost:8080/"
345
+ },
346
+ "executionInfo": {
347
+ "elapsed": 239,
348
+ "status": "ok",
349
+ "timestamp": 1741051577801,
350
+ "user": {
351
+ "displayName": "Jaison A",
352
+ "userId": "07006398627763032071"
353
+ },
354
+ "user_tz": -330
355
+ },
356
+ "id": "-8dwgDMc-95x",
357
+ "outputId": "725916d8-f062-4bf7-b3e4-57b91e7a3f51"
358
+ },
359
+ "outputs": [],
360
+ "source": [
361
+ "X_train,X_test,y_train,y_test=train_test_split(df.drop(columns=['Churn Value']),df['Churn Value'],test_size=0.2,random_state=42)\n",
362
+ "\n",
363
+ "scalers=StandardScaler()\n",
364
+ "\n",
365
+ "X_train_Scaled=scalers.fit_transform(X_train)\n",
366
+ "X_test_Scaled=scalers.transform(X_test)\n",
367
+ "\n",
368
+ "knn=KNeighborsClassifier(n_neighbors=5)\n",
369
+ "knn.fit(X_train_Scaled,y_train)\n",
370
+ "y_pred_knn=knn.predict(X_test_Scaled)\n",
371
+ "\n",
372
+ "logreg=LogisticRegression()\n",
373
+ "logreg.fit(X_train_Scaled,y_train)\n",
374
+ "y_pred_logreg=logreg.predict(X_test_Scaled)\n",
375
+ "\n",
376
+ "print(f'KNN Accuracy : {accuracy_score(y_test,y_pred_knn)}')\n",
377
+ "print(f'Logistic Regression Accuracy : {accuracy_score(y_test,y_pred_logreg)}')\n",
378
+ "\n",
379
+ "print(f\"KNN f1 Score : {f1_score(y_test,y_pred_knn)}\")\n",
380
+ "print(f\"Logistic Regression f1 Score : {f1_score(y_test,y_pred_logreg)}\")\n",
381
+ "\n",
382
+ "print(f\"KNN Classification Report : \\n{classification_report(y_test,y_pred_knn)}\")\n",
383
+ "print(f\"Logistic Regression Classification Report : \\n{classification_report(y_test,y_pred_logreg)}\")"
384
+ ]
385
+ },
386
+ {
387
+ "cell_type": "markdown",
388
+ "metadata": {
389
+ "id": "FU9VMoYaByfs"
390
+ },
391
+ "source": [
392
+ "### ***Question 3***"
393
+ ]
394
+ },
395
+ {
396
+ "cell_type": "code",
397
+ "execution_count": null,
398
+ "metadata": {
399
+ "colab": {
400
+ "base_uri": "https://localhost:8080/",
401
+ "height": 0
402
+ },
403
+ "executionInfo": {
404
+ "elapsed": 2763,
405
+ "status": "ok",
406
+ "timestamp": 1741051580562,
407
+ "user": {
408
+ "displayName": "Jaison A",
409
+ "userId": "07006398627763032071"
410
+ },
411
+ "user_tz": -330
412
+ },
413
+ "id": "WnZ6_jhgB1I7",
414
+ "outputId": "73b756b8-0fc3-45aa-c067-22ca9d42cefa"
415
+ },
416
+ "outputs": [],
417
+ "source": [
418
+ "df=pd.read_csv('/content/drive/MyDrive/sem 6/Lab/ML Lab/SOC_LAB1/dataset/PCA/all_stocks_5yr.csv')\n",
419
+ "display(df.head())"
420
+ ]
421
+ },
422
+ {
423
+ "cell_type": "code",
424
+ "execution_count": null,
425
+ "metadata": {
426
+ "executionInfo": {
427
+ "elapsed": 126,
428
+ "status": "ok",
429
+ "timestamp": 1741051580686,
430
+ "user": {
431
+ "displayName": "Jaison A",
432
+ "userId": "07006398627763032071"
433
+ },
434
+ "user_tz": -330
435
+ },
436
+ "id": "3nU3DybvD0kt"
437
+ },
438
+ "outputs": [],
439
+ "source": [
440
+ "features = ['open', 'high', 'low', 'close', 'volume']\n",
441
+ "data = df[features]\n",
442
+ "\n",
443
+ "# Handle missing values if any\n",
444
+ "data = data.dropna()\n",
445
+ "\n",
446
+ "# Standardize the data\n",
447
+ "scaler = StandardScaler()\n",
448
+ "data_scaled = scaler.fit_transform(data)\n"
449
+ ]
450
+ },
451
+ {
452
+ "cell_type": "code",
453
+ "execution_count": null,
454
+ "metadata": {
455
+ "colab": {
456
+ "base_uri": "https://localhost:8080/"
457
+ },
458
+ "executionInfo": {
459
+ "elapsed": 83,
460
+ "status": "ok",
461
+ "timestamp": 1741051580771,
462
+ "user": {
463
+ "displayName": "Jaison A",
464
+ "userId": "07006398627763032071"
465
+ },
466
+ "user_tz": -330
467
+ },
468
+ "id": "PPghwxj_D3QT",
469
+ "outputId": "d4c39545-9879-4464-fc35-3d57ec0e9763"
470
+ },
471
+ "outputs": [],
472
+ "source": [
473
+ "# Perform PCA and retain 90% variance\n",
474
+ "pca = PCA(n_components=0.90)\n",
475
+ "data_pca = pca.fit_transform(data_scaled)\n",
476
+ "\n",
477
+ "# Number of components required to retain 90% variance\n",
478
+ "num_components = pca.n_components_\n",
479
+ "print(f'Number of components to retain 90% variance: {num_components}')"
480
+ ]
481
+ },
482
+ {
483
+ "cell_type": "code",
484
+ "execution_count": null,
485
+ "metadata": {
486
+ "colab": {
487
+ "base_uri": "https://localhost:8080/",
488
+ "height": 0
489
+ },
490
+ "executionInfo": {
491
+ "elapsed": 393,
492
+ "status": "ok",
493
+ "timestamp": 1741051581177,
494
+ "user": {
495
+ "displayName": "Jaison A",
496
+ "userId": "07006398627763032071"
497
+ },
498
+ "user_tz": -330
499
+ },
500
+ "id": "WfqAFub0EAhg",
501
+ "outputId": "54331b4c-87fe-40f5-a68c-1673bdacdc9d"
502
+ },
503
+ "outputs": [],
504
+ "source": [
505
+ "# Plot cumulative explained variance across the retained components\n",
506
+ "plt.figure(figsize=(8, 5))\n",
507
+ "plt.plot(range(1, num_components + 1), np.cumsum(pca.explained_variance_ratio_), marker='o', linestyle='--')\n",
508
+ "plt.xlabel('Number of Components')\n",
509
+ "plt.ylabel('Cumulative Explained Variance')\n",
510
+ "plt.title('Explained Variance by Components')\n",
511
+ "plt.show()"
512
+ ]
513
+ },
514
+ {
515
+ "cell_type": "code",
516
+ "execution_count": null,
517
+ "metadata": {
518
+ "colab": {
519
+ "base_uri": "https://localhost:8080/",
520
+ "height": 0
521
+ },
522
+ "executionInfo": {
523
+ "elapsed": 52676,
524
+ "status": "ok",
525
+ "timestamp": 1741051633865,
526
+ "user": {
527
+ "displayName": "Jaison A",
528
+ "userId": "07006398627763032071"
529
+ },
530
+ "user_tz": -330
531
+ },
532
+ "id": "bzEs9MZnEENb",
533
+ "outputId": "073b9e4a-fd78-4086-92fa-70dad35ed944"
534
+ },
535
+ "outputs": [],
536
+ "source": [
537
+ "# Scatter plot before PCA\n",
538
+ "sns.pairplot(pd.DataFrame(data_scaled, columns=features), diag_kind='kde')\n",
539
+ "plt.suptitle('Stock Data Before PCA')\n",
540
+ "plt.show()\n"
541
+ ]
542
+ },
543
+ {
544
+ "cell_type": "code",
545
+ "execution_count": null,
546
+ "metadata": {
547
+ "colab": {
548
+ "base_uri": "https://localhost:8080/",
549
+ "height": 0
550
+ },
551
+ "executionInfo": {
552
+ "elapsed": 1697,
553
+ "status": "ok",
554
+ "timestamp": 1741051635567,
555
+ "user": {
556
+ "displayName": "Jaison A",
557
+ "userId": "07006398627763032071"
558
+ },
559
+ "user_tz": -330
560
+ },
561
+ "id": "LvNwIOpeEImM",
562
+ "outputId": "16fcf1f3-b788-4ead-c672-b8c25dddd7e6"
563
+ },
564
+ "outputs": [],
565
+ "source": [
566
+ "# Scatter plot after PCA\n",
567
+ "plt.scatter(data_pca[:, 0], data_pca[:, 1], alpha=0.5)\n",
568
+ "plt.xlabel('Principal Component 1')\n",
569
+ "plt.ylabel('Principal Component 2')\n",
570
+ "plt.title('Stock Data After PCA')\n",
571
+ "plt.show()"
572
+ ]
573
+ },
574
+ {
575
+ "cell_type": "code",
576
+ "execution_count": null,
577
+ "metadata": {
578
+ "colab": {
579
+ "base_uri": "https://localhost:8080/"
580
+ },
581
+ "executionInfo": {
582
+ "elapsed": 7873,
583
+ "status": "ok",
584
+ "timestamp": 1741051643443,
585
+ "user": {
586
+ "displayName": "Jaison A",
587
+ "userId": "07006398627763032071"
588
+ },
589
+ "user_tz": -330
590
+ },
591
+ "id": "V1WmxZ0lEl99",
592
+ "outputId": "7eff00e8-16ed-4879-f56e-8905540a0fee"
593
+ },
594
+ "outputs": [],
595
+ "source": [
596
+ "# Create binary classification target (1 if price increases, 0 if it decreases)\n",
597
+ "df['price_movement'] = np.where(df['close'].shift(-1) > df['close'], 1, 0)\n",
598
+ "df = df.dropna() # Remove NaNs that result from shift operation\n",
599
+ "y = df['price_movement'].values\n",
600
+ "\n",
601
+ "# Split data into train and test sets\n",
602
+ "X_train, X_test, y_train, y_test = train_test_split(data_pca, y, test_size=0.2, random_state=42)\n",
603
+ "\n",
604
+ "# Apply KNN classification\n",
605
+ "knn = KNeighborsClassifier(n_neighbors=5)\n",
606
+ "knn.fit(X_train, y_train)\n",
607
+ "y_pred = knn.predict(X_test)\n",
608
+ "\n",
609
+ "# Evaluate KNN classification performance\n",
610
+ "accuracy = accuracy_score(y_test, y_pred)\n",
611
+ "print(f'KNN Classification Accuracy: {accuracy}')\n",
612
+ "print('Classification Report:\\n', classification_report(y_test, y_pred))"
613
+ ]
614
+ },
615
+ {
616
+ "cell_type": "markdown",
617
+ "metadata": {
618
+ "id": "vugOPwlBFzmb"
619
+ },
620
+ "source": [
621
+ "### ***Question 4***"
622
+ ]
623
+ },
624
+ {
625
+ "cell_type": "code",
626
+ "execution_count": null,
627
+ "metadata": {
628
+ "colab": {
629
+ "base_uri": "https://localhost:8080/",
630
+ "height": 0
631
+ },
632
+ "executionInfo": {
633
+ "elapsed": 9,
634
+ "status": "ok",
635
+ "timestamp": 1741051643450,
636
+ "user": {
637
+ "displayName": "Jaison A",
638
+ "userId": "07006398627763032071"
639
+ },
640
+ "user_tz": -330
641
+ },
642
+ "id": "T7-4PahjFzED",
643
+ "outputId": "90198191-aa59-469a-c25f-c2784588917e"
644
+ },
645
+ "outputs": [],
646
+ "source": [
647
+ "df=pd.read_csv('/content/drive/MyDrive/sem 6/Lab/ML Lab/SOC_LAB1/dataset/KNN/heart_disease_uci.csv')\n",
648
+ "display(df.head())\n",
649
+ "df.info()"
650
+ ]
651
+ },
652
+ {
653
+ "cell_type": "code",
654
+ "execution_count": null,
655
+ "metadata": {
656
+ "colab": {
657
+ "base_uri": "https://localhost:8080/",
658
+ "height": 0
659
+ },
660
+ "executionInfo": {
661
+ "elapsed": 28,
662
+ "status": "ok",
663
+ "timestamp": 1741051643481,
664
+ "user": {
665
+ "displayName": "Jaison A",
666
+ "userId": "07006398627763032071"
667
+ },
668
+ "user_tz": -330
669
+ },
670
+ "id": "kRJLOdSDnaH3",
671
+ "outputId": "7f4ca5b9-dd81-4b25-f449-7ab5837dd475"
672
+ },
673
+ "outputs": [],
674
+ "source": [
675
+ "df=df.drop(columns=['id','ca'])\n",
676
+ "display(df.head())\n",
677
+ "\n",
678
+ "for col in df.select_dtypes(include=['object']).columns:\n",
679
+ " df[col]=df[col].astype('str')\n",
680
+ " df[col]=LabelEncoder().fit_transform(df[col])\n",
681
+ "\n",
682
+ "display(df.head())"
683
+ ]
684
+ },
685
+ {
686
+ "cell_type": "code",
687
+ "execution_count": null,
688
+ "metadata": {
689
+ "colab": {
690
+ "base_uri": "https://localhost:8080/"
691
+ },
692
+ "executionInfo": {
693
+ "elapsed": 7,
694
+ "status": "ok",
695
+ "timestamp": 1741051643490,
696
+ "user": {
697
+ "displayName": "Jaison A",
698
+ "userId": "07006398627763032071"
699
+ },
700
+ "user_tz": -330
701
+ },
702
+ "id": "FPX8T0AUo4Fq",
703
+ "outputId": "3fc16702-dbb5-4472-b04c-7993d26aa753"
704
+ },
705
+ "outputs": [],
706
+ "source": [
707
+ "print(df.isnull().sum())"
708
+ ]
709
+ },
710
+ {
711
+ "cell_type": "code",
712
+ "execution_count": null,
713
+ "metadata": {
714
+ "executionInfo": {
715
+ "elapsed": 2,
716
+ "status": "ok",
717
+ "timestamp": 1741051643494,
718
+ "user": {
719
+ "displayName": "Jaison A",
720
+ "userId": "07006398627763032071"
721
+ },
722
+ "user_tz": -330
723
+ },
724
+ "id": "MfxJ653BpOt7"
725
+ },
726
+ "outputs": [],
727
+ "source": [
728
+ "df['trestbps']=df['trestbps'].fillna(df['trestbps'].mean())\n",
729
+ "df['chol']=df['chol'].fillna(df['chol'].mean())\n",
730
+ "df['thalch']=df['thalch'].fillna(df['thalch'].mean())\n",
731
+ "df['oldpeak']=df['oldpeak'].fillna(df['oldpeak'].mean())"
732
+ ]
733
+ },
734
+ {
735
+ "cell_type": "markdown",
736
+ "metadata": {
737
+ "id": "vYZLZ6dyp0uy"
738
+ },
739
+ "source": [
740
+ "**Without PCA**"
741
+ ]
742
+ },
743
+ {
744
+ "cell_type": "code",
745
+ "execution_count": null,
746
+ "metadata": {
747
+ "colab": {
748
+ "base_uri": "https://localhost:8080/"
749
+ },
750
+ "executionInfo": {
751
+ "elapsed": 48,
752
+ "status": "ok",
753
+ "timestamp": 1741051643551,
754
+ "user": {
755
+ "displayName": "Jaison A",
756
+ "userId": "07006398627763032071"
757
+ },
758
+ "user_tz": -330
759
+ },
760
+ "id": "tKjY9CGjtW_Q",
761
+ "outputId": "506e2397-7fc4-4332-f7ac-581ba4a91be7"
762
+ },
763
+ "outputs": [],
764
+ "source": [
765
+ "kmeans=KMeans(n_clusters=5,random_state=42,n_init=10)\n",
766
+ "y_pred_kmeans=kmeans.fit_predict(df.drop(columns=['num']))\n",
767
+ "\n",
768
+ "ari=adjusted_rand_score(df['num'],y_pred_kmeans)\n",
769
+ "print(ari)"
770
+ ]
771
+ },
772
+ {
773
+ "cell_type": "markdown",
774
+ "metadata": {
775
+ "id": "deVfVji5uPnB"
776
+ },
777
+ "source": [
778
+ "**With PCA**"
779
+ ]
780
+ },
781
+ {
782
+ "cell_type": "code",
783
+ "execution_count": null,
784
+ "metadata": {
785
+ "colab": {
786
+ "base_uri": "https://localhost:8080/"
787
+ },
788
+ "executionInfo": {
789
+ "elapsed": 48,
790
+ "status": "ok",
791
+ "timestamp": 1741051643607,
792
+ "user": {
793
+ "displayName": "Jaison A",
794
+ "userId": "07006398627763032071"
795
+ },
796
+ "user_tz": -330
797
+ },
798
+ "id": "_DuzNnwAuRkQ",
799
+ "outputId": "8027f17a-f0a2-4737-b390-32ee25bbda7c"
800
+ },
801
+ "outputs": [],
802
+ "source": [
803
+ "pca=PCA(n_components=2)\n",
804
+ "pca_x=pca.fit_transform(df.drop(columns=['num']))\n",
805
+ "y_pred_kmeans=kmeans.fit_predict(pca_x)\n",
806
+ "ari=adjusted_rand_score(df['num'],y_pred_kmeans)\n",
807
+ "print(ari)"
808
+ ]
809
+ },
810
+ {
811
+ "cell_type": "code",
812
+ "execution_count": null,
813
+ "metadata": {
814
+ "colab": {
815
+ "base_uri": "https://localhost:8080/",
816
+ "height": 0
817
+ },
818
+ "executionInfo": {
819
+ "elapsed": 423,
820
+ "status": "ok",
821
+ "timestamp": 1741051644051,
822
+ "user": {
823
+ "displayName": "Jaison A",
824
+ "userId": "07006398627763032071"
825
+ },
826
+ "user_tz": -330
827
+ },
828
+ "id": "kcofB6n_skcD",
829
+ "outputId": "f55e545c-6fe9-4f19-cc1b-1513d89caabc"
830
+ },
831
+ "outputs": [],
832
+ "source": [
833
+ "plt.figure(figsize=(8, 6))\n",
834
+ "scatter = plt.scatter(pca_x[:, 0], pca_x[:, 1], c=y_pred_kmeans, cmap='viridis', alpha=0.6)\n",
835
+ "plt.colorbar(scatter, label='Digit Label')\n",
836
+ "plt.xlabel('Principal Component 1')\n",
837
+ "plt.ylabel('Principal Component 2')\n",
838
+ "plt.title('K Means Clustering')\n",
839
+ "plt.show()"
840
+ ]
841
+ },
842
+ {
843
+ "cell_type": "code",
844
+ "execution_count": null,
845
+ "metadata": {
846
+ "colab": {
847
+ "base_uri": "https://localhost:8080/"
848
+ },
849
+ "executionInfo": {
850
+ "elapsed": 9,
851
+ "status": "ok",
852
+ "timestamp": 1741051644055,
853
+ "user": {
854
+ "displayName": "Jaison A",
855
+ "userId": "07006398627763032071"
856
+ },
857
+ "user_tz": -330
858
+ },
859
+ "id": "pmdXFh2Pslhu",
860
+ "outputId": "1f864500-b714-4c40-db6e-c10bf593fbb4"
861
+ },
862
+ "outputs": [],
863
+ "source": [
864
+ "X=df.drop(columns=['num'])\n",
865
+ "X_reconstructed = pca.inverse_transform(pca_x)\n",
866
+ "reconstruction_error = np.mean(np.square(X - X_reconstructed))\n",
867
+ "print(f\"Reconstruction error: {reconstruction_error:.4f}\")"
868
+ ]
869
+ },
870
+ {
871
+ "cell_type": "markdown",
872
+ "metadata": {
873
+ "id": "z5iVUmUvF3mZ"
874
+ },
875
+ "source": [
876
+ "### ***Question 5***"
877
+ ]
878
+ },
879
+ {
880
+ "cell_type": "code",
881
+ "execution_count": null,
882
+ "metadata": {
883
+ "colab": {
884
+ "base_uri": "https://localhost:8080/",
885
+ "height": 206
886
+ },
887
+ "executionInfo": {
888
+ "elapsed": 394,
889
+ "status": "ok",
890
+ "timestamp": 1741051644442,
891
+ "user": {
892
+ "displayName": "Jaison A",
893
+ "userId": "07006398627763032071"
894
+ },
895
+ "user_tz": -330
896
+ },
897
+ "id": "CJ7Y5gaLF6LQ",
898
+ "outputId": "3cbf424b-07ba-4fef-a73f-c000a5e9c074"
899
+ },
900
+ "outputs": [],
901
+ "source": [
902
+ "df=pd.read_csv('/content/drive/MyDrive/sem 6/Lab/ML Lab/SOC_LAB1/dataset/Regression/Housing.csv')\n",
903
+ "le=LabelEncoder()\n",
904
+ "for col in df.select_dtypes(include=['object']).columns:\n",
905
+ " df[col]=le.fit_transform(df[col])\n",
906
+ "display(df.head())"
907
+ ]
908
+ },
909
+ {
910
+ "cell_type": "code",
911
+ "execution_count": null,
912
+ "metadata": {
913
+ "colab": {
914
+ "base_uri": "https://localhost:8080/",
915
+ "height": 465
916
+ },
917
+ "executionInfo": {
918
+ "elapsed": 193,
919
+ "status": "ok",
920
+ "timestamp": 1741051644636,
921
+ "user": {
922
+ "displayName": "Jaison A",
923
+ "userId": "07006398627763032071"
924
+ },
925
+ "user_tz": -330
926
+ },
927
+ "id": "W0cMte6vGo91",
928
+ "outputId": "f9299ed7-2902-484a-a2c5-5c085dfc7bcb"
929
+ },
930
+ "outputs": [],
931
+ "source": [
932
+ "plt.scatter(x=df['area'],y=df['price'])\n",
933
+ "plt.xlabel('area')\n",
934
+ "plt.ylabel('price')\n",
935
+ "plt.show()"
936
+ ]
937
+ },
938
+ {
939
+ "cell_type": "code",
940
+ "execution_count": null,
941
+ "metadata": {
942
+ "colab": {
943
+ "base_uri": "https://localhost:8080/"
944
+ },
945
+ "executionInfo": {
946
+ "elapsed": 14,
947
+ "status": "ok",
948
+ "timestamp": 1741051644653,
949
+ "user": {
950
+ "displayName": "Jaison A",
951
+ "userId": "07006398627763032071"
952
+ },
953
+ "user_tz": -330
954
+ },
955
+ "id": "tBRb-1x2G8cM",
956
+ "outputId": "a915ec62-947d-4805-f162-22a2ea2d3c45"
957
+ },
958
+ "outputs": [],
959
+ "source": [
960
+ "features=[\n",
961
+ " \"area\", \"bedrooms\", \"bathrooms\", \"stories\", \"mainroad\", \"guestroom\",\n",
962
+ " \"basement\", \"hotwaterheating\", \"airconditioning\", \"parking\",\n",
963
+ " \"prefarea\", \"furnishingstatus\"\n",
964
+ "]\n",
965
+ "\n",
966
+ "\n",
967
+ "x=df[['area']]\n",
968
+ "y=df['price']\n",
969
+ "\n",
970
+ "X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)\n",
971
+ "\n",
972
+ "lg=LinearRegression()\n",
973
+ "lg.fit(X_train,y_train)\n",
974
+ "\n",
975
+ "y_pred=lg.predict(X_test)\n",
976
+ "\n",
977
+ "print(f'r2_Score : {r2_score(y_test,y_pred)}')"
978
+ ]
979
+ },
980
+ {
981
+ "cell_type": "code",
982
+ "execution_count": null,
983
+ "metadata": {
984
+ "colab": {
985
+ "base_uri": "https://localhost:8080/"
986
+ },
987
+ "executionInfo": {
988
+ "elapsed": 18,
989
+ "status": "ok",
990
+ "timestamp": 1741051644668,
991
+ "user": {
992
+ "displayName": "Jaison A",
993
+ "userId": "07006398627763032071"
994
+ },
995
+ "user_tz": -330
996
+ },
997
+ "id": "d92QBRPwHIPD",
998
+ "outputId": "24a67bdc-2b7f-4cbb-a177-0281cd54dff6"
999
+ },
1000
+ "outputs": [],
1001
+ "source": [
1002
+ "y=df[features]\n",
1003
+ "x=df['price']\n",
1004
+ "\n",
1005
+ "X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)\n",
1006
+ "\n",
1007
+ "# Reshape X_train to a 2D array\n",
1008
+ "X_train = X_train.values.reshape(-1, 1)\n",
1009
+ "X_test = X_test.values.reshape(-1, 1) # Reshape X_test as well for consistency\n",
1010
+ "\n",
1011
+ "lg=LinearRegression()\n",
1012
+ "lg.fit(X_train,y_train)\n",
1013
+ "\n",
1014
+ "y_pred=lg.predict(X_test)\n",
1015
+ "\n",
1016
+ "print(f'r2_Score : {r2_score(y_test,y_pred)}')"
1017
+ ]
1018
+ },
1019
+ {
1020
+ "cell_type": "code",
1021
+ "execution_count": null,
1022
+ "metadata": {
1023
+ "colab": {
1024
+ "base_uri": "https://localhost:8080/"
1025
+ },
1026
+ "executionInfo": {
1027
+ "elapsed": 37,
1028
+ "status": "ok",
1029
+ "timestamp": 1741051644707,
1030
+ "user": {
1031
+ "displayName": "Jaison A",
1032
+ "userId": "07006398627763032071"
1033
+ },
1034
+ "user_tz": -330
1035
+ },
1036
+ "id": "bZr_ZoedHOJI",
1037
+ "outputId": "7a9a1d5e-18d6-4656-9ab0-e247abd8a5dc"
1038
+ },
1039
+ "outputs": [],
1040
+ "source": [
1041
+ "features=[\n",
1042
+ " \"area\", \"bedrooms\", \"bathrooms\", \"stories\", \"mainroad\", \"guestroom\",\n",
1043
+ " \"basement\", \"hotwaterheating\", \"airconditioning\", \"parking\",\n",
1044
+ " \"prefarea\", \"furnishingstatus\"\n",
1045
+ "]\n",
1046
+ "X=df[features]\n",
1047
+ "vif_data = pd.DataFrame()\n",
1048
+ "vif_data[\"Feature\"] = features\n",
1049
+ "vif_data[\"VIF\"] = [variance_inflation_factor(X.values, i) for i in range(len(features))]\n",
1050
+ "print(\"\\nVariance Inflation Factor (VIF):\")\n",
1051
+ "print(vif_data)"
1052
+ ]
1053
+ }
1054
+ ],
1055
+ "metadata": {
1056
+ "colab": {
1057
+ "authorship_tag": "ABX9TyOo5KSbG35NjuMjAiytt9Xd",
1058
+ "collapsed_sections": [
1059
+ "R7euuRFaCdIZ",
1060
+ "qCogj3nw4UUy",
1061
+ "ytnBNsap7xZZ",
1062
+ "FU9VMoYaByfs",
1063
+ "vugOPwlBFzmb"
1064
+ ],
1065
+ "provenance": []
1066
+ },
1067
+ "kernelspec": {
1068
+ "display_name": "Python 3 (ipykernel)",
1069
+ "language": "python",
1070
+ "name": "python3"
1071
+ },
1072
+ "language_info": {
1073
+ "codemirror_mode": {
1074
+ "name": "ipython",
1075
+ "version": 3
1076
+ },
1077
+ "file_extension": ".py",
1078
+ "mimetype": "text/x-python",
1079
+ "name": "python",
1080
+ "nbconvert_exporter": "python",
1081
+ "pygments_lexer": "ipython3",
1082
+ "version": "3.12.4"
1083
+ }
1084
+ },
1085
+ "nbformat": 4,
1086
+ "nbformat_minor": 4
1087
+ }