noshot-7.0.0-py3-none-any.whl → noshot-9.0.0-py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- noshot/data/ML TS XAI/ML/CNN(Image_for_Folders_5).ipynb +201 -0
- noshot/data/ML TS XAI/ML/CNN(Image_form_Folder_2).ipynb +201 -0
- noshot/data/ML TS XAI/ML/Json Codes/ML LAB CIA 2.ipynb +409 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/1. PCA EDA.ipynb +274 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/10. CNN.ipynb +170 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 2.ipynb +1087 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 3.ipynb +178 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM 4.ipynb +185 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/11. HMM.ipynb +106 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/2. KNN.ipynb +177 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/3. LDA.ipynb +195 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/4. Linear Regression.ipynb +267 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/5. Logistic Regression.ipynb +104 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/6. Bayesian Classifier.ipynb +109 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/7. SVM.ipynb +220 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/8. MLP.ipynb +99 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/9. Ridge - Lasso.ipynb +211 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/9. Ridge Lasso 2.ipynb +99 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/Image Load Example.ipynb +118 -0
- noshot/data/ML TS XAI/ML/ML 3 (Latest)/Updated_Untitled.ipynb +603 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/1. Iris Dataset (Softmax vs Sigmoid).ipynb +231 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/2. Student Dataset (Overfit vs Regularized).ipynb +269 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Categorical (Overfit vs Regularized).ipynb +274 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Numerical (Overfit vs Regularized).ipynb +263 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/4. Smart House System HMM.ipynb +198 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/5. Fraud Detection System HMM.ipynb +201 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/insurance.csv +1339 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/iris1.data +151 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/student-mat.csv +396 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/student-por.csv +650 -0
- {noshot-7.0.0.dist-info → noshot-9.0.0.dist-info}/METADATA +1 -1
- noshot-9.0.0.dist-info/RECORD +71 -0
- noshot-7.0.0.dist-info/RECORD +0 -41
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/1. EDA-PCA (Balance Scale Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/1. EDA-PCA (Rice Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/10. HMM Veterbi.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/2. KNN (Balance Scale Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/2. KNN (Iris Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/2. KNN (Sobar-72 Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/3. LDA (Balance Scale Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/3. LDA (NPHA Doctor Visits Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/4. Linear Regression (Machine Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/4. Linear Regression (Real Estate Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/5. Logistic Regression (Magic04 Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/5. Logistic Regression (Wine Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/6. Naive Bayes Classifier (Wine Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/7. SVM (Rice Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/8. FeedForward NN (Sobar72 Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Tamilan Code → ML 1}/9. CNN (Cifar10 Dataset).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/1. PCA.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/10. CNN.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/11. HMM.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/2. KNN.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/3. LDA.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/4. Linear Regression.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/5. Logistic Regression.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/6. Naive Bayes (Titanic).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/6. Naive Bayes (Wine).ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/7. SVM Linear.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/8. SVM Non-Linear.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/9. FNN With Regularization.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/9. FNN Without Regularization.ipynb +0 -0
- /noshot/data/ML TS XAI/ML/{Whitefang Code → ML 2}/All in One Lab CIA 1 Q.ipynb +0 -0
- {noshot-7.0.0.dist-info → noshot-9.0.0.dist-info}/WHEEL +0 -0
- {noshot-7.0.0.dist-info → noshot-9.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {noshot-7.0.0.dist-info → noshot-9.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,409 @@ (new file: content matches noshot/data/ML TS XAI/ML/Json Codes/ML LAB CIA 2.ipynb; Jupyter notebook, nbformat 4, Python 3 (ipykernel) kernel on Python 3.12.4, Colab provenance metadata)

Markdown cell: ***ML LAB CIA 2***

Markdown cell: **Q1**

Code cell (Iris dataset: SoftMax vs Sigmoid output activation):

    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder, StandardScaler
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.utils import to_categorical

    # Load the Iris dataset
    from sklearn.datasets import load_iris
    iris = load_iris()
    X = iris.data  # Features (sepal/petal dimensions)
    y = iris.target  # Labels (species: 0, 1, 2)

    # Preprocess data
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    y = to_categorical(y)  # One-hot encode labels for SoftMax

    # Split data into train/test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Function to build and evaluate the model
    def train_model(activation='softmax'):
        model = Sequential([
            Dense(16, activation='relu', input_shape=(4,)),  # Hidden layer
            Dense(3, activation=activation)  # Output layer (SoftMax or Sigmoid)
        ])

        # Compile with categorical crossentropy for SoftMax, binary for Sigmoid
        loss = 'categorical_crossentropy' if activation == 'softmax' else 'binary_crossentropy'
        model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

        # Train
        history = model.fit(X_train, y_train, epochs=50, validation_split=0.2, verbose=0)

        # Evaluate
        _, accuracy = model.evaluate(X_test, y_test, verbose=0)
        print(f"Activation: {activation}, Test Accuracy: {accuracy:.4f}")

    # Compare SoftMax vs. Sigmoid
    train_model(activation='softmax')  # Use this for multi-class (correct)
    train_model(activation='sigmoid')  # Incorrect for multi-class (for comparison)

Markdown cell: **Q2**

Code cell (synthetic student-grade regression: overfit vs Dropout + L2):

    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Dropout, Input
    from tensorflow.keras.regularizers import l2
    import matplotlib.pyplot as plt

    # Generate synthetic data
    np.random.seed(42)
    X = np.random.rand(1000, 5)  # 5 socio-economic features
    y = X.dot(np.random.rand(5)) + np.random.rand(1000) * 0.1  # Grades (0-1 scale)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Build model (with optional regularization)
    def build_model(use_regularization=False):
        model = Sequential()
        model.add(Input(shape=(5,)))  # Explicit input layer

        # Hidden layers with conditional L2/dropout
        reg = l2(0.01) if use_regularization else None
        model.add(Dense(128, activation='relu', kernel_regularizer=reg))
        model.add(Dense(128, activation='relu', kernel_regularizer=reg))
        if use_regularization:
            model.add(Dropout(0.5))  # Only add dropout if regularization is enabled

        model.add(Dense(1))  # Output layer (linear for regression)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        return model

    # Train without regularization (overfit)
    model_no_reg = build_model(use_regularization=False)
    history_no_reg = model_no_reg.fit(X_train, y_train, epochs=100,
                                      validation_split=0.2, verbose=0)

    # Train with dropout + L2 (regularized)
    model_with_reg = build_model(use_regularization=True)
    history_with_reg = model_with_reg.fit(X_train, y_train, epochs=100,
                                          validation_split=0.2, verbose=0)

    # Plot results
    plt.figure(figsize=(10, 5))
    plt.plot(history_no_reg.history['val_loss'], label='No Regularization', linestyle='--')
    plt.plot(history_with_reg.history['val_loss'], label='With Dropout + L2', linestyle='--')
    plt.xlabel('Epochs')
    plt.ylabel('Validation Loss (MSE)')
    plt.legend()
    plt.title('Overfitting Mitigation with Regularization')
    plt.show()

    # Test performance
    print("Test MAE (No Regularization):", model_no_reg.evaluate(X_test, y_test, verbose=0)[1])
    print("Test MAE (With Regularization):", model_with_reg.evaluate(X_test, y_test, verbose=0)[1])

Markdown cell: **Q3**

Code cell (synthetic insurance-claim regression: overfit vs Dropout + L2):

    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense, Dropout
    from tensorflow.keras.regularizers import l2
    import matplotlib.pyplot as plt

    # Generate synthetic insurance claim data
    np.random.seed(42)
    n_samples = 1000
    X = np.random.rand(n_samples, 10)  # 10 features (e.g., age, BMI, medical history)
    y = X.dot(np.random.rand(10)) * 10000 + np.random.randn(n_samples) * 500  # Claim amounts ($)

    # Split into train/test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Function to build and train the model
    def train_model(use_regularization=False):
        model = Sequential()
        model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
        model.add(Dense(256, activation='relu'))
        model.add(Dense(128, activation='relu'))

        if use_regularization:
            model.add(Dropout(0.5))
            model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))
        else:
            model.add(Dense(64, activation='relu'))

        model.add(Dense(1))  # Output layer for regression

        model.compile(optimizer='adam', loss='mse', metrics=['mae'])

        history = model.fit(X_train, y_train, epochs=100,
                            validation_split=0.2, verbose=0)
        return model, history

    # Intentionally overfit (no regularization)
    model_overfit, history_overfit = train_model(use_regularization=False)

    # Apply regularization (dropout + L2)
    model_reg, history_reg = train_model(use_regularization=True)

    # Plot training vs validation loss
    plt.figure(figsize=(10, 5))
    plt.plot(history_overfit.history['loss'], label='Train (Overfit)')
    plt.plot(history_overfit.history['val_loss'], label='Validation (Overfit)', linestyle='--')
    plt.plot(history_reg.history['val_loss'], label='Validation (Regularized)', linestyle='--')
    plt.xlabel('Epochs')
    plt.ylabel('Loss (MSE)')
    plt.legend()
    plt.title('Overfitting vs. Regularization')
    plt.show()

    # Evaluate on test data
    print("Test MAE (Overfit Model): ${:,.2f}".format(model_overfit.evaluate(X_test, y_test, verbose=0)[1]))
    print("Test MAE (Regularized Model): ${:,.2f}".format(model_reg.evaluate(X_test, y_test, verbose=0)[1]))

Markdown cell: **Q4**

Code cell (smart-house activity HMM over room-sensor observations):

    import numpy as np
    from hmmlearn import hmm
    import matplotlib.pyplot as plt

    # Define the hidden states and observations
    states = ["Cooking", "Sleeping", "Watching TV"]
    observations = ["kitchen", "bedroom", "living room"]

    # Create simulated sensor data sequences
    # Each sequence is a day's worth of room observations
    room_sequences = [
        ['kitchen', 'bedroom', 'living room', 'kitchen', 'bedroom'],
        ['kitchen', 'living room', 'living room', 'bedroom', 'bedroom'],
        ['living room', 'kitchen', 'bedroom', 'kitchen', 'bedroom'],
        ['bedroom', 'bedroom', 'living room', 'kitchen', 'living room']
    ]

    # Convert observations to numerical values
    obs_map = {obs: i for i, obs in enumerate(observations)}
    num_sequences = len(room_sequences)
    sequence_lengths = [len(seq) for seq in room_sequences]
    X = np.concatenate([[obs_map[obs] for obs in seq] for seq in room_sequences]).reshape(-1, 1)

    # Build and train the HMM
    model = hmm.CategoricalHMM(n_components=len(states), random_state=42)
    model.fit(X, lengths=sequence_lengths)

    # Print learned parameters
    print("Start Probabilities:", model.startprob_)
    print("\nTransition Matrix:")
    print(model.transmat_)
    print("\nEmission Probabilities:")
    print(model.emissionprob_)

    # Predict activities for a new sequence
    new_sequence = ['kitchen', 'living room', 'bedroom', 'kitchen']
    numeric_seq = np.array([obs_map[obs] for obs in new_sequence]).reshape(-1, 1)
    predicted_states = model.predict(numeric_seq)

    print("\nPredicted Activities:")
    for obs, state in zip(new_sequence, predicted_states):
        print(f"{obs} -> {states[state]}")

Markdown cell: **Q5**

Code cell (login-time HMM for intrusion detection):

    import numpy as np
    from hmmlearn import hmm

    # Define states and observations
    states = ["Genuine", "Intruder"]
    observations = ["early", "mid", "late"]  # Login times

    # Simulated login sequences (each sequence is a separate user's login pattern)
    sequences = [
        ['early', 'early', 'mid', 'early', 'mid'],    # Genuine user 1
        ['late', 'late', 'early', 'late', 'late'],    # Intruder 1
        ['early', 'mid', 'early', 'mid', 'early'],    # Genuine user 2
        ['mid', 'late', 'late', 'mid', 'late'],       # Intruder 2
        ['early', 'early', 'early', 'mid', 'early'],  # Genuine user 3
        ['late', 'mid', 'late', 'late', 'mid']        # Intruder 3
    ]

    # Convert to numerical values and proper format
    obs_map = {obs: i for i, obs in enumerate(observations)}
    X = np.concatenate([[[obs_map[obs]] for obs in seq] for seq in sequences])
    lengths = [len(seq) for seq in sequences]  # All lengths are 5 in this case

    # Train HMM
    model = hmm.CategoricalHMM(
        n_components=len(states),
        random_state=42
    )
    model.fit(X, lengths=lengths)

    # Print learned parameters
    print("Start Probabilities (Genuine vs Intruder):\n", model.startprob_)
    print("\nTransition Matrix:\n", model.transmat_)
    print("\nEmission Probabilities (Time of Day):\n", model.emissionprob_)

    # Predict on new sequences
    test_sequences = [
        ['early', 'mid', 'early', 'mid', 'early'],  # Likely genuine
        ['late', 'late', 'mid', 'late', 'late'],    # Likely intruder
    ]

    for seq in test_sequences:
        numeric_seq = np.array([[obs_map[obs]] for obs in seq])
        logprob, state_sequence = model.decode(numeric_seq)
        print(f"\nSequence: {seq}")
        print("Predicted States:", [states[i] for i in state_sequence])
        print("Log Probability:", logprob)
@@ -0,0 +1,274 @@ (new file: content matches noshot/data/ML TS XAI/ML/ML 3 (Latest)/1. PCA EDA.ipynb; Jupyter notebook, nbformat 4, Python 3 (ipykernel) kernel on Python 3.12.4)

Markdown cell (author's change notes):

    replaced matplot with seaborn
    rotated plt.xticks(rotation=90)
    scatterplot for city-mpg vs highwaympg instead of bar plot
    Added a Missing default dict for all values
    taken length,width,height for 2 Component PCA

Code cell (imports):

    import matplotlib.pyplot as plt
    import pandas as pd
    import numpy as np

Code cell (load data):

    df = pd.read_csv("Automobile_data.csv")

Code cell: (empty)

Code cell:

    df.describe()

Code cell:

    df.head

Code cell (count "?" placeholders per column):

    from collections import defaultdict
    Missing = defaultdict(int)
    for x in df:
        for y in df[x]:
            if y == "?":
                Missing[x] +=1

Code cell:

    Missing

Code cell:

    df.shape

Code cell:

    df.dtypes

Code cell (make distribution):

    plt.figure(figsize=(12,6))
    sns.countplot(x='make', data=df)
    plt.xlabel('Make')
    plt.ylabel('Distribution')
    plt.xticks(rotation=60)
    plt.title('Make Distribution')

Code cell (aspiration distribution; the plot itself still counts 'make'):

    plt.figure(figsize=(12,6))
    sns.countplot(x='make', data=df)
    plt.xlabel('Aspiration')
    plt.ylabel('Distribution')
    plt.xticks(rotation=60)
    plt.title('Aspiration Distribution')

Code cell (wheel-base line plot):

    plt.figure(figsize=(12,6))
    sns.lineplot(data=df["wheel-base"], linewidth=2.5,color="orange")
    plt.xlabel('Wheel Base')
    plt.ylabel('Distribution')
    plt.title('Wheel Base Distribution')

Code cell (city-mpg vs highway-mpg scatter plot; labels still read 'Wheel Base'):

    plt.figure(figsize=(12,6))
    sns.scatterplot(x=df["city-mpg"], y=df["highway-mpg"])
    plt.xlabel('Wheel Base')
    plt.ylabel('Distribution')
    plt.title('Wheel Base Distribution')

Code cell (select the length/width/height columns for PCA):

    x = df.iloc[:,10:13]

Code cell:

    x

Code cell:

    df["body-style"].unique()

Code cell (2-component PCA on the selected columns):

    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA

    y = df["body-style"]
    x = StandardScaler().fit_transform(x)
    pca = PCA(n_components=2)
    pct = pca.fit_transform(x)

    principal_df = pd.DataFrame(pct,columns=['pc1','pc2'])
    print("principal-df:\n",principal_df)

    finaldf= pd.concat([principal_df,df["body-style"]],axis=1)
    print("finaldf:\n",finaldf)

    finaldf.head()

Code cell (PCA scatter plot coloured by body-style):

    fig = plt.figure(figsize = (8,8))
    ax = fig.add_subplot(1,1,1)
    ax.set_xlabel('Principal Component 1', fontsize = 15)
    ax.set_ylabel('Principal Component 2', fontsize = 15)
    ax.set_title('2 component PCA', fontsize = 20)
    targets = df["body-style"].unique()
    colors = ['r', 'g','b','orange','yellow']
    for target, color in zip(targets,colors):
        indicesToKeep = finaldf["body-style"] == target
        ax.scatter(finaldf.loc[indicesToKeep, 'pc1'], finaldf.loc[indicesToKeep, 'pc2'], c = color, s = 50)
        ax.legend(['Front','Rear'])
        ax.grid()

Code cell: (empty)
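One caveat if the PCA EDA notebook is run as shipped: its plotting cells call seaborn through the `sns` alias, but its import cell only brings in matplotlib, pandas, and numpy. A minimal sketch of the import those cells assume (the `Automobile_data.csv` path is whatever file the notebook is run next to, which this diff does not include):

    import seaborn as sns                      # alias the notebook's plotting cells rely on
    import matplotlib.pyplot as plt
    import pandas as pd

    df = pd.read_csv("Automobile_data.csv")    # dataset referenced by the notebook, not bundled here
    plt.figure(figsize=(12, 6))
    sns.countplot(x='make', data=df)           # same seaborn call the notebook makes
    plt.xticks(rotation=60)
    plt.show()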