noshot 8.0.0__py3-none-any.whl → 9.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/data/ML TS XAI/ML/Json Codes/ML LAB CIA 2.ipynb +409 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/1. Iris Dataset (Softmax vs Sigmoid).ipynb +231 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/2. Student Dataset (Overfit vs Regularized).ipynb +269 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Categorical (Overfit vs Regularized).ipynb +274 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Numerical (Overfit vs Regularized).ipynb +263 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/4. Smart House System HMM.ipynb +198 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/5. Fraud Detection System HMM.ipynb +201 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/insurance.csv +1339 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/iris1.data +151 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/student-mat.csv +396 -0
- noshot/data/ML TS XAI/ML/ML Lab H Sec/student-por.csv +650 -0
- {noshot-8.0.0.dist-info → noshot-9.0.0.dist-info}/METADATA +1 -1
- {noshot-8.0.0.dist-info → noshot-9.0.0.dist-info}/RECORD +16 -5
- {noshot-8.0.0.dist-info → noshot-9.0.0.dist-info}/WHEEL +0 -0
- {noshot-8.0.0.dist-info → noshot-9.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {noshot-8.0.0.dist-info → noshot-9.0.0.dist-info}/top_level.txt +0 -0
noshot/data/ML TS XAI/ML/ML Lab H Sec/3. Insurance Target Numerical (Overfit vs Regularized).ipynb
ADDED
@@ -0,0 +1,263 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": null,
|
6
|
+
"id": "bd31d202-9ef3-419d-85aa-478e28199448",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [],
|
9
|
+
"source": [
|
10
|
+
"import numpy as np\n",
|
11
|
+
"import pandas as pd\n",
|
12
|
+
"import matplotlib.pyplot as plt\n",
|
13
|
+
"import tensorflow as tf\n",
|
14
|
+
"from tensorflow.keras.layers import Input, Dense, Dropout\n",
|
15
|
+
"from tensorflow.keras.models import Sequential\n",
|
16
|
+
"from tensorflow.keras.regularizers import l2\n",
|
17
|
+
"from tensorflow.keras.callbacks import EarlyStopping\n",
|
18
|
+
"from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
|
19
|
+
"from sklearn.model_selection import train_test_split\n",
|
20
|
+
"from sklearn.metrics import mean_squared_error, r2_score"
|
21
|
+
]
|
22
|
+
},
|
23
|
+
{
|
24
|
+
"cell_type": "code",
|
25
|
+
"execution_count": null,
|
26
|
+
"id": "10fef1a2-2c53-42b4-b8ec-f1dae5b14a1c",
|
27
|
+
"metadata": {},
|
28
|
+
"outputs": [],
|
29
|
+
"source": [
|
30
|
+
"df = pd.read_csv('insurance.csv')\n",
|
31
|
+
"df.head()"
|
32
|
+
]
|
33
|
+
},
|
34
|
+
{
|
35
|
+
"cell_type": "code",
|
36
|
+
"execution_count": null,
|
37
|
+
"id": "6079f0b7-b6b3-493e-8a73-ff3f944db314",
|
38
|
+
"metadata": {},
|
39
|
+
"outputs": [],
|
40
|
+
"source": [
|
41
|
+
"cat_cols = ['sex', 'smoker', 'region']\n",
|
42
|
+
"df[cat_cols] = df[cat_cols].apply(LabelEncoder().fit_transform)\n",
|
43
|
+
"print(df.head())"
|
44
|
+
]
|
45
|
+
},
|
46
|
+
{
|
47
|
+
"cell_type": "code",
|
48
|
+
"execution_count": null,
|
49
|
+
"id": "4a4d03e5-a817-4b66-9ab6-d32b18fe441a",
|
50
|
+
"metadata": {},
|
51
|
+
"outputs": [],
|
52
|
+
"source": [
|
53
|
+
"X = df.drop(columns='charges')\n",
|
54
|
+
"y = df['charges']\n",
|
55
|
+
"X = StandardScaler().fit_transform(X)\n",
|
56
|
+
"X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2, \n",
|
57
|
+
" random_state=42)\n",
|
58
|
+
"X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.2, \n",
|
59
|
+
" random_state=42)"
|
60
|
+
]
|
61
|
+
},
|
62
|
+
{
|
63
|
+
"cell_type": "code",
|
64
|
+
"execution_count": null,
|
65
|
+
"id": "f4c58e24-e103-4d92-993c-6db9f9b57657",
|
66
|
+
"metadata": {},
|
67
|
+
"outputs": [],
|
68
|
+
"source": [
|
69
|
+
"overfit_model = Sequential([\n",
|
70
|
+
" Input(shape=(X_train.shape[1],)),\n",
|
71
|
+
" Dense(64, activation='relu'),\n",
|
72
|
+
" Dense(64, activation='relu'),\n",
|
73
|
+
" Dense(1, activation='linear'),\n",
|
74
|
+
"])"
|
75
|
+
]
|
76
|
+
},
|
77
|
+
{
|
78
|
+
"cell_type": "code",
|
79
|
+
"execution_count": null,
|
80
|
+
"id": "533969a3-9b46-4fd7-a21a-968e96821c08",
|
81
|
+
"metadata": {},
|
82
|
+
"outputs": [],
|
83
|
+
"source": [
|
84
|
+
"reg_model = Sequential([\n",
|
85
|
+
" Input(shape=(X_train.shape[1],)),\n",
|
86
|
+
" Dense(64, activation='relu'),\n",
|
87
|
+
" Dropout(0.1),\n",
|
88
|
+
" Dense(64, activation='relu', kernel_regularizer=l2(0.001)),\n",
|
89
|
+
" Dropout(0.1),\n",
|
90
|
+
" Dense(1, activation='linear'),\n",
|
91
|
+
"])"
|
92
|
+
]
|
93
|
+
},
|
94
|
+
{
|
95
|
+
"cell_type": "code",
|
96
|
+
"execution_count": null,
|
97
|
+
"id": "c9471a5d-3c49-4531-bae6-8138c9812261",
|
98
|
+
"metadata": {},
|
99
|
+
"outputs": [],
|
100
|
+
"source": [
|
101
|
+
"print(\"\\nOverfit Model Summary:\")\n",
|
102
|
+
"overfit_model.summary()"
|
103
|
+
]
|
104
|
+
},
|
105
|
+
{
|
106
|
+
"cell_type": "code",
|
107
|
+
"execution_count": null,
|
108
|
+
"id": "3efc2e9b-2b1e-416c-888e-9ea092de9d3f",
|
109
|
+
"metadata": {},
|
110
|
+
"outputs": [],
|
111
|
+
"source": [
|
112
|
+
"print(\"\\nRegularized Model Summary:\")\n",
|
113
|
+
"reg_model.summary()"
|
114
|
+
]
|
115
|
+
},
|
116
|
+
{
|
117
|
+
"cell_type": "code",
|
118
|
+
"execution_count": null,
|
119
|
+
"id": "c76d9120-15c2-447b-9e9c-83c16c118096",
|
120
|
+
"metadata": {},
|
121
|
+
"outputs": [],
|
122
|
+
"source": [
|
123
|
+
"overfit_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse'])\n",
|
124
|
+
"reg_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse'])"
|
125
|
+
]
|
126
|
+
},
|
127
|
+
{
|
128
|
+
"cell_type": "code",
|
129
|
+
"execution_count": null,
|
130
|
+
"id": "c05813ae-dd34-4caf-b008-9e683216c891",
|
131
|
+
"metadata": {},
|
132
|
+
"outputs": [],
|
133
|
+
"source": [
|
134
|
+
"history_overfit = overfit_model.fit(\n",
|
135
|
+
" X_train, y_train,\n",
|
136
|
+
" validation_data=(X_val, y_val),\n",
|
137
|
+
" epochs=20,\n",
|
138
|
+
" batch_size=16,\n",
|
139
|
+
" verbose=1\n",
|
140
|
+
")"
|
141
|
+
]
|
142
|
+
},
|
143
|
+
{
|
144
|
+
"cell_type": "code",
|
145
|
+
"execution_count": null,
|
146
|
+
"id": "124d821f-3272-4a0b-9a24-0fedc7f7cfee",
|
147
|
+
"metadata": {},
|
148
|
+
"outputs": [],
|
149
|
+
"source": [
|
150
|
+
"history_reg = reg_model.fit(\n",
|
151
|
+
" X_train, y_train,\n",
|
152
|
+
" validation_data=(X_val, y_val),\n",
|
153
|
+
" epochs=20,\n",
|
154
|
+
" batch_size=16,\n",
|
155
|
+
" verbose=1\n",
|
156
|
+
")"
|
157
|
+
]
|
158
|
+
},
|
159
|
+
{
|
160
|
+
"cell_type": "code",
|
161
|
+
"execution_count": null,
|
162
|
+
"id": "856e0e03-f681-4061-83b9-83da2823adda",
|
163
|
+
"metadata": {},
|
164
|
+
"outputs": [],
|
165
|
+
"source": [
|
166
|
+
"plt.figure(figsize=(14, 6))\n",
|
167
|
+
"plt.subplot(1, 2, 1)\n",
|
168
|
+
"plt.plot(history_overfit.history['val_loss'], label='Overfit Val Loss', color='blue', linestyle='--')\n",
|
169
|
+
"plt.plot(history_reg.history['val_loss'], label='Regularized Val Loss', color='brown', linestyle='--')\n",
|
170
|
+
"plt.title('Model Loss Comparison (MSE)')\n",
|
171
|
+
"plt.xlabel('Epoch')\n",
|
172
|
+
"plt.ylabel('Mean Squared Error Loss')\n",
|
173
|
+
"plt.legend()\n",
|
174
|
+
"plt.grid(True)"
|
175
|
+
]
|
176
|
+
},
|
177
|
+
{
|
178
|
+
"cell_type": "code",
|
179
|
+
"execution_count": null,
|
180
|
+
"id": "f5315d68-df04-4883-b021-5b3d1be6c180",
|
181
|
+
"metadata": {},
|
182
|
+
"outputs": [],
|
183
|
+
"source": [
|
184
|
+
"plt.plot(history_overfit.history['val_mae'], label='Overfit Val MAE', color='blue')\n",
|
185
|
+
"plt.plot(history_reg.history['val_mae'], label='Regularized Val MAE', color='brown')\n",
|
186
|
+
"plt.title('Model Mean Absolute Error Comparison')\n",
|
187
|
+
"plt.xlabel('Epoch')\n",
|
188
|
+
"plt.ylabel('Mean Absolute Error')\n",
|
189
|
+
"plt.legend()\n",
|
190
|
+
"plt.grid(True)\n",
|
191
|
+
"plt.show()"
|
192
|
+
]
|
193
|
+
},
|
194
|
+
{
|
195
|
+
"cell_type": "code",
|
196
|
+
"execution_count": null,
|
197
|
+
"id": "bef16d6a-c694-495e-91b8-4b6021146b21",
|
198
|
+
"metadata": {},
|
199
|
+
"outputs": [],
|
200
|
+
"source": [
|
201
|
+
"loss_overfit, mae_overfit, mse_overfit_eval = overfit_model.evaluate(X_test, y_test, verbose=0)\n",
|
202
|
+
"y_pred_overfit = overfit_model.predict(X_test, verbose=0).flatten()\n",
|
203
|
+
"r2_overfit = r2_score(y_test, y_pred_overfit)\n",
|
204
|
+
"\n",
|
205
|
+
"loss_reg, mae_reg, mse_reg_eval = reg_model.evaluate(X_test, y_test, verbose=0)\n",
|
206
|
+
"y_pred_reg = reg_model.predict(X_test, verbose=0).flatten()\n",
|
207
|
+
"r2_reg = r2_score(y_test, y_pred_reg)"
|
208
|
+
]
|
209
|
+
},
|
210
|
+
{
|
211
|
+
"cell_type": "code",
|
212
|
+
"execution_count": null,
|
213
|
+
"id": "cce008a5-7588-4574-a2a9-5b34f12038d5",
|
214
|
+
"metadata": {},
|
215
|
+
"outputs": [],
|
216
|
+
"source": [
|
217
|
+
"print(\"\\nTest Set Performance:\")\n",
|
218
|
+
"print(f\"Overfit Model - MSE: {mse_overfit_eval:.2f}, MAE: {mae_overfit:.2f}, R2: {r2_overfit:.4f}\")\n",
|
219
|
+
"print(f\"Regularized Model - MSE: {mse_reg_eval:.2f}, MAE: {mae_reg:.2f}, R2: {r2_reg:.4f}\")"
|
220
|
+
]
|
221
|
+
},
|
222
|
+
{
|
223
|
+
"cell_type": "code",
|
224
|
+
"execution_count": null,
|
225
|
+
"id": "40c9647b-55d7-4d9a-a0ae-061f71cbec72",
|
226
|
+
"metadata": {},
|
227
|
+
"outputs": [],
|
228
|
+
"source": [
|
229
|
+
"plt.figure(figsize=(10, 7))\n",
|
230
|
+
"plt.scatter(y_test, y_pred_overfit, color='skyblue', edgecolors='k', linewidth=0.5, label=f'Overfit Model (R2={r2_overfit:.3f})')\n",
|
231
|
+
"plt.scatter(y_test, y_pred_reg, color='sandybrown', edgecolors='k', linewidth=0.5, label=f'Regularized Model (R2={r2_reg:.3f})')\n",
|
232
|
+
"plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='black', linestyle='--', linewidth=2, label='Perfect Prediction')\n",
|
233
|
+
"plt.title('Actual vs Predicted Charges (Test Set)')\n",
|
234
|
+
"plt.xlabel('Actual Charges ($)')\n",
|
235
|
+
"plt.ylabel('Predicted Charges ($)')\n",
|
236
|
+
"plt.legend()\n",
|
237
|
+
"plt.grid(True)\n",
|
238
|
+
"plt.show()"
|
239
|
+
]
|
240
|
+
}
|
241
|
+
],
|
242
|
+
"metadata": {
|
243
|
+
"kernelspec": {
|
244
|
+
"display_name": "NEW-VENV-1",
|
245
|
+
"language": "python",
|
246
|
+
"name": "new-venv-1"
|
247
|
+
},
|
248
|
+
"language_info": {
|
249
|
+
"codemirror_mode": {
|
250
|
+
"name": "ipython",
|
251
|
+
"version": 3
|
252
|
+
},
|
253
|
+
"file_extension": ".py",
|
254
|
+
"mimetype": "text/x-python",
|
255
|
+
"name": "python",
|
256
|
+
"nbconvert_exporter": "python",
|
257
|
+
"pygments_lexer": "ipython3",
|
258
|
+
"version": "3.11.5"
|
259
|
+
}
|
260
|
+
},
|
261
|
+
"nbformat": 4,
|
262
|
+
"nbformat_minor": 5
|
263
|
+
}
|
@@ -0,0 +1,198 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": null,
|
6
|
+
"id": "2d6993c0-6499-43e5-bb2e-66de7de4c05c",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [],
|
9
|
+
"source": [
|
10
|
+
"import numpy as np\n",
|
11
|
+
"import pandas as pd\n",
|
12
|
+
"import networkx as nx\n",
|
13
|
+
"from pprint import pprint\n",
|
14
|
+
"import matplotlib.pyplot as plt\n",
|
15
|
+
"from hmmlearn.hmm import CategoricalHMM"
|
16
|
+
]
|
17
|
+
},
|
18
|
+
{
|
19
|
+
"cell_type": "code",
|
20
|
+
"execution_count": null,
|
21
|
+
"id": "8809eb33-e876-4f7b-b61e-9eb9c93be95e",
|
22
|
+
"metadata": {},
|
23
|
+
"outputs": [],
|
24
|
+
"source": [
|
25
|
+
"states = ['Kitchen', 'Bedroom', 'Living Room']\n",
|
26
|
+
"hidden_states = ['Cooking', 'Sleeping', 'Watching TV']"
|
27
|
+
]
|
28
|
+
},
|
29
|
+
{
|
30
|
+
"cell_type": "code",
|
31
|
+
"execution_count": null,
|
32
|
+
"id": "d6570caa-6d4a-436b-802f-24d39e83b79f",
|
33
|
+
"metadata": {},
|
34
|
+
"outputs": [],
|
35
|
+
"source": [
|
36
|
+
"pi_states = [0.4, 0.3, 0.3]\n",
|
37
|
+
"pi_hidden = [0.1, 0.2, 0.7]"
|
38
|
+
]
|
39
|
+
},
|
40
|
+
{
|
41
|
+
"cell_type": "code",
|
42
|
+
"execution_count": null,
|
43
|
+
"id": "5f265573-be69-4218-b7af-dea81b1b80b6",
|
44
|
+
"metadata": {},
|
45
|
+
"outputs": [],
|
46
|
+
"source": [
|
47
|
+
"state_space = pd.Series(pi_states, index=states)\n",
|
48
|
+
"hidden_state_space = pd.Series(pi_hidden, index=hidden_states)"
|
49
|
+
]
|
50
|
+
},
|
51
|
+
{
|
52
|
+
"cell_type": "code",
|
53
|
+
"execution_count": null,
|
54
|
+
"id": "1e72ab96-f199-40c8-a074-1d7cced28bd3",
|
55
|
+
"metadata": {},
|
56
|
+
"outputs": [],
|
57
|
+
"source": [
|
58
|
+
"q_df = pd.DataFrame([[0.4, 0.2, 0.4], [0.45, 0.45, 0.1], [0.45, 0.25, 0.3]], \n",
|
59
|
+
" columns=states, index=states)\n",
|
60
|
+
"a_df = pd.DataFrame([[0.3, 0.5, 0.2], [0.1, 0.7, 0.2], [0.2, 0.3, 0.5]], \n",
|
61
|
+
" columns=hidden_states, index=hidden_states)\n",
|
62
|
+
"b_df = pd.DataFrame([[0.8, 0.1, 0.1], [0.1, 0.8, 0.1], [0.2, 0.1, 0.7]], \n",
|
63
|
+
" columns=states, index=hidden_states)"
|
64
|
+
]
|
65
|
+
},
|
66
|
+
{
|
67
|
+
"cell_type": "code",
|
68
|
+
"execution_count": null,
|
69
|
+
"id": "5cc63a43-7265-4804-b9ec-fe5c41712778",
|
70
|
+
"metadata": {},
|
71
|
+
"outputs": [],
|
72
|
+
"source": [
|
73
|
+
"q_df"
|
74
|
+
]
|
75
|
+
},
|
76
|
+
{
|
77
|
+
"cell_type": "code",
|
78
|
+
"execution_count": null,
|
79
|
+
"id": "d5fb306d-957d-4fcb-a0bb-9f92791e4f6f",
|
80
|
+
"metadata": {},
|
81
|
+
"outputs": [],
|
82
|
+
"source": [
|
83
|
+
"a_df"
|
84
|
+
]
|
85
|
+
},
|
86
|
+
{
|
87
|
+
"cell_type": "code",
|
88
|
+
"execution_count": null,
|
89
|
+
"id": "e33cb732-da75-4a7e-ba03-d15ba0b9b157",
|
90
|
+
"metadata": {},
|
91
|
+
"outputs": [],
|
92
|
+
"source": [
|
93
|
+
"b_df"
|
94
|
+
]
|
95
|
+
},
|
96
|
+
{
|
97
|
+
"cell_type": "code",
|
98
|
+
"execution_count": null,
|
99
|
+
"id": "a30892b0-f44b-4c2e-b3d9-93997a3884ed",
|
100
|
+
"metadata": {},
|
101
|
+
"outputs": [],
|
102
|
+
"source": [
|
103
|
+
"q = q_df.values\n",
|
104
|
+
"a = a_df.values\n",
|
105
|
+
"b = b_df.values"
|
106
|
+
]
|
107
|
+
},
|
108
|
+
{
|
109
|
+
"cell_type": "code",
|
110
|
+
"execution_count": null,
|
111
|
+
"id": "7f65fbf1-9b59-400b-8fe2-eedcc3153268",
|
112
|
+
"metadata": {},
|
113
|
+
"outputs": [],
|
114
|
+
"source": [
|
115
|
+
"def add_edges(df):\n",
|
116
|
+
" return {(idx, col): df.loc[idx, col] for idx in df.index for col in df.columns}"
|
117
|
+
]
|
118
|
+
},
|
119
|
+
{
|
120
|
+
"cell_type": "code",
|
121
|
+
"execution_count": null,
|
122
|
+
"id": "67b48793-859b-4628-9f12-b3345040e788",
|
123
|
+
"metadata": {},
|
124
|
+
"outputs": [],
|
125
|
+
"source": [
|
126
|
+
"edge_wts = add_edges(q_df)\n",
|
127
|
+
"hidden_edge_wts = add_edges(a_df)\n",
|
128
|
+
"emit_edge_wts = add_edges(b_df)"
|
129
|
+
]
|
130
|
+
},
|
131
|
+
{
|
132
|
+
"cell_type": "code",
|
133
|
+
"execution_count": null,
|
134
|
+
"id": "d9dd66a3-a331-43c6-8699-2bf81a03e600",
|
135
|
+
"metadata": {},
|
136
|
+
"outputs": [],
|
137
|
+
"source": [
|
138
|
+
"G = nx.DiGraph()\n",
|
139
|
+
"G.add_nodes_from(states + hidden_states)\n",
|
140
|
+
"\n",
|
141
|
+
"for k, v in {**hidden_edge_wts, **emit_edge_wts}.items():\n",
|
142
|
+
" G.add_edge(k[0], k[1], weight=v, label=f\"{v:.2f}\")\n",
|
143
|
+
"\n",
|
144
|
+
"pos = nx.circular_layout(G)\n",
|
145
|
+
"node_colors = ['skyblue' if node in hidden_states else 'lightgreen' for node in G]\n",
|
146
|
+
"nx.draw(G, pos, with_labels=True, arrows=True, node_color=node_colors, node_size=1000, font_size=5, font_weight='bold')\n",
|
147
|
+
"nx.draw_networkx_edge_labels(G, pos, edge_labels=nx.get_edge_attributes(G, 'label'), font_size=8)\n",
|
148
|
+
"plt.show()"
|
149
|
+
]
|
150
|
+
},
|
151
|
+
{
|
152
|
+
"cell_type": "code",
|
153
|
+
"execution_count": null,
|
154
|
+
"id": "01ba9e94-fc32-4a71-aab7-35ec817dcd62",
|
155
|
+
"metadata": {},
|
156
|
+
"outputs": [],
|
157
|
+
"source": [
|
158
|
+
"obs_seq = ['kitchen', 'bedroom', 'living room', 'kitchen', 'bedroom']\n",
|
159
|
+
"obs_map = {'kitchen': 0, 'bedroom': 1, 'living room': 2}\n",
|
160
|
+
"obs_idx = np.array([obs_map[o.lower()] for o in obs_seq]).reshape(-1, 1)\n",
|
161
|
+
"\n",
|
162
|
+
"model = CategoricalHMM(n_components=len(hidden_states))\n",
|
163
|
+
"model.startprob_ = pi_hidden\n",
|
164
|
+
"model.transmat_ = a\n",
|
165
|
+
"model.emissionprob_ = b\n",
|
166
|
+
"\n",
|
167
|
+
"logprob, path = model.decode(obs_idx, algorithm=\"viterbi\")\n",
|
168
|
+
"state_map = {0: 'Cooking', 1: 'Sleeping', 2: 'Watching TV'}\n",
|
169
|
+
"state_path = [state_map[v] for v in path]\n",
|
170
|
+
"\n",
|
171
|
+
"# Display result\n",
|
172
|
+
"result = pd.DataFrame({'Observation': obs_seq, 'Best_path': state_path})\n",
|
173
|
+
"print(result)"
|
174
|
+
]
|
175
|
+
}
|
176
|
+
],
|
177
|
+
"metadata": {
|
178
|
+
"kernelspec": {
|
179
|
+
"display_name": "NEW-VENV-1",
|
180
|
+
"language": "python",
|
181
|
+
"name": "new-venv-1"
|
182
|
+
},
|
183
|
+
"language_info": {
|
184
|
+
"codemirror_mode": {
|
185
|
+
"name": "ipython",
|
186
|
+
"version": 3
|
187
|
+
},
|
188
|
+
"file_extension": ".py",
|
189
|
+
"mimetype": "text/x-python",
|
190
|
+
"name": "python",
|
191
|
+
"nbconvert_exporter": "python",
|
192
|
+
"pygments_lexer": "ipython3",
|
193
|
+
"version": "3.11.5"
|
194
|
+
}
|
195
|
+
},
|
196
|
+
"nbformat": 4,
|
197
|
+
"nbformat_minor": 5
|
198
|
+
}
|
@@ -0,0 +1,201 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": null,
|
6
|
+
"id": "4be5c15a-adaa-4a0a-b378-c416ee136765",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [],
|
9
|
+
"source": [
|
10
|
+
"import numpy as np\n",
|
11
|
+
"import pandas as pd\n",
|
12
|
+
"import networkx as network\n",
|
13
|
+
"import matplotlib.pyplot as plot\n",
|
14
|
+
"from hmmlearn.hmm import CategoricalHMM"
|
15
|
+
]
|
16
|
+
},
|
17
|
+
{
|
18
|
+
"cell_type": "code",
|
19
|
+
"execution_count": null,
|
20
|
+
"id": "eebd9858-a356-448d-bc92-646a1871dd5a",
|
21
|
+
"metadata": {},
|
22
|
+
"outputs": [],
|
23
|
+
"source": [
|
24
|
+
"visibleStates = ['early', 'mid', 'late']\n",
|
25
|
+
"hiddenStates = ['Genuine User', 'Intruder']\n",
|
26
|
+
"\n",
|
27
|
+
"hiddenInitial = [0.9, 0.1]\n",
|
28
|
+
"visibleInitial = [0.33, 0.33, 0.33]\n",
|
29
|
+
"\n",
|
30
|
+
"hiddenTransition = pd.DataFrame([\n",
|
31
|
+
" [0.7, 0.3],\n",
|
32
|
+
" [0.4, 0.6]\n",
|
33
|
+
"], columns=hiddenStates, index=hiddenStates)\n",
|
34
|
+
"\n",
|
35
|
+
"emissionMatrix = pd.DataFrame([\n",
|
36
|
+
" [0.8, 0.1, 0.1],\n",
|
37
|
+
" [0.1, 0.3, 0.6]\n",
|
38
|
+
"], columns=visibleStates, index=hiddenStates)"
|
39
|
+
]
|
40
|
+
},
|
41
|
+
{
|
42
|
+
"cell_type": "code",
|
43
|
+
"execution_count": null,
|
44
|
+
"id": "d2f7f001-0f8a-4c26-a457-4319606b41d6",
|
45
|
+
"metadata": {},
|
46
|
+
"outputs": [],
|
47
|
+
"source": [
|
48
|
+
"hiddenTransition"
|
49
|
+
]
|
50
|
+
},
|
51
|
+
{
|
52
|
+
"cell_type": "code",
|
53
|
+
"execution_count": null,
|
54
|
+
"id": "a58b0543-e154-42e5-819f-dfd70be0916b",
|
55
|
+
"metadata": {},
|
56
|
+
"outputs": [],
|
57
|
+
"source": [
|
58
|
+
"emissionMatrix"
|
59
|
+
]
|
60
|
+
},
|
61
|
+
{
|
62
|
+
"cell_type": "code",
|
63
|
+
"execution_count": null,
|
64
|
+
"id": "c1cad5e7-a61e-4c04-98cc-920fdf763a1c",
|
65
|
+
"metadata": {},
|
66
|
+
"outputs": [],
|
67
|
+
"source": [
|
68
|
+
"transitionArray = hiddenTransition.values\n",
|
69
|
+
"emissionArray = emissionMatrix.values"
|
70
|
+
]
|
71
|
+
},
|
72
|
+
{
|
73
|
+
"cell_type": "code",
|
74
|
+
"execution_count": null,
|
75
|
+
"id": "06f63c03-c3af-4183-8be6-d04e8614976c",
|
76
|
+
"metadata": {},
|
77
|
+
"outputs": [],
|
78
|
+
"source": [
|
79
|
+
"def edgeDictionary(dataframe):\n",
|
80
|
+
" return {(row, column): dataframe.loc[row, column] \n",
|
81
|
+
" for row in dataframe.index \n",
|
82
|
+
" for column in dataframe.columns}"
|
83
|
+
]
|
84
|
+
},
|
85
|
+
{
|
86
|
+
"cell_type": "code",
|
87
|
+
"execution_count": null,
|
88
|
+
"id": "fb957ff2-a6ad-4900-b7a3-0f87bb845d1a",
|
89
|
+
"metadata": {},
|
90
|
+
"outputs": [],
|
91
|
+
"source": [
|
92
|
+
"hiddenEdges = edgeDictionary(hiddenTransition)\n",
|
93
|
+
"emissionEdges = edgeDictionary(emissionMatrix)"
|
94
|
+
]
|
95
|
+
},
|
96
|
+
{
|
97
|
+
"cell_type": "code",
|
98
|
+
"execution_count": null,
|
99
|
+
"id": "80df443e-cf00-4030-8e46-de748aeb461d",
|
100
|
+
"metadata": {},
|
101
|
+
"outputs": [],
|
102
|
+
"source": [
|
103
|
+
"graph = network.DiGraph()\n",
|
104
|
+
"graph.add_nodes_from(visibleStates + hiddenStates)\n",
|
105
|
+
"\n",
|
106
|
+
"for key, value in {**hiddenEdges, **emissionEdges}.items():\n",
|
107
|
+
" graph.add_edge(key[0], key[1], weight=value, label=f\"{value:.2f}\")\n",
|
108
|
+
"\n",
|
109
|
+
"position = network.circular_layout(graph)"
|
110
|
+
]
|
111
|
+
},
|
112
|
+
{
|
113
|
+
"cell_type": "code",
|
114
|
+
"execution_count": null,
|
115
|
+
"id": "7dfcedd7-4298-4526-8b45-638237720b35",
|
116
|
+
"metadata": {},
|
117
|
+
"outputs": [],
|
118
|
+
"source": [
|
119
|
+
"nodeColors = ['skyblue' if node in hiddenStates else 'lightgreen' for node in graph]\n",
|
120
|
+
"network.draw(graph, position, with_labels=True, arrows=True, node_color=nodeColors, node_size=1000, font_size=7, font_weight='bold')\n",
|
121
|
+
"network.draw_networkx_edge_labels(graph, position, edge_labels=network.get_edge_attributes(graph, 'label'), font_size=8)\n",
|
122
|
+
"plot.title(\"HMM State and Emission Graph\")\n",
|
123
|
+
"plot.show()"
|
124
|
+
]
|
125
|
+
},
|
126
|
+
{
|
127
|
+
"cell_type": "code",
|
128
|
+
"execution_count": null,
|
129
|
+
"id": "5bb756d0-c0d8-4509-b95b-182f30dd4a13",
|
130
|
+
"metadata": {},
|
131
|
+
"outputs": [],
|
132
|
+
"source": [
|
133
|
+
"observations = ['early', 'early', 'late', 'mid', 'early', 'late']\n",
|
134
|
+
"observationMap = {'early': 0, 'mid': 1, 'late': 2}\n",
|
135
|
+
"mappedSequence = np.array([observationMap[value.lower()] for value in observations]).reshape(-1, 1)"
|
136
|
+
]
|
137
|
+
},
|
138
|
+
{
|
139
|
+
"cell_type": "code",
|
140
|
+
"execution_count": null,
|
141
|
+
"id": "307fdcc3-aa78-4125-a37c-67f69fa5d6ea",
|
142
|
+
"metadata": {},
|
143
|
+
"outputs": [],
|
144
|
+
"source": [
|
145
|
+
"model = CategoricalHMM(n_components=len(hiddenStates), init_params=\"\")\n",
|
146
|
+
"model.startprob_ = np.array(hiddenInitial)\n",
|
147
|
+
"model.transmat_ = transitionArray\n",
|
148
|
+
"model.emissionprob_ = emissionArray\n",
|
149
|
+
"model.n_features = len(visibleStates)"
|
150
|
+
]
|
151
|
+
},
|
152
|
+
{
|
153
|
+
"cell_type": "code",
|
154
|
+
"execution_count": null,
|
155
|
+
"id": "4779218b-a7f8-4832-b5ba-b5979ba13389",
|
156
|
+
"metadata": {},
|
157
|
+
"outputs": [],
|
158
|
+
"source": [
|
159
|
+
"logValue, bestPath = model.decode(mappedSequence, algorithm=\"viterbi\")\n",
|
160
|
+
"\n",
|
161
|
+
"stateMap = {index: state for index, state in enumerate(hiddenStates)}\n",
|
162
|
+
"decodedPath = [stateMap[state] for state in bestPath]"
|
163
|
+
]
|
164
|
+
},
|
165
|
+
{
|
166
|
+
"cell_type": "code",
|
167
|
+
"execution_count": null,
|
168
|
+
"id": "b89319aa-b567-443c-98a9-60198caf1882",
|
169
|
+
"metadata": {},
|
170
|
+
"outputs": [],
|
171
|
+
"source": [
|
172
|
+
"resultFrame = pd.DataFrame({'Observation': observations, 'Predicted State': decodedPath})\n",
|
173
|
+
"print(\"\\nDecoded Path with Observations:\")\n",
|
174
|
+
"print(resultFrame)\n",
|
175
|
+
"\n",
|
176
|
+
"print(\"\\nLog Probability of Best Path:\", logValue)"
|
177
|
+
]
|
178
|
+
}
|
179
|
+
],
|
180
|
+
"metadata": {
|
181
|
+
"kernelspec": {
|
182
|
+
"display_name": "NEW-VENV-1",
|
183
|
+
"language": "python",
|
184
|
+
"name": "new-venv-1"
|
185
|
+
},
|
186
|
+
"language_info": {
|
187
|
+
"codemirror_mode": {
|
188
|
+
"name": "ipython",
|
189
|
+
"version": 3
|
190
|
+
},
|
191
|
+
"file_extension": ".py",
|
192
|
+
"mimetype": "text/x-python",
|
193
|
+
"name": "python",
|
194
|
+
"nbconvert_exporter": "python",
|
195
|
+
"pygments_lexer": "ipython3",
|
196
|
+
"version": "3.11.5"
|
197
|
+
}
|
198
|
+
},
|
199
|
+
"nbformat": 4,
|
200
|
+
"nbformat_minor": 5
|
201
|
+
}
|