noshot 1.0.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. noshot/data/ML TS XAI/TS/bill-charge.ipynb +239 -0
  2. noshot/data/ML TS XAI/{XAI/XAI 2/Exp-3 (EDA-loan).ipynb → TS/daily-min-temperatures.ipynb } +68 -50
  3. noshot/data/ML TS XAI/TS/data/bill-data.csv +21 -0
  4. noshot/data/ML TS XAI/TS/data/daily-min-temperatures.csv +3651 -0
  5. noshot/data/ML TS XAI/TS/data/monthly-sunspots.csv +2821 -0
  6. noshot/data/ML TS XAI/TS/monthly-sunspots.ipynb +241 -0
  7. {noshot-1.0.0.dist-info → noshot-2.0.0.dist-info}/METADATA +1 -1
  8. noshot-2.0.0.dist-info/RECORD +15 -0
  9. {noshot-1.0.0.dist-info → noshot-2.0.0.dist-info}/WHEEL +1 -1
  10. noshot/data/ML TS XAI/TS/10. Seasonal ARIMA Forecasting.ipynb +0 -246
  11. noshot/data/ML TS XAI/TS/11. Multivariate ARIMA Forecasting.ipynb +0 -228
  12. noshot/data/ML TS XAI/TS/6. ACF PACF.ipynb +0 -77
  13. noshot/data/ML TS XAI/TS/7. Differencing.ipynb +0 -167
  14. noshot/data/ML TS XAI/TS/8. ARMA Forecasting.ipynb +0 -197
  15. noshot/data/ML TS XAI/TS/9. ARIMA Forecasting.ipynb +0 -220
  16. noshot/data/ML TS XAI/XAI/XAI 1/EDA2_chipsdatset.ipynb +0 -633
  17. noshot/data/ML TS XAI/XAI/XAI 1/EDA_IRISH_8thjan.ipynb +0 -326
  18. noshot/data/ML TS XAI/XAI/XAI 1/XAI_EX1 MODEL BIAS (FINAL).ipynb +0 -487
  19. noshot/data/ML TS XAI/XAI/XAI 1/complete_guide_to_eda_on_text_data.ipynb +0 -845
  20. noshot/data/ML TS XAI/XAI/XAI 1/deepchecksframeworks.ipynb +0 -100
  21. noshot/data/ML TS XAI/XAI/XAI 1/deepexplainers (mnist).ipynb +0 -90
  22. noshot/data/ML TS XAI/XAI/XAI 1/guidedbackpropagation.ipynb +0 -203
  23. noshot/data/ML TS XAI/XAI/XAI 1/updated_image_EDA1_with_LRP.ipynb +0 -3998
  24. noshot/data/ML TS XAI/XAI/XAI 1/zebrastripes.ipynb +0 -271
  25. noshot/data/ML TS XAI/XAI/XAI 2/EXP_5.ipynb +0 -1545
  26. noshot/data/ML TS XAI/XAI/XAI 2/Exp-3 (EDA-movie).ipynb +0 -229
  27. noshot/data/ML TS XAI/XAI/XAI 2/Exp-4(Flower dataset).ipynb +0 -237
  28. noshot/data/ML TS XAI/XAI/XAI 2/Exp-4.ipynb +0 -241
  29. noshot/data/ML TS XAI/XAI/XAI 2/Exp_2.ipynb +0 -352
  30. noshot/data/ML TS XAI/XAI/XAI 2/Exp_7.ipynb +0 -110
  31. noshot/data/ML TS XAI/XAI/XAI 2/FeatureImportance_SensitivityAnalysis.ipynb +0 -708
  32. noshot-1.0.0.dist-info/RECORD +0 -32
  33. {noshot-1.0.0.dist-info → noshot-2.0.0.dist-info}/licenses/LICENSE.txt +0 -0
  34. {noshot-1.0.0.dist-info → noshot-2.0.0.dist-info}/top_level.txt +0 -0
noshot/data/ML TS XAI/XAI/XAI 2/FeatureImportance_SensitivityAnalysis.ipynb
@@ -1,708 +0,0 @@
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "8TKef6Jkajof"
- },
- "source": [
- "### Loading the modules"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "7KYFQmlqajof",
- "outputId": "1a71ad6f-3091-4c5c-ec82-03cf1e1aab21"
- },
- "outputs": [],
- "source": [
- "!pip install --upgrade scipy\n",
- "!pip install seaborn\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "import warnings\n",
- "warnings.filterwarnings(\"ignore\")\n",
- "np.random.seed(5)\n",
- "from sklearn import preprocessing\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.ensemble import RandomForestClassifier as RFC\n",
- "import tensorflow as tf\n",
- "tf.get_logger().setLevel(40) # suppress deprecation messages\n",
- "from tensorflow.keras.layers import Dense, Input, Embedding, Concatenate, Reshape, Dropout, Lambda\n",
- "from tensorflow.keras.models import Model\n",
- "from tensorflow.keras.utils import to_categorical"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Wb_u2GSOajog"
- },
- "source": [
- "### Loading the data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 503
- },
- "id": "1CjwRgzkajog",
- "outputId": "72d70ee8-aacc-45b3-e675-282d2181beca"
- },
- "outputs": [],
- "source": [
- "data = pd.read_csv('/content/Titanic-Dataset.csv')\n",
- "data.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "rLs-UAb8ajoh",
- "outputId": "3a8ed330-414b-44c5-dd96-9bab35313207"
- },
- "outputs": [],
- "source": [
- "data.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 387
- },
- "id": "whspZ3Reajoh",
- "outputId": "7b7deede-2214-4107-c49e-3aceaf8a82cc"
- },
- "outputs": [],
- "source": [
- "data.hist(layout = (2,5), figsize=(15,8), color = 'r')\n",
- "print('Data Distribution')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 466
- },
- "id": "jI55ZqFMajoh",
- "outputId": "c3e366a4-efcc-4d4c-e192-151bc70381bc"
- },
- "outputs": [],
- "source": [
- "import seaborn as sns\n",
- "\n",
- "print('This looks like a fairly imbalanced dataset')\n",
- "sns.countplot(x=\"Survived\", data=data, palette=\"bwr\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 178
- },
- "id": "sxB-fTkYajoh",
- "outputId": "5f15942e-2b8e-4ba0-95ae-4e0850e2aecd"
- },
- "outputs": [],
- "source": [
- "data['Survived'].value_counts()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "7vlvwRCRajoh",
- "outputId": "524e4854-4866-40e7-fb9e-06e1b25253ab"
- },
- "outputs": [],
- "source": [
- "print('Percentage of data belonging to class 1 is',int((268/768)*100))\n",
- "print('Percentage of data belonging to class 0 is',int((500/768)*100))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "uY6hXRMyajoi"
- },
- "source": [
- "### Null Check"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 460
- },
- "id": "Hxz-qLufajoi",
- "outputId": "9ff24398-48d8-422a-87dd-da5f2a430b8f"
- },
- "outputs": [],
- "source": [
- "data.isnull().sum()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "HpavXNQeajoi"
- },
- "source": [
- "### Duplication Check"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "xxoDvfGGajoi",
- "outputId": "c2461cac-f676-47d5-e41f-742a64243869"
- },
- "outputs": [],
- "source": [
- "data.duplicated().any()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "rvyds736ajoi"
- },
- "source": [
- "### Data Description"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 320
- },
- "id": "Lpco8HJTajoj",
- "outputId": "22c7389c-0614-4c10-bdc3-2e32e3c721a8"
- },
- "outputs": [],
- "source": [
- "data.describe()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "hzy-s1qYajoj"
- },
- "source": [
- "### Data Correlation"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 505
- },
- "id": "RCdfuJhjajoj",
- "outputId": "86716729-98d5-4183-c4f6-00194fb98287"
- },
- "outputs": [],
- "source": [
- "numerical_data = data.select_dtypes(include=np.number)\n",
- "correlation_matrix = numerical_data.corr()\n",
- "\n",
- " # Now you can visualize the correlation matrix using a heatmap if you want:\n",
- "sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "pevqOcfVajoj"
- },
- "source": [
- "### Outlier Check"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 73
- },
- "id": "9ETrhB9iajok",
- "outputId": "84cd8c89-6315-4437-9477-7009d08bfbbe"
- },
- "outputs": [],
- "source": [
- "data[(data['PassengerId'] == 0) & (data['Survived'] == 0) & (data['Pclass'] == 0)]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 877
- },
- "id": "WQR6YylPajok",
- "outputId": "6114eb59-63fc-4fcc-830d-b50bb70dd21b"
- },
- "outputs": [],
- "source": [
- "data[(data['Survived'] == 0)]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "NBQF8fhuajok"
- },
- "source": [
- "### Noise removal"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "aZ9yq8w8ajok",
- "outputId": "d4111ad5-7a99-4a94-af64-b3f64809dbef"
- },
- "outputs": [],
- "source": [
- "cleaned_data = data[(data['Survived'] != 0)]\n",
- "cleaned_data.shape"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "MOJp9AuIajok"
- },
- "source": [
- "### Feature Engineering"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "wV0BlJJ1ajol",
- "outputId": "9a23ce1e-efda-4f32-fe34-f490ad3c437f"
- },
- "outputs": [],
- "source": [
- "feature_engg_data = cleaned_data.copy()\n",
- "outlier_data = cleaned_data.copy()\n",
- "factor = 3\n",
- "\n",
- "\n",
- "columns_to_include = ['PassengerId'\t,'Survived',\t'Pclass','Age'\t,'SibSp',\t'Parch'\t,'Fare']\n",
- "for column in columns_to_include:\n",
- " upper_lim = feature_engg_data[column].mean () + feature_engg_data[column].std () * factor\n",
- " lower_lim = feature_engg_data[column].mean () - feature_engg_data[column].std () * factor\n",
- " feature_engg_data = feature_engg_data[(feature_engg_data[column] < upper_lim) & (feature_engg_data[column] > lower_lim)]\n",
- "\n",
- "outlier_data = pd.concat([outlier_data, feature_engg_data]).drop_duplicates(keep=False)\n",
- "\n",
- "print(feature_engg_data.shape)\n",
- "print(outlier_data.shape)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "03ebxkHzajol"
- },
- "source": [
- "### Normalization"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 425
- },
- "id": "u6o2xRP3ajol",
- "outputId": "14ede3fb-2e3d-4338-b782-a5f5d98aac6a"
- },
- "outputs": [],
- "source": [
- "\n",
- "\n",
- "factor = 2 # Reduced factor\n",
- "def normalize_data(df):\n",
- "    # Min-max scale every column to the 0-1 range; skip empty frames\n",
- "    if df.empty:\n",
- "        print(\"DataFrame is empty. Skipping normalization.\")\n",
- "        return df\n",
- "    val = df.values\n",
- "    min_max_normalizer = preprocessing.MinMaxScaler()\n",
- "    norm_val = min_max_normalizer.fit_transform(val)\n",
- "    df2 = pd.DataFrame(norm_val, columns=df.columns)\n",
- "    return df2\n",
- "\n",
- "norm_feature_engg_data = normalize_data(feature_engg_data)\n",
- "norm_outlier_data = normalize_data(outlier_data)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "6PbwZdYFajom"
- },
- "source": [
- "### Train-Test split"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 245
- },
- "id": "xF_ng_jUajom",
- "outputId": "9004a6d9-76ff-4bfc-99a8-33f7cad9fc95"
- },
- "outputs": [],
- "source": [
- "input_data = norm_feature_engg_data.drop(['Survived'],axis='columns')\n",
- "targets =norm_feature_engg_data.filter(['Survived'],axis='columns')\n",
- "\n",
- "x, x_test, y, y_test = train_test_split(input_data,targets,test_size=0.1,train_size=0.9, random_state=5)\n",
- "x_train, x_valid, y_train, y_valid = train_test_split(x,y,test_size = 0.22,train_size =0.78, random_state=5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "W72xPVnTajom"
- },
- "outputs": [],
- "source": [
- "def apply_RFC(X,y,columns):\n",
- " rfc = RFC(n_estimators=500,min_samples_leaf=round(len(X)*.01),random_state=5,n_jobs=-1)\n",
- " imp_features = rfc.fit(X,y).feature_importances_\n",
- " imp_features = pd.DataFrame(imp_features,columns=['Feature Importance'],index=columns)\n",
- " imp_features.sort_values(by=['Feature Importance'],inplace=True,ascending=False)\n",
- " imp_features['Moving Sum'] = imp_features['Feature Importance'].cumsum()\n",
- " imp_features = imp_features[imp_features['Moving Sum']<=0.95]\n",
- " top_features = imp_features.index.tolist()\n",
- " return imp_features, top_features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 228
- },
- "id": "3v7w3rA0ajon",
- "outputId": "6800cd0b-ef07-4e39-bd61-e1cc79fab7f1"
- },
- "outputs": [],
- "source": [
- "important_features, top_features = apply_RFC(x,y, data.columns.drop('Survived'))\n",
- "sns.barplot(important_features['Feature Importance'], important_features.index, palette = 'tab10')\n",
- "plt.title('Random Forest Feature Importance for: '+\"Titanic Dataset\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 332
- },
- "id": "yCoEW-SHajon",
- "outputId": "165910d8-df92-4d74-e467-d55b1e4a2f35"
- },
- "outputs": [],
- "source": [
- "# For this we need a trained model. So, let's train a model first, maybe with a neural network architecture.\n",
- "\n",
- "def model():\n",
- " '''\n",
- " Simple 3 layered Neural Network model for binary classification\n",
- " '''\n",
- " inp = Input(shape=(x_train.shape[1],))\n",
- " x = Dense(40, activation='relu')(inp)\n",
- " x = Dense(40, activation='relu')(x)\n",
- " op = Dense(2, activation='softmax')(x)\n",
- " model = Model(inputs=inp, outputs=op)\n",
- " model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
- " return model\n",
- "\n",
- "model = model()\n",
- "model.fit(x_train, to_categorical(y_train), batch_size=64, epochs=300, verbose=0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "fh231xMWajon",
- "outputId": "e7a32bce-74d8-48f1-cdf0-b95cef2fdceb"
- },
- "outputs": [],
- "source": [
- "# Evaluate the trained model\n",
- "model.evaluate(x_test, to_categorical(y_test))[1]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "ta_aHQFtajoo"
- },
- "source": [
- "Although we are not concerned about the final model accuracy, we do have a decent model to try sensitivity analysis on. Next, we will take a query instance and apply the 6-σ (six sigma) variation rule for sensitivity analysis to it."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "7gQXI06jajoo",
- "outputId": "c4f28a3e-dc44-4ec9-de0b-be5ce552225e"
- },
- "outputs": [],
- "source": [
- "query_instance = x_test.iloc[5].values.reshape((1,) + x_test.iloc[5].shape)\n",
- "print(\"Let's take a look at the normalized query data instance in which all the features are in the range of (0.0 - 1.0):\" )\n",
- "df_query = pd.DataFrame(query_instance, columns = input_data.columns)\n",
- "df_query"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "jqwBNsIyajop",
- "outputId": "63d294b2-03ad-4495-b294-d6ccf2dd45f6"
- },
- "outputs": [],
- "source": [
- "predicted_outcome = np.argmax(model.predict(query_instance))\n",
- "true_label = int(y_test.iloc[5][0])\n",
- "print(f\" The true label is : {true_label}\")\n",
- "print(f\" The predicted outcome is : {predicted_outcome}\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "QwdpQRCIajop"
- },
- "source": [
- "We can clearly see the model is correctly predicting the presence of diabetes. Now, let's see if it changes when we are doing sensitivity analysis, one by one for all the features."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "urfPmdM0ajop"
- },
- "source": [
- "The standard deviation (σ) for each feature can be calculated on the normalized training data, since we will be using the normalized data for the prediction part."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "CriOKzCIajoq"
- },
- "outputs": [],
- "source": [
- "sigma_glucose = np.std(x['Glucose'])\n",
- "sigma_bmi = np.std(x['BMI'])\n",
- "sigma_age = np.std(x['Age'])\n",
- "sigma_dpf = np.std(x['DiabetesPedigreeFunction'])\n",
- "sigma_pregnancies = np.std(x['Pregnancies'])\n",
- "sigma_insulin = np.std(x['Insulin'])\n",
- "sigma_bp = np.std(x['BloodPressure'])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "XHm6I6SPajoq",
- "outputId": "c72b595c-3094-462d-899a-7ffbfec95a9a"
- },
- "outputs": [],
- "source": [
- "# Let's see the sensitivity analysis plots now\n",
- "def sensitivity_analysis_plot(measure_tuple):  # takes one (feature_name, std_deviation) tuple\n",
- "    '''\n",
- "    Sensitivity Analysis plot using the 6-σ variation method\n",
- "    '''\n",
- "    (measure, sigma) = measure_tuple\n",
- "\n",
- "    sensitivity_output = []  # initialize an empty list\n",
- "    original_value = df_query[measure].copy()  # keep the original value so it can be restored\n",
- "    for k in [-3, -2, -1, 1, 2, 3]:\n",
- "        df_query[measure] = original_value.copy()\n",
- "        df_query[measure] = np.clip(df_query[measure] + k * sigma, 0.0, 1.0)\n",
- "        sensitivity_output.append(np.argmax(model.predict(df_query.values)))  # the most likely class\n",
- "    df_query[measure] = original_value.copy()  # restore before analysing the next feature\n",
- "    plt.plot(['-3σ', '-2σ', '-σ', 'σ', '2σ', '3σ'], sensitivity_output, 'r.-', label = 'Sensitivity output')\n",
- "    plt.axhline(y = predicted_outcome, color = 'b', linestyle = '--', label = 'Original Prediction')\n",
- "    plt.title(f'6-σ variation sensitivity plot for the feature: {measure}')\n",
- "    plt.legend()\n",
- "    plt.show()\n",
- "\n",
- "measure_tuple_list = [('Glucose', sigma_glucose),\n",
- "                      ('BMI', sigma_bmi),\n",
- "                      ('Age', sigma_age),\n",
- "                      ('DiabetesPedigreeFunction', sigma_dpf),\n",
- "                      ('Pregnancies', sigma_pregnancies),\n",
- "                      ('Insulin', sigma_insulin),\n",
- "                      ('BloodPressure', sigma_bp)]\n",
- "\n",
- "for measure_tuple in measure_tuple_list:\n",
- "    sensitivity_analysis_plot(measure_tuple)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "6UOYwVPIajoq"
- },
- "source": [
- "From the above plots, we observe how each feature is sensitive to positive or negative changes and how each feature contributes towards influencing the model outcome. The features Insulin, Diabetes Pedigree Function and Number of Pregnancies do not seem to be sensitive to any changes. The features Glucose, BMI and Blood Pressure seem to have a positive influence on the outcome; that is, if the values of these features are increased, the model is more likely to predict the presence of diabetes. Surprisingly, the feature Age shows a negative influence, meaning that if the age is increased, the model is less likely to predict the outcome as diabetes. This contradicts our prior knowledge, so it is quite an interesting observation and needs to be inspected further."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "Tkw0-qLIajoq"
- },
- "source": [
- "### Final Thoughts"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "6l9KPGqDajor"
- },
- "source": [
- "We have seen how influence-based methods like feature importance and sensitivity analysis can be applied to explain the influence of features on the model's decision-making process. However, I have only shown examples related to classification problems. I would strongly recommend trying out these methods for explaining models used for regression problems as well."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "F-NW-O3Eajor"
- },
- "source": [
- "### Reference"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "id": "sihXlZQhajor"
- },
- "source": [
- "1. Kaggle | Pima Indians Diabetes Database - https://www.kaggle.com/uciml/pima-indians-diabetes-database?select=diabetes.csv\n",
- "2. How to Calculate Feature Importance With Python | Machine Learning Mastery - https://machinelearningmastery.com/calculate-feature-importance-with-python/\n",
- "3. Some of the utility functions and code are taken from the GitHub Repository of the author - Aditya Bhattacharya https://github.com/adib0073"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "4IIeXhSIajor"
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
- }
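The deleted notebook's Final Thoughts suggest trying the same influence-based explanations on regression models. Below is a minimal sketch of that idea, applying the 6-σ variation loop to a regression model instead of a classifier; the synthetic dataset, the RandomForestRegressor, and all variable names are illustrative assumptions and are not code shipped in the noshot package.

import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Illustrative data and model (assumptions, not part of the package).
X, y = make_regression(n_samples=500, n_features=5, noise=10.0, random_state=5)
X = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(5)])
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=5)

reg = RandomForestRegressor(n_estimators=200, random_state=5, n_jobs=-1)
reg.fit(X_train, y_train)

query = X_test.iloc[[0]].copy()           # single query instance
baseline = float(reg.predict(query)[0])   # unperturbed prediction

# 6-sigma variation: shift one feature at a time by k * sigma and watch
# how far the regression output moves away from the baseline.
for feature in X_train.columns:
    sigma = X_train[feature].std()
    original = query.at[query.index[0], feature]
    outputs = []
    for k in [-3, -2, -1, 1, 2, 3]:
        perturbed = query.copy()
        perturbed[feature] = original + k * sigma
        outputs.append(float(reg.predict(perturbed)[0]))
    print(f"{feature}: baseline={baseline:.2f}, "
          f"perturbed range={min(outputs):.2f}..{max(outputs):.2f}")

Because the target is continuous, the sensitivity signal is the shift in the predicted value itself rather than a change in the argmax class, which is the only part of the notebook's classification recipe that needs to change.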