noshot 6.0.0__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb +691 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/1. EDA-PCA (Balance Scale Dataset).ipynb +147 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/1. EDA-PCA (Rice Dataset).ipynb +181 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/10. HMM Veterbi.ipynb +152 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Balance Scale Dataset).ipynb +117 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Iris Dataset).ipynb +156 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Sobar-72 Dataset).ipynb +215 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/3. LDA (Balance Scale Dataset).ipynb +78 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/3. LDA (NPHA Doctor Visits Dataset).ipynb +114 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/4. Linear Regression (Machine Dataset).ipynb +115 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/4. Linear Regression (Real Estate Dataset).ipynb +146 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/5. Logistic Regression (Magic04 Dataset).ipynb +130 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/5. Logistic Regression (Wine Dataset).ipynb +112 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb +118 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/6. Naive Bayes Classifier (Wine Dataset).ipynb +89 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/7. SVM (Rice Dataset).ipynb +120 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/8. FeedForward NN (Sobar72 Dataset).ipynb +262 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/9. CNN (Cifar10 Dataset).ipynb +156 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/1. PCA.ipynb +162 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/10. CNN.ipynb +100 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/11. HMM.ipynb +336 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/2. KNN.ipynb +149 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/3. LDA.ipynb +132 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/4. Linear Regression.ipynb +86 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/5. Logistic Regression.ipynb +115 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/6. Naive Bayes (Titanic).ipynb +196 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/6. Naive Bayes (Wine).ipynb +98 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/7. SVM Linear.ipynb +109 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/8. SVM Non-Linear.ipynb +195 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/9. FNN With Regularization.ipynb +189 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/9. FNN Without Regularization.ipynb +197 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/All in One Lab CIA 1 Q.ipynb +1087 -0
- {noshot-6.0.0.dist-info → noshot-7.0.0.dist-info}/METADATA +1 -1
- noshot-7.0.0.dist-info/RECORD +41 -0
- {noshot-6.0.0.dist-info → noshot-7.0.0.dist-info}/WHEEL +1 -1
- noshot/data/ML TS XAI/XAI/Q1.ipynb +0 -377
- noshot/data/ML TS XAI/XAI/Q2.ipynb +0 -362
- noshot/data/ML TS XAI/XAI/Q3.ipynb +0 -637
- noshot/data/ML TS XAI/XAI/Q4.ipynb +0 -206
- noshot/data/ML TS XAI/XAI/Q5.ipynb +0 -1018
- noshot-6.0.0.dist-info/RECORD +0 -14
- {noshot-6.0.0.dist-info → noshot-7.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {noshot-6.0.0.dist-info → noshot-7.0.0.dist-info}/top_level.txt +0 -0
@@ -1,362 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"cells": [
|
3
|
-
{
|
4
|
-
"cell_type": "raw",
|
5
|
-
"id": "29299513-721d-4214-9a2e-897ded70a9f6",
|
6
|
-
"metadata": {},
|
7
|
-
"source": [
|
8
|
-
"1.\tPerform a minimum of ten exploratory data analyses on the following text data (use the following code to download the text data):\n",
|
9
|
-
"from sklearn.datasets import fetch_20newsgroups\n",
|
10
|
-
"data = fetch_20newsgroups(subset='train')\n",
|
11
|
-
"print(data.data[0]) # first news article\n",
|
12
|
-
"\n",
|
13
|
-
"2.\tPerform a LIME-based explanation for a text classification model using the LIME Text Explainer. What insights can you draw from the explanations?\n"
|
14
|
-
]
|
15
|
-
},
|
16
|
-
{
|
17
|
-
"cell_type": "code",
|
18
|
-
"execution_count": null,
|
19
|
-
"id": "38fcd124-e405-40a0-a98c-7c3a2937bdc4",
|
20
|
-
"metadata": {},
|
21
|
-
"outputs": [],
|
22
|
-
"source": [
|
23
|
-
"import matplotlib.pyplot as plt\n",
|
24
|
-
"import seaborn as sns\n",
|
25
|
-
"import pandas as pd"
|
26
|
-
]
|
27
|
-
},
|
28
|
-
{
|
29
|
-
"cell_type": "code",
|
30
|
-
"execution_count": null,
|
31
|
-
"id": "c68cc4f4",
|
32
|
-
"metadata": {},
|
33
|
-
"outputs": [],
|
34
|
-
"source": [
|
35
|
-
"df=pd.read_csv('news.csv')\n",
|
36
|
-
"df.head()"
|
37
|
-
]
|
38
|
-
},
|
39
|
-
{
|
40
|
-
"cell_type": "code",
|
41
|
-
"execution_count": null,
|
42
|
-
"id": "c512dc22-82f3-47d9-ab9e-207d39389922",
|
43
|
-
"metadata": {},
|
44
|
-
"outputs": [],
|
45
|
-
"source": [
|
46
|
-
"# 1. Category distribution – Bar Plot\n",
|
47
|
-
"plt.figure(figsize=(12, 6))\n",
|
48
|
-
"sns.countplot(x=df['target']) # bar plot of target category indices\n",
|
49
|
-
"plt.title(\"Documents per Category\")\n",
|
50
|
-
"target_names=df['target'].unique()\n",
|
51
|
-
"plt.xticks(ticks=range(len(target_names)), labels=target_names, rotation=90) # label x-axis with category names\n",
|
52
|
-
"plt.tight_layout()\n",
|
53
|
-
"plt.show()\n"
|
54
|
-
]
|
55
|
-
},
|
56
|
-
{
|
57
|
-
"cell_type": "code",
|
58
|
-
"execution_count": null,
|
59
|
-
"id": "1a51ec41-e1cd-4a0a-a196-147e796a8e33",
|
60
|
-
"metadata": {},
|
61
|
-
"outputs": [],
|
62
|
-
"source": [
|
63
|
-
"# 2. Document length distribution – Histogram\n",
|
64
|
-
"doc_lengths = [len(doc.split()) for doc in df['document']] # compute word count per document\n",
|
65
|
-
"sns.histplot(doc_lengths, bins=50)\n",
|
66
|
-
"plt.title(\"Document Length Distribution\")\n",
|
67
|
-
"plt.xlabel(\"Words per document\")\n",
|
68
|
-
"plt.ylabel(\"Frequency\")\n",
|
69
|
-
"plt.show()"
|
70
|
-
]
|
71
|
-
},
|
72
|
-
{
|
73
|
-
"cell_type": "code",
|
74
|
-
"execution_count": null,
|
75
|
-
"id": "31477220-42cb-4b09-b823-5b1a15b85340",
|
76
|
-
"metadata": {
|
77
|
-
"scrolled": true
|
78
|
-
},
|
79
|
-
"outputs": [],
|
80
|
-
"source": [
|
81
|
-
"# 3. Average document length per category – Horizontal Bar Plot\n",
|
82
|
-
"df.rename(columns={'document':'text','target':'category'})\n",
|
83
|
-
"df['doc_len'] = df['text'].apply(lambda x: len(x.split()))\n",
|
84
|
-
"avg_len = df.groupby('category')['doc_len'].mean().sort_values()\n",
|
85
|
-
"plt.figure(figsize=(12, 6))\n",
|
86
|
-
"avg_len.plot(kind='barh')\n",
|
87
|
-
"plt.title(\"Average Document Length per Category\")\n",
|
88
|
-
"plt.xlabel(\"Average Word Count\")\n",
|
89
|
-
"plt.show()"
|
90
|
-
]
|
91
|
-
},
|
92
|
-
{
|
93
|
-
"cell_type": "code",
|
94
|
-
"execution_count": null,
|
95
|
-
"id": "57a3d581-a786-451e-ae59-1791b6dbc892",
|
96
|
-
"metadata": {},
|
97
|
-
"outputs": [],
|
98
|
-
"source": [
|
99
|
-
"# 4. Shortest and longest documents – Text output\n",
|
100
|
-
"shortest_doc = min(df['text'], key=lambda x: len(x.split()))\n",
|
101
|
-
"longest_doc = max(df['text'], key=lambda x: len(x.split()))\n",
|
102
|
-
"print(\"\\nShortest Document:\\n\", shortest_doc[:300], \"...\")\n",
|
103
|
-
"print(\"\\nLongest Document:\\n\", longest_doc[:300], \"...\")\n"
|
104
|
-
]
|
105
|
-
},
|
106
|
-
{
|
107
|
-
"cell_type": "code",
|
108
|
-
"execution_count": null,
|
109
|
-
"id": "cbeeeb8f-1b4a-434c-9090-a768c64cacc5",
|
110
|
-
"metadata": {},
|
111
|
-
"outputs": [],
|
112
|
-
"source": [
|
113
|
-
"# 5. Top 10 longest documents per category – Box Plot\n",
|
114
|
-
"top_docs = df.groupby('category')['doc_len'].nlargest(10).reset_index()\n",
|
115
|
-
"plt.figure(figsize=(12, 6))\n",
|
116
|
-
"sns.boxplot(x='category', y='doc_len', data=top_docs)\n",
|
117
|
-
"plt.xticks(rotation=90)\n",
|
118
|
-
"plt.title(\"Top 10 Longest Documents per Category\")\n",
|
119
|
-
"plt.ylabel(\"Word Count\")\n",
|
120
|
-
"plt.show()\n"
|
121
|
-
]
|
122
|
-
},
|
123
|
-
{
|
124
|
-
"cell_type": "code",
|
125
|
-
"execution_count": null,
|
126
|
-
"id": "83b4114a-2a97-46da-901f-af73d947f672",
|
127
|
-
"metadata": {},
|
128
|
-
"outputs": [],
|
129
|
-
"source": [
|
130
|
-
"# 6. Median document length per category – Bar Plot\n",
|
131
|
-
"median_len = df.groupby('category')['doc_len'].median().sort_values()\n",
|
132
|
-
"plt.figure(figsize=(12, 6))\n",
|
133
|
-
"median_len.plot(kind='barh')\n",
|
134
|
-
"plt.title(\"Median Document Length per Category\")\n",
|
135
|
-
"plt.xlabel(\"Median Word Count\")\n",
|
136
|
-
"plt.show()"
|
137
|
-
]
|
138
|
-
},
|
139
|
-
{
|
140
|
-
"cell_type": "code",
|
141
|
-
"execution_count": null,
|
142
|
-
"id": "15f7c924-46b9-4ced-81f9-a1595ebe5075",
|
143
|
-
"metadata": {},
|
144
|
-
"outputs": [],
|
145
|
-
"source": [
|
146
|
-
"# 7. Boxplot of document lengths per category – Box Plot\n",
|
147
|
-
"plt.figure(figsize=(14, 6))\n",
|
148
|
-
"sns.boxplot(x='category', y='doc_len', data=df)\n",
|
149
|
-
"plt.xticks(rotation=90)\n",
|
150
|
-
"plt.title(\"Document Length Distribution by Category\")\n",
|
151
|
-
"plt.ylabel(\"Word Count\")\n",
|
152
|
-
"plt.tight_layout()\n",
|
153
|
-
"plt.show()"
|
154
|
-
]
|
155
|
-
},
|
156
|
-
{
|
157
|
-
"cell_type": "code",
|
158
|
-
"execution_count": null,
|
159
|
-
"id": "4bfa4a18-e8a9-4e08-9efc-870da7aa1f81",
|
160
|
-
"metadata": {},
|
161
|
-
"outputs": [],
|
162
|
-
"source": [
|
163
|
-
"# 8. Number of empty or very short docs – Text output\n",
|
164
|
-
"short_docs = df[df['doc_len'] < 5]\n",
|
165
|
-
"print(f\"\\nNumber of documents with less than 5 words: {len(short_docs)}\")"
|
166
|
-
]
|
167
|
-
},
|
168
|
-
{
|
169
|
-
"cell_type": "code",
|
170
|
-
"execution_count": null,
|
171
|
-
"id": "2ad5a6b9-9589-4b45-a4a7-29a330d82daf",
|
172
|
-
"metadata": {},
|
173
|
-
"outputs": [],
|
174
|
-
"source": [
|
175
|
-
"# 9. Bar chart of total characters per category – Bar Plot\n",
|
176
|
-
"df['char_len'] = df['text'].apply(len)\n",
|
177
|
-
"total_chars = df.groupby('category')['char_len'].sum().sort_values()\n",
|
178
|
-
"plt.figure(figsize=(12, 6))\n",
|
179
|
-
"total_chars.plot(kind='barh')\n",
|
180
|
-
"plt.title(\"Total Characters per Category\")\n",
|
181
|
-
"plt.xlabel(\"Total Characters\")\n",
|
182
|
-
"plt.show()\n"
|
183
|
-
]
|
184
|
-
},
|
185
|
-
{
|
186
|
-
"cell_type": "code",
|
187
|
-
"execution_count": null,
|
188
|
-
"id": "ea3801ab-d444-4ffe-a8b6-f119f3928e3c",
|
189
|
-
"metadata": {},
|
190
|
-
"outputs": [],
|
191
|
-
"source": [
|
192
|
-
"word_lengths = []\n",
|
193
|
-
"for text in data.data:\n",
|
194
|
-
" words = text.split()\n",
|
195
|
-
" word_lengths.extend([len(word) for word in words])\n",
|
196
|
-
"\n",
|
197
|
-
"plt.figure(figsize=(8, 5))\n",
|
198
|
-
"sns.histplot(word_lengths, bins=30)\n",
|
199
|
-
"plt.title(\"Distribution of Word Lengths\")\n",
|
200
|
-
"plt.xlabel(\"Word Length\")\n",
|
201
|
-
"plt.ylabel(\"Frequency\")\n",
|
202
|
-
"plt.show()"
|
203
|
-
]
|
204
|
-
},
|
205
|
-
{
|
206
|
-
"cell_type": "code",
|
207
|
-
"execution_count": null,
|
208
|
-
"id": "b225dae4-c541-478b-9d53-e4150fb3820a",
|
209
|
-
"metadata": {},
|
210
|
-
"outputs": [],
|
211
|
-
"source": [
|
212
|
-
"import numpy as np\n",
|
213
|
-
"import lime\n",
|
214
|
-
"import lime.lime_text\n",
|
215
|
-
"from sklearn.pipeline import make_pipeline\n",
|
216
|
-
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
217
|
-
"from sklearn.linear_model import LogisticRegression\n",
|
218
|
-
"from lime.lime_text import LimeTextExplainer\n",
|
219
|
-
"import matplotlib.pyplot as plt"
|
220
|
-
]
|
221
|
-
},
|
222
|
-
{
|
223
|
-
"cell_type": "code",
|
224
|
-
"execution_count": null,
|
225
|
-
"id": "0fcd1486-ca85-4b45-951a-59992e0a6be7",
|
226
|
-
"metadata": {},
|
227
|
-
"outputs": [],
|
228
|
-
"source": [
|
229
|
-
"df= pd.read_csv(\"questions.csv\")\n",
|
230
|
-
"\n",
|
231
|
-
"texts= df[\"question1\"][:400]\n",
|
232
|
-
"labels=df[\"is_duplicate\"][:400]"
|
233
|
-
]
|
234
|
-
},
|
235
|
-
{
|
236
|
-
"cell_type": "code",
|
237
|
-
"execution_count": null,
|
238
|
-
"id": "d4b4f174-8ea7-4536-91b5-29501b4ea88b",
|
239
|
-
"metadata": {},
|
240
|
-
"outputs": [],
|
241
|
-
"source": [
|
242
|
-
"vectorizer = TfidfVectorizer()\n",
|
243
|
-
"X = vectorizer.fit_transform(texts)\n",
|
244
|
-
"classifier = LogisticRegression()\n",
|
245
|
-
"classifier.fit(X, labels)"
|
246
|
-
]
|
247
|
-
},
|
248
|
-
{
|
249
|
-
"cell_type": "code",
|
250
|
-
"execution_count": null,
|
251
|
-
"id": "e6fde476-0113-465d-876d-95cde0ed35b3",
|
252
|
-
"metadata": {},
|
253
|
-
"outputs": [],
|
254
|
-
"source": [
|
255
|
-
"pipeline = make_pipeline(vectorizer, classifier)\n",
|
256
|
-
"\n",
|
257
|
-
"# LIME Explainer\n",
|
258
|
-
"explainer = LimeTextExplainer(class_names=[\"Negative\", \"Positive\"])\n",
|
259
|
-
"\n",
|
260
|
-
"def explain_text(text):\n",
|
261
|
-
" exp = explainer.explain_instance(\n",
|
262
|
-
" text, pipeline.predict_proba, num_features=5\n",
|
263
|
-
" )\n",
|
264
|
-
" exp.show_in_notebook(text=True)\n",
|
265
|
-
" exp.save_to_file('lime_explanation.html')\n",
|
266
|
-
"\n",
|
267
|
-
" fig = exp.as_pyplot_figure()\n",
|
268
|
-
" plt.show()\n",
|
269
|
-
"\n",
|
270
|
-
" return exp\n",
|
271
|
-
"\n",
|
272
|
-
"# Test explanation\n",
|
273
|
-
"sample_text = \"I really enjoyed this film, it was fantastic!\"\n",
|
274
|
-
"explanation = explain_text(sample_text)"
|
275
|
-
]
|
276
|
-
},
|
277
|
-
{
|
278
|
-
"cell_type": "code",
|
279
|
-
"execution_count": null,
|
280
|
-
"id": "0af8afbc",
|
281
|
-
"metadata": {},
|
282
|
-
"outputs": [],
|
283
|
-
"source": [
|
284
|
-
"import pandas as pd\n",
|
285
|
-
"import numpy as np\n",
|
286
|
-
"import lime\n",
|
287
|
-
"import lime.lime_text\n",
|
288
|
-
"from sklearn.pipeline import make_pipeline\n",
|
289
|
-
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
290
|
-
"from sklearn.linear_model import LogisticRegression\n",
|
291
|
-
"from lime.lime_text import LimeTextExplainer\n",
|
292
|
-
"import matplotlib.pyplot as plt\n",
|
293
|
-
"\n",
|
294
|
-
"# Load the dataset\n",
|
295
|
-
"df = pd.read_csv(\"train.csv\")\n",
|
296
|
-
"\n",
|
297
|
-
"# Preprocess the data\n",
|
298
|
-
"texts = df[\"question1\"].fillna('') + \" \" + df[\"question2\"].fillna('')\n",
|
299
|
-
"labels = df[\"is_duplicate\"]\n",
|
300
|
-
"\n",
|
301
|
-
"# Vectorize the text data\n",
|
302
|
-
"vectorizer = TfidfVectorizer()\n",
|
303
|
-
"X = vectorizer.fit_transform(texts)\n",
|
304
|
-
"\n",
|
305
|
-
"# Train a classifier\n",
|
306
|
-
"classifier = LogisticRegression(max_iter=100)\n",
|
307
|
-
"classifier.fit(X, labels)\n",
|
308
|
-
"\n",
|
309
|
-
"# Create a pipeline\n",
|
310
|
-
"pipeline = make_pipeline(vectorizer, classifier)\n",
|
311
|
-
"\n",
|
312
|
-
"# Initialize LIME Explainer\n",
|
313
|
-
"explainer = LimeTextExplainer(class_names=[\"Not Duplicate\", \"Duplicate\"])\n",
|
314
|
-
"\n",
|
315
|
-
"def explain_text(text):\n",
|
316
|
-
" exp = explainer.explain_instance(\n",
|
317
|
-
" text, pipeline.predict_proba, num_features=5\n",
|
318
|
-
" )\n",
|
319
|
-
" exp.show_in_notebook(text=True)\n",
|
320
|
-
" exp.save_to_file('lime_explanation.html')\n",
|
321
|
-
"\n",
|
322
|
-
" fig = exp.as_pyplot_figure()\n",
|
323
|
-
" plt.show()\n",
|
324
|
-
"\n",
|
325
|
-
" return exp\n",
|
326
|
-
"\n",
|
327
|
-
"# Test explanation\n",
|
328
|
-
"sample_text = \"How can I improve my coding skills?\" # Replace with any question pair\n",
|
329
|
-
"explanation = explain_text(sample_text)\n"
|
330
|
-
]
|
331
|
-
},
|
332
|
-
{
|
333
|
-
"cell_type": "code",
|
334
|
-
"execution_count": null,
|
335
|
-
"id": "cf552f12",
|
336
|
-
"metadata": {},
|
337
|
-
"outputs": [],
|
338
|
-
"source": []
|
339
|
-
}
|
340
|
-
],
|
341
|
-
"metadata": {
|
342
|
-
"kernelspec": {
|
343
|
-
"display_name": "Python 3 (ipykernel)",
|
344
|
-
"language": "python",
|
345
|
-
"name": "python3"
|
346
|
-
},
|
347
|
-
"language_info": {
|
348
|
-
"codemirror_mode": {
|
349
|
-
"name": "ipython",
|
350
|
-
"version": 3
|
351
|
-
},
|
352
|
-
"file_extension": ".py",
|
353
|
-
"mimetype": "text/x-python",
|
354
|
-
"name": "python",
|
355
|
-
"nbconvert_exporter": "python",
|
356
|
-
"pygments_lexer": "ipython3",
|
357
|
-
"version": "3.12.4"
|
358
|
-
}
|
359
|
-
},
|
360
|
-
"nbformat": 4,
|
361
|
-
"nbformat_minor": 5
|
362
|
-
}
|