noshot 6.0.0__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb +691 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/1. EDA-PCA (Balance Scale Dataset).ipynb +147 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/1. EDA-PCA (Rice Dataset).ipynb +181 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/10. HMM Veterbi.ipynb +152 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Balance Scale Dataset).ipynb +117 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Iris Dataset).ipynb +156 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Sobar-72 Dataset).ipynb +215 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/3. LDA (Balance Scale Dataset).ipynb +78 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/3. LDA (NPHA Doctor Visits Dataset).ipynb +114 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/4. Linear Regression (Machine Dataset).ipynb +115 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/4. Linear Regression (Real Estate Dataset).ipynb +146 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/5. Logistic Regression (Magic04 Dataset).ipynb +130 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/5. Logistic Regression (Wine Dataset).ipynb +112 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb +118 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/6. Naive Bayes Classifier (Wine Dataset).ipynb +89 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/7. SVM (Rice Dataset).ipynb +120 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/8. FeedForward NN (Sobar72 Dataset).ipynb +262 -0
- noshot/data/ML TS XAI/ML/Tamilan Code/9. CNN (Cifar10 Dataset).ipynb +156 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/1. PCA.ipynb +162 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/10. CNN.ipynb +100 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/11. HMM.ipynb +336 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/2. KNN.ipynb +149 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/3. LDA.ipynb +132 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/4. Linear Regression.ipynb +86 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/5. Logistic Regression.ipynb +115 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/6. Naive Bayes (Titanic).ipynb +196 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/6. Naive Bayes (Wine).ipynb +98 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/7. SVM Linear.ipynb +109 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/8. SVM Non-Linear.ipynb +195 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/9. FNN With Regularization.ipynb +189 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/9. FNN Without Regularization.ipynb +197 -0
- noshot/data/ML TS XAI/ML/Whitefang Code/All in One Lab CIA 1 Q.ipynb +1087 -0
- {noshot-6.0.0.dist-info → noshot-7.0.0.dist-info}/METADATA +1 -1
- noshot-7.0.0.dist-info/RECORD +41 -0
- {noshot-6.0.0.dist-info → noshot-7.0.0.dist-info}/WHEEL +1 -1
- noshot/data/ML TS XAI/XAI/Q1.ipynb +0 -377
- noshot/data/ML TS XAI/XAI/Q2.ipynb +0 -362
- noshot/data/ML TS XAI/XAI/Q3.ipynb +0 -637
- noshot/data/ML TS XAI/XAI/Q4.ipynb +0 -206
- noshot/data/ML TS XAI/XAI/Q5.ipynb +0 -1018
- noshot-6.0.0.dist-info/RECORD +0 -14
- {noshot-6.0.0.dist-info → noshot-7.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {noshot-6.0.0.dist-info → noshot-7.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
noshot/__init__.py,sha256=000R40tii8lDFU8C1fBaD3SOnxD0PWRNWZU-km49YrU,21
|
2
|
+
noshot/main.py,sha256=zXegIqjJPARlPnQMS-B2dAENcvyaZkNwmue63Gm8lHU,663
|
3
|
+
noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb,sha256=dQ3HgLix6HLqPltFiPrElmEdYAsvR6flDpHEIjcngp4,24774
|
4
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/1. EDA-PCA (Balance Scale Dataset).ipynb,sha256=1QYmUb1QZ4FtmdwoWhTbF9divKNMOxS8AMOy56At0xg,3625
|
5
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/1. EDA-PCA (Rice Dataset).ipynb,sha256=1rp60fJyQl0bxzFWeJb6XR8VRtlQeonv9Yw5_9pvIH8,4133
|
6
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/10. HMM Veterbi.ipynb,sha256=0ESvYG9FT7wgcL2JUzMH2ChpSzevz2eez0X53a9wK20,4986
|
7
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Balance Scale Dataset).ipynb,sha256=tbkkRm6xHnmM-K8cRpnK8LH1pUmQl30bdyo0dFSNFcw,2988
|
8
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Iris Dataset).ipynb,sha256=9vxuGgpq2poMGb_AOJY_rpvUCzHwd-iCVYSXxseYVRs,4287
|
9
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Sobar-72 Dataset).ipynb,sha256=oEHLzQlc0aD1HiardgHPbTL2F-uXcm2_htA_dSmM68M,5840
|
10
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/3. LDA (Balance Scale Dataset).ipynb,sha256=Z3zwZQKJmvCEgzTWN1OqgiOAF9Lw5oLIY1A63SRJ5tg,2101
|
11
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/3. LDA (NPHA Doctor Visits Dataset).ipynb,sha256=N_IFGBAckF8vJI0lPPbZ1soG50B1_IVyACCyU7jvo3U,2651
|
12
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/4. Linear Regression (Machine Dataset).ipynb,sha256=PxFEgyFi6n5nURhtjeT__OP5T-UsggOI9RfBKfpDNBo,3081
|
13
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/4. Linear Regression (Real Estate Dataset).ipynb,sha256=avtEqkS38VccYJrQa91kjpmYG43dsDYiMcYtp70SbpA,3895
|
14
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/5. Logistic Regression (Magic04 Dataset).ipynb,sha256=sSujtrR8C9GGjpIR4v6YN6gTF1cYMIxz5Ufnv_Fp5-I,3376
|
15
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/5. Logistic Regression (Wine Dataset).ipynb,sha256=YphX35eCBBWu5sCSLS6bw__Em4gbwAzOW49z_Zv-tRs,2668
|
16
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb,sha256=gHvmS1w__3JxhdsxjcSstgrCfoBWfxp8e738O1rVlew,3077
|
17
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/6. Naive Bayes Classifier (Wine Dataset).ipynb,sha256=Ile_WuRAt8Is1HbKdDXu-ogHvQRNBGyxpd8OWauEEek,2058
|
18
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/7. SVM (Rice Dataset).ipynb,sha256=zJ4GGRSwNY73DQCEeAP8ladl6H_WB54B1C_nSyKb9q8,3762
|
19
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/8. FeedForward NN (Sobar72 Dataset).ipynb,sha256=JaXAnYDa1AViE2WErFX8QzExbNyGvDYTsf3Vdlie8rs,7122
|
20
|
+
noshot/data/ML TS XAI/ML/Tamilan Code/9. CNN (Cifar10 Dataset).ipynb,sha256=Jt_x0JTXNM1KqbYQ8afLtj0qIHysN63UUzFnmZfCE3c,3996
|
21
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/1. PCA.ipynb,sha256=QiJKjyYDWetwngiOwTi4fzuDIorkNLilAFV47V56kO4,3907
|
22
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/10. CNN.ipynb,sha256=zraQfH-LW-CYMMawfVX--jaejlcTB2SE92wscb_eb50,3329
|
23
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/11. HMM.ipynb,sha256=RvE_6vM5OWlFKVvGG9-K9sQfz9AtC_fRP5lgRgQrndo,11203
|
24
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/2. KNN.ipynb,sha256=CP1tuMZoL6MyMIZXn7PL_Epof_0l5EWhKz6ySg3u_W4,4049
|
25
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/3. LDA.ipynb,sha256=-VyjQ_i6r-1KaGagT3Aoq8UQ_1xYxcDPhmORxuu5eBg,3183
|
26
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/4. Linear Regression.ipynb,sha256=e6qdlsdkQn-2D8s55C5ekZrd8oClxIglwsJoyW624GQ,2630
|
27
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/5. Logistic Regression.ipynb,sha256=yC-rMnCgSjKyY7iVeuoIVlXq6ge8xYLKUijL2gAMuMo,3074
|
28
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/6. Naive Bayes (Titanic).ipynb,sha256=EItNyvs2EHMY42SBEHlKxJ8_y6Oi4qlJOjsEMcOGCWg,4572
|
29
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/6. Naive Bayes (Wine).ipynb,sha256=iW3yRzgGRgkhG-VkIGNU5LJuk-ef4ZlxmPx4Vl_PCSQ,2278
|
30
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/7. SVM Linear.ipynb,sha256=ZUd1r_W94BdAOMhpXfL6gCylrAgU7E2NOI3xkW4vnHM,3526
|
31
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/8. SVM Non-Linear.ipynb,sha256=E4psLvzD8XzKGTFyd2759CRjhUa-7WO8Ow577nDLIWo,6351
|
32
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/9. FNN With Regularization.ipynb,sha256=SKdyms9nCdr3e0O3Os6Om3kFz9ebahv0OueqhJ4Psc4,6980
|
33
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/9. FNN Without Regularization.ipynb,sha256=ZsdOcoPzaXM8bQV2ct5uOjRj6wF9Km0cc9iR1zRdXXQ,7520
|
34
|
+
noshot/data/ML TS XAI/ML/Whitefang Code/All in One Lab CIA 1 Q.ipynb,sha256=wJLu6e0vgrXxH_J1pVM8wB6Wg-o3lPcuzZ45hId1g2o,27364
|
35
|
+
noshot/utils/__init__.py,sha256=QVrN1ZpzPXxZqDOqot5-t_ulFjZXVx7Cvr-Is9AK0po,110
|
36
|
+
noshot/utils/shell_utils.py,sha256=-XfgYlNQlULa_rRJ3vsfTns4m_jiueGEj396J_y0Gus,2611
|
37
|
+
noshot-7.0.0.dist-info/licenses/LICENSE.txt,sha256=fgCruaVm5cUjFGOeEoGIimT6nnUunBqcNZHpGzK8TSw,1086
|
38
|
+
noshot-7.0.0.dist-info/METADATA,sha256=Z32kxd-l1ofmEoeEHljTLGJlzZrGDU5E1TE-C4nl8ac,2573
|
39
|
+
noshot-7.0.0.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
|
40
|
+
noshot-7.0.0.dist-info/top_level.txt,sha256=UL-c0HffdRwohz-y9icY_rnY48pQDdxGcBsgyCKh2Q8,7
|
41
|
+
noshot-7.0.0.dist-info/RECORD,,
|
@@ -1,377 +0,0 @@
|
|
1
|
-
{
|
2
|
-
"cells": [
|
3
|
-
{
|
4
|
-
"cell_type": "markdown",
|
5
|
-
"id": "b4680af5-77e8-4743-8acb-b60446e0a4d4",
|
6
|
-
"metadata": {},
|
7
|
-
"source": [
|
8
|
-
"<h1>Download the Credit Card Fraud Detection dataset. Use the SMOTE (Synthetic\n",
|
9
|
-
"Minority Oversampling Technique) algorithm to balance the dataset. Then, train\n",
|
10
|
-
"and evaluate a Logistic Regression model on the data before and after applying\n",
|
11
|
-
"SMOTE. Compare the models performance in both cases.</h1>"
|
12
|
-
]
|
13
|
-
},
|
14
|
-
{
|
15
|
-
"cell_type": "markdown",
|
16
|
-
"id": "ac8b845d-dc46-426d-a390-a7361b68685c",
|
17
|
-
"metadata": {},
|
18
|
-
"source": [
|
19
|
-
"<h1>dataset link</h1>"
|
20
|
-
]
|
21
|
-
},
|
22
|
-
{
|
23
|
-
"cell_type": "markdown",
|
24
|
-
"id": "16013e87-7144-4430-8f08-306d0a4ba365",
|
25
|
-
"metadata": {},
|
26
|
-
"source": [
|
27
|
-
"<h1>https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud</h1>"
|
28
|
-
]
|
29
|
-
},
|
30
|
-
{
|
31
|
-
"cell_type": "code",
|
32
|
-
"execution_count": null,
|
33
|
-
"id": "25445452",
|
34
|
-
"metadata": {},
|
35
|
-
"outputs": [],
|
36
|
-
"source": [
|
37
|
-
"#!pip install pandas matplotlib seaborn scikit-learn imbalanced-learn --quiet\n"
|
38
|
-
]
|
39
|
-
},
|
40
|
-
{
|
41
|
-
"cell_type": "code",
|
42
|
-
"execution_count": null,
|
43
|
-
"id": "e7899e39",
|
44
|
-
"metadata": {},
|
45
|
-
"outputs": [],
|
46
|
-
"source": [
|
47
|
-
"#pip install -U scikit-learn imbalanced-learn\n"
|
48
|
-
]
|
49
|
-
},
|
50
|
-
{
|
51
|
-
"cell_type": "code",
|
52
|
-
"execution_count": null,
|
53
|
-
"id": "e1255b28-ef76-4019-80c6-8f8d57b142d0",
|
54
|
-
"metadata": {},
|
55
|
-
"outputs": [],
|
56
|
-
"source": [
|
57
|
-
"import pandas as pd\n",
|
58
|
-
"import matplotlib.pyplot as plt\n",
|
59
|
-
"import seaborn as sns\n",
|
60
|
-
"from sklearn.model_selection import train_test_split\n",
|
61
|
-
"from sklearn.linear_model import LogisticRegression\n",
|
62
|
-
"from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc\n",
|
63
|
-
"from imblearn.over_sampling import SMOTE\n",
|
64
|
-
"import warnings\n",
|
65
|
-
"warnings.filterwarnings('ignore')\n",
|
66
|
-
"# Load dataset\n",
|
67
|
-
"df = pd.read_csv(\"creditcard.csv\")\n",
|
68
|
-
"\n",
|
69
|
-
"# Features and target\n",
|
70
|
-
"X = df.drop(columns=[\"Class\"])\n",
|
71
|
-
"y = df[\"Class\"]\n",
|
72
|
-
"\n",
|
73
|
-
"# Train-test split\n",
|
74
|
-
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
|
75
|
-
"\n",
|
76
|
-
"# Class distribution before SMOTE\n",
|
77
|
-
"plt.figure(figsize=(5, 4))\n",
|
78
|
-
"sns.countplot(x=y)\n",
|
79
|
-
"plt.title(\"Class Distribution Before SMOTE\")\n",
|
80
|
-
"plt.xlabel(\"Class\")\n",
|
81
|
-
"plt.ylabel(\"Count\")\n",
|
82
|
-
"plt.show()\n",
|
83
|
-
"\n"
|
84
|
-
]
|
85
|
-
},
|
86
|
-
{
|
87
|
-
"cell_type": "code",
|
88
|
-
"execution_count": null,
|
89
|
-
"id": "83bd4937",
|
90
|
-
"metadata": {},
|
91
|
-
"outputs": [],
|
92
|
-
"source": [
|
93
|
-
"df['Class'].value_counts()"
|
94
|
-
]
|
95
|
-
},
|
96
|
-
{
|
97
|
-
"cell_type": "code",
|
98
|
-
"execution_count": null,
|
99
|
-
"id": "03293d81-832c-40ce-bfa8-9b83183aeaa0",
|
100
|
-
"metadata": {},
|
101
|
-
"outputs": [],
|
102
|
-
"source": [
|
103
|
-
"# Logistic Regression without SMOTE\n",
|
104
|
-
"model = LogisticRegression(max_iter=1000)\n",
|
105
|
-
"model.fit(X_train, y_train)\n",
|
106
|
-
"y_pred = model.predict(X_test)\n"
|
107
|
-
]
|
108
|
-
},
|
109
|
-
{
|
110
|
-
"cell_type": "code",
|
111
|
-
"execution_count": null,
|
112
|
-
"id": "8d13d00e-4b51-4984-aa8a-bed9bdb28d9a",
|
113
|
-
"metadata": {},
|
114
|
-
"outputs": [],
|
115
|
-
"source": [
|
116
|
-
"plt.title(\"Confusion Matrix Before SMOTE\")\n",
|
117
|
-
"sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt=\"d\")\n",
|
118
|
-
"plt.show()"
|
119
|
-
]
|
120
|
-
},
|
121
|
-
{
|
122
|
-
"cell_type": "code",
|
123
|
-
"execution_count": null,
|
124
|
-
"id": "3bdc518e-3080-4866-a26f-b20b7411f3c3",
|
125
|
-
"metadata": {},
|
126
|
-
"outputs": [],
|
127
|
-
"source": [
|
128
|
-
"# Apply SMOTE\n",
|
129
|
-
"smote = SMOTE(random_state=42)\n",
|
130
|
-
"X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)\n",
|
131
|
-
"\n",
|
132
|
-
"# Class distribution after SMOTE\n",
|
133
|
-
"plt.figure(figsize=(5, 4))\n",
|
134
|
-
"sns.countplot(x=y_train_smote)\n",
|
135
|
-
"plt.title(\"Class Distribution After SMOTE\")\n",
|
136
|
-
"plt.xlabel(\"Class\")\n",
|
137
|
-
"plt.ylabel(\"Count\")\n",
|
138
|
-
"plt.show()\n",
|
139
|
-
"\n",
|
140
|
-
"# Logistic Regression with SMOTE\n",
|
141
|
-
"model_smote = LogisticRegression(max_iter=1000)\n",
|
142
|
-
"model_smote.fit(X_train_smote, y_train_smote)\n",
|
143
|
-
"y_pred_smote = model_smote.predict(X_test)\n"
|
144
|
-
]
|
145
|
-
},
|
146
|
-
{
|
147
|
-
"cell_type": "code",
|
148
|
-
"execution_count": null,
|
149
|
-
"id": "43dd7ca8-d47f-4695-b67a-5759f08245ef",
|
150
|
-
"metadata": {},
|
151
|
-
"outputs": [],
|
152
|
-
"source": [
|
153
|
-
"plt.title(\"Confusion Matrix After SMOTE\")\n",
|
154
|
-
"sns.heatmap(confusion_matrix(y_test, y_pred_smote), annot=True, fmt=\"d\")\n",
|
155
|
-
"plt.show()"
|
156
|
-
]
|
157
|
-
},
|
158
|
-
{
|
159
|
-
"cell_type": "code",
|
160
|
-
"execution_count": null,
|
161
|
-
"id": "b362b271-4469-4da2-a692-9961234f0024",
|
162
|
-
"metadata": {},
|
163
|
-
"outputs": [],
|
164
|
-
"source": [
|
165
|
-
"\n",
|
166
|
-
"fpr1, tpr1, _ = roc_curve(y_test, model.predict_proba(X_test)[:, 1])\n",
|
167
|
-
"fpr2, tpr2, _ = roc_curve(y_test, model_smote.predict_proba(X_test)[:, 1])\n",
|
168
|
-
"plt.figure(figsize=(6, 4))\n",
|
169
|
-
"plt.plot(fpr1, tpr1, label=f\"Before SMOTE (AUC = {auc(fpr1, tpr1):.2f})\")\n",
|
170
|
-
"plt.plot(fpr2, tpr2, label=f\"After SMOTE (AUC = {auc(fpr2, tpr2):.2f})\")\n",
|
171
|
-
"plt.plot([0, 1], [0, 1], 'k--')\n",
|
172
|
-
"plt.xlabel(\"False Positive Rate\")\n",
|
173
|
-
"plt.ylabel(\"True Positive Rate\")\n",
|
174
|
-
"plt.title(\"ROC Curve Comparison\")\n",
|
175
|
-
"plt.legend()\n",
|
176
|
-
"plt.show()\n",
|
177
|
-
"\n",
|
178
|
-
"# Classification reports\n",
|
179
|
-
"print(\"Before SMOTE:\\n\", classification_report(y_test, y_pred))\n",
|
180
|
-
"print(\"After SMOTE:\\n\", classification_report(y_test, y_pred_smote))\n"
|
181
|
-
]
|
182
|
-
},
|
183
|
-
{
|
184
|
-
"cell_type": "markdown",
|
185
|
-
"id": "00d7baa5-b1a1-4bb5-b8c3-2552f5572af1",
|
186
|
-
"metadata": {},
|
187
|
-
"source": [
|
188
|
-
"<h1>Load minist data set using the following code:\n",
|
189
|
-
"from tensorflow.keras.datasets import mnist\n",
|
190
|
-
"# Loads the MNIST dataset\n",
|
191
|
-
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
|
192
|
-
"Perform minimum of five EDA on the above mentioned data set.</h1>"
|
193
|
-
]
|
194
|
-
},
|
195
|
-
{
|
196
|
-
"cell_type": "code",
|
197
|
-
"execution_count": null,
|
198
|
-
"id": "6860aef9-f583-44d1-a538-b8f8adc3026f",
|
199
|
-
"metadata": {},
|
200
|
-
"outputs": [],
|
201
|
-
"source": [
|
202
|
-
"import numpy as np\n",
|
203
|
-
"import matplotlib.pyplot as plt\n",
|
204
|
-
"import seaborn as sns\n",
|
205
|
-
"from tensorflow.keras.datasets import mnist\n",
|
206
|
-
"from tensorflow.image import flip_left_right, rot90"
|
207
|
-
]
|
208
|
-
},
|
209
|
-
{
|
210
|
-
"cell_type": "code",
|
211
|
-
"execution_count": null,
|
212
|
-
"id": "17ce9851-25fb-40e7-ac3b-27c3f386fe49",
|
213
|
-
"metadata": {},
|
214
|
-
"outputs": [],
|
215
|
-
"source": [
|
216
|
-
"(x_train, y_train), (x_test, y_test) = mnist.load_data()"
|
217
|
-
]
|
218
|
-
},
|
219
|
-
{
|
220
|
-
"cell_type": "code",
|
221
|
-
"execution_count": null,
|
222
|
-
"id": "a48e2cfb-5f35-4350-b62b-3e63a446c646",
|
223
|
-
"metadata": {},
|
224
|
-
"outputs": [],
|
225
|
-
"source": [
|
226
|
-
"# 1. Function to plot images from the dataset\n",
|
227
|
-
"def plot_sample_images(images, labels, count=10):\n",
|
228
|
-
" plt.figure(figsize=(15, 2))\n",
|
229
|
-
" for i in range(count):\n",
|
230
|
-
" plt.subplot(1, count, i+1)\n",
|
231
|
-
" plt.imshow(images[i], cmap='gray')\n",
|
232
|
-
" plt.title(f\"Label: {labels[i]}\")\n",
|
233
|
-
" plt.axis('off')\n",
|
234
|
-
" plt.show()\n",
|
235
|
-
"\n",
|
236
|
-
"plot_sample_images(x_train, y_train)"
|
237
|
-
]
|
238
|
-
},
|
239
|
-
{
|
240
|
-
"cell_type": "code",
|
241
|
-
"execution_count": null,
|
242
|
-
"id": "0f97abbe-26ce-421f-9dc4-e3c86a6deb45",
|
243
|
-
"metadata": {},
|
244
|
-
"outputs": [],
|
245
|
-
"source": [
|
246
|
-
"# 2.Visualize class distribution\n",
|
247
|
-
"sns.countplot(x=y_train)\n",
|
248
|
-
"plt.title(\"Class Distribution\")\n",
|
249
|
-
"plt.xlabel(\"Digit\")\n",
|
250
|
-
"plt.ylabel(\"Count\")\n",
|
251
|
-
"plt.show()\n"
|
252
|
-
]
|
253
|
-
},
|
254
|
-
{
|
255
|
-
"cell_type": "code",
|
256
|
-
"execution_count": null,
|
257
|
-
"id": "686a47bc-374b-4a92-a3b3-53455700643d",
|
258
|
-
"metadata": {},
|
259
|
-
"outputs": [],
|
260
|
-
"source": [
|
261
|
-
"# 3.Plot the distribution of image sizes\n",
|
262
|
-
"sizes = [(img.shape[0], img.shape[1]) for img in x_train]\n",
|
263
|
-
"sns.histplot(sizes)\n",
|
264
|
-
"plt.title(\"Image Size Distribution\")\n",
|
265
|
-
"plt.xlabel(\"Size\")\n",
|
266
|
-
"plt.ylabel(\"Frequency\")\n",
|
267
|
-
"plt.show()"
|
268
|
-
]
|
269
|
-
},
|
270
|
-
{
|
271
|
-
"cell_type": "code",
|
272
|
-
"execution_count": null,
|
273
|
-
"id": "afe4ceee-437a-40ca-ba7b-79dab6f8e668",
|
274
|
-
"metadata": {},
|
275
|
-
"outputs": [],
|
276
|
-
"source": [
|
277
|
-
"# 4.Plot the distribution of pixel values (RGB channels) for a sample image\n",
|
278
|
-
"sample_img = x_train[0]\n",
|
279
|
-
"plt.hist(sample_img.ravel(), bins=50, color='blue', alpha=0.7)\n",
|
280
|
-
"plt.title(\"Pixel Value Distribution\")\n",
|
281
|
-
"plt.xlabel(\"Pixel Intensity\")\n",
|
282
|
-
"plt.ylabel(\"Count\")\n",
|
283
|
-
"plt.show()\n"
|
284
|
-
]
|
285
|
-
},
|
286
|
-
{
|
287
|
-
"cell_type": "code",
|
288
|
-
"execution_count": null,
|
289
|
-
"id": "492d7e9f-a98e-43f0-9a08-c2bed7fec88f",
|
290
|
-
"metadata": {},
|
291
|
-
"outputs": [],
|
292
|
-
"source": [
|
293
|
-
"#5. Function to apply basic augmentation techniques\n",
|
294
|
-
"def augment_image(img):\n",
|
295
|
-
" flipped = flip_left_right(img[..., np.newaxis])\n",
|
296
|
-
" rotated = rot90(img[..., np.newaxis])\n",
|
297
|
-
" return flipped.numpy().squeeze(), rotated.numpy().squeeze()\n",
|
298
|
-
"\n",
|
299
|
-
"flip, rot = augment_image(x_train[0])\n",
|
300
|
-
"plt.subplot(1, 3, 1)\n",
|
301
|
-
"plt.imshow(x_train[0], cmap='gray')\n",
|
302
|
-
"plt.title(\"Original\")\n",
|
303
|
-
"plt.axis('off')\n",
|
304
|
-
"plt.subplot(1, 3, 2)\n",
|
305
|
-
"plt.imshow(flip, cmap='gray')\n",
|
306
|
-
"plt.title(\"Flipped\")\n",
|
307
|
-
"plt.axis('off')\n",
|
308
|
-
"plt.subplot(1, 3, 3)\n",
|
309
|
-
"plt.imshow(rot, cmap='gray')\n",
|
310
|
-
"plt.title(\"Rotated\")\n",
|
311
|
-
"plt.axis('off')\n",
|
312
|
-
"plt.show()"
|
313
|
-
]
|
314
|
-
},
|
315
|
-
{
|
316
|
-
"cell_type": "code",
|
317
|
-
"execution_count": null,
|
318
|
-
"id": "cbb73cd8-3006-4962-bb82-da6cb38aed10",
|
319
|
-
"metadata": {},
|
320
|
-
"outputs": [],
|
321
|
-
"source": [
|
322
|
-
"# 6.Calculate mean and standard deviation of pixel values\n",
|
323
|
-
"mean = np.mean(x_train)\n",
|
324
|
-
"std = np.std(x_train)\n",
|
325
|
-
"print(f\"Mean pixel value: {mean:.2f}, Standard deviation: {std:.2f}\")\n"
|
326
|
-
]
|
327
|
-
},
|
328
|
-
{
|
329
|
-
"cell_type": "code",
|
330
|
-
"execution_count": null,
|
331
|
-
"id": "881c91f4-5d2b-4538-85d4-9607ee130c85",
|
332
|
-
"metadata": {},
|
333
|
-
"outputs": [],
|
334
|
-
"source": [
|
335
|
-
"#7. Display one image from each class\n",
|
336
|
-
"plt.figure(figsize=(12, 4))\n",
|
337
|
-
"for digit in range(10):\n",
|
338
|
-
" idx = np.where(y_train == digit)[0][0]\n",
|
339
|
-
" plt.subplot(2, 5, digit+1)\n",
|
340
|
-
" plt.imshow(x_train[idx], cmap='gray')\n",
|
341
|
-
" plt.title(f\"Digit: {digit}\")\n",
|
342
|
-
" plt.axis('off')\n",
|
343
|
-
"plt.tight_layout()\n",
|
344
|
-
"plt.show()\n"
|
345
|
-
]
|
346
|
-
},
|
347
|
-
{
|
348
|
-
"cell_type": "code",
|
349
|
-
"execution_count": null,
|
350
|
-
"id": "14cfcbec-306a-4996-9cb7-d73d32fb2d54",
|
351
|
-
"metadata": {},
|
352
|
-
"outputs": [],
|
353
|
-
"source": []
|
354
|
-
}
|
355
|
-
],
|
356
|
-
"metadata": {
|
357
|
-
"kernelspec": {
|
358
|
-
"display_name": "Python 3 (ipykernel)",
|
359
|
-
"language": "python",
|
360
|
-
"name": "python3"
|
361
|
-
},
|
362
|
-
"language_info": {
|
363
|
-
"codemirror_mode": {
|
364
|
-
"name": "ipython",
|
365
|
-
"version": 3
|
366
|
-
},
|
367
|
-
"file_extension": ".py",
|
368
|
-
"mimetype": "text/x-python",
|
369
|
-
"name": "python",
|
370
|
-
"nbconvert_exporter": "python",
|
371
|
-
"pygments_lexer": "ipython3",
|
372
|
-
"version": "3.12.4"
|
373
|
-
}
|
374
|
-
},
|
375
|
-
"nbformat": 4,
|
376
|
-
"nbformat_minor": 5
|
377
|
-
}
|