noshot-1.0.0-py3-none-any.whl → noshot-3.0.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- noshot/data/ML TS XAI/ML/Main/1. EDA-PCA (Balance Scale Dataset).ipynb +139 -0
- noshot/data/ML TS XAI/ML/Main/1. EDA-PCA (Rice Dataset).ipynb +181 -0
- noshot/data/ML TS XAI/ML/Main/10. HMM Veterbi.ipynb +228 -0
- noshot/data/ML TS XAI/ML/Main/2. KNN (Balance Scale Dataset).ipynb +117 -0
- noshot/data/ML TS XAI/ML/Main/2. KNN (Iris Dataset).ipynb +165 -0
- noshot/data/ML TS XAI/ML/Main/2. KNN (Sobar-72 Dataset).ipynb +251 -0
- noshot/data/ML TS XAI/ML/Main/3. LDA (Balance Scale Dataset).ipynb +78 -0
- noshot/data/ML TS XAI/ML/Main/3. LDA (NPHA Doctor Visits Dataset).ipynb +114 -0
- noshot/data/ML TS XAI/ML/Main/4. Linear Regression (Machine Dataset).ipynb +115 -0
- noshot/data/ML TS XAI/ML/Main/4. Linear Regression (Real Estate Dataset).ipynb +159 -0
- noshot/data/ML TS XAI/{XAI/XAI 2/Exp-3 (EDA-loan).ipynb → ML/Main/5. Logistic Regression (Magic04 Dataset).ipynb} +53 -74
- noshot/data/ML TS XAI/ML/Main/5. Logistic Regression (Wine Dataset).ipynb +112 -0
- noshot/data/ML TS XAI/ML/Main/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb +153 -0
- noshot/data/ML TS XAI/ML/Main/6. Naive Bayes Classifier (Wine Dataset).ipynb +89 -0
- noshot/data/ML TS XAI/ML/Main/7. SVM (Rice Dataset).ipynb +208 -0
- noshot/data/ML TS XAI/ML/Main/8. FeedForward NN (Sobar72 Dataset).ipynb +260 -0
- noshot/data/ML TS XAI/ML/Main/9. CNN (Cifar10 Dataset).ipynb +238 -0
- noshot/data/ML TS XAI/ML/Main/data/agaricus-lepiota.data +8124 -0
- noshot/data/ML TS XAI/ML/Main/data/balance-scale.txt +625 -0
- noshot/data/ML TS XAI/ML/Main/data/doctor-visits.csv +715 -0
- noshot/data/ML TS XAI/ML/Main/data/iris.csv +151 -0
- noshot/data/ML TS XAI/ML/Main/data/machine-data.csv +210 -0
- noshot/data/ML TS XAI/ML/Main/data/magic04.data +19020 -0
- noshot/data/ML TS XAI/ML/Main/data/real-estate.xlsx +0 -0
- noshot/data/ML TS XAI/ML/Main/data/rice.arff +3826 -0
- noshot/data/ML TS XAI/ML/Main/data/sobar-72.csv +73 -0
- noshot/data/ML TS XAI/ML/Main/data/wine-dataset.csv +179 -0
- noshot/data/ML TS XAI/ML/Other Codes.ipynb +158 -0
- noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb +691 -0
- {noshot-1.0.0.dist-info → noshot-3.0.0.dist-info}/METADATA +1 -1
- noshot-3.0.0.dist-info/RECORD +38 -0
- {noshot-1.0.0.dist-info → noshot-3.0.0.dist-info}/WHEEL +1 -1
- noshot/data/ML TS XAI/TS/10. Seasonal ARIMA Forecasting.ipynb +0 -246
- noshot/data/ML TS XAI/TS/11. Multivariate ARIMA Forecasting.ipynb +0 -228
- noshot/data/ML TS XAI/TS/6. ACF PACF.ipynb +0 -77
- noshot/data/ML TS XAI/TS/7. Differencing.ipynb +0 -167
- noshot/data/ML TS XAI/TS/8. ARMA Forecasting.ipynb +0 -197
- noshot/data/ML TS XAI/TS/9. ARIMA Forecasting.ipynb +0 -220
- noshot/data/ML TS XAI/XAI/XAI 1/EDA2_chipsdatset.ipynb +0 -633
- noshot/data/ML TS XAI/XAI/XAI 1/EDA_IRISH_8thjan.ipynb +0 -326
- noshot/data/ML TS XAI/XAI/XAI 1/XAI_EX1 MODEL BIAS (FINAL).ipynb +0 -487
- noshot/data/ML TS XAI/XAI/XAI 1/complete_guide_to_eda_on_text_data.ipynb +0 -845
- noshot/data/ML TS XAI/XAI/XAI 1/deepchecksframeworks.ipynb +0 -100
- noshot/data/ML TS XAI/XAI/XAI 1/deepexplainers (mnist).ipynb +0 -90
- noshot/data/ML TS XAI/XAI/XAI 1/guidedbackpropagation.ipynb +0 -203
- noshot/data/ML TS XAI/XAI/XAI 1/updated_image_EDA1_with_LRP.ipynb +0 -3998
- noshot/data/ML TS XAI/XAI/XAI 1/zebrastripes.ipynb +0 -271
- noshot/data/ML TS XAI/XAI/XAI 2/EXP_5.ipynb +0 -1545
- noshot/data/ML TS XAI/XAI/XAI 2/Exp-3 (EDA-movie).ipynb +0 -229
- noshot/data/ML TS XAI/XAI/XAI 2/Exp-4(Flower dataset).ipynb +0 -237
- noshot/data/ML TS XAI/XAI/XAI 2/Exp-4.ipynb +0 -241
- noshot/data/ML TS XAI/XAI/XAI 2/Exp_2.ipynb +0 -352
- noshot/data/ML TS XAI/XAI/XAI 2/Exp_7.ipynb +0 -110
- noshot/data/ML TS XAI/XAI/XAI 2/FeatureImportance_SensitivityAnalysis.ipynb +0 -708
- noshot-1.0.0.dist-info/RECORD +0 -32
- {noshot-1.0.0.dist-info → noshot-3.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {noshot-1.0.0.dist-info → noshot-3.0.0.dist-info}/top_level.txt +0 -0
noshot/data/ML TS XAI/XAI/XAI 2/Exp_2.ipynb (deleted)
@@ -1,352 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "Gdwg19aUY6j6"
-   },
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "from sklearn.datasets import make_classification\n",
-    "from imblearn.over_sampling import SMOTE\n",
-    "from collections import Counter\n",
-    "from sklearn.naive_bayes import GaussianNB # Importing Naive Bayes\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay\n",
-    "import seaborn as sns\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 1000
-    },
-    "id": "hrwNktWpZoNM",
-    "outputId": "e5640cd0-28f0-4c67-a0cd-b22b22091984"
-   },
-   "outputs": [],
-   "source": [
-    "X, y = make_classification(n_samples=1000, n_features=10, n_classes=2,\n",
-    "                           class_sep=2, weights=[0.9, 0.1], random_state=42)\n",
-    "\n",
-    "print(\"Original class distribution:\", Counter(y)) #counter is used to count the number of occurrences\n",
-    "\n",
-    "# Plot the class distribution\n",
-    "plt.figure(figsize=(6,4))\n",
-    "plt.bar(['Class 0', 'Class 1'], [Counter(y)[0], Counter(y)[1]], color=['cyan', 'black'])\n",
-    "plt.title('Original Class Distribution')\n",
-    "plt.ylabel('Frequency')\n",
-    "plt.show()\n",
-    "\n",
-    "# Apply SMOTE (Synthetic Minority Over-sampling Technique) for oversampling\n",
-    "smote = SMOTE(random_state=42)\n",
-    "\n",
-    "X_res, y_res = smote.fit_resample(X, y)\n",
-    "\n",
-    "# Display new class distribution\n",
-    "print(\"Resampled class distribution:\", Counter(y_res))\n",
-    "\n",
-    "# Plot the resampled class distribution\n",
-    "plt.figure(figsize=(6,4))\n",
-    "plt.bar(['Class 0', 'Class 1'], [Counter(y_res)[0], Counter(y_res)[1]], color=['cyan', 'black'])\n",
-    "plt.title('Resampled Class Distribution (SMOTE)')\n",
-    "plt.ylabel('Frequency')\n",
-    "plt.show()\n",
-    "\n",
-    "# Train and evaluate a Naive Bayes classifier on the resampled data\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.3, random_state=42)\n",
-    "\n",
-    "# Using Naive Bayes Classifier\n",
-    "clf = GaussianNB()\n",
-    "clf.fit(X_train, y_train)\n",
-    "y_pred_resampled = clf.predict(X_test)\n",
-    "\n",
-    "# Evaluate the model on the resampled dataset\n",
-    "print(\"Classification report on resampled data:\")\n",
-    "print(classification_report(y_test, y_pred_resampled))\n",
-    "\n",
-    "# Confusion Matrix for resampled data\n",
-    "cm_resampled = confusion_matrix(y_test, y_pred_resampled)\n",
-    "disp = ConfusionMatrixDisplay(confusion_matrix=cm_resampled, display_labels=['Class 0', 'Class 1'])\n",
-    "disp.plot(cmap='Blues')\n",
-    "plt.title(\"Confusion Matrix - Resampled Data\")\n",
-    "plt.show()\n",
-    "\n",
-    "# Now, evaluate the model on the original imbalanced data\n",
-    "X_train_imbalanced, X_test_imbalanced, y_train_imbalanced, y_test_imbalanced = train_test_split(X, y, test_size=0.3, random_state=42)\n",
-    "\n",
-    "# Train the Naive Bayes Classifier on imbalanced data\n",
-    "clf.fit(X_train_imbalanced, y_train_imbalanced)\n",
-    "y_pred_imbalanced = clf.predict(X_test_imbalanced)\n",
-    "\n",
-    "# Evaluate the model on the original imbalanced dataset\n",
-    "print(\"Classification report on imbalanced data:\")\n",
-    "print(classification_report(y_test_imbalanced, y_pred_imbalanced))\n",
-    "\n",
-    "# Confusion Matrix for imbalanced data\n",
-    "cm_imbalanced = confusion_matrix(y_test_imbalanced, y_pred_imbalanced)\n",
-    "disp_imbalanced = ConfusionMatrixDisplay(confusion_matrix=cm_imbalanced, display_labels=['Class 0', 'Class 1'])\n",
-    "disp_imbalanced.plot(cmap='Blues')\n",
-    "plt.title(\"Confusion Matrix - Imbalanced Data\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 455
-    },
-    "id": "iXasw8cxb8OV",
-    "outputId": "eb0e821f-61fd-4a85-c94a-fdcfa55b2ed4"
-   },
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import matplotlib.pyplot as plt\n",
-    "from sklearn.linear_model import LinearRegression\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "\n",
-    "# Generate data with a linear relationship (y = 2X + 1)\n",
-    "np.random.seed(42)\n",
-    "X = np.linspace(0, 10, 100)#generates 100 evenly spaced points in the range from 0 to 10\n",
-    "y = 2 * X + 1 # True underlying function\n",
-    "\n",
-    "# Add some noise to the data\n",
-    "noise = np.random.normal(0, 2, X.shape)#take ranom values from a normal distribution of mean 0 and SD 2 same as X.shape\n",
-    "y_noisy = y + noise #y is y added with noise\n",
-    "\n",
-    "# Introduce outliers\n",
-    "X_outliers = np.array([2, 4, 6, 8])\n",
-    "y_outliers = np.array([25, 30, 28, 35]) # Outliers with large values\n",
-    "X_combined = np.concatenate((X, X_outliers))\n",
-    "y_combined = np.concatenate((y_noisy, y_outliers))\n",
-    "\n",
-    "# Split the data into training and test sets\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X_combined, y_combined, test_size=0.2, random_state=42)\n",
-    "\n",
-    "# Reshape data to fit into linear regression model\n",
-    "X_train = X_train.reshape(-1, 1) #1: Tells NumPy to automatically calculate the number of rows based on the total number of elements\n",
-    "\n",
-    "# and the specified number of columns (1 in this case).\n",
-    "X_test = X_test.reshape(-1, 1)\n",
-    "\n",
-    "\n",
-    "# Fit the linear regression model\n",
-    "model = LinearRegression()\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Predict on both training and testing data\n",
-    "y_train_pred = model.predict(X_train)\n",
-    "y_test_pred = model.predict(X_test)\n",
-    "# Plot the results\n",
-    "plt.figure(figsize=(10, 6))\n",
-    "plt.scatter(X_train, y_train, color='blue', label='Training data')\n",
-    "plt.scatter(X_test, y_test, color='green', label='Test data')\n",
-    "plt.plot(X_test, y_test_pred, color='cyan', label='2nd Fitted line')\n",
-    "plt.plot(X_train, y_train_pred, color='red', label='1st Fitted line (with outliers)',linestyle='-.')\n",
-    "plt.plot(X, y, color='black', label='True line (y = 2X + 1)', linestyle='--')\n",
-    "plt.legend()\n",
-    "plt.xlabel('X')\n",
-    "plt.ylabel('y')\n",
-    "plt.title('Linear Regression with Outliers')\n",
-    "plt.show()\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 363
-    },
-    "id": "fi-j-O-dfIWz",
-    "outputId": "81b98378-aff1-436a-e32b-47a5f23929ba"
-   },
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import matplotlib.pyplot as plt\n",
-    "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.metrics import accuracy_score, log_loss\n",
-    "from scipy.stats import entropy\n",
-    "\n",
-    "# Step 1: Generate synthetic data (initial dataset)\n",
-    "np.random.seed(42)\n",
-    "n_samples = 500\n",
-    "X1 = np.random.normal(0, 1, n_samples) # Feature 1\n",
-    "X2 = np.random.normal(0, 1, n_samples) # Feature 2\n",
-    "X = np.column_stack((X1, X2))\n",
-    "y = (X1 + X2 > 0).astype(int) # Binary target\n",
-    "\n",
-    "# Step 2: Fit logistic regression on the initial data\n",
-    "model = LogisticRegression()\n",
-    "model.fit(X, y)\n",
-    "\n",
-    "# Step 3: Simulate drift (change the distribution of features)\n",
-    "X1_drifted = np.random.normal(1, 1, n_samples) # Shift mean from 0 to 1\n",
-    "X2_drifted = np.random.normal(1, 1, n_samples) # Shift mean from 0 to 1\n",
-    "X_drifted = np.column_stack((X1_drifted, X2_drifted))\n",
-    "y_drifted = (X1_drifted + X2_drifted > 0).astype(int) # New labels based on drifted data\n",
-    "\n",
-    "# Step 4: Detect drift using KL divergence\n",
-    "def kl_divergence(p, q, bins=10):\n",
-    "    \"\"\"Calculate KL divergence between two distributions.\"\"\"\n",
-    "    p_hist, _ = np.histogram(p, bins=bins, density=True)\n",
-    "    q_hist, _ = np.histogram(q, bins=bins, density=True)\n",
-    "    p_hist += 1e-10 # Avoid division by zero\n",
-    "    q_hist += 1e-10\n",
-    "    return entropy(p_hist, q_hist)\n",
-    "\n",
-    "# Calculate KL divergence for each feature\n",
-    "kl_X1 = kl_divergence(X1, X1_drifted)\n",
-    "kl_X2 = kl_divergence(X2, X2_drifted)\n",
-    "\n",
-    "# Step 5: Evaluate model performance on drifted data\n",
-    "y_pred_drifted = model.predict(X_drifted)\n",
-    "accuracy_drifted = accuracy_score(y_drifted, y_pred_drifted)\n",
-    "log_loss_drifted = log_loss(y_drifted, model.predict_proba(X_drifted))\n",
-    "\n",
-    "# Step 6: Visualization\n",
-    "plt.figure(figsize=(12, 5))\n",
-    "\n",
-    "# Plot original and drifted distributions\n",
-    "plt.subplot(1, 2, 1)\n",
-    "plt.hist(X1, bins=20, alpha=0.6, label=\"Feature 1 (original)\", color=\"blue\")\n",
-    "plt.hist(X1_drifted, bins=20, alpha=0.6, label=\"Feature 1 (drifted)\", color=\"orange\")\n",
-    "plt.title(f\"KL Divergence for Feature 1: {kl_X1:.4f}\")\n",
-    "plt.legend()\n",
-    "\n",
-    "plt.subplot(1, 2, 2)\n",
-    "plt.hist(X2, bins=20, alpha=0.6, label=\"Feature 2 (original)\", color=\"blue\")\n",
-    "plt.hist(X2_drifted, bins=20, alpha=0.6, label=\"Feature 2 (drifted)\", color=\"orange\")\n",
-    "plt.title(f\"KL Divergence for Feature 2: {kl_X2:.4f}\")\n",
-    "plt.legend()\n",
-    "\n",
-    "plt.show()\n",
-    "\n",
-    "# Step 7: Print performance metrics\n",
-    "print(f\"Model accuracy on drifted data: {accuracy_drifted:.4f}\")\n",
-    "print(f\"Log loss on drifted data: {log_loss_drifted:.4f}\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "hAnWt1NKfg_c",
-    "outputId": "cd7ab20d-b6d8-433c-f6b2-5875068ac0ae"
-   },
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "from sklearn.linear_model import LogisticRegression\n",
-    "from sklearn.metrics import classification_report\n",
-    "\n",
-    "# Simulated dataset\n",
-    "data = pd.DataFrame({\n",
-    "    'Income': [30000, 45000, 60000, 80000, 20000, 50000],\n",
-    "    'CreditScore': [600, 650, 700, 750, 550, 680],\n",
-    "    'Gender': [0, 1, 0, 1, 0, 1], # 0: Male, 1: Female\n",
-    "    'Approved': [0, 1, 1, 1, 0, 1]\n",
-    "})\n",
-    "\n",
-    "X = data[['Income', 'CreditScore', 'Gender']]\n",
-    "y = data['Approved']\n",
-    "\n",
-    "# Train-test split\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
-    "\n",
-    "# Logistic Regression model\n",
-    "model = LogisticRegression()\n",
-    "model.fit(X_train, y_train)\n",
-    "y_pred = model.predict(X_test)\n",
-    "\n",
-    "# Display results\n",
-    "print(\"Classification Report:\\n\", classification_report(y_test, y_pred))\n",
-    "\n",
-    "# Identifying bias in coefficients\n",
-    "print(\"Model Coefficients:\", model.coef_)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "MoNlmk5jfikz",
-    "outputId": "8b987f2b-983f-48cf-89b5-f203712a144e"
-   },
-   "outputs": [],
-   "source": [
-    "import tensorflow as tf\n",
-    "from tensorflow.keras import Sequential\n",
-    "from tensorflow.keras.layers import Dense\n",
-    "import numpy as np\n",
-    "\n",
-    "# Small synthetic dataset\n",
-    "X = np.random.rand(100, 1)\n",
-    "y = X**2 + np.random.normal(0, 0.05, (100, 1))\n",
-    "\n",
-    "# Train-test split\n",
-    "X_train, X_test = X[:80], X[80:]\n",
-    "y_train, y_test = y[:80], y[80:]\n",
-    "\n",
-    "# Overfitted Neural Network\n",
-    "model = Sequential([\n",
-    "    Dense(128, activation='relu', input_dim=1),\n",
-    "    Dense(128, activation='relu'),\n",
-    "    Dense(1)\n",
-    "])\n",
-    "\n",
-    "model.compile(optimizer='adam', loss='mse', metrics=['mae'])\n",
-    "model.fit(X_train, y_train, epochs=200, verbose=0)\n",
-    "\n",
-    "# Evaluation\n",
-    "train_loss = model.evaluate(X_train, y_train, verbose=0)\n",
-    "test_loss = model.evaluate(X_test, y_test, verbose=0)\n",
-    "print(\"Train Loss:\", train_loss)\n",
-    "print(\"Test Loss:\", test_loss)\n"
-   ]
-  }
- ],
- "metadata": {
-  "colab": {
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}

noshot/data/ML TS XAI/XAI/XAI 2/Exp_7.ipynb (deleted)
@@ -1,110 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "FFBo6iBNxTi5"
-   },
-   "source": [
-    "# **Deepchecks**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 1000,
-     "resources": {
-      "http://localhost:8080/jupyterlab-plotly.js": {
-       "data": "",
-       "headers": [
-        [
-         "content-length",
-         "0"
-        ]
-       ],
-       "ok": false,
-       "status": 404,
-       "status_text": "Not Found"
-      }
-     }
-    },
-    "id": "9SHBGTax-S9g",
-    "outputId": "6e74a110-e2d7-4c07-e1a1-e60f1ee98bac"
-   },
-   "outputs": [],
-   "source": [
-    "!pip install numpy==1.24.3\n",
-    "import pandas as pd\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "!pip install deepchecks\n",
-    "from deepchecks.tabular import Dataset\n",
-    "from deepchecks.tabular.suites import data_integrity, train_test_validation\n",
-    "\n",
-    "# Load the dataset manually (replace with the actual path)\n",
-    "file_path = \"/content/adults.csv\" # Update this with your dataset file path\n",
-    "column_names = [\n",
-    "    'age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',\n",
-    "    'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss',\n",
-    "    'hours-per-week', 'native-country', 'income'\n",
-    "]\n",
-    "\n",
-    "# Read CSV file\n",
-    "df = pd.read_csv(file_path, names=column_names, skipinitialspace=True)\n",
-    "\n",
-    "# Convert categorical features to string (if needed)\n",
-    "cat_features = ['workclass', 'education', 'marital-status', 'occupation',\n",
-    "                'relationship', 'race', 'sex', 'native-country']\n",
-    "df[cat_features] = df[cat_features].astype(str)\n",
-    "\n",
-    "# Separate features and labels\n",
-    "X = df.drop(columns=['income'])\n",
-    "y = df['income']\n",
-    "\n",
-    "# Split into train and test sets\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
-    "\n",
-    "# Create Deepchecks Dataset objects\n",
-    "train_ds = Dataset(X_train, label=y_train, cat_features=cat_features)\n",
-    "test_ds = Dataset(X_test, label=y_test, cat_features=cat_features)\n",
-    "\n",
-    "# Run Data Integrity Suite\n",
-    "integrity_suite = data_integrity()\n",
-    "integrity_result = integrity_suite.run(train_ds)\n",
-    "integrity_result.show()\n",
-    "\n",
-    "# Run Train-Test Validation Suite\n",
-    "validation_suite = train_test_validation()\n",
-    "validation_result = validation_suite.run(train_ds, test_ds)\n",
-    "validation_result.show()\n",
-    "\n"
-   ]
-  }
- ],
- "metadata": {
-  "colab": {
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}