noshot 5.0.0.tar.gz → 6.0.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: noshot
-Version: 5.0.0
+Version: 6.0.0
 Summary: Support library for Artificial Intelligence, Machine Learning and Data Science tools
 Author: Tim Stan S
 License: MIT
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: noshot
-Version: 5.0.0
+Version: 6.0.0
 Summary: Support library for Artificial Intelligence, Machine Learning and Data Science tools
 Author: Tim Stan S
 License: MIT
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as f:
 
 setup(
     name="noshot",
-    version="5.0.0",
+    version="6.0.0",
     author="Tim Stan S",
     description="Support library for Artificial Intelligence, Machine Learning and Data Science tools",
     long_description=long_description,
@@ -0,0 +1,377 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "b4680af5-77e8-4743-8acb-b60446e0a4d4",
+   "metadata": {},
+   "source": [
+    "<h1>Download the Credit Card Fraud Detection dataset. Use the SMOTE (Synthetic\n",
+    "Minority Oversampling Technique) algorithm to balance the dataset. Then, train\n",
+    "and evaluate a Logistic Regression model on the data before and after applying\n",
+    "SMOTE. Compare the models' performance in both cases.</h1>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ac8b845d-dc46-426d-a390-a7361b68685c",
+   "metadata": {},
+   "source": [
+    "<h1>dataset link</h1>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "16013e87-7144-4430-8f08-306d0a4ba365",
+   "metadata": {},
+   "source": [
+    "<h1>https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud</h1>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "25445452",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#!pip install pandas matplotlib seaborn scikit-learn imbalanced-learn --quiet\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e7899e39",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#pip install -U scikit-learn imbalanced-learn\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1255b28-ef76-4019-80c6-8f8d57b142d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc\n",
+    "from imblearn.over_sampling import SMOTE\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "# Load dataset\n",
+    "df = pd.read_csv(\"creditcard.csv\")\n",
+    "\n",
+    "# Features and target\n",
+    "X = df.drop(columns=[\"Class\"])\n",
+    "y = df[\"Class\"]\n",
+    "\n",
+    "# Train-test split\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
+    "\n",
+    "# Class distribution before SMOTE\n",
+    "plt.figure(figsize=(5, 4))\n",
+    "sns.countplot(x=y)\n",
+    "plt.title(\"Class Distribution Before SMOTE\")\n",
+    "plt.xlabel(\"Class\")\n",
+    "plt.ylabel(\"Count\")\n",
+    "plt.show()\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "83bd4937",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df['Class'].value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "03293d81-832c-40ce-bfa8-9b83183aeaa0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Logistic Regression without SMOTE\n",
+    "model = LogisticRegression(max_iter=1000)\n",
+    "model.fit(X_train, y_train)\n",
+    "y_pred = model.predict(X_test)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d13d00e-4b51-4984-aa8a-bed9bdb28d9a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.title(\"Confusion Matrix Before SMOTE\")\n",
+    "sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt=\"d\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3bdc518e-3080-4866-a26f-b20b7411f3c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Apply SMOTE\n",
+    "smote = SMOTE(random_state=42)\n",
+    "X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)\n",
+    "\n",
+    "# Class distribution after SMOTE\n",
+    "plt.figure(figsize=(5, 4))\n",
+    "sns.countplot(x=y_train_smote)\n",
+    "plt.title(\"Class Distribution After SMOTE\")\n",
+    "plt.xlabel(\"Class\")\n",
+    "plt.ylabel(\"Count\")\n",
+    "plt.show()\n",
+    "\n",
+    "# Logistic Regression with SMOTE\n",
+    "model_smote = LogisticRegression(max_iter=1000)\n",
+    "model_smote.fit(X_train_smote, y_train_smote)\n",
+    "y_pred_smote = model_smote.predict(X_test)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "43dd7ca8-d47f-4695-b67a-5759f08245ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.title(\"Confusion Matrix After SMOTE\")\n",
+    "sns.heatmap(confusion_matrix(y_test, y_pred_smote), annot=True, fmt=\"d\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b362b271-4469-4da2-a692-9961234f0024",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "fpr1, tpr1, _ = roc_curve(y_test, model.predict_proba(X_test)[:, 1])\n",
+    "fpr2, tpr2, _ = roc_curve(y_test, model_smote.predict_proba(X_test)[:, 1])\n",
+    "plt.figure(figsize=(6, 4))\n",
+    "plt.plot(fpr1, tpr1, label=f\"Before SMOTE (AUC = {auc(fpr1, tpr1):.2f})\")\n",
+    "plt.plot(fpr2, tpr2, label=f\"After SMOTE (AUC = {auc(fpr2, tpr2):.2f})\")\n",
+    "plt.plot([0, 1], [0, 1], 'k--')\n",
+    "plt.xlabel(\"False Positive Rate\")\n",
+    "plt.ylabel(\"True Positive Rate\")\n",
+    "plt.title(\"ROC Curve Comparison\")\n",
+    "plt.legend()\n",
+    "plt.show()\n",
+    "\n",
+    "# Classification reports\n",
+    "print(\"Before SMOTE:\\n\", classification_report(y_test, y_pred))\n",
+    "print(\"After SMOTE:\\n\", classification_report(y_test, y_pred_smote))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "00d7baa5-b1a1-4bb5-b8c3-2552f5572af1",
+   "metadata": {},
+   "source": [
+    "<h1>Load the MNIST dataset using the following code:\n",
+    "from tensorflow.keras.datasets import mnist\n",
+    "# Loads the MNIST dataset\n",
+    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
+    "Perform a minimum of five EDA steps on the above-mentioned dataset.</h1>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6860aef9-f583-44d1-a538-b8f8adc3026f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "from tensorflow.keras.datasets import mnist\n",
+    "from tensorflow.image import flip_left_right, rot90"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "17ce9851-25fb-40e7-ac3b-27c3f386fe49",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(x_train, y_train), (x_test, y_test) = mnist.load_data()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a48e2cfb-5f35-4350-b62b-3e63a446c646",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 1. Function to plot sample images from the dataset\n",
+    "def plot_sample_images(images, labels, count=10):\n",
+    "    plt.figure(figsize=(15, 2))\n",
+    "    for i in range(count):\n",
+    "        plt.subplot(1, count, i+1)\n",
+    "        plt.imshow(images[i], cmap='gray')\n",
+    "        plt.title(f\"Label: {labels[i]}\")\n",
+    "        plt.axis('off')\n",
+    "    plt.show()\n",
+    "\n",
+    "plot_sample_images(x_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0f97abbe-26ce-421f-9dc4-e3c86a6deb45",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 2. Visualize class distribution\n",
+    "sns.countplot(x=y_train)\n",
+    "plt.title(\"Class Distribution\")\n",
+    "plt.xlabel(\"Digit\")\n",
+    "plt.ylabel(\"Count\")\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "686a47bc-374b-4a92-a3b3-53455700643d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 3. Plot the distribution of image sizes\n",
+    "sizes = [(img.shape[0], img.shape[1]) for img in x_train]\n",
+    "sns.histplot(sizes)\n",
+    "plt.title(\"Image Size Distribution\")\n",
+    "plt.xlabel(\"Size\")\n",
+    "plt.ylabel(\"Frequency\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "afe4ceee-437a-40ca-ba7b-79dab6f8e668",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 4. Plot the distribution of pixel values for a sample (grayscale) image\n",
+    "sample_img = x_train[0]\n",
+    "plt.hist(sample_img.ravel(), bins=50, color='blue', alpha=0.7)\n",
+    "plt.title(\"Pixel Value Distribution\")\n",
+    "plt.xlabel(\"Pixel Intensity\")\n",
+    "plt.ylabel(\"Count\")\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "492d7e9f-a98e-43f0-9a08-c2bed7fec88f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 5. Function to apply basic augmentation techniques\n",
+    "def augment_image(img):\n",
+    "    flipped = flip_left_right(img[..., np.newaxis])\n",
+    "    rotated = rot90(img[..., np.newaxis])\n",
+    "    return flipped.numpy().squeeze(), rotated.numpy().squeeze()\n",
+    "\n",
+    "flip, rot = augment_image(x_train[0])\n",
+    "plt.subplot(1, 3, 1)\n",
+    "plt.imshow(x_train[0], cmap='gray')\n",
+    "plt.title(\"Original\")\n",
+    "plt.axis('off')\n",
+    "plt.subplot(1, 3, 2)\n",
+    "plt.imshow(flip, cmap='gray')\n",
+    "plt.title(\"Flipped\")\n",
+    "plt.axis('off')\n",
+    "plt.subplot(1, 3, 3)\n",
+    "plt.imshow(rot, cmap='gray')\n",
+    "plt.title(\"Rotated\")\n",
+    "plt.axis('off')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cbb73cd8-3006-4962-bb82-da6cb38aed10",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 6. Calculate mean and standard deviation of pixel values\n",
+    "mean = np.mean(x_train)\n",
+    "std = np.std(x_train)\n",
+    "print(f\"Mean pixel value: {mean:.2f}, Standard deviation: {std:.2f}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "881c91f4-5d2b-4538-85d4-9607ee130c85",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 7. Display one image from each class\n",
+    "plt.figure(figsize=(12, 4))\n",
+    "for digit in range(10):\n",
+    "    idx = np.where(y_train == digit)[0][0]\n",
+    "    plt.subplot(2, 5, digit+1)\n",
+    "    plt.imshow(x_train[idx], cmap='gray')\n",
+    "    plt.title(f\"Digit: {digit}\")\n",
+    "    plt.axis('off')\n",
+    "plt.tight_layout()\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14cfcbec-306a-4996-9cb7-d73d32fb2d54",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
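
Note on the SMOTE notebook added in 6.0.0: it oversamples only the training split and evaluates both models on the same untouched test set, which is the appropriate protocol. For readers who want to try the before/after comparison without downloading the Kaggle creditcard.csv file, here is a minimal, self-contained sketch. It is not part of the package; it substitutes a synthetic imbalanced dataset (scikit-learn's make_classification) for the real data and assumes scikit-learn and imbalanced-learn are installed.

# Sketch only: synthetic data stands in for creditcard.csv (an assumption, not the package's code).
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

# Roughly 1% positives, loosely mimicking the fraud class ratio.
X, y = make_classification(n_samples=20000, n_features=20, weights=[0.99, 0.01], random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Baseline: logistic regression trained on the imbalanced training split.
baseline = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# SMOTE is applied to the training split only; the test split stays untouched.
X_res, y_res = SMOTE(random_state=42).fit_resample(X_train, y_train)
balanced = LogisticRegression(max_iter=1000).fit(X_res, y_res)

for name, model in [("Before SMOTE", baseline), ("After SMOTE", balanced)]:
    auc_score = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    print(f"{name}: ROC AUC = {auc_score:.3f}")
    print(classification_report(y_test, model.predict(X_test)))

On data this skewed, overall accuracy looks high either way; the notebook's comparison of confusion matrices, classification reports, and ROC curves is what actually surfaces the effect of oversampling on the minority class.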
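Similarly, for the notebook's MNIST EDA cells, a compact, plotting-free sketch of a few of the same checks (shapes, class balance, pixel statistics) is shown below. It assumes TensorFlow is installed, since the notebook loads MNIST via tensorflow.keras.datasets.

# Sketch only: non-plotting EDA checks mirroring the notebook's MNIST cells.
import numpy as np
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Shapes and dtypes: 60,000 train and 10,000 test images of 28x28 grayscale pixels.
print("train:", x_train.shape, x_train.dtype, "| test:", x_test.shape)

# Class balance: counts per digit in the training labels.
digits, counts = np.unique(y_train, return_counts=True)
print(dict(zip(digits.tolist(), counts.tolist())))

# Global pixel statistics (values are 0-255 before any scaling).
print(f"mean={x_train.mean():.2f}, std={x_train.std():.2f}")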