noshot 5.0.0__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. noshot/data/ML TS XAI/ML/Rolls Royce AllinOne.ipynb +691 -0
  2. noshot/data/ML TS XAI/ML/Tamilan Code/1. EDA-PCA (Balance Scale Dataset).ipynb +147 -0
  3. noshot/data/ML TS XAI/ML/Tamilan Code/1. EDA-PCA (Rice Dataset).ipynb +181 -0
  4. noshot/data/ML TS XAI/ML/Tamilan Code/10. HMM Veterbi.ipynb +152 -0
  5. noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Balance Scale Dataset).ipynb +117 -0
  6. noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Iris Dataset).ipynb +156 -0
  7. noshot/data/ML TS XAI/ML/Tamilan Code/2. KNN (Sobar-72 Dataset).ipynb +215 -0
  8. noshot/data/ML TS XAI/ML/Tamilan Code/3. LDA (Balance Scale Dataset).ipynb +78 -0
  9. noshot/data/ML TS XAI/ML/Tamilan Code/3. LDA (NPHA Doctor Visits Dataset).ipynb +114 -0
  10. noshot/data/ML TS XAI/ML/Tamilan Code/4. Linear Regression (Machine Dataset).ipynb +115 -0
  11. noshot/data/ML TS XAI/ML/Tamilan Code/4. Linear Regression (Real Estate Dataset).ipynb +146 -0
  12. noshot/data/ML TS XAI/ML/Tamilan Code/5. Logistic Regression (Magic04 Dataset).ipynb +130 -0
  13. noshot/data/ML TS XAI/ML/Tamilan Code/5. Logistic Regression (Wine Dataset).ipynb +112 -0
  14. noshot/data/ML TS XAI/ML/Tamilan Code/6. Naive Bayes Classifier (Agaricus Lepiota Dataset).ipynb +118 -0
  15. noshot/data/ML TS XAI/ML/Tamilan Code/6. Naive Bayes Classifier (Wine Dataset).ipynb +89 -0
  16. noshot/data/ML TS XAI/ML/Tamilan Code/7. SVM (Rice Dataset).ipynb +120 -0
  17. noshot/data/ML TS XAI/ML/Tamilan Code/8. FeedForward NN (Sobar72 Dataset).ipynb +262 -0
  18. noshot/data/ML TS XAI/ML/Tamilan Code/9. CNN (Cifar10 Dataset).ipynb +156 -0
  19. noshot/data/ML TS XAI/ML/Whitefang Code/1. PCA.ipynb +162 -0
  20. noshot/data/ML TS XAI/ML/Whitefang Code/10. CNN.ipynb +100 -0
  21. noshot/data/ML TS XAI/ML/Whitefang Code/11. HMM.ipynb +336 -0
  22. noshot/data/ML TS XAI/ML/Whitefang Code/2. KNN.ipynb +149 -0
  23. noshot/data/ML TS XAI/ML/Whitefang Code/3. LDA.ipynb +132 -0
  24. noshot/data/ML TS XAI/ML/Whitefang Code/4. Linear Regression.ipynb +86 -0
  25. noshot/data/ML TS XAI/ML/Whitefang Code/5. Logistic Regression.ipynb +115 -0
  26. noshot/data/ML TS XAI/ML/Whitefang Code/6. Naive Bayes (Titanic).ipynb +196 -0
  27. noshot/data/ML TS XAI/ML/Whitefang Code/6. Naive Bayes (Wine).ipynb +98 -0
  28. noshot/data/ML TS XAI/ML/Whitefang Code/7. SVM Linear.ipynb +109 -0
  29. noshot/data/ML TS XAI/ML/Whitefang Code/8. SVM Non-Linear.ipynb +195 -0
  30. noshot/data/ML TS XAI/ML/Whitefang Code/9. FNN With Regularization.ipynb +189 -0
  31. noshot/data/ML TS XAI/ML/Whitefang Code/9. FNN Without Regularization.ipynb +197 -0
  32. noshot/data/ML TS XAI/ML/Whitefang Code/All in One Lab CIA 1 Q.ipynb +1087 -0
  33. {noshot-5.0.0.dist-info → noshot-7.0.0.dist-info}/METADATA +1 -1
  34. noshot-7.0.0.dist-info/RECORD +41 -0
  35. {noshot-5.0.0.dist-info → noshot-7.0.0.dist-info}/WHEEL +1 -1
  36. noshot/data/ML TS XAI/XAI/Q1.ipynb +0 -535
  37. noshot/data/ML TS XAI/XAI/Q2.ipynb +0 -38129
  38. noshot/data/ML TS XAI/XAI/Q3.ipynb +0 -1340
  39. noshot/data/ML TS XAI/XAI/Q4.ipynb +0 -246
  40. noshot/data/ML TS XAI/XAI/Q5.ipynb +0 -2450
  41. noshot-5.0.0.dist-info/RECORD +0 -14
  42. {noshot-5.0.0.dist-info → noshot-7.0.0.dist-info}/licenses/LICENSE.txt +0 -0
  43. {noshot-5.0.0.dist-info → noshot-7.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,146 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "c885166e-e06f-46c2-bfe0-bc9b64e7e1e5",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import numpy as np\n",
11
+ "import pandas as pd\n",
12
+ "import seaborn as sns\n",
13
+ "import matplotlib.pyplot as plt\n",
14
+ "from sklearn.model_selection import train_test_split"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": null,
20
+ "id": "b00fd97c-b199-41f0-825a-4b9574967f5f",
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "df = pd.read_excel(\"data/real-estate.xlsx\")\n",
25
+ "df.head()"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": null,
31
+ "id": "90bb7a26-1e9b-4cc1-b104-ee76588ae090",
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "df.isnull().sum()"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "id": "9be23824-cb35-4c3c-9173-8dd99a3a2ec4",
42
+ "metadata": {},
43
+ "outputs": [],
44
+ "source": [
45
+ "sns.pairplot(df)\n",
46
+ "plt.show()"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": null,
52
+ "id": "517a7b77-ba3b-42d4-9941-c386c1d5b8f6",
53
+ "metadata": {},
54
+ "outputs": [],
55
+ "source": [
56
+ "sns.heatmap(df.corr(), annot=True, fmt='.2f', cmap='coolwarm')\n",
57
+ "plt.title('Correlation Heatmap')\n",
58
+ "plt.show()"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "code",
63
+ "execution_count": null,
64
+ "id": "7b3b0dc7-366e-4d69-a537-27856bc39643",
65
+ "metadata": {},
66
+ "outputs": [],
67
+ "source": [
68
+ "def lin_reg(colX):\n",
69
+ " X = df[colX].values\n",
70
+ " y = df['Y house price of unit area'].values\n",
71
+ "\n",
72
+ " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
73
+ "\n",
74
+ " N = len(X_train)\n",
75
+ " beta1 = (N * np.sum(X_train * y_train) - np.sum(X_train) * np.sum(y_train)) / (N * np.sum(X_train ** 2) - (np.sum(X_train)) ** 2)\n",
76
+ " beta0 = (np.sum(y_train) - beta1 * np.sum(X_train)) / N\n",
77
+ "\n",
78
+ " y_pred_train = beta1 * X_train + beta0\n",
79
+ " y_pred_test = beta1 * X_test + beta0\n",
80
+ "\n",
81
+ " plt.figure(figsize=(8, 5))\n",
82
+ " plt.scatter(X_train, y_train, color='blue', label='Train Data')\n",
83
+ " plt.scatter(X_test, y_test, color='green', label='Test Data')\n",
84
+ " x_line = np.linspace(min(X.min(), X_test.min()), max(X.max(), X_test.max()), 100)\n",
85
+ " y_line = beta1 * x_line + beta0\n",
86
+ " plt.plot(x_line, y_line, color='red', linewidth=2, label='Fitted Line')\n",
87
+ " plt.xlabel(colX)\n",
88
+ " plt.ylabel('Y house price of unit area')\n",
89
+ " plt.title(f'Linear Regression: {colX} vs Y')\n",
90
+ " plt.legend()\n",
91
+ " plt.grid(True)\n",
92
+ " plt.show()\n",
93
+ "\n",
94
+ " mse_train = np.mean((y_train - beta1 * X_train - beta0) ** 2)\n",
95
+ " mse_test = np.mean((y_test - beta1 * X_test - beta0) ** 2)\n",
96
+ "\n",
97
+ " return mse_train, mse_test"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": null,
103
+ "id": "341f4931-35d0-4a8b-a971-81e2376ad1a1",
104
+ "metadata": {},
105
+ "outputs": [],
106
+ "source": [
107
+ "mse1 = lin_reg('X3 distance to the nearest MRT station')\n",
108
+ "mse2 = lin_reg('X5 latitude')\n",
109
+ "mse3 = lin_reg('X6 longitude')"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": null,
115
+ "id": "6e5e18a2-00bf-4443-900d-b470ff4bd150",
116
+ "metadata": {},
117
+ "outputs": [],
118
+ "source": [
119
+ "print(mse1)\n",
120
+ "print(mse2)\n",
121
+ "print(mse3)"
122
+ ]
123
+ }
124
+ ],
125
+ "metadata": {
126
+ "kernelspec": {
127
+ "display_name": "Python 3 (ipykernel)",
128
+ "language": "python",
129
+ "name": "python3"
130
+ },
131
+ "language_info": {
132
+ "codemirror_mode": {
133
+ "name": "ipython",
134
+ "version": 3
135
+ },
136
+ "file_extension": ".py",
137
+ "mimetype": "text/x-python",
138
+ "name": "python",
139
+ "nbconvert_exporter": "python",
140
+ "pygments_lexer": "ipython3",
141
+ "version": "3.12.4"
142
+ }
143
+ },
144
+ "nbformat": 4,
145
+ "nbformat_minor": 5
146
+ }
@@ -0,0 +1,130 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "9be0f819-b381-4c85-ab7c-1535c061da6c",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n",
11
+ "import matplotlib.pyplot as plt\n",
12
+ "import seaborn as sns\n",
13
+ "from sklearn.model_selection import train_test_split\n",
14
+ "from sklearn.linear_model import LogisticRegression\n",
15
+ "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc\n",
16
+ "from sklearn.preprocessing import MinMaxScaler"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": null,
22
+ "id": "a5c77375-2288-46a3-a1f8-6b0d47bfa320",
23
+ "metadata": {},
24
+ "outputs": [],
25
+ "source": [
26
+ "df = pd.read_csv('data/magic04.data', header=None)\n",
27
+ "df[10] = df[10].map({'g': 0, 'h': 1})\n",
28
+ "df.head()"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": null,
34
+ "id": "95a1be58-9838-4125-86c8-cb9f2d380dc3",
35
+ "metadata": {},
36
+ "outputs": [],
37
+ "source": [
38
+ "sns.pairplot(df[[0,1,2,10]], hue=10)"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": null,
44
+ "id": "09deb8ab-ac80-42de-b313-9dc92cbb1b28",
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "X = MinMaxScaler().fit_transform(df.drop(columns=[10]))\n",
49
+ "y = df[10]\n",
50
+ "\n",
51
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": null,
57
+ "id": "146b237b-1478-4780-b877-6fb903d94b31",
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "lr = LogisticRegression(max_iter=10000, random_state=0)\n",
62
+ "lr.fit(X_train, y_train)"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": null,
68
+ "id": "a6757ba6-33bb-42d5-97c0-420e15f56b90",
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "y_pred = lr.predict(X_test)\n",
73
+ "print(f\"Accuracy: {accuracy_score(y_test, y_pred)}\")\n",
74
+ "print(classification_report(y_test, y_pred))"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": null,
80
+ "id": "a9279e9f-f869-4f0d-ab36-107fff7d05be",
81
+ "metadata": {},
82
+ "outputs": [],
83
+ "source": [
84
+ "cm = confusion_matrix(y_test, y_pred)\n",
85
+ "ConfusionMatrixDisplay(cm).plot()"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": null,
91
+ "id": "8a72838c-0530-4e67-a98c-0e29f72a504e",
92
+ "metadata": {},
93
+ "outputs": [],
94
+ "source": [
95
+ "y_pred_proba = lr.predict_proba(X_test)[:, 1]\n",
96
+ "fpr, tpr, _ = roc_curve(y_test, y_pred_proba)\n",
97
+ "roc_auc = auc(fpr, tpr)\n",
98
+ "\n",
99
+ "plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')\n",
100
+ "plt.plot([0, 1], [0, 1], 'k--', label='No Skill')\n",
101
+ "plt.xlabel('False Positive Rate')\n",
102
+ "plt.ylabel('True Positive Rate')\n",
103
+ "plt.title('ROC Curve for Magic04 Classification')\n",
104
+ "plt.legend()\n",
105
+ "plt.show()"
106
+ ]
107
+ }
108
+ ],
109
+ "metadata": {
110
+ "kernelspec": {
111
+ "display_name": "Python 3 (ipykernel)",
112
+ "language": "python",
113
+ "name": "python3"
114
+ },
115
+ "language_info": {
116
+ "codemirror_mode": {
117
+ "name": "ipython",
118
+ "version": 3
119
+ },
120
+ "file_extension": ".py",
121
+ "mimetype": "text/x-python",
122
+ "name": "python",
123
+ "nbconvert_exporter": "python",
124
+ "pygments_lexer": "ipython3",
125
+ "version": "3.12.4"
126
+ }
127
+ },
128
+ "nbformat": 4,
129
+ "nbformat_minor": 5
130
+ }
@@ -0,0 +1,112 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "0fcc8bb7-4d22-4d3b-b58a-302bb24f8f2e",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import itertools\n",
11
+ "import numpy as np\n",
12
+ "import pandas as pd\n",
13
+ "import matplotlib.pyplot as plt\n",
14
+ "from sklearn import linear_model,datasets\n",
15
+ "from sklearn.model_selection import train_test_split\n",
16
+ "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
17
+ "\n",
18
+ "import warnings\n",
19
+ "warnings.filterwarnings('ignore')"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": null,
25
+ "id": "d28e507b-fb15-4058-a161-656859a27c65",
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "wine = pd.read_csv('data/wine-dataset.csv')\n",
30
+ "print(\"Shape:\", wine.shape)\n",
31
+ "wine.head()"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": null,
37
+ "id": "c4e953da-6941-43f2-a9ce-aab907876d45",
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "wine.columns"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": null,
47
+ "id": "9ee44a66-dc4a-4c79-9dab-eec60669dd8b",
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "X = wine.iloc[:, :13]\n",
52
+ "y = wine.iloc[:, 13]"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "id": "bd9d60dd-8272-46b4-8335-69d9751ed0c7",
59
+ "metadata": {},
60
+ "outputs": [],
61
+ "source": [
62
+ "X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.30, random_state=7)\n",
63
+ "\n",
64
+ "log_reg_model = linear_model.LogisticRegression()\n",
65
+ "log_reg_model.fit(X_train,y_train)"
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": null,
71
+ "id": "7c8fca42-c8d8-4334-9cc4-da4f5e1b0a1e",
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "log_reg_base_score = log_reg_model.score(X_test,y_test)\n",
76
+ "print(\"The score for the Logistic Regression Model is : \", log_reg_base_score)"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": null,
82
+ "id": "61bbb23e-cb29-41ae-9ea3-82e8d465c7f2",
83
+ "metadata": {},
84
+ "outputs": [],
85
+ "source": [
86
+ "cm = confusion_matrix(y_test, log_reg_model.predict(X_test))\n",
87
+ "ConfusionMatrixDisplay(cm).plot()"
88
+ ]
89
+ }
90
+ ],
91
+ "metadata": {
92
+ "kernelspec": {
93
+ "display_name": "Python 3 (ipykernel)",
94
+ "language": "python",
95
+ "name": "python3"
96
+ },
97
+ "language_info": {
98
+ "codemirror_mode": {
99
+ "name": "ipython",
100
+ "version": 3
101
+ },
102
+ "file_extension": ".py",
103
+ "mimetype": "text/x-python",
104
+ "name": "python",
105
+ "nbconvert_exporter": "python",
106
+ "pygments_lexer": "ipython3",
107
+ "version": "3.12.4"
108
+ }
109
+ },
110
+ "nbformat": 4,
111
+ "nbformat_minor": 5
112
+ }
@@ -0,0 +1,118 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "621687f2-cda2-449c-be36-91e2e0d4966d",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n",
11
+ "import matplotlib.pyplot as plt\n",
12
+ "from sklearn.model_selection import train_test_split\n",
13
+ "from sklearn.naive_bayes import GaussianNB\n",
14
+ "from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc\n",
15
+ "from sklearn.preprocessing import LabelEncoder"
16
+ ]
17
+ },
18
+ {
19
+ "cell_type": "code",
20
+ "execution_count": null,
21
+ "id": "e1793fec-c86e-4f2e-a9b7-dc1e68bffc1e",
22
+ "metadata": {},
23
+ "outputs": [],
24
+ "source": [
25
+ "df = pd.read_csv(\"data/agaricus-lepiota.data\", header=None)\n",
26
+ "df.head()"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "id": "d2944f82-5053-4703-abe8-c58b5147f4d9",
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "X = df.drop(columns=[0]).apply(LabelEncoder().fit_transform)\n",
37
+ "y = LabelEncoder().fit_transform(df[0])"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": null,
43
+ "id": "65d4d4b5-96a0-4a9d-8e95-635430642b49",
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)\n",
48
+ "\n",
49
+ "nb = GaussianNB()\n",
50
+ "nb.fit(X_train, y_train)"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "id": "d453eb4a-81b5-4af1-a0af-9d644d4650ad",
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "y_pred = nb.predict(X_test)\n",
61
+ "print(f\"Accuracy: {accuracy_score(y_test, y_pred)}\")\n",
62
+ "print(classification_report(y_test, y_pred))"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "execution_count": null,
68
+ "id": "b2c5580c-9f8a-48fb-b06a-af1087854391",
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred)).plot()\n",
73
+ "plt.show()"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "id": "b64b2b36-199b-4292-b0f2-646e0136bd3f",
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "y_pred_proba = nb.predict_proba(X_test)[:, 1]\n",
84
+ "fpr, tpr, _ = roc_curve(y_test, y_pred_proba)\n",
85
+ "roc_auc = auc(fpr, tpr)\n",
86
+ "\n",
87
+ "plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')\n",
88
+ "plt.plot([0, 1], [0, 1], 'k--', label='No Skill')\n",
89
+ "plt.xlabel('False Positive Rate')\n",
90
+ "plt.ylabel('True Positive Rate')\n",
91
+ "plt.title('ROC Curve for Agaricus-Lepiota Classification')\n",
92
+ "plt.legend()\n",
93
+ "plt.show()"
94
+ ]
95
+ }
96
+ ],
97
+ "metadata": {
98
+ "kernelspec": {
99
+ "display_name": "Python 3 (ipykernel)",
100
+ "language": "python",
101
+ "name": "python3"
102
+ },
103
+ "language_info": {
104
+ "codemirror_mode": {
105
+ "name": "ipython",
106
+ "version": 3
107
+ },
108
+ "file_extension": ".py",
109
+ "mimetype": "text/x-python",
110
+ "name": "python",
111
+ "nbconvert_exporter": "python",
112
+ "pygments_lexer": "ipython3",
113
+ "version": "3.12.4"
114
+ }
115
+ },
116
+ "nbformat": 4,
117
+ "nbformat_minor": 5
118
+ }
@@ -0,0 +1,89 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "939c616d-2779-4e21-adcf-1d070898d65b",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from sklearn import datasets\n",
11
+ "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
12
+ "from sklearn.model_selection import train_test_split\n",
13
+ "from sklearn.naive_bayes import GaussianNB\n",
14
+ "import pandas as pd"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": null,
20
+ "id": "17720a0d-e788-4b1d-b2b2-a542f6b824a2",
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "wine = pd.read_csv('data/wine-dataset.csv')\n",
25
+ "print(wine.shape)"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": null,
31
+ "id": "a050923e-4382-4ff7-93bf-446b117c0ef5",
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "X = wine.iloc[:, :13]\n",
36
+ "X.head()"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": null,
42
+ "id": "9f1a4355-718e-40ed-b892-3e3d03c4ef3c",
43
+ "metadata": {},
44
+ "outputs": [],
45
+ "source": [
46
+ "y = wine.iloc[:, 13]\n",
47
+ "y"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": null,
53
+ "id": "dd3f31ef-c0d2-48dd-9fb7-338c10f9fbf9",
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0)\n",
58
+ "\n",
59
+ "gnb = GaussianNB().fit(X_train, y_train)\n",
60
+ "gnb_predictions = gnb.predict(X_test)\n",
61
+ "accuracy = gnb.score(X_test, y_test)\n",
62
+ "print(accuracy)\n",
63
+ "cm = confusion_matrix(y_test, gnb_predictions)\n",
64
+ "ConfusionMatrixDisplay(cm).plot()"
65
+ ]
66
+ }
67
+ ],
68
+ "metadata": {
69
+ "kernelspec": {
70
+ "display_name": "Python 3 (ipykernel)",
71
+ "language": "python",
72
+ "name": "python3"
73
+ },
74
+ "language_info": {
75
+ "codemirror_mode": {
76
+ "name": "ipython",
77
+ "version": 3
78
+ },
79
+ "file_extension": ".py",
80
+ "mimetype": "text/x-python",
81
+ "name": "python",
82
+ "nbconvert_exporter": "python",
83
+ "pygments_lexer": "ipython3",
84
+ "version": "3.12.4"
85
+ }
86
+ },
87
+ "nbformat": 4,
88
+ "nbformat_minor": 5
89
+ }