noshot 0.3.6__tar.gz → 0.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {noshot-0.3.6 → noshot-0.3.7}/PKG-INFO +1 -1
- {noshot-0.3.6 → noshot-0.3.7}/noshot.egg-info/PKG-INFO +1 -1
- noshot-0.3.7/noshot.egg-info/SOURCES.txt +56 -0
- {noshot-0.3.6 → noshot-0.3.7}/setup.py +1 -1
- noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA/1/1.ipynb +133 -0
- noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA/2/2.ipynb +139 -0
- noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA/3/3.ipynb +130 -0
- noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA/4/4.ipynb +141 -0
- noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA/1 - AirPassengers/1 - AirPassengers.ipynb +198 -0
- noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA/2 - Daily-total-female-births/2 - daily-total-female-births.ipynb +209 -0
- noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA/3 - Bill Charge/3 - Bill Charge.ipynb +169 -0
- noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA/4 - Daily min temperatures/4 - daily-min-temperatures.ipynb +181 -0
- noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA/5 - shampoo sales/5 - Shampoo sales.ipynb +213 -0
- noshot-0.3.6/noshot.egg-info/SOURCES.txt +0 -56
- noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file/1/1.ipynb +0 -255
- noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file/2/2.ipynb +0 -399
- noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file/3/3.ipynb +0 -276
- noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file/4/4.ipynb +0 -265
- noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different/1 - AirPassengers/1 - AirPassengers.ipynb +0 -563
- noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different/2 - Daily-total-female-births/2 - daily-total-female-births.ipynb +0 -688
- noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different/3 - Bill Charge/3 - Bill Charge.ipynb +0 -819
- noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different/4 - Daily min temperatures/4 - daily-min-temperatures.ipynb +0 -573
- noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different/5 - shampoo sales/5 - Shampoo sales.ipynb +0 -421
- {noshot-0.3.6 → noshot-0.3.7}/LICENSE.txt +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/README.md +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/noshot.egg-info/dependency_links.txt +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/noshot.egg-info/not-zip-safe +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/noshot.egg-info/top_level.txt +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/setup.cfg +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/__init__.py +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/ML/1. PCA - EDA.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/ML/2. KNN Classifier.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/ML/4. Linear Regression.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/ML/5. Logistic Regression.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/ML/6. Bayesian Classifier.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/ML/data/balance-scale.csv +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/ML/data/balance-scale.txt +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/ML/data/machine-data.csv +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/ML/data/wine-dataset.csv +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file → noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA}/1/Question.txt +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file → noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA}/1/airfoil_self_noise.dat +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file → noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA}/2/Question.txt +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file → noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA}/2/pop_failures.dat +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file → noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA}/3/Qu.txt +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file → noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA}/3/go_track_tracks.csv +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file → noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA}/4/Wilt.csv +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/ML Lab CIA - Healthy directly upload file → noshot-0.3.7/src/noshot/data/ML TS XAI/ML Lab CIA}/4/qu.txt +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/TS/1. EDA - Handling Time Series Data.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/TS/2. Feature Engineering.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/TS/3. Temporal Relationships.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/TS/4. Up-Down-Sampling and Interpolation.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/TS/5. Stationarity - Trend - Seasonality.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/TS/6. Autocorrelation - Partial Autocorrelation.ipynb +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/TS/AllinOne.ipynb +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different/4 - Daily min temperatures → noshot-0.3.7/src/noshot/data/ML TS XAI/TS/data}/daily-min-temperatures.csv +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different/2 - Daily-total-female-births → noshot-0.3.7/src/noshot/data/ML TS XAI/TS/data}/daily-total-female-births.csv +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/TS/data/raw_sales.csv +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/data/ML TS XAI/TS/data/shampoo_sales.csv +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different → noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA}/1 - AirPassengers/AirPassengers.csv +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/TS/data → noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA/2 - Daily-total-female-births}/daily-total-female-births.csv +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different → noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA}/3 - Bill Charge/bill charge.csv +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/TS/data → noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA/4 - Daily min temperatures}/daily-min-temperatures.csv +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different → noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA}/5 - shampoo sales/shampoo_sales.csv +0 -0
- {noshot-0.3.6/src/noshot/data/ML TS XAI/TSLabCIA-Question order may be different → noshot-0.3.7/src/noshot/data/ML TS XAI/TS Lab CIA}/Questions TMS 27 Feb 25.pdf +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/main.py +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/utils/__init__.py +0 -0
- {noshot-0.3.6 → noshot-0.3.7}/src/noshot/utils/shell_utils.py +0 -0
@@ -0,0 +1,56 @@
|
|
1
|
+
LICENSE.txt
|
2
|
+
README.md
|
3
|
+
setup.py
|
4
|
+
noshot.egg-info/PKG-INFO
|
5
|
+
noshot.egg-info/SOURCES.txt
|
6
|
+
noshot.egg-info/dependency_links.txt
|
7
|
+
noshot.egg-info/not-zip-safe
|
8
|
+
noshot.egg-info/top_level.txt
|
9
|
+
src/noshot/__init__.py
|
10
|
+
src/noshot/main.py
|
11
|
+
src/noshot/data/ML TS XAI/ML/1. PCA - EDA.ipynb
|
12
|
+
src/noshot/data/ML TS XAI/ML/2. KNN Classifier.ipynb
|
13
|
+
src/noshot/data/ML TS XAI/ML/3. Linear Discriminant Analysis.ipynb
|
14
|
+
src/noshot/data/ML TS XAI/ML/4. Linear Regression.ipynb
|
15
|
+
src/noshot/data/ML TS XAI/ML/5. Logistic Regression.ipynb
|
16
|
+
src/noshot/data/ML TS XAI/ML/6. Bayesian Classifier.ipynb
|
17
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/1/1.ipynb
|
18
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/1/Question.txt
|
19
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/1/airfoil_self_noise.dat
|
20
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/2/2.ipynb
|
21
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/2/Question.txt
|
22
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/2/pop_failures.dat
|
23
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/3/3.ipynb
|
24
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/3/Qu.txt
|
25
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/3/go_track_tracks.csv
|
26
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/4/4.ipynb
|
27
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/4/Wilt.csv
|
28
|
+
src/noshot/data/ML TS XAI/ML Lab CIA/4/qu.txt
|
29
|
+
src/noshot/data/ML TS XAI/ML/data/balance-scale.csv
|
30
|
+
src/noshot/data/ML TS XAI/ML/data/balance-scale.txt
|
31
|
+
src/noshot/data/ML TS XAI/ML/data/machine-data.csv
|
32
|
+
src/noshot/data/ML TS XAI/ML/data/wine-dataset.csv
|
33
|
+
src/noshot/data/ML TS XAI/TS/1. EDA - Handling Time Series Data.ipynb
|
34
|
+
src/noshot/data/ML TS XAI/TS/2. Feature Engineering.ipynb
|
35
|
+
src/noshot/data/ML TS XAI/TS/3. Temporal Relationships.ipynb
|
36
|
+
src/noshot/data/ML TS XAI/TS/4. Up-Down-Sampling and Interpolation.ipynb
|
37
|
+
src/noshot/data/ML TS XAI/TS/5. Stationarity - Trend - Seasonality.ipynb
|
38
|
+
src/noshot/data/ML TS XAI/TS/6. Autocorrelation - Partial Autocorrelation.ipynb
|
39
|
+
src/noshot/data/ML TS XAI/TS/AllinOne.ipynb
|
40
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/Questions TMS 27 Feb 25.pdf
|
41
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/1 - AirPassengers/1 - AirPassengers.ipynb
|
42
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/1 - AirPassengers/AirPassengers.csv
|
43
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/2 - Daily-total-female-births/2 - daily-total-female-births.ipynb
|
44
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/2 - Daily-total-female-births/daily-total-female-births.csv
|
45
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/3 - Bill Charge/3 - Bill Charge.ipynb
|
46
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/3 - Bill Charge/bill charge.csv
|
47
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/4 - Daily min temperatures/4 - daily-min-temperatures.ipynb
|
48
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/4 - Daily min temperatures/daily-min-temperatures.csv
|
49
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/5 - shampoo sales/5 - Shampoo sales.ipynb
|
50
|
+
src/noshot/data/ML TS XAI/TS Lab CIA/5 - shampoo sales/shampoo_sales.csv
|
51
|
+
src/noshot/data/ML TS XAI/TS/data/daily-min-temperatures.csv
|
52
|
+
src/noshot/data/ML TS XAI/TS/data/daily-total-female-births.csv
|
53
|
+
src/noshot/data/ML TS XAI/TS/data/raw_sales.csv
|
54
|
+
src/noshot/data/ML TS XAI/TS/data/shampoo_sales.csv
|
55
|
+
src/noshot/utils/__init__.py
|
56
|
+
src/noshot/utils/shell_utils.py
|
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as f:
|
|
5
5
|
|
6
6
|
setup(
|
7
7
|
name="noshot",
|
8
|
-
version="0.3.6",
|
8
|
+
version="0.3.7",
|
9
9
|
author="Tim Stan S",
|
10
10
|
description="Support library for Artificial Intelligence, Machine Learning and Data Science tools",
|
11
11
|
long_description=long_description,
|
@@ -0,0 +1,133 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": null,
|
6
|
+
"id": "31067fce-1168-4c6e-97c2-bfc4fb40904b",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [],
|
9
|
+
"source": [
|
10
|
+
"import pandas as pd\n",
|
11
|
+
"import numpy as np\n",
|
12
|
+
"import matplotlib.pyplot as plt\n",
|
13
|
+
"import seaborn as sns\n",
|
14
|
+
"from sklearn.decomposition import PCA\n",
|
15
|
+
"from sklearn.linear_model import LinearRegression\n",
|
16
|
+
"from sklearn.model_selection import train_test_split\n",
|
17
|
+
"from sklearn.preprocessing import StandardScaler\n",
|
18
|
+
"from sklearn.metrics import r2_score, mean_squared_error"
|
19
|
+
]
|
20
|
+
},
|
21
|
+
{
|
22
|
+
"cell_type": "code",
|
23
|
+
"execution_count": null,
|
24
|
+
"id": "30e4ba93-9e95-4b51-a3e4-89931c193a3a",
|
25
|
+
"metadata": {},
|
26
|
+
"outputs": [],
|
27
|
+
"source": [
|
28
|
+
"file_path = \"airfoil_self_noise.dat\"\n",
|
29
|
+
"columns = [\"Frequency\", \"Angle of Attack\", \"Chord Length\", \"Free-stream Velocity\", \"Suction Side Thickness\", \"Scaled SPL\"]\n",
|
30
|
+
"df = pd.read_csv(file_path, sep=\"\\t\", header=None, names=columns)\n",
|
31
|
+
"df.head()"
|
32
|
+
]
|
33
|
+
},
|
34
|
+
{
|
35
|
+
"cell_type": "code",
|
36
|
+
"execution_count": null,
|
37
|
+
"id": "c99f7732-9da4-4f2e-8ad2-16722962c435",
|
38
|
+
"metadata": {},
|
39
|
+
"outputs": [],
|
40
|
+
"source": [
|
41
|
+
"df.columns = df.columns.str.strip()\n",
|
42
|
+
"X = df.iloc[:, :-1].values # Features\n",
|
43
|
+
"y = df.iloc[:, -1].values # Target"
|
44
|
+
]
|
45
|
+
},
|
46
|
+
{
|
47
|
+
"cell_type": "code",
|
48
|
+
"execution_count": null,
|
49
|
+
"id": "15940be7-1bdd-497e-81b4-eccd14424881",
|
50
|
+
"metadata": {},
|
51
|
+
"outputs": [],
|
52
|
+
"source": [
|
53
|
+
"scaler = StandardScaler()\n",
|
54
|
+
"X_scaled = scaler.fit_transform(X)\n",
|
55
|
+
"\n",
|
56
|
+
"pca = PCA(n_components=2)\n",
|
57
|
+
"X_pca = pca.fit_transform(X_scaled)\n",
|
58
|
+
"\n",
|
59
|
+
"X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)\n",
|
60
|
+
"X_pca_train, X_pca_test, _, _ = train_test_split(X_pca, y, test_size=0.2, random_state=42)\n",
|
61
|
+
"\n",
|
62
|
+
"lr_original = LinearRegression()\n",
|
63
|
+
"lr_original.fit(X_train, y_train)\n",
|
64
|
+
"y_pred_original = lr_original.predict(X_test)\n",
|
65
|
+
"\n",
|
66
|
+
"lr_pca = LinearRegression()\n",
|
67
|
+
"lr_pca.fit(X_pca_train, y_train)\n",
|
68
|
+
"y_pred_pca = lr_pca.predict(X_pca_test)"
|
69
|
+
]
|
70
|
+
},
|
71
|
+
{
|
72
|
+
"cell_type": "code",
|
73
|
+
"execution_count": null,
|
74
|
+
"id": "617f4fdf-6722-4caf-bef3-66240c3cbc0e",
|
75
|
+
"metadata": {},
|
76
|
+
"outputs": [],
|
77
|
+
"source": [
|
78
|
+
"print(\"R2 Original:\", r2_score(y_test, y_pred_original))\n",
|
79
|
+
"print(\"RMSE Original:\", np.sqrt(mean_squared_error(y_test, y_pred_original)))\n",
|
80
|
+
"print(\"R2 PCA:\", r2_score(y_test, y_pred_pca))\n",
|
81
|
+
"print(\"RMSE PCA:\", np.sqrt(mean_squared_error(y_test, y_pred_pca)))"
|
82
|
+
]
|
83
|
+
},
|
84
|
+
{
|
85
|
+
"cell_type": "code",
|
86
|
+
"execution_count": null,
|
87
|
+
"id": "83ed2bce-0dfe-4bc4-b24b-356113eb6be3",
|
88
|
+
"metadata": {},
|
89
|
+
"outputs": [],
|
90
|
+
"source": [
|
91
|
+
"plt.figure(figsize=(12, 5))\n",
|
92
|
+
"\n",
|
93
|
+
"plt.subplot(1, 2, 1)\n",
|
94
|
+
"sns.scatterplot(x=y_test, y=y_pred_original, alpha=0.5)\n",
|
95
|
+
"plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--', color='red')\n",
|
96
|
+
"plt.xlabel(\"Actual\")\n",
|
97
|
+
"plt.ylabel(\"Predicted\")\n",
|
98
|
+
"plt.title(\"Linear Regression on Original Data\")\n",
|
99
|
+
"\n",
|
100
|
+
"plt.subplot(1, 2, 2)\n",
|
101
|
+
"sns.scatterplot(x=y_test, y=y_pred_pca, alpha=0.5)\n",
|
102
|
+
"plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--', color='red')\n",
|
103
|
+
"plt.xlabel(\"Actual\")\n",
|
104
|
+
"plt.ylabel(\"Predicted\")\n",
|
105
|
+
"plt.title(\"Linear Regression on PCA-Reduced Data\")\n",
|
106
|
+
"\n",
|
107
|
+
"plt.tight_layout()\n",
|
108
|
+
"plt.show()"
|
109
|
+
]
|
110
|
+
}
|
111
|
+
],
|
112
|
+
"metadata": {
|
113
|
+
"kernelspec": {
|
114
|
+
"display_name": "Python 3 (ipykernel)",
|
115
|
+
"language": "python",
|
116
|
+
"name": "python3"
|
117
|
+
},
|
118
|
+
"language_info": {
|
119
|
+
"codemirror_mode": {
|
120
|
+
"name": "ipython",
|
121
|
+
"version": 3
|
122
|
+
},
|
123
|
+
"file_extension": ".py",
|
124
|
+
"mimetype": "text/x-python",
|
125
|
+
"name": "python",
|
126
|
+
"nbconvert_exporter": "python",
|
127
|
+
"pygments_lexer": "ipython3",
|
128
|
+
"version": "3.12.4"
|
129
|
+
}
|
130
|
+
},
|
131
|
+
"nbformat": 4,
|
132
|
+
"nbformat_minor": 5
|
133
|
+
}
|
@@ -0,0 +1,139 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": null,
|
6
|
+
"id": "0c44baad-6341-4743-ae5d-502ce6647dfc",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [],
|
9
|
+
"source": [
|
10
|
+
"import pandas as pd\n",
|
11
|
+
"import numpy as np\n",
|
12
|
+
"import matplotlib.pyplot as plt\n",
|
13
|
+
"import seaborn as sns\n",
|
14
|
+
"from sklearn.decomposition import PCA\n",
|
15
|
+
"from sklearn.neighbors import KNeighborsClassifier\n",
|
16
|
+
"from sklearn.model_selection import train_test_split\n",
|
17
|
+
"from sklearn.preprocessing import StandardScaler\n",
|
18
|
+
"from sklearn.metrics import accuracy_score"
|
19
|
+
]
|
20
|
+
},
|
21
|
+
{
|
22
|
+
"cell_type": "code",
|
23
|
+
"execution_count": null,
|
24
|
+
"id": "328266f0-f099-47a9-b146-0a1df89d5b47",
|
25
|
+
"metadata": {},
|
26
|
+
"outputs": [],
|
27
|
+
"source": [
|
28
|
+
"import warnings\n",
|
29
|
+
"warnings.filterwarnings('ignore')"
|
30
|
+
]
|
31
|
+
},
|
32
|
+
{
|
33
|
+
"cell_type": "code",
|
34
|
+
"execution_count": null,
|
35
|
+
"id": "d4ad00b9-b339-4af3-9fdc-aada76a5eac5",
|
36
|
+
"metadata": {},
|
37
|
+
"outputs": [],
|
38
|
+
"source": [
|
39
|
+
"file_path = \"pop_failures.dat\"\n",
|
40
|
+
"df = pd.read_table(file_path, sep=\"\\s+\")\n",
|
41
|
+
"print(\"Dataset Shape:\", df.shape)\n",
|
42
|
+
"df.head()"
|
43
|
+
]
|
44
|
+
},
|
45
|
+
{
|
46
|
+
"cell_type": "code",
|
47
|
+
"execution_count": null,
|
48
|
+
"id": "8bf01bf7-e86b-43dc-8787-d4d50afc5f56",
|
49
|
+
"metadata": {},
|
50
|
+
"outputs": [],
|
51
|
+
"source": [
|
52
|
+
"df.info()"
|
53
|
+
]
|
54
|
+
},
|
55
|
+
{
|
56
|
+
"cell_type": "code",
|
57
|
+
"execution_count": null,
|
58
|
+
"id": "b21b1c88-35e1-477a-9c8c-469ff2cb49ea",
|
59
|
+
"metadata": {},
|
60
|
+
"outputs": [],
|
61
|
+
"source": [
|
62
|
+
"df.dropna(inplace = True)"
|
63
|
+
]
|
64
|
+
},
|
65
|
+
{
|
66
|
+
"cell_type": "code",
|
67
|
+
"execution_count": null,
|
68
|
+
"id": "3f459656-d365-4d6c-9a2a-63f021d3d27e",
|
69
|
+
"metadata": {},
|
70
|
+
"outputs": [],
|
71
|
+
"source": [
|
72
|
+
"X = df.iloc[:, 2:20].values\n",
|
73
|
+
"y = df.iloc[:, 20].values\n",
|
74
|
+
"\n",
|
75
|
+
"scaler = StandardScaler()\n",
|
76
|
+
"X_scaled = scaler.fit_transform(X)\n",
|
77
|
+
"\n",
|
78
|
+
"pca = PCA(n_components=2)\n",
|
79
|
+
"X_pca = pca.fit_transform(X_scaled)\n",
|
80
|
+
"\n",
|
81
|
+
"X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)\n",
|
82
|
+
"X_pca_train, X_pca_test, _, _ = train_test_split(X_pca, y, test_size=0.2, random_state=42)\n",
|
83
|
+
"\n",
|
84
|
+
"k_values = range(1, 21)\n",
|
85
|
+
"accuracies_original = []\n",
|
86
|
+
"accuracies_pca = []\n",
|
87
|
+
"\n",
|
88
|
+
"for k in k_values:\n",
|
89
|
+
" knn = KNeighborsClassifier(n_neighbors=k)\n",
|
90
|
+
" knn.fit(X_train, y_train)\n",
|
91
|
+
" y_pred_original = knn.predict(X_test)\n",
|
92
|
+
" accuracies_original.append(accuracy_score(y_test, y_pred_original))\n",
|
93
|
+
" \n",
|
94
|
+
" knn.fit(X_pca_train, y_train)\n",
|
95
|
+
" y_pred_pca = knn.predict(X_pca_test)\n",
|
96
|
+
" accuracies_pca.append(accuracy_score(y_test, y_pred_pca))"
|
97
|
+
]
|
98
|
+
},
|
99
|
+
{
|
100
|
+
"cell_type": "code",
|
101
|
+
"execution_count": null,
|
102
|
+
"id": "f4ff07e6-a4af-433b-b9fe-f9134dea12e1",
|
103
|
+
"metadata": {},
|
104
|
+
"outputs": [],
|
105
|
+
"source": [
|
106
|
+
"plt.figure(figsize=(10, 5))\n",
|
107
|
+
"plt.plot(k_values, accuracies_original, label='KNN without PCA', marker='o')\n",
|
108
|
+
"plt.plot(k_values, accuracies_pca, label='KNN with PCA', marker='s')\n",
|
109
|
+
"plt.xlabel(\"K Value\")\n",
|
110
|
+
"plt.ylabel(\"Accuracy\")\n",
|
111
|
+
"plt.title(\"KNN Accuracy Comparison with and without PCA\")\n",
|
112
|
+
"plt.legend()\n",
|
113
|
+
"plt.grid()\n",
|
114
|
+
"plt.show()"
|
115
|
+
]
|
116
|
+
}
|
117
|
+
],
|
118
|
+
"metadata": {
|
119
|
+
"kernelspec": {
|
120
|
+
"display_name": "Python 3 (ipykernel)",
|
121
|
+
"language": "python",
|
122
|
+
"name": "python3"
|
123
|
+
},
|
124
|
+
"language_info": {
|
125
|
+
"codemirror_mode": {
|
126
|
+
"name": "ipython",
|
127
|
+
"version": 3
|
128
|
+
},
|
129
|
+
"file_extension": ".py",
|
130
|
+
"mimetype": "text/x-python",
|
131
|
+
"name": "python",
|
132
|
+
"nbconvert_exporter": "python",
|
133
|
+
"pygments_lexer": "ipython3",
|
134
|
+
"version": "3.12.4"
|
135
|
+
}
|
136
|
+
},
|
137
|
+
"nbformat": 4,
|
138
|
+
"nbformat_minor": 5
|
139
|
+
}
|
@@ -0,0 +1,130 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": null,
|
6
|
+
"id": "5bd9b810-1eef-4f1c-8b46-86e8e8f013d1",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [],
|
9
|
+
"source": [
|
10
|
+
"import pandas as pd\n",
|
11
|
+
"import numpy as np\n",
|
12
|
+
"import matplotlib.pyplot as plt\n",
|
13
|
+
"from sklearn.decomposition import PCA\n",
|
14
|
+
"from sklearn.linear_model import LinearRegression\n",
|
15
|
+
"from sklearn.model_selection import train_test_split\n",
|
16
|
+
"from sklearn.preprocessing import StandardScaler\n",
|
17
|
+
"from sklearn.metrics import r2_score, mean_squared_error"
|
18
|
+
]
|
19
|
+
},
|
20
|
+
{
|
21
|
+
"cell_type": "code",
|
22
|
+
"execution_count": null,
|
23
|
+
"id": "733faa11-400f-4160-8292-e8bd90948264",
|
24
|
+
"metadata": {},
|
25
|
+
"outputs": [],
|
26
|
+
"source": [
|
27
|
+
"file_path = \"go_track_tracks.csv\"\n",
|
28
|
+
"df = pd.read_csv(file_path)\n",
|
29
|
+
"df.head()"
|
30
|
+
]
|
31
|
+
},
|
32
|
+
{
|
33
|
+
"cell_type": "code",
|
34
|
+
"execution_count": null,
|
35
|
+
"id": "bd1254cc-242c-439b-9baf-8c0b08daf597",
|
36
|
+
"metadata": {},
|
37
|
+
"outputs": [],
|
38
|
+
"source": [
|
39
|
+
"df = df.select_dtypes(include=[np.number]).dropna()\n",
|
40
|
+
"\n",
|
41
|
+
"X = df.iloc[:, :-1].values # Features\n",
|
42
|
+
"y = df.iloc[:, -1].values # Target\n",
|
43
|
+
"\n",
|
44
|
+
"scaler = StandardScaler()\n",
|
45
|
+
"X_scaled = scaler.fit_transform(X)\n",
|
46
|
+
"\n",
|
47
|
+
"pca = PCA(n_components=2)\n",
|
48
|
+
"X_pca = pca.fit_transform(X_scaled)\n",
|
49
|
+
"\n",
|
50
|
+
"X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)\n",
|
51
|
+
"X_pca_train, X_pca_test, _, _ = train_test_split(X_pca, y, test_size=0.2, random_state=42)\n",
|
52
|
+
"\n",
|
53
|
+
"lr_original = LinearRegression()\n",
|
54
|
+
"lr_original.fit(X_train, y_train)\n",
|
55
|
+
"y_pred_original = lr_original.predict(X_test)\n",
|
56
|
+
"\n",
|
57
|
+
"lr_pca = LinearRegression()\n",
|
58
|
+
"lr_pca.fit(X_pca_train, y_train)\n",
|
59
|
+
"y_pred_pca = lr_pca.predict(X_pca_test)"
|
60
|
+
]
|
61
|
+
},
|
62
|
+
{
|
63
|
+
"cell_type": "code",
|
64
|
+
"execution_count": null,
|
65
|
+
"id": "e0116213-f55e-4512-95a0-e1983bddcd30",
|
66
|
+
"metadata": {},
|
67
|
+
"outputs": [],
|
68
|
+
"source": [
|
69
|
+
"r2_original = r2_score(y_test, y_pred_original)\n",
|
70
|
+
"rmse_original = np.sqrt(mean_squared_error(y_test, y_pred_original))\n",
|
71
|
+
"\n",
|
72
|
+
"r2_pca = r2_score(y_test, y_pred_pca)\n",
|
73
|
+
"rmse_pca = np.sqrt(mean_squared_error(y_test, y_pred_pca))\n",
|
74
|
+
"\n",
|
75
|
+
"print(\"R2 Original:\", r2_original)\n",
|
76
|
+
"print(\"RMSE Original:\", rmse_original)\n",
|
77
|
+
"print(\"R2 PCA:\", r2_pca)\n",
|
78
|
+
"print(\"RMSE PCA:\", rmse_pca)"
|
79
|
+
]
|
80
|
+
},
|
81
|
+
{
|
82
|
+
"cell_type": "code",
|
83
|
+
"execution_count": null,
|
84
|
+
"id": "ba2a1ef3-1554-4164-9966-3bc59fdb69e3",
|
85
|
+
"metadata": {},
|
86
|
+
"outputs": [],
|
87
|
+
"source": [
|
88
|
+
"plt.figure(figsize=(10, 5))\n",
|
89
|
+
"\n",
|
90
|
+
"plt.subplot(1, 2, 1)\n",
|
91
|
+
"plt.scatter(y_test, y_pred_original, alpha=0.5)\n",
|
92
|
+
"plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--', color='red')\n",
|
93
|
+
"plt.xlabel(\"Actual\")\n",
|
94
|
+
"plt.ylabel(\"Predicted\")\n",
|
95
|
+
"plt.title(\"Linear Regression on Original Data\")\n",
|
96
|
+
"\n",
|
97
|
+
"plt.subplot(1, 2, 2)\n",
|
98
|
+
"plt.scatter(y_test, y_pred_pca, alpha=0.5)\n",
|
99
|
+
"plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--', color='red')\n",
|
100
|
+
"plt.xlabel(\"Actual\")\n",
|
101
|
+
"plt.ylabel(\"Predicted\")\n",
|
102
|
+
"plt.title(\"Linear Regression on PCA-Reduced Data\")\n",
|
103
|
+
"\n",
|
104
|
+
"plt.tight_layout()\n",
|
105
|
+
"plt.show()"
|
106
|
+
]
|
107
|
+
}
|
108
|
+
],
|
109
|
+
"metadata": {
|
110
|
+
"kernelspec": {
|
111
|
+
"display_name": "Python 3 (ipykernel)",
|
112
|
+
"language": "python",
|
113
|
+
"name": "python3"
|
114
|
+
},
|
115
|
+
"language_info": {
|
116
|
+
"codemirror_mode": {
|
117
|
+
"name": "ipython",
|
118
|
+
"version": 3
|
119
|
+
},
|
120
|
+
"file_extension": ".py",
|
121
|
+
"mimetype": "text/x-python",
|
122
|
+
"name": "python",
|
123
|
+
"nbconvert_exporter": "python",
|
124
|
+
"pygments_lexer": "ipython3",
|
125
|
+
"version": "3.12.4"
|
126
|
+
}
|
127
|
+
},
|
128
|
+
"nbformat": 4,
|
129
|
+
"nbformat_minor": 5
|
130
|
+
}
|
@@ -0,0 +1,141 @@
|
|
1
|
+
{
|
2
|
+
"cells": [
|
3
|
+
{
|
4
|
+
"cell_type": "code",
|
5
|
+
"execution_count": null,
|
6
|
+
"id": "8b01d639-7417-4a71-a735-d519043691ac",
|
7
|
+
"metadata": {},
|
8
|
+
"outputs": [],
|
9
|
+
"source": [
|
10
|
+
"import pandas as pd\n",
|
11
|
+
"import numpy as np\n",
|
12
|
+
"import matplotlib.pyplot as plt\n",
|
13
|
+
"from sklearn.decomposition import PCA\n",
|
14
|
+
"from sklearn.neighbors import KNeighborsClassifier\n",
|
15
|
+
"from sklearn.model_selection import train_test_split\n",
|
16
|
+
"from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
|
17
|
+
"from sklearn.metrics import accuracy_score"
|
18
|
+
]
|
19
|
+
},
|
20
|
+
{
|
21
|
+
"cell_type": "code",
|
22
|
+
"execution_count": null,
|
23
|
+
"id": "03cbb0a7-0a95-4e08-94a9-028c664ecbe1",
|
24
|
+
"metadata": {},
|
25
|
+
"outputs": [],
|
26
|
+
"source": [
|
27
|
+
"file_path = \"Wilt.csv\"\n",
|
28
|
+
"df = pd.read_csv(file_path)\n",
|
29
|
+
"df.head()"
|
30
|
+
]
|
31
|
+
},
|
32
|
+
{
|
33
|
+
"cell_type": "code",
|
34
|
+
"execution_count": null,
|
35
|
+
"id": "0a4961c3-0fea-401b-a7f0-5f6fd0eb9e69",
|
36
|
+
"metadata": {},
|
37
|
+
"outputs": [],
|
38
|
+
"source": [
|
39
|
+
"y = df.iloc[:, 0]\n",
|
40
|
+
"X = df.iloc[:, 1:]"
|
41
|
+
]
|
42
|
+
},
|
43
|
+
{
|
44
|
+
"cell_type": "code",
|
45
|
+
"execution_count": null,
|
46
|
+
"id": "d6699a1a-5436-40d7-84b9-f2c3d5e87850",
|
47
|
+
"metadata": {},
|
48
|
+
"outputs": [],
|
49
|
+
"source": [
|
50
|
+
"if y.dtype == 'object':\n",
|
51
|
+
" class_mapping = {label: idx for idx, label in enumerate(y.unique())}\n",
|
52
|
+
" y = y.map(class_mapping)\n",
|
53
|
+
"\n",
|
54
|
+
"scaler = StandardScaler()\n",
|
55
|
+
"X_scaled = scaler.fit_transform(X)\n",
|
56
|
+
"\n",
|
57
|
+
"pca = PCA(n_components=2)\n",
|
58
|
+
"X_pca = pca.fit_transform(X_scaled)\n",
|
59
|
+
"\n",
|
60
|
+
"X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)\n",
|
61
|
+
"X_pca_train, X_pca_test, _, _ = train_test_split(X_pca, y, test_size=0.2, random_state=42)"
|
62
|
+
]
|
63
|
+
},
|
64
|
+
{
|
65
|
+
"cell_type": "code",
|
66
|
+
"execution_count": null,
|
67
|
+
"id": "0c6c271e-3725-4472-b082-a96aa9850ec6",
|
68
|
+
"metadata": {},
|
69
|
+
"outputs": [],
|
70
|
+
"source": [
|
71
|
+
"knn_original = KNeighborsClassifier(n_neighbors=5)\n",
|
72
|
+
"knn_original.fit(X_train, y_train)\n",
|
73
|
+
"y_pred_original = knn_original.predict(X_test)\n",
|
74
|
+
"accuracy_original = accuracy_score(y_test, y_pred_original)\n",
|
75
|
+
"\n",
|
76
|
+
"knn_pca = KNeighborsClassifier(n_neighbors=5)\n",
|
77
|
+
"knn_pca.fit(X_pca_train, y_train)\n",
|
78
|
+
"y_pred_pca = knn_pca.predict(X_pca_test)\n",
|
79
|
+
"accuracy_pca = accuracy_score(y_test, y_pred_pca)\n",
|
80
|
+
"\n",
|
81
|
+
"print(\"Accuracy without PCA:\", accuracy_original)\n",
|
82
|
+
"print(\"Accuracy with PCA:\", accuracy_pca)"
|
83
|
+
]
|
84
|
+
},
|
85
|
+
{
|
86
|
+
"cell_type": "code",
|
87
|
+
"execution_count": null,
|
88
|
+
"id": "5b129aaa-8fba-4dac-a4be-e94c277d40ae",
|
89
|
+
"metadata": {},
|
90
|
+
"outputs": [],
|
91
|
+
"source": [
|
92
|
+
"plt.figure(figsize=(6, 4))\n",
|
93
|
+
"plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='coolwarm', alpha=0.5)\n",
|
94
|
+
"plt.xlabel(\"Principal Component 1\")\n",
|
95
|
+
"plt.ylabel(\"Principal Component 2\")\n",
|
96
|
+
"plt.title(\"PCA Visualization of Wilt Dataset\")\n",
|
97
|
+
"plt.colorbar(label=\"Class\")\n",
|
98
|
+
"plt.show()"
|
99
|
+
]
|
100
|
+
},
|
101
|
+
{
|
102
|
+
"cell_type": "code",
|
103
|
+
"execution_count": null,
|
104
|
+
"id": "a4b9dcf0-d091-4ece-9651-e84932fb1eba",
|
105
|
+
"metadata": {},
|
106
|
+
"outputs": [],
|
107
|
+
"source": [
|
108
|
+
"labels = ['Without PCA', 'With PCA']\n",
|
109
|
+
"accuracies = [accuracy_original, accuracy_pca]\n",
|
110
|
+
"plt.figure(figsize=(6, 4))\n",
|
111
|
+
"plt.bar(labels, accuracies, color=['blue', 'orange'])\n",
|
112
|
+
"plt.xlabel(\"Model\")\n",
|
113
|
+
"plt.ylabel(\"Accuracy\")\n",
|
114
|
+
"plt.title(\"KNN Classification Accuracy Comparison\")\n",
|
115
|
+
"plt.ylim(0, 1)\n",
|
116
|
+
"plt.show()"
|
117
|
+
]
|
118
|
+
}
|
119
|
+
],
|
120
|
+
"metadata": {
|
121
|
+
"kernelspec": {
|
122
|
+
"display_name": "Python 3 (ipykernel)",
|
123
|
+
"language": "python",
|
124
|
+
"name": "python3"
|
125
|
+
},
|
126
|
+
"language_info": {
|
127
|
+
"codemirror_mode": {
|
128
|
+
"name": "ipython",
|
129
|
+
"version": 3
|
130
|
+
},
|
131
|
+
"file_extension": ".py",
|
132
|
+
"mimetype": "text/x-python",
|
133
|
+
"name": "python",
|
134
|
+
"nbconvert_exporter": "python",
|
135
|
+
"pygments_lexer": "ipython3",
|
136
|
+
"version": "3.12.4"
|
137
|
+
}
|
138
|
+
},
|
139
|
+
"nbformat": 4,
|
140
|
+
"nbformat_minor": 5
|
141
|
+
}
|