learnx 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- learnx-0.4.0/PKG-INFO +25 -0
- learnx-0.4.0/README.md +8 -0
- learnx-0.4.0/pyproject.toml +17 -0
- learnx-0.4.0/src/learnx/__init__.py +393 -0
learnx-0.4.0/PKG-INFO
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: learnx
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary:
|
|
5
|
+
Author: MAX
|
|
6
|
+
Requires-Python: >=3.7
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# learnx
|
|
19
|
+
|
|
20
|
+
A custom machine learning utility package.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
```bash
|
|
24
|
+
pip install learnx
|
|
25
|
+
```
|
learnx-0.4.0/README.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "learnx"
|
|
3
|
+
version = "0.4.0"
|
|
4
|
+
description = ""
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "MAX"}
|
|
7
|
+
]
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
requires-python = ">=3.7"
|
|
10
|
+
dependencies = []
|
|
11
|
+
|
|
12
|
+
[tool.poetry]
|
|
13
|
+
packages = [{include = "learnx", from = "src"}]
|
|
14
|
+
|
|
15
|
+
[build-system]
|
|
16
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
17
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
|
|
3
|
+
def lab1():
    """Explore the California Housing data with plots and an outlier count.

    Loads the dataset through scikit-learn as a DataFrame, draws a grid of
    histograms and a grid of box plots over every numeric column, prints how
    many rows fall outside the 1.5 * IQR fences for each column, and finally
    prints the ``describe()`` summary. Returns nothing.
    """
    import pandas as pd
    import numpy as np
    import seaborn as sns
    import matplotlib.pyplot as plt
    from sklearn.datasets import fetch_california_housing

    housing_df = fetch_california_housing(as_frame=True).frame
    numerical_features = housing_df.select_dtypes(include=[np.number]).columns

    # Histogram grid: panels are laid out on a fixed 3 x 3 subplot grid.
    plt.figure(figsize=(15, 10))
    for panel, column in enumerate(numerical_features, start=1):
        plt.subplot(3, 3, panel)
        sns.histplot(housing_df[column], kde=True, bins=30, color='blue')
        plt.title(f'Distribution of {column}')
    plt.tight_layout()
    plt.show()

    # Box-plot grid on the same 3 x 3 layout.
    plt.figure(figsize=(15, 10))
    for panel, column in enumerate(numerical_features, start=1):
        plt.subplot(3, 3, panel)
        sns.boxplot(x=housing_df[column], color='orange')
        plt.title(f'Box Plot of {column}')
    plt.tight_layout()
    plt.show()

    # Count rows lying beyond the 1.5 * IQR fences of each column.
    print("Outliers Detection:")
    outliers_summary = {}
    for column in numerical_features:
        series = housing_df[column]
        first_quartile = series.quantile(0.25)
        third_quartile = series.quantile(0.75)
        spread = third_quartile - first_quartile
        low_fence = first_quartile - 1.5 * spread
        high_fence = third_quartile + 1.5 * spread
        flagged = housing_df[(series < low_fence) | (series > high_fence)]
        outliers_summary[column] = len(flagged)
        print(f"{column}: {len(flagged)} outliers")

    print("\nDataset Summary:")
    print(housing_df.describe())
|
|
43
|
+
|
|
44
|
+
def lab2():
    """Plot the correlation heatmap and the pair plot of California Housing.

    The dataset is loaded through scikit-learn as a DataFrame; both figures
    are shown with ``plt.show()``. Returns nothing.
    """
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    from sklearn.datasets import fetch_california_housing

    housing = fetch_california_housing(as_frame=True).frame
    pairwise_corr = housing.corr()

    # Annotated heatmap of the pairwise correlations.
    plt.figure(figsize=(10, 8))
    sns.heatmap(
        pairwise_corr,
        annot=True,
        cmap='coolwarm',
        fmt='.2f',
        linewidths=0.5,
    )
    plt.title('Correlation Matrix of California Housing Features')
    plt.show()

    # Pair plot with KDE curves on the diagonal.
    sns.pairplot(housing, diag_kind='kde', plot_kws={'alpha': 0.5})
    plt.suptitle('Pair Plot of California Housing Features', y=1.02)
    plt.show()
|
|
63
|
+
|
|
64
|
+
def lab3():
    """Project the Iris data onto two principal components and plot them.

    Fits a 2-component PCA on the raw Iris measurements and draws one scatter
    series per class, coloured red/green/blue. Returns nothing.
    """
    import numpy as np
    import pandas as pd
    from sklearn.datasets import load_iris
    from sklearn.decomposition import PCA
    import matplotlib.pyplot as plt

    iris = load_iris()
    data, labels = iris.data, iris.target
    label_names = iris.target_names

    # Built but not used further below; kept as in the original demo.
    iris_df = pd.DataFrame(data, columns=iris.feature_names)

    data_reduced = PCA(n_components=2).fit_transform(data)

    first_axis = 'Principal Component 1'
    second_axis = 'Principal Component 2'
    reduced_df = pd.DataFrame(data_reduced, columns=[first_axis, second_axis])
    reduced_df['Label'] = labels

    plt.figure(figsize=(8, 6))
    colors = ['r', 'g', 'b']
    for position, class_id in enumerate(np.unique(labels)):
        members = reduced_df[reduced_df['Label'] == class_id]
        plt.scatter(
            members[first_axis],
            members[second_axis],
            label=label_names[class_id],
            color=colors[position],
        )
    plt.title('PCA on Iris Dataset')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.legend()
    plt.grid()
    plt.show()
|
|
100
|
+
|
|
101
|
+
def lab4(file_path=r''):
    """Run the Find-S concept-learning algorithm on a CSV training set.

    The CSV's last column is read as the class label and every other column
    as an attribute. Only rows whose label equals ``'Yes'`` are used. The
    training data and the final hypothesis are printed; nothing is returned.

    Args:
        file_path: Path of the training CSV. Defaults to the empty string
            (the value the demo originally hard-coded), which is rejected
            up front because no file can be read from it.

    Raises:
        FileNotFoundError: If ``file_path`` is empty.
    """
    import pandas as pd

    # Private marker for "no positive example seen yet". A dedicated object
    # cannot collide with a real attribute value, unlike the string '0'.
    unset = object()

    def find_s_algorithm(file_path):
        """Return the most specific hypothesis covering all 'Yes' rows."""
        data = pd.read_csv(file_path)
        print("Training Datasets")
        print(data)
        att = data.columns[:-1]
        class_label = data.columns[-1]
        hypo = [unset for _ in att]
        for _, row in data.iterrows():
            if row[class_label] != 'Yes':
                continue
            for i, value in enumerate(row[att]):
                if hypo[i] is unset:
                    # First positive example: adopt its value as-is.
                    hypo[i] = value
                elif hypo[i] != value:
                    # Conflicting positive examples: generalize.
                    hypo[i] = '?'
        # Attributes never constrained are still shown as '0'.
        return ['0' if slot is unset else slot for slot in hypo]

    if not file_path:
        raise FileNotFoundError(
            "lab4() needs the path of a training CSV file; "
            "call lab4(file_path='your_data.csv')"
        )

    hypothesis = find_s_algorithm(file_path)
    print("The final hypothesis:", hypothesis)
|
|
121
|
+
|
|
122
|
+
def lab5():
    """Demonstrate a one-dimensional k-nearest-neighbours classifier.

    Draws 100 uniform random values, labels the first 50 with the rule
    ``x <= 0.5 -> Class1`` / ``x > 0.5 -> Class2`` and uses them as training
    data. The remaining 50 values are classified for several values of k;
    every prediction is printed and one figure per k is shown.
    Returns nothing.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from collections import Counter

    data = np.random.rand(100)
    labels = ['Class1' if x <= .5 else 'Class2' for x in data[:50]]

    def eucli_dist(x1, x2):
        """Distance between two scalars, i.e. their absolute difference."""
        return abs(x1 - x2)

    def knn_classifier(train_data, train_labels, test_point, k):
        """Return the majority label among the k nearest training points."""
        dist = sorted(
            (
                (eucli_dist(test_point, point), label)
                for point, label in zip(train_data, train_labels)
            ),
            key=lambda pair: pair[0],
        )
        kn_labels = [label for _, label in dist[:k]]
        return Counter(kn_labels).most_common(1)[0][0]

    train_data = data[:50]
    train_labels = labels
    test_data = data[50:]
    k_values = [1, 2, 3, 4, 5, 20, 30]

    print("---knn classification---")
    print("Train data : First 50 points classified as rule (x<=.5 -> (class1) x>.5 -> (class2))")
    print("Test data : Remaining points ")
    result = {}

    for k in k_values:
        print(f"KNN Classification for k = {k} : ")
        classified_labels = [
            knn_classifier(train_data, train_labels, test_point, k)
            for test_point in test_data
        ]
        result[k] = classified_labels
        # Test points are reported as x51..x100, continuing after the
        # 50 training points.
        for i, (value, label) in enumerate(zip(test_data, classified_labels), start=51):
            print(f" point x{i} , (value : {value:.4f}) classified as {label}")
        # Was print("/n"), which printed the two characters literally.
        print("\n")
    print("classification completed !")

    for k in k_values:
        classified_labels = result[k]
        class1_point = [
            value for value, label in zip(test_data, classified_labels)
            if label == "Class1"
        ]
        class2_point = [
            value for value, label in zip(test_data, classified_labels)
            if label == "Class2"
        ]
        plt.figure(figsize=(5, 4))
        # Training points are drawn at y=0, classified test points at y=1.
        plt.scatter(
            train_data,
            [0] * len(train_data),
            c=['blue' if label == "Class1" else 'red' for label in train_labels],
            label="Training data",
            marker="o",
        )
        plt.scatter(class1_point, [1] * len(class1_point), c='blue', label="Class Test 1", marker="x")
        plt.scatter(class2_point, [1] * len(class2_point), c='red', label="Class Test 2", marker="x")
        plt.title(f"k-NN Classification Results for k = {k}")
        plt.xlabel("Data points")
        plt.ylabel("classification level")
        plt.legend()
        plt.grid()
        plt.show()
|
|
174
|
+
|
|
175
|
+
def lab6():
    """Fit and plot locally weighted regression on a noisy sine curve.

    Generates 100 training points of ``sin(x)`` plus Gaussian noise (seed
    42), predicts at 200 evenly spaced query points with a Gaussian kernel
    of bandwidth ``tau = 0.5``, and plots data and fit. Returns nothing.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    def gaussian_kernel(x, xi, tau):
        """Gaussian weight of training row ``xi`` relative to query ``x``."""
        squared_distance = np.sum((x - xi) ** 2)
        return np.exp(-squared_distance / (2 * tau ** 2))

    def locally_weighted_regression(x, X, y, tau):
        """Predict at ``x`` by solving the weighted normal equations."""
        weights = np.array([gaussian_kernel(x, row, tau) for row in X])
        W = np.diag(weights)
        X_transpose_W = X.T @ W
        theta = np.linalg.inv(X_transpose_W @ X) @ X_transpose_W @ y
        return x @ theta

    np.random.seed(42)
    X = np.linspace(0, 2 * np.pi, 100)
    y = np.sin(X) + 0.1 * np.random.randn(100)
    # Prepend a column of ones so the local model has an intercept term.
    X_bias = np.c_[np.ones(X.shape), X]

    x_test = np.linspace(0, 2 * np.pi, 200)
    x_test_bias = np.c_[np.ones(x_test.shape), x_test]

    tau = 0.5
    predictions = [
        locally_weighted_regression(query, X_bias, y, tau)
        for query in x_test_bias
    ]
    y_pred = np.array(predictions)

    plt.figure(figsize=(10, 6))
    plt.scatter(X, y, color='red', label='Training Data', alpha=0.7)
    plt.plot(x_test, y_pred, color='blue', label=f'LWR Fit (tau={tau})', linewidth=2)
    plt.xlabel('X', fontsize=12)
    plt.ylabel('y', fontsize=12)
    plt.title('Locally Weighted Regression', fontsize=14)
    plt.legend(fontsize=10)
    plt.grid(alpha=0.3)
    plt.show()
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def lab7():
    """Demonstrate linear and polynomial regression.

    Runs two demos in sequence: a simple linear regression of the California
    Housing target on ``AveRooms``, and a degree-2 polynomial regression of
    ``mpg`` on ``displacement`` for the UCI Auto MPG data (downloaded over
    the network). Each demo shows a plot and prints the mean squared error
    and the R^2 score on a 20% test split. Returns nothing.
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from sklearn.datasets import fetch_california_housing
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures, StandardScaler
    from sklearn.pipeline import make_pipeline
    from sklearn.metrics import mean_squared_error, r2_score

    def linear_regression_california():
        """Fit, plot and score a one-feature linear regression."""
        housing = fetch_california_housing(as_frame=True)
        X = housing.data[["AveRooms"]]
        y = housing.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        model = LinearRegression()
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        plt.scatter(X_test, y_test, color="blue", label="Actual")
        plt.plot(X_test, y_pred, color="red", label="Predicted")
        plt.xlabel("Average number of rooms (AveRooms)")
        plt.ylabel("Median value of homes ($100,000)")
        plt.title("Linear Regression - California Housing Dataset")
        plt.legend()
        plt.show()

        print("Linear Regression - California Housing Dataset")
        print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
        print("R^2 Score:", r2_score(y_test, y_pred))

    def polynomial_regression_auto_mpg():
        """Fit, plot and score a degree-2 polynomial regression."""
        url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
        # The data file carries a ninth, quoted "car name" field. It must be
        # named here: with only eight names pandas would treat the first
        # field as the index and shift every column by one.
        column_names = [
            "mpg", "cylinders", "displacement", "horsepower", "weight",
            "acceleration", "model_year", "origin", "car_name",
        ]
        # Raw string: '\s' is not a valid escape in a normal string literal.
        data = pd.read_csv(url, sep=r'\s+', names=column_names, na_values="?")
        data = data.dropna()
        X = data["displacement"].values.reshape(-1, 1)
        y = data["mpg"].values
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        poly_model = make_pipeline(PolynomialFeatures(degree=2), StandardScaler(), LinearRegression())
        poly_model.fit(X_train, y_train)
        y_pred = poly_model.predict(X_test)

        plt.scatter(X_test, y_test, color="blue", label="Actual")
        plt.scatter(X_test, y_pred, color="red", label="Predicted")
        plt.xlabel("Displacement")
        plt.ylabel("Miles per gallon (mpg)")
        plt.title("Polynomial Regression - Auto MPG Dataset")
        plt.legend()
        plt.show()
        print("Polynomial Regression - Auto MPG Dataset")
        print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
        print("R^2 Score:", r2_score(y_test, y_pred))

    # The demos used to sit behind `if __name__ == "__main__":`. This module
    # is imported as a package, so that guard was never true and lab7() did
    # nothing; run both demos unconditionally instead.
    print("Demonstrating Linear Regression and Polynomial Regression\n")
    linear_regression_california()
    polynomial_regression_auto_mpg()
|
|
269
|
+
|
|
270
|
+
def lab8():
    """Train, evaluate and draw a decision tree on the breast cancer data.

    Uses an 80/20 train/test split (seed 42), prints the test accuracy,
    prints the predicted class of the first test sample, and plots the
    fitted tree. Returns nothing.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.metrics import accuracy_score
    from sklearn import tree

    data = load_breast_cancer()
    X, y = data.data, data.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    clf = DecisionTreeClassifier(random_state=42)
    clf.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, clf.predict(X_test))
    print(f"Model Accuracy: {accuracy * 100:.2f}%")

    # Re-use the first test row as a stand-in for a "new" sample.
    new_sample = X_test[0].reshape(1, -1)
    prediction = clf.predict(new_sample)

    # The demo reports label 1 as benign and anything else as malignant.
    if prediction == 1:
        prediction_class = "Benign"
    else:
        prediction_class = "Malignant"
    print(f"Predicted Class for the new sample: {prediction_class}")

    plt.figure(figsize=(12, 8))
    tree.plot_tree(
        clf,
        filled=True,
        feature_names=data.feature_names.tolist(),
        class_names=data.target_names.tolist(),
    )
    plt.title("Decision Tree - Breast Cancer Dataset")
    plt.show()
|
|
307
|
+
|
|
308
|
+
def lab9():
    """Classify Olivetti faces with Gaussian Naive Bayes and report results.

    Fits on a 70/30 train/test split (seed 42), prints accuracy, the
    classification report and the confusion matrix, prints the mean 5-fold
    cross-validation accuracy on the full data, and shows test images with
    their true and predicted labels on a 3 x 5 grid. Returns nothing.
    """
    import numpy as np
    from sklearn.datasets import fetch_olivetti_faces
    from sklearn.model_selection import train_test_split, cross_val_score
    from sklearn.naive_bayes import GaussianNB
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
    import matplotlib.pyplot as plt

    data = fetch_olivetti_faces(shuffle=True, random_state=42)
    X, y = data.data, data.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    gnb = GaussianNB()
    gnb.fit(X_train, y_train)
    y_pred = gnb.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f'Accuracy: {accuracy * 100:.2f}%')
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, zero_division=1))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

    cross_val_accuracy = cross_val_score(gnb, X, y, cv=5, scoring='accuracy')
    print(f'\nCross-validation accuracy: {cross_val_accuracy.mean() * 100:.2f}%')

    # zip() stops at the shortest input, so at most one image per axis
    # of the 3 x 5 grid is drawn.
    fig, axes = plt.subplots(3, 5, figsize=(12, 8))
    panels = zip(axes.ravel(), X_test, y_test, y_pred)
    for ax, image, label, prediction in panels:
        ax.imshow(image.reshape(64, 64), cmap=plt.cm.gray)
        ax.set_title(f"True: {label}, Pred: {prediction}")
        ax.axis('off')
    plt.show()
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def lab10():
    """Cluster the breast cancer data with K-Means and visualise it via PCA.

    Standardises the features, fits a 2-cluster K-Means (seed 42), prints a
    confusion matrix and classification report of cluster ids against the
    true labels, then shows three PCA scatter plots: coloured by cluster,
    coloured by true label, and by cluster with the centroids overlaid.
    Returns nothing.
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.datasets import load_breast_cancer
    from sklearn.cluster import KMeans
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    from sklearn.metrics import confusion_matrix, classification_report

    data = load_breast_cancer()
    X, y = data.data, data.target

    X_scaled = StandardScaler().fit_transform(X)

    kmeans = KMeans(n_clusters=2, random_state=42)
    y_kmeans = kmeans.fit_predict(X_scaled)

    # NOTE(review): cluster ids are compared to the labels as-is; nothing
    # here aligns cluster 0/1 with class 0/1 before scoring.
    print("Confusion Matrix:")
    print(confusion_matrix(y, y_kmeans))
    print("\nClassification Report:")
    print(classification_report(y, y_kmeans))

    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    df = pd.DataFrame(X_pca, columns=['PC1', 'PC2'])
    df['Cluster'] = y_kmeans
    df['True Label'] = y

    # Figure 1: points coloured by assigned cluster.
    plt.figure(figsize=(8, 6))
    sns.scatterplot(
        data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1',
        s=100, edgecolor='black', alpha=0.7,
    )
    plt.title('K-Means Clustering of Breast Cancer Dataset')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.legend(title="Cluster")
    plt.show()

    # Figure 2: points coloured by true label.
    plt.figure(figsize=(8, 6))
    sns.scatterplot(
        data=df, x='PC1', y='PC2', hue='True Label', palette='coolwarm',
        s=100, edgecolor='black', alpha=0.7,
    )
    plt.title('True Labels of Breast Cancer Dataset')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.legend(title="True Label")
    plt.show()

    # Figure 3: clusters again, with the centroids projected into PCA space.
    plt.figure(figsize=(8, 6))
    sns.scatterplot(
        data=df, x='PC1', y='PC2', hue='Cluster', palette='Set1',
        s=100, edgecolor='black', alpha=0.7,
    )
    centers = pca.transform(kmeans.cluster_centers_)
    plt.scatter(centers[:, 0], centers[:, 1], s=200, c='red', marker='X', label='Centroids')
    plt.title('K-Means Clustering with Centroids')
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.legend(title="Cluster")
    plt.show()
|