scklearn 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scklearn/__init__.py +1 -0
- scklearn/printer.py +9 -0
- scklearn/snippets.py +654 -0
- scklearn-0.0.1.dist-info/METADATA +9 -0
- scklearn-0.0.1.dist-info/RECORD +7 -0
- scklearn-0.0.1.dist-info/WHEEL +5 -0
- scklearn-0.0.1.dist-info/top_level.txt +1 -0
scklearn/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .printer import train_model
|
scklearn/printer.py
ADDED
scklearn/snippets.py
ADDED
|
@@ -0,0 +1,654 @@
|
|
|
1
|
+
codes = {
|
|
2
|
+
|
|
3
|
+
1: r'''
|
|
4
|
+
#1
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from sklearn.datasets import load_wine
|
|
7
|
+
from sklearn.preprocessing import MinMaxScaler
|
|
8
|
+
|
|
9
|
+
# ------------------------------------------
|
|
10
|
+
# a) Creation and Loading of Datasets
|
|
11
|
+
# ------------------------------------------
|
|
12
|
+
|
|
13
|
+
# i. Create dataset manually
|
|
14
|
+
director_info = pd.DataFrame({
|
|
15
|
+
'director': ['Francis Ford Coppola', 'Orson Welles', 'Richard Linklater'],
|
|
16
|
+
'country': ['USA', 'USA', 'USA']
|
|
17
|
+
})
|
|
18
|
+
|
|
19
|
+
print("Director Info Dataset:")
|
|
20
|
+
print(director_info)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ii. Load CSV dataset
|
|
24
|
+
df = pd.read_csv("metacritic_movies.csv")
|
|
25
|
+
print("\nMetacritic Movies Dataset:")
|
|
26
|
+
print(df.head())
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# iii. Load dataset using sklearn
|
|
30
|
+
wine = load_wine()
|
|
31
|
+
df_wine = pd.DataFrame(wine.data, columns=wine.feature_names)
|
|
32
|
+
df_wine['target'] = wine.target
|
|
33
|
+
|
|
34
|
+
print("\nWine Dataset:")
|
|
35
|
+
print(df_wine.head())
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# ------------------------------------------
|
|
39
|
+
# b) Statistical Measures
|
|
40
|
+
# ------------------------------------------
|
|
41
|
+
|
|
42
|
+
num_data = df[['duration', 'metascore', 'userscore']]
|
|
43
|
+
|
|
44
|
+
print("\nMean:\n", num_data.mean())
|
|
45
|
+
print("\nMedian:\n", num_data.median())
|
|
46
|
+
print("\nMode:\n", num_data.mode().iloc[0])
|
|
47
|
+
print("\nVariance:\n", num_data.var())
|
|
48
|
+
print("\nStandard Deviation:\n", num_data.std())
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ------------------------------------------
|
|
52
|
+
# c) Data Preprocessing
|
|
53
|
+
# ------------------------------------------
|
|
54
|
+
|
|
55
|
+
# i. Reshaping
|
|
56
|
+
reshaped_data = df['duration'].values.reshape(-1, 1)
|
|
57
|
+
print("\nReshaped Data:")
|
|
58
|
+
print(reshaped_data[:5])
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ii. Filtering
|
|
62
|
+
filtered_data = df[df['metascore'] > 80]
|
|
63
|
+
print("\nFiltered Data (Metascore > 80):")
|
|
64
|
+
print(filtered_data[['title', 'metascore']].head())
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# iii. Merging
|
|
68
|
+
merged_df = pd.merge(df, director_info, on='director', how='left')
|
|
69
|
+
print("\nMerged Dataset:")
|
|
70
|
+
print(merged_df[['title', 'director', 'country']].head())
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# iv. Handling Missing Values
|
|
74
|
+
print("\nMissing Values Before Handling:")
|
|
75
|
+
print(df.isnull().sum())
|
|
76
|
+
|
|
77
|
+
# Assign back: chained inplace fillna warns on pandas 2.2 and is a no-op under Copy-on-Write.
df['metascore'] = df['metascore'].fillna(df['metascore'].mean())
|
|
78
|
+
# Assign back: chained inplace fillna warns on pandas 2.2 and is a no-op under Copy-on-Write.
df['userscore'] = df['userscore'].fillna(df['userscore'].mean())
|
|
79
|
+
# Assign back: chained inplace fillna warns on pandas 2.2 and is a no-op under Copy-on-Write.
df['director'] = df['director'].fillna(df['director'].mode()[0])
|
|
80
|
+
|
|
81
|
+
print("\nMissing Values After Handling:")
|
|
82
|
+
print(df.isnull().sum())
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# v. Feature Normalization
|
|
86
|
+
scaler = MinMaxScaler()
|
|
87
|
+
df[['duration', 'metascore', 'userscore']] = scaler.fit_transform(
|
|
88
|
+
df[['duration', 'metascore', 'userscore']]
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
print("\nNormalized Features:")
|
|
92
|
+
print(df[['duration', 'metascore', 'userscore']].head())
|
|
93
|
+
''',
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
2: r'''
|
|
97
|
+
#2
|
|
98
|
+
import numpy as np
|
|
99
|
+
import matplotlib.pyplot as plt
|
|
100
|
+
from sklearn.datasets import load_iris
|
|
101
|
+
from sklearn.model_selection import train_test_split
|
|
102
|
+
from sklearn.linear_model import LinearRegression
|
|
103
|
+
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
|
|
104
|
+
|
|
105
|
+
# ------------------------------------------
|
|
106
|
+
# Load Iris Dataset
|
|
107
|
+
# ------------------------------------------
|
|
108
|
+
|
|
109
|
+
iris = load_iris()
|
|
110
|
+
|
|
111
|
+
# Use only ONE feature (Simple Linear Regression)
|
|
112
|
+
X = iris.data[:, 0].reshape(-1, 1)
|
|
113
|
+
y = iris.target
|
|
114
|
+
|
|
115
|
+
print("Feature Shape:", X.shape)
|
|
116
|
+
print("Target Shape:", y.shape)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# ------------------------------------------
|
|
120
|
+
# Split Dataset
|
|
121
|
+
# ------------------------------------------
|
|
122
|
+
|
|
123
|
+
X_train, X_test, y_train, y_test = train_test_split(
|
|
124
|
+
X, y, test_size=0.3, random_state=42
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# ------------------------------------------
|
|
129
|
+
# Create and Train Model
|
|
130
|
+
# ------------------------------------------
|
|
131
|
+
|
|
132
|
+
model = LinearRegression()
|
|
133
|
+
model.fit(X_train, y_train)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ------------------------------------------
|
|
137
|
+
# Prediction
|
|
138
|
+
# ------------------------------------------
|
|
139
|
+
|
|
140
|
+
y_pred = model.predict(X_test)
|
|
141
|
+
|
|
142
|
+
# Convert continuous output → class labels
|
|
143
|
+
y_pred_class = np.round(y_pred).astype(int)
|
|
144
|
+
y_pred_class = np.clip(y_pred_class, 0, 2)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# ------------------------------------------
|
|
148
|
+
# Confusion Matrix
|
|
149
|
+
# ------------------------------------------
|
|
150
|
+
|
|
151
|
+
# Pin the label set so the matrix is always 3x3, matching the three display_labels used when plotting,
# even if rounding the regression output never produces one of the classes.
cm = confusion_matrix(y_test, y_pred_class, labels=[0, 1, 2])
|
|
152
|
+
|
|
153
|
+
print("\nConfusion Matrix:\n", cm)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# ------------------------------------------
|
|
157
|
+
# Plot Confusion Matrix
|
|
158
|
+
# ------------------------------------------
|
|
159
|
+
|
|
160
|
+
disp = ConfusionMatrixDisplay(
|
|
161
|
+
confusion_matrix=cm,
|
|
162
|
+
display_labels=iris.target_names
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
disp.plot(cmap="Blues")
|
|
166
|
+
plt.title("Confusion Matrix - Simple Linear Regression (Iris)")
|
|
167
|
+
plt.show()
|
|
168
|
+
''',
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
3: r'''
|
|
172
|
+
import pandas as pd
|
|
173
|
+
from sklearn.datasets import load_wine
|
|
174
|
+
from sklearn.model_selection import train_test_split
|
|
175
|
+
from sklearn.linear_model import LinearRegression
|
|
176
|
+
from sklearn.metrics import mean_squared_error, r2_score
|
|
177
|
+
|
|
178
|
+
# Load dataset
|
|
179
|
+
wine = load_wine()
|
|
180
|
+
df = pd.DataFrame(wine.data, columns=wine.feature_names)
|
|
181
|
+
df["target"] = wine.target
|
|
182
|
+
|
|
183
|
+
# Define X and y
|
|
184
|
+
X = df[["alcohol", "malic_acid", "ash"]]
|
|
185
|
+
y = df["target"]
|
|
186
|
+
|
|
187
|
+
# Split
|
|
188
|
+
X_train, X_test, y_train, y_test = train_test_split(
|
|
189
|
+
X, y, test_size=0.3, random_state=42
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
# Model
|
|
193
|
+
model = LinearRegression()
|
|
194
|
+
model.fit(X_train, y_train)
|
|
195
|
+
|
|
196
|
+
# Prediction
|
|
197
|
+
y_pred = model.predict(X_test)
|
|
198
|
+
|
|
199
|
+
# Evaluation
|
|
200
|
+
mse = mean_squared_error(y_test, y_pred)
|
|
201
|
+
r2 = r2_score(y_test, y_pred)
|
|
202
|
+
|
|
203
|
+
print("MSE:", mse)
|
|
204
|
+
print("R2 Score:", r2)
|
|
205
|
+
|
|
206
|
+
# Coefficients
|
|
207
|
+
coefficients = pd.DataFrame({
|
|
208
|
+
"Feature": X.columns,
|
|
209
|
+
"Coefficient": model.coef_
|
|
210
|
+
})
|
|
211
|
+
|
|
212
|
+
print("\nCoefficients:\n", coefficients)
|
|
213
|
+
''',
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
4: r'''
|
|
217
|
+
from sklearn.datasets import load_iris
|
|
218
|
+
from sklearn.model_selection import train_test_split
|
|
219
|
+
from sklearn.svm import SVC
|
|
220
|
+
from sklearn.metrics import confusion_matrix, accuracy_score
|
|
221
|
+
import matplotlib.pyplot as plt
|
|
222
|
+
import seaborn as sns
|
|
223
|
+
|
|
224
|
+
# Load dataset
|
|
225
|
+
data = load_iris()
|
|
226
|
+
X = data.data
|
|
227
|
+
y = data.target
|
|
228
|
+
|
|
229
|
+
# Split
|
|
230
|
+
X_train, X_test, y_train, y_test = train_test_split(
|
|
231
|
+
X, y, test_size=0.2, random_state=42
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
# Model
|
|
235
|
+
model = SVC(kernel='linear')
|
|
236
|
+
model.fit(X_train, y_train)
|
|
237
|
+
|
|
238
|
+
# Prediction
|
|
239
|
+
y_pred = model.predict(X_test)
|
|
240
|
+
|
|
241
|
+
# Confusion Matrix
|
|
242
|
+
cm = confusion_matrix(y_test, y_pred)
|
|
243
|
+
print("Confusion Matrix:\n", cm)
|
|
244
|
+
|
|
245
|
+
# Accuracy
|
|
246
|
+
accuracy = accuracy_score(y_test, y_pred)
|
|
247
|
+
print("Accuracy:", accuracy)
|
|
248
|
+
|
|
249
|
+
# Plot
|
|
250
|
+
sns.heatmap(cm, annot=True, cmap='Blues')
|
|
251
|
+
plt.xlabel("Predicted")
|
|
252
|
+
plt.ylabel("Actual")
|
|
253
|
+
plt.title("Confusion Matrix")
|
|
254
|
+
plt.show()
|
|
255
|
+
''',
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
5: r'''
|
|
259
|
+
import matplotlib.pyplot as plt
|
|
260
|
+
from sklearn.datasets import load_breast_cancer
|
|
261
|
+
from sklearn.model_selection import train_test_split
|
|
262
|
+
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
|
|
263
|
+
from sklearn.tree import DecisionTreeClassifier
|
|
264
|
+
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
|
|
265
|
+
|
|
266
|
+
# Load dataset
|
|
267
|
+
data = load_breast_cancer()
|
|
268
|
+
X = data.data
|
|
269
|
+
y = data.target
|
|
270
|
+
|
|
271
|
+
# Split
|
|
272
|
+
X_train, X_test, y_train, y_test = train_test_split(
|
|
273
|
+
X, y, test_size=0.3, random_state=42
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
# ---------------- BAGGING ----------------
|
|
277
|
+
bagging_model = BaggingClassifier(
|
|
278
|
+
estimator=DecisionTreeClassifier(),
|
|
279
|
+
n_estimators=50,
|
|
280
|
+
random_state=42
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
bagging_model.fit(X_train, y_train)
|
|
284
|
+
y_pred_bag = bagging_model.predict(X_test)
|
|
285
|
+
bag_acc = accuracy_score(y_test, y_pred_bag)
|
|
286
|
+
|
|
287
|
+
# ---------------- BOOSTING ----------------
|
|
288
|
+
boosting_model = AdaBoostClassifier(
|
|
289
|
+
estimator=DecisionTreeClassifier(max_depth=1),
|
|
290
|
+
n_estimators=50,
|
|
291
|
+
learning_rate=1.0,
|
|
292
|
+
random_state=42
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
boosting_model.fit(X_train, y_train)
|
|
296
|
+
y_pred_boost = boosting_model.predict(X_test)
|
|
297
|
+
boost_acc = accuracy_score(y_test, y_pred_boost)
|
|
298
|
+
|
|
299
|
+
# Accuracy comparison
|
|
300
|
+
plt.bar(['Bagging', 'Boosting'], [bag_acc, boost_acc])
|
|
301
|
+
plt.title("Accuracy Comparison")
|
|
302
|
+
plt.show()
|
|
303
|
+
|
|
304
|
+
# Confusion Matrix - Bagging
|
|
305
|
+
cm_bag = confusion_matrix(y_test, y_pred_bag)
|
|
306
|
+
ConfusionMatrixDisplay(cm_bag).plot()
|
|
307
|
+
plt.title("Bagging CM")
|
|
308
|
+
plt.show()
|
|
309
|
+
|
|
310
|
+
# Confusion Matrix - Boosting
|
|
311
|
+
cm_boost = confusion_matrix(y_test, y_pred_boost)
|
|
312
|
+
ConfusionMatrixDisplay(cm_boost).plot()
|
|
313
|
+
plt.title("Boosting CM")
|
|
314
|
+
plt.show()
|
|
315
|
+
|
|
316
|
+
# Feature Importance (Boosting)
|
|
317
|
+
importances = boosting_model.feature_importances_
|
|
318
|
+
plt.bar(range(len(importances)), importances)
|
|
319
|
+
plt.title("Feature Importance")
|
|
320
|
+
plt.show()
|
|
321
|
+
''',
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
6: r'''
|
|
325
|
+
import matplotlib.pyplot as plt
|
|
326
|
+
from sklearn.datasets import load_iris
|
|
327
|
+
from sklearn.model_selection import train_test_split
|
|
328
|
+
from sklearn.tree import DecisionTreeClassifier, plot_tree
|
|
329
|
+
from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay
|
|
330
|
+
|
|
331
|
+
# Load dataset
|
|
332
|
+
data = load_iris()
|
|
333
|
+
X = data.data
|
|
334
|
+
y = data.target
|
|
335
|
+
|
|
336
|
+
# Split
|
|
337
|
+
X_train, X_test, y_train, y_test = train_test_split(
|
|
338
|
+
X, y, test_size=0.3, random_state=42
|
|
339
|
+
)
|
|
340
|
+
|
|
341
|
+
# Model (ID3 using entropy)
|
|
342
|
+
model = DecisionTreeClassifier(criterion='entropy')
|
|
343
|
+
model.fit(X_train, y_train)
|
|
344
|
+
|
|
345
|
+
# Prediction
|
|
346
|
+
y_pred = model.predict(X_test)
|
|
347
|
+
|
|
348
|
+
# Confusion Matrix
|
|
349
|
+
cm = confusion_matrix(y_test, y_pred)
|
|
350
|
+
print("Confusion Matrix:\n", cm)
|
|
351
|
+
|
|
352
|
+
# Accuracy
|
|
353
|
+
accuracy = accuracy_score(y_test, y_pred)
|
|
354
|
+
print("Accuracy:", accuracy)
|
|
355
|
+
|
|
356
|
+
# Tree Plot
|
|
357
|
+
plt.figure()
|
|
358
|
+
plot_tree(
|
|
359
|
+
model,
|
|
360
|
+
feature_names=data.feature_names,
|
|
361
|
+
class_names=data.target_names,
|
|
362
|
+
filled=True
|
|
363
|
+
)
|
|
364
|
+
plt.title("Decision Tree")
|
|
365
|
+
plt.show()
|
|
366
|
+
|
|
367
|
+
# Confusion Matrix Plot
|
|
368
|
+
ConfusionMatrixDisplay(cm, display_labels=data.target_names).plot()
|
|
369
|
+
plt.title("Confusion Matrix")
|
|
370
|
+
plt.show()
|
|
371
|
+
|
|
372
|
+
# Accuracy Bar
|
|
373
|
+
plt.bar(['Decision Tree'], [accuracy])
|
|
374
|
+
plt.title("Accuracy")
|
|
375
|
+
plt.show()
|
|
376
|
+
''',
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
# Snippets 7-8: KNN classification and K-Means clustering on the iris CSV
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
7: r'''
|
|
383
|
+
import pandas as pd
|
|
384
|
+
import matplotlib.pyplot as plt
|
|
385
|
+
import seaborn as sns
|
|
386
|
+
|
|
387
|
+
from sklearn.model_selection import train_test_split
|
|
388
|
+
from sklearn.neighbors import KNeighborsClassifier
|
|
389
|
+
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
|
|
390
|
+
from sklearn.preprocessing import LabelEncoder
|
|
391
|
+
|
|
392
|
+
# Load dataset
|
|
393
|
+
url = "https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/csv/iris.csv"
|
|
394
|
+
df = pd.read_csv(url)
|
|
395
|
+
|
|
396
|
+
# Features and target
|
|
397
|
+
X = df[['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']]
|
|
398
|
+
y = df['Name']
|
|
399
|
+
|
|
400
|
+
# Encode target
|
|
401
|
+
le = LabelEncoder()
|
|
402
|
+
y_encoded = le.fit_transform(y)
|
|
403
|
+
|
|
404
|
+
# Split
|
|
405
|
+
X_train, X_test, y_train, y_test = train_test_split(
|
|
406
|
+
X, y_encoded, test_size=0.3, random_state=42
|
|
407
|
+
)
|
|
408
|
+
|
|
409
|
+
# K values
|
|
410
|
+
k_values = [1, 3, 5, 7]
|
|
411
|
+
results = []
|
|
412
|
+
|
|
413
|
+
for k in k_values:
|
|
414
|
+
|
|
415
|
+
print("\n============================")
|
|
416
|
+
print(f"K = {k}")
|
|
417
|
+
print("============================")
|
|
418
|
+
|
|
419
|
+
# Model
|
|
420
|
+
knn = KNeighborsClassifier(n_neighbors=k)
|
|
421
|
+
knn.fit(X_train, y_train)
|
|
422
|
+
|
|
423
|
+
# Prediction
|
|
424
|
+
y_pred = knn.predict(X_test)
|
|
425
|
+
|
|
426
|
+
# Accuracy
|
|
427
|
+
acc = accuracy_score(y_test, y_pred)
|
|
428
|
+
results.append([k, acc])
|
|
429
|
+
print("Accuracy:", acc)
|
|
430
|
+
|
|
431
|
+
# Confusion Matrix
|
|
432
|
+
cm = confusion_matrix(y_test, y_pred)
|
|
433
|
+
print("Confusion Matrix:\n", cm)
|
|
434
|
+
|
|
435
|
+
# Classification Report
|
|
436
|
+
print(classification_report(y_test, y_pred, target_names=le.classes_))
|
|
437
|
+
|
|
438
|
+
# Plot CM
|
|
439
|
+
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
|
|
440
|
+
xticklabels=le.classes_,
|
|
441
|
+
yticklabels=le.classes_)
|
|
442
|
+
plt.title(f'Confusion Matrix (k={k})')
|
|
443
|
+
plt.xlabel('Predicted')
|
|
444
|
+
plt.ylabel('Actual')
|
|
445
|
+
plt.show()
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
# Compare results
|
|
449
|
+
result_df = pd.DataFrame(results, columns=['K', 'Accuracy'])
|
|
450
|
+
print("\nComparison:\n", result_df)
|
|
451
|
+
|
|
452
|
+
# Plot accuracy vs K
|
|
453
|
+
plt.plot(result_df['K'], result_df['Accuracy'], marker='o')
|
|
454
|
+
plt.title("K vs Accuracy")
|
|
455
|
+
plt.xlabel("K")
|
|
456
|
+
plt.ylabel("Accuracy")
|
|
457
|
+
plt.grid(True)
|
|
458
|
+
plt.show()
|
|
459
|
+
''',
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
8: r'''
|
|
463
|
+
import pandas as pd
|
|
464
|
+
import matplotlib.pyplot as plt
|
|
465
|
+
from sklearn.cluster import KMeans
|
|
466
|
+
from sklearn.preprocessing import StandardScaler
|
|
467
|
+
|
|
468
|
+
# Load dataset
|
|
469
|
+
url = "https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/csv/iris.csv"
|
|
470
|
+
df = pd.read_csv(url)
|
|
471
|
+
|
|
472
|
+
# Features only
|
|
473
|
+
X = df[['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']]
|
|
474
|
+
|
|
475
|
+
# Scaling
|
|
476
|
+
scaler = StandardScaler()
|
|
477
|
+
X_scaled = scaler.fit_transform(X)
|
|
478
|
+
|
|
479
|
+
# K values
|
|
480
|
+
k_values = [1, 3, 5]
|
|
481
|
+
inertia_values = []
|
|
482
|
+
|
|
483
|
+
for k in k_values:
|
|
484
|
+
|
|
485
|
+
print("\n============================")
|
|
486
|
+
print(f"K = {k}")
|
|
487
|
+
print("============================")
|
|
488
|
+
|
|
489
|
+
# Model
|
|
490
|
+
kmeans = KMeans(n_clusters=k, random_state=42)
|
|
491
|
+
kmeans.fit(X_scaled)
|
|
492
|
+
|
|
493
|
+
# Results
|
|
494
|
+
labels = kmeans.labels_
|
|
495
|
+
inertia = kmeans.inertia_
|
|
496
|
+
inertia_values.append(inertia)
|
|
497
|
+
|
|
498
|
+
print("Inertia:", inertia)
|
|
499
|
+
|
|
500
|
+
# Plot clusters
|
|
501
|
+
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=labels)
|
|
502
|
+
plt.title(f"K-Means (k={k})")
|
|
503
|
+
plt.xlabel("Feature 1")
|
|
504
|
+
plt.ylabel("Feature 2")
|
|
505
|
+
plt.show()
|
|
506
|
+
|
|
507
|
+
# Elbow method
|
|
508
|
+
plt.plot(k_values, inertia_values, marker='o')
|
|
509
|
+
plt.title("K vs Inertia")
|
|
510
|
+
plt.xlabel("K")
|
|
511
|
+
plt.ylabel("Inertia")
|
|
512
|
+
plt.grid(True)
|
|
513
|
+
plt.show()
|
|
514
|
+
''',
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
# Snippets 9-10: MLP classifier on the iris CSV and a CNN on MNIST
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
9: r'''
|
|
522
|
+
import pandas as pd
|
|
523
|
+
import matplotlib.pyplot as plt
|
|
524
|
+
|
|
525
|
+
from sklearn.model_selection import train_test_split
|
|
526
|
+
from sklearn.preprocessing import LabelEncoder, StandardScaler
|
|
527
|
+
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
|
|
528
|
+
from sklearn.neural_network import MLPClassifier
|
|
529
|
+
|
|
530
|
+
# Load dataset
|
|
531
|
+
url = "https://raw.githubusercontent.com/pandas-dev/pandas/main/pandas/tests/io/data/csv/iris.csv"
|
|
532
|
+
df = pd.read_csv(url)
|
|
533
|
+
|
|
534
|
+
# Features & target
|
|
535
|
+
X = df[['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']]
|
|
536
|
+
y = df['Name']
|
|
537
|
+
|
|
538
|
+
# Encode labels
|
|
539
|
+
le = LabelEncoder()
|
|
540
|
+
y_encoded = le.fit_transform(y)
|
|
541
|
+
|
|
542
|
+
# Scale data
|
|
543
|
+
scaler = StandardScaler()
|
|
544
|
+
X_scaled = scaler.fit_transform(X)
|
|
545
|
+
|
|
546
|
+
# Split
|
|
547
|
+
X_train, X_test, y_train, y_test = train_test_split(
|
|
548
|
+
X_scaled, y_encoded, test_size=0.3, random_state=42
|
|
549
|
+
)
|
|
550
|
+
|
|
551
|
+
# ANN Model
|
|
552
|
+
ann = MLPClassifier(
|
|
553
|
+
hidden_layer_sizes=(8, 8),
|
|
554
|
+
activation='relu',
|
|
555
|
+
solver='adam',
|
|
556
|
+
max_iter=500,
|
|
557
|
+
random_state=42
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
# Train
|
|
561
|
+
ann.fit(X_train, y_train)
|
|
562
|
+
|
|
563
|
+
# Predict
|
|
564
|
+
y_pred = ann.predict(X_test)
|
|
565
|
+
|
|
566
|
+
# Evaluation
|
|
567
|
+
accuracy = accuracy_score(y_test, y_pred)
|
|
568
|
+
cm = confusion_matrix(y_test, y_pred)
|
|
569
|
+
|
|
570
|
+
print("Accuracy:", accuracy)
|
|
571
|
+
print("Confusion Matrix:\n", cm)
|
|
572
|
+
|
|
573
|
+
print("\nClassification Report:\n",
|
|
574
|
+
classification_report(y_test, y_pred, target_names=le.classes_))
|
|
575
|
+
|
|
576
|
+
# Plot confusion matrix
|
|
577
|
+
plt.imshow(cm)
|
|
578
|
+
plt.title("Confusion Matrix")
|
|
579
|
+
plt.xlabel("Predicted")
|
|
580
|
+
plt.ylabel("Actual")
|
|
581
|
+
plt.colorbar()
|
|
582
|
+
plt.show()
|
|
583
|
+
''',
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
10: r'''
|
|
587
|
+
import numpy as np
|
|
588
|
+
import matplotlib.pyplot as plt
|
|
589
|
+
|
|
590
|
+
from tensorflow.keras.datasets import mnist
|
|
591
|
+
from tensorflow.keras.models import Sequential
|
|
592
|
+
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
|
|
593
|
+
from tensorflow.keras.utils import to_categorical
|
|
594
|
+
|
|
595
|
+
from sklearn.metrics import confusion_matrix
|
|
596
|
+
|
|
597
|
+
# Load dataset
|
|
598
|
+
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
|
599
|
+
|
|
600
|
+
# Reshape
|
|
601
|
+
X_train = X_train.reshape(-1, 28, 28, 1)
|
|
602
|
+
X_test = X_test.reshape(-1, 28, 28, 1)
|
|
603
|
+
|
|
604
|
+
# Normalize
|
|
605
|
+
X_train = X_train / 255.0
|
|
606
|
+
X_test = X_test / 255.0
|
|
607
|
+
|
|
608
|
+
# One-hot encoding
|
|
609
|
+
y_train_cat = to_categorical(y_train, 10)
|
|
610
|
+
y_test_cat = to_categorical(y_test, 10)
|
|
611
|
+
|
|
612
|
+
# Model
|
|
613
|
+
model = Sequential()
|
|
614
|
+
|
|
615
|
+
model.add(Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)))
|
|
616
|
+
model.add(MaxPooling2D((2,2)))
|
|
617
|
+
|
|
618
|
+
model.add(Conv2D(64, (3,3), activation='relu'))
|
|
619
|
+
model.add(MaxPooling2D((2,2)))
|
|
620
|
+
|
|
621
|
+
model.add(Flatten())
|
|
622
|
+
|
|
623
|
+
model.add(Dense(128, activation='relu'))
|
|
624
|
+
model.add(Dense(10, activation='softmax'))
|
|
625
|
+
|
|
626
|
+
# Compile
|
|
627
|
+
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
|
|
628
|
+
|
|
629
|
+
# Train
|
|
630
|
+
model.fit(X_train, y_train_cat, epochs=5, batch_size=64)
|
|
631
|
+
|
|
632
|
+
# Evaluate
|
|
633
|
+
loss, accuracy = model.evaluate(X_test, y_test_cat)
|
|
634
|
+
print("Accuracy:", accuracy)
|
|
635
|
+
|
|
636
|
+
# Predict
|
|
637
|
+
y_pred = model.predict(X_test)
|
|
638
|
+
y_pred_classes = np.argmax(y_pred, axis=1)
|
|
639
|
+
|
|
640
|
+
# Confusion matrix
|
|
641
|
+
cm = confusion_matrix(y_test, y_pred_classes)
|
|
642
|
+
print("Confusion Matrix:\n", cm)
|
|
643
|
+
|
|
644
|
+
# Plot
|
|
645
|
+
plt.imshow(cm)
|
|
646
|
+
plt.title("Confusion Matrix")
|
|
647
|
+
plt.xlabel("Predicted")
|
|
648
|
+
plt.ylabel("Actual")
|
|
649
|
+
plt.colorbar()
|
|
650
|
+
plt.show()
|
|
651
|
+
''',
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
scklearn/__init__.py,sha256=n6XbWVGPuKe6JYnnzMokRuVFGlRGxGYOufwVwxA7W2s,32
|
|
2
|
+
scklearn/printer.py,sha256=w0XGyi9pfz4nSR5RKkSs2O3s_rrnUvKTeZKfg59fqwA,166
|
|
3
|
+
scklearn/snippets.py,sha256=IFQCks5y7dNjC7O7k1M1MB7niCs6J6GzT-hkFJihv3w,15659
|
|
4
|
+
scklearn-0.0.1.dist-info/METADATA,sha256=9cBqbkZVvLZ7oCeCt5Xz7cCO2DLKtnt1JX7_tNOaYbA,187
|
|
5
|
+
scklearn-0.0.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
6
|
+
scklearn-0.0.1.dist-info/top_level.txt,sha256=y9aqJT1z0HrCSEn__9Iffc7z7H9t8JZgSGLlKCBHDe0,9
|
|
7
|
+
scklearn-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
scklearn
|