matplotliblib 0.1__tar.gz → 0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {matplotliblib-0.1 → matplotliblib-0.3}/PKG-INFO +2 -2
- {matplotliblib-0.1 → matplotliblib-0.3}/matplotliblib/__init__.py +2 -1
- {matplotliblib-0.1 → matplotliblib-0.3}/matplotliblib/main.py +292 -76
- {matplotliblib-0.1 → matplotliblib-0.3}/matplotliblib.egg-info/PKG-INFO +2 -2
- {matplotliblib-0.1 → matplotliblib-0.3}/setup.py +3 -3
- {matplotliblib-0.1 → matplotliblib-0.3}/README.md +0 -0
- {matplotliblib-0.1 → matplotliblib-0.3}/matplotliblib.egg-info/SOURCES.txt +0 -0
- {matplotliblib-0.1 → matplotliblib-0.3}/matplotliblib.egg-info/dependency_links.txt +0 -0
- {matplotliblib-0.1 → matplotliblib-0.3}/matplotliblib.egg-info/top_level.txt +0 -0
- {matplotliblib-0.1 → matplotliblib-0.3}/setup.cfg +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: matplotliblib
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.3
|
|
4
4
|
Summary: Collection of machine learning tools
|
|
5
|
-
Author:
|
|
5
|
+
Author: Anonymus
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
7
7
|
Classifier: Operating System :: OS Independent
|
|
8
8
|
Requires-Python: >=3.7
|
|
@@ -17,35 +17,25 @@ print("Description")
|
|
|
17
17
|
df.describe()
|
|
18
18
|
|
|
19
19
|
print("Null values: \n", df.isnull().sum())
|
|
20
|
-
df.hist(figsize=(10,6))
|
|
21
|
-
plt.show()
|
|
22
20
|
|
|
23
|
-
|
|
24
|
-
#Hisogram:
|
|
25
21
|
df.hist(figsize=(10,6))
|
|
26
22
|
plt.show()
|
|
27
23
|
|
|
28
|
-
#Boxplot
|
|
29
24
|
sns.boxplot(data=df)
|
|
30
25
|
plt.show()
|
|
31
26
|
|
|
32
|
-
|
|
33
|
-
#Countplot
|
|
34
27
|
sns.countplot(x='species', data=df)
|
|
35
28
|
plt.show()
|
|
36
29
|
|
|
37
|
-
|
|
38
30
|
#Scatter Plot
|
|
39
31
|
plt.scatter(df['sepal_length'], df['sepal_width'])
|
|
40
32
|
plt.xlabel('Sepal Length')
|
|
41
33
|
plt.ylabel('Sepal Width')
|
|
42
34
|
plt.show()
|
|
43
35
|
|
|
44
|
-
#Pairplot
|
|
45
36
|
sns.pairplot(df, hue='species')
|
|
46
37
|
plt.show()
|
|
47
38
|
|
|
48
|
-
#Scatter Multiple
|
|
49
39
|
plt.figure(figsize=(10,6))
|
|
50
40
|
plt.scatter(df['petal_length'], df['sepal_length'], label='Sepal Length')
|
|
51
41
|
plt.scatter(df['petal_length'], df['sepal_width'], label='Sepal Width')
|
|
@@ -57,13 +47,11 @@ plt.title('Scatter Multiple Plot (Iris Dataset)')
|
|
|
57
47
|
plt.legend()
|
|
58
48
|
plt.show()
|
|
59
49
|
|
|
60
|
-
#Scatter Matrix
|
|
61
50
|
from pandas.plotting import scatter_matrix
|
|
62
51
|
plt.figure(figsize=(10,8))
|
|
63
52
|
scatter_matrix(df.iloc[:, :4], figsize=(10,8), diagonal='hist')
|
|
64
53
|
plt.show()
|
|
65
54
|
|
|
66
|
-
#Parallel Coordinates plot
|
|
67
55
|
from pandas.plotting import parallel_coordinates
|
|
68
56
|
|
|
69
57
|
plt.figure(figsize=(10,6))
|
|
@@ -93,7 +81,7 @@ plt.show()
|
|
|
93
81
|
|
|
94
82
|
def decisiontree():
|
|
95
83
|
print('''
|
|
96
|
-
|
|
84
|
+
import pandas as pd
|
|
97
85
|
import numpy as np
|
|
98
86
|
from sklearn.tree import DecisionTreeClassifier
|
|
99
87
|
from sklearn.model_selection import train_test_split
|
|
@@ -124,7 +112,7 @@ plot_tree(dt_model, feature_names=iris.feature_names,
|
|
|
124
112
|
class_names=iris.target_names, filled=True)
|
|
125
113
|
plt.show()
|
|
126
114
|
|
|
127
|
-
#
|
|
115
|
+
# mtcars dataset
|
|
128
116
|
import pandas as pd
|
|
129
117
|
import numpy as np
|
|
130
118
|
from sklearn.tree import DecisionTreeClassifier
|
|
@@ -192,7 +180,7 @@ plt.ylabel("Petal Width")
|
|
|
192
180
|
plt.title("K-Means Clustering on Iris Dataset")
|
|
193
181
|
plt.show()
|
|
194
182
|
|
|
195
|
-
# DBSCAN
|
|
183
|
+
# DBSCAN
|
|
196
184
|
import numpy as np
|
|
197
185
|
import matplotlib.pyplot as plt
|
|
198
186
|
from sklearn.datasets import load_iris
|
|
@@ -216,42 +204,30 @@ plt.xlabel("Sepal Length")
|
|
|
216
204
|
plt.ylabel("Sepal Width")
|
|
217
205
|
plt.title("DBSCAN Clustering on Iris Dataset")
|
|
218
206
|
plt.show()
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
207
|
''')
|
|
224
208
|
|
|
225
209
|
def regression():
|
|
226
210
|
print('''
|
|
227
|
-
#
|
|
228
|
-
|
|
229
|
-
# Basic operations
|
|
211
|
+
#Slr
|
|
230
212
|
import numpy as np
|
|
231
213
|
import pandas as pd
|
|
232
214
|
|
|
233
|
-
# Plotting
|
|
234
215
|
import matplotlib.pyplot as plt
|
|
235
216
|
import seaborn as sns
|
|
236
217
|
|
|
237
|
-
# sklearn for model building & evaluation
|
|
238
218
|
from sklearn.linear_model import LinearRegression
|
|
239
219
|
from sklearn.model_selection import train_test_split, cross_val_score, KFold
|
|
240
220
|
from sklearn.metrics import mean_squared_error, r2_score
|
|
241
221
|
|
|
242
|
-
#
|
|
222
|
+
#(p-values, std err)
|
|
243
223
|
import statsmodels.api as sm
|
|
244
|
-
|
|
245
|
-
# For VIF (multicollinearity)
|
|
246
224
|
from statsmodels.stats.outliers_influence import variance_inflation_factor
|
|
247
225
|
|
|
248
|
-
# For qqplot
|
|
249
226
|
import scipy.stats as stats
|
|
250
227
|
|
|
251
228
|
hours = np.array([1, 2, 4, 5, 5, 6])
|
|
252
229
|
score = np.array([64, 66, 76, 73, 74, 81])
|
|
253
230
|
|
|
254
|
-
# Put into a DataFrame for convenience
|
|
255
231
|
df = pd.DataFrame({"hours": hours, "score": score})
|
|
256
232
|
|
|
257
233
|
# Scatter plot with regression line visually
|
|
@@ -262,37 +238,32 @@ plt.xlabel("Hours studied")
|
|
|
262
238
|
plt.ylabel("Exam Score")
|
|
263
239
|
plt.show()
|
|
264
240
|
|
|
265
|
-
# Boxplot
|
|
241
|
+
# Boxplot
|
|
266
242
|
plt.figure()
|
|
267
243
|
sns.boxplot(y=df["score"])
|
|
268
244
|
plt.title("Boxplot of Exam Scores")
|
|
269
245
|
plt.show()
|
|
270
246
|
|
|
271
|
-
# Prepare X and y
|
|
272
247
|
X = df[["hours"]] # 2D array (n_samples, n_features)
|
|
273
248
|
y = df["score"]
|
|
274
249
|
|
|
275
|
-
# Fit model (sklearn)
|
|
276
250
|
lr = LinearRegression()
|
|
277
251
|
lr.fit(X, y)
|
|
278
252
|
|
|
279
|
-
# Coefficients
|
|
280
253
|
b0 = lr.intercept_
|
|
281
254
|
b1 = lr.coef_[0]
|
|
282
255
|
print(f"Intercept (b0): {b0:.4f}")
|
|
283
256
|
print(f"Slope (b1): {b1:.4f}")
|
|
284
257
|
|
|
285
|
-
|
|
286
|
-
X_sm = sm.add_constant(X) # adds column of 1s for intercept
|
|
258
|
+
X_sm = sm.add_constant(X)
|
|
287
259
|
model_sm = sm.OLS(y, X_sm).fit()
|
|
288
260
|
print(model_sm.summary())
|
|
289
261
|
|
|
290
|
-
#
|
|
262
|
+
#(single value)
|
|
291
263
|
new_hours = pd.DataFrame({"hours": [3]})
|
|
292
264
|
predicted_score = lr.predict(new_hours)
|
|
293
265
|
print("Predicted score for 3 hours:", predicted_score[0])
|
|
294
266
|
|
|
295
|
-
# For presentation: create a DataFrame with a range and show predictions
|
|
296
267
|
grid = pd.DataFrame({"hours": np.linspace(df.hours.min(), df.hours.max(), 50)})
|
|
297
268
|
grid["predicted_score"] = lr.predict(grid)
|
|
298
269
|
grid.head()
|
|
@@ -309,19 +280,19 @@ plt.ylabel("Residuals")
|
|
|
309
280
|
plt.title("Residuals vs Fitted")
|
|
310
281
|
plt.show()
|
|
311
282
|
|
|
312
|
-
#
|
|
283
|
+
#QQ
|
|
313
284
|
plt.figure()
|
|
314
285
|
sm.qqplot(df["residuals"], line='45', fit=True)
|
|
315
286
|
plt.title("Q-Q plot of residuals")
|
|
316
287
|
plt.show()
|
|
317
288
|
|
|
318
|
-
#
|
|
289
|
+
#Hist
|
|
319
290
|
plt.figure()
|
|
320
291
|
sns.histplot(df["residuals"], kde=True)
|
|
321
292
|
plt.title("Histogram of residuals")
|
|
322
293
|
plt.show()
|
|
323
294
|
|
|
324
|
-
#
|
|
295
|
+
#mlr
|
|
325
296
|
import numpy as np
|
|
326
297
|
import pandas as pd
|
|
327
298
|
import matplotlib.pyplot as plt
|
|
@@ -338,23 +309,21 @@ from statsmodels.stats.stattools import durbin_watson
|
|
|
338
309
|
import scipy.stats as stats
|
|
339
310
|
|
|
340
311
|
house = pd.read_csv("index.csv")
|
|
341
|
-
# Quick peek
|
|
342
312
|
house.head()
|
|
343
313
|
house.shape
|
|
344
314
|
|
|
345
|
-
house.info()
|
|
346
|
-
house.describe().T
|
|
347
|
-
house.columns
|
|
315
|
+
house.info()
|
|
316
|
+
house.describe().T
|
|
317
|
+
house.columns
|
|
348
318
|
|
|
349
|
-
# Check missing values
|
|
350
319
|
house.isnull().sum()
|
|
351
320
|
|
|
352
|
-
#
|
|
321
|
+
#Pairwiserelation
|
|
353
322
|
sns.pairplot(house[['death_rate','doctor_avail','hosp_avail','annual_income','density_per_capita']])
|
|
354
323
|
plt.suptitle("Pairwise plots", y=1.02)
|
|
355
324
|
plt.show()
|
|
356
325
|
|
|
357
|
-
# Correlation matrix
|
|
326
|
+
# Correlation matrix
|
|
358
327
|
corr = house[['death_rate','doctor_avail','hosp_avail','annual_income','density_per_capita']].corr()
|
|
359
328
|
print(corr)
|
|
360
329
|
sns.heatmap(corr, annot=True, fmt=".2f")
|
|
@@ -364,10 +333,8 @@ plt.show()
|
|
|
364
333
|
X = house[['death_rate','doctor_avail','hosp_avail','annual_income']]
|
|
365
334
|
y = house['density_per_capita']
|
|
366
335
|
|
|
367
|
-
# (Optional) Train-test split for validation
|
|
368
336
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
369
337
|
|
|
370
|
-
# Fit with sklearn (fast, gives coefficients)
|
|
371
338
|
lr = LinearRegression()
|
|
372
339
|
lr.fit(X_train, y_train)
|
|
373
340
|
|
|
@@ -376,20 +343,16 @@ print("Coefficients (sklearn):")
|
|
|
376
343
|
for name, coef in zip(X.columns, lr.coef_):
|
|
377
344
|
print(f" {name}: {coef:.6f}")
|
|
378
345
|
|
|
379
|
-
#
|
|
380
|
-
X_sm = sm.add_constant(X)
|
|
346
|
+
# p-values, std errors, R-squared etc.
|
|
347
|
+
X_sm = sm.add_constant(X)
|
|
381
348
|
model_sm = sm.OLS(y, X_sm).fit()
|
|
382
349
|
|
|
383
|
-
# Use statsmodels' summary (detailed regression table)
|
|
384
350
|
print(model_sm.summary())
|
|
385
351
|
|
|
386
|
-
# Predict on training data or new data
|
|
387
352
|
house['predicted'] = model_sm.predict(X_sm) # statsmodels predict expects same X with constant
|
|
388
353
|
|
|
389
|
-
# If using sklearn model (trained on X_train)
|
|
390
354
|
y_test_pred = lr.predict(X_test)
|
|
391
355
|
|
|
392
|
-
# Example: predict for a new observation (replace with real numbers)
|
|
393
356
|
new_obs = pd.DataFrame({'death_rate':[2.0], 'doctor_avail':[1.5], 'hosp_avail':[0.7], 'annual_income':[35000]})
|
|
394
357
|
pred_new = lr.predict(new_obs) # sklearn
|
|
395
358
|
print("Predicted density_per_capita for new_obs:", pred_new[0])
|
|
@@ -399,7 +362,6 @@ r2 = r2_score(y_test, y_test_pred)
|
|
|
399
362
|
print("Test RMSE:", rmse)
|
|
400
363
|
print("Test R2:", r2)
|
|
401
364
|
|
|
402
|
-
# Step 6 - Visualize the model
|
|
403
365
|
for col in X.columns:
|
|
404
366
|
plt.figure()
|
|
405
367
|
sns.scatterplot(x=house[col], y=house['density_per_capita'])
|
|
@@ -419,7 +381,7 @@ plt.ylabel('Actual')
|
|
|
419
381
|
plt.title('Actual vs Predicted')
|
|
420
382
|
plt.show()
|
|
421
383
|
|
|
422
|
-
# Residuals vs Fitted (
|
|
384
|
+
# Residuals vs Fitted (heteroscedasticity / non-linearity)
|
|
423
385
|
residuals = house['density_per_capita'] - house['predicted']
|
|
424
386
|
plt.figure()
|
|
425
387
|
sns.scatterplot(x=house['predicted'], y=residuals)
|
|
@@ -429,22 +391,17 @@ plt.ylabel('Residuals')
|
|
|
429
391
|
plt.title('Residuals vs Fitted')
|
|
430
392
|
plt.show()
|
|
431
393
|
|
|
432
|
-
# Q-Q Plot
|
|
394
|
+
# Q-Q Plot
|
|
433
395
|
sm.qqplot(residuals, line='45', fit=True)
|
|
434
396
|
plt.title('Q-Q plot of residuals')
|
|
435
397
|
plt.show()
|
|
436
398
|
|
|
437
|
-
# Shapiro test (numeric)
|
|
438
399
|
stat, pval = stats.shapiro(residuals)
|
|
439
400
|
print("Shapiro-Wilk: stat=%.4f, p=%.4f" % (stat, pval))
|
|
440
401
|
|
|
441
402
|
# Influence / Leverage plot (outliers & influential points)
|
|
442
403
|
sm.graphics.influence_plot(model_sm, criterion="cooks")
|
|
443
404
|
plt.show()
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
405
|
''')
|
|
449
406
|
|
|
450
407
|
def association():
|
|
@@ -477,8 +434,23 @@ rules = association_rules(
|
|
|
477
434
|
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])
|
|
478
435
|
|
|
479
436
|
|
|
480
|
-
#
|
|
437
|
+
# FP-Growth
|
|
438
|
+
import pandas as pd
|
|
439
|
+
from mlxtend.preprocessing import TransactionEncoder
|
|
440
|
+
from mlxtend.frequent_patterns import apriori, association_rules
|
|
481
441
|
from mlxtend.frequent_patterns import fpgrowth
|
|
442
|
+
|
|
443
|
+
transactions = [
|
|
444
|
+
['Milk', 'Bread'],
|
|
445
|
+
['Bread', 'Diaper', 'Beer', 'Eggs'],
|
|
446
|
+
['Milk', 'Diaper', 'Beer', 'Cola'],
|
|
447
|
+
['Bread', 'Milk', 'Diaper', 'Beer'],
|
|
448
|
+
['Bread', 'Milk', 'Diaper', 'Cola']
|
|
449
|
+
]
|
|
450
|
+
|
|
451
|
+
te = TransactionEncoder()
|
|
452
|
+
te_array = te.fit(transactions).transform(transactions)
|
|
453
|
+
df = pd.DataFrame(te_array, columns=te.columns_)
|
|
482
454
|
print(df)
|
|
483
455
|
frequent_itemsets_fp = fpgrowth(df, min_support=0.4, use_colnames=True)
|
|
484
456
|
print(frequent_itemsets_fp)
|
|
@@ -621,33 +593,26 @@ plt.plot(forecast, label='Forecast', color='red')
|
|
|
621
593
|
plt.legend()
|
|
622
594
|
plt.show()
|
|
623
595
|
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
596
|
''')
|
|
629
597
|
|
|
630
598
|
def anomaly():
|
|
631
599
|
print('''
|
|
600
|
+
# Z-Score
|
|
632
601
|
import pandas as pd
|
|
633
602
|
from scipy.stats import zscore
|
|
634
603
|
|
|
635
|
-
# Salary data
|
|
636
604
|
df = pd.DataFrame({
|
|
637
605
|
'Employee_ID': ['E01','E02','E03','E04','E05','E06','E07'],
|
|
638
606
|
'Salary': [48000, 52000, 50500, 49200, 51000, 120000, 18000]
|
|
639
607
|
})
|
|
640
608
|
|
|
641
|
-
# Z-score calculation
|
|
642
609
|
df['Z_Score'] = zscore(df['Salary'])
|
|
643
610
|
print(df['Z_Score'])
|
|
644
611
|
|
|
645
|
-
# Detect outliers
|
|
646
612
|
outliers = df[abs(df['Z_Score']) > 2]
|
|
647
613
|
print(outliers)
|
|
648
614
|
|
|
649
615
|
# IQR Method
|
|
650
|
-
# Employee salary data
|
|
651
616
|
data = {
|
|
652
617
|
'Employee_ID': ['E01','E02','E03','E04','E05','E06','E07'],
|
|
653
618
|
'Salary': [48000, 52000, 50500, 49200, 51000, 120000, 18000]
|
|
@@ -655,18 +620,14 @@ data = {
|
|
|
655
620
|
|
|
656
621
|
df = pd.DataFrame(data)
|
|
657
622
|
|
|
658
|
-
# Calculate Q1 and Q3
|
|
659
623
|
Q1 = df['Salary'].quantile(0.25)
|
|
660
624
|
Q3 = df['Salary'].quantile(0.75)
|
|
661
625
|
|
|
662
|
-
# Calculate IQR
|
|
663
626
|
IQR = Q3 - Q1
|
|
664
627
|
|
|
665
|
-
# Define lower and upper bounds
|
|
666
628
|
lower_bound = Q1 - 1.5 * IQR
|
|
667
629
|
upper_bound = Q3 + 1.5 * IQR
|
|
668
630
|
|
|
669
|
-
# Detect outliers
|
|
670
631
|
outliers = df[
|
|
671
632
|
(df['Salary'] < lower_bound) |
|
|
672
633
|
(df['Salary'] > upper_bound)
|
|
@@ -677,8 +638,52 @@ print("Upper Bound:", upper_bound)
|
|
|
677
638
|
print("\nDetected Outliers:")
|
|
678
639
|
print(outliers)
|
|
679
640
|
|
|
641
|
+
#IsolationForest
|
|
642
|
+
from sklearn.ensemble import IsolationForest
|
|
643
|
+
import pandas as pd
|
|
644
|
+
|
|
645
|
+
df = pd.DataFrame({
|
|
646
|
+
'Amount': [2800, 3200, 3500, 2900, 3100, 18000, 50]
|
|
647
|
+
})
|
|
648
|
+
|
|
649
|
+
model = IsolationForest(contamination=0.2, random_state=42)
|
|
650
|
+
df['Anomaly'] = model.fit_predict(df)
|
|
651
|
+
|
|
652
|
+
print(df)
|
|
653
|
+
|
|
654
|
+
#KNN
|
|
655
|
+
from sklearn.neighbors import NearestNeighbors
|
|
656
|
+
import pandas as pd
|
|
657
|
+
|
|
658
|
+
df = pd.DataFrame({
|
|
659
|
+
'Delivery_Time': [46, 48, 50, 47, 49, 95, 10]
|
|
660
|
+
})
|
|
661
|
+
|
|
662
|
+
nbrs = NearestNeighbors(n_neighbors=3)
|
|
663
|
+
nbrs.fit(df)
|
|
664
|
+
|
|
665
|
+
distances, _ = nbrs.kneighbors(df)
|
|
666
|
+
df['Avg_Distance'] = distances.mean(axis=1)
|
|
667
|
+
|
|
668
|
+
threshold = df['Avg_Distance'].mean() + 2*df['Avg_Distance'].std()
|
|
669
|
+
outliers = df[df['Avg_Distance'] > threshold]
|
|
670
|
+
|
|
671
|
+
print(outliers)
|
|
672
|
+
|
|
673
|
+
#LOF
|
|
674
|
+
from sklearn.neighbors import LocalOutlierFactor
|
|
675
|
+
import pandas as pd
|
|
676
|
+
|
|
677
|
+
df = pd.DataFrame({
|
|
678
|
+
'Spending_Score': [45, 50, 48, 52, 49, 98, 5],
|
|
679
|
+
'Purchase_Frequency': [18, 20, 22, 19, 21, 65, 2]
|
|
680
|
+
})
|
|
680
681
|
|
|
682
|
+
lof = LocalOutlierFactor(n_neighbors=3)
|
|
683
|
+
df['Outlier'] = lof.fit_predict(df)
|
|
681
684
|
|
|
685
|
+
outliers = df[df['Outlier'] == -1]
|
|
686
|
+
print(outliers)
|
|
682
687
|
''')
|
|
683
688
|
|
|
684
689
|
def text():
|
|
@@ -783,3 +788,214 @@ print(similarity)
|
|
|
783
788
|
|
|
784
789
|
''')
|
|
785
790
|
|
|
791
|
+
def associationm():
|
|
792
|
+
print('''
|
|
793
|
+
# apriori Algorithm
|
|
794
|
+
transactions = [
|
|
795
|
+
{'Milk', 'Bread'},
|
|
796
|
+
{'Bread', 'Diaper', 'Beer', 'Eggs'},
|
|
797
|
+
{'Milk', 'Diaper', 'Beer', 'Cola'},
|
|
798
|
+
{'Bread', 'Milk', 'Diaper', 'Beer'},
|
|
799
|
+
{'Bread', 'Milk', 'Diaper', 'Cola'}
|
|
800
|
+
]
|
|
801
|
+
|
|
802
|
+
total_transactions = len(transactions)
|
|
803
|
+
min_support = 0.4
|
|
804
|
+
min_confidence = 0.6
|
|
805
|
+
|
|
806
|
+
def calculate_support(itemset):
|
|
807
|
+
count = 0
|
|
808
|
+
for transaction in transactions:
|
|
809
|
+
if itemset.issubset(transaction):
|
|
810
|
+
count += 1
|
|
811
|
+
return count / total_transactions
|
|
812
|
+
|
|
813
|
+
items = set().union(*transactions)
|
|
814
|
+
frequent_1 = {}
|
|
815
|
+
|
|
816
|
+
for item in items:
|
|
817
|
+
sup = calculate_support({item})
|
|
818
|
+
if sup >= min_support:
|
|
819
|
+
frequent_1[frozenset([item])] = sup
|
|
820
|
+
|
|
821
|
+
print("Frequent 1-Itemsets:")
|
|
822
|
+
for k, v in frequent_1.items():
|
|
823
|
+
print(set(k), ":", v)
|
|
824
|
+
|
|
825
|
+
from itertools import combinations
|
|
826
|
+
|
|
827
|
+
frequent_2 = {}
|
|
828
|
+
|
|
829
|
+
for pair in combinations(items, 2):
|
|
830
|
+
sup = calculate_support(set(pair))
|
|
831
|
+
if sup >= min_support:
|
|
832
|
+
frequent_2[frozenset(pair)] = sup
|
|
833
|
+
|
|
834
|
+
print("\nFrequent 2-Itemsets:")
|
|
835
|
+
for k, v in frequent_2.items():
|
|
836
|
+
print(set(k), ":", v)
|
|
837
|
+
|
|
838
|
+
frequent_3 = {}
|
|
839
|
+
|
|
840
|
+
for triple in combinations(items, 3):
|
|
841
|
+
sup = calculate_support(set(triple))
|
|
842
|
+
if sup >= min_support:
|
|
843
|
+
frequent_3[frozenset(triple)] = sup
|
|
844
|
+
|
|
845
|
+
print("\nFrequent 3-Itemsets:")
|
|
846
|
+
for k, v in frequent_3.items():
|
|
847
|
+
print(set(k), ":", v)
|
|
848
|
+
|
|
849
|
+
def calculate_confidence(A, B):
|
|
850
|
+
return calculate_support(A.union(B)) / calculate_support(A)
|
|
851
|
+
|
|
852
|
+
rules = []
|
|
853
|
+
|
|
854
|
+
for itemset in frequent_2:
|
|
855
|
+
for item in itemset:
|
|
856
|
+
A = frozenset([item])
|
|
857
|
+
B = itemset - A
|
|
858
|
+
conf = calculate_confidence(set(A), set(B))
|
|
859
|
+
if conf >= min_confidence:
|
|
860
|
+
rules.append((A, B, conf))
|
|
861
|
+
|
|
862
|
+
print("\nAssociation Rules:")
|
|
863
|
+
for rule in rules:
|
|
864
|
+
print(set(rule[0]), "→", set(rule[1]), "Confidence:", rule[2])
|
|
865
|
+
|
|
866
|
+
def calculate_lift(A, B):
|
|
867
|
+
return calculate_confidence(A, B) / calculate_support(B)
|
|
868
|
+
|
|
869
|
+
print("\nAssociation Rules with Lift:")
|
|
870
|
+
for rule in rules:
|
|
871
|
+
lift = calculate_lift(set(rule[0]), set(rule[1]))
|
|
872
|
+
print(set(rule[0]), "→", set(rule[1]), "Lift:", lift)
|
|
873
|
+
|
|
874
|
+
# FP-Growth Algorithm
|
|
875
|
+
transactions = [
|
|
876
|
+
['Milk', 'Bread'],
|
|
877
|
+
['Bread', 'Diaper', 'Beer', 'Eggs'],
|
|
878
|
+
['Milk', 'Diaper', 'Beer', 'Cola'],
|
|
879
|
+
['Bread', 'Milk', 'Diaper', 'Beer'],
|
|
880
|
+
['Bread', 'Milk', 'Diaper', 'Cola']
|
|
881
|
+
]
|
|
882
|
+
|
|
883
|
+
min_support = 2
|
|
884
|
+
|
|
885
|
+
from collections import defaultdict
|
|
886
|
+
|
|
887
|
+
item_count = defaultdict(int)
|
|
888
|
+
|
|
889
|
+
for transaction in transactions:
|
|
890
|
+
for item in transaction:
|
|
891
|
+
item_count[item] += 1
|
|
892
|
+
|
|
893
|
+
print("Item Frequencies:")
|
|
894
|
+
for item, count in item_count.items():
|
|
895
|
+
print(item, ":", count)
|
|
896
|
+
|
|
897
|
+
frequent_items = {item: count for item, count in item_count.items()
|
|
898
|
+
if count >= min_support}
|
|
899
|
+
|
|
900
|
+
print("\nFrequent Items:")
|
|
901
|
+
print(frequent_items)
|
|
902
|
+
|
|
903
|
+
sorted_transactions = []
|
|
904
|
+
|
|
905
|
+
for transaction in transactions:
|
|
906
|
+
filtered = [item for item in transaction if item in frequent_items]
|
|
907
|
+
sorted_trans = sorted(filtered,
|
|
908
|
+
key=lambda x: frequent_items[x],
|
|
909
|
+
reverse=True)
|
|
910
|
+
sorted_transactions.append(sorted_trans)
|
|
911
|
+
|
|
912
|
+
print("\nSorted Transactions:")
|
|
913
|
+
for t in sorted_transactions:
|
|
914
|
+
print(t)
|
|
915
|
+
|
|
916
|
+
class FPNode:
|
|
917
|
+
def __init__(self, item, count, parent):
|
|
918
|
+
self.item = item
|
|
919
|
+
self.count = count
|
|
920
|
+
self.parent = parent
|
|
921
|
+
self.children = {}
|
|
922
|
+
|
|
923
|
+
root = FPNode(None, 0, None)
|
|
924
|
+
|
|
925
|
+
def insert_tree(transaction, node):
|
|
926
|
+
if len(transaction) == 0:
|
|
927
|
+
return
|
|
928
|
+
first = transaction[0]
|
|
929
|
+
if first in node.children:
|
|
930
|
+
node.children[first].count += 1
|
|
931
|
+
else:
|
|
932
|
+
node.children[first] = FPNode(first, 1, node)
|
|
933
|
+
insert_tree(transaction[1:], node.children[first])
|
|
934
|
+
|
|
935
|
+
for transaction in sorted_transactions:
|
|
936
|
+
insert_tree(transaction, root)
|
|
937
|
+
|
|
938
|
+
def display_tree(node, indent=0):
|
|
939
|
+
for child in node.children.values():
|
|
940
|
+
print(" " * indent, child.item, ":", child.count)
|
|
941
|
+
display_tree(child, indent + 4)
|
|
942
|
+
|
|
943
|
+
print("\nFP-Tree:")
|
|
944
|
+
display_tree(root)
|
|
945
|
+
|
|
946
|
+
frequent_patterns = {}
|
|
947
|
+
|
|
948
|
+
for item, count in frequent_items.items():
|
|
949
|
+
frequent_patterns[(item,)] = count
|
|
950
|
+
|
|
951
|
+
print("\nFrequent Patterns:")
|
|
952
|
+
for pattern, count in frequent_patterns.items():
|
|
953
|
+
print(pattern, ":", count)
|
|
954
|
+
|
|
955
|
+
''')
|
|
956
|
+
|
|
957
|
+
def logreg():
|
|
958
|
+
print('''
|
|
959
|
+
import pandas as pd
|
|
960
|
+
import numpy as np
|
|
961
|
+
import seaborn as sns
|
|
962
|
+
import matplotlib.pyplot as plt
|
|
963
|
+
from sklearn.model_selection import train_test_split
|
|
964
|
+
from sklearn.linear_model import LogisticRegression
|
|
965
|
+
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
|
|
966
|
+
|
|
967
|
+
df = sns.load_dataset("titanic")
|
|
968
|
+
|
|
969
|
+
df = df[["survived", "pclass", "sex", "age", "sibsp", "parch", "fare"]]
|
|
970
|
+
|
|
971
|
+
df["age"] = df["age"].fillna(df["age"].median())
|
|
972
|
+
|
|
973
|
+
df["sex"] = df["sex"].map({"male": 0, "female": 1})
|
|
974
|
+
|
|
975
|
+
X = df.drop("survived", axis=1)
|
|
976
|
+
y = df["survived"]
|
|
977
|
+
|
|
978
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
979
|
+
|
|
980
|
+
model = LogisticRegression(max_iter=500)
|
|
981
|
+
model.fit(X_train, y_train)
|
|
982
|
+
|
|
983
|
+
y_pred = model.predict(X_test)
|
|
984
|
+
|
|
985
|
+
acc = accuracy_score(y_test, y_pred)
|
|
986
|
+
cm = confusion_matrix(y_test, y_pred)
|
|
987
|
+
report = classification_report(y_test, y_pred)
|
|
988
|
+
|
|
989
|
+
print("Accuracy:", acc)
|
|
990
|
+
print("\nConfusion Matrix:\n", cm)
|
|
991
|
+
print("\nClassification Report:\n", report)
|
|
992
|
+
|
|
993
|
+
plt.figure(figsize=(6, 4))
|
|
994
|
+
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False,
|
|
995
|
+
xticklabels=["Predicted 0", "Predicted 1"],
|
|
996
|
+
yticklabels=["Actual 0", "Actual 1"])
|
|
997
|
+
plt.title("Confusion Matrix")
|
|
998
|
+
plt.ylabel("Actual")
|
|
999
|
+
plt.xlabel("Predicted")
|
|
1000
|
+
plt.show()
|
|
1001
|
+
''')
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: matplotliblib
|
|
3
|
-
Version: 0.1
|
|
3
|
+
Version: 0.3
|
|
4
4
|
Summary: Collection of machine learning tools
|
|
5
|
-
Author:
|
|
5
|
+
Author: Anonymus
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
7
7
|
Classifier: Operating System :: OS Independent
|
|
8
8
|
Requires-Python: >=3.7
|
|
@@ -2,10 +2,10 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="matplotliblib",
|
|
5
|
-
version="0.1",
|
|
5
|
+
version="0.3",
|
|
6
6
|
packages=find_packages(),
|
|
7
7
|
|
|
8
|
-
author="
|
|
8
|
+
author="Anonymus",
|
|
9
9
|
description="Collection of machine learning tools",
|
|
10
10
|
|
|
11
11
|
python_requires=">=3.7",
|
|
@@ -14,4 +14,4 @@ setup(
|
|
|
14
14
|
"Programming Language :: Python :: 3",
|
|
15
15
|
"Operating System :: OS Independent",
|
|
16
16
|
],
|
|
17
|
-
)
|
|
17
|
+
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|