matplotliblib 0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: matplotliblib
3
+ Version: 0.1
4
+ Summary: Collection of machine learning tools
5
+ Author: .
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Python: >=3.7
9
+ Dynamic: author
10
+ Dynamic: classifier
11
+ Dynamic: requires-python
12
+ Dynamic: summary
File without changes
@@ -0,0 +1,14 @@
1
# Import the public helpers explicitly rather than with a wildcard so the
# names this package exposes are visible here and to static analysis tools.
from .main import (
    anomaly,
    association,
    cnn,
    data,
    decisiontree,
    kmeans,
    recommendation,
    regression,
    text,
    timeseries,
)

# Public API of the package: one snippet-printing function per topic.
__all__ = [
    "data",
    "decisiontree",
    "kmeans",
    "regression",
    "association",
    "cnn",
    "recommendation",
    "timeseries",
    "anomaly",
    "text",
]
@@ -0,0 +1,785 @@
1
def data():
    """Print an exploration and plotting script for seaborn's iris dataset.

    The script text is written to stdout only; none of it is executed here.

    Returns:
        None.
    """
    # Raw string on purpose: the script contains "\n" inside one of its own
    # string literals.  In a normal string that escape would be printed as a
    # real line break, splitting the literal and making the emitted script
    # invalid Python.
    print(r'''
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = sns.load_dataset('iris')

print(df.head())
print(df.tail())

print(help(df))
print("Information:")
df.info()

print("Description")
df.describe()

print("Null values: \n", df.isnull().sum())
df.hist(figsize=(10,6))
plt.show()


#Hisogram:
df.hist(figsize=(10,6))
plt.show()

#Boxplot
sns.boxplot(data=df)
plt.show()


#Countplot
sns.countplot(x='species', data=df)
plt.show()


#Scatter Plot
plt.scatter(df['sepal_length'], df['sepal_width'])
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.show()

#Pairplot
sns.pairplot(df, hue='species')
plt.show()

#Scatter Multiple
plt.figure(figsize=(10,6))
plt.scatter(df['petal_length'], df['sepal_length'], label='Sepal Length')
plt.scatter(df['petal_length'], df['sepal_width'], label='Sepal Width')
plt.scatter(df['petal_length'], df['petal_width'], label='Petal Width')

plt.xlabel('Petal Length')
plt.ylabel('Values of Other Attributes')
plt.title('Scatter Multiple Plot (Iris Dataset)')
plt.legend()
plt.show()

#Scatter Matrix
from pandas.plotting import scatter_matrix
plt.figure(figsize=(10,8))
scatter_matrix(df.iloc[:, :4], figsize=(10,8), diagonal='hist')
plt.show()

#Parallel Coordinates plot
from pandas.plotting import parallel_coordinates

plt.figure(figsize=(10,6))
parallel_coordinates(df, 'species')
plt.title('Parallel Coordinates Plot - Iris Dataset')
plt.show()

#Deviation Chart (from mean)
mean_vals = df.iloc[:, :4].mean()

plt.figure(figsize=(10,6))
plt.plot(df.iloc[:, :4] - mean_vals)
plt.title('Deviation Chart - Deviation from Mean')
plt.xlabel('Record Index')
plt.ylabel('Deviation')
plt.show()


#Andrews Curve
from pandas.plotting import andrews_curves

plt.figure(figsize=(10,6))
andrews_curves(df, 'species')
plt.title('Andrews Curves - Iris Dataset')
plt.show()
''')
93
+
94
def decisiontree():
    """Print a decision-tree classification script to stdout.

    The emitted script has two parts: a DecisionTreeClassifier on the iris
    dataset, then the same workflow on a local ``mtcars.csv`` file.  The
    text is only printed; nothing is executed.

    Returns:
        None.
    """
    script = '''
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

from sklearn.datasets import load_iris

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
dt_model = DecisionTreeClassifier(criterion="gini", max_depth=3)
dt_model.fit(X_train, y_train)
y_pred = dt_model.predict(X_test)
print(y_pred)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

print(classification_report(y_test, y_pred))

from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

plt.figure(figsize=(12,8))
plot_tree(dt_model, feature_names=iris.feature_names,
          class_names=iris.target_names, filled=True)
plt.show()

# Create decision tree model on mtcars dataset
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

mtcars = pd.read_csv("mtcars.csv")
mtcars.head()

mtcars = mtcars.drop(columns=['model'])
X = mtcars.drop('am', axis=1)
y = mtcars['am']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
dt_model = DecisionTreeClassifier(criterion="gini",max_depth=3)
dt_model.fit(X_train, y_train)
y_pred = dt_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

print(classification_report(y_test, y_pred))


from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

plt.figure(figsize=(14,8))
plot_tree(
    dt_model,
    feature_names=X.columns,
    class_names=["Automatic", "Manual"],
    filled=True
)
plt.show()


'''
    print(script)
167
+
168
def kmeans():
    """Print a clustering script (K-Means, then DBSCAN) for the iris data.

    The script text goes to stdout; it is not executed.

    Returns:
        None.
    """
    script = '''
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans

iris = load_iris()
X = iris.data

k = 3

kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(X)

labels = kmeans.labels_
centroids = kmeans.cluster_centers_
print(labels)
print(centroids)

plt.scatter(X[:,2], X[:,3], c=labels)
plt.scatter(centroids[:,2], centroids[:,3], marker='X')
plt.xlabel("Petal Length")
plt.ylabel("Petal Width")
plt.title("K-Means Clustering on Iris Dataset")
plt.show()

# DBSCAN using Iris Dataset
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

iris = load_iris()
X = iris.data

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

dbscan = DBSCAN(eps=0.6, min_samples=5)

labels = dbscan.fit_predict(X_scaled)

print(np.unique(labels))

plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=labels)
plt.xlabel("Sepal Length")
plt.ylabel("Sepal Width")
plt.title("DBSCAN Clustering on Iris Dataset")
plt.show()




'''
    print(script)
224
+
225
def regression():
    """Print a linear-regression script (simple, then multiple) to stdout.

    The first part of the emitted script fits hours-vs-score data defined
    inline; the second part reads a local ``index.csv`` file and fits a
    multiple regression with scikit-learn and statsmodels.  The text is
    only printed; nothing is executed.

    Returns:
        None.
    """
    # Loop bodies inside the script are indented so that the printed text
    # is valid Python when pasted into an interpreter.
    print('''
#Simple Linear Regression

# Basic operations
import numpy as np
import pandas as pd

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

# sklearn for model building & evaluation
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import mean_squared_error, r2_score

# statsmodels for detailed regression output (p-values, std err, etc.)
import statsmodels.api as sm

# For VIF (multicollinearity)
from statsmodels.stats.outliers_influence import variance_inflation_factor

# For qqplot
import scipy.stats as stats

hours = np.array([1, 2, 4, 5, 5, 6])
score = np.array([64, 66, 76, 73, 74, 81])

# Put into a DataFrame for convenience
df = pd.DataFrame({"hours": hours, "score": score})

# Scatter plot with regression line visually
sns.scatterplot(x="hours", y="score", data=df, s=80)
sns.regplot(x="hours", y="score", data=df, ci=None, scatter=False) # fitted line
plt.title("Hours studied vs Exam Score")
plt.xlabel("Hours studied")
plt.ylabel("Exam Score")
plt.show()

# Boxplot to check distribution & outliers (response variable)
plt.figure()
sns.boxplot(y=df["score"])
plt.title("Boxplot of Exam Scores")
plt.show()

# Prepare X and y
X = df[["hours"]] # 2D array (n_samples, n_features)
y = df["score"]

# Fit model (sklearn)
lr = LinearRegression()
lr.fit(X, y)

# Coefficients
b0 = lr.intercept_
b1 = lr.coef_[0]
print(f"Intercept (b0): {b0:.4f}")
print(f"Slope (b1): {b1:.4f}")

# Add constant for intercept term
X_sm = sm.add_constant(X) # adds column of 1s for intercept
model_sm = sm.OLS(y, X_sm).fit()
print(model_sm.summary())

# Predict with sklearn model (single value)
new_hours = pd.DataFrame({"hours": [3]})
predicted_score = lr.predict(new_hours)
print("Predicted score for 3 hours:", predicted_score[0])

# For presentation: create a DataFrame with a range and show predictions
grid = pd.DataFrame({"hours": np.linspace(df.hours.min(), df.hours.max(), 50)})
grid["predicted_score"] = lr.predict(grid)
grid.head()

df["predicted"] = lr.predict(X)
df["residuals"] = df["score"] - df["predicted"]

# Residual vs Fitted
plt.figure()
plt.scatter(df["predicted"], df["residuals"], s=60)
plt.axhline(0, linestyle='--')
plt.xlabel("Fitted values")
plt.ylabel("Residuals")
plt.title("Residuals vs Fitted")
plt.show()

# QQ-plot of residuals
plt.figure()
sm.qqplot(df["residuals"], line='45', fit=True)
plt.title("Q-Q plot of residuals")
plt.show()

# Histogram of residuals (optional)
plt.figure()
sns.histplot(df["residuals"], kde=True)
plt.title("Histogram of residuals")
plt.show()

# Multiple Linear Regression
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, KFold, cross_val_score

import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.stattools import durbin_watson
import scipy.stats as stats

house = pd.read_csv("index.csv")
# Quick peek
house.head()
house.shape

house.info() # data types and non-null counts
house.describe().T # mean, std, min, max, quartiles
house.columns # column names

# Check missing values
house.isnull().sum()

# Pairwise relationships
sns.pairplot(house[['death_rate','doctor_avail','hosp_avail','annual_income','density_per_capita']])
plt.suptitle("Pairwise plots", y=1.02)
plt.show()

# Correlation matrix (helpful for collinearity)
corr = house[['death_rate','doctor_avail','hosp_avail','annual_income','density_per_capita']].corr()
print(corr)
sns.heatmap(corr, annot=True, fmt=".2f")
plt.title("Correlation matrix")
plt.show()

X = house[['death_rate','doctor_avail','hosp_avail','annual_income']]
y = house['density_per_capita']

# (Optional) Train-test split for validation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit with sklearn (fast, gives coefficients)
lr = LinearRegression()
lr.fit(X_train, y_train)

print("Intercept (sklearn):", lr.intercept_)
print("Coefficients (sklearn):")
for name, coef in zip(X.columns, lr.coef_):
    print(f" {name}: {coef:.6f}")

# Fit with statsmodels to get p-values, std errors, R-squared etc.
X_sm = sm.add_constant(X) # adds intercept column
model_sm = sm.OLS(y, X_sm).fit()

# Use statsmodels' summary (detailed regression table)
print(model_sm.summary())

# Predict on training data or new data
house['predicted'] = model_sm.predict(X_sm) # statsmodels predict expects same X with constant

# If using sklearn model (trained on X_train)
y_test_pred = lr.predict(X_test)

# Example: predict for a new observation (replace with real numbers)
new_obs = pd.DataFrame({'death_rate':[2.0], 'doctor_avail':[1.5], 'hosp_avail':[0.7], 'annual_income':[35000]})
pred_new = lr.predict(new_obs) # sklearn
print("Predicted density_per_capita for new_obs:", pred_new[0])

rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
r2 = r2_score(y_test, y_test_pred)
print("Test RMSE:", rmse)
print("Test R2:", r2)

# Step 6 - Visualize the model
for col in X.columns:
    plt.figure()
    sns.scatterplot(x=house[col], y=house['density_per_capita'])
    sns.regplot(x=house[col], y=house['density_per_capita'], ci=None, scatter=False)
    plt.xlabel(col)
    plt.ylabel('density_per_capita')
    plt.title(f'density_per_capita vs {col}')
    plt.show()

# Actual Vs Predicted
plt.figure()
sns.scatterplot(x=house['predicted'], y=house['density_per_capita'])
plt.plot([house['predicted'].min(), house['predicted'].max()],
         [house['predicted'].min(), house['predicted'].max()], color='red', linestyle='--')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Actual vs Predicted')
plt.show()

# Residuals vs Fitted (diagnose heteroscedasticity / non-linearity)
residuals = house['density_per_capita'] - house['predicted']
plt.figure()
sns.scatterplot(x=house['predicted'], y=residuals)
plt.axhline(0, color='red', linestyle='--')
plt.xlabel('Fitted values')
plt.ylabel('Residuals')
plt.title('Residuals vs Fitted')
plt.show()

# Q-Q Plot for residuals (normality)
sm.qqplot(residuals, line='45', fit=True)
plt.title('Q-Q plot of residuals')
plt.show()

# Shapiro test (numeric)
stat, pval = stats.shapiro(residuals)
print("Shapiro-Wilk: stat=%.4f, p=%.4f" % (stat, pval))

# Influence / Leverage plot (outliers & influential points)
sm.graphics.influence_plot(model_sm, criterion="cooks")
plt.show()




''')
449
+
450
def association():
    """Print an association-rule mining script (Apriori, then FP-Growth).

    The emitted script uses mlxtend on a small inline list of market-basket
    transactions.  The text is written to stdout and is not executed.

    Returns:
        None.
    """
    script = '''
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

transactions = [
    ['Milk', 'Bread'],
    ['Bread', 'Diaper', 'Beer', 'Eggs'],
    ['Milk', 'Diaper', 'Beer', 'Cola'],
    ['Bread', 'Milk', 'Diaper', 'Beer'],
    ['Bread', 'Milk', 'Diaper', 'Cola']
]

te = TransactionEncoder()
te_array = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_array, columns=te.columns_)
print(df)

frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)
print(frequent_itemsets)

rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.6
)
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


# USING FP-Growth ALgorithm
from mlxtend.frequent_patterns import fpgrowth
print(df)
frequent_itemsets_fp = fpgrowth(df, min_support=0.4, use_colnames=True)
print(frequent_itemsets_fp)
rules_fp = association_rules(
    frequent_itemsets_fp,
    metric="confidence",
    min_threshold=0.6
)
print(rules_fp[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


'''
    print(script)
494
+
495
def cnn():
    """Print a Keras script that trains a small CNN on MNIST.

    The script text is written to stdout; TensorFlow is neither imported
    nor run by this function.

    Returns:
        None.
    """
    script = '''
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

x_train = x_train / 255.0
x_test = x_test / 255.0

x_train = x_train.reshape(-1,28,28,1)
x_test = x_test.reshape(-1,28,28,1)

model = keras.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5, validation_split=0.1)

test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test Accuracy:", test_acc)

plt.imshow(x_test[0].reshape(28,28), cmap='gray')
plt.show()

prediction = model.predict(x_test[0].reshape(1,28,28,1))
print("Predicted Digit:", prediction.argmax())


'''
    print(script)
535
+
536
def recommendation():
    """Print an item-similarity recommendation script to stdout.

    The emitted script builds a user-by-item rating matrix from inline data
    and ranks items by cosine similarity.  The text is only printed; nothing
    is executed.

    Returns:
        None.
    """
    # The body of the script's own ``recommend`` helper is indented so the
    # printed text is valid Python when pasted into an interpreter.
    print('''
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

data = {
    'User': ['U1','U1','U2','U2','U3','U3'],
    'Item': ['I1','I2','I1','I3','I2','I3'],
    'Rating': [5,4,4,5,3,4]
}
df = pd.DataFrame(data)

matrix = df.pivot_table(index='User', columns='Item', values='Rating').fillna(0)
print(matrix)

similarity = cosine_similarity(matrix.T)
similarity_df = pd.DataFrame(similarity, index=matrix.columns, columns=matrix.columns)
print(similarity_df)

def recommend(item):
    return similarity_df[item].sort_values(ascending=False)

print(recommend('I1'))


''')
562
+
563
def timeseries():
    """Print an ARIMA forecasting script for a local ``AirPassengers.csv``.

    The emitted script covers plotting, ADF stationarity tests, first
    differencing, ACF/PACF plots and a 12-step ARIMA(1,1,1) forecast.  The
    text is written to stdout and is not executed.

    Returns:
        None.
    """
    script = '''
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

data = pd.read_csv("AirPassengers.csv")
data['Month'] = pd.to_datetime(data['Month'])
data.set_index('Month', inplace=True)
data.head()


plt.figure()
plt.plot(data['Passengers'])
plt.title('Monthly Air Passengers')
plt.xlabel('Year')
plt.ylabel('Number of Passengers')
plt.show()

result = adfuller(data['Passengers'])

print("ADF Statistic:", result[0])
print("p-value:", result[1])

data_diff = data['Passengers'].diff().dropna()

plt.figure()
plt.plot(data_diff)
plt.title('First Differenced Series')
plt.show()

adfuller(data_diff)

from statsmodels.graphics.tsaplots import plot_pacf
import matplotlib.pyplot as plt

plot_pacf(data_diff, lags=20)
plt.show()

from statsmodels.graphics.tsaplots import plot_acf

plot_acf(data_diff, lags=20)
plt.show()

model = ARIMA(data['Passengers'], order=(1,1,1))
model_fit = model.fit()
print(model_fit.summary())

forecast = model_fit.forecast(steps=12)
print(forecast)

plt.figure()
plt.plot(data['Passengers'], label='Actual')
plt.plot(forecast, label='Forecast', color='red')
plt.legend()
plt.show()





'''
    print(script)
629
+
630
def anomaly():
    """Print an outlier-detection script (Z-score, then IQR) to stdout.

    The emitted script works on a small inline salary table.  The text is
    only printed; nothing is executed.

    Returns:
        None.
    """
    # Raw string on purpose: the script contains "\n" inside one of its own
    # string literals.  In a normal string that escape would be printed as a
    # real line break, splitting the literal and making the emitted script
    # invalid Python.
    print(r'''
import pandas as pd
from scipy.stats import zscore

# Salary data
df = pd.DataFrame({
    'Employee_ID': ['E01','E02','E03','E04','E05','E06','E07'],
    'Salary': [48000, 52000, 50500, 49200, 51000, 120000, 18000]
})

# Z-score calculation
df['Z_Score'] = zscore(df['Salary'])
print(df['Z_Score'])

# Detect outliers
outliers = df[abs(df['Z_Score']) > 2]
print(outliers)

# IQR Method
# Employee salary data
data = {
    'Employee_ID': ['E01','E02','E03','E04','E05','E06','E07'],
    'Salary': [48000, 52000, 50500, 49200, 51000, 120000, 18000]
}

df = pd.DataFrame(data)

# Calculate Q1 and Q3
Q1 = df['Salary'].quantile(0.25)
Q3 = df['Salary'].quantile(0.75)

# Calculate IQR
IQR = Q3 - Q1

# Define lower and upper bounds
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Detect outliers
outliers = df[
    (df['Salary'] < lower_bound) |
    (df['Salary'] > upper_bound)
]

print("Lower Bound:", lower_bound)
print("Upper Bound:", upper_bound)
print("\nDetected Outliers:")
print(outliers)



''')
683
+
684
def text():
    """Print five short TF-IDF text-mining scripts to stdout.

    The emitted text covers, in order: document classification, sentiment
    analysis, a search-engine style query match, spam detection and a
    similarity-based recommendation example, all on inline sample
    sentences.  The text is only printed; nothing is executed.

    Returns:
        None.
    """
    script = '''
#Document Classification
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

texts = [
    "Meeting scheduled with project team",
    "Family dinner this weekend",
    "Project deadline extended",
    "Birthday party invitation"
]

labels = ["Work", "Personal", "Work", "Personal"]

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(texts)

model = MultinomialNB()
model.fit(X, labels)

prediction = model.predict(vectorizer.transform(["Project meeting tomorrow"]))
print(prediction)

# Sentiment Analysis
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

reviews = [
    "The product is amazing",
    "Very bad quality",
    "I am happy with the purchase",
    "Worst experience ever"
]

sentiment = ["Positive", "Negative", "Positive", "Negative"]

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(reviews)

model = LogisticRegression()
model.fit(X, sentiment)

print(model.predict(vectorizer.transform(["The product quality is good"])))

# Search Engines
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

documents = [
    "Data science and machine learning",
    "Introduction to text mining",
    "Python for data analysis"
]

query = ["text mining"]

vectorizer = TfidfVectorizer()
tfidf = vectorizer.fit_transform(documents + query)

similarity = (tfidf * tfidf.T).toarray()
print("Most relevant document index:", np.argmax(similarity[-1][:-1]))

# Spam Detection
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

messages = [
    "Win a free lottery now",
    "Meeting scheduled tomorrow",
    "Urgent offer claim now",
    "Project discussion today"
]

labels = ["Spam", "Not Spam", "Spam", "Not Spam"]

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(messages)

model = MultinomialNB()
model.fit(X, labels)

print(model.predict(vectorizer.transform(["Free offer just for you"])))

# Recommendation Systems
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

movies = [
    "Action adventure and hero story",
    "Romantic love story",
    "Adventure and fantasy world"
]

vectorizer = TfidfVectorizer()
tfidf = vectorizer.fit_transform(movies)

similarity = cosine_similarity(tfidf)
print(similarity)

'''
    print(script)
785
+
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: matplotliblib
3
+ Version: 0.1
4
+ Summary: Collection of machine learning tools
5
+ Author: .
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Python: >=3.7
9
+ Dynamic: author
10
+ Dynamic: classifier
11
+ Dynamic: requires-python
12
+ Dynamic: summary
@@ -0,0 +1,8 @@
1
+ README.md
2
+ setup.py
3
+ matplotliblib/__init__.py
4
+ matplotliblib/main.py
5
+ matplotliblib.egg-info/PKG-INFO
6
+ matplotliblib.egg-info/SOURCES.txt
7
+ matplotliblib.egg-info/dependency_links.txt
8
+ matplotliblib.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ matplotliblib
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,17 @@
1
+ from setuptools import setup, find_packages
2
+
3
# Distribution metadata for the "matplotliblib" package.
setup(
    # Identity
    name="matplotliblib",
    version="0.1",
    description="Collection of machine learning tools",
    author=".",
    # Contents and supported interpreters
    packages=find_packages(),
    python_requires=">=3.7",
    # Trove classifiers shown on the package index
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
)