shreenath-ml-scripts 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
ml_prac_scripts/eda.py ADDED
@@ -0,0 +1,65 @@
1
# -*- coding: utf-8 -*-
"""eda.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1Fm6OC_xJb4m29eC8pGFUqGFzpMxKvUiA

Exploratory data analysis of the seaborn Titanic dataset: missing-value
imputation, outlier capping on fare, a log transform, and basic encoding.
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

df = sns.load_dataset("titanic")

# First look at the data.
df.head()
df.info()
df.describe()
df.isnull().sum()

# --- Missing-value handling ---
# Median is robust to the skew in age; mode fills the categorical port.
df["age"] = df["age"].fillna(df["age"].median())
df['embarked'] = df['embarked'].fillna(df['embarked'].mode()[0])
# BUG FIX: 'embark_town' mirrors 'embarked' and is NaN on the same rows,
# so the dropna() below used to discard exactly the rows we just imputed.
# Fill it consistently so the embarked imputation actually takes effect.
df['embark_town'] = df['embark_town'].fillna(df['embark_town'].mode()[0])

# 'deck' is mostly missing, so drop the column rather than impute it.
df.drop('deck', axis=1, inplace=True)
df.dropna(inplace=True)

df.info()

sns.boxplot(x=df['fare'])
plt.title('Fare Boxplot (Before Handling Outliers)')
plt.show()

# --- Outlier capping via the IQR rule ---
Q1 = df['fare'].quantile(0.25)
Q3 = df['fare'].quantile(0.75)
IQR = Q3 - Q1

lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Clip fares outside [lower_bound, upper_bound] to the bounds.
df['fare'] = np.where(df['fare'] > upper_bound, upper_bound, np.where(df['fare'] < lower_bound, lower_bound, df['fare']))

print(f"Outliers capped at: {upper_bound}")

# log1p handles fare == 0 safely while reducing right skew.
df['log_fare'] = np.log1p(df['fare'])

plt.figure(figsize=(10,4))
plt.subplot(1, 2, 1)
sns.histplot(df['fare'], kde=True).set_title('Original Fare')
plt.subplot(1, 2, 2)
sns.histplot(df['log_fare'], kde=True).set_title('Log Transformed Fare')
plt.show()

# --- Encoding ---
df['sex'] = df['sex'].map({'male': 0, 'female': 1})

# One-hot encode embarked; drop_first avoids the dummy-variable trap
# (C is dropped, leaving embarked_Q and embarked_S).
df = pd.get_dummies(df, columns=['embarked'], drop_first=True)

print("\nFinal Dataframe Head:")
print(df[['survived', 'pclass', 'sex', 'age', 'log_fare', 'embarked_Q', 'embarked_S']].head())
@@ -0,0 +1,47 @@
1
# -*- coding: utf-8 -*-
"""KMeansClustering.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1Yt6p5RJKdDBlYIy11bsGPmTO5cJ6kdnA

K-means clustering of the numeric columns of a sales dataset: elbow-method
scan over k = 1..10, then a final fit at k = 3 with a 2-D scatter plot.
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import warnings
import seaborn as sns

warnings.filterwarnings("ignore")

# latin-1 handles the non-UTF-8 bytes present in the file.
df = pd.read_csv("sales.csv", encoding='latin-1')
df.head()

# K-means is distance based, so standardise the numeric columns first.
scaler = StandardScaler()
df_numeric = df.select_dtypes(include=np.number)
X_scaled = scaler.fit_transform(df_numeric)

# Elbow method: record the inertia of a fitted model for each k in 1..10.
inertia = [
    KMeans(n_clusters=k, random_state=42, n_init=10).fit(X_scaled).inertia_
    for k in range(1, 11)
]

plt.figure(figsize=(8, 5))
sns.lineplot(x=range(1, 11), y=inertia, marker='o', linestyle='-')
plt.title('Elbow Method for Optimal k')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')

# Final model at the k chosen from the elbow plot.
optimal_k = 3
kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
kmeans.fit(X_scaled)
labels = kmeans.labels_

# Visualise the first two scaled features, coloured by cluster label.
plt.figure(figsize=(10, 6))
sns.scatterplot(x=X_scaled[:, 0], y=X_scaled[:, 1], hue=labels)
ml_prac_scripts/knn.py ADDED
@@ -0,0 +1,36 @@
1
# -*- coding: utf-8 -*-
"""knn.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1u49MdRl6i0Jlmx9I1gm1iYjiHEXGI97C

k-nearest-neighbours classification of the UCI Iris dataset with a
70/30 train/test split, reporting accuracy and the confusion matrix.
"""

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, f1_score, precision_score

# Iris data straight from the UCI repository; the file has no header row.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
df = pd.read_csv(url, names=column_names)

# Encode the species strings as integer class labels.
species_encoder = LabelEncoder()
df['species'] = species_encoder.fit_transform(df['species'])

# Feature matrix / target vector, then the 70/30 split.
y = df['species']
X = df.drop('species', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit a 3-nearest-neighbours classifier and score the held-out set.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
@@ -0,0 +1,58 @@
1
# -*- coding: utf-8 -*-
"""linear_regression.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1AqzDrmjgKq5uD5lNbJdJJFdzGUUCW3hs

Linear regression on the Boston housing data: mean-impute missing
features, standardise, fit, report MSE / R^2 and plot predictions.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("HousingData.csv")
df.head()

# Mean-impute each NaN-bearing feature column (one loop replaces six
# copy-pasted fillna lines in the original).
for col in ["CRIM", "ZN", "INDUS", "CHAS", "AGE", "LSTAT"]:
    df[col] = df[col].fillna(df[col].mean())

df.info()

# MEDV (median house value) is the regression target.
X = df.drop('MEDV', axis=1)
y = df['MEDV']
X_scaled = StandardScaler().fit_transform(X)
X_scaled

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R-squared:", r2)

plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred)
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")
plt.title("Actual vs. Predicted Values")

# Reference line y = x (perfect prediction). BUG FIX: the original drew a
# second, overlapping identity segment over the y_pred range, which did
# not match the stated intent of a single line of best fit.
lims = [y_test.min(), y_test.max()]
plt.plot(lims, lims, '-', lw=1)

plt.show()
@@ -0,0 +1,58 @@
1
# -*- coding: utf-8 -*-
"""logistic_regression.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1_9gGFH3ONYSD_saWKC9Vf2wJ-YU3if1G

Logistic regression on the Social Network Ads dataset: min-max scale the
numeric features, label-encode Gender, fit, and report metrics.
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns  # FIX: was imported twice in the original
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

df = pd.read_csv("Social_Network_Ads.csv")
numeric_columns = ['Age', 'EstimatedSalary']
df.head()

# Scale the numeric features into [0, 1].
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[numeric_columns])
scaled = pd.DataFrame(scaled, columns=numeric_columns)
scaled

# Encode Gender as integer labels on a copy of the raw frame.
label_encoder = LabelEncoder()
encoded = df.copy()
encoded['Gender'] = label_encoder.fit_transform(df['Gender'])
encoded

# FIX: the original did `X = scaled` and then mutated X in place, which
# silently added the Gender column to `scaled` as well; copy so each
# intermediate frame keeps its own contents.
X = scaled.copy()
X['Gender'] = encoded['Gender']
y = df['Purchased']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}\n")

print("Classification Report:")

print(classification_report(y_test, y_pred, target_names=['Not Purchased (0)', 'Purchased (1)']))

print("Confusion Matrix:")

print(confusion_matrix(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap="Blues")
ml_prac_scripts/pca.py ADDED
@@ -0,0 +1,86 @@
1
# -*- coding: utf-8 -*-
"""pca.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1vS6JOW2HWZiPOYrryS6XDOjq0G3Ja_9y

2-component PCA of the Iris dataset with a scatter plot of the projected
points coloured by species.
"""

import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

df = pd.read_csv("Iris.csv")
df.columns = ["id", "sepal_length", "sepal_width", "petal_length", "petal_width", "species"]
df.head()

# Feature matrix for PCA. FIX: removed an unused `y` local the original
# extracted but never read.
features = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
x = df.loc[:, features].values

# Standardise so each feature contributes equally to the components.
x = StandardScaler().fit_transform(x)

pca = PCA(n_components=2)
principal_components = pca.fit_transform(x)
principal_components

principalDF = pd.DataFrame(data = principal_components, columns=["pc1", "pc2"])
final = pd.concat([principalDF, df[["species"]]], axis=1)
final.head()

# One scatter per species so the legend maps colours to class names.
plt.figure()
plt.xlabel("pc1")
plt.ylabel("pc2")
plt.title("2 component PCA")
targets = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]
colors = ["r", "g", "b"]
for target, color in zip(targets, colors):
    indicesToKeep = final["species"] == target
    plt.scatter(final.loc[indicesToKeep, "pc1"], final.loc[indicesToKeep, "pc2"], c=color, s=50)
plt.legend(targets)
plt.grid()
plt.show()
47
# Second PCA pass (duplicated notebook cell block): same projection but
# without standardisation, for comparison with the scaled version above.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("Iris.csv")
df.info()

# FIX: corrected the misspelled column names ("speal_len" -> "sepal_len").
df.columns = ["id", "sepal_len", "sepal_width", "petal_len", "petal_width", "species"]
features = ["sepal_len", "sepal_width", "petal_len", "petal_width"]

# FIX: removed an unused `y` local the original extracted but never read.
X = df.loc[:, features].values

# NOTE(review): scaling is deliberately left commented out here — without
# it the first component is dominated by the largest-variance feature.
# Confirm this unscaled comparison is intentional.
# X = StandardScaler().fit_transform(X)

pca = PCA(n_components=2)
p_com = pca.fit_transform(X)

# FIX: component columns were inconsistently named ("pca1" vs "pc2");
# use matching names that also agree with the axis labels below.
p_df = pd.DataFrame(data=p_com, columns=["pc1", "pc2"])
final = pd.concat([p_df, df[["species"]]], axis=1)

plt.figure()
plt.xlabel("pc1")
plt.ylabel("pc2")
plt.title("2 components PCA")
targets = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]
colors = ["r", "g", "b"]

# One scatter per species so the legend maps colours to class names.
for t, c in zip(targets, colors):
    indices = final["species"] == t
    plt.scatter(
        final.loc[indices, "pc1"],
        final.loc[indices, "pc2"],
        c=c,
        s=50
    )
plt.legend(targets)
ml_prac_scripts/svm.py ADDED
@@ -0,0 +1,52 @@
1
# -*- coding: utf-8 -*-
"""svm.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1gtCM25ZOphz_jwcev_e6g8YH-yMt5RDP

Linear SVM spam classifier on a pre-vectorised email dataset (the CSV
already contains word-count feature columns plus a 'Prediction' label).
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
import seaborn as sns

df = pd.read_csv("emails.csv")
# BUG FIX: the original called df.dropna() without keeping the result,
# which is a no-op; assign so rows with missing values are really dropped.
df = df.dropna()
df = df.drop(columns=["Email No."])
df.isna().sum()

# BUG FIX: the original ran TfidfVectorizer.fit_transform(df) — iterating
# a DataFrame yields its *column names*, so that vectorised the header
# strings — and then immediately overwrote X anyway. The dead, misleading
# call has been removed; the CSV columns are already the feature matrix.
X = df.drop(columns=['Prediction'])
y = df['Prediction']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Linear-kernel SVM with default regularisation strength.
model = SVC(kernel='linear', C=1.0, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)

# Heatmap of the confusion matrix with human-readable class labels.
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['Not Spam', 'Spam'],
    yticklabels=['Not Spam', 'Spam']
)
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: shreenath-ml-scripts
3
+ Version: 0.1.0
4
+ Summary: A collection of machine learning scripts
5
+ Author: Shreenath
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Requires-Dist: pandas
9
+ Requires-Dist: numpy
10
+ Requires-Dist: matplotlib
11
+ Requires-Dist: scikit-learn
12
+ Requires-Dist: seaborn
13
+ Dynamic: author
14
+ Dynamic: classifier
15
+ Dynamic: requires-dist
16
+ Dynamic: summary
@@ -0,0 +1,12 @@
1
+ ml_prac_scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ ml_prac_scripts/eda.py,sha256=p-F5SkONgMznpSjgP4JCPcm5NToN8-nLAlYtp2wBc-s,1455
3
+ ml_prac_scripts/kmeansclustering.py,sha256=QwJngX7XTOzVDye3SokpQWJyv1-urZTzIKHWqZjYnvw,1224
4
+ ml_prac_scripts/knn.py,sha256=srSgSePCgpfSu4FmVIRlDuT4SSQQgPqd1n-T0VU-dxM,1134
5
+ ml_prac_scripts/linear_regression.py,sha256=3X7nYTzHlaWM3kt45_67xYyOO5nJXBcsoubT2lc6mRs,1618
6
+ ml_prac_scripts/logistic_regression.py,sha256=Hc7BfXujLDDnYX1r0ahdljyyjcEx_QMQI-BvHLGICHI,1541
7
+ ml_prac_scripts/pca.py,sha256=Tc5p8fWmS_XdJTqePDrb1muqlPnGaxTGSLeotoglP48,2287
8
+ ml_prac_scripts/svm.py,sha256=JIAO-J2MmxyRt9OJPcdVobEUQRX79a-w7Uxk7jfxnhw,1334
9
+ shreenath_ml_scripts-0.1.0.dist-info/METADATA,sha256=lemZTBdSNZXVP_z4G90iX74oa6kXpOeJm7anC2S-0VY,427
10
+ shreenath_ml_scripts-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
11
+ shreenath_ml_scripts-0.1.0.dist-info/top_level.txt,sha256=QxnhFgPBYlkHDXciaFOgFraw4_XX7-if-yAL_maKmEY,16
12
+ shreenath_ml_scripts-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ ml_prac_scripts