myawesomepkg-0.1.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- myawesomepkg/TSAPY1/1 (A) Working with Numpy Arrays.py +1146 -0
- myawesomepkg/TSAPY1/1(B)Aggregation (1).py +319 -0
- myawesomepkg/TSAPY1/1(C) Broadcasting .py +328 -0
- myawesomepkg/TSAPY1/10-A_Load_stringr.py +77 -0
- myawesomepkg/TSAPY1/10-B_Forcats.py +70 -0
- myawesomepkg/TSAPY1/2(a) Comparison, Masking And Boolean Logic (1).py +497 -0
- myawesomepkg/TSAPY1/2(b)Fancy Indexing.py +594 -0
- myawesomepkg/TSAPY1/2(c) Sorting Arrays.py +528 -0
- myawesomepkg/TSAPY1/2(d) Structured Array.py +350 -0
- myawesomepkg/TSAPY1/3 (A) Handling Missing Data.py +1013 -0
- myawesomepkg/TSAPY1/4A_Merge_Joins.py +1209 -0
- myawesomepkg/TSAPY1/9A_Dplyr.py +85 -0
- myawesomepkg/TSAPY1/9B_Tidyr.py +71 -0
- myawesomepkg/TSAPY1/Aggregation_Groupin_Pivot_Filter_Vectorice_Time_Series.py +1999 -0
- myawesomepkg/TSAPY1/Combining_Joins.py +1209 -0
- myawesomepkg/TSAPY1/P4-1-different_distance_methods_(euclidean)_with_prediction,_test_score_and_confusion_matrix1.py +131 -0
- myawesomepkg/TSAPY1/P4-2-k_means_clustering_with_prediction,_test_score_and_confusion_matrix2.py +150 -0
- myawesomepkg/TSAPY1/Pract3_C.py +482 -0
- myawesomepkg/TSAPY1/Pract5_Data_Visualization.py +481 -0
- myawesomepkg/TSAPY1/Practical 6.py +860 -0
- myawesomepkg/TSAPY1/Practical No 1.py +148 -0
- myawesomepkg/TSAPY1/Practical No 2.py +115 -0
- myawesomepkg/TSAPY1/Practical No 3.py +168 -0
- myawesomepkg/TSAPY1/Practical No 4 A.py +233 -0
- myawesomepkg/TSAPY1/Practical No 4 B.py +137 -0
- myawesomepkg/TSAPY1/Practical No 5.py +52 -0
- myawesomepkg/TSAPY1/Practical No 6.py +29 -0
- myawesomepkg/TSAPY1/Practical No 7.py +67 -0
- myawesomepkg/TSAPY1/Practical No 8.py +108 -0
- myawesomepkg/TSAPY1/Print_R.py +123 -0
- myawesomepkg/TSAPY1/R_Graph.py +32 -0
- myawesomepkg/TSAPY1/Working_Ggplot.py +53 -0
- myawesomepkg/TSAPY1/__init__.py +0 -0
- myawesomepkg/TSAPY1/p1_2_pca_iris.py +141 -0
- myawesomepkg/TSAPY1/p2_1_find_s.py +78 -0
- myawesomepkg/TSAPY1/p2_bcandidate_elimination_algorithm_(1).py +85 -0
- myawesomepkg/TSAPY1/p3_1_least_square_regression.py +105 -0
- myawesomepkg/TSAPY1/p3_2_logistic_regression_algorithm.py +79 -0
- myawesomepkg/TSAPY1/p5_1_hierarchical_clustering.py +143 -0
- myawesomepkg/TSAPY1/p5_2_k_nearest_neighbour_algorithm.py +104 -0
- myawesomepkg/TSAPY1/p6_1_id3_algorithm_.py +199 -0
- myawesomepkg/TSAPY1/p7_1_ann_backpropagation_algorithm.py +116 -0
- myawesomepkg/TSAPY1/p7_2_bds_association_rule_mining.py +99 -0
- myawesomepkg/TSAPY1/p8_1_gaussian_naive_bayes_.py +97 -0
- myawesomepkg/TSAPY1/p8_2_naive_bayes_document_classifier.py +111 -0
- myawesomepkg/TSAPY1/p9_1bayesian_network.py +91 -0
- myawesomepkg/TSAPY1/p9_b_loess_regression.py +113 -0
- myawesomepkg/TSAPY1/p_1_test_and_train.py +98 -0
- myawesomepkg/TSAPY1/pract3A-B.py +3212 -0
- myawesomepkg/TSAPY1/practical_no_3.py +167 -0
- myawesomepkg/TSAPY1/practical_no_4.py +215 -0
- myawesomepkg/TSAPY1/practical_no_4b.py +78 -0
- myawesomepkg/TSAPY1/practical_no_5_ac_and_pca.py +39 -0
- myawesomepkg/TSAPY1/practical_no_6.py +37 -0
- myawesomepkg/TSAPY1/practical_no_7.py +69 -0
- myawesomepkg/TSAPY1/practical_no_8.py +79 -0
- myawesomepkg/TSAPY1/tsa_practical_no_1.py +287 -0
- myawesomepkg/TSAPY1/tsa_practical_no_2.py +121 -0
- myawesomepkg/__init__.py +1 -0
- myawesomepkg/core.py +2 -0
- myawesomepkg-0.1.8.dist-info/METADATA +17 -0
- myawesomepkg-0.1.8.dist-info/RECORD +64 -0
- myawesomepkg-0.1.8.dist-info/WHEEL +5 -0
- myawesomepkg-0.1.8.dist-info/top_level.txt +1 -0
myawesomepkg/TSAPY1/P4-1-different_distance_methods_(euclidean)_with_prediction,_test_score_and_confusion_matrix1.py
ADDED
@@ -0,0 +1,131 @@
+# -*- coding: utf-8 -*-
+"""different Distance methods (Euclidean) with Prediction, Test Score and Confusion Matrix1.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1FAIOs_ScwVaM7Q1T7kf39YA35Jjte2Pd
+"""
+
+# Importing required libraries
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import LabelEncoder
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
+
+# Load the dataset
+data = pd.read_csv("/content/Irisdata.csv")
+
+# Selecting features and target
+X = data.iloc[:, [1, 2, 3, 4]].values
+y = data.iloc[:, 5].values
+
+# Encoding target labels
+le = LabelEncoder()
+y = le.fit_transform(y)
+
+# Splitting data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Creating KNN model using Euclidean distance
+model = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
+model.fit(X_train, y_train)
+
+# Making predictions
+y_pred = model.predict(X_test)
+
+# Evaluating model performance
+cm = confusion_matrix(y_test, y_pred)
+acc = accuracy_score(y_test, y_pred)
+
+print("Confusion Matrix:\n", cm)
+print("\nAccuracy Score:", round(acc * 100, 2), "%")
+print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=le.classes_))
+
+# Example prediction
+sample = [[5.1, 3.5, 1.4, 0.2]]
+predicted_class = model.predict(sample)
+print("\nPredicted Class for", sample, ":", le.inverse_transform(predicted_class)[0])
+
+# ---- Visualization of KNN Results ----
+import matplotlib.pyplot as plt
+import numpy as np
+
+# Only for 2D plotting (we'll use the first two features: SepalLength and SepalWidth)
+X_plot = X_train[:, :2]
+y_plot = y_train
+
+# Train again using only first 2 features (for visualization)
+model_2d = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
+model_2d.fit(X_plot, y_plot)
+
+# Create a meshgrid for background decision boundary
+x_min, x_max = X_plot[:, 0].min() - 1, X_plot[:, 0].max() + 1
+y_min, y_max = X_plot[:, 1].min() - 1, X_plot[:, 1].max() + 1
+xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
+                     np.arange(y_min, y_max, 0.1))
+
+# Predict for each point in meshgrid
+Z = model_2d.predict(np.c_[xx.ravel(), yy.ravel()])
+Z = Z.reshape(xx.shape)
+
+# Plot decision boundary and training points
+plt.figure(figsize=(8,6))
+plt.contourf(xx, yy, Z, alpha=0.3)
+plt.scatter(X_plot[:, 0], X_plot[:, 1], c=y_plot, s=40, edgecolor='k')
+plt.title("KNN Classification (Euclidean Distance)")
+plt.xlabel("Sepal Length")
+plt.ylabel("Sepal Width")
+plt.show()
+
+
+
+#second code
+
+# 4.a Euclidean
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import confusion_matrix, accuracy_score
+
+# --- Step 1: Sample dataset ---
+# For simplicity, use an example dataset
+from sklearn.datasets import load_iris
+data = load_iris()
+X = data.data
+y = data.target
+
+# --- Step 2: Split into training and testing ---
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
+
+# --- Step 3: Define a function for Euclidean distance ---
+def euclidean_distance(x1, x2):
+    return np.sqrt(np.sum((x1 - x2)**2))
+
+# --- Step 4: Implement KNN manually using Euclidean distance ---
+def knn_predict(X_train, y_train, X_test, k=3):
+    predictions = []
+    for test_point in X_test:
+        # Calculate all distances
+        distances = [euclidean_distance(test_point, x_train) for x_train in X_train]
+        # Get indices of k nearest neighbors
+        k_indices = np.argsort(distances)[:k]
+        # Get the labels of k nearest points
+        k_neighbor_labels = [y_train[i] for i in k_indices]
+        # Take the majority vote
+        most_common = max(set(k_neighbor_labels), key=k_neighbor_labels.count)
+        predictions.append(most_common)
+    return np.array(predictions)
+
+# --- Step 5: Make predictions ---
+y_pred = knn_predict(X_train, y_train, X_test, k=3)
+
+# --- Step 6: Evaluate model ---
+acc = accuracy_score(y_test, y_pred)
+cm = confusion_matrix(y_test, y_pred)
+
+print("Predictions:", y_pred)
+print("\nTest Accuracy:", acc)
+print("\nConfusion Matrix:\n", cm)
+
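Both scripts in this file fix the neighbour count by hand (k = 5 and k = 3), and the first one reads a Colab-only CSV path. The following is a minimal sketch, not part of the published package, of how the same Euclidean-distance KNN could instead be tuned by cross-validation; it assumes scikit-learn's bundled iris data in place of /content/Irisdata.csv.

# Hedged sketch: cross-validated choice of k for Euclidean-distance KNN.
# Not part of myawesomepkg; uses sklearn's bundled iris data instead of
# the Colab-only /content/Irisdata.csv path used in the packaged script.
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Search k = 1..14 with 5-fold cross-validation on the training split only
grid = GridSearchCV(
    KNeighborsClassifier(metric='euclidean'),
    param_grid={'n_neighbors': list(range(1, 15))},
    cv=5,
)
grid.fit(X_train, y_train)

# Evaluate the selected k on the held-out test split
y_pred = grid.best_estimator_.predict(X_test)
print("Best k:", grid.best_params_['n_neighbors'])
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))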
myawesomepkg/TSAPY1/P4-2-k_means_clustering_with_prediction,_test_score_and_confusion_matrix2.py
ADDED
@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*-
+"""K-Means clustering with Prediction, Test Score and Confusion Matrix2.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1PhxwPtvymskRZcO18J6px8vkMhN1KNUR
+"""
+
+# Importing required libraries
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import LabelEncoder
+from sklearn.metrics import confusion_matrix, accuracy_score
+
+# Load the Iris dataset
+data = pd.read_csv("/content/Irisdata.csv")
+
+# Selecting features (independent variables)
+X = data.iloc[:, [1, 2, 3, 4]].values
+
+# Extracting actual target labels (species)
+y = data.iloc[:, 5].values
+
+# Encoding string labels into numbers
+le = LabelEncoder()
+y_encoded = le.fit_transform(y)
+
+# Finding the optimal number of clusters using the Elbow method
+wcss = []
+for i in range(1, 11):
+    kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
+    kmeans.fit(X)
+    wcss.append(kmeans.inertia_)
+
+# Plotting the Elbow graph
+plt.plot(range(1, 11), wcss, marker='o')
+plt.title('The Elbow Method')
+plt.xlabel('Number of Clusters')
+plt.ylabel('WCSS (Within Cluster Sum of Squares)')
+plt.show()
+
+# Applying K-Means with 3 clusters (based on Elbow method)
+kmeans = KMeans(n_clusters=3, init='k-means++', max_iter=300, n_init=10, random_state=0)
+y_kmeans = kmeans.fit_predict(X)
+
+# Aligning K-Means cluster labels with actual species labels
+from scipy.stats import mode
+
+labels = np.zeros_like(y_kmeans)
+for i in range(3):
+    mask = (y_kmeans == i)
+    labels[mask] = mode(y_encoded[mask])[0]
+
+# Calculating confusion matrix and accuracy
+cm = confusion_matrix(y_encoded, labels)
+acc = accuracy_score(y_encoded, labels)
+
+print("\nConfusion Matrix:\n", cm)
+print("\nAccuracy Score:", round(acc * 100, 2), "%")
+
+# Visualizing the clusters
+plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s=100, c='red', label='Cluster 1')
+plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s=100, c='blue', label='Cluster 2')
+plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s=100, c='green', label='Cluster 3')
+
+# Plotting centroids
+plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
+            s=200, c='yellow', label='Centroids', marker='X')
+
+plt.title('K-Means Clustering (Iris Dataset)')
+plt.xlabel('Sepal Length')
+plt.ylabel('Sepal Width')
+plt.legend()
+plt.show()
+
+# Example Prediction
+sample = [[5.1, 3.5, 1.4, 0.2]]
+pred_cluster = kmeans.predict(sample)
+print("\nPredicted Cluster for sample", sample, ":", pred_cluster[0])
+
+
+
+#second code+++++++++++++++++++++++++++++++++++++++++++=
+
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+from sklearn.cluster import KMeans
+
+# Load dataset
+dataset = pd.read_csv('/content/Irisdata.csv')
+print(dataset.head())
+
+# Select features (make sure to exclude ID or non-numeric columns)
+X = dataset.iloc[:, [1, 2, 3, 4]].values
+
+# Elbow method to find optimal number of clusters
+wcss = []
+for i in range(1, 11):
+    kmeans = KMeans(
+        n_clusters=i,
+        init='k-means++',
+        max_iter=300,
+        n_init=10,
+        random_state=0
+    )
+    kmeans.fit(X)
+    wcss.append(kmeans.inertia_)
+
+# Plot Elbow Curve
+plt.figure(figsize=(8, 5))
+plt.plot(range(1, 11), wcss, marker='o')
+plt.title('Elbow Method for Optimal k')
+plt.xlabel('Number of Clusters')
+plt.ylabel('WCSS (Within Cluster Sum of Squares)')
+plt.grid(True)
+plt.show()
+
+# Applying KMeans with optimal clusters (3 for Iris)
+kmeans = KMeans(
+    n_clusters=3,
+    init='k-means++',
+    max_iter=300,
+    n_init=10,
+    random_state=0
+)
+y_kmeans = kmeans.fit_predict(X)
+
+# Plot the clusters (using the first two features for 2D visualization)
+plt.figure(figsize=(8, 6))
+plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s=100, c='red', label='Iris-setosa')
+plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s=100, c='blue', label='Iris-versicolour')
+plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s=100, c='green', label='Iris-virginica')
+
+# Plot centroids
+plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
+            s=200, c='yellow', edgecolor='black', marker='X', label='Centroids')
+
+plt.title('Clusters of Iris Species')
+plt.xlabel('Feature 1')
+plt.ylabel('Feature 2')
+plt.legend()
+plt.grid(True)
+plt.show()
+
+
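The step most worth reusing from the script above is the alignment of unsupervised K-Means cluster ids with the true species labels (via scipy.stats.mode) before computing accuracy and the confusion matrix. Below is a minimal sketch, not part of the published package, that factors that alignment into a helper using only numpy's bincount; it assumes scikit-learn's bundled iris data rather than the /content/Irisdata.csv path.

# Hedged sketch: reusable cluster-to-label alignment for scoring K-Means.
# Not part of myawesomepkg; np.bincount stands in for scipy.stats.mode and
# sklearn's bundled iris data replaces the Colab-only CSV path.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, confusion_matrix

def align_cluster_labels(cluster_ids, y_true, n_clusters):
    # Map each cluster id to the most frequent true label inside that cluster
    aligned = np.zeros_like(cluster_ids)
    for c in range(n_clusters):
        mask = cluster_ids == c
        if mask.any():
            aligned[mask] = np.bincount(y_true[mask]).argmax()
    return aligned

X, y = load_iris(return_X_y=True)
clusters = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(X)
labels = align_cluster_labels(clusters, y, n_clusters=3)

print("Accuracy Score:", accuracy_score(y, labels))
print("Confusion Matrix:\n", confusion_matrix(y, labels))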