sklearne-1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sklearne/__init__.py ADDED
@@ -0,0 +1 @@
+ from .loader import show_code
sklearne/data/A1.txt ADDED
@@ -0,0 +1,74 @@
+ # A1
+ # Design and implement pattern recognition system to identify and extract unique species patterns from the Iris dataset
+ # Step 1: Import Required Libraries
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+
+ from sklearn.datasets import load_iris
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.decomposition import PCA
+ from sklearn.neighbors import KNeighborsClassifier
+ from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+
+ # Step 2: Load Dataset
+ iris = load_iris()
+ X = iris.data
+ y = iris.target
+ feature_names = iris.feature_names
+ target_names = iris.target_names
+
+ # Create DataFrame for easier handling
+ df = pd.DataFrame(X, columns=feature_names)
+ df['species'] = pd.Series(y).map(dict(zip(range(3), target_names)))
+
+ # Step 3: Feature Analysis (Pattern Recognition)
+ print("Feature-wise Mean by Species:")
+ print(df.groupby('species').mean())  # Summary of patterns
+
+ # Step 4: Visualizations - Patterns between features
+ sns.pairplot(df, hue='species', palette='Set2')
+ plt.suptitle("Pairplot of Iris Features by Species", y=1.02)
+ plt.show()
+
+ # Step 5: PCA for Pattern Visualization (2D)
+ scaler = StandardScaler()
+ X_scaled = scaler.fit_transform(X)
+
+ pca = PCA(n_components=2)
+ X_pca = pca.fit_transform(X_scaled)
+
+ plt.figure(figsize=(8, 6))
+ for i, label in enumerate(np.unique(y)):
+     plt.scatter(X_pca[y == label, 0], X_pca[y == label, 1], label=target_names[label])
+ plt.title("PCA of Iris Dataset")
+ plt.xlabel("Principal Component 1")
+ plt.ylabel("Principal Component 2")
+ plt.legend()
+ plt.grid(True)
+ plt.show()
+
+ # Step 6: Classification using KNN
+ X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
+
+ knn = KNeighborsClassifier(n_neighbors=5)
+ knn.fit(X_train, y_train)
+
+ y_pred = knn.predict(X_test)
+
+ # Step 7: Evaluation
+ print("\nClassification Report:")
+ print(classification_report(y_test, y_pred, target_names=target_names))
+ print(f"Accuracy: {accuracy_score(y_test, y_pred)*100:.2f}%")
+
+ # Confusion Matrix
+ cm = confusion_matrix(y_test, y_pred)
+ plt.figure(figsize=(6, 5))
+ sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=target_names, yticklabels=target_names)
+ plt.title("Confusion Matrix")
+ plt.xlabel("Predicted")
+ plt.ylabel("True")
+ plt.tight_layout()
+ plt.show()
sklearne/data/A2.txt ADDED
@@ -0,0 +1,50 @@
+ # A2
+ # Develop a text classification model that can effectively identify, extract features, and classify documents from the 20 Newsgroups dataset into one of the 20 predefined categories using pattern recognition techniques.
+ # Import required libraries
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+
+ from sklearn.datasets import fetch_20newsgroups
+ from sklearn.model_selection import train_test_split
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.naive_bayes import MultinomialNB
+ from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
+
+ # Step 1: Load the dataset
+ newsgroups = fetch_20newsgroups(subset='all', shuffle=True, random_state=42)
+ X = newsgroups.data
+ y = newsgroups.target
+ target_names = newsgroups.target_names
+
+ # Step 2: Train-test split
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+ # Step 3: Feature Extraction using TF-IDF (Pattern Recognition Technique)
+ vectorizer = TfidfVectorizer(stop_words='english', max_df=0.5)
+ X_train_tfidf = vectorizer.fit_transform(X_train)
+ X_test_tfidf = vectorizer.transform(X_test)
+
+ # Step 4: Model Training using Naive Bayes
+ model = MultinomialNB()
+ model.fit(X_train_tfidf, y_train)
+
+ # Step 5: Prediction
+ y_pred = model.predict(X_test_tfidf)
+
+ # Step 6: Evaluation
+ print("Accuracy:", accuracy_score(y_test, y_pred))
+ print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=target_names))
+
+ # Step 7: Confusion Matrix Visualization
+ conf_mat = confusion_matrix(y_test, y_pred)
+ plt.figure(figsize=(12, 10))
+ sns.heatmap(conf_mat, annot=False, cmap='Blues', xticklabels=target_names, yticklabels=target_names)
+ plt.title("Confusion Matrix - 20 Newsgroups")
+ plt.xlabel("Predicted")
+ plt.ylabel("True")
+ plt.xticks(rotation=90)
+ plt.yticks(rotation=0)
+ plt.tight_layout()
+ plt.show()
sklearne/data/A3.txt ADDED
@@ -0,0 +1,44 @@
+ # A3
+ # Design a statistical model to analyze wine quality using Gaussian distribution methods. Utilize synthetic data generated with NumPy or the Wine Quality Dataset
+
+
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from scipy.stats import norm
+
+ # Generate synthetic wine data (only 3 features)
+ n = 1000
+ fixed_acidity = np.random.normal(7.0, 0.7, n)
+ volatile_acidity = np.random.normal(0.5, 0.1, n)
+ citric_acid = np.random.normal(0.3, 0.1, n)
+
+ # Create DataFrame
+ df = pd.DataFrame({"fixed_acidity": fixed_acidity, "volatile_acidity": volatile_acidity, "citric_acid": citric_acid})
+
+ # Generate synthetic wine quality
+ df["wine_quality"] = np.clip(
+     (0.3 * df["fixed_acidity"] + 1.5 * df["volatile_acidity"] + 0.8 * df["citric_acid"] + np.random.normal(0, 0.5, n)).round().astype(int),
+     3, 8
+ )
+
+ # Display summary
+ print(df.describe())
+
+ # Plot histogram for 'fixed_acidity' with Gaussian fit
+ plt.figure(figsize=(10, 6))
+ sns.histplot(df['fixed_acidity'], kde=True, stat="density", color="skyblue", bins=30)
+
+ # Fit a Gaussian distribution to the data
+ mu, std = norm.fit(df['fixed_acidity'])
+ x = np.linspace(df['fixed_acidity'].min(), df['fixed_acidity'].max(), 100)
+ plt.plot(x, norm.pdf(x, mu, std), 'k', lw=2)
+
+ # Add title and labels
+ plt.title(f"Fixed Acidity Distribution ~ N({mu:.2f}, {std:.2f})", fontsize=14)
+ plt.xlabel("Fixed Acidity", fontsize=12)
+ plt.ylabel("Density", fontsize=12)
+
+ # Show the plot
+ plt.show()
sklearne/data/A4.txt ADDED
@@ -0,0 +1,41 @@
+ # A4
+ # Develop a classification system for handwritten digit recognition using the MNIST dataset, leveraging Bayes' Decision Theory to optimize decision-making and minimize classification error.
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from sklearn.datasets import fetch_openml
+ from sklearn.model_selection import train_test_split
+ from sklearn.naive_bayes import GaussianNB
+ from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
+ from sklearn.decomposition import PCA
+
+ # Dataset Loading
+ mnist = fetch_openml('mnist_784', version=1, as_frame=False)
+ X, y = mnist.data, mnist.target.astype(np.int8)
+ X = X / 255.0  # Normalize pixel values
+
+ # Reduce dimensionality using PCA
+ pca = PCA(n_components=50)  # You can try 30–100 and tune this
+ X_pca = pca.fit_transform(X)
+
+ # Model Development (Using GaussianNB as an approximation to Bayes' Decision)
+ model = GaussianNB()
+
+ # Training and Testing
+ X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
+ model.fit(X_train, y_train)
+ y_pred = model.predict(X_test)
+
+ # Evaluation
+ print("\nEvaluation Metrics:")
+ print(f"Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
+ print("\nClassification Report:\n", classification_report(y_test, y_pred))
+
+ # Confusion Matrix
+ conf_mat = confusion_matrix(y_test, y_pred)
+ plt.figure(figsize=(10, 7))
+ sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues')
+ plt.title("Confusion Matrix")
+ plt.xlabel("Predicted")
+ plt.ylabel("True")
+ plt.show()
sklearne/data/A5.txt ADDED
@@ -0,0 +1,48 @@
+ # A5
+ # Develop an anomaly detection system for high-dimensional network traffic data using the KDD Cup 1999 dataset
+ import pandas as pd
+ import numpy as np
+ from sklearn.datasets import fetch_kddcup99
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.decomposition import PCA
+ from sklearn.neighbors import KNeighborsClassifier
+ from sklearn.metrics import classification_report
+
+ # Load only a subset of the data for speed (optional: use 20,000 samples)
+ kdd = fetch_kddcup99(percent10=True, shuffle=True)
+ X_raw = kdd["data"][:20000]
+ y_raw = kdd["target"][:20000]
+
+ # Convert to DataFrame
+ df = pd.DataFrame(X_raw, columns=kdd["feature_names"])
+
+ # Binary label: 0 for normal, 1 for anomaly
+ df["binary_label"] = np.where(y_raw == b'normal.', 0, 1)
+
+ # One-hot encode categorical columns (with limited unique categories)
+ df = pd.get_dummies(df, columns=["protocol_type", "service", "flag"], drop_first=True)
+
+ # Features and label
+ X = df.drop(['binary_label'], axis=1)
+ y = df['binary_label']
+
+ # Scale features
+ scaler = StandardScaler()
+ X_scaled = scaler.fit_transform(X)
+
+ # PCA for faster training (keep only 10 components)
+ pca = PCA(n_components=10, random_state=42)
+ X_reduced = pca.fit_transform(X_scaled)
+
+ # Train/test split
+ X_train, X_test, y_train, y_test = train_test_split(X_reduced, y, test_size=0.2, random_state=42)
+
+ # KNN with fewer neighbors for speed
+ knn = KNeighborsClassifier(n_neighbors=3, n_jobs=-1)  # Use all CPU cores
+ knn.fit(X_train, y_train)
+
+ # Prediction & Evaluation
+ y_pred = knn.predict(X_test)
+ print(f"Accuracy: {knn.score(X_test, y_test) * 100:.2f}%")
+ print(classification_report(y_test, y_pred))
sklearne/data/B1.txt ADDED
@@ -0,0 +1,74 @@
+ # B1
+ # Implement a Hidden Markov Model (HMM) to recognize the sequence of weather patterns (e.g., sunny, cloudy, rainy) based on temperature and humidity observations. Use both discrete and continuous HMMs to compare their performance.
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from hmmlearn import hmm
+ from sklearn.preprocessing import KBinsDiscretizer
+ import warnings
+
+ warnings.filterwarnings("ignore")
+ np.random.seed(42)
+
+ # Step 1: Define states
+ states = ["Sunny", "Cloudy", "Rainy"]
+ n_states = len(states)
+
+ # Step 2: Transition probabilities
+ trans_probs = np.array([
+     [0.6, 0.3, 0.1],
+     [0.2, 0.5, 0.3],
+     [0.1, 0.3, 0.6]
+ ])
+
+ # Step 3: Emission properties (Temp, Humidity)
+ means = np.array([[30, 40], [25, 50], [20, 80]])
+ covars = np.array([
+     [[5, 0], [0, 5]],
+     [[4, 0], [0, 4]],
+     [[5, 0], [0, 5]]
+ ])
+
+ # Step 4: Generate synthetic data
+ n_samples = 300
+ hidden_states = np.random.choice(n_states, size=n_samples, p=[0.5, 0.3, 0.2])
+ observations = np.array([
+     np.random.multivariate_normal(means[s], covars[s]) for s in hidden_states
+ ])
+
+ # Visualize the generated data
+ plt.scatter(observations[:, 0], observations[:, 1], c=hidden_states, cmap='viridis')
+ plt.xlabel("Temperature")
+ plt.ylabel("Humidity")
+ plt.title("Synthetic Weather Observations")
+ plt.show()
+
+ # === Discrete HMM (Temperature only) ===
+ discretizer = KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='uniform')
+ X_discrete = discretizer.fit_transform(observations[:, [0]]).astype(int)
+
+ model_discrete = hmm.CategoricalHMM(n_components=n_states, n_iter=100)  # discrete emissions (this class was named MultinomialHMM before hmmlearn 0.3)
+ model_discrete.fit(X_discrete)
+ pred_discrete = model_discrete.predict(X_discrete)
+
+ # Accuracy (rough comparison)
+ acc_discrete = np.mean(pred_discrete == hidden_states)
+ print(f"Discrete HMM Accuracy: {acc_discrete:.2f}")
+
+ # === Continuous HMM (Full Temp & Humidity) ===
+ model_continuous = hmm.GaussianHMM(n_components=n_states, covariance_type="full", n_iter=100)
+ model_continuous.fit(observations)
+ pred_continuous = model_continuous.predict(observations)
+
+ acc_continuous = np.mean(pred_continuous == hidden_states)
+ print(f"Continuous HMM Accuracy: {acc_continuous:.2f}")
+
+ # Visualize predictions (first 50 steps)
+ plt.figure(figsize=(10, 4))
+ plt.plot(hidden_states[:50], "bo-", label="True States")
+ plt.plot(pred_discrete[:50], "r--", label="Discrete HMM")
+ plt.plot(pred_continuous[:50], "g.-", label="Continuous HMM")
+ plt.legend()
+ plt.title("True vs Predicted Hidden States")
+ plt.xlabel("Time Step")
+ plt.ylabel("State Index")
+ plt.show()
sklearne/data/B2.txt ADDED
@@ -0,0 +1,49 @@
+ # B2
+ # Build a Discrete Hidden Markov Model (HMM) to analyze DNA sequences and predict gene regions. Use Maximum Likelihood Estimation to train the model with a given dataset of labeled sequences
+ import numpy as np
+ from hmmlearn import hmm
+ from sklearn.preprocessing import LabelEncoder
+ import matplotlib.pyplot as plt
+
+ # Example sequences
+ sequences = [
+     "ATGCGCGTATCGT",  # Mostly gene
+     "CGTACGTAGCTA",   # Mix
+     "TTATTAGCGTTA"    # Mostly intergenic
+ ]
+
+ # Corresponding labels (0 = intergenic, 1 = gene)
+ labels = [
+     [1,1,1,1,1,1,0,0,0,0,0,0,0],
+     [0,0,1,1,1,1,0,0,0,1,1,1],
+     [0,0,0,0,0,1,1,0,0,0,0,0]
+ ]
+
+ # Flatten sequence and labels for training
+ all_seq = ''.join(sequences)
+ all_labels = np.concatenate(labels)
+
+ # Encode DNA characters A/C/G/T to integers 0-3
+ le = LabelEncoder()
+ le.fit(['A', 'C', 'G', 'T'])
+ X = le.transform(list(all_seq)).reshape(-1, 1)
+
+ # Train HMM with discrete emissions (CategoricalHMM; named MultinomialHMM before hmmlearn 0.3)
+ model = hmm.CategoricalHMM(n_components=2, n_iter=100, tol=0.01)
+ model.fit(X)
+
+ # Predict hidden states
+ predicted_states = model.predict(X)
+
+ # Compare with true labels
+ accuracy = np.mean(predicted_states == all_labels)
+ print(f"Prediction Accuracy (approx): {accuracy:.2f}")
+
+ # Visualize true vs predicted
+ plt.plot(all_labels[:50], label="True State")
+ plt.plot(predicted_states[:50], '--', label="Predicted")
+ plt.title("Gene Prediction - HMM")
+ plt.xlabel("Sequence Position")
+ plt.ylabel("State (0=Intergenic, 1=Gene)")
+ plt.legend()
+ plt.show()
sklearne/data/B4.txt ADDED
@@ -0,0 +1,44 @@
+ # B4
+ # Create a program that fits a mixture of Gaussians to a dataset of handwritten digit features and clusters them into distinct groups. Use the Expectation-Maximization method to estimate the parameters of the Gaussian mixture model.
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from sklearn.datasets import fetch_openml
+ from sklearn.decomposition import PCA
+ from sklearn.mixture import GaussianMixture
+ from sklearn.metrics import adjusted_rand_score
+
+ # Load MNIST dataset
+ mnist = fetch_openml("mnist_784", version=1, as_frame=False)
+ X, y = mnist.data, mnist.target.astype(int)
+
+ # Normalize pixel values
+ X = X / 255.0
+
+ # Reduce dimensionality with PCA (to speed up EM)
+ pca = PCA(n_components=50)
+ X_pca = pca.fit_transform(X)
+
+ # Fit Gaussian Mixture Model
+ n_components = 10  # Assume 10 digits (0-9)
+ gmm = GaussianMixture(n_components=n_components, covariance_type='full', max_iter=100, random_state=42)
+ gmm.fit(X_pca)
+
+ # Predict cluster labels
+ cluster_labels = gmm.predict(X_pca)
+
+ # Optional: Check clustering quality with Adjusted Rand Index
+ ari = adjusted_rand_score(y, cluster_labels)
+ print(f"Adjusted Rand Index: {ari:.2f}")
+
+ # Visualize clusters in 2D
+ pca_2d = PCA(n_components=2)
+ X_2d = pca_2d.fit_transform(X_pca)
+
+ plt.figure(figsize=(10, 6))
+ sns.scatterplot(x=X_2d[:, 0], y=X_2d[:, 1], hue=cluster_labels, palette="tab10", legend="full", s=10)
+ plt.title("GMM Clustering of MNIST (2D PCA Projection)")
+ plt.xlabel("PCA 1")
+ plt.ylabel("PCA 2")
+ plt.legend(title="Cluster")
+ plt.show()
sklearne/data/B6.txt ADDED
@@ -0,0 +1,76 @@
+ # B6
+ # Use non-parametric K-Nearest Neighbor (KNN) techniques to classify grayscale images of shapes (e.g., circles, squares, and triangles). Evaluate and compare the classification accuracy of both methods.
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from sklearn.model_selection import train_test_split
+ from sklearn.neighbors import KNeighborsClassifier
+ from sklearn.metrics import accuracy_score, classification_report
+ import cv2  # Required for triangle generation
+
+ # Function to generate square
+ def generate_square(image_size=64):
+     img = np.zeros((image_size, image_size), dtype=np.uint8)
+     img[16:48, 16:48] = 255  # Create a square in the center
+     return img
+
+ # Function to generate circle
+ def generate_circle(image_size=64):
+     img = np.zeros((image_size, image_size), dtype=np.uint8)
+     y, x = np.ogrid[:image_size, :image_size]
+     mask = (x - image_size // 2) ** 2 + (y - image_size // 2) ** 2 <= (image_size // 4) ** 2
+     img[mask] = 255
+     return img
+
+ # Function to generate triangle
+ def generate_triangle(image_size=64):
+     img = np.zeros((image_size, image_size), dtype=np.uint8)
+     pts = np.array([[32, 16], [16, 48], [48, 48]], np.int32)
+     cv2.fillPoly(img, [pts], 255)
+     return img
+
+ # Function to generate the appropriate shape based on the label
+ def generate_shape(shape):
+     if shape == 'square':
+         return generate_square()
+     elif shape == 'circle':
+         return generate_circle()
+     elif shape == 'triangle':
+         return generate_triangle()
+
+ # Create synthetic dataset
+ shapes = ['square', 'circle', 'triangle']
+ X = []  # Features (flattened images)
+ y = []  # Labels (0: Square, 1: Circle, 2: Triangle)
+ n_samples = 1000
+
+ for shape_idx, shape in enumerate(shapes):
+     for _ in range(n_samples // 3):
+         img = generate_shape(shape)
+         X.append(img.flatten())  # Flatten image to 1D
+         y.append(shape_idx)  # Assign corresponding label
+
+ X = np.array(X)
+ y = np.array(y)
+
+ # Split dataset into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
+
+ # Train KNN classifier
+ knn = KNeighborsClassifier(n_neighbors=3)
+ knn.fit(X_train, y_train)
+
+ # Make predictions and evaluate
+ y_pred = knn.predict(X_test)
+ accuracy = accuracy_score(y_test, y_pred)
+
+ print(f"Accuracy: {accuracy * 100:.2f}%")
+ print("\nClassification Report:\n", classification_report(y_test, y_pred))
+
+ # Visualize test images with predicted labels
+ fig, axes = plt.subplots(1, 5, figsize=(12, 6))
+ for i in range(5):
+     axes[i].imshow(X_test[i].reshape(64, 64), cmap='gray')
+     axes[i].set_title(f"Pred: {shapes[y_pred[i]]}")
+     axes[i].axis('off')
+
+ plt.show()
sklearne/data/B7.txt ADDED
@@ -0,0 +1,55 @@
+ # B7
+ # Build a Python application to classify iris flowers using the Nearest Neighbor Rule. Use a given dataset with features such as petal length and width. Experiment with different values of K and evaluate the model's accuracy
+
+
+ import numpy as np
+ from sklearn.datasets import load_iris
+ from sklearn.model_selection import train_test_split
+ from sklearn.neighbors import KNeighborsClassifier
+ from sklearn.metrics import accuracy_score, classification_report
+ import matplotlib.pyplot as plt
+
+ # Step 1: Load the Iris dataset
+ iris = load_iris()
+ X = iris.data  # Features (petal length, petal width, etc.)
+ y = iris.target  # Labels (species of flowers)
+
+ # Step 2: Split the dataset into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
+
+ # Step 3: Define a function to train and evaluate the KNN model with different K values
+ def evaluate_knn(k_values):
+     for k in k_values:
+         # Initialize the KNN classifier with a given value of k
+         knn = KNeighborsClassifier(n_neighbors=k)
+
+         # Train the classifier on the training data
+         knn.fit(X_train, y_train)
+
+         # Make predictions on the test data
+         y_pred = knn.predict(X_test)
+
+         # Calculate accuracy
+         accuracy = accuracy_score(y_test, y_pred)
+         print(f"K={k} - Accuracy: {accuracy * 100:.2f}%")
+         print(f"Classification Report for K={k}:\n{classification_report(y_test, y_pred)}")
+
+ # Step 4: Experiment with different K values
+ k_values = [1, 3, 5, 7, 9]  # Different values of K to try
+ evaluate_knn(k_values)
+
+ # Optional: Plot the effect of K on accuracy (for visualization)
+ accuracies = []
+ for k in k_values:
+     knn = KNeighborsClassifier(n_neighbors=k)
+     knn.fit(X_train, y_train)
+     y_pred = knn.predict(X_test)
+     accuracies.append(accuracy_score(y_test, y_pred))
+
+ # Plot accuracy vs K value
+ plt.plot(k_values, accuracies, marker='o')
+ plt.title('KNN Classifier Accuracy vs K')
+ plt.xlabel('K value')
+ plt.ylabel('Accuracy')
+ plt.grid(True)
+ plt.show()
sklearne/loader.py ADDED
@@ -0,0 +1,12 @@
+ import os
+
+ def show_code(filename):
+     file_path = os.path.join(os.path.dirname(__file__), 'data', filename)
+     if not os.path.exists(file_path):
+         return f"File {filename} not found."
+
+     with open(file_path, 'r') as f:
+         content = f.read()
+
+     print(content)  # shows the code in Jupyter output
+     return content  # also returns content if needed
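The loader above is re-exported from __init__.py, so the package's only public entry point is show_code. A minimal usage sketch (assuming the wheel is installed and called from a notebook or REPL) would look like:

    from sklearne import show_code
    code = show_code("A1.txt")  # prints the stored snippet and also returns it as a string

Any of the bundled filenames under sklearne/data (A1.txt through B7.txt listed above) can be passed the same way.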
sklearne-1.0.dist-info/METADATA ADDED
@@ -0,0 +1,5 @@
+ Metadata-Version: 2.1
+ Name: sklearne
+ Version: 1.0
+ Summary: Saver
+ Author: Your Name
sklearne-1.0.dist-info/RECORD ADDED
@@ -0,0 +1,16 @@
+ sklearne/__init__.py,sha256=uk0QwGzWTdxut9oI3_0723e8w0kYC3b__AqlSruVpSU,31
+ sklearne/loader.py,sha256=PXerhBcG88Z6FhLV1izCPz_a_jQ2nN10dpE1U834j0Q,379
+ sklearne/data/A1.txt,sha256=OD3cGtqSVqfH5XSD0kxd4-yTv4hJ90VJvZ2dGL2Ssqc,2439
+ sklearne/data/A2.txt,sha256=ctmfmMhFXZCUXlAFqnEMtUXPYhT2QQinYLovtHtNCXQ,1943
+ sklearne/data/A3.txt,sha256=B_pUaofgsAnhovRXOTsiHjIlFoNJulLE6lvr8v1ZJgE,1507
+ sklearne/data/A4.txt,sha256=N_PgZKbBUyZqefBf05aQbeaukuyfhUOjtCeGll92gYQ,1560
+ sklearne/data/A5.txt,sha256=P-8wK-Hd7B_BMrJ0OsfiYKyfirOy2oF6wm8dW8bnF9Y,1724
+ sklearne/data/B1.txt,sha256=XenxamzxPD78T_fOQ15AkNU5sWcrOMWp-Zg60atRX4A,2564
+ sklearne/data/B2.txt,sha256=azrRFUVR1Y4DgOkDtsj-z2_PUCoFOAfzRO1NwiC-dYo,1502
+ sklearne/data/B4.txt,sha256=P949GYoOoCj5KCM62Q9CrW16mPD7t-W0y4hFSUHMWbk,1565
+ sklearne/data/B6.txt,sha256=Aq615q1NecnjT_KYNbKdJaLu6Zis1-ioTyXC-6eRoh0,2728
+ sklearne/data/B7.txt,sha256=S3HC0Il-_FHOLGbMvlvNmZb1rtfRHYV8mtxkE08tq7c,2109
+ sklearne-1.0.dist-info/METADATA,sha256=2OJbnz8lh-DXOeeolFnJa5tmecLiNxG7L-a8BInXcYQ,88
+ sklearne-1.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ sklearne-1.0.dist-info/top_level.txt,sha256=v7HoQGFOgDk0Ia60xJb3HE_zt-QB6YueCQjJyse8pEY,9
+ sklearne-1.0.dist-info/RECORD,,
sklearne-1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (75.6.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
sklearne-1.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ sklearne