myawesomepkg 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- myawesomepkg/TSAPY1/1 (A) Working with Numpy Arrays.py +1146 -0
- myawesomepkg/TSAPY1/1(B)Aggregation (1).py +319 -0
- myawesomepkg/TSAPY1/1(C) Broadcasting .py +328 -0
- myawesomepkg/TSAPY1/10-A_Load_stringr.py +77 -0
- myawesomepkg/TSAPY1/10-B_Forcats.py +70 -0
- myawesomepkg/TSAPY1/2(a) Comparison, Masking And Boolean Logic (1).py +497 -0
- myawesomepkg/TSAPY1/2(b)Fancy Indexing.py +594 -0
- myawesomepkg/TSAPY1/2(c) Sorting Arrays.py +528 -0
- myawesomepkg/TSAPY1/2(d) Structured Array.py +350 -0
- myawesomepkg/TSAPY1/3 (A) Handling Missing Data.py +1013 -0
- myawesomepkg/TSAPY1/4A_Merge_Joins.py +1209 -0
- myawesomepkg/TSAPY1/9A_Dplyr.py +85 -0
- myawesomepkg/TSAPY1/9B_Tidyr.py +71 -0
- myawesomepkg/TSAPY1/Aggregation_Groupin_Pivot_Filter_Vectorice_Time_Series.py +1999 -0
- myawesomepkg/TSAPY1/Combining_Joins.py +1209 -0
- myawesomepkg/TSAPY1/P4-1-different_distance_methods_(euclidean)_with_prediction,_test_score_and_confusion_matrix1.py +131 -0
- myawesomepkg/TSAPY1/P4-2-k_means_clustering_with_prediction,_test_score_and_confusion_matrix2.py +150 -0
- myawesomepkg/TSAPY1/Pract3_C.py +482 -0
- myawesomepkg/TSAPY1/Pract5_Data_Visualization.py +481 -0
- myawesomepkg/TSAPY1/Practical 6.py +860 -0
- myawesomepkg/TSAPY1/Practical No 1.py +148 -0
- myawesomepkg/TSAPY1/Practical No 2.py +115 -0
- myawesomepkg/TSAPY1/Practical No 3.py +168 -0
- myawesomepkg/TSAPY1/Practical No 4 A.py +233 -0
- myawesomepkg/TSAPY1/Practical No 4 B.py +137 -0
- myawesomepkg/TSAPY1/Practical No 5.py +52 -0
- myawesomepkg/TSAPY1/Practical No 6.py +29 -0
- myawesomepkg/TSAPY1/Practical No 7.py +67 -0
- myawesomepkg/TSAPY1/Practical No 8.py +108 -0
- myawesomepkg/TSAPY1/Print_R.py +123 -0
- myawesomepkg/TSAPY1/R_Graph.py +32 -0
- myawesomepkg/TSAPY1/Working_Ggplot.py +53 -0
- myawesomepkg/TSAPY1/__init__.py +0 -0
- myawesomepkg/TSAPY1/p1_2_pca_iris.py +141 -0
- myawesomepkg/TSAPY1/p2_1_find_s.py +78 -0
- myawesomepkg/TSAPY1/p2_bcandidate_elimination_algorithm_(1).py +85 -0
- myawesomepkg/TSAPY1/p3_1_least_square_regression.py +105 -0
- myawesomepkg/TSAPY1/p3_2_logistic_regression_algorithm.py +79 -0
- myawesomepkg/TSAPY1/p5_1_hierarchical_clustering.py +143 -0
- myawesomepkg/TSAPY1/p5_2_k_nearest_neighbour_algorithm.py +104 -0
- myawesomepkg/TSAPY1/p6_1_id3_algorithm_.py +199 -0
- myawesomepkg/TSAPY1/p7_1_ann_backpropagation_algorithm.py +116 -0
- myawesomepkg/TSAPY1/p7_2_bds_association_rule_mining.py +99 -0
- myawesomepkg/TSAPY1/p8_1_gaussian_naive_bayes_.py +97 -0
- myawesomepkg/TSAPY1/p8_2_naive_bayes_document_classifier.py +111 -0
- myawesomepkg/TSAPY1/p9_1bayesian_network.py +91 -0
- myawesomepkg/TSAPY1/p9_b_loess_regression.py +113 -0
- myawesomepkg/TSAPY1/p_1_test_and_train.py +98 -0
- myawesomepkg/TSAPY1/pract3A-B.py +3212 -0
- myawesomepkg/TSAPY1/practical_no_3.py +167 -0
- myawesomepkg/TSAPY1/practical_no_4.py +215 -0
- myawesomepkg/TSAPY1/practical_no_4b.py +78 -0
- myawesomepkg/TSAPY1/practical_no_5_ac_and_pca.py +39 -0
- myawesomepkg/TSAPY1/practical_no_6.py +37 -0
- myawesomepkg/TSAPY1/practical_no_7.py +69 -0
- myawesomepkg/TSAPY1/practical_no_8.py +79 -0
- myawesomepkg/TSAPY1/tsa_practical_no_1.py +287 -0
- myawesomepkg/TSAPY1/tsa_practical_no_2.py +121 -0
- myawesomepkg/__init__.py +1 -0
- myawesomepkg/core.py +2 -0
- myawesomepkg-0.1.8.dist-info/METADATA +17 -0
- myawesomepkg-0.1.8.dist-info/RECORD +64 -0
- myawesomepkg-0.1.8.dist-info/WHEEL +5 -0
- myawesomepkg-0.1.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+"""P8-2 Naive Bayes Document Classifier.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1R4hGEod7Xzfmett09KGokYk375k1xX5W
+"""
+
+import pandas as pd
+
+msg = pd.read_excel('/content/naivetext.xlsx')
+msg
+
+print('The dimensions of the dataset', msg.shape)
+
+msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
+X = msg.message
+y = msg.labelnum
+print(X)
+print(y)
+
+# Splitting the dataset into train and test data
+from sklearn.model_selection import train_test_split
+xtrain, xtest, ytrain, ytest = train_test_split(X, y)
+
+print('\n The total number of Training Data :', ytrain.shape)
+
+print('\n The total number of Test Data :', ytest.shape)
+
+# The output of CountVectorizer is a sparse matrix
+
+from sklearn.feature_extraction.text import CountVectorizer
+count_vect = CountVectorizer()
+xtrain_dtm = count_vect.fit_transform(xtrain)
+xtest_dtm = count_vect.transform(xtest)
+print('\n The words or Tokens in the text documents \n', count_vect.get_feature_names_out())
+
+df = pd.DataFrame(xtrain_dtm.toarray(), columns=count_vect.get_feature_names_out())
+
+# Training a Naive Bayes (NB) classifier on the training data
+from sklearn.naive_bayes import MultinomialNB
+
+clf = MultinomialNB().fit(xtrain_dtm, ytrain)
+predicted = clf.predict(xtest_dtm)
+
+# Printing accuracy, confusion matrix, precision and recall
+from sklearn import metrics
+print('\n Accuracy of the classifier is ', metrics.accuracy_score(ytest, predicted))
+
+# Checking the predictions made
+print(xtest)
+print(predicted)
+
+print('\n Confusion matrix\n', metrics.confusion_matrix(ytest, predicted))
+print('\n The value of Recall',
+      metrics.recall_score(ytest, predicted))
+
+# Simple Naive Bayes Text Classifier
+
+
+
+# my code
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from sklearn import metrics
+
+# Load dataset
+data = pd.read_excel("/content/naivetext.xlsx")
+print("Dataset Loaded Successfully!")
+print("Shape of Dataset:", data.shape)
+
+# Convert labels to numbers (pos = 1, neg = 0)
+data['labelnum'] = data['label'].map({'pos': 1, 'neg': 0})
+
+# Split input (X) and output (y)
+X = data['message']
+y = data['labelnum']
+
+# Split data into training and testing sets
+x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
+print("\nTraining Samples:", len(x_train))
+print("Testing Samples:", len(x_test))
+
+# Convert text into numeric features
+vectorizer = CountVectorizer()
+x_train_dtm = vectorizer.fit_transform(x_train)
+x_test_dtm = vectorizer.transform(x_test)
+
+print("\nWords (Tokens):")
+print(vectorizer.get_feature_names_out())
+
+# Train the Naive Bayes model
+model = MultinomialNB()
+model.fit(x_train_dtm, y_train)
+
+# Predict on test data
+y_pred = model.predict(x_test_dtm)
+
+# Show results
+print("\nAccuracy:", metrics.accuracy_score(y_test, y_pred))
+print("\nConfusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))
+print("\nRecall:", metrics.recall_score(y_test, y_pred))
+
+# Check a few predictions
+print("\nSample Predictions:")
+for msg, pred in zip(x_test[:5], y_pred[:5]):
+    print(f"Message: {msg} --> Predicted Label: {'pos' if pred == 1 else 'neg'}")
+
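The packaged script expects /content/naivetext.xlsx, which is not shipped in the wheel. As a minimal sketch of the same CountVectorizer + MultinomialNB pipeline, assuming a small hypothetical inline dataset with the same message and label columns:

# Minimal sketch of the pipeline above with a hypothetical inline dataset
# (the packaged script reads /content/naivetext.xlsx, not included here).
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

toy = pd.DataFrame({
    'message': ['I love this sandwich', 'This is an amazing place',
                'I feel very good about these beers', 'This is my best work',
                'I do not like this restaurant', 'I am tired of this stuff',
                'He is my sworn enemy', 'I hate this food'],
    'label':   ['pos', 'pos', 'pos', 'pos', 'neg', 'neg', 'neg', 'neg'],
})
toy['labelnum'] = toy['label'].map({'pos': 1, 'neg': 0})

xtr, xte, ytr, yte = train_test_split(toy['message'], toy['labelnum'],
                                      test_size=0.25, random_state=42)
vec = CountVectorizer()
clf = MultinomialNB().fit(vec.fit_transform(xtr), ytr)
pred = clf.predict(vec.transform(xte))
print('Accuracy:', metrics.accuracy_score(yte, pred))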
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+"""P9-1Bayesian Network.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1K14g_pZINxfYSFzX4UtRfyawHgOG9TI1
+"""
+
+# pip install pgmpy   (run in a notebook cell or shell before executing this script)
+
+import numpy as np
+import csv
+import pandas as pd
+from pgmpy.models import DiscreteBayesianNetwork
+from pgmpy.estimators import MaximumLikelihoodEstimator
+from pgmpy.inference import VariableElimination
+
+# Read the Cleveland Heart Disease data
+heartDisease = pd.read_csv('/content/heart (1).csv')
+heartDisease = heartDisease.replace('?', np.nan)
+
+# Display the data
+print('Few examples from the dataset are given below')
+print(heartDisease.head())
+
+# Model the Bayesian Network
+# Note: pgmpy raises "ImportError: BayesianNetwork has been deprecated. Please use DiscreteBayesianNetwork instead.", hence the import above.
+
+model = DiscreteBayesianNetwork([('age', 'target'), ('sex', 'target'),
+                                 ('exang', 'target'), ('cp', 'target'),
+                                 ('target', 'restecg'), ('target', 'chol')])
+
+# Learning CPDs using Maximum Likelihood Estimators
+print('\n Learning CPD using Maximum likelihood estimators')
+model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
+
+# Inferencing with the Bayesian Network
+print('\n Inferencing with Bayesian Network:')
+HeartDisease_infer = VariableElimination(model)
+
+# Computing the probability of RestEcg given that HeartDisease is present
+print('\n 1. Probability of RestEcg given HeartDisease')
+q = HeartDisease_infer.query(variables=['restecg'], evidence={'target': 1}, joint=False)
+print(q['restecg'])
+
+# Computing the probability of HeartDisease given ChestPain
+print('\n 2. Probability of HeartDisease given ChestPain')
+q = HeartDisease_infer.query(variables=['target'], evidence={'cp': 3}, joint=False)
+print(q['target'])
+
+import pandas as pd
+import numpy as np
+from pgmpy.models import DiscreteBayesianNetwork
+from pgmpy.estimators import MaximumLikelihoodEstimator
+from pgmpy.inference import VariableElimination
+
+# Load dataset
+data = pd.read_csv('/content/heart (1).csv')
+data = data.replace('?', np.nan)
+
+print("Sample data:")
+print(data.head())
+
+# Define the Bayesian Network structure using DiscreteBayesianNetwork
+model = DiscreteBayesianNetwork([
+    ('age', 'target'),
+    ('sex', 'target'),
+    ('exang', 'target'),
+    ('cp', 'target'),
+    ('target', 'restecg'),
+    ('target', 'chol')
+])
+
+# Fit the model
+model.fit(data, estimator=MaximumLikelihoodEstimator)
+print("\nModel training complete using Maximum Likelihood Estimation")
+
+# Inference
+infer = VariableElimination(model)
+
+# Query 1: Probability of RestEcg given HeartDisease
+print("\n1. Probability of RestEcg given HeartDisease:")
+q1 = infer.query(variables=['restecg'], evidence={'target': 1})
+print(q1)
+
+# Query 2: Probability of HeartDisease given Chest Pain (cp=3)
+print("\n2. Probability of HeartDisease given Chest Pain:")
+q2 = infer.query(variables=['target'], evidence={'cp': 3})
+print(q2)
+
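The deprecation note in the script above tracks a pgmpy rename. A small import shim is one possible way to keep the script running across pgmpy versions, falling back to the old name when the new one is unavailable:

# Version-tolerant import: newer pgmpy releases expose DiscreteBayesianNetwork,
# older ones only the (since-deprecated) BayesianNetwork name.
try:
    from pgmpy.models import DiscreteBayesianNetwork
except ImportError:
    from pgmpy.models import BayesianNetwork as DiscreteBayesianNetwork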
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+"""P9-B-Loess Regression.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1acJ-zD2I0flDn0hYujYD4B6NI2hEALAS
+"""
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+def kernel(point, xmat, k):
+    m, n = np.shape(xmat)
+    weights = np.asmatrix(np.eye(m))
+    for j in range(m):
+        diff = point - xmat[j]
+        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k**2))
+    return weights
+
+def localWeight(point, xmat, ymat, k):
+    wei = kernel(point, xmat, k)
+    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
+    return W
+
+def localWeightRegression(xmat, ymat, k):
+    m, n = np.shape(xmat)  # Determine the dimensions of xmat (X)
+    ypred = np.zeros(m)    # Set up ypred
+    for i in range(m):
+        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
+    return ypred
+
+# Load data points
+data = pd.read_csv('/content/10-dataset.csv')
+bill = np.array(data.total_bill)
+tip = np.array(data.tip)
+
+# `np.mat` was removed in the NumPy 2.0 release; use `np.asmatrix` instead.
+
+# Prepare the inputs: prepend a column of ones (intercept) to bill
+mbill = np.asmatrix(bill)
+mtip = np.asmatrix(tip)
+
+m = np.shape(mbill)[1]
+one = np.asmatrix(np.ones(m))
+X = np.hstack((one.T, mbill.T))
+
+# Set the bandwidth k here
+ypred = localWeightRegression(X, mtip, 0.4)
+SortIndex = X[:, 1].argsort(0)
+xsort = X[SortIndex][:, 0]
+
+fig = plt.figure()
+ax = fig.add_subplot(1, 1, 1)
+ax.scatter(bill, tip, color='green')
+ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
+plt.xlabel('Total bill')
+plt.ylabel('Tip')
+plt.show()
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+# Load dataset
+data = pd.read_csv('/content/10-dataset.csv')
+bill = data['total_bill'].values
+tip = data['tip'].values
+
+# Prepare X matrix with intercept term
+X = np.column_stack((np.ones(len(bill)), bill))
+y = tip.reshape(-1, 1)
+
+# Gaussian kernel function to calculate weights
+def kernel(point, X, k):
+    m = X.shape[0]
+    W = np.eye(m)
+    for i in range(m):
+        diff = point - X[i]
+        W[i, i] = np.exp(-(diff @ diff.T) / (2 * k**2))
+    return W
+
+# Calculate weights and regression coefficients for a point
+def lwlr_point(point, X, y, k):
+    W = kernel(point, X, k)
+    theta = np.linalg.inv(X.T @ W @ X) @ (X.T @ W @ y)
+    return point @ theta
+
+# Predict for all points
+def lwlr(X, y, k):
+    y_pred = np.zeros(len(X))
+    for i in range(len(X)):
+        y_pred[i] = lwlr_point(X[i], X, y, k)
+    return y_pred
+
+# Apply LWLR with bandwidth k=0.4
+k = 0.4
+y_pred = lwlr(X, y, k)
+
+# Sort X for plotting a smooth curve
+sort_idx = X[:, 1].argsort()
+X_sorted = X[sort_idx]
+y_pred_sorted = y_pred[sort_idx]
+
+# Plot results
+plt.scatter(bill, tip, color='green', label='Data points')
+plt.plot(X_sorted[:, 1], y_pred_sorted, color='red', linewidth=2, label='LWLR fit')
+plt.xlabel('Total bill')
+plt.ylabel('Tip')
+plt.title('Locally Weighted Linear Regression')
+plt.legend()
+plt.show()
+
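The bandwidth k controls how local the fit is: a small k tracks the scatter closely, a large k approaches an ordinary least-squares line. A short sketch, reusing the lwlr, X, y, bill, and tip names defined in the second script above (the k values are illustrative, not tuned), compares several fits:

# Compare LWLR fits for several bandwidths (reuses lwlr, X, y, bill, tip
# from the script above; the k values here are illustrative, not tuned).
import matplotlib.pyplot as plt

sort_idx = X[:, 1].argsort()
plt.scatter(bill, tip, color='green', label='Data points')
for k in (0.2, 0.8, 5.0):
    fit = lwlr(X, y, k)
    plt.plot(X[sort_idx, 1], fit[sort_idx], linewidth=2, label=f'k = {k}')
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.legend()
plt.show()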
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+"""P-1_test and train.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/14lT2R6bt-YH_7p_1kdPFnVQ3YIIiaBLJ
+"""
+
+# Design a simple machine learning model to train on the training instances and test it on the held-out instances
+import pandas as pd
+import matplotlib.pyplot as plt
+df = pd.read_csv("/content/Cars.csv")  # Read data
+df.head()
+
+plt.scatter(df['Milage'], df['Sell Price'])  # Plot Milage (independent variable) against Sell Price (dependent variable)
+plt.show()
+plt.scatter(df['Age'], df['Sell Price'])  # Plot Age (independent variable) against Sell Price (dependent variable)
+plt.show()
+X = df[['Milage', 'Age']]  # Determine X
+Y = df['Sell Price']  # Determine Y
+X  # Display X
+Y  # Display Y
+from sklearn.model_selection import train_test_split
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)  # Create a training set and a testing set
+len(X_train)
+len(X_test)
+from sklearn.linear_model import LinearRegression
+clf = LinearRegression()
+clf.fit(X_train, Y_train)  # Train the model
+clf.predict(X_test)  # Use the trained model to predict on the testing set
+clf.score(X_test, Y_test)  # Calculate the R^2 score
+
+# my code
+
+# Importing necessary libraries
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
+
+# Load dataset
+df = pd.read_csv(r"/content/Cars.csv")  # Use a raw string for Windows paths
+print("✅ Data Loaded Successfully!")
+print(df.head())
+
+# Data visualization
+plt.figure(figsize=(10, 4))
+plt.subplot(1, 2, 1)
+plt.scatter(df['Milage'], df['Sell Price'], color='blue')
+plt.title('Milage vs Sell Price')
+plt.xlabel('Milage')
+plt.ylabel('Sell Price')
+
+plt.subplot(1, 2, 2)
+plt.scatter(df['Age'], df['Sell Price'], color='green')
+plt.title('Age vs Sell Price')
+plt.xlabel('Age')
+plt.ylabel('Sell Price')
+
+plt.tight_layout()
+plt.show()
+
+# Features (X) and Target (Y)
+X = df[['Milage', 'Age']]
+Y = df['Sell Price']
+
+# Splitting data into training and testing sets (80% train, 20% test)
+X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
+
+# Create and train the Linear Regression model
+model = LinearRegression()
+model.fit(X_train, Y_train)
+
+# Predict the test set results
+Y_pred = model.predict(X_test)
+
+# Evaluate the model
+r2 = r2_score(Y_test, Y_pred)
+mae = mean_absolute_error(Y_test, Y_pred)
+mse = mean_squared_error(Y_test, Y_pred)
+
+# Display metrics
+print("\n📊 Model Evaluation Metrics:")
+print(f"R² Score (Accuracy): {r2:.4f}")
+print(f"Mean Absolute Error: {mae:.2f}")
+print(f"Mean Squared Error: {mse:.2f}")
+
+# Compare actual vs predicted values visually
+plt.figure(figsize=(6, 4))
+plt.scatter(Y_test, Y_pred, color='purple')
+plt.xlabel('Actual Sell Price')
+plt.ylabel('Predicted Sell Price')
+plt.title('Actual vs Predicted Sell Price')
+plt.grid(True)
+plt.show()
+
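With the regression fitted, the same pipeline can price an unseen car. A brief sketch, reusing the fitted model from the script above; the mileage and age values below are made up for illustration:

# Predict the sell price of a hypothetical unseen car (reuses the fitted
# `model` from the script above; the feature values are illustrative).
import pandas as pd

new_car = pd.DataFrame({'Milage': [45000], 'Age': [4]})
predicted_price = model.predict(new_car)
print(f"Predicted Sell Price: {predicted_price[0]:.2f}")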