myawesomepkg 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- myawesomepkg/TSAPY1/1 (A) Working with Numpy Arrays.py +1146 -0
- myawesomepkg/TSAPY1/1(B)Aggregation (1).py +319 -0
- myawesomepkg/TSAPY1/1(C) Broadcasting .py +328 -0
- myawesomepkg/TSAPY1/10-A_Load_stringr.py +77 -0
- myawesomepkg/TSAPY1/10-B_Forcats.py +70 -0
- myawesomepkg/TSAPY1/2(a) Comparison, Masking And Boolean Logic (1).py +497 -0
- myawesomepkg/TSAPY1/2(b)Fancy Indexing.py +594 -0
- myawesomepkg/TSAPY1/2(c) Sorting Arrays.py +528 -0
- myawesomepkg/TSAPY1/2(d) Structured Array.py +350 -0
- myawesomepkg/TSAPY1/3 (A) Handling Missing Data.py +1013 -0
- myawesomepkg/TSAPY1/4A_Merge_Joins.py +1209 -0
- myawesomepkg/TSAPY1/9A_Dplyr.py +85 -0
- myawesomepkg/TSAPY1/9B_Tidyr.py +71 -0
- myawesomepkg/TSAPY1/Aggregation_Groupin_Pivot_Filter_Vectorice_Time_Series.py +1999 -0
- myawesomepkg/TSAPY1/Combining_Joins.py +1209 -0
- myawesomepkg/TSAPY1/P4-1-different_distance_methods_(euclidean)_with_prediction,_test_score_and_confusion_matrix1.py +131 -0
- myawesomepkg/TSAPY1/P4-2-k_means_clustering_with_prediction,_test_score_and_confusion_matrix2.py +150 -0
- myawesomepkg/TSAPY1/Pract3_C.py +482 -0
- myawesomepkg/TSAPY1/Pract5_Data_Visualization.py +481 -0
- myawesomepkg/TSAPY1/Practical 6.py +860 -0
- myawesomepkg/TSAPY1/Practical No 1.py +148 -0
- myawesomepkg/TSAPY1/Practical No 2.py +115 -0
- myawesomepkg/TSAPY1/Practical No 3.py +168 -0
- myawesomepkg/TSAPY1/Practical No 4 A.py +233 -0
- myawesomepkg/TSAPY1/Practical No 4 B.py +137 -0
- myawesomepkg/TSAPY1/Practical No 5.py +52 -0
- myawesomepkg/TSAPY1/Practical No 6.py +29 -0
- myawesomepkg/TSAPY1/Practical No 7.py +67 -0
- myawesomepkg/TSAPY1/Practical No 8.py +108 -0
- myawesomepkg/TSAPY1/Print_R.py +123 -0
- myawesomepkg/TSAPY1/R_Graph.py +32 -0
- myawesomepkg/TSAPY1/Working_Ggplot.py +53 -0
- myawesomepkg/TSAPY1/__init__.py +0 -0
- myawesomepkg/TSAPY1/p1_2_pca_iris.py +141 -0
- myawesomepkg/TSAPY1/p2_1_find_s.py +78 -0
- myawesomepkg/TSAPY1/p2_bcandidate_elimination_algorithm_(1).py +85 -0
- myawesomepkg/TSAPY1/p3_1_least_square_regression.py +105 -0
- myawesomepkg/TSAPY1/p3_2_logistic_regression_algorithm.py +79 -0
- myawesomepkg/TSAPY1/p5_1_hierarchical_clustering.py +143 -0
- myawesomepkg/TSAPY1/p5_2_k_nearest_neighbour_algorithm.py +104 -0
- myawesomepkg/TSAPY1/p6_1_id3_algorithm_.py +199 -0
- myawesomepkg/TSAPY1/p7_1_ann_backpropagation_algorithm.py +116 -0
- myawesomepkg/TSAPY1/p7_2_bds_association_rule_mining.py +99 -0
- myawesomepkg/TSAPY1/p8_1_gaussian_naive_bayes_.py +97 -0
- myawesomepkg/TSAPY1/p8_2_naive_bayes_document_classifier.py +111 -0
- myawesomepkg/TSAPY1/p9_1bayesian_network.py +91 -0
- myawesomepkg/TSAPY1/p9_b_loess_regression.py +113 -0
- myawesomepkg/TSAPY1/p_1_test_and_train.py +98 -0
- myawesomepkg/TSAPY1/pract3A-B.py +3212 -0
- myawesomepkg/TSAPY1/practical_no_3.py +167 -0
- myawesomepkg/TSAPY1/practical_no_4.py +215 -0
- myawesomepkg/TSAPY1/practical_no_4b.py +78 -0
- myawesomepkg/TSAPY1/practical_no_5_ac_and_pca.py +39 -0
- myawesomepkg/TSAPY1/practical_no_6.py +37 -0
- myawesomepkg/TSAPY1/practical_no_7.py +69 -0
- myawesomepkg/TSAPY1/practical_no_8.py +79 -0
- myawesomepkg/TSAPY1/tsa_practical_no_1.py +287 -0
- myawesomepkg/TSAPY1/tsa_practical_no_2.py +121 -0
- myawesomepkg/__init__.py +1 -0
- myawesomepkg/core.py +2 -0
- myawesomepkg-0.1.8.dist-info/METADATA +17 -0
- myawesomepkg-0.1.8.dist-info/RECORD +64 -0
- myawesomepkg-0.1.8.dist-info/WHEEL +5 -0
- myawesomepkg-0.1.8.dist-info/top_level.txt +1 -0
--- /dev/null
+++ b/myawesomepkg/TSAPY1/p5_2_k_nearest_neighbour_algorithm.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+"""P5-2-k-Nearest Neighbour algorithm.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/11ueW4m9D1Cgcs82q8_mtcg4Vgh5Qb0zi
+"""
+
+import pandas as pd
+import numpy as np
+from matplotlib import pyplot as plt
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn import preprocessing
+from sklearn.model_selection import train_test_split
+iris = pd.read_csv('/content/iris.csv')  # Import the iris.csv dataset
+iris.head()  # Display the iris dataset
+iris.shape  # Display the dimensions of the iris dataset
+iris['variety'].value_counts()  # Display the number of instances belonging to every value of the dependent variable
+iris.columns  # Display the column headers of the iris dataset
+iris.values  # Display the values of the dataset
+X = iris.iloc[:, :4]  # Define the independent variable matrix X
+X.head()  # Display X
+y = iris.iloc[:, -1]  # Define the dependent variable y
+y.head()  # Display y
+X = preprocessing.StandardScaler().fit_transform(X)  # Preprocess the data to achieve a mean of 0 and standard deviation of 1
+X[0:4]  # Display the preprocessed data
+X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.3)  # Create the training set and the testing set
+Y_test.shape  # Display the dimensions of Y_test
+knnmodel = KNeighborsClassifier(n_neighbors=3)  # Build the KNN model for k=3
+knnmodel.fit(X_train, Y_train)  # Train the KNN model
+Y_pred = knnmodel.predict(X_test)  # Use the KNN model to predict the classes of the test set
+Y_pred  # Display the result of prediction
+
+# Calculate the accuracy of the model
+from sklearn.metrics import accuracy_score
+accuracy_score(Y_test, Y_pred)
+
+# Constructing the confusion matrix
+from sklearn.metrics import confusion_matrix
+cm = confusion_matrix(Y_test.values, Y_pred)
+cm
+
+# Visualization of the output
+
+cm1 = pd.DataFrame(data=cm, index=['Setosa', 'Versicolor', 'Virginica'], columns=['Setosa', 'Versicolor', 'Virginica'])
+cm1
+pred_output = pd.DataFrame(data=[Y_test.values, Y_pred], index=['Y_test', 'Y_pred'])
+pred_output.transpose()
+
+# Import necessary libraries
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
+
+# Load the dataset
+iris = pd.read_csv(r"/content/iris.csv")
+
+# Display basic information
+print("Dataset Head:\n", iris.head())
+print("\nShape of dataset:", iris.shape)
+print("\nClass distribution:\n", iris['variety'].value_counts())
+
+# Split data into features and target
+X = iris.iloc[:, :-1]  # Independent variables
+y = iris.iloc[:, -1]   # Target variable
+
+# Standardize the features
+scaler = StandardScaler()
+X_scaled = scaler.fit_transform(X)
+
+# Split into train and test sets
+X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)
+
+# Build and train the KNN model
+knn_model = KNeighborsClassifier(n_neighbors=3)
+knn_model.fit(X_train, y_train)
+
+# Make predictions
+y_pred = knn_model.predict(X_test)
+
+# Calculate model accuracy
+accuracy = accuracy_score(y_test, y_pred)
+print("\nModel Accuracy: {:.2f}%".format(accuracy * 100))
+
+# Construct the confusion matrix
+cm = confusion_matrix(y_test, y_pred)
+cm_df = pd.DataFrame(cm, index=iris['variety'].unique(), columns=iris['variety'].unique())
+print("\nConfusion Matrix:\n", cm_df)
+
+# Visualize confusion matrix
+disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=iris['variety'].unique())
+disp.plot(cmap='Blues')
+plt.title("Confusion Matrix - KNN Classifier")
+plt.show()
+
+# Combine predictions and actual values for comparison
+pred_output = pd.DataFrame({'Actual': y_test.values, 'Predicted': y_pred})
+print("\nPrediction Results:\n", pred_output.head(10))
+
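Both listings above fix k at 3 without comparing alternatives. A minimal sketch (not part of the package) of how one might compare a few values of k on the same split, assuming the X_train, X_test, y_train, y_test variables produced by the second listing:

    # Compare test accuracy for several candidate values of k (illustrative only)
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score

    for k in (1, 3, 5, 7, 9):
        model = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
        print(f"k={k}: accuracy={accuracy_score(y_test, model.predict(X_test)):.3f}")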
--- /dev/null
+++ b/myawesomepkg/TSAPY1/p6_1_id3_algorithm_.py
@@ -0,0 +1,199 @@
+# -*- coding: utf-8 -*-
+"""P6-1-ID3 Algorithm .ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/145Q19eQyjP4BwvvSJCQ3_q12QGmQJ-il
+"""
+
+import warnings
+warnings.filterwarnings('ignore')
+
+import pandas as pd
+from pandas import DataFrame
+
+df_tennis = pd.DataFrame(data=pd.read_csv('/content/PlayTennis.csv'))
+df_tennis.head()
+
+# Entropy formula procedure
+def entropy(probs):
+    import math
+    return sum([-prob * math.log(prob, 2) for prob in probs])
+
+# Entropy calculation for the attributes
+def entropy_of_list(a_list):  # Input: a list of class labels
+    from collections import Counter
+    cnt = Counter(x for x in a_list)  # Counter tallies each class label
+    num_instances = len(a_list) * 1.0  # = 14
+    probs = [x / num_instances for x in cnt.values()]  # x is the count of YES/NO labels: 9/14 and 5/14
+    return entropy(probs)  # Call entropy
+
+# Calculating the entropy of the target variable
+total_entropy = entropy_of_list(df_tennis['PlayTennis'])
+print('\n The total Entropy is:\n', total_entropy)
+
+# Calculating Information Gain
+def information_gain(df, split_attribute_name, target_attribute_name, trace=0):
+    df_split = df.groupby(split_attribute_name)  # Split the dataset into groups (subsets) by attribute value
+    nobs = len(df.index) * 1.0  # Count the total number of observations
+    # Compute entropy and proportion for each group
+    df_agg_ent = df_split.agg({target_attribute_name: [entropy_of_list, lambda x: len(x) / nobs]})[target_attribute_name]
+    df_agg_ent.columns = ['Entropy', 'PropObservations']  # Rename columns
+    new_entropy = sum(df_agg_ent['Entropy'] * df_agg_ent['PropObservations'])
+    old_entropy = entropy_of_list(df[target_attribute_name])
+    return old_entropy - new_entropy
+
+"""Explanation: df_split.agg()
+For each group, aggregate (agg) the target_attribute_name (PlayTennis) using two functions:
+
+entropy_of_list → entropy of that group's labels.
+
+lambda x: len(x)/nobs → proportion of total rows that belong to this group.
+
+Example for "Outlook":
+
+Group "Sunny" (5 rows): entropy ≈ 0.971, proportion = 5/14.
+
+Group "Overcast" (4 rows): entropy = 0.0, proportion = 4/14.
+
+Group "Rain" (5 rows): entropy ≈ 0.971, proportion = 5/14.
+
+The [target_attribute_name] selects only the part of the DataFrame we care about.
+"""
+
+def id3(df, target_attribute_name, attribute_names, default_class=None):
+    from collections import Counter
+    cnt = Counter(x for x in df[target_attribute_name])  # Count class labels YES/NO
+    if len(cnt) == 1:  # Pure subset: only one class left
+        return next(iter(cnt))  # Return that single class label
+    elif df.empty or (not attribute_names):
+        return default_class  # Return the default class for an empty dataset or no attributes left
+    else:
+        default_class = max(cnt.keys())  # Assign the default class as the max of the class label keys
+        # Compute information gain for each attribute
+        gainz = [information_gain(df, attr, target_attribute_name) for attr in attribute_names]
+        # Select the best attribute with maximum information gain
+        index_of_max = gainz.index(max(gainz))  # Index of the best attribute
+        best_attr = attribute_names[index_of_max]
+        tree = {best_attr: {}}  # Initiate the tree with the best attribute as a node
+        # After choosing the best attribute, it is removed from the list
+        remaining_attribute_names = [i for i in attribute_names if i != best_attr]
+        # Recursive splitting and building subtrees
+        for attr_val, data_subset in df.groupby(best_attr):
+            subtree = id3(data_subset, target_attribute_name, remaining_attribute_names, default_class)
+            tree[best_attr][attr_val] = subtree  # Attach the resulting subtree to the main tree
+        return tree
+
+attribute_names = list(df_tennis.columns)
+attribute_names.remove('PlayTennis')  # Remove the class attribute
+
+# Run the algorithm:
+from pprint import pprint
+tree = id3(df_tennis, 'PlayTennis', attribute_names)
+print("\n The resultant DECISION TREE is:\n")
+pprint(tree)
+
+# Result for a new instance
+def classify(instance, tree, default=None):
+    attribute = next(iter(tree))  # Get the first attribute from the tree
+    if instance[attribute] in tree[attribute].keys():  # Check if the instance value exists in the tree
+        result = tree[attribute][instance[attribute]]  # Get the subtree
+        if isinstance(result, dict):  # Recursive case: the result is still a tree
+            return classify(instance, result)
+        else:
+            return result
+
+"""Get subtree:
+If instance["Outlook"] = "Sunny", then
+result = {'Humidity': {'High': 'No', 'Normal': 'Yes'}}
+"""
+
+# Creation of the training set
+training_data = df_tennis.iloc[0:-4]
+print(training_data)
+
+# Test set
+test_data = df_tennis.iloc[-4:]
+print(test_data)
+
+train_tree = id3(training_data, 'PlayTennis', attribute_names)
+
+# Prediction on new data
+test_data['predicted'] = test_data.apply(classify, axis=1, args=(train_tree, 'yes'))
+print(test_data['predicted'])
+
+import pandas as pd
+import numpy as np
+import math
+from collections import Counter
+from pprint import pprint
+import warnings
+warnings.filterwarnings("ignore")
+
+# Load dataset
+df = pd.read_csv(r"/content/PlayTennis.csv")
+print("Dataset:\n", df.head())
+
+# ---------- Step 1: Entropy Calculation ----------
+def entropy(values):
+    probs = [v / len(values) for v in Counter(values).values()]
+    return -sum(p * math.log2(p) for p in probs)
+
+# ---------- Step 2: Information Gain ----------
+def information_gain(df, attr, target):
+    total_entropy = entropy(df[target])
+    vals, counts = np.unique(df[attr], return_counts=True)
+    weighted_entropy = sum(
+        (counts[i] / len(df)) * entropy(df[df[attr] == vals[i]][target])
+        for i in range(len(vals))
+    )
+    return total_entropy - weighted_entropy
+
+# ---------- Step 3: ID3 Algorithm ----------
+def id3(df, target, attributes):
+    # Base cases
+    if len(df[target].unique()) == 1:
+        return df[target].iloc[0]
+    if not attributes:
+        return df[target].mode()[0]
+
+    # Choose best attribute
+    gains = [information_gain(df, attr, target) for attr in attributes]
+    best_attr = attributes[np.argmax(gains)]
+
+    # Build tree recursively
+    tree = {best_attr: {}}
+    for val in df[best_attr].unique():
+        subset = df[df[best_attr] == val]
+        subtree = id3(subset, target, [a for a in attributes if a != best_attr])
+        tree[best_attr][val] = subtree
+    return tree
+
+# ---------- Step 4: Train the Model ----------
+attributes = list(df.columns)
+attributes.remove('PlayTennis')
+tree = id3(df, 'PlayTennis', attributes)
+
+print("\nDecision Tree:")
+pprint(tree)
+
+# ---------- Step 5: Classification ----------
+def classify(instance, tree):
+    attr = next(iter(tree))
+    val = instance[attr]
+    subtree = tree[attr].get(val, None)
+    if isinstance(subtree, dict):
+        return classify(instance, subtree)
+    return subtree
+
+# ---------- Step 6: Train-Test Split ----------
+train = df.iloc[:-4]
+test = df.iloc[-4:]
+
+tree_train = id3(train, 'PlayTennis', attributes)
+
+# Predict for test set
+test['Predicted'] = test.apply(classify, axis=1, args=(tree_train,))
+print("\nPredictions:\n", test[['PlayTennis', 'Predicted']])
+
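The docstring in the hunk above quotes the per-group entropies for "Outlook" (≈ 0.971 for Sunny and Rain, 0 for Overcast). For reference, the quantities both implementations compute are, in the usual notation,

    H(S) = -\sum_{c} p_c \log_2 p_c, \qquad
    \mathrm{Gain}(S, A) = H(S) - \sum_{v \in \mathrm{values}(A)} \frac{|S_v|}{|S|} H(S_v)

so for the standard 14-row PlayTennis table (9 Yes, 5 No) that the comments refer to:

    H(S) = -\tfrac{9}{14}\log_2\tfrac{9}{14} - \tfrac{5}{14}\log_2\tfrac{5}{14} \approx 0.940, \qquad
    \mathrm{Gain}(S, \mathrm{Outlook}) = 0.940 - \left(\tfrac{5}{14}(0.971) + \tfrac{4}{14}(0) + \tfrac{5}{14}(0.971)\right) \approx 0.247.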
--- /dev/null
+++ b/myawesomepkg/TSAPY1/p7_1_ann_backpropagation_algorithm.py
@@ -0,0 +1,116 @@
+# -*- coding: utf-8 -*-
+"""P7-1-ANN Backpropagation algorithm.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1m_PxuQ8-4tq235R3C2j8vSIODh7TXBmq
+"""
+
+import numpy as np
+
+X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)  # two inputs [sleep, study]
+y = np.array(([92], [86], [89]), dtype=float)  # one output: expected percentage in the exam
+X = X / np.amax(X, axis=0)  # normalize by the column-wise maximum of X
+y = y / 100  # convert to a fraction
+
+# Variable initialization
+epoch = 5  # Setting training iterations
+lr = 0.1  # Setting learning rate
+inputlayer_neurons = 2  # number of features in the data set
+hiddenlayer_neurons = 3  # number of hidden layer neurons
+output_neurons = 1  # number of neurons at the output layer
+
+# Weight and bias initialization: draws numbers uniformly at random with the given dimensions
+wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
+bh = np.random.uniform(size=(1, hiddenlayer_neurons))
+wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
+bout = np.random.uniform(size=(1, output_neurons))
+
+# Sigmoid function
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+# Derivative of the sigmoid function
+def derivatives_sigmoid(x):
+    return x * (1 - x)
+
+# Training loop: forward and backward propagation for each epoch
+for i in range(epoch):
+    # Forward propagation
+    hinp1 = np.dot(X, wh)
+    hinp = hinp1 + bh
+    hlayer_act = sigmoid(hinp)
+    outinp1 = np.dot(hlayer_act, wout)
+    outinp = outinp1 + bout
+    output = sigmoid(outinp)
+    # Backpropagation
+    EO = y - output
+    outgrad = derivatives_sigmoid(output)
+    d_output = EO * outgrad
+    EH = d_output.dot(wout.T)
+    hiddengrad = derivatives_sigmoid(hlayer_act)  # how much the hidden layer weights contributed to the error
+    d_hiddenlayer = EH * hiddengrad
+
+    wout += hlayer_act.T.dot(d_output) * lr  # dot product of the next layer's error and the current layer's output
+    wh += X.T.dot(d_hiddenlayer) * lr
+
+    print("-----------Epoch-", i + 1, "Starts----------")
+    print("Input: \n" + str(X))
+    print("Actual Output: \n" + str(y))
+    print("Predicted Output: \n", output)
+    print("-----------Epoch-", i + 1, "Ends----------\n")
+
+print("Input: \n" + str(X))
+print("Actual Output: \n" + str(y))
+print("Predicted Output: \n", output)
+
+import numpy as np
+
+# Input (Sleep, Study hours) and Output (Exam %)
+X = np.array([[2, 9], [1, 5], [3, 6]], dtype=float)
+y = np.array([[92], [86], [89]], dtype=float)
+
+# Normalize input and output
+X = X / np.max(X, axis=0)
+y = y / 100
+
+# Hyperparameters
+epochs = 5
+lr = 0.1
+input_neurons, hidden_neurons, output_neurons = 2, 3, 1
+
+# Weight & bias initialization
+wh = np.random.rand(input_neurons, hidden_neurons)
+bh = np.random.rand(1, hidden_neurons)
+wout = np.random.rand(hidden_neurons, output_neurons)
+bout = np.random.rand(1, output_neurons)
+
+# Activation functions
+def sigmoid(x): return 1 / (1 + np.exp(-x))
+def sigmoid_derivative(x): return x * (1 - x)
+
+# Training loop
+for epoch in range(epochs):
+    # Forward propagation
+    hidden_input = np.dot(X, wh) + bh
+    hidden_output = sigmoid(hidden_input)
+    final_input = np.dot(hidden_output, wout) + bout
+    output = sigmoid(final_input)
+
+    # Backpropagation
+    error = y - output
+    d_output = error * sigmoid_derivative(output)
+    d_hidden = d_output.dot(wout.T) * sigmoid_derivative(hidden_output)
+
+    # Weight & bias updates
+    wout += hidden_output.T.dot(d_output) * lr
+    wh += X.T.dot(d_hidden) * lr
+
+    # Display progress
+    print(f"\n---- Epoch {epoch+1} ----")
+    print("Predicted Output:\n", output)
+
+# Final results
+print("\nFinal Predicted Output:\n", output)
+
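Note that neither listing above updates the biases inside the training loop: only wh and wout are adjusted, while bh and bout keep their initial random values. A minimal sketch of the missing bias updates, using the variable names of the second listing (illustrative, not part of the package):

    # Bias gradients: sum each layer's deltas over the batch, scaled by the learning rate
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    bh += np.sum(d_hidden, axis=0, keepdims=True) * lr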
--- /dev/null
+++ b/myawesomepkg/TSAPY1/p7_2_bds_association_rule_mining.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+"""P7-2 BDS Association Rule Mining.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1Uk4gqoyuXgyg-PkXLM8bEVdLG-W-CwAa
+"""
+
+# pip install apyori
+
+# pip install mlxtend
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+# from apyori import apriori
+
+data = pd.read_csv("/content/bread_basket.csv")
+data.head()
+
+transactions = []
+# Combine items belonging to the same transaction into one list
+for i in data.Transaction.unique():
+    list_trans = list(set(data[data.Transaction == i]["Item"]))
+    if len(list_trans) > 0:
+        transactions.append(list_trans)
+
+print(transactions)
+
+"""The Apriori module requires a data frame with values of 0 and 1 (or True and False). Therefore, we one-hot encode the data to meet the requirement of the Apriori module provided by the mlxtend library."""
+
+import mlxtend
+from mlxtend.preprocessing import TransactionEncoder
+from mlxtend.frequent_patterns import association_rules, apriori
+
+trans_encoding = TransactionEncoder()
+df2 = trans_encoding.fit(transactions).transform(transactions)
+df3 = pd.DataFrame(df2, columns=trans_encoding.columns_)
+
+df3.head()
+
+"""The frequent itemsets are found using the apriori function. We use min_support = 0.05, the minimum support required for an itemset to be kept. Meanwhile, use_colnames = True keeps column names for itemsets to make them more understandable."""
+
+frequent_set = apriori(df3, min_support=0.05, use_colnames=True)
+
+frequent_set
+
+"""From these frequent itemsets, we want to find the association rules of the form: if A is bought, then B is also purchased."""
+
+rules = association_rules(frequent_set, metric='lift', min_threshold=1)
+
+rules
+
+"""Conclusion:
+As we can see, the result consists of only two association rules. Cake and Coffee are bought together more frequently than at random, with lift = 1.1 and 53% confidence.
+"""
+
+# Install required libraries
+# pip install mlxtend
+
+import pandas as pd
+from mlxtend.preprocessing import TransactionEncoder
+from mlxtend.frequent_patterns import apriori, association_rules
+
+# Step 1: Load the dataset
+data = pd.read_csv("/content/bread_basket.csv")
+print(data.head())
+
+# Step 2: Create a list of transactions
+transactions = []
+for t_id in data["Transaction"].unique():
+    items = list(set(data[data["Transaction"] == t_id]["Item"]))
+    if items:
+        transactions.append(items)
+
+print(f"\nTotal Transactions: {len(transactions)}")
+
+# Step 3: Convert to a one-hot encoded DataFrame
+te = TransactionEncoder()
+encoded_data = te.fit(transactions).transform(transactions)
+df = pd.DataFrame(encoded_data, columns=te.columns_)
+
+print("\nOne-hot encoded data sample:")
+print(df.head())
+
+# Step 4: Generate frequent itemsets using Apriori
+frequent_items = apriori(df, min_support=0.05, use_colnames=True)
+print("\nFrequent Itemsets:")
+print(frequent_items)
+
+# Step 5: Generate association rules
+rules = association_rules(frequent_items, metric="lift", min_threshold=1)
+print("\nAssociation Rules:")
+print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])
+
+# Step 6: Summary
+print("\nConclusion:")
+print("Items like Cake and Coffee are often bought together with high support and confidence.")
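For reference, the metrics reported by association_rules above are defined, for a rule A ⇒ B over N transactions, as

    \mathrm{supp}(A \cup B) = \frac{|\{t : A \cup B \subseteq t\}|}{N}, \qquad
    \mathrm{conf}(A \Rightarrow B) = \frac{\mathrm{supp}(A \cup B)}{\mathrm{supp}(A)}, \qquad
    \mathrm{lift}(A \Rightarrow B) = \frac{\mathrm{conf}(A \Rightarrow B)}{\mathrm{supp}(B)}

The "lift = 1.1 and 53% confidence" quoted in the file's conclusion are these quantities for the Cake ⇒ Coffee rule.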
--- /dev/null
+++ b/myawesomepkg/TSAPY1/p8_1_gaussian_naive_bayes_.py
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+"""P8-1 Gaussian Naive Bayes .ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1hPQd9ZfvblikDc3KJosyOf3atsItL260
+"""
+
+import numpy as np
+import pandas as pd
+
+# Import the datasets module
+from sklearn import datasets
+
+# Load the dataset
+wine = datasets.load_wine()
+print(wine)
+
+# Print the names of the 13 features
+
+print("Features: ", wine.feature_names)
+
+# Print the label names of the wine classes
+
+print("Labels: ", wine.target_names)
+
+X = pd.DataFrame(wine['data'])
+print(X.head())
+
+# Checking the dimensions of the dataset
+print(wine.data.shape)
+
+# Print the wine labels (0: class_0, 1: class_1, 2: class_2)
+print(wine.target)
+
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.30, random_state=109)
+
+# Import the Gaussian Naive Bayes model
+from sklearn.naive_bayes import GaussianNB
+
+# Create a Gaussian classifier
+gnb = GaussianNB()
+
+# Train the model using the training sets
+gnb.fit(X_train, y_train)
+
+# Predict the response for the test dataset
+y_pred = gnb.predict(X_test)
+print(y_pred)
+
+# Import the scikit-learn metrics module for accuracy calculation
+from sklearn import metrics
+
+# Model accuracy
+print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
+
+# Confusion matrix
+from sklearn.metrics import confusion_matrix
+cm = np.array(confusion_matrix(y_test, y_pred))
+cm
+
+# Simple Gaussian Naive Bayes example using the Wine dataset
+
+
+# --- Simplified rewrite ---
+
+from sklearn.datasets import load_wine
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
+from sklearn.metrics import accuracy_score, confusion_matrix
+
+# Load dataset
+wine = load_wine()
+
+# Features and labels
+X = wine.data
+y = wine.target
+
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
+
+# Create and train the model
+model = GaussianNB()
+model.fit(X_train, y_train)
+
+# Predict on test data
+y_pred = model.predict(X_test)
+
+# Print results
+print("Feature names:", wine.feature_names)
+print("Target names:", wine.target_names)
+print("\nPredicted labels:", y_pred)
+print("Accuracy:", accuracy_score(y_test, y_pred))
+print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
+
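For reference, GaussianNB fits a per-class mean \mu_{y,i} and variance \sigma_{y,i}^2 for every feature i and scores a sample with the Gaussian class-conditional density

    P(x_i \mid y) = \frac{1}{\sqrt{2\pi\sigma_{y,i}^2}} \exp\!\left(-\frac{(x_i - \mu_{y,i})^2}{2\sigma_{y,i}^2}\right)

predicting \hat{y} = \arg\max_y P(y) \prod_i P(x_i \mid y).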