myawesomepkg-0.1.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. myawesomepkg/TSAPY1/1 (A) Working with Numpy Arrays.py +1146 -0
  2. myawesomepkg/TSAPY1/1(B)Aggregation (1).py +319 -0
  3. myawesomepkg/TSAPY1/1(C) Broadcasting .py +328 -0
  4. myawesomepkg/TSAPY1/10-A_Load_stringr.py +77 -0
  5. myawesomepkg/TSAPY1/10-B_Forcats.py +70 -0
  6. myawesomepkg/TSAPY1/2(a) Comparison, Masking And Boolean Logic (1).py +497 -0
  7. myawesomepkg/TSAPY1/2(b)Fancy Indexing.py +594 -0
  8. myawesomepkg/TSAPY1/2(c) Sorting Arrays.py +528 -0
  9. myawesomepkg/TSAPY1/2(d) Structured Array.py +350 -0
  10. myawesomepkg/TSAPY1/3 (A) Handling Missing Data.py +1013 -0
  11. myawesomepkg/TSAPY1/4A_Merge_Joins.py +1209 -0
  12. myawesomepkg/TSAPY1/9A_Dplyr.py +85 -0
  13. myawesomepkg/TSAPY1/9B_Tidyr.py +71 -0
  14. myawesomepkg/TSAPY1/Aggregation_Groupin_Pivot_Filter_Vectorice_Time_Series.py +1999 -0
  15. myawesomepkg/TSAPY1/Combining_Joins.py +1209 -0
  16. myawesomepkg/TSAPY1/P4-1-different_distance_methods_(euclidean)_with_prediction,_test_score_and_confusion_matrix1.py +131 -0
  17. myawesomepkg/TSAPY1/P4-2-k_means_clustering_with_prediction,_test_score_and_confusion_matrix2.py +150 -0
  18. myawesomepkg/TSAPY1/Pract3_C.py +482 -0
  19. myawesomepkg/TSAPY1/Pract5_Data_Visualization.py +481 -0
  20. myawesomepkg/TSAPY1/Practical 6.py +860 -0
  21. myawesomepkg/TSAPY1/Practical No 1.py +148 -0
  22. myawesomepkg/TSAPY1/Practical No 2.py +115 -0
  23. myawesomepkg/TSAPY1/Practical No 3.py +168 -0
  24. myawesomepkg/TSAPY1/Practical No 4 A.py +233 -0
  25. myawesomepkg/TSAPY1/Practical No 4 B.py +137 -0
  26. myawesomepkg/TSAPY1/Practical No 5.py +52 -0
  27. myawesomepkg/TSAPY1/Practical No 6.py +29 -0
  28. myawesomepkg/TSAPY1/Practical No 7.py +67 -0
  29. myawesomepkg/TSAPY1/Practical No 8.py +108 -0
  30. myawesomepkg/TSAPY1/Print_R.py +123 -0
  31. myawesomepkg/TSAPY1/R_Graph.py +32 -0
  32. myawesomepkg/TSAPY1/Working_Ggplot.py +53 -0
  33. myawesomepkg/TSAPY1/__init__.py +0 -0
  34. myawesomepkg/TSAPY1/p1_2_pca_iris.py +141 -0
  35. myawesomepkg/TSAPY1/p2_1_find_s.py +78 -0
  36. myawesomepkg/TSAPY1/p2_bcandidate_elimination_algorithm_(1).py +85 -0
  37. myawesomepkg/TSAPY1/p3_1_least_square_regression.py +105 -0
  38. myawesomepkg/TSAPY1/p3_2_logistic_regression_algorithm.py +79 -0
  39. myawesomepkg/TSAPY1/p5_1_hierarchical_clustering.py +143 -0
  40. myawesomepkg/TSAPY1/p5_2_k_nearest_neighbour_algorithm.py +104 -0
  41. myawesomepkg/TSAPY1/p6_1_id3_algorithm_.py +199 -0
  42. myawesomepkg/TSAPY1/p7_1_ann_backpropagation_algorithm.py +116 -0
  43. myawesomepkg/TSAPY1/p7_2_bds_association_rule_mining.py +99 -0
  44. myawesomepkg/TSAPY1/p8_1_gaussian_naive_bayes_.py +97 -0
  45. myawesomepkg/TSAPY1/p8_2_naive_bayes_document_classifier.py +111 -0
  46. myawesomepkg/TSAPY1/p9_1bayesian_network.py +91 -0
  47. myawesomepkg/TSAPY1/p9_b_loess_regression.py +113 -0
  48. myawesomepkg/TSAPY1/p_1_test_and_train.py +98 -0
  49. myawesomepkg/TSAPY1/pract3A-B.py +3212 -0
  50. myawesomepkg/TSAPY1/practical_no_3.py +167 -0
  51. myawesomepkg/TSAPY1/practical_no_4.py +215 -0
  52. myawesomepkg/TSAPY1/practical_no_4b.py +78 -0
  53. myawesomepkg/TSAPY1/practical_no_5_ac_and_pca.py +39 -0
  54. myawesomepkg/TSAPY1/practical_no_6.py +37 -0
  55. myawesomepkg/TSAPY1/practical_no_7.py +69 -0
  56. myawesomepkg/TSAPY1/practical_no_8.py +79 -0
  57. myawesomepkg/TSAPY1/tsa_practical_no_1.py +287 -0
  58. myawesomepkg/TSAPY1/tsa_practical_no_2.py +121 -0
  59. myawesomepkg/__init__.py +1 -0
  60. myawesomepkg/core.py +2 -0
  61. myawesomepkg-0.1.8.dist-info/METADATA +17 -0
  62. myawesomepkg-0.1.8.dist-info/RECORD +64 -0
  63. myawesomepkg-0.1.8.dist-info/WHEEL +5 -0
  64. myawesomepkg-0.1.8.dist-info/top_level.txt +1 -0
myawesomepkg/TSAPY1/p5_2_k_nearest_neighbour_algorithm.py
@@ -0,0 +1,104 @@
+ # -*- coding: utf-8 -*-
+ """P5-2-k-Nearest Neighbour algorithm.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/11ueW4m9D1Cgcs82q8_mtcg4Vgh5Qb0zi
+ """
+
+ import pandas as pd
+ import numpy as np
+ from matplotlib import pyplot as plt
+ from sklearn.neighbors import KNeighborsClassifier
+ from sklearn import preprocessing
+ from sklearn.model_selection import train_test_split
+ iris = pd.read_csv('/content/iris.csv') # Import the iris.csv dataset
+ iris.head() #Display the iris dataset
+ iris.shape #Display the dimensions of the iris dataset
+ iris['variety'].value_counts() #Display the number of instances belonging to each value of the dependent variable
+ iris.columns #Display the column headers of the iris dataset
+ iris.values #Display the values of the dataset
+ X=iris.iloc[:,:4] #Define the independent variable matrix X
+ X.head() #Display X
+ y=iris.iloc[:,-1] #Define the dependent variable y
+ y.head() #Display y
+ X=preprocessing.StandardScaler().fit_transform(X) #Preprocess the data to achieve a mean of 0 and a standard deviation of 1
+ X[0:4] #Display the preprocessed data
+ X_train, X_test, Y_train, Y_test = train_test_split(X,y,test_size=0.3) #Create the training set and the testing set
+ Y_test.shape #Display the dimensions of Y_test
+ knnmodel=KNeighborsClassifier(n_neighbors=3) #Build the KNN model for k=3
+ knnmodel.fit(X_train,Y_train) #Train the KNN model
+ Y_pred = knnmodel.predict(X_test) #Use the KNN model to predict the classes of the test set
+ Y_pred #Display the result of the prediction
+
+ #Calculate the accuracy of the model
+ from sklearn.metrics import accuracy_score
+ accuracy_score(Y_test,Y_pred)
+
+ #Constructing the confusion matrix
+ from sklearn.metrics import confusion_matrix
+ cm=confusion_matrix(Y_test.values,Y_pred)
+ cm
+
+ #Visualization of the output
+
+ cm1=pd.DataFrame(data=cm,index=['Setosa','Versicolor','Virginica'],columns=['Setosa','Versicolor','Virginica'])
+ cm1
+ pred_output=pd.DataFrame(data=[Y_test.values,Y_pred],index=['Y_test','Y_pred'])
+ pred_output.transpose()
+
+ # Import necessary libraries
+ import pandas as pd
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from sklearn.neighbors import KNeighborsClassifier
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
+
+ # Load the dataset
+ iris = pd.read_csv(r"/content/iris.csv")
+
+ # Display basic information
+ print("Dataset Head:\n", iris.head())
+ print("\nShape of dataset:", iris.shape)
+ print("\nClass distribution:\n", iris['variety'].value_counts())
+
+ # Split data into features and target
+ X = iris.iloc[:, :-1] # Independent variables
+ y = iris.iloc[:, -1] # Target variable
+
+ # Standardize the features
+ scaler = StandardScaler()
+ X_scaled = scaler.fit_transform(X)
+
+ # Split into train and test sets
+ X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)
+
+ # Build and train the KNN model
+ knn_model = KNeighborsClassifier(n_neighbors=3)
+ knn_model.fit(X_train, y_train)
+
+ # Make predictions
+ y_pred = knn_model.predict(X_test)
+
+ # Calculate model accuracy
+ accuracy = accuracy_score(y_test, y_pred)
+ print("\nModel Accuracy: {:.2f}%".format(accuracy * 100))
+
+ # Construct the confusion matrix
+ cm = confusion_matrix(y_test, y_pred)
+ cm_df = pd.DataFrame(cm, index=iris['variety'].unique(), columns=iris['variety'].unique())
+ print("\nConfusion Matrix:\n", cm_df)
+
+ # Visualize confusion matrix
+ disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=iris['variety'].unique())
+ disp.plot(cmap='Blues')
+ plt.title("Confusion Matrix - KNN Classifier")
+ plt.show()
+
+ # Combine predictions and actual values for comparison
+ pred_output = pd.DataFrame({'Actual': y_test.values, 'Predicted': y_pred})
+ print("\nPrediction Results:\n", pred_output.head(10))
+
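As a quick cross-check of the reported accuracy (a minimal sketch reusing the cm matrix computed above): the diagonal of the confusion matrix holds the correctly classified samples, so the accuracy can be recomputed directly from it.

import numpy as np
accuracy_from_cm = np.trace(cm) / cm.sum()   # correct predictions / all predictions
print("Accuracy recomputed from the confusion matrix:", accuracy_from_cm)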
myawesomepkg/TSAPY1/p6_1_id3_algorithm_.py
@@ -0,0 +1,199 @@
+ # -*- coding: utf-8 -*-
+ """P6-1-ID3 Algorithm .ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/145Q19eQyjP4BwvvSJCQ3_q12QGmQJ-il
+ """
+
+ import warnings
+ warnings.filterwarnings('ignore')
+
+ import pandas as pd
+ from pandas import DataFrame
+
+ df_tennis = pd.DataFrame(data=pd.read_csv('/content/PlayTennis.csv'))
+ df_tennis.head()
+
+ #Entropy formula procedure
+ def entropy(probs):
+     import math
+     return sum([-prob*math.log(prob,2) for prob in probs])
+
+ #Entropy calculation for a list of class labels
+ def entropy_of_list(a_list): #Input: list of class labels
+     from collections import Counter
+     cnt=Counter(x for x in a_list) #Counter counts the occurrences of each class label
+     num_instances=len(a_list)*1.0 #=14 for the full dataset
+     probs=[x/num_instances for x in cnt.values()] #x is the count of each label, i.e. 9/14 for YES and 5/14 for NO
+     return entropy(probs) #Call entropy
+
+ #Calculating the entropy of the target variable
+ total_entropy=entropy_of_list(df_tennis['PlayTennis'])
+ print('\n The total Entropy is:\n',total_entropy)
+
+ #Calculating Information Gain
+ def information_gain(df,split_attribute_name,target_attribute_name,trace=0):
+     df_split=df.groupby(split_attribute_name) #Split the dataset into groups/subsets by attribute value
+     nobs=len(df.index)*1.0 #Count the total number of observations
+     #Compute the entropy and the proportion of rows for each group
+     df_agg_ent=df_split.agg({target_attribute_name:[entropy_of_list,lambda x:len(x)/nobs]})[target_attribute_name]
+     df_agg_ent.columns=['Entropy','PropObservations'] #Rename the columns
+     new_entropy=sum(df_agg_ent['Entropy']*df_agg_ent['PropObservations'])
+     old_entropy=entropy_of_list(df[target_attribute_name])
+     return old_entropy-new_entropy
+
+ """Explanation: df_split.agg()
+ For each group, aggregate (agg) the target_attribute_name (PlayTennis) using two functions:
+
+ entropy_of_list → entropy of that group's labels.
+
+ lambda x: len(x)/nobs → proportion of total rows that belong to this group.
+
+ Example for "Outlook":
+
+ Group "Sunny" (5 rows): entropy ≈ 0.971, proportion = 5/14.
+
+ Group "Overcast" (4 rows): entropy = 0.0, proportion = 4/14.
+
+ Group "Rain" (5 rows): entropy ≈ 0.971, proportion = 5/14.
+
+ The [target_attribute_name] selects only the part of the DataFrame we care about.
+ """
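Plugging those numbers in gives a quick sanity check of information_gain (a small sketch using the figures quoted above for the classic 14-row PlayTennis data with 9 Yes / 5 No labels):

# Weighted entropy after splitting on "Outlook", and the resulting gain
weighted_entropy = (5/14)*0.971 + (4/14)*0.0 + (5/14)*0.971   # ≈ 0.694
info_gain_outlook = 0.940 - weighted_entropy                  # total entropy ≈ 0.940, so the gain is ≈ 0.246
print(info_gain_outlook)

On that dataset, information_gain(df_tennis, 'Outlook', 'PlayTennis') should return roughly the same value.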
+
+ def id3(df,target_attribute_name,attribute_names,default_class=None):
+     from collections import Counter
+     cnt=Counter(x for x in df[target_attribute_name]) #Count the class labels YES/NO
+     if len(cnt)==1: #Pure subset: only one class is left
+         return next(iter(cnt)) #Return that single remaining class label
+     elif df.empty or (not attribute_names):
+         return default_class #Return the default class for an empty data set or when no attributes are left
+     else:
+         default_class=max(cnt.keys()) #Take the (lexicographically) larger of Yes/No as the default class
+         #Compute the information gain for each attribute
+         gainz=[information_gain(df,attr,target_attribute_name) for attr in attribute_names]
+         #Select the best attribute, i.e. the one with maximum information gain
+         index_of_max=gainz.index(max(gainz)) #Index of the best attribute
+         best_attr=attribute_names[index_of_max]
+         tree={best_attr:{}} #Initiate the tree with the best attribute as a node
+         #After choosing the best attribute, it is removed from the list
+         remaining_attribute_names=[i for i in attribute_names if i!=best_attr]
+         #Recursive splitting and building of subtrees
+         for attr_val,data_subset in df.groupby(best_attr):
+             subtree=id3(data_subset,target_attribute_name,remaining_attribute_names,default_class)
+             tree[best_attr][attr_val]=subtree #Attach the resulting subtree to the main tree
+         return tree
+
+ attribute_names=list(df_tennis.columns)
+ attribute_names.remove('PlayTennis') #Remove the class attribute
+
+ #Run the algorithm:
+ from pprint import pprint
+ tree=id3(df_tennis,'PlayTennis',attribute_names)
+ print("\n The resultant DECISION TREE is:\n")
+ pprint(tree)
+
+ #Result for a new instance
+ def classify(instance,tree,default=None):
+     attribute=next(iter(tree)) #Get the first attribute (root) of the tree
+     #Look up the branch for this instance's value; fall back to the default class for values not seen in training
+     result=tree[attribute].get(instance[attribute],default)
+     if isinstance(result,dict): #Recursive case: the result is still a tree
+         return classify(instance,result)
+     else:
+         return result
+
+ """Get subtree:
+ If instance["Outlook"] = "Sunny", then
+ result = {'Humidity': {'High': 'No', 'Normal': 'Yes'}}
+ """
+
+ #Creation of the training set
+ training_data=df_tennis.iloc[0:-4]
+ print(training_data)
+
+ #Test set
+ test_data=df_tennis.iloc[-4:]
+ print(test_data)
+
+ train_tree=id3(training_data,'PlayTennis',attribute_names)
+
+ #Prediction on new data
+ test_data['predicted']=test_data.apply(classify,axis=1,args=(train_tree,'yes'))
+ print(test_data['predicted'])
+
+ import pandas as pd
+ import numpy as np
+ import math
+ from collections import Counter
+ from pprint import pprint
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ # Load dataset
+ df = pd.read_csv(r"/content/PlayTennis.csv")
+ print("Dataset:\n", df.head())
+
+ # ---------- Step 1: Entropy Calculation ----------
+ def entropy(values):
+     probs = [v / len(values) for v in Counter(values).values()]
+     return -sum(p * math.log2(p) for p in probs)
+
+ # ---------- Step 2: Information Gain ----------
+ def information_gain(df, attr, target):
+     total_entropy = entropy(df[target])
+     vals, counts = np.unique(df[attr], return_counts=True)
+     weighted_entropy = sum(
+         (counts[i] / len(df)) * entropy(df[df[attr] == vals[i]][target])
+         for i in range(len(vals))
+     )
+     return total_entropy - weighted_entropy
+
+ # ---------- Step 3: ID3 Algorithm ----------
+ def id3(df, target, attributes):
+     # Base cases
+     if len(df[target].unique()) == 1:
+         return df[target].iloc[0]
+     if not attributes:
+         return df[target].mode()[0]
+
+     # Choose best attribute
+     gains = [information_gain(df, attr, target) for attr in attributes]
+     best_attr = attributes[np.argmax(gains)]
+
+     # Build tree recursively
+     tree = {best_attr: {}}
+     for val in df[best_attr].unique():
+         subset = df[df[best_attr] == val]
+         subtree = id3(subset, target, [a for a in attributes if a != best_attr])
+         tree[best_attr][val] = subtree
+     return tree
+
+ # ---------- Step 4: Train the Model ----------
+ attributes = list(df.columns)
+ attributes.remove('PlayTennis')
+ tree = id3(df, 'PlayTennis', attributes)
+
+ print("\nDecision Tree:")
+ pprint(tree)
+
+ # ---------- Step 5: Classification ----------
+ def classify(instance, tree):
+     attr = next(iter(tree))
+     val = instance[attr]
+     subtree = tree[attr].get(val, None)
+     if isinstance(subtree, dict):
+         return classify(instance, subtree)
+     return subtree
+
+ # ---------- Step 6: Train-Test Split ----------
+ train = df.iloc[:-4]
+ test = df.iloc[-4:]
+
+ tree_train = id3(train, 'PlayTennis', attributes)
+
+ # Predict for test set
+ test['Predicted'] = test.apply(classify, axis=1, args=(tree_train,))
+ print("\nPredictions:\n", test[['PlayTennis', 'Predicted']])
+
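For a quick check of the classifier above, a single new day can also be classified directly against the learned tree. A minimal sketch (the attribute names and values below are assumptions about the usual PlayTennis column layout; adjust them to match the actual CSV):

# Hypothetical new instance; the keys must match the training columns
new_day = {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}
print(classify(new_day, tree))   # the classic tree routes Sunny days through Humidity, so High humidity should give 'No'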
myawesomepkg/TSAPY1/p7_1_ann_backpropagation_algorithm.py
@@ -0,0 +1,116 @@
+ # -*- coding: utf-8 -*-
+ """P7-1-ANN Backpropagation algorithm.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1m_PxuQ8-4tq235R3C2j8vSIODh7TXBmq
+ """
+
+ import numpy as np
+
+ X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float) #two input features [sleep, study]
+ y = np.array(([92], [86], [89]), dtype=float) #one output: expected percentage in the exam
+ X = X/np.amax(X,axis=0) #divide by the column-wise maximum of X
+ y = y/100 #Convert to a fraction
+
+ #Variable initialization
+ epoch=5 #Setting the number of training iterations
+ lr=0.1 #Setting the learning rate
+ inputlayer_neurons = 2 #number of features in the data set
+ hiddenlayer_neurons = 3 #number of hidden layer neurons
+ output_neurons = 1 #number of neurons in the output layer
+
+ #weight and bias initialization (np.random.uniform draws values uniformly at random with the given shape)
+ wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons))
+ bh=np.random.uniform(size=(1,hiddenlayer_neurons))
+ wout=np.random.uniform(size=(hiddenlayer_neurons,output_neurons))
+ bout=np.random.uniform(size=(1,output_neurons))
+
+ #Sigmoid function
+ def sigmoid(x):
+     return 1/(1 + np.exp(-x))
+
+ #Derivative of the sigmoid function (expects the sigmoid output, not the raw input)
+ def derivatives_sigmoid(x):
+     return x * (1 - x)
+
+ #Training loop
+ for i in range(epoch):
+     #Forward propagation
+     hinp1=np.dot(X,wh)
+     hinp=hinp1 + bh
+     hlayer_act = sigmoid(hinp)
+     outinp1=np.dot(hlayer_act,wout)
+     outinp= outinp1+bout
+     output = sigmoid(outinp)
+     #Backpropagation
+     EO = y-output
+     outgrad = derivatives_sigmoid(output)
+     d_output = EO * outgrad
+     EH = d_output.dot(wout.T)
+     hiddengrad = derivatives_sigmoid(hlayer_act) #how much the hidden layer weights contributed to the error
+     d_hiddenlayer = EH * hiddengrad
+
+     wout += hlayer_act.T.dot(d_output) *lr #dot product of the next layer's error and the current layer's output
+     wh += X.T.dot(d_hiddenlayer) *lr
+
+     print("-----------Epoch-", i+1, "Starts----------")
+     print("Input: \n" + str(X))
+     print("Actual Output: \n" + str(y))
+     print("Predicted Output: \n" ,output)
+     print("-----------Epoch-", i+1, "Ends----------\n")
+
+ print("Input: \n" + str(X))
+ print("Actual Output: \n" + str(y))
+ print("Predicted Output: \n" ,output)
+
+ import numpy as np
+
+ # Input (Sleep, Study hours) and Output (Exam %)
+ X = np.array([[2, 9], [1, 5], [3, 6]], dtype=float)
+ y = np.array([[92], [86], [89]], dtype=float)
+
+ # Normalize input and output
+ X = X / np.max(X, axis=0)
+ y = y / 100
+
+ # Hyperparameters
+ epochs = 5
+ lr = 0.1
+ input_neurons, hidden_neurons, output_neurons = 2, 3, 1
+
+ # Weight & Bias initialization
+ wh = np.random.rand(input_neurons, hidden_neurons)
+ bh = np.random.rand(1, hidden_neurons)
+ wout = np.random.rand(hidden_neurons, output_neurons)
+ bout = np.random.rand(1, output_neurons)
+
+ # Activation functions
+ def sigmoid(x): return 1 / (1 + np.exp(-x))
+ def sigmoid_derivative(x): return x * (1 - x)
+
+ # Training loop
+ for epoch in range(epochs):
+     # Forward Propagation
+     hidden_input = np.dot(X, wh) + bh
+     hidden_output = sigmoid(hidden_input)
+     final_input = np.dot(hidden_output, wout) + bout
+     output = sigmoid(final_input)
+
+     # Backpropagation
+     error = y - output
+     d_output = error * sigmoid_derivative(output)
+     d_hidden = d_output.dot(wout.T) * sigmoid_derivative(hidden_output)
+
+     # Weight & bias updates
+     wout += hidden_output.T.dot(d_output) * lr
+     wh += X.T.dot(d_hidden) * lr
+
+     # Display progress
+     print(f"\n---- Epoch {epoch+1} ----")
+     print("Predicted Output:\n", output)
+
+ # Final results
+ print("\nFinal Predicted Output:\n", output)
+
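Both training loops above adjust only the weight matrices; the bias vectors bh and bout are initialized but never updated. If the biases are meant to learn as well, a minimal sketch of the missing step (to sit next to the weight updates inside the loop, reusing d_output and d_hidden from the second script) would be:

bout += np.sum(d_output, axis=0, keepdims=True) * lr   # output-layer bias gradient, summed over samples
bh += np.sum(d_hidden, axis=0, keepdims=True) * lr     # hidden-layer bias gradient
loss = np.mean(np.square(y - output))                  # optional: mean squared error, useful for monitoring convergence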
myawesomepkg/TSAPY1/p7_2_bds_association_rule_mining.py
@@ -0,0 +1,99 @@
+ # -*- coding: utf-8 -*-
+ """P7-2 BDS Association Rule Mining.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1Uk4gqoyuXgyg-PkXLM8bEVdLG-W-CwAa
+ """
+
+ # pip install apyori   (shell command in Colab; kept commented so the file stays valid Python)
+
+ # pip install mlxtend
+
+ import matplotlib.pyplot as plt
+ import pandas as pd
+ import numpy as np
+ #from apyori import apriori
+
+ data = pd.read_csv("/content/bread_basket.csv")
+ data.head()
+
+ transactions = []
+ # Combine the items of each transaction into one list
+ for i in data.Transaction.unique():
+     list_trans = list(set(data[data.Transaction == i]["Item"]))
+     if len(list_trans) > 0:
+         transactions.append(list_trans)
+
+ print(transactions)
+
+ """The Apriori module requires a data frame with values of 0 and 1, or True and False. Therefore, we one-hot encode the data to meet the requirement of the Apriori implementation provided by the mlxtend library."""
+
+ import mlxtend
+ from mlxtend.preprocessing import TransactionEncoder
+ from mlxtend.frequent_patterns import association_rules, apriori
+
+ trans_encoding = TransactionEncoder()
+ df2 = trans_encoding.fit(transactions).transform(transactions)
+ df3 = pd.DataFrame(df2, columns=trans_encoding.columns_)
+
+ df3.head()
+
+ """The frequent itemsets are found using the apriori function. We use min_support = 0.05, the minimum support an itemset needs in order to be kept. use_colnames = True keeps the original item names in the itemsets to make them easier to read."""
+
+ frequent_set = apriori(df3, min_support = 0.05, use_colnames = True)
+
+ frequent_set
+
+ """From these frequent itemsets, we want to find the association rules, which state that if A is bought, then B is also purchased."""
+
+ rules = association_rules(frequent_set, metric = 'lift', min_threshold = 1)
+
+ rules
+
+ """Conclusion:
+ As we can see, the result consists of only two association rules. Cake and Coffee are bought together more often than at random, with lift ≈ 1.1 and about 53% confidence.
+ """
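To interpret such rules: confidence(A→B) = support(A∪B) / support(A), and lift(A→B) = confidence(A→B) / support(B), so a lift above 1 means the items co-occur more often than they would if they were independent. A minimal sketch (assuming the rules DataFrame produced above) for ranking the rules and keeping only the reasonably confident ones:

# Sort by lift and keep rules with at least 50% confidence
strong_rules = rules[rules['confidence'] >= 0.5].sort_values('lift', ascending=False)
print(strong_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])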
+
+ # Install required libraries
+ # pip install mlxtend
+
+ import pandas as pd
+ from mlxtend.preprocessing import TransactionEncoder
+ from mlxtend.frequent_patterns import apriori, association_rules
+
+ # Step 1: Load the dataset
+ data = pd.read_csv("/content/bread_basket.csv")
+ print(data.head())
+
+ # Step 2: Create a list of transactions
+ transactions = []
+ for t_id in data["Transaction"].unique():
+     items = list(set(data[data["Transaction"] == t_id]["Item"]))
+     if items:
+         transactions.append(items)
+
+ print(f"\nTotal Transactions: {len(transactions)}")
+
+ # Step 3: Convert to a one-hot encoded DataFrame
+ te = TransactionEncoder()
+ encoded_data = te.fit(transactions).transform(transactions)
+ df = pd.DataFrame(encoded_data, columns=te.columns_)
+
+ print("\nOne-hot encoded data sample:")
+ print(df.head())
+
+ # Step 4: Generate frequent itemsets using Apriori
+ frequent_items = apriori(df, min_support=0.05, use_colnames=True)
+ print("\nFrequent Itemsets:")
+ print(frequent_items)
+
+ # Step 5: Generate association rules
+ rules = association_rules(frequent_items, metric="lift", min_threshold=1)
+ print("\nAssociation Rules:")
+ print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])
+
+ # Step 6: Summary
+ print("\nConclusion:")
+ print("Items like Cake and Coffee are often bought together with high support and confidence.")
myawesomepkg/TSAPY1/p8_1_gaussian_naive_bayes_.py
@@ -0,0 +1,97 @@
+ # -*- coding: utf-8 -*-
+ """P8-1 Gaussian Naive Bayes .ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1hPQd9ZfvblikDc3KJosyOf3atsItL260
+ """
+
+ import numpy as np
+ import pandas as pd
+
+ #Import the datasets module
+ from sklearn import datasets
+
+ #Load the dataset
+ wine = datasets.load_wine()
+ print(wine)
+
+ #Print the names of the 13 features
+
+ print("Features: ", wine.feature_names)
+
+ #Print the label names of the wine classes
+
+ print("Labels: ", wine.target_names)
+
+ X=pd.DataFrame(wine['data'])
+ print(X.head())
+
+ #Check the dimensions of the dataset
+ print(wine.data.shape)
+
+ #Print the wine labels (0: class_0, 1: class_1, 2: class_2)
+ print(wine.target)
+
+ from sklearn.model_selection import train_test_split
+ X_train, X_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.30,random_state=109)
+
+ #Import the Gaussian Naive Bayes model
+ from sklearn.naive_bayes import GaussianNB
+
+ #Create a Gaussian classifier
+ gnb = GaussianNB()
+
+ #Train the model using the training set
+ gnb.fit(X_train, y_train)
+
+ #Predict the response for the test set
+ y_pred = gnb.predict(X_test)
+ print(y_pred)
+
+ #Import the scikit-learn metrics module for the accuracy calculation
+ from sklearn import metrics
+
+ # Model accuracy
+ print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
+
+ #Confusion matrix
+ from sklearn.metrics import confusion_matrix
+ cm=np.array(confusion_matrix(y_test,y_pred))
+ cm
+
+ # Simple Gaussian Naive Bayes example using the Wine dataset
+
+
+ #my code
+
+ from sklearn.datasets import load_wine
+ from sklearn.model_selection import train_test_split
+ from sklearn.naive_bayes import GaussianNB
+ from sklearn.metrics import accuracy_score, confusion_matrix
+
+ # Load dataset
+ wine = load_wine()
+
+ # Features and labels
+ X = wine.data
+ y = wine.target
+
+ # Split the data into training and testing sets
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
+
+ # Create and train the model
+ model = GaussianNB()
+ model.fit(X_train, y_train)
+
+ # Predict on test data
+ y_pred = model.predict(X_test)
+
+ # Print results
+ print("Feature names:", wine.feature_names)
+ print("Target names:", wine.target_names)
+ print("\nPredicted labels:", y_pred)
+ print("Accuracy:", accuracy_score(y_test, y_pred))
+ print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
+
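Gaussian Naive Bayes models each feature within a class as a normal distribution and combines those per-feature likelihoods with the class priors. A minimal sketch for inspecting the parameters learned by the model trained above (assuming a recent scikit-learn, where the fitted estimator exposes theta_ for per-class feature means and var_ for per-class feature variances):

import pandas as pd
class_means = pd.DataFrame(model.theta_, index=wine.target_names, columns=wine.feature_names)
class_variances = pd.DataFrame(model.var_, index=wine.target_names, columns=wine.feature_names)
print("Class-conditional feature means:\n", class_means)
print("\nClass-conditional feature variances:\n", class_variances)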