regen-ml-codeprinter 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- regen_ml_codeprinter-0.1.1/PKG-INFO +11 -0
- regen_ml_codeprinter-0.1.1/README.md +3 -0
- regen_ml_codeprinter-0.1.1/codeprinter/__init__.py +3 -0
- regen_ml_codeprinter-0.1.1/codeprinter/printer.py +192 -0
- regen_ml_codeprinter-0.1.1/pyproject.toml +13 -0
- regen_ml_codeprinter-0.1.1/regen_ml_codeprinter.egg-info/PKG-INFO +11 -0
- regen_ml_codeprinter-0.1.1/regen_ml_codeprinter.egg-info/SOURCES.txt +8 -0
- regen_ml_codeprinter-0.1.1/regen_ml_codeprinter.egg-info/dependency_links.txt +1 -0
- regen_ml_codeprinter-0.1.1/regen_ml_codeprinter.egg-info/top_level.txt +1 -0
- regen_ml_codeprinter-0.1.1/setup.cfg +4 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
def print_codes() -> None:
    """Print a fixed collection of scikit-learn practice snippets to stdout.

    The snippets are stored verbatim in one raw triple-quoted string and
    written with a single ``print`` call.  Nothing in the string is executed
    here: the function does not import pandas or scikit-learn and does not
    read any of the CSV files the snippets mention.

    Sections, in printed order: dataset shape, Random Forest (with per-tree
    plotting), Voting Classifier, AdaBoost, Gradient Boosting, K-Means,
    the elbow method, and Logistic Regression.

    Returns:
        None.  The only effect is the text written to ``sys.stdout``.
    """
    # Block indentation inside the snippets (loop bodies, call continuations)
    # is part of the payload: without it the printed text is not valid Python
    # once pasted into an interpreter or notebook.
    #
    # NOTE(review): the tree-plotting snippet passes class_names=['Yes','No'].
    # LabelEncoder numbers classes in sorted order, so the labels look
    # swapped relative to the encoded target -- confirm against the dataset
    # before trusting the plots.  The text is left as published.
    content = r'''
#Shape nikala h aur kuch nai kiya h

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
bank_df = pd.read_csv('Bank_Customer_retirement.csv')
bank_df.shape

#Random Forest(Pract 12)

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

df=pd.read_csv("playgolf.csv")

for c in df.columns:
    df[c]=LabelEncoder().fit_transform(df[c])

X=df.drop('PlayGolf',axis=1)
y=df['PlayGolf']

xtr,xte,ytr,yte=train_test_split(
    X,y,test_size=0.2,random_state=42
)

m=RandomForestClassifier(n_estimators=5)
m.fit(xtr,ytr)

print(classification_report(yte,m.predict(xte)))
print(m.predict([[1,1,0,0]]))

#Ploting Very Sexy one do Compulsory

from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

for i in range(5):
    plt.figure(figsize=(20,10))
    plot_tree(m.estimators_[i],
              feature_names=['Outlook','Temp','Humidity','Windy'],
              class_names=['Yes','No'],
              filled=True)
    plt.show()

# Voting Classifier

from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB, GaussianNB
from sklearn.svm import SVC

# Define the individual models (placeholders with default parameters)
dtcmodel = DecisionTreeClassifier()
lrmodel = LogisticRegression(random_state=42)
bnbmodel = BernoulliNB()
gnbmodel = GaussianNB()
svcmodel = SVC(probability=True, random_state=42)

# Train the individual models first
dtcmodel.fit(xtr, ytr)
lrmodel.fit(xtr, ytr)
bnbmodel.fit(xtr, ytr)
gnbmodel.fit(xtr, ytr)
svcmodel.fit(xtr, ytr)

voting_clf = VotingClassifier(estimators=[
    ('DT', dtcmodel),
    ('LR', lrmodel),
    ('BNB', bnbmodel),
    ('GNB', gnbmodel),
    ('SVC', svcmodel)
], voting='hard') # 'hard' for class labels, 'soft' for probabilities if all support it

voting_clf.fit(xtr, ytr)

#Adaboost (Pract 13)

import pandas as pd
from sklearn import model_selection
from sklearn.ensemble import AdaBoostClassifier

df=pd.read_csv("pimaindiansdiabetes.csv")

X=df.iloc[:,:5] # Corrected to include columns 0-4
y=df.iloc[:,5] # Corrected to select column 5

cv=model_selection.KFold(
    n_splits=10,
    random_state=42,
    shuffle=True
)

m=AdaBoostClassifier(
    n_estimators=30,
    random_state=42
)

print(model_selection.cross_val_score(m,X,y,cv=cv).mean())

#SGB

import pandas as pd
from sklearn import model_selection
from sklearn.ensemble import GradientBoostingClassifier

df=pd.read_csv("pimaindiansdiabetes.csv")

X=df.iloc[:,:5] # Corrected to include columns 0-4
y=df.iloc[:,5] # Corrected to select column 5

cv=model_selection.KFold(
    n_splits=10,
    random_state=42,
    shuffle=True
)

m=GradientBoostingClassifier(
    n_estimators=30,
    random_state=42
)

print(model_selection.cross_val_score(m,X,y,cv=cv).mean())

#K-Means (Practical 11)

import pandas as pd
from sklearn.cluster import KMeans

df=pd.read_csv("Countryclusters.csv")

d={'English':1,'Hindi':2,'French':3,'German':4,'Japanese':5}
# df['Language']=df['Language'].map(d) # Removed this line as 'Language' column does not exist

X=df.iloc[:,1:4]

m=KMeans(n_clusters=2,n_init=10)
print(m.fit_predict(X))

#Elbow Method

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

df=pd.read_csv("Countryclusters.csv")

d={'English':1,'Hindi':2,'French':3,'German':4,'Japanese':5}
# df['Language']=df['Language'].map(d) # This line caused a KeyError because 'Language' column does not exist

X=df.iloc[:,1:4]

wcss=[]

for i in range(1,7):
    wcss.append(KMeans(i,n_init=10).fit(X).inertia_)

plt.plot(range(1,7),wcss)
plt.show()

#Logistic Regression (Practical 9)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

df=pd.read_csv("titanic.csv").dropna()

# Removed 'Name' and 'Ticket' from the drop list as they were not found in the DataFrame.
df.drop(['Sex'],axis=1,inplace=True)

X=df.drop('Survived',axis=1)
y=df['Survived']

xtr,xte,ytr,yte=train_test_split(
    X,y,test_size=0.30,random_state=101
)

m=LogisticRegression()
m.fit(xtr,ytr)

print(classification_report(yte,m.predict(xte)))
'''

    print(content)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "regen-ml-codeprinter"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "Prints code automatically on import"
|
|
9
|
+
authors = [
|
|
10
|
+
{name="Ash"}
|
|
11
|
+
]
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.8"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
codeprinter
|