pipcode-ani 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pipcode_ani-0.1.0/PKG-INFO +9 -0
- pipcode_ani-0.1.0/README.md +1 -0
- pipcode_ani-0.1.0/pipcode/__init__.py +1 -0
- pipcode_ani-0.1.0/pipcode/core.py +806 -0
- pipcode_ani-0.1.0/pipcode_ani.egg-info/PKG-INFO +9 -0
- pipcode_ani-0.1.0/pipcode_ani.egg-info/SOURCES.txt +8 -0
- pipcode_ani-0.1.0/pipcode_ani.egg-info/dependency_links.txt +1 -0
- pipcode_ani-0.1.0/pipcode_ani.egg-info/top_level.txt +1 -0
- pipcode_ani-0.1.0/pyproject.toml +11 -0
- pipcode_ani-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# pipcode
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .core import *
|
|
@@ -0,0 +1,806 @@
|
|
|
1
|
+
# ===================== Practical Printer Library =====================
|
|
2
|
+
|
|
3
|
+
def practical_1() -> None:
    """Print the reference script for Practical 1 (mean imputation).

    The script text is written to standard output exactly as stored here;
    it is not executed. Returns nothing.
    """
    # The snippet lines sit at column 0 because they are the contents of the
    # string literal, not statements of this function.
    print("""# ===================== Practical 1 =====================
import pandas as pd

df = pd.read_csv("ml_practical_dataset.csv")

print("Before:\\n", df)
print("\\nMissing:\\n", df.isnull().sum())

num_cols = df.select_dtypes(include=['float64', 'int64']).columns
df[num_cols] = df[num_cols].fillna(df[num_cols].mean())

print("\\nAfter:\\n", df)
print("\\nMissing After:\\n", df.isnull().sum())
""")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def practical_2() -> None:
    """Print the reference script for Practical 2 (mode imputation).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    # The printed snippet assigns the filled column back instead of calling
    # fillna(..., inplace=True) on a column selection: that chained in-place
    # form is deprecated in recent pandas and does not update df under
    # copy-on-write.
    print("""# ===================== Practical 2 =====================
import pandas as pd

df = pd.read_csv("ml_practical_dataset.csv")

print("Before:\\n", df['Department'].value_counts())

df['Department'] = df['Department'].fillna(df['Department'].mode()[0])

print("\\nAfter:\\n", df['Department'].value_counts())
""")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def practical_3() -> None:
    """Print the reference script for Practical 3 (label encoding).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 3 =====================
import pandas as pd
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv("ml_practical_dataset.csv")

le = LabelEncoder()
df['Department'] = df['Department'].astype(str)

df['Department_Encoded'] = le.fit_transform(df['Department'])

print(df[['Department', 'Department_Encoded']])
""")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def practical_4() -> None:
    """Print the reference script for Practical 4 (one-hot encoding).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 4 =====================
import pandas as pd

df = pd.read_csv("ml_practical_dataset.csv")

df = pd.get_dummies(df, columns=['Department', 'Education'])

print(df.head())
""")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def practical_5() -> None:
    """Print the reference script for Practical 5 (Min-Max scaling).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 5 =====================
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

df = pd.read_csv("ml_practical_dataset.csv")

num_cols = ['Age', 'Salary', 'Experience', 'PerformanceScore']

scaler = MinMaxScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])

print(df[num_cols].head())
""")
|
|
76
|
+
|
|
77
|
+
def practical_6() -> None:
    """Print the reference script for Practical 6 (standard scaling).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 6 =====================
import pandas as pd
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("ml_practical_dataset.csv")

num_cols = ['Age', 'Salary', 'Experience', 'PerformanceScore']

scaler = StandardScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])

print(df[num_cols].mean())
print(df[num_cols].std())
""")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def practical_7() -> None:
    """Print the reference script for Practical 7 (derived feature).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 7 =====================
import pandas as pd

df = pd.read_csv("ml_practical_dataset.csv")

df['New_Feature'] = df['Experience'] * df['PerformanceScore']

print(df.head())
""")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def practical_8() -> None:
    """Print the reference script for Practical 8 (summary statistics).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 8 =====================
import pandas as pd

df = pd.read_csv("ml_practical_dataset.csv")

print(df.describe())

print("\\nMedian:\\n", df.median(numeric_only=True))
""")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def practical_9() -> None:
    """Print the reference script for Practical 9 (histograms).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 9 =====================
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("ml_practical_dataset.csv")

df.hist()
plt.show()
""")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def practical_10() -> None:
    """Print the reference script for Practical 10 (boxplots).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 10 =====================
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("ml_practical_dataset.csv")

df.boxplot()
plt.show()
""")
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def practical_11() -> None:
    """Print the reference script for Practical 11 (correlation matrix).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 11 =====================
import pandas as pd

df = pd.read_csv("ml_practical_dataset.csv")

corr = df.corr(numeric_only=True)
print(corr)
""")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def practical_12() -> None:
    """Print the reference script for Practical 12 (correlation heatmap).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 12 =====================
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv("ml_practical_dataset.csv")

corr = df.corr(numeric_only=True)

sns.heatmap(corr, annot=True)
plt.show()
""")
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def practical_13() -> None:
    """Print the reference script for Practical 13 (pairplot).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 13 =====================
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv("ml_practical_dataset.csv")

sns.pairplot(df)
plt.show()
""")
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def practical_14() -> None:
    """Print the reference script for Practical 14 (skewness).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 14 =====================
import pandas as pd

df = pd.read_csv("ml_practical_dataset.csv")

print(df.skew(numeric_only=True))
""")
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def practical_15() -> None:
    """Print the reference script for Practical 15 (logistic regression fit).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 15 =====================
import pandas as pd
from sklearn.linear_model import LogisticRegression

df = pd.read_csv("ml_practical_dataset.csv")

df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

model = LogisticRegression()
model.fit(X, y)
""")
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def practical_16() -> None:
    """Print the reference script for Practical 16 (75:25 train/test split).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 16 =====================
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("ml_practical_dataset.csv")

df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
""")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def practical_17() -> None:
    """Print the reference script for Practical 17 (logistic regression predictions).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 17 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

df = pd.read_csv("ml_practical_dataset.csv")

df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(y_pred)
""")
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def practical_18() -> None:
    """Print the reference script for Practical 18 (logistic regression accuracy).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 18 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

df = pd.read_csv("ml_practical_dataset.csv")

df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
""")
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def practical_19() -> None:
    """Print the reference script for Practical 19 (confusion matrix).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 19 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

df = pd.read_csv("ml_practical_dataset.csv")

df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(confusion_matrix(y_test, y_pred))
""")
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def practical_20() -> None:
    """Print the reference script for Practical 20 (classification report).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 20 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

df = pd.read_csv("ml_practical_dataset.csv")

df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(classification_report(y_test, y_pred))
""")
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def practical_21() -> None:
    """Print the reference script for Practical 21 (logistic regression, scaling on/off).

    The script text is written to standard output; it is not executed.
    Returns nothing.

    The printed script splits the data once with a fixed ``random_state`` so
    both models are scored on the same test rows, and fits the scaler on the
    training split only so no test-set statistics leak into training.
    """
    print("""# ===================== Practical 21 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

# One split shared by both runs so the accuracies are comparable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Without scaling
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Without Scaling Accuracy:", accuracy_score(y_test, y_pred))

# With scaling (scaler is fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = LogisticRegression()
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
print("With Scaling Accuracy:", accuracy_score(y_test, y_pred))
""")
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def practical_22() -> None:
    """Print the reference script for Practical 22 (k-NN classifier, k = 3).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 22 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
print(y_pred)
""")
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def practical_23() -> None:
    """Print the reference script for Practical 23 (k-NN accuracy).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 23 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
""")
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def practical_24() -> None:
    """Print the reference script for Practical 24 (k-NN for k = 1..10).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    # The loop body in the printed snippet is indented so the emitted script
    # is itself valid Python.
    print("""# ===================== Practical 24 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

for k in range(1, 11):
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    print("k =", k, "Accuracy =", accuracy_score(y_test, pred))
""")
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def practical_25() -> None:
    """Print the reference script for Practical 25 (k-NN, scaling on/off).

    The script text is written to standard output; it is not executed.
    Returns nothing.

    The printed script splits the data once with a fixed ``random_state`` so
    both models are scored on the same test rows, and fits the scaler on the
    training split only so no test-set statistics leak into training.
    """
    print("""# ===================== Practical 25 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Class']

# One split shared by both runs so the accuracies are comparable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Without scaling
model = KNeighborsClassifier(n_neighbors=3)
model.fit(X_train, y_train)
pred = model.predict(X_test)
print("Without Scaling:", accuracy_score(y_test, pred))

# With scaling (scaler is fitted on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

model = KNeighborsClassifier(n_neighbors=3)
model.fit(X_train_scaled, y_train)
pred = model.predict(X_test_scaled)
print("With Scaling:", accuracy_score(y_test, pred))
""")
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def practical_31() -> None:
    """Print the reference script for Practical 31 (regression curve plot).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 31 =====================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age']]
y = df['Target_Regression']

poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)

model = LinearRegression()
model.fit(X_poly, y)

X_sorted = np.sort(X.values, axis=0)
y_poly = model.predict(poly.transform(X_sorted))

plt.scatter(X, y)
plt.plot(X_sorted, y_poly)
plt.show()
""")
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def practical_32() -> None:
    """Print the reference script for Practical 32 (linear regression MSE).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 32 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Regression']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("MSE:", mean_squared_error(y_test, y_pred))
""")
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def practical_33() -> None:
    """Print the reference script for Practical 33 (ridge regression).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 33 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Regression']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = Ridge(alpha=0.1)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Ridge MSE:", mean_squared_error(y_test, y_pred))
""")
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def practical_34() -> None:
    """Print the reference script for Practical 34 (lasso regression).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 34 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Regression']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = Lasso(alpha=0.1)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Lasso MSE:", mean_squared_error(y_test, y_pred))
""")
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
def practical_35() -> None:
    """Print the reference script for Practical 35 (Linear vs Ridge vs Lasso MSE).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 35 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Regression']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

lr = LinearRegression()
ridge = Ridge(alpha=0.1)
lasso = Lasso(alpha=0.1)

lr.fit(X_train, y_train)
ridge.fit(X_train, y_train)
lasso.fit(X_train, y_train)

pred_lr = lr.predict(X_test)
pred_ridge = ridge.predict(X_test)
pred_lasso = lasso.predict(X_test)

print("Linear MSE:", mean_squared_error(y_test, pred_lr))
print("Ridge MSE:", mean_squared_error(y_test, pred_ridge))
print("Lasso MSE:", mean_squared_error(y_test, pred_lasso))
""")
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def practical_36() -> None:
    """Print the reference script for Practical 36 (cross-validated average MSE).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 36 =====================
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Regression']

model = LinearRegression()

scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error')

print("Average MSE:", -scores.mean())
""")
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
def practical_37() -> None:
    """Print the reference script for Practical 37 (hold-out vs cross-validation MSE).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 37 =====================
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Regression']

# Validation
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = LinearRegression()
model.fit(X_train, y_train)
pred = model.predict(X_test)

print("Validation MSE:", mean_squared_error(y_test, pred))

# Cross-validation
scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error')
print("Cross-Validation MSE:", -scores.mean())
""")
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def practical_38() -> None:
    """Print the reference script for Practical 38 (model comparison by MSE).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    # Dict entries and the loop body in the printed snippet are indented so
    # the emitted script is itself valid Python.
    print("""# ===================== Practical 38 =====================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Regression']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

models = {
    "Linear": LinearRegression(),
    "Ridge": Ridge(alpha=0.1),
    "Lasso": Lasso(alpha=0.1)
}

for name, model in models.items():
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    print(name, "MSE:", mean_squared_error(y_test, pred))
""")
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
def practical_39() -> None:
    """Print the reference script for Practical 39 (alpha tuning with grid search).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    print("""# ===================== Practical 39 =====================
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Regression']

model = Ridge()

params = {'alpha': [0.001, 0.01, 0.1, 1, 10]}

grid = GridSearchCV(model, params, cv=5, scoring='neg_mean_squared_error')
grid.fit(X, y)

print("Best Alpha:", grid.best_params_)
""")
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def practical_40() -> None:
    """Print the reference script for Practical 40 (error versus alpha plot).

    The script text is written to standard output; it is not executed.
    Returns nothing.
    """
    # The loop body in the printed snippet is indented so the emitted script
    # is itself valid Python.
    print("""# ===================== Practical 40 =====================
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

df = pd.read_csv("ml_practical_dataset.csv")
df = df.dropna()

X = df[['Age', 'Salary', 'Experience', 'PerformanceScore']]
y = df['Target_Regression']

alphas = [0.001, 0.01, 0.1, 1, 10]
errors = []

for a in alphas:
    model = Ridge(alpha=a)
    scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error')
    errors.append(-scores.mean())

plt.plot(alphas, errors)
plt.xscale('log')
plt.xlabel("Alpha")
plt.ylabel("MSE")
plt.show()
""")
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
def show_all_questions() -> None:
    """Print the numbered list of all practical questions (1 to 42).

    The list is written to standard output between two separator lines.
    Returns nothing.
    """
    # NOTE(review): item 42 repeats item 30 word for word; confirm whether a
    # different question was intended before changing the text.
    print("""===================== ALL PRACTICAL QUESTIONS =====================

1. Load the given dataset and identify missing values in numerical columns, then replace them using mean imputation and display the dataset before and after preprocessing.

2. Using the given dataset, identify missing values in categorical columns and replace them using mode, then compare frequency distribution before and after.

3. Using the given dataset, apply Label Encoding on a categorical column and display the transformed values.

4. Using the given dataset, apply One-Hot Encoding on categorical features and show the increase in number of columns.

5. Using the given dataset, perform Min-Max scaling on numerical features and verify that all values lie between 0 and 1.

6. Using the given dataset, apply StandardScaler and verify mean is approximately 0 and standard deviation is 1.

7. Using the given dataset, create a new feature using existing columns and display the updated dataset.

8. Using the given dataset, display summary statistics including mean, median, minimum, maximum, and standard deviation.

9. Using the given dataset, plot histograms for numerical features and describe the distribution.

10. Using the given dataset, draw boxplots for selected features and identify outliers.

11. Using the given dataset, compute the correlation matrix and identify highly correlated features.

12. Using the given dataset, plot a heatmap for correlation matrix and interpret relationships.

13. Using the given dataset, generate a pairplot and analyze relationships between variables.

14. Using the given dataset, calculate skewness of features and identify skewed variables.

15. Using the given dataset, implement a Logistic Regression model for binary classification.

16. Using the given dataset, split the data into training and testing sets in 75:25 ratio.

17. Using the given dataset, train the Logistic Regression model and predict class labels.

18. Using the given dataset, calculate accuracy score of the Logistic Regression model.

19. Using the given dataset, generate a confusion matrix and interpret results.

20. Using the given dataset, print classification report including precision, recall, and F1-score.

21. Using the given dataset, train Logistic Regression with and without scaling and compare results.

22. Using the given dataset, implement k-NN classifier with k = 3.

23. Using the given dataset, evaluate accuracy of k-NN model.

24. Using the given dataset, train k-NN model for k values from 1 to 10.

25. Using the given dataset, apply feature scaling and observe its effect on k-NN.

26. Using the given dataset, implement Multiple Linear Regression.

27. Using the given dataset, predict target values using regression model.

28. Using the given dataset, calculate Mean Squared Error (MSE).

29. Using the given dataset, calculate R² score and interpret performance.

30. Using the given dataset, compare linear and polynomial regression results.

31. Using the given dataset, plot regression curve for visualization.

32. Using the given dataset, implement Linear Regression and compute MSE.

33. Using the given dataset, implement Ridge Regression with given alpha value.

34. Using the given dataset, implement Lasso Regression with given alpha value.

35. Using the given dataset, compare MSE of Linear, Ridge, and Lasso models.

36. Using the given dataset, calculate average MSE using cross-validation.

37. Using the given dataset, compare validation and cross-validation results.

38. Using the given dataset, select best model based on minimum error.

39. Using the given dataset, tune alpha parameter using cross-validation.

40. Using the given dataset, plot error versus alpha values.

41. Using the given dataset, compare coefficients of Ridge and Lasso models.

42. Using the given dataset, compare linear and polynomial regression results.

====================================================================
""")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pipcode
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "pipcode-ani" # must be unique
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "My pip package"
|
|
5
|
+
authors = [{name = "Aniket"}]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
requires-python = ">=3.7"
|
|
8
|
+
|
|
9
|
+
[build-system]
|
|
10
|
+
requires = ["setuptools"]
|
|
11
|
+
build-backend = "setuptools.build_meta"
|