sklern 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sklern-0.0.1/PKG-INFO +23 -0
- sklern-0.0.1/README.md +14 -0
- sklern-0.0.1/pyproject.toml +20 -0
- sklern-0.0.1/setup.cfg +4 -0
- sklern-0.0.1/src/sklern/__init__.py +15 -0
- sklern-0.0.1/src/sklern/ai_helper.py +25 -0
- sklern-0.0.1/src/sklern/gen.py +796 -0
- sklern-0.0.1/src/sklern.egg-info/PKG-INFO +23 -0
- sklern-0.0.1/src/sklern.egg-info/SOURCES.txt +10 -0
- sklern-0.0.1/src/sklern.egg-info/dependency_links.txt +1 -0
- sklern-0.0.1/src/sklern.egg-info/requires.txt +1 -0
- sklern-0.0.1/src/sklern.egg-info/top_level.txt +1 -0
sklern-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sklern
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Example PyPI package
|
|
5
|
+
Author-email: Panda <mg5401103@gmail.com>
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: groq
|
|
9
|
+
|
|
10
|
+
# Sample Package
|
|
11
|
+
|
|
12
|
+
This is a simple Python package published to PyPI.
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
pip install sklern
|
|
17
|
+
|
|
18
|
+
## Usage
|
|
19
|
+
|
|
20
|
+
from sklern import linear_regression, decision_tree
|
|
21
|
+
|
|
22
|
+
linear_regression()
|
|
23
|
+
decision_tree()
|
sklern-0.0.1/README.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sklern"
|
|
7
|
+
version = "0.0.1"
|
|
8
|
+
description = "Example PyPI package"
|
|
9
|
+
authors = [
|
|
10
|
+
{name = "Panda", email = "mg5401103@gmail.com"}
|
|
11
|
+
]
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.8"
|
|
14
|
+
|
|
15
|
+
dependencies = [
|
|
16
|
+
"groq"
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[tool.setuptools.packages.find]
|
|
20
|
+
where = ["src"]
|
sklern-0.0.1/setup.cfg
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Public API of the package: every example generator defined in ``gen`` (plus
# ``help_ml``, which lists them) and the LLM helper ``get1``.
from .gen import (
    linear_regression,
    logistic_regression,
    decision_tree,
    perceptron,
    mlp,
    simple_nlp,
    rnn_model,
    preprocessing_models,
    eda_functions,
    linear_regression_from_scratch,
    mlp_from_scratch,
    help_ml,
)
from .ai_helper import get1

__all__ = [
    "linear_regression",
    "logistic_regression",
    "decision_tree",
    "perceptron",
    "mlp",
    "simple_nlp",
    "rnn_model",
    "preprocessing_models",
    "eda_functions",
    "linear_regression_from_scratch",
    "mlp_from_scratch",
    # help_ml is defined in gen.py but was previously unreachable from the
    # package namespace.
    "help_ml",
    "get1",
]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Thin helper around the Groq chat-completions API.
import os

from groq import Groq

# Name of the environment variable the API key is read from.  The key is
# deliberately NOT stored in this file: everything here ships inside the
# published package, so an embedded key is readable by anyone who installs it.
API_KEY_ENV_VAR = "GROQ_API_KEY"

MODEL = "openai/gpt-oss-20b"  # change here if needed

# Shared client, created on first use so that importing the package does not
# require credentials.
client = None


def _get_client():
    """Return the shared ``Groq`` client, creating it on first use.

    Raises:
        RuntimeError: If the ``GROQ_API_KEY`` environment variable is unset
            or empty.
    """
    global client
    if client is None:
        api_key = os.environ.get(API_KEY_ENV_VAR)
        if not api_key:
            raise RuntimeError(
                f"Set the {API_KEY_ENV_VAR} environment variable to your "
                "Groq API key before calling get1()."
            )
        client = Groq(api_key=api_key)
    return client


def get1(prompt):
    """Send *prompt* as a single user message and return the model's reply.

    Args:
        prompt: Text placed in the ``content`` field of the user message.

    Returns:
        The ``message.content`` of the first choice in the response.

    Raises:
        RuntimeError: If no API key is configured (see ``_get_client``).
    """
    response = _get_client().chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )

    return response.choices[0].message.content


if __name__ == "__main__":
    # Minimal interactive check: one prompt in, one answer out.
    user_prompt = input("You: ")
    answer = get1(user_prompt)
    print("\nLLM:", answer, "\n")
|
|
@@ -0,0 +1,796 @@
|
|
|
1
|
+
def linear_regression():
    """Print a scikit-learn linear-regression example as source text.

    Nothing is trained or executed here; the example is only written to
    standard output.  Returns ``None``.
    """
    snippet = """
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# simple dataset
data = {
    "Hours":[1,2,3,4,5,6,7,8],
    "Score":[10,20,30,40,50,60,70,80]
}

df = pd.DataFrame(data)

# input and output
X = df[["Hours"]]
y = df["Score"]

# split data
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

# create model
model = LinearRegression()

# train model
model.fit(X_train,y_train)

# prediction
pred = model.predict([[9]])

print("Predicted score for 9 hours:",pred)
"""
    print(snippet)
|
|
33
|
+
|
|
34
|
+
def logistic_regression():
    """Print a scikit-learn logistic-regression example as source text.

    The example is written to standard output only; it is not executed.
    Returns ``None``.
    """
    snippet = """
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# dataset
data = {
    "Hours":[1,2,3,4,5,6,7,8],
    "Pass":[0,0,0,0,1,1,1,1]
}

df = pd.DataFrame(data)

X = df[["Hours"]]
y = df["Pass"]

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

model = LogisticRegression()

model.fit(X_train,y_train)

pred = model.predict([[6]])

print("Will student pass (1=yes 0=no):",pred)
"""
    print(snippet)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def decision_tree():
    """Print a scikit-learn decision-tree classifier example as source text.

    The example is written to standard output only; it is not executed.
    Returns ``None``.
    """
    snippet = """
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

data = {
    "Hours":[1,2,3,4,5,6,7,8],
    "Pass":[0,0,0,0,1,1,1,1]
}

df = pd.DataFrame(data)

X = df[["Hours"]]
y = df["Pass"]

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

model = DecisionTreeClassifier()

model.fit(X_train,y_train)

pred = model.predict([[6]])

print("Prediction:",pred)
"""
    print(snippet)
|
|
89
|
+
|
|
90
|
+
def perceptron():
    """Print a scikit-learn ``Perceptron`` example as source text.

    The example is written to standard output only; it is not executed.
    Returns ``None``.
    """
    snippet = """
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron

data = {
    "Hours":[1,2,3,4,5,6,7,8],
    "Pass":[0,0,0,0,1,1,1,1]
}

df = pd.DataFrame(data)

X = df[["Hours"]]
y = df["Pass"]

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

model = Perceptron()

model.fit(X_train,y_train)

pred = model.predict([[7]])

print("Prediction:",pred)
"""
    print(snippet)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def mlp():
    """Print a scikit-learn ``MLPClassifier`` example as source text.

    The example is written to standard output only; it is not executed.
    Returns ``None``.
    """
    snippet = """
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

data = {
    "Hours":[1,2,3,4,5,6,7,8],
    "Pass":[0,0,0,0,1,1,1,1]
}

df = pd.DataFrame(data)

X = df[["Hours"]]
y = df["Pass"]

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)

model = MLPClassifier()

model.fit(X_train,y_train)

pred = model.predict([[7]])

print("Prediction:",pred)
"""
    print(snippet)
|
|
144
|
+
|
|
145
|
+
def simple_nlp():
    """Print a bag-of-words spam-classifier example as source text.

    The printed example uses ``CountVectorizer`` and ``MultinomialNB``; it is
    written to standard output only and is not executed.  Returns ``None``.
    """
    snippet = """
# Import libraries
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB


# ------------------------------
# STEP 1: Create Text Dataset
# ------------------------------

data = {
    "text":[
        "win free money",
        "claim your prize",
        "hello how are you",
        "lets meet tomorrow",
        "win cash now"
    ],
    "label":[
        "spam",
        "spam",
        "normal",
        "normal",
        "spam"
    ]
}

df = pd.DataFrame(data)

print("Dataset")
print(df)


# ------------------------------
# STEP 2: Convert Text to Numbers
# ------------------------------

# computers cannot understand words
# so we convert text into numbers

vectorizer = CountVectorizer()

X = vectorizer.fit_transform(df["text"])

y = df["label"]

print("Words:",vectorizer.get_feature_names_out())


# ------------------------------
# STEP 3: Train Model
# ------------------------------

model = MultinomialNB()

model.fit(X,y)

print("Model Trained")


# ------------------------------
# STEP 4: Predict New Sentence
# ------------------------------

sentence = ["win money now"]

sentence_vector = vectorizer.transform(sentence)

prediction = model.predict(sentence_vector)

print("Sentence:",sentence)
print("Prediction:",prediction)
"""
    print(snippet)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def rnn_model():
    """Print a Keras ``SimpleRNN`` sentiment example as source text.

    The example is written to standard output only; it is not executed.
    In the printed code the training labels are a NumPy array rather than a
    Python list, because ``model.fit`` is given a NumPy ``x`` and does not
    accept a plain list as the matching ``y``.  Returns ``None``.
    """
    snippet = """
# Import libraries
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


# ------------------------------
# STEP 1: Create Dataset
# ------------------------------

sentences = [
    "i love ai",
    "ai is amazing",
    "i hate waiting",
    "this is bad"
]

# labels must be an array (not a list) to be passed to model.fit
labels = np.array([1,1,0,0])  # 1 = positive, 0 = negative


# ------------------------------
# STEP 2: Convert Words to Numbers
# ------------------------------

tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)

sequences = tokenizer.texts_to_sequences(sentences)

padded = pad_sequences(sequences)

print("Sequences:", padded)


# ------------------------------
# STEP 3: Build RNN Model
# ------------------------------

model = Sequential()

model.add(SimpleRNN(10, input_shape=(padded.shape[1],1)))

model.add(Dense(1, activation="sigmoid"))

model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)


# ------------------------------
# STEP 4: Train Model
# ------------------------------

X = padded.reshape((padded.shape[0], padded.shape[1],1))

model.fit(X, labels, epochs=50, verbose=0)

print("Model Trained")


# ------------------------------
# STEP 5: Predict
# ------------------------------

test = ["i love waiting"]

test_seq = tokenizer.texts_to_sequences(test)
test_pad = pad_sequences(test_seq, maxlen=padded.shape[1])

test_pad = test_pad.reshape((1,test_pad.shape[1],1))

prediction = model.predict(test_pad)

print("Prediction:", prediction)
"""
    print(snippet)
|
|
303
|
+
|
|
304
|
+
def preprocessing_models():
    """Print an end-to-end preprocessing and modelling example as source text.

    The printed example covers encoding, scaling and four scikit-learn
    models.  It is written to standard output only and is not executed.
    The ``OrdinalEncoder`` in the example is given an explicit category order
    (Low < Medium < High); without it the encoder orders the levels
    alphabetically, which defeats the purpose of ordinal encoding.
    Returns ``None``.
    """
    snippet = """
# Import libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

# Encoding
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder

# Scaling
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Models
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier

# Evaluation
from sklearn import metrics


# -----------------------------
# STEP 1 : Dataset
# -----------------------------

data = {
    "Age":[22,25,30,35,40],
    "City":["Delhi","Mumbai","Delhi","Chennai","Mumbai"],
    "Level":["Low","Medium","High","Medium","High"],
    "Salary":[20000,30000,40000,50000,60000],
    "Purchased":["No","No","Yes","Yes","Yes"]
}

df = pd.DataFrame(data)

print("Original Data")
print(df)


# -----------------------------
# STEP 2 : Encoding
# -----------------------------

# Label Encoding (used for target variable)

label = LabelEncoder()
df["Purchased"] = label.fit_transform(df["Purchased"])


# Ordinal Encoding (used when order matters)
# categories lists the levels from lowest to highest; without it the
# encoder sorts them alphabetically (High < Low < Medium)

ordinal = OrdinalEncoder(categories=[["Low","Medium","High"]])
df[["Level"]] = ordinal.fit_transform(df[["Level"]])


# One Hot Encoding (used for city or categories)

df = pd.get_dummies(df, columns=["City"])

print("\\nAfter Encoding")
print(df)


# -----------------------------
# STEP 3 : Feature / Target
# -----------------------------

X = df.drop("Purchased", axis=1)
y = df["Purchased"]

X_train,X_test,y_train,y_test = train_test_split(
    X,y,test_size=0.2
)


# -----------------------------
# STEP 4 : Scaling
# -----------------------------

# StandardScaler
# Used for Linear Regression, Logistic Regression, Neural Networks

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# MinMaxScaler
# Often used for Neural Networks

minmax = MinMaxScaler()

X_train_minmax = minmax.fit_transform(X_train)
X_test_minmax = minmax.transform(X_test)


# -----------------------------
# STEP 5 : Linear Regression
# -----------------------------
# Used for predicting continuous numbers

lin_model = LinearRegression()

lin_model.fit(X_train_scaled, y_train)

pred_lin = lin_model.predict(X_test_scaled)

print("\\nLinear Regression")

print("R2:",metrics.r2_score(y_test,pred_lin))
print("MSE:",metrics.mean_squared_error(y_test,pred_lin))
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test,pred_lin)))


# -----------------------------
# STEP 6 : Logistic Regression
# -----------------------------
# Used for classification (Yes / No)

log_model = LogisticRegression()

log_model.fit(X_train_scaled, y_train)

pred_log = log_model.predict(X_test_scaled)

print("\\nLogistic Regression")

print("Accuracy:",metrics.accuracy_score(y_test,pred_log))


# -----------------------------
# STEP 7 : Decision Tree
# -----------------------------
# Works without scaling

tree_model = DecisionTreeClassifier()

tree_model.fit(X_train, y_train)

pred_tree = tree_model.predict(X_test)

print("\\nDecision Tree")

print("Accuracy:",metrics.accuracy_score(y_test,pred_tree))


# -----------------------------
# STEP 8 : Neural Network (MLP)
# -----------------------------
# Works best with scaled data

mlp_model = MLPClassifier(max_iter=500)

mlp_model.fit(X_train_minmax, y_train)

pred_mlp = mlp_model.predict(X_test_minmax)

print("\\nMLP")

print("Accuracy:",metrics.accuracy_score(y_test,pred_mlp))

"""
    print(snippet)
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def eda_functions():
    """Print a pandas/NumPy exploratory-data-analysis example as source text.

    The example is written to standard output only; it is not executed.
    Compared with the earlier text of the example, three statements are
    corrected so the printed code runs cleanly:

    * ``df.corr(numeric_only=True)`` - the frame contains the string column
      ``City``, which ``df.corr()`` cannot convert on pandas >= 2.0.
    * ``df = df.drop_duplicates()`` - the result was previously discarded.
    * ``df.info()`` is called directly; it prints its report itself and
      returns ``None``, so wrapping it in ``print`` added a stray ``None``.

    Returns ``None``.
    """
    snippet = """
# Import libraries
import pandas as pd
import numpy as np


# -------------------------
# STEP 1 : Create Dataset
# -------------------------

data = {
    "Age":[22,25,30,35,40,100],
    "Salary":[20000,30000,40000,50000,60000,1000000],
    "City":["Delhi","Mumbai","Delhi","Chennai","Mumbai","Delhi"]
}

df = pd.DataFrame(data)

print("Dataset")
print(df)


# -------------------------
# BASIC EDA
# -------------------------

print(df.head())  # first rows
print(df.tail())  # last rows
print(df.shape)  # rows and columns
print(df.columns)  # column names
df.info()  # data types (prints its own report)
print(df.describe())  # statistics


# -------------------------
# NUMPY BASIC OPERATIONS
# -------------------------

arr = np.array(df["Salary"])

print(np.mean(arr))
print(np.median(arr))
print(np.std(arr))
print(np.max(arr))
print(np.min(arr))


# -------------------------
# MISSING VALUES
# -------------------------

print(df.isnull().sum())  # count missing values

df["Age"] = df["Age"].fillna(df["Age"].mean())


# -------------------------
# FILTERING DATA
# -------------------------

print(df[df["Age"] > 30])


# -------------------------
# SORTING
# -------------------------

print(df.sort_values("Salary"))


# -------------------------
# GROUPING
# -------------------------

print(df.groupby("City").mean())


# -------------------------
# ADD NEW COLUMN
# -------------------------

df["Bonus"] = df["Salary"] * 0.1

print(df)


# -------------------------
# REMOVE DUPLICATES
# -------------------------

df = df.drop_duplicates()


# -------------------------
# OUTLIER DETECTION (IQR)
# -------------------------

Q1 = df["Salary"].quantile(0.25)
Q3 = df["Salary"].quantile(0.75)

IQR = Q3 - Q1

lower = Q1 - 1.5 * IQR
upper = Q3 + 1.5 * IQR

df_clean = df[(df["Salary"] >= lower) & (df["Salary"] <= upper)]

print("After removing outliers")
print(df_clean)


# -------------------------
# CORRELATION
# -------------------------

# City is text, so restrict the correlation to numeric columns
print(df.corr(numeric_only=True))


# -------------------------
# NUMPY ARRAY OPERATIONS
# -------------------------

a = np.array([1,2,3,4])

print(np.sum(a))
print(np.sqrt(a))
print(np.log(a))
print(np.square(a))
"""
    print(snippet)
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def linear_regression_from_scratch():
    """Print a NumPy-only least-squares linear-regression example as source text.

    The printed example computes slope, intercept, predictions and the
    MSE / RMSE / R2 metrics by hand.  It is written to standard output only
    and is not executed.  Returns ``None``.
    """
    snippet = """
import numpy as np

# -------------------------
# STEP 1 : Dataset
# -------------------------

# Hours studied
X = np.array([1,2,3,4,5])

# Scores
y = np.array([10,20,30,40,50])


# -------------------------
# STEP 2 : Calculate Mean
# -------------------------

mean_x = np.mean(X)
mean_y = np.mean(y)


# -------------------------
# STEP 3 : Calculate slope (m)
# formula:
# m = Σ((x - x̄)(y - ȳ)) / Σ((x - x̄)^2)

num = np.sum((X - mean_x) * (y - mean_y))
den = np.sum((X - mean_x) ** 2)

m = num / den


# -------------------------
# STEP 4 : Calculate intercept (b)
# formula:
# b = ȳ - m*x̄

b = mean_y - m * mean_x


print("Slope (m):", m)
print("Intercept (b):", b)


# -------------------------
# STEP 5 : Prediction
# -------------------------

y_pred = m * X + b

print("Predictions:", y_pred)


# -------------------------
# STEP 6 : Evaluation
# -------------------------

# MSE
mse = np.mean((y - y_pred) ** 2)

# RMSE
rmse = np.sqrt(mse)

# R2 Score
ss_total = np.sum((y - mean_y) ** 2)
ss_res = np.sum((y - y_pred) ** 2)

r2 = 1 - (ss_res / ss_total)


print("MSE:", mse)
print("RMSE:", rmse)
print("R2:", r2)
"""
    print(snippet)
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
def mlp_from_scratch():
    """Print a NumPy-only two-layer perceptron (XOR) example as source text.

    The example is written to standard output only; it is not executed.
    The bodies of ``sigmoid``, ``sigmoid_derivative`` and the training loop
    in the printed code are indented so that the output is valid Python that
    can be pasted and run as-is.  Returns ``None``.
    """
    snippet = """
import numpy as np

# -------------------------
# STEP 1 : Dataset
# -------------------------

# XOR problem
X = np.array([
    [0,0],
    [0,1],
    [1,0],
    [1,1]
])

y = np.array([[0],[1],[1],[0]])


# -------------------------
# STEP 2 : Initialize weights
# -------------------------

np.random.seed(0)

input_size = 2
hidden_size = 4
output_size = 1

W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))

W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))


# -------------------------
# STEP 3 : Activation Function
# -------------------------

def sigmoid(x):
    return 1/(1+np.exp(-x))

def sigmoid_derivative(x):
    return x*(1-x)


# -------------------------
# STEP 4 : Training Loop
# -------------------------

learning_rate = 0.1
epochs = 10000

for i in range(epochs):

    # -------- Forward Propagation --------

    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)

    z2 = np.dot(a1, W2) + b2
    output = sigmoid(z2)


    # -------- Calculate Error --------

    error = y - output


    # -------- Backpropagation --------

    d_output = error * sigmoid_derivative(output)

    d_hidden = np.dot(d_output, W2.T) * sigmoid_derivative(a1)


    # -------- Update Weights --------

    W2 += np.dot(a1.T, d_output) * learning_rate
    b2 += np.sum(d_output, axis=0, keepdims=True) * learning_rate

    W1 += np.dot(X.T, d_hidden) * learning_rate
    b1 += np.sum(d_hidden, axis=0, keepdims=True) * learning_rate


# -------------------------
# STEP 5 : Final Prediction
# -------------------------

print("Predictions:")
print(output)
"""
    print(snippet)
|
|
774
|
+
|
|
775
|
+
|
|
776
|
+
def help_ml():
    """Print a numbered list of the example generators in this module.

    Writes an ``Available models:`` header followed by one line per
    generator in the form ``<n>. <name>()``.  Returns ``None``.
    """
    generators = (
        "linear_regression",
        "logistic_regression",
        "decision_tree",
        "perceptron",
        "mlp",
        "simple_nlp",
        "rnn_model",
        "preprocessing_models",
        "eda_functions",
        "linear_regression_from_scratch",
        "mlp_from_scratch",
    )
    print("Available models:")
    for position, name in enumerate(generators, start=1):
        print(f"{position}. {name}()")
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
# Running this module directly prints the decision-tree example.
if __name__ == "__main__":
    decision_tree()
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sklern
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Example PyPI package
|
|
5
|
+
Author-email: Panda <mg5401103@gmail.com>
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: groq
|
|
9
|
+
|
|
10
|
+
# Sample Package
|
|
11
|
+
|
|
12
|
+
This is a simple Python package published to PyPI.
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
pip install sklern
|
|
17
|
+
|
|
18
|
+
## Usage
|
|
19
|
+
|
|
20
|
+
from sklern import linear_regression, decision_tree
|
|
21
|
+
|
|
22
|
+
linear_regression()
|
|
23
|
+
decision_tree()
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/sklern/__init__.py
|
|
4
|
+
src/sklern/ai_helper.py
|
|
5
|
+
src/sklern/gen.py
|
|
6
|
+
src/sklern.egg-info/PKG-INFO
|
|
7
|
+
src/sklern.egg-info/SOURCES.txt
|
|
8
|
+
src/sklern.egg-info/dependency_links.txt
|
|
9
|
+
src/sklern.egg-info/requires.txt
|
|
10
|
+
src/sklern.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
groq
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sklern
|