ml2000 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml2000-0.1.0.dist-info/METADATA +29 -0
- ml2000-0.1.0.dist-info/RECORD +8 -0
- ml2000-0.1.0.dist-info/WHEEL +5 -0
- ml2000-0.1.0.dist-info/entry_points.txt +2 -0
- ml2000-0.1.0.dist-info/top_level.txt +1 -0
- ml_labs/__init__.py +2 -0
- ml_labs/cli.py +27 -0
- ml_labs/generator.py +420 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ml2000
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A CLI tool to generate machine learning experiment templates and datasets.
|
|
5
|
+
Author-email: Your Name <your.email@example.com>
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: Operating System :: OS Independent
|
|
8
|
+
Requires-Python: >=3.8
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# ML Labs Generator
|
|
12
|
+
|
|
13
|
+
This tool generates boilerplate code and datasets for standard Machine Learning experiments.
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pipx install .
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Generate the experiments and datasets
|
|
25
|
+
ml-labs generate
|
|
26
|
+
|
|
27
|
+
# Print the required packages
|
|
28
|
+
ml-labs requirements
|
|
29
|
+
```
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
ml_labs/__init__.py,sha256=ZuMLGqQP5_DraPm_p4DUb-QSj_PFNP4YsRmKzSy-GNg,40
|
|
2
|
+
ml_labs/cli.py,sha256=c-_oLZT7gJOo_QTiO6XAhCbrGBJePJICT-5GhV02Hrw,950
|
|
3
|
+
ml_labs/generator.py,sha256=MyY_vrfCFIMSIo4xYLyLbi3bEb5AQ40yKqV7b09IzUQ,14125
|
|
4
|
+
ml2000-0.1.0.dist-info/METADATA,sha256=w60Yzh89zI6dBl966O6c5eJzeXdh7jOT28Pq50Y7i4Y,662
|
|
5
|
+
ml2000-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
6
|
+
ml2000-0.1.0.dist-info/entry_points.txt,sha256=RheDcquRaEb6t9RZBNVb8OZb8okfE78lZEofN5nESMo,44
|
|
7
|
+
ml2000-0.1.0.dist-info/top_level.txt,sha256=B1f-D4jn3FwXutJKX3R7MLLfY3VQ9tO8fjm87AjyXMQ,8
|
|
8
|
+
ml2000-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ml_labs
|
ml_labs/__init__.py
ADDED
ml_labs/cli.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
from .generator import generate_experiments, print_requirements, interactive_menu
|
|
4
|
+
|
|
5
|
+
def main(argv=None):
    """Parse command-line arguments and dispatch to the requested action.

    Sub-commands:
        generate      Write all experiment scripts and datasets
                      (``--output-dir``, default ``ML_Experiments``).
        requirements  Print the packages the generated scripts need.

    When no sub-command is given, the interactive menu is started instead.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what the
            console-script entry point relies on.
    """
    parser = argparse.ArgumentParser(description="ML Labs Experiments Generator")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # generate command
    gen_parser = subparsers.add_parser(
        "generate", help="Generate all experiment scripts and datasets"
    )
    gen_parser.add_argument(
        "--output-dir", default="ML_Experiments", help="Output directory name"
    )

    # requirements command (takes no options, so the sub-parser is not kept)
    subparsers.add_parser("requirements", help="Print required packages")

    args = parser.parse_args(argv)

    if args.command == "generate":
        generate_experiments(args.output_dir)
    elif args.command == "requirements":
        print_requirements()
    else:
        # If no arguments provided, run the interactive menu
        interactive_menu()


if __name__ == "__main__":
    main()
|
ml_labs/generator.py
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import csv
|
|
3
|
+
import random
|
|
4
|
+
import math
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
def print_requirements():
    """Print the third-party packages used by the generated experiments.

    One package name is written per line to standard output, so the output
    can be redirected straight into a requirements file.
    """
    reqs = """numpy
pandas
matplotlib
seaborn
scikit-learn
scipy
"""
    # The text already ends with a newline; suppress print()'s own newline so
    # the output does not finish with a stray blank line.
    print(reqs, end="")
|
|
16
|
+
|
|
17
|
+
def generate_datasets(data_dir: Path, seed: "int | None" = None):
    """Write the four synthetic CSV datasets used by the experiments.

    Files created inside *data_dir* (the directory is created if missing,
    existing files are overwritten):

    * ``classification.csv`` - 200 rows: ``feature1``, ``feature2``, ``target``
      (target is 0 or 1; both features are Gaussian around ``2 * target``).
    * ``regression.csv`` - 150 rows: ``x1``, ``x2``, ``y`` where
      ``y = 3.5 * x1 - 2.0 * x2 + 10`` plus Gaussian noise.
    * ``clustering.csv`` - 300 rows: ``f1``, ``f2`` drawn around the centres
      (0, 0), (5, 5) and (-5, 5).
    * ``ratings.csv`` - ``user_id``, ``item_id``, ``rating`` for 20 users and
      10 items; each pair is kept with probability of roughly 0.7 and rated
      1 to 5.

    Args:
        data_dir: Directory to write into (a ``Path`` or a path string).
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so repeated calls produce identical files. When ``None``
            (default) the module-level ``random`` functions are used.
    """
    # Without a seed keep drawing from the shared module-level generator so
    # callers that seed `random` themselves get the same sequence as before.
    rng = random if seed is None else random.Random(seed)

    data_dir = Path(data_dir)
    data_dir.mkdir(parents=True, exist_ok=True)

    def write_csv(name, header, rows):
        # newline="" lets the csv module control line endings itself.
        with open(data_dir / name, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(header)
            writer.writerows(rows)

    def classification_rows():
        # 2 features, 1 target (0 or 1)
        for _ in range(200):
            cls = rng.choice([0, 1])
            f1 = rng.gauss(cls * 2, 1)
            f2 = rng.gauss(cls * 2, 1)
            yield [f1, f2, cls]

    def regression_rows():
        for _ in range(150):
            x1 = rng.uniform(-10, 10)
            x2 = rng.uniform(-10, 10)
            y = 3.5 * x1 - 2.0 * x2 + 10 + rng.gauss(0, 2)
            yield [x1, x2, y]

    def clustering_rows():
        for _ in range(300):
            centre = rng.choice([(0, 0), (5, 5), (-5, 5)])
            f1 = rng.gauss(centre[0], 1)
            f2 = rng.gauss(centre[1], 1)
            yield [f1, f2]

    def rating_rows():
        for user in range(1, 21):  # 20 users
            for item in range(1, 11):  # 10 items
                if rng.random() > 0.3:  # sparse matrix
                    yield [user, item, rng.randint(1, 5)]

    # The row generators are lazy and consumed file by file, so random draws
    # happen in the same order as the files are written.
    # Classification Data (e.g. for Decision Tree, Random Forest, SVM, LR, KNN, NB)
    write_csv("classification.csv", ["feature1", "feature2", "target"], classification_rows())
    # Regression Data (e.g. for Linear, Multiple, Polynomial Regression)
    write_csv("regression.csv", ["x1", "x2", "y"], regression_rows())
    # Clustering Data (e.g. for K-Means, Agglomerative)
    write_csv("clustering.csv", ["f1", "f2"], clustering_rows())
    # Recommendation Data (Collaborative Filtering) - Users, Items, Ratings
    write_csv("ratings.csv", ["user_id", "item_id", "rating"], rating_rows())
|
|
59
|
+
|
|
60
|
+
EXPERIMENTS = {
|
|
61
|
+
"expt_01_intro.py": '''"""
|
|
62
|
+
Expt 01: Introduction to Python for Machine Learning
|
|
63
|
+
Setting up Python environment, Basic Python syntax, data types, variables.
|
|
64
|
+
Introduction to NumPy.
|
|
65
|
+
"""
|
|
66
|
+
import numpy as np
|
|
67
|
+
|
|
68
|
+
# Python basics
|
|
69
|
+
x = 10
|
|
70
|
+
y = "Hello ML"
|
|
71
|
+
print(f"Variables: x={x}, y='{y}'")
|
|
72
|
+
|
|
73
|
+
# NumPy basics
|
|
74
|
+
arr = np.array([1, 2, 3, 4, 5])
|
|
75
|
+
print("NumPy Array:", arr)
|
|
76
|
+
print("Mean:", np.mean(arr))
|
|
77
|
+
print("Dot product:", np.dot(arr, arr))
|
|
78
|
+
''',
|
|
79
|
+
|
|
80
|
+
"expt_02_data.py": '''"""
|
|
81
|
+
Expt 02: Data Manipulation & Visualization
|
|
82
|
+
Pandas library for data manipulation, loading datasets, Data cleaning,
|
|
83
|
+
Indexing, slicing, Data Visualization with Matplotlib and Seaborn.
|
|
84
|
+
"""
|
|
85
|
+
import pandas as pd
|
|
86
|
+
import matplotlib.pyplot as plt
|
|
87
|
+
import seaborn as sns
|
|
88
|
+
|
|
89
|
+
# Load classification dataset
|
|
90
|
+
df = pd.read_csv("datasets/classification.csv")
|
|
91
|
+
|
|
92
|
+
# Data manipulation
|
|
93
|
+
print(df.head())
|
|
94
|
+
print(df.describe())
|
|
95
|
+
filtered_df = df[df['feature1'] > 0]
|
|
96
|
+
print(f"Filtered rows: {len(filtered_df)}")
|
|
97
|
+
|
|
98
|
+
# Visualization
|
|
99
|
+
sns.scatterplot(data=df, x='feature1', y='feature2', hue='target')
|
|
100
|
+
plt.title("Classification Data Visualization")
|
|
101
|
+
plt.savefig("expt_02_plot.png")
|
|
102
|
+
print("Saved plot to expt_02_plot.png")
|
|
103
|
+
''',
|
|
104
|
+
|
|
105
|
+
"expt_03_decision_tree.py": '''"""
|
|
106
|
+
Expt 03: Decision Tree
|
|
107
|
+
Implement and evaluate ID3 / C4.5 algorithm on given dataset.
|
|
108
|
+
Using sklearn's DecisionTreeClassifier (uses optimized CART).
|
|
109
|
+
"""
|
|
110
|
+
import pandas as pd
|
|
111
|
+
from sklearn.model_selection import train_test_split
|
|
112
|
+
from sklearn.tree import DecisionTreeClassifier, plot_tree
|
|
113
|
+
from sklearn.metrics import accuracy_score
|
|
114
|
+
import matplotlib.pyplot as plt
|
|
115
|
+
|
|
116
|
+
df = pd.read_csv("datasets/classification.csv")
|
|
117
|
+
X = df[['feature1', 'feature2']]
|
|
118
|
+
y = df['target']
|
|
119
|
+
|
|
120
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
|
121
|
+
|
|
122
|
+
# Create and train Decision Tree (entropy for ID3/C4.5-like behavior)
|
|
123
|
+
clf = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=42)
|
|
124
|
+
clf.fit(X_train, y_train)
|
|
125
|
+
|
|
126
|
+
# Evaluate
|
|
127
|
+
y_pred = clf.predict(X_test)
|
|
128
|
+
print(f"Decision Tree Accuracy: {accuracy_score(y_test, y_pred):.4f}")
|
|
129
|
+
|
|
130
|
+
# Plot tree
|
|
131
|
+
plt.figure(figsize=(10,6))
|
|
132
|
+
plot_tree(clf, feature_names=['feature1', 'feature2'], class_names=['0', '1'], filled=True)
|
|
133
|
+
plt.savefig("expt_03_tree.png")
|
|
134
|
+
print("Decision tree visualized in expt_03_tree.png")
|
|
135
|
+
''',
|
|
136
|
+
|
|
137
|
+
"expt_04_regression.py": '''"""
|
|
138
|
+
Expt 04: Regression
|
|
139
|
+
Implement and evaluate Linear, Multiple and Polynomial regression algorithms.
|
|
140
|
+
"""
|
|
141
|
+
import pandas as pd
|
|
142
|
+
import numpy as np
|
|
143
|
+
from sklearn.model_selection import train_test_split
|
|
144
|
+
from sklearn.linear_model import LinearRegression
|
|
145
|
+
from sklearn.preprocessing import PolynomialFeatures
|
|
146
|
+
from sklearn.metrics import mean_squared_error, r2_score
|
|
147
|
+
|
|
148
|
+
df = pd.read_csv("datasets/regression.csv")
|
|
149
|
+
X = df[['x1', 'x2']]
|
|
150
|
+
y = df['y']
|
|
151
|
+
|
|
152
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
|
153
|
+
|
|
154
|
+
# Multiple Linear Regression
|
|
155
|
+
model_mlr = LinearRegression()
|
|
156
|
+
model_mlr.fit(X_train, y_train)
|
|
157
|
+
y_pred_mlr = model_mlr.predict(X_test)
|
|
158
|
+
print(f"Multiple Linear Regression MSE: {mean_squared_error(y_test, y_pred_mlr):.4f}")
|
|
159
|
+
|
|
160
|
+
# Polynomial Regression (degree 2)
|
|
161
|
+
poly = PolynomialFeatures(degree=2)
|
|
162
|
+
X_train_poly = poly.fit_transform(X_train)
|
|
163
|
+
X_test_poly = poly.transform(X_test)
|
|
164
|
+
|
|
165
|
+
model_poly = LinearRegression()
|
|
166
|
+
model_poly.fit(X_train_poly, y_train)
|
|
167
|
+
y_pred_poly = model_poly.predict(X_test_poly)
|
|
168
|
+
print(f"Polynomial Regression (d=2) MSE: {mean_squared_error(y_test, y_pred_poly):.4f}")
|
|
169
|
+
''',
|
|
170
|
+
|
|
171
|
+
"expt_05_random_forest.py": '''"""
|
|
172
|
+
Expt 05: Random Forest
|
|
173
|
+
Implement and evaluate Random Forest algorithm on given dataset.
|
|
174
|
+
"""
|
|
175
|
+
import pandas as pd
|
|
176
|
+
from sklearn.model_selection import train_test_split
|
|
177
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
178
|
+
from sklearn.metrics import accuracy_score, classification_report
|
|
179
|
+
|
|
180
|
+
df = pd.read_csv("datasets/classification.csv")
|
|
181
|
+
X = df[['feature1', 'feature2']]
|
|
182
|
+
y = df['target']
|
|
183
|
+
|
|
184
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
|
185
|
+
|
|
186
|
+
clf = RandomForestClassifier(n_estimators=100, random_state=42)
|
|
187
|
+
clf.fit(X_train, y_train)
|
|
188
|
+
y_pred = clf.predict(X_test)
|
|
189
|
+
|
|
190
|
+
print(f"Random Forest Accuracy: {accuracy_score(y_test, y_pred):.4f}")
|
|
191
|
+
print("Classification Report:")
|
|
192
|
+
print(classification_report(y_test, y_pred))
|
|
193
|
+
''',
|
|
194
|
+
|
|
195
|
+
"expt_06_svm.py": '''"""
|
|
196
|
+
Expt 06: Support Vector Machine
|
|
197
|
+
Implement and evaluate SVM algorithm on given dataset.
|
|
198
|
+
"""
|
|
199
|
+
import pandas as pd
|
|
200
|
+
from sklearn.model_selection import train_test_split
|
|
201
|
+
from sklearn.svm import SVC
|
|
202
|
+
from sklearn.metrics import accuracy_score
|
|
203
|
+
|
|
204
|
+
df = pd.read_csv("datasets/classification.csv")
|
|
205
|
+
X = df[['feature1', 'feature2']]
|
|
206
|
+
y = df['target']
|
|
207
|
+
|
|
208
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
|
209
|
+
|
|
210
|
+
clf = SVC(kernel='linear', C=1.0)
|
|
211
|
+
clf.fit(X_train, y_train)
|
|
212
|
+
y_pred = clf.predict(X_test)
|
|
213
|
+
|
|
214
|
+
print(f"SVM (Linear Kernel) Accuracy: {accuracy_score(y_test, y_pred):.4f}")
|
|
215
|
+
''',
|
|
216
|
+
|
|
217
|
+
"expt_07_logistic_regression.py": '''"""
|
|
218
|
+
Expt 07: Logistic Regression
|
|
219
|
+
Implement and evaluate Logistic Regression algorithm on binary classification problem.
|
|
220
|
+
"""
|
|
221
|
+
import pandas as pd
|
|
222
|
+
from sklearn.model_selection import train_test_split
|
|
223
|
+
from sklearn.linear_model import LogisticRegression
|
|
224
|
+
from sklearn.metrics import accuracy_score, confusion_matrix
|
|
225
|
+
|
|
226
|
+
df = pd.read_csv("datasets/classification.csv")
|
|
227
|
+
X = df[['feature1', 'feature2']]
|
|
228
|
+
y = df['target']
|
|
229
|
+
|
|
230
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
|
231
|
+
|
|
232
|
+
clf = LogisticRegression()
|
|
233
|
+
clf.fit(X_train, y_train)
|
|
234
|
+
y_pred = clf.predict(X_test)
|
|
235
|
+
|
|
236
|
+
print(f"Logistic Regression Accuracy: {accuracy_score(y_test, y_pred):.4f}")
|
|
237
|
+
print("Confusion Matrix:")
|
|
238
|
+
print(confusion_matrix(y_test, y_pred))
|
|
239
|
+
''',
|
|
240
|
+
|
|
241
|
+
"expt_08_knn.py": '''"""
|
|
242
|
+
Expt 08: K-Nearest Neighbor
|
|
243
|
+
Implement and visualize KNN clustering algorithm on given dataset.
|
|
244
|
+
Note: KNN is traditionally for classification. K-Means is for clustering.
|
|
245
|
+
We implement KNN Classifier here.
|
|
246
|
+
"""
|
|
247
|
+
import pandas as pd
|
|
248
|
+
from sklearn.model_selection import train_test_split
|
|
249
|
+
from sklearn.neighbors import KNeighborsClassifier
|
|
250
|
+
from sklearn.metrics import accuracy_score
|
|
251
|
+
|
|
252
|
+
df = pd.read_csv("datasets/classification.csv")
|
|
253
|
+
X = df[['feature1', 'feature2']]
|
|
254
|
+
y = df['target']
|
|
255
|
+
|
|
256
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
|
257
|
+
|
|
258
|
+
knn = KNeighborsClassifier(n_neighbors=5)
|
|
259
|
+
knn.fit(X_train, y_train)
|
|
260
|
+
y_pred = knn.predict(X_test)
|
|
261
|
+
|
|
262
|
+
print(f"KNN Classifier Accuracy (K=5): {accuracy_score(y_test, y_pred):.4f}")
|
|
263
|
+
''',
|
|
264
|
+
|
|
265
|
+
"expt_09_naive_bayes.py": '''"""
|
|
266
|
+
Expt 09: Naive Bayes
|
|
267
|
+
Implement Naive Bayes Classifier.
|
|
268
|
+
"""
|
|
269
|
+
import pandas as pd
|
|
270
|
+
from sklearn.model_selection import train_test_split
|
|
271
|
+
from sklearn.naive_bayes import GaussianNB
|
|
272
|
+
from sklearn.metrics import accuracy_score
|
|
273
|
+
|
|
274
|
+
df = pd.read_csv("datasets/classification.csv")
|
|
275
|
+
X = df[['feature1', 'feature2']]
|
|
276
|
+
y = df['target']
|
|
277
|
+
|
|
278
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
|
|
279
|
+
|
|
280
|
+
nb = GaussianNB()
|
|
281
|
+
nb.fit(X_train, y_train)
|
|
282
|
+
y_pred = nb.predict(X_test)
|
|
283
|
+
|
|
284
|
+
print(f"Gaussian Naive Bayes Accuracy: {accuracy_score(y_test, y_pred):.4f}")
|
|
285
|
+
''',
|
|
286
|
+
|
|
287
|
+
"expt_10_kmeans_agglomerative.py": '''"""
|
|
288
|
+
Expt 10: Unsupervised Learning
|
|
289
|
+
Implement and apply suitable measuring parameters for K Means
|
|
290
|
+
& Agglomerative clustering algorithms on given dataset.
|
|
291
|
+
"""
|
|
292
|
+
import pandas as pd
|
|
293
|
+
from sklearn.cluster import KMeans, AgglomerativeClustering
|
|
294
|
+
from sklearn.metrics import silhouette_score
|
|
295
|
+
|
|
296
|
+
df = pd.read_csv("datasets/clustering.csv")
|
|
297
|
+
X = df[['f1', 'f2']]
|
|
298
|
+
|
|
299
|
+
# K-Means
|
|
300
|
+
kmeans = KMeans(n_clusters=3, random_state=42, n_init='auto')
|
|
301
|
+
labels_km = kmeans.fit_predict(X)
|
|
302
|
+
sil_km = silhouette_score(X, labels_km)
|
|
303
|
+
print(f"K-Means Silhouette Score (k=3): {sil_km:.4f}")
|
|
304
|
+
|
|
305
|
+
# Agglomerative
|
|
306
|
+
agglo = AgglomerativeClustering(n_clusters=3)
|
|
307
|
+
labels_agg = agglo.fit_predict(X)
|
|
308
|
+
sil_agg = silhouette_score(X, labels_agg)
|
|
309
|
+
print(f"Agglomerative Silhouette Score (k=3): {sil_agg:.4f}")
|
|
310
|
+
''',
|
|
311
|
+
|
|
312
|
+
"expt_11_recommendation.py": '''"""
|
|
313
|
+
Expt 11: Recommendation System
|
|
314
|
+
Implement a Collaborative Filtering Recommender System.
|
|
315
|
+
Using simple Item-Item Collaborative Filtering via Cosine Similarity.
|
|
316
|
+
"""
|
|
317
|
+
import pandas as pd
|
|
318
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
319
|
+
|
|
320
|
+
# Load ratings dataset
|
|
321
|
+
ratings = pd.read_csv("datasets/ratings.csv")
|
|
322
|
+
|
|
323
|
+
# Create user-item matrix
|
|
324
|
+
user_item_matrix = ratings.pivot_table(index='user_id', columns='item_id', values='rating').fillna(0)
|
|
325
|
+
print("User-Item Matrix shape:", user_item_matrix.shape)
|
|
326
|
+
|
|
327
|
+
# Calculate item similarity
|
|
328
|
+
item_similarity = cosine_similarity(user_item_matrix.T)
|
|
329
|
+
item_sim_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)
|
|
330
|
+
|
|
331
|
+
# Get recommendations for item 1
|
|
332
|
+
print("\\nSimilar items to Item 1:")
|
|
333
|
+
similar_items = item_sim_df[1].sort_values(ascending=False)[1:4] # top 3 excluding itself
|
|
334
|
+
print(similar_items)
|
|
335
|
+
'''
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
def _experiment_title(filename, content):
    """Return the menu title for one experiment template.

    The title is the first line of *content* that contains ``"Expt "``, with
    that marker removed and surrounding whitespace stripped. *filename* is
    returned when no such line exists.
    """
    for line in content.splitlines():
        if "Expt " in line:
            return line.replace("Expt ", "").strip()
    return filename


def interactive_menu():
    """Run a prompt loop that copies chosen experiment scripts into the CWD.

    Lists every entry of ``EXPERIMENTS`` with a 1-based number, then reads
    choices from standard input until the user enters ``q``, presses Ctrl-C,
    or input ends. Each valid choice writes the script into the current
    working directory and, if a dataset file the script references is missing
    from ``./datasets``, regenerates the datasets via ``generate_datasets``.
    """
    keys = list(EXPERIMENTS)
    dataset_files = (
        "classification.csv",
        "regression.csv",
        "clustering.csv",
        "ratings.csv",
    )

    print("=" * 60)
    print(" ML EXPERIMENTS CLI")
    print("=" * 60)
    print("Available Experiments:\n")

    for number, key in enumerate(keys, 1):
        print(f"{number}. {_experiment_title(key, EXPERIMENTS[key])}")

    print("\nEnter the number of the experiment to clone the code (or 'q' to quit):")

    try:
        while True:
            choice = input("> ").strip()
            if choice.lower() == "q":
                print("Goodbye!")
                break

            try:
                choice_idx = int(choice) - 1
            except ValueError:
                print("Invalid input. Please enter a number or 'q'.")
                continue

            if not 0 <= choice_idx < len(keys):
                print("Invalid number. Please try again.")
                continue

            filename = keys[choice_idx]
            content = EXPERIMENTS[filename]

            # Write the file
            out_path = Path.cwd()
            (out_path / filename).write_text(content, encoding="utf-8")
            print(f"\n[+] Cloned '{filename}' into current directory.")

            # Generate datasets automatically when a file the script reads
            # is not there yet (generate_datasets writes all of them).
            data_dir = out_path / "datasets"
            needed = [name for name in dataset_files if f"datasets/{name}" in content]
            if any(not (data_dir / name).exists() for name in needed):
                generate_datasets(data_dir)
                print("[+] Generated required datasets in './datasets/' directory.")

            print("=" * 60)
            print("Enter another number to clone, or 'q' to quit:")
    except (KeyboardInterrupt, EOFError):
        # EOFError: stdin was closed or piped and ran out of input.
        print("\nGoodbye!")
|
|
404
|
+
|
|
405
|
+
def generate_experiments(output_dir: str):
    """Write the datasets and every experiment script into *output_dir*.

    Datasets go into ``<output_dir>/datasets`` (via ``generate_datasets``) and
    each entry of ``EXPERIMENTS`` is written as ``<output_dir>/<filename>``,
    overwriting existing files. Progress is reported on standard output.

    Args:
        output_dir: Target directory; created if it does not exist.
    """
    out_path = Path(output_dir)
    print(f"Creating experiments in: {out_path.absolute()}")

    # Create the target directory here instead of relying on
    # generate_datasets() creating it as the parent of "datasets".
    out_path.mkdir(parents=True, exist_ok=True)

    # 1. Create datasets
    generate_datasets(out_path / "datasets")
    print("Generated datasets.")

    # 2. Create python scripts
    for filename, content in EXPERIMENTS.items():
        (out_path / filename).write_text(content, encoding="utf-8")
        print(f"Generated {filename}")

    print("\nAll experiments generated successfully!")
|