ml2000 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ Metadata-Version: 2.4
2
+ Name: ml2000
3
+ Version: 0.1.0
4
+ Summary: A CLI tool to generate machine learning experiment templates and datasets.
5
+ Author-email: Your Name <your.email@example.com>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Python: >=3.8
9
+ Description-Content-Type: text/markdown
10
+
11
+ # ML Labs Generator
12
+
13
+ This tool generates boilerplate code and datasets for standard Machine Learning experiments.
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ pipx install .
19
+ ```
20
+
21
+ ## Usage
22
+
23
+ ```bash
24
+ # Generate the experiments and datasets
25
+ ml2000 generate
26
+
27
+ # Print the required packages
28
+ ml2000 requirements
29
+ ```
@@ -0,0 +1,8 @@
1
+ ml_labs/__init__.py,sha256=ZuMLGqQP5_DraPm_p4DUb-QSj_PFNP4YsRmKzSy-GNg,40
2
+ ml_labs/cli.py,sha256=c-_oLZT7gJOo_QTiO6XAhCbrGBJePJICT-5GhV02Hrw,950
3
+ ml_labs/generator.py,sha256=MyY_vrfCFIMSIo4xYLyLbi3bEb5AQ40yKqV7b09IzUQ,14125
4
+ ml2000-0.1.0.dist-info/METADATA,sha256=w60Yzh89zI6dBl966O6c5eJzeXdh7jOT28Pq50Y7i4Y,662
5
+ ml2000-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ ml2000-0.1.0.dist-info/entry_points.txt,sha256=RheDcquRaEb6t9RZBNVb8OZb8okfE78lZEofN5nESMo,44
7
+ ml2000-0.1.0.dist-info/top_level.txt,sha256=B1f-D4jn3FwXutJKX3R7MLLfY3VQ9tO8fjm87AjyXMQ,8
8
+ ml2000-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ ml2000 = ml_labs.cli:main
@@ -0,0 +1 @@
1
+ ml_labs
ml_labs/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ # ml_labs package
2
+ __version__ = "0.1.0"
ml_labs/cli.py ADDED
@@ -0,0 +1,27 @@
1
+ import argparse
2
+ import sys
3
+ from .generator import generate_experiments, print_requirements, interactive_menu
4
+
5
def main(argv=None):
    """Entry point for the ``ml2000`` console script.

    Dispatches on the sub-command:

    * ``generate [--output-dir DIR]`` writes every experiment script and
      dataset into ``DIR`` (default ``ML_Experiments``).
    * ``requirements`` prints the packages the generated scripts import.
    * no sub-command starts the interactive menu.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour, so the
            console-script entry point can keep calling ``main()``.
    """
    parser = argparse.ArgumentParser(description="ML Labs Experiments Generator")
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # generate command
    gen_parser = subparsers.add_parser(
        "generate", help="Generate all experiment scripts and datasets"
    )
    gen_parser.add_argument(
        "--output-dir", default="ML_Experiments", help="Output directory name"
    )

    # requirements command (takes no options, so the sub-parser is not kept)
    subparsers.add_parser("requirements", help="Print required packages")

    args = parser.parse_args(argv)

    if args.command == "generate":
        generate_experiments(args.output_dir)
    elif args.command == "requirements":
        print_requirements()
    else:
        # If no sub-command was given, run the interactive menu
        interactive_menu()


if __name__ == "__main__":
    main()
ml_labs/generator.py ADDED
@@ -0,0 +1,420 @@
1
+ import os
2
+ import csv
3
+ import random
4
+ import math
5
+ from pathlib import Path
6
+
7
def print_requirements():
    """Print the third-party packages used by the generated experiments.

    One package name per line, followed by a blank line.
    """
    packages = (
        "numpy",
        "pandas",
        "matplotlib",
        "seaborn",
        "scikit-learn",
        "scipy",
    )
    # The explicit trailing newline plus print()'s own newline yields the
    # blank line that follows the list.
    print("\n".join(packages) + "\n")
16
+
17
def generate_datasets(data_dir: Path, seed=None):
    """Write the four synthetic CSV datasets used by the experiment scripts.

    Files created inside ``data_dir`` (existing files are overwritten):

    * ``classification.csv`` - 200 rows of ``feature1, feature2, target``;
      the target is 0 or 1 and both features are Gaussian (std 1) centred
      on ``2 * target``.
    * ``regression.csv`` - 150 rows of ``x1, x2, y`` with x1, x2 uniform in
      [-10, 10] and ``y = 3.5*x1 - 2.0*x2 + 10`` plus Gaussian noise (std 2).
    * ``clustering.csv`` - 300 rows of ``f1, f2`` drawn around the centres
      (0, 0), (5, 5) and (-5, 5) with std 1.
    * ``ratings.csv`` - ``user_id, item_id, rating`` for 20 users x 10 items;
      a pair is kept when ``random() > 0.3`` and rated with an integer 1-5.

    Args:
        data_dir: Directory to write into; created (with parents) if missing.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so repeated calls produce identical files. When ``None``
            (default) the module-level ``random`` generator is used, exactly
            as before this parameter existed.
    """
    rng = random if seed is None else random.Random(seed)
    data_dir.mkdir(parents=True, exist_ok=True)

    def classification_rows():
        # Classification data (Decision Tree, Random Forest, SVM, LR, KNN, NB)
        for _ in range(200):
            cls = rng.choice([0, 1])
            yield [rng.gauss(cls * 2, 1), rng.gauss(cls * 2, 1), cls]

    def regression_rows():
        # Regression data (Linear, Multiple, Polynomial Regression)
        for _ in range(150):
            x1 = rng.uniform(-10, 10)
            x2 = rng.uniform(-10, 10)
            yield [x1, x2, 3.5 * x1 - 2.0 * x2 + 10 + rng.gauss(0, 2)]

    def clustering_rows():
        # Clustering data (K-Means, Agglomerative)
        centres = [(0, 0), (5, 5), (-5, 5)]
        for _ in range(300):
            cx, cy = rng.choice(centres)
            yield [rng.gauss(cx, 1), rng.gauss(cy, 1)]

    def rating_rows():
        # Recommendation data (Collaborative Filtering); dropping some
        # user/item pairs leaves a sparse ratings matrix.
        for user in range(1, 21):
            for item in range(1, 11):
                if rng.random() > 0.3:
                    yield [user, item, rng.randint(1, 5)]

    # The row generators are lazy, so random numbers are drawn in file order.
    tables = (
        ("classification.csv", ["feature1", "feature2", "target"], classification_rows()),
        ("regression.csv", ["x1", "x2", "y"], regression_rows()),
        ("clustering.csv", ["f1", "f2"], clustering_rows()),
        ("ratings.csv", ["user_id", "item_id", "rating"], rating_rows()),
    )
    for filename, header, rows in tables:
        # newline="" lets the csv module control line endings itself.
        with open(data_dir / filename, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(header)
            writer.writerows(rows)
59
+
60
# Maps each generated script's filename to its full source text.
# Every template is a flat script that reads its input from a "datasets/"
# directory relative to the working directory it is run from. The first
# "Expt NN: ..." docstring line doubles as the title shown in the
# interactive menu.
EXPERIMENTS = {
    "expt_01_intro.py": '''"""
Expt 01: Introduction to Python for Machine Learning
Setting up Python environment, Basic Python syntax, data types, variables.
Introduction to NumPy.
"""
import numpy as np

# Python basics
x = 10
y = "Hello ML"
print(f"Variables: x={x}, y='{y}'")

# NumPy basics
arr = np.array([1, 2, 3, 4, 5])
print("NumPy Array:", arr)
print("Mean:", np.mean(arr))
print("Dot product:", np.dot(arr, arr))
''',

    "expt_02_data.py": '''"""
Expt 02: Data Manipulation & Visualization
Pandas library for data manipulation, loading datasets, Data cleaning,
Indexing, slicing, Data Visualization with Matplotlib and Seaborn.
"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load classification dataset
df = pd.read_csv("datasets/classification.csv")

# Data manipulation
print(df.head())
print(df.describe())
filtered_df = df[df['feature1'] > 0]
print(f"Filtered rows: {len(filtered_df)}")

# Visualization
sns.scatterplot(data=df, x='feature1', y='feature2', hue='target')
plt.title("Classification Data Visualization")
plt.savefig("expt_02_plot.png")
print("Saved plot to expt_02_plot.png")
''',

    "expt_03_decision_tree.py": '''"""
Expt 03: Decision Tree
Implement and evaluate ID3 / C4.5 algorithm on given dataset.
Using sklearn's DecisionTreeClassifier (uses optimized CART).
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

df = pd.read_csv("datasets/classification.csv")
X = df[['feature1', 'feature2']]
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create and train Decision Tree (entropy for ID3/C4.5-like behavior)
clf = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=42)
clf.fit(X_train, y_train)

# Evaluate
y_pred = clf.predict(X_test)
print(f"Decision Tree Accuracy: {accuracy_score(y_test, y_pred):.4f}")

# Plot tree
plt.figure(figsize=(10,6))
plot_tree(clf, feature_names=['feature1', 'feature2'], class_names=['0', '1'], filled=True)
plt.savefig("expt_03_tree.png")
print("Decision tree visualized in expt_03_tree.png")
''',

    # Covers all three models named in its docstring and reports R2 next to
    # MSE, so every import in the template is actually used.
    "expt_04_regression.py": '''"""
Expt 04: Regression
Implement and evaluate Linear, Multiple and Polynomial regression algorithms.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score

df = pd.read_csv("datasets/regression.csv")
X = df[['x1', 'x2']]
y = df['y']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Simple Linear Regression (single feature: x1)
model_slr = LinearRegression()
model_slr.fit(X_train[['x1']], y_train)
y_pred_slr = model_slr.predict(X_test[['x1']])
print(f"Simple Linear Regression MSE: {mean_squared_error(y_test, y_pred_slr):.4f}")
print(f"Simple Linear Regression R2: {r2_score(y_test, y_pred_slr):.4f}")

# Multiple Linear Regression
model_mlr = LinearRegression()
model_mlr.fit(X_train, y_train)
y_pred_mlr = model_mlr.predict(X_test)
print(f"Multiple Linear Regression MSE: {mean_squared_error(y_test, y_pred_mlr):.4f}")
print(f"Multiple Linear Regression R2: {r2_score(y_test, y_pred_mlr):.4f}")

# Polynomial Regression (degree 2)
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

model_poly = LinearRegression()
model_poly.fit(X_train_poly, y_train)
y_pred_poly = model_poly.predict(X_test_poly)
print(f"Polynomial Regression (d=2) MSE: {mean_squared_error(y_test, y_pred_poly):.4f}")
print(f"Polynomial Regression (d=2) R2: {r2_score(y_test, y_pred_poly):.4f}")
''',

    "expt_05_random_forest.py": '''"""
Expt 05: Random Forest
Implement and evaluate Random Forest algorithm on given dataset.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

df = pd.read_csv("datasets/classification.csv")
X = df[['feature1', 'feature2']]
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print(f"Random Forest Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))
''',

    "expt_06_svm.py": '''"""
Expt 06: Support Vector Machine
Implement and evaluate SVM algorithm on given dataset.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

df = pd.read_csv("datasets/classification.csv")
X = df[['feature1', 'feature2']]
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

clf = SVC(kernel='linear', C=1.0)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print(f"SVM (Linear Kernel) Accuracy: {accuracy_score(y_test, y_pred):.4f}")
''',

    "expt_07_logistic_regression.py": '''"""
Expt 07: Logistic Regression
Implement and evaluate Logistic Regression algorithm on binary classification problem.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

df = pd.read_csv("datasets/classification.csv")
X = df[['feature1', 'feature2']]
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

clf = LogisticRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print(f"Logistic Regression Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
''',

    "expt_08_knn.py": '''"""
Expt 08: K-Nearest Neighbor
Implement and visualize KNN clustering algorithm on given dataset.
Note: KNN is traditionally for classification. K-Means is for clustering.
We implement KNN Classifier here.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

df = pd.read_csv("datasets/classification.csv")
X = df[['feature1', 'feature2']]
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

print(f"KNN Classifier Accuracy (K=5): {accuracy_score(y_test, y_pred):.4f}")
''',

    "expt_09_naive_bayes.py": '''"""
Expt 09: Naive Bayes
Implement Naive Bayes Classifier.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

df = pd.read_csv("datasets/classification.csv")
X = df[['feature1', 'feature2']]
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

print(f"Gaussian Naive Bayes Accuracy: {accuracy_score(y_test, y_pred):.4f}")
''',

    # n_init is an explicit integer because the 'auto' value is rejected by
    # scikit-learn releases older than 1.2.
    "expt_10_kmeans_agglomerative.py": '''"""
Expt 10: Unsupervised Learning
Implement and apply suitable measuring parameters for K Means
& Agglomerative clustering algorithms on given dataset.
"""
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import silhouette_score

df = pd.read_csv("datasets/clustering.csv")
X = df[['f1', 'f2']]

# K-Means
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
labels_km = kmeans.fit_predict(X)
sil_km = silhouette_score(X, labels_km)
print(f"K-Means Silhouette Score (k=3): {sil_km:.4f}")

# Agglomerative
agglo = AgglomerativeClustering(n_clusters=3)
labels_agg = agglo.fit_predict(X)
sil_agg = silhouette_score(X, labels_agg)
print(f"Agglomerative Silhouette Score (k=3): {sil_agg:.4f}")
''',

    # The query item is removed by label before ranking, so the result does
    # not depend on the item sorting first against itself.
    "expt_11_recommendation.py": '''"""
Expt 11: Recommendation System
Implement a Collaborative Filtering Recommender System.
Using simple Item-Item Collaborative Filtering via Cosine Similarity.
"""
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load ratings dataset
ratings = pd.read_csv("datasets/ratings.csv")

# Create user-item matrix
user_item_matrix = ratings.pivot_table(index='user_id', columns='item_id', values='rating').fillna(0)
print("User-Item Matrix shape:", user_item_matrix.shape)

# Calculate item similarity
item_similarity = cosine_similarity(user_item_matrix.T)
item_sim_df = pd.DataFrame(item_similarity, index=user_item_matrix.columns, columns=user_item_matrix.columns)

# Get recommendations for item 1
print("\\nSimilar items to Item 1:")
similar_items = item_sim_df[1].drop(1).sort_values(ascending=False).head(3)  # top 3 excluding itself
print(similar_items)
'''
}
337
+
338
# Dataset files that generate_datasets() produces; templates refer to them
# as "datasets/<name>".
_DATASET_NAMES = (
    "classification.csv",
    "regression.csv",
    "clustering.csv",
    "ratings.csv",
)


def _experiment_title(filename, content):
    """Return the menu title: the first "Expt ..." line minus the prefix, else the filename."""
    for line in content.splitlines():
        if "Expt " in line:
            return line.replace("Expt ", "").strip()
    return filename


def _needs_datasets(content, data_dir):
    """Return True if the template reads a datasets/*.csv file that is missing from data_dir."""
    return any(
        f"datasets/{name}" in content and not (data_dir / name).exists()
        for name in _DATASET_NAMES
    )


def _clone_experiment(filename, out_path):
    """Write one template into out_path and generate the datasets it needs."""
    content = EXPERIMENTS[filename]
    (out_path / filename).write_text(content, encoding="utf-8")
    print(f"\n[+] Cloned '{filename}' into current directory.")

    data_dir = out_path / "datasets"
    if _needs_datasets(content, data_dir):
        # generate_datasets() always writes the full set of CSV files.
        generate_datasets(data_dir)
        print("[+] Generated required datasets in './datasets/' directory.")


def interactive_menu():
    """Run the interactive experiment picker on stdin/stdout.

    Prints a numbered list of the templates in ``EXPERIMENTS`` and then
    reads choices in a loop. A valid number writes that script into the
    current working directory and, if the script reads a dataset that is
    not yet present in ``./datasets``, generates the datasets. ``q``,
    Ctrl-C or end of input leaves the loop. Invalid input and file-system
    errors are reported and the prompt is shown again.
    """
    print("=" * 60)
    print(" ML EXPERIMENTS CLI")
    print("=" * 60)
    print("Available Experiments:\n")

    keys = list(EXPERIMENTS)
    for number, key in enumerate(keys, 1):
        print(f"{number}. {_experiment_title(key, EXPERIMENTS[key])}")

    print("\nEnter the number of the experiment to clone (or 'q' to quit):")

    while True:
        try:
            choice = input("> ").strip()
            if choice.lower() == "q":
                print("Goodbye!")
                break

            choice_idx = int(choice) - 1
            if not 0 <= choice_idx < len(keys):
                print("Invalid number. Please try again.")
                continue

            _clone_experiment(keys[choice_idx], Path.cwd())
            print("=" * 60)
            print("Enter another number to clone, or 'q' to quit:")
        except ValueError:
            print("Invalid input. Please enter a number or 'q'.")
        except OSError as exc:
            # e.g. read-only working directory; report and keep the menu alive
            print(f"Could not write files: {exc}")
        except (KeyboardInterrupt, EOFError):
            # EOFError: stdin was closed or piped input ran out
            print("\nGoodbye!")
            break
404
+
405
def generate_experiments(output_dir: str):
    """Write every experiment script and the datasets into ``output_dir``.

    The datasets go into ``<output_dir>/datasets`` and each entry of
    ``EXPERIMENTS`` is written as ``<output_dir>/<filename>`` (UTF-8),
    overwriting files that already exist. Progress is printed to stdout.

    Args:
        output_dir: Target directory; created (with parents) if missing.
    """
    out_path = Path(output_dir)
    print(f"Creating experiments in: {out_path.absolute()}")

    # Create the target directory here instead of relying on
    # generate_datasets() creating it as a parent of "datasets".
    out_path.mkdir(parents=True, exist_ok=True)

    # 1. Create datasets
    generate_datasets(out_path / "datasets")
    print("Generated datasets.")

    # 2. Create python scripts
    for filename, content in EXPERIMENTS.items():
        (out_path / filename).write_text(content, encoding="utf-8")
        print(f"Generated {filename}")

    print("\nAll experiments generated successfully!")