ezyml 1.tar.gz → 1.2.1.tar.gz

This diff shows the content of publicly released versions of the package as they appear in a supported public registry, and is provided for informational purposes only.

Potentially problematic release.

This version of ezyml might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ezyml
- Version: 1
+ Version: 1.2.1
  Summary: A lightweight tool to train, evaluate, and export ML models in one line.
  Home-page: https://github.com/Rktim/ezyml
  Author: Raktim Kalita
@@ -34,16 +34,20 @@ Dynamic: summary
  
  From raw data to a trained model — in just one line of code.
  
- <a href="https://pypi.org/project/ezyml/">
- <img alt="PyPI" src="https://img.shields.io/pypi/v/ezyml?color=blue&label=PyPI&logo=pypi">
- </a>
+ 
  <a href="https://github.com/Rktim/ezyml/blob/main/LICENSE">
  <img alt="License" src="https://img.shields.io/github/license/Rktim/ezyml?color=blue">
  </a>
  <img alt="Python Versions" src="https://img.shields.io/pypi/pyversions/ezyml?logo=python&logoColor=white">
+ 
+ 
+ [![PyPI Downloads](https://static.pepy.tech/badge/ezyml)](https://pepy.tech/projects/ezyml)
  
  </div>
  
+ 
+ 
+ 
  ---
  
  ## 🌟 Why ezyml?
@@ -4,16 +4,20 @@
  
  From raw data to a trained model — in just one line of code.
  
- <a href="https://pypi.org/project/ezyml/">
- <img alt="PyPI" src="https://img.shields.io/pypi/v/ezyml?color=blue&label=PyPI&logo=pypi">
- </a>
+ 
  <a href="https://github.com/Rktim/ezyml/blob/main/LICENSE">
  <img alt="License" src="https://img.shields.io/github/license/Rktim/ezyml?color=blue">
  </a>
  <img alt="Python Versions" src="https://img.shields.io/pypi/pyversions/ezyml?logo=python&logoColor=white">
+ 
+ 
+ [![PyPI Downloads](https://static.pepy.tech/badge/ezyml)](https://pepy.tech/projects/ezyml)
  
  </div>
  
+ 
+ 
+ 
  ---
  
  ## 🌟 Why ezyml?
@@ -0,0 +1,9 @@
+ # ezyml/__init__.py
+ 
+ # This file makes the 'ezyml' directory a Python package.
+ 
+ # Import the main class to make it directly accessible to users
+ from .core import EZTrainer
+ 
+ __version__ = "1.2.1"
+ __author__ = "Raktim Kalita"
@@ -0,0 +1,74 @@
+ # ezyml/cli.py
+ 
+ import argparse
+ import pandas as pd
+ from .core import EZTrainer
+ 
+ def train_cli(args):
+     """Handler for the 'train' command."""
+     print("--- EZYML CLI: Train Mode ---")
+     try:
+         trainer = EZTrainer(
+             data=args.data,
+             target=args.target,
+             model=args.model,
+             task=args.task
+         )
+         trainer.train()
+ 
+         if args.output:
+             trainer.save_model(args.output)
+ 
+         if args.report:
+             trainer.save_report(args.report)
+ 
+     except Exception as e:
+         print(f"\nAn error occurred: {e}")
+ 
+ def reduce_cli(args):
+     """Handler for the 'reduce' command."""
+     print("--- EZYML CLI: Reduce Mode ---")
+     try:
+         trainer = EZTrainer(
+             data=args.data,
+             model=args.model,
+             task='dim_reduction',
+             n_components=args.components
+         )
+         trainer.train()
+ 
+         if args.output:
+             trainer.save_transformed(args.output)
+ 
+     except Exception as e:
+         print(f"\nAn error occurred: {e}")
+ 
+ 
+ def main():
+     """Main function for the command-line interface."""
+     parser = argparse.ArgumentParser(description="EZYML: Train and manage ML models easily from the command line.")
+     subparsers = parser.add_subparsers(dest="command", help="Available commands", required=True)
+ 
+     # --- Train Command ---
+     parser_train = subparsers.add_parser("train", help="Train a classification, regression, or clustering model.")
+     parser_train.add_argument("--data", required=True, help="Path to the input data CSV file.")
+     parser_train.add_argument("--target", help="Name of the target column (for classification/regression).")
+     parser_train.add_argument("--model", default="random_forest", help="Name of the model to train.")
+     parser_train.add_argument("--output", help="Path to save the trained model (.pkl).")
+     parser_train.add_argument("--report", help="Path to save the evaluation report (.json).")
+     parser_train.add_argument("--task", default="auto", choices=["auto", "classification", "regression", "clustering"], help="Specify the task type.")
+     parser_train.set_defaults(func=train_cli)
+ 
+     # --- Reduce Command ---
+     parser_reduce = subparsers.add_parser("reduce", help="Perform dimensionality reduction.")
+     parser_reduce.add_argument("--data", required=True, help="Path to the input data CSV file.")
+     parser_reduce.add_argument("--model", required=True, choices=["pca", "tsne"], help="Dimensionality reduction method.")
+     parser_reduce.add_argument("--components", type=int, required=True, help="Number of components to reduce to.")
+     parser_reduce.add_argument("--output", required=True, help="Path to save the transformed data (.csv).")
+     parser_reduce.set_defaults(func=reduce_cli)
+ 
+     args = parser.parse_args()
+     args.func(args)
+ 
+ if __name__ == '__main__':
+     main()
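
For orientation, here is a hedged smoke test of the new `train` subcommand, driven from Python rather than a shell (equivalent to `python -m ezyml.cli train ...`, which works because of the `__main__` guard above). The dataset path and column name are illustrative placeholders, not taken from this diff:

```python
# Hypothetical invocation of the new CLI added in this release.
# "iris.csv" and "species" are illustrative placeholders.
import sys
from ezyml.cli import main

sys.argv = [
    "ezyml", "train",
    "--data", "iris.csv",        # path to an existing CSV file
    "--target", "species",       # target column for classification
    "--model", "random_forest",  # default; any key in the model maps works
    "--output", "model.pkl",     # optional: persist the fitted pipeline
    "--report", "report.json",   # optional: persist the metrics report
]
main()  # argparse reads sys.argv[1:] and dispatches to train_cli
```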
@@ -0,0 +1,316 @@
+ # ezyml/core.py
+ 
+ import pandas as pd
+ import numpy as np
+ import pickle
+ import json
+ 
+ # Preprocessing
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
+ from sklearn.compose import ColumnTransformer
+ from sklearn.pipeline import Pipeline
+ from sklearn.impute import SimpleImputer
+ 
+ # Models
+ from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, Lasso, ElasticNet
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor, ExtraTreesClassifier
+ from sklearn.svm import SVC, SVR
+ from sklearn.neighbors import KNeighborsClassifier
+ from sklearn.naive_bayes import GaussianNB
+ from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
+ from sklearn.decomposition import PCA
+ from sklearn.manifold import TSNE
+ import xgboost as xgb
+ 
+ # Metrics
+ from sklearn.metrics import (
+     accuracy_score, f1_score, roc_auc_score, confusion_matrix,
+     mean_absolute_error, mean_squared_error, r2_score,
+     silhouette_score
+ )
+ 
+ # --- Model Dictionaries ---
+ CLASSIFICATION_MODELS = {
+     "logistic_regression": LogisticRegression,
+     "random_forest": RandomForestClassifier,
+     "xgboost": xgb.XGBClassifier,
+     "svm": SVC,
+     "naive_bayes": GaussianNB,
+     "gradient_boosting": GradientBoostingClassifier,
+     "extra_trees": ExtraTreesClassifier,
+     "knn": KNeighborsClassifier,
+ }
+ 
+ REGRESSION_MODELS = {
+     "linear_regression": LinearRegression,
+     "ridge": Ridge,
+     "lasso": Lasso,
+     "elasticnet": ElasticNet,
+     "random_forest": RandomForestRegressor,
+     "xgboost": xgb.XGBRegressor,
+     "svr": SVR,
+     "gradient_boosting": GradientBoostingRegressor,
+ }
+ 
+ CLUSTERING_MODELS = {
+     "kmeans": KMeans,
+     "dbscan": DBSCAN,
+     "agglo": AgglomerativeClustering,
+ }
+ 
+ DIM_REDUCTION_MODELS = {
+     "pca": PCA,
+     "tsne": TSNE,
+ }
+ 
+ 
+ class EZTrainer:
+     """A class to easily train, evaluate, and export ML models."""
+ 
+     def __init__(self, data, target=None, model="random_forest", task="auto",
+                  test_size=0.2, scale=True, n_components=None, random_state=42):
+         """
+         Initializes the EZTrainer.
+ 
+         Args:
+             data (str or pd.DataFrame): Path to CSV or a pandas DataFrame.
+             target (str, optional): Name of the target column. Defaults to None.
+             model (str, optional): Model to use. Defaults to "random_forest".
+             task (str, optional): Type of task. Can be 'auto', 'classification',
+                 'regression', 'clustering', 'dim_reduction'. Defaults to "auto".
+             test_size (float, optional): Proportion of data for the test set. Defaults to 0.2.
+             scale (bool, optional): Whether to scale numerical features. Defaults to True.
+             n_components (int, optional): Number of components for dimensionality reduction. Defaults to None.
+             random_state (int, optional): Random state for reproducibility. Defaults to 42.
+         """
+         self.target = target
+         self.model_name = model
+         self.task = task
+         self.test_size = test_size
+         self.scale = scale
+         self.n_components = n_components
+         self.random_state = random_state
+ 
+         self.df = self._load_data(data)
+         self._auto_detect_task()
+ 
+         self.X = None
+         self.y = None
+         self.X_train, self.X_test, self.y_train, self.y_test = [None] * 4
+ 
+         self.pipeline = None
+         self.report = {}
+         self.transformed_data = None
+ 
+     def _load_data(self, data):
+         """Loads data from path or uses the provided DataFrame."""
+         if isinstance(data, str):
+             print(f"Loading data from {data}...")
+             return pd.read_csv(data)
+         elif isinstance(data, pd.DataFrame):
+             print("Using provided DataFrame.")
+             return data.copy()
+         else:
+             raise TypeError("Data must be a file path (str) or a pandas DataFrame.")
+ 
+     def _auto_detect_task(self):
+         """Automatically detects the ML task based on data and parameters."""
+         if self.task != "auto":
+             print(f"Task specified as: {self.task}")
+             return
+ 
+         if self.target:
+             if self.target not in self.df.columns:
+                 raise ValueError(f"Target column '{self.target}' not found in data.")
+ 
+             target_dtype = self.df[self.target].dtype
+             unique_values = self.df[self.target].nunique()
+ 
+             # Heuristic for classification vs. regression
+             if pd.api.types.is_numeric_dtype(target_dtype) and unique_values > 20:
+                 self.task = "regression"
+             else:
+                 self.task = "classification"
+         elif self.model_name in CLUSTERING_MODELS:
+             self.task = "clustering"
+         elif self.model_name in DIM_REDUCTION_MODELS:
+             self.task = "dim_reduction"
+         else:
+             raise ValueError("Could not auto-detect task. Please specify the 'task' parameter.")
+ 
+         print(f"Auto-detected task as: {self.task}")
+ 
+     def _get_preprocessor(self):
+         """Builds a preprocessor pipeline for numerical and categorical features."""
+         numerical_features = self.X.select_dtypes(include=np.number).columns.tolist()
+         categorical_features = self.X.select_dtypes(include=['object', 'category']).columns.tolist()
+ 
+         print(f"Identified {len(numerical_features)} numerical features: {numerical_features}")
+         print(f"Identified {len(categorical_features)} categorical features: {categorical_features}")
+ 
+         num_steps = [('imputer', SimpleImputer(strategy='median'))]
+         if self.scale:
+             num_steps.append(('scaler', StandardScaler()))
+ 
+         numerical_transformer = Pipeline(steps=num_steps)
+         categorical_transformer = Pipeline(steps=[
+             ('imputer', SimpleImputer(strategy='most_frequent')),
+             ('onehot', OneHotEncoder(handle_unknown='ignore'))
+         ])
+ 
+         return ColumnTransformer(transformers=[
+             ('num', numerical_transformer, numerical_features),
+             ('cat', categorical_transformer, categorical_features)
+         ], remainder='passthrough')
+ 
+     def _calculate_metrics(self):
+         """Calculates and stores performance metrics based on the task."""
+         print("Calculating metrics...")
+         if self.task == "classification":
+             preds = self.pipeline.predict(self.X_test)
+             self.report = {
+                 "accuracy": accuracy_score(self.y_test, preds),
+                 "f1_score": f1_score(self.y_test, preds, average='weighted'),
+                 "confusion_matrix": confusion_matrix(self.y_test, preds).tolist(),
+             }
+             # ROC AUC for binary and multi-class (if applicable)
+             try:
+                 if hasattr(self.pipeline, "predict_proba"):
+                     probs = self.pipeline.predict_proba(self.X_test)
+                     if probs.shape[1] == 2:  # Binary
+                         self.report["roc_auc"] = roc_auc_score(self.y_test, probs[:, 1])
+                     else:  # Multi-class
+                         self.report["roc_auc"] = roc_auc_score(self.y_test, probs, multi_class='ovr')
+             except Exception as e:
+                 print(f"Could not calculate ROC AUC score: {e}")
+ 
+         elif self.task == "regression":
+             preds = self.pipeline.predict(self.X_test)
+             self.report = {
+                 "r2_score": r2_score(self.y_test, preds),
+                 "mae": mean_absolute_error(self.y_test, preds),
+                 "mse": mean_squared_error(self.y_test, preds),
+                 "rmse": np.sqrt(mean_squared_error(self.y_test, preds)),
+             }
+ 
+         elif self.task == "clustering":
+             labels = self.pipeline.named_steps['model'].labels_
+             if len(set(labels)) > 1:  # Silhouette score requires at least 2 clusters
+                 self.report = {
+                     "silhouette_score": silhouette_score(self.X, labels),
+                     "n_clusters": len(set(labels))
+                 }
+             else:
+                 self.report = {"n_clusters": len(set(labels)), "silhouette_score": None}
+ 
+         print("Metrics report:")
+         print(json.dumps(self.report, indent=4))
+ 
+     def train(self):
+         """Trains the specified model."""
+         print(f"\n--- Starting Training for Task: {self.task.upper()} ---")
+ 
+         if self.task in ["classification", "regression"]:
+             self.X = self.df.drop(columns=[self.target])
+             self.y = self.df[self.target]
+ 
+             self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
+                 self.X, self.y, test_size=self.test_size, random_state=self.random_state
+             )
+ 
+             preprocessor = self._get_preprocessor()
+             model_map = CLASSIFICATION_MODELS if self.task == "classification" else REGRESSION_MODELS
+ 
+             if self.model_name not in model_map:
+                 raise ValueError(f"Model '{self.model_name}' not supported for {self.task}.")
+ 
+             model_instance = model_map[self.model_name](random_state=self.random_state) if 'random_state' in model_map[self.model_name]().get_params() else model_map[self.model_name]()
+ 
+             self.pipeline = Pipeline(steps=[
+                 ('preprocessor', preprocessor),
+                 ('model', model_instance)
+             ])
+ 
+             print(f"Training {self.model_name} model...")
+             self.pipeline.fit(self.X_train, self.y_train)
+             self._calculate_metrics()
+ 
+         elif self.task == "clustering":
+             self.X = self.df.copy()
+             preprocessor = self._get_preprocessor()
+ 
+             if self.model_name not in CLUSTERING_MODELS:
+                 raise ValueError(f"Model '{self.model_name}' not supported for clustering.")
+ 
+             model_instance = CLUSTERING_MODELS[self.model_name]()
+ 
+             self.pipeline = Pipeline(steps=[
+                 ('preprocessor', preprocessor),
+                 ('model', model_instance)
+             ])
+ 
+             print(f"Fitting {self.model_name} model...")
+             self.pipeline.fit(self.X)
+             self._calculate_metrics()
+ 
+         elif self.task == "dim_reduction":
+             self.X = self.df.copy()
+             preprocessor = self._get_preprocessor()
+ 
+             if self.model_name not in DIM_REDUCTION_MODELS:
+                 raise ValueError(f"Model '{self.model_name}' not supported for dimensionality reduction.")
+ 
+             model_instance = DIM_REDUCTION_MODELS[self.model_name](n_components=self.n_components, random_state=self.random_state) if self.n_components else DIM_REDUCTION_MODELS[self.model_name](random_state=self.random_state)
+ 
+             self.pipeline = Pipeline(steps=[
+                 ('preprocessor', preprocessor),
+                 ('model', model_instance)
+             ])
+ 
+             print(f"Transforming data with {self.model_name}...")
+             self.transformed_data = self.pipeline.fit_transform(self.X)
+             print(f"Data transformed into {self.transformed_data.shape[1]} dimensions.")
+ 
+         else:
+             raise ValueError(f"Task '{self.task}' is not supported.")
+ 
+         print("--- Training Complete ---")
+ 
+     def predict(self, X_new):
+         """Makes predictions on new data."""
+         if not self.pipeline:
+             raise RuntimeError("Model has not been trained yet. Call .train() first.")
+         if self.task not in ["classification", "regression"]:
+             raise RuntimeError(f"Predict is not available for task '{self.task}'.")
+ 
+         if isinstance(X_new, str):
+             X_new = pd.read_csv(X_new)
+ 
+         return self.pipeline.predict(X_new)
+ 
+     def save_model(self, path="model.pkl"):
+         """Saves the trained pipeline to a .pkl file."""
+         if not self.pipeline:
+             raise RuntimeError("No model to save. Call .train() first.")
+ 
+         with open(path, 'wb') as f:
+             pickle.dump(self.pipeline, f)
+         print(f"Model saved successfully to {path}")
+ 
+     def save_report(self, path="report.json"):
+         """Saves the metrics report to a .json file."""
+         if not self.report:
+             raise RuntimeError("No report to save. Call .train() and ensure metrics were calculated.")
+ 
+         with open(path, 'w') as f:
+             json.dump(self.report, f, indent=4)
+         print(f"Report saved successfully to {path}")
+ 
+     def save_transformed(self, path="transformed_data.csv"):
+         """Saves the transformed data from PCA/t-SNE to a .csv file."""
+         if self.transformed_data is None:
+             raise RuntimeError("No transformed data to save. Run a 'dim_reduction' task first.")
+ 
+         pd.DataFrame(self.transformed_data).to_csv(path, index=False)
+         print(f"Transformed data saved successfully to {path}")
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ezyml
- Version: 1
+ Version: 1.2.1
  Summary: A lightweight tool to train, evaluate, and export ML models in one line.
  Home-page: https://github.com/Rktim/ezyml
  Author: Raktim Kalita
@@ -34,16 +34,20 @@ Dynamic: summary
  
  From raw data to a trained model — in just one line of code.
  
- <a href="https://pypi.org/project/ezyml/">
- <img alt="PyPI" src="https://img.shields.io/pypi/v/ezyml?color=blue&label=PyPI&logo=pypi">
- </a>
+ 
  <a href="https://github.com/Rktim/ezyml/blob/main/LICENSE">
  <img alt="License" src="https://img.shields.io/github/license/Rktim/ezyml?color=blue">
  </a>
  <img alt="Python Versions" src="https://img.shields.io/pypi/pyversions/ezyml?logo=python&logoColor=white">
+ 
+ 
+ [![PyPI Downloads](https://static.pepy.tech/badge/ezyml)](https://pepy.tech/projects/ezyml)
  
  </div>
  
+ 
+ 
+ 
  ---
  
  ## 🌟 Why ezyml?
@@ -1,6 +1,9 @@
  LICENSE
  README.md
  setup.py
+ ezyml/__init__.py
+ ezyml/cli.py
+ ezyml/core.py
  ezyml.egg-info/PKG-INFO
  ezyml.egg-info/SOURCES.txt
  ezyml.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+ ezyml
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
  
  setup(
      name="ezyml",
-     version="1",
+     version="1.2.1",
      author="Raktim Kalita",
     author_email="raktimkalita.ai@gmail.com",
     description="A lightweight tool to train, evaluate, and export ML models in one line.",
@@ -1 +0,0 @@
-