ezyml 1.tar.gz → 1.2.1.tar.gz

This diff shows the content of publicly released versions of the package as they appear in a supported public registry, and is provided for informational purposes only.

Potentially problematic release.

This version of ezyml might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ezyml
- Version: 1
+ Version: 1.2.1
  Summary: A lightweight tool to train, evaluate, and export ML models in one line.
  Home-page: https://github.com/Rktim/ezyml
  Author: Raktim Kalita
@@ -34,16 +34,20 @@ Dynamic: summary
  
  From raw data to a trained model — in just one line of code.
  
- <a href="https://pypi.org/project/ezyml/">
- <img alt="PyPI" src="https://img.shields.io/pypi/v/ezyml?color=blue&label=PyPI&logo=pypi">
- </a>
+ 
  <a href="https://github.com/Rktim/ezyml/blob/main/LICENSE">
  <img alt="License" src="https://img.shields.io/github/license/Rktim/ezyml?color=blue">
  </a>
  <img alt="Python Versions" src="https://img.shields.io/pypi/pyversions/ezyml?logo=python&logoColor=white">
+ 
+ 
+ [![PyPI Downloads](https://static.pepy.tech/badge/ezyml)](https://pepy.tech/projects/ezyml)
  
  </div>
  
+ 
+ 
+ 
  ---
  
  ## 🌟 Why ezyml?
@@ -4,16 +4,20 @@
  
  From raw data to a trained model — in just one line of code.
  
- <a href="https://pypi.org/project/ezyml/">
- <img alt="PyPI" src="https://img.shields.io/pypi/v/ezyml?color=blue&label=PyPI&logo=pypi">
- </a>
+ 
  <a href="https://github.com/Rktim/ezyml/blob/main/LICENSE">
  <img alt="License" src="https://img.shields.io/github/license/Rktim/ezyml?color=blue">
  </a>
  <img alt="Python Versions" src="https://img.shields.io/pypi/pyversions/ezyml?logo=python&logoColor=white">
+ 
+ 
+ [![PyPI Downloads](https://static.pepy.tech/badge/ezyml)](https://pepy.tech/projects/ezyml)
  
  </div>
  
+ 
+ 
+ 
  ---
  
  ## 🌟 Why ezyml?
@@ -0,0 +1,9 @@
+ # ezyml/__init__.py
+ 
+ # This file makes the 'ezyml' directory a Python package.
+ 
+ # Import the main class to make it directly accessible to users
+ from .core import EZTrainer
+ 
+ __version__ = "1.2.1"
+ __author__ = "Raktim Kalita"
@@ -0,0 +1,74 @@
+ # ezyml/cli.py
+ 
+ import argparse
+ import pandas as pd
+ from .core import EZTrainer
+ 
+ def train_cli(args):
+     """Handler for the 'train' command."""
+     print("--- EZYML CLI: Train Mode ---")
+     try:
+         trainer = EZTrainer(
+             data=args.data,
+             target=args.target,
+             model=args.model,
+             task=args.task
+         )
+         trainer.train()
+ 
+         if args.output:
+             trainer.save_model(args.output)
+ 
+         if args.report:
+             trainer.save_report(args.report)
+ 
+     except Exception as e:
+         print(f"\nAn error occurred: {e}")
+ 
+ def reduce_cli(args):
+     """Handler for the 'reduce' command."""
+     print("--- EZYML CLI: Reduce Mode ---")
+     try:
+         trainer = EZTrainer(
+             data=args.data,
+             model=args.model,
+             task='dim_reduction',
+             n_components=args.components
+         )
+         trainer.train()
+ 
+         if args.output:
+             trainer.save_transformed(args.output)
+ 
+     except Exception as e:
+         print(f"\nAn error occurred: {e}")
+ 
+ 
+ def main():
+     """Main function for the command-line interface."""
+     parser = argparse.ArgumentParser(description="EZYML: Train and manage ML models easily from the command line.")
+     subparsers = parser.add_subparsers(dest="command", help="Available commands", required=True)
+ 
+     # --- Train Command ---
+     parser_train = subparsers.add_parser("train", help="Train a classification, regression, or clustering model.")
+     parser_train.add_argument("--data", required=True, help="Path to the input data CSV file.")
+     parser_train.add_argument("--target", help="Name of the target column (for classification/regression).")
+     parser_train.add_argument("--model", default="random_forest", help="Name of the model to train.")
+     parser_train.add_argument("--output", help="Path to save the trained model (.pkl).")
+     parser_train.add_argument("--report", help="Path to save the evaluation report (.json).")
+     parser_train.add_argument("--task", default="auto", choices=["auto", "classification", "regression", "clustering"], help="Specify the task type.")
+     parser_train.set_defaults(func=train_cli)
+ 
+     # --- Reduce Command ---
+     parser_reduce = subparsers.add_parser("reduce", help="Perform dimensionality reduction.")
+     parser_reduce.add_argument("--data", required=True, help="Path to the input data CSV file.")
+     parser_reduce.add_argument("--model", required=True, choices=["pca", "tsne"], help="Dimensionality reduction method.")
+     parser_reduce.add_argument("--components", type=int, required=True, help="Number of components to reduce to.")
+     parser_reduce.add_argument("--output", required=True, help="Path to save the transformed data (.csv).")
+     parser_reduce.set_defaults(func=reduce_cli)
+ 
+     args = parser.parse_args()
+     args.func(args)
+ 
+ if __name__ == '__main__':
+     main()
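
For orientation, here is a hedged smoke test of the new `train` subcommand, driven from Python rather than a shell (equivalent to `python -m ezyml.cli train ...`, which works because of the `__main__` guard above). The dataset path and column name are illustrative placeholders, not taken from this diff:

```python
# Hypothetical invocation of the new CLI added in this release.
# "iris.csv" and "species" are illustrative placeholders.
import sys
from ezyml.cli import main

sys.argv = [
    "ezyml", "train",
    "--data", "iris.csv",        # path to an existing CSV file
    "--target", "species",       # target column for classification
    "--model", "random_forest",  # default; any key in the model maps works
    "--output", "model.pkl",     # optional: persist the fitted pipeline
    "--report", "report.json",   # optional: persist the metrics report
]
main()  # argparse reads sys.argv[1:] and dispatches to train_cli
```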
@@ -0,0 +1,316 @@
+ # ezyml/core.py
+ 
+ import pandas as pd
+ import numpy as np
+ import pickle
+ import json
+ 
+ # Preprocessing
+ from sklearn.model_selection import train_test_split
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
+ from sklearn.compose import ColumnTransformer
+ from sklearn.pipeline import Pipeline
+ from sklearn.impute import SimpleImputer
+ 
+ # Models
+ from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, Lasso, ElasticNet
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor, ExtraTreesClassifier
+ from sklearn.svm import SVC, SVR
+ from sklearn.neighbors import KNeighborsClassifier
+ from sklearn.naive_bayes import GaussianNB
+ from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
+ from sklearn.decomposition import PCA
+ from sklearn.manifold import TSNE
+ import xgboost as xgb
+ 
+ # Metrics
+ from sklearn.metrics import (
+     accuracy_score, f1_score, roc_auc_score, confusion_matrix,
+     mean_absolute_error, mean_squared_error, r2_score,
+     silhouette_score
+ )
+ 
+ # --- Model Dictionaries ---
+ CLASSIFICATION_MODELS = {
+     "logistic_regression": LogisticRegression,
+     "random_forest": RandomForestClassifier,
+     "xgboost": xgb.XGBClassifier,
+     "svm": SVC,
+     "naive_bayes": GaussianNB,
+     "gradient_boosting": GradientBoostingClassifier,
+     "extra_trees": ExtraTreesClassifier,
+     "knn": KNeighborsClassifier,
+ }
+ 
+ REGRESSION_MODELS = {
+     "linear_regression": LinearRegression,
+     "ridge": Ridge,
+     "lasso": Lasso,
+     "elasticnet": ElasticNet,
+     "random_forest": RandomForestRegressor,
+     "xgboost": xgb.XGBRegressor,
+     "svr": SVR,
+     "gradient_boosting": GradientBoostingRegressor,
+ }
+ 
+ CLUSTERING_MODELS = {
+     "kmeans": KMeans,
+     "dbscan": DBSCAN,
+     "agglo": AgglomerativeClustering,
+ }
+ 
+ DIM_REDUCTION_MODELS = {
+     "pca": PCA,
+     "tsne": TSNE,
+ }
+ 
+ 
+ class EZTrainer:
+     """A class to easily train, evaluate, and export ML models."""
+ 
+     def __init__(self, data, target=None, model="random_forest", task="auto",
+                  test_size=0.2, scale=True, n_components=None, random_state=42):
+         """
+         Initializes the EZTrainer.
+ 
+         Args:
+             data (str or pd.DataFrame): Path to CSV or a pandas DataFrame.
+             target (str, optional): Name of the target column. Defaults to None.
+             model (str, optional): Model to use. Defaults to "random_forest".
+             task (str, optional): Type of task. Can be 'auto', 'classification',
+                 'regression', 'clustering', 'dim_reduction'. Defaults to "auto".
+             test_size (float, optional): Proportion of data for the test set. Defaults to 0.2.
+             scale (bool, optional): Whether to scale numerical features. Defaults to True.
+             n_components (int, optional): Number of components for dimensionality reduction. Defaults to None.
+             random_state (int, optional): Random state for reproducibility. Defaults to 42.
+         """
+         self.target = target
+         self.model_name = model
+         self.task = task
+         self.test_size = test_size
+         self.scale = scale
+         self.n_components = n_components
+         self.random_state = random_state
+ 
+         self.df = self._load_data(data)
+         self._auto_detect_task()
+ 
+         self.X = None
+         self.y = None
+         self.X_train, self.X_test, self.y_train, self.y_test = [None] * 4
+ 
+         self.pipeline = None
+         self.report = {}
+         self.transformed_data = None
+ 
+     def _load_data(self, data):
+         """Loads data from path or uses the provided DataFrame."""
+         if isinstance(data, str):
+             print(f"Loading data from {data}...")
+             return pd.read_csv(data)
+         elif isinstance(data, pd.DataFrame):
+             print("Using provided DataFrame.")
+             return data.copy()
+         else:
+             raise TypeError("Data must be a file path (str) or a pandas DataFrame.")
+ 
+     def _auto_detect_task(self):
+         """Automatically detects the ML task based on data and parameters."""
+         if self.task != "auto":
+             print(f"Task specified as: {self.task}")
+             return
+ 
+         if self.target:
+             if self.target not in self.df.columns:
+                 raise ValueError(f"Target column '{self.target}' not found in data.")
+ 
+             target_dtype = self.df[self.target].dtype
+             unique_values = self.df[self.target].nunique()
+ 
+             # Heuristic for classification vs. regression
+             if pd.api.types.is_numeric_dtype(target_dtype) and unique_values > 20:
+                 self.task = "regression"
+             else:
+                 self.task = "classification"
+         elif self.model_name in CLUSTERING_MODELS:
+             self.task = "clustering"
+         elif self.model_name in DIM_REDUCTION_MODELS:
+             self.task = "dim_reduction"
+         else:
+             raise ValueError("Could not auto-detect task. Please specify the 'task' parameter.")
+ 
+         print(f"Auto-detected task as: {self.task}")
+ 
+     def _get_preprocessor(self):
+         """Builds a preprocessor pipeline for numerical and categorical features."""
+         numerical_features = self.X.select_dtypes(include=np.number).columns.tolist()
+         categorical_features = self.X.select_dtypes(include=['object', 'category']).columns.tolist()
+ 
+         print(f"Identified {len(numerical_features)} numerical features: {numerical_features}")
+         print(f"Identified {len(categorical_features)} categorical features: {categorical_features}")
+ 
+         num_steps = [('imputer', SimpleImputer(strategy='median'))]
+         if self.scale:
+             num_steps.append(('scaler', StandardScaler()))
+ 
+         numerical_transformer = Pipeline(steps=num_steps)
+         categorical_transformer = Pipeline(steps=[
+             ('imputer', SimpleImputer(strategy='most_frequent')),
+             ('onehot', OneHotEncoder(handle_unknown='ignore'))
+         ])
+ 
+         return ColumnTransformer(transformers=[
+             ('num', numerical_transformer, numerical_features),
+             ('cat', categorical_transformer, categorical_features)
+         ], remainder='passthrough')
+ 
+     def _calculate_metrics(self):
+         """Calculates and stores performance metrics based on the task."""
+         print("Calculating metrics...")
+         if self.task == "classification":
+             preds = self.pipeline.predict(self.X_test)
+             self.report = {
+                 "accuracy": accuracy_score(self.y_test, preds),
+                 "f1_score": f1_score(self.y_test, preds, average='weighted'),
+                 "confusion_matrix": confusion_matrix(self.y_test, preds).tolist(),
+             }
+             # ROC AUC for binary and multi-class (if applicable)
+             try:
+                 if hasattr(self.pipeline, "predict_proba"):
+                     probs = self.pipeline.predict_proba(self.X_test)
+                     if probs.shape[1] == 2:  # Binary
+                         self.report["roc_auc"] = roc_auc_score(self.y_test, probs[:, 1])
+                     else:  # Multi-class
+                         self.report["roc_auc"] = roc_auc_score(self.y_test, probs, multi_class='ovr')
+             except Exception as e:
+                 print(f"Could not calculate ROC AUC score: {e}")
+ 
+         elif self.task == "regression":
+             preds = self.pipeline.predict(self.X_test)
+             self.report = {
+                 "r2_score": r2_score(self.y_test, preds),
+                 "mae": mean_absolute_error(self.y_test, preds),
+                 "mse": mean_squared_error(self.y_test, preds),
+                 "rmse": np.sqrt(mean_squared_error(self.y_test, preds)),
+             }
+ 
+         elif self.task == "clustering":
+             labels = self.pipeline.named_steps['model'].labels_
+             if len(set(labels)) > 1:  # Silhouette score requires at least 2 clusters
+                 self.report = {
+                     "silhouette_score": silhouette_score(self.X, labels),
+                     "n_clusters": len(set(labels))
+                 }
+             else:
+                 self.report = {"n_clusters": len(set(labels)), "silhouette_score": None}
+ 
+         print("Metrics report:")
+         print(json.dumps(self.report, indent=4))
+ 
+     def train(self):
+         """Trains the specified model."""
+         print(f"\n--- Starting Training for Task: {self.task.upper()} ---")
+ 
+         if self.task in ["classification", "regression"]:
+             self.X = self.df.drop(columns=[self.target])
+             self.y = self.df[self.target]
+ 
+             self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
+                 self.X, self.y, test_size=self.test_size, random_state=self.random_state
+             )
+ 
+             preprocessor = self._get_preprocessor()
+             model_map = CLASSIFICATION_MODELS if self.task == "classification" else REGRESSION_MODELS
+ 
+             if self.model_name not in model_map:
+                 raise ValueError(f"Model '{self.model_name}' not supported for {self.task}.")
+ 
+             model_instance = model_map[self.model_name](random_state=self.random_state) if 'random_state' in model_map[self.model_name]().get_params() else model_map[self.model_name]()
+ 
+             self.pipeline = Pipeline(steps=[
+                 ('preprocessor', preprocessor),
+                 ('model', model_instance)
+             ])
+ 
+             print(f"Training {self.model_name} model...")
+             self.pipeline.fit(self.X_train, self.y_train)
+             self._calculate_metrics()
+ 
+         elif self.task == "clustering":
+             self.X = self.df.copy()
+             preprocessor = self._get_preprocessor()
+ 
+             if self.model_name not in CLUSTERING_MODELS:
+                 raise ValueError(f"Model '{self.model_name}' not supported for clustering.")
+ 
+             model_instance = CLUSTERING_MODELS[self.model_name]()
+ 
+             self.pipeline = Pipeline(steps=[
+                 ('preprocessor', preprocessor),
+                 ('model', model_instance)
+             ])
+ 
+             print(f"Fitting {self.model_name} model...")
+             self.pipeline.fit(self.X)
+             self._calculate_metrics()
+ 
+         elif self.task == "dim_reduction":
+             self.X = self.df.copy()
+             preprocessor = self._get_preprocessor()
+ 
+             if self.model_name not in DIM_REDUCTION_MODELS:
+                 raise ValueError(f"Model '{self.model_name}' not supported for dimensionality reduction.")
+ 
+             model_instance = DIM_REDUCTION_MODELS[self.model_name](n_components=self.n_components, random_state=self.random_state) if self.n_components else DIM_REDUCTION_MODELS[self.model_name](random_state=self.random_state)
+ 
+             self.pipeline = Pipeline(steps=[
+                 ('preprocessor', preprocessor),
+                 ('model', model_instance)
+             ])
+ 
+             print(f"Transforming data with {self.model_name}...")
+             self.transformed_data = self.pipeline.fit_transform(self.X)
+             print(f"Data transformed into {self.transformed_data.shape[1]} dimensions.")
+ 
+         else:
+             raise ValueError(f"Task '{self.task}' is not supported.")
+ 
+         print("--- Training Complete ---")
+ 
+     def predict(self, X_new):
+         """Makes predictions on new data."""
+         if not self.pipeline:
+             raise RuntimeError("Model has not been trained yet. Call .train() first.")
+         if self.task not in ["classification", "regression"]:
+             raise RuntimeError(f"Predict is not available for task '{self.task}'.")
+ 
+         if isinstance(X_new, str):
+             X_new = pd.read_csv(X_new)
+ 
+         return self.pipeline.predict(X_new)
+ 
+     def save_model(self, path="model.pkl"):
+         """Saves the trained pipeline to a .pkl file."""
+         if not self.pipeline:
+             raise RuntimeError("No model to save. Call .train() first.")
+ 
+         with open(path, 'wb') as f:
+             pickle.dump(self.pipeline, f)
+         print(f"Model saved successfully to {path}")
+ 
+     def save_report(self, path="report.json"):
+         """Saves the metrics report to a .json file."""
+         if not self.report:
+             raise RuntimeError("No report to save. Call .train() and ensure metrics were calculated.")
+ 
+         with open(path, 'w') as f:
+             json.dump(self.report, f, indent=4)
+         print(f"Report saved successfully to {path}")
+ 
+     def save_transformed(self, path="transformed_data.csv"):
+         """Saves the transformed data from PCA/t-SNE to a .csv file."""
+         if self.transformed_data is None:
+             raise RuntimeError("No transformed data to save. Run a 'dim_reduction' task first.")
+ 
+         pd.DataFrame(self.transformed_data).to_csv(path, index=False)
+         print(f"Transformed data saved successfully to {path}")
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ezyml
- Version: 1
+ Version: 1.2.1
  Summary: A lightweight tool to train, evaluate, and export ML models in one line.
  Home-page: https://github.com/Rktim/ezyml
  Author: Raktim Kalita
@@ -34,16 +34,20 @@ Dynamic: summary
  
  From raw data to a trained model — in just one line of code.
  
- <a href="https://pypi.org/project/ezyml/">
- <img alt="PyPI" src="https://img.shields.io/pypi/v/ezyml?color=blue&label=PyPI&logo=pypi">
- </a>
+ 
  <a href="https://github.com/Rktim/ezyml/blob/main/LICENSE">
  <img alt="License" src="https://img.shields.io/github/license/Rktim/ezyml?color=blue">
  </a>
  <img alt="Python Versions" src="https://img.shields.io/pypi/pyversions/ezyml?logo=python&logoColor=white">
+ 
+ 
+ [![PyPI Downloads](https://static.pepy.tech/badge/ezyml)](https://pepy.tech/projects/ezyml)
  
  </div>
  
+ 
+ 
+ 
  ---
  
  ## 🌟 Why ezyml?
@@ -1,6 +1,9 @@
  LICENSE
  README.md
  setup.py
+ ezyml/__init__.py
+ ezyml/cli.py
+ ezyml/core.py
  ezyml.egg-info/PKG-INFO
  ezyml.egg-info/SOURCES.txt
  ezyml.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+ ezyml
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
  
  setup(
      name="ezyml",
-     version="1",
+     version="1.2.1",
      author="Raktim Kalita",
     author_email="raktimkalita.ai@gmail.com",
     description="A lightweight tool to train, evaluate, and export ML models in one line.",
@@ -1 +0,0 @@
-