gradia 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,203 @@
1
+ from typing import Any, Dict, List
2
+ from tqdm import tqdm
3
+ import pandas as pd
4
+ import numpy as np
5
+ import time
6
+ import json
7
+ import pickle
8
+ from pathlib import Path
9
+ from sklearn.model_selection import train_test_split
10
+ from sklearn.metrics import (
11
+ accuracy_score, mean_squared_error, r2_score,
12
+ precision_score, recall_score, f1_score, mean_absolute_error, confusion_matrix
13
+ )
14
+ from ..models.base import GradiaModel
15
+ from ..models.sklearn_wrappers import ModelFactory
16
+ from ..core.scenario import Scenario
17
+ from .callbacks import Callback, EventLogger
18
+
19
class Trainer:
    """Runs the end-to-end training pipeline for a Scenario.

    Loads the dataset, preprocesses features, fits the configured model
    (iteratively when the model supports it), evaluates it each "epoch",
    and notifies callbacks so the UI can follow progress via the event log.
    """

    def __init__(self, scenario: Scenario, config: Dict[str, Any], run_dir: str):
        """
        Args:
            scenario: Dataset location, target column, feature list, task type.
            config: Run configuration with 'model' and 'training' sections.
            run_dir: Directory where callbacks write the event log.
        """
        self.scenario = scenario
        self.config = config
        self.run_dir = run_dir
        print(f"DEBUG: Trainer initialized with RUN_DIR: {self.run_dir}")
        self.model: GradiaModel = ModelFactory.create(
            config['model']['type'],
            scenario.task_type,
            config['model'].get('params', {})
        )
        self.callbacks: List[Callback] = [EventLogger(run_dir)]

    def run(self):
        """Execute the pipeline: load, preprocess, split, fit, evaluate, save.

        Re-raises any failure after printing a traceback so the caller
        (e.g. the server's training thread) can observe it.
        """
        print("DEBUG: Trainer.run() started.")
        try:
            # 1. Load Data
            df = self._load_full_data()

            # 2. Preprocess: drop rows with missing values
            df = df.dropna()

            # Separate target and features
            y = df[self.scenario.target_column]
            X = df[self.scenario.features]

            # --- Robust Preprocessing ---
            # Identify non-numeric columns
            non_numeric_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()

            cols_to_drop = []
            cols_to_encode = []

            for col in non_numeric_cols:
                # High-cardinality heuristic (>50 unique and >5% of rows)
                # => drop (likely an ID or name column)
                unique_count = X[col].nunique()
                if unique_count > 50 and unique_count > len(X) * 0.05:
                    cols_to_drop.append(col)
                else:
                    cols_to_encode.append(col)

            if cols_to_drop:
                X = X.drop(columns=cols_to_drop)
                print(f"Dropped high-cardinality/ID columns: {cols_to_drop}")

            # One-hot encode the remaining categorical columns
            if cols_to_encode:
                X = pd.get_dummies(X, columns=cols_to_encode, drop_first=True)
                print(f"Encoded columns: {cols_to_encode}")

            # Update the feature list for the UI (drop/encode changed the columns)
            self.scenario.features = X.columns.tolist()
            # --- End Preprocessing ---

            # Simple label encoding for a string classification target
            if self.scenario.task_type == 'classification' and y.dtype == 'object':
                y = y.astype('category').cat.codes

            # 3. Split
            test_size = self.config['training'].get('test_split', 0.2)
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size,
                random_state=self.config['training'].get('random_seed', 42)
            )

            # Notify start. 'epochs' is read once here and reused by the fit
            # loop below (previously read twice).
            epochs = self.config['training'].get('epochs', 10)
            self._dispatch('on_train_begin', {
                "scenario": str(self.scenario),
                "samples": len(df),
                "features": self.scenario.features,
                "epochs": epochs
            })

            # 4. Fit Loop
            if self.model.supports_iterative:
                # For Random Forest with warm_start we grow the ensemble from
                # zero estimators; 'epochs' acts as the total estimator count.
                if hasattr(self.model.model, "n_estimators"):
                    self.model.model.n_estimators = 0

                classes = np.unique(y) if self.scenario.task_type == 'classification' else None

                # tqdm progress output to the console (module-level 'time' is
                # used; the redundant inner imports were removed)
                with tqdm(range(1, epochs + 1), desc="Training", unit="epoch", colour="green") as pbar:
                    for epoch in pbar:
                        # Small delay to visualize speed if training is too fast
                        time.sleep(0.1)

                        self.model.partial_fit(X_train, y_train, classes=classes)

                        # Evaluate and broadcast per-epoch metrics
                        metrics = self._evaluate(X_train, y_train, X_test, y_test)
                        self._dispatch('on_epoch_end', epoch, metrics)
            else:
                # Non-iterative models (SVM, KNN, DecisionTree, etc.):
                # fit once, then simulate "epochs" for user visual satisfaction
                print("Training standard model (single batch fits)...")
                self.model.fit(X_train, y_train)

                # Compute final metrics once; the model no longer changes
                metrics = self._evaluate(X_train, y_train, X_test, y_test)

                # Simulate a progress bar so the UI doesn't look broken
                with tqdm(range(1, epochs + 1), desc="Training", unit="epoch", colour="blue") as pbar:
                    for epoch in pbar:
                        time.sleep(0.1)  # Simulate work
                        # Broadcast the SAME metrics for every "epoch" since
                        # the model doesn't change, keeping the UI consistent
                        self._dispatch('on_epoch_end', epoch, metrics)

                        # Update the progress bar postfix with headline metrics
                        pf = {}
                        if 'train_acc' in metrics:
                            pf['acc'] = f"{metrics['train_acc']:.3f}"
                        if 'train_mse' in metrics:
                            pf['mse'] = f"{metrics['train_mse']:.3f}"
                        pbar.set_postfix(pf)

            # 5. Training complete: broadcast feature importance exactly once
            # (previously computed twice, with the first result unused)
            self._dispatch("on_train_end", {
                "epoch": epochs,
                "feature_importance": self.model.get_feature_importance()
            })

            # 6. Optionally persist the fitted model
            if self.config.get('save_model'):
                ckpt_dir = Path(self.run_dir) / "models" / "best-ckpt"
                ckpt_dir.mkdir(parents=True, exist_ok=True)
                ckpt_path = ckpt_dir / "model.pkl"
                with open(ckpt_path, "wb") as f:
                    pickle.dump(self.model, f)
                print(f"Model saved to {ckpt_path}")

            print(f"DEBUG: Trainer.run() finished successfully.")

        except Exception as e:
            print(f"CRITICAL ERROR IN TRAINER: {e}")
            import traceback
            traceback.print_exc()
            # Bare raise preserves the original traceback ('raise e' resets it)
            raise

    def _evaluate(self, X_train, y_train, X_test, y_test):
        """Compute train/test metrics appropriate for the scenario's task type."""
        preds_train = self.model.predict(X_train)
        preds_test = self.model.predict(X_test)

        metrics = {}
        if self.scenario.task_type == 'classification':
            metrics['train_acc'] = accuracy_score(y_train, preds_train)
            metrics['test_acc'] = accuracy_score(y_test, preds_test)

            # Weighted averages so multiclass targets are supported
            metrics['precision'] = precision_score(y_test, preds_test, average='weighted', zero_division=0)
            metrics['recall'] = recall_score(y_test, preds_test, average='weighted', zero_division=0)
            metrics['f1'] = f1_score(y_test, preds_test, average='weighted', zero_division=0)
        else:
            metrics['train_mse'] = mean_squared_error(y_train, preds_train)
            metrics['test_mse'] = mean_squared_error(y_test, preds_test)
            metrics['mae'] = mean_absolute_error(y_test, preds_test)
            metrics['r2'] = r2_score(y_test, preds_test)

        return metrics

    def _load_full_data(self):
        """Load the whole dataset into memory (MVP): CSV or Parquet by extension."""
        path = self.scenario.dataset_path
        if path.endswith('.csv'):
            return pd.read_csv(path)
        return pd.read_parquet(path)

    def _dispatch(self, method_name, *args, **kwargs):
        """Invoke *method_name* on every registered callback, in order."""
        for cb in self.callbacks:
            getattr(cb, method_name)(*args, **kwargs)
Binary file
gradia/viz/server.py ADDED
@@ -0,0 +1,228 @@
1
+ from fastapi import FastAPI, Request
2
+ from fastapi.staticfiles import StaticFiles
3
+ from fastapi.templating import Jinja2Templates
4
+ from fastapi.responses import JSONResponse, RedirectResponse
5
+ import uvicorn
6
+ import json
7
+ import threading
8
+ from pathlib import Path
9
+ from typing import Dict, Any
10
+
11
+ from ..trainer.engine import Trainer
12
+
13
+ import psutil
14
+ import time
15
+
16
app = FastAPI()

# Global State (Injected by CLI)
# These module-level globals are rebound by the CLI / start_server() before
# the server begins handling requests.
SCENARIO = None          # active Scenario object (set by CLI)
CONFIG_MGR = None        # config manager used to persist run configuration
RUN_DIR = Path(".gradia_logs").resolve()  # default; start_server() overrides
DEFAULT_CONFIG = {}      # baseline config merged with per-request overrides
TRAINER = None           # current Trainer instance (None until /api/start)
TRAINING_THREAD = None   # daemon thread running Trainer.run()
SYSTEM_THREAD = None     # daemon thread running system_monitor_loop()

# Mounts
BASE_DIR = Path(__file__).resolve().parent

app.mount("/static", StaticFiles(directory=BASE_DIR / "static"), name="static")
# Mount assets if they exist outside static, or ensure user put them in static. Assuming viz/assets
assets_path = BASE_DIR / "assets"
if assets_path.exists():
    app.mount("/assets", StaticFiles(directory=assets_path), name="assets")

templates = Jinja2Templates(directory=BASE_DIR / "templates")

# Lock shared with the training callbacks so event-log writes don't interleave
from ..trainer.callbacks import log_lock

# ... imports ...
import os
43
# System Monitor
def system_monitor_loop():
    """Background sampler: append a CPU/RAM usage event to events.jsonl ~1/sec.

    Runs forever in a daemon thread started by the FastAPI startup event.
    Writes under log_lock so lines don't interleave with trainer callbacks.
    """
    while True:
        # psutil blocks for the sampling interval, pacing the loop at ~1 Hz
        cpu = psutil.cpu_percent(interval=1)
        mem = psutil.virtual_memory().percent
        t = time.time()

        event = {
            "timestamp": t,
            "type": "system_metrics",
            # NOTE(review): "epoch" carries the unix timestamp here, not a
            # training epoch — presumably for chart x-axis; confirm in the UI.
            "data": {"cpu": cpu, "ram": mem, "epoch": t}
        }

        # Re-resolve the path every iteration: start_server() rebinds the
        # RUN_DIR global, so a path captured once at thread start could go
        # stale (the original bound it before the loop).
        log_path = RUN_DIR / "events.jsonl"
        if RUN_DIR.exists():
            with log_lock:
                with open(log_path, "a") as f:
                    f.write(json.dumps(event) + "\n")
                    f.flush()
                    os.fsync(f.fileno())
63
+
64
# Launch the background system monitor once the server has started.
@app.on_event("startup")
async def startup_event():
    """Spawn the system-metrics sampler in a daemon thread at server boot."""
    global SYSTEM_THREAD
    monitor = threading.Thread(target=system_monitor_loop, daemon=True)
    monitor.start()
    SYSTEM_THREAD = monitor
70
+
71
+
72
@app.get("/")
async def read_root(request: Request):
    """Landing page: show the dashboard once a trainer exists, else configure."""
    if TRAINER is not None:
        return templates.TemplateResponse("index.html", {"request": request, "scenario": SCENARIO})
    return RedirectResponse("/configure")
77
+
78
@app.get("/configure")
async def configure_page(request: Request):
    """Render the pre-training configuration form."""
    if SCENARIO is None:
        return "System not initialized correctly from CLI."

    context = {
        "request": request,
        "scenario": SCENARIO,
        "features": SCENARIO.features,
        "default_config": DEFAULT_CONFIG,
    }
    return templates.TemplateResponse("configure.html", context)
89
+
90
@app.post("/api/start")
async def start_training(config_data: Dict[str, Any]):
    """Build the effective run config, create the Trainer, and launch training.

    Expects config_data shaped like
    {model: {type, params}, training: {epochs, ...}, project_name, save_model}.
    Training runs in a daemon thread so this request returns immediately.
    """
    global TRAINER, TRAINING_THREAD

    # Merge received config with defaults
    full_config = DEFAULT_CONFIG.copy()

    full_config['model'] = config_data.get('model', full_config['model'])
    # BUG FIX: full_config is a *shallow* copy, so the previous in-place
    # .update() mutated the nested 'training' dict shared with DEFAULT_CONFIG,
    # leaking settings across requests. Build a fresh merged dict instead.
    full_config['training'] = {**full_config['training'], **config_data.get('training', {})}

    # New fields
    full_config['project_name'] = config_data.get('project_name', 'experiment')
    full_config['save_model'] = config_data.get('save_model', False)

    # Persist the effective config for reproducibility
    CONFIG_MGR.save(full_config)

    # Initialize Trainer
    TRAINER = Trainer(SCENARIO, full_config, str(RUN_DIR))

    # Run training off the event loop; swallow-and-print errors so the
    # daemon thread dies quietly without killing the server.
    def train_wrapper():
        time.sleep(1)  # Breathe: give the server a moment before heavy work
        try:
            TRAINER.run()
        except Exception as e:
            print(f"Training Error: {e}")

    TRAINING_THREAD = threading.Thread(target=train_wrapper, daemon=True)
    TRAINING_THREAD.start()

    return {"status": "started"}
127
+
128
@app.get("/api/events")
async def get_events():
    """Return every event recorded in the run's events.jsonl as a JSON array.

    Reads without taking the writer lock: we consume whole lines and simply
    skip any partially-written line that fails to parse, so polling readers
    never block the training/monitor writers.
    """
    event_path = RUN_DIR / "events.jsonl"
    events = []

    if event_path.exists():
        with open(event_path, "r") as f:
            for raw in f:
                if not raw.strip():
                    continue
                try:
                    events.append(json.loads(raw))
                except json.JSONDecodeError:
                    continue

    return JSONResponse(content=events)
146
+
147
@app.get("/api/report/json")
async def download_report_json():
    """Download all recorded events as a single JSON document (404 if no log)."""
    event_path = RUN_DIR / "events.jsonl"
    if not event_path.exists():
        return JSONResponse({"error": "No logs found"}, status_code=404)

    events = []
    with open(event_path, "r") as f:
        for line in f:
            if line.strip():
                # Narrowed from a bare 'except': only skip malformed/partial
                # JSON lines instead of silently hiding every error.
                try:
                    events.append(json.loads(line))
                except json.JSONDecodeError:
                    pass

    return JSONResponse(content={"project": SCENARIO.target_column if SCENARIO else "gradia", "events": events})
161
+
162
@app.get("/api/report/pdf")
async def download_report_pdf():
    """Return an HTML page optimized for print-to-PDF (no reportlab dependency)."""
    event_path = RUN_DIR / "events.jsonl"
    events = []
    if event_path.exists():
        with open(event_path, "r") as f:
            for line in f:
                # Narrowed from a bare 'except': only skip malformed lines
                try:
                    events.append(json.loads(line))
                except json.JSONDecodeError:
                    pass

    # Use .get('type') throughout: a malformed event without a 'type' key
    # previously raised KeyError and turned the whole report into a 500.
    html = f"""
    <html>
    <head>
        <title>Training Report</title>
        <style>
            body {{ font-family: sans-serif; padding: 40px; }}
            h1 {{ border-bottom: 2px solid #333; }}
            table {{ border-collapse: collapse; width: 100%; margin-top: 20px; }}
            th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
            th {{ background-color: #f2f2f2; }}
            .metric {{ color: #0066cc; font-weight: bold; }}
        </style>
    </head>
    <body onload="window.print()">
        <h1>Gradia Training Report</h1>
        <p>Target: {SCENARIO.target_column if SCENARIO else 'N/A'}</p>
        <p>Total Epochs: {len([e for e in events if e.get('type') == 'epoch_end'])}</p>

        <h2>Training History</h2>
        <table>
            <thead><tr><th>Epoch</th><th>Train Acc/MSE</th><th>Test Acc/MSE</th><th>CPU %</th><th>RAM %</th></tr></thead>
            <tbody>
    """

    # Render one table row per epoch_end event (system CPU/RAM correlation
    # is not implemented yet — dashes as placeholders)
    epochs = [e for e in events if e.get('type') == 'epoch_end']
    for e in epochs:
        d = e['data']
        html += f"<tr><td>{d['epoch']}</td><td>{d.get('train_acc', d.get('train_mse', 'N/A'))}</td><td>{d.get('test_acc', d.get('test_mse', 'N/A'))}</td><td>-</td><td>-</td></tr>"

    html += """
            </tbody>
        </table>
    </body>
    </html>
    """
    from fastapi.responses import HTMLResponse
    return HTMLResponse(content=html)
212
+
213
@app.post("/api/evaluate")
async def evaluate_model():
    """Run a full evaluation of the trained model and return its metrics."""
    if TRAINER is None:
        return JSONResponse({"error": "No model trained"}, status_code=400)

    try:
        # NOTE(review): evaluate_full() is expected on the Trainer; confirm it
        # exists in trainer.engine — only _evaluate is visible in this package.
        results = TRAINER.evaluate_full()
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
    return JSONResponse(content=results)
223
+
224
def start_server(run_dir: str, port: int = 8000):
    """Bind the module-level RUN_DIR to *run_dir* and serve the app locally.

    Args:
        run_dir: Directory holding this run's event log and artifacts.
        port: Local TCP port to listen on (default 8000).
    """
    global RUN_DIR
    resolved = Path(run_dir).resolve()
    RUN_DIR = resolved
    print(f"DEBUG: Server using RUN_DIR: {RUN_DIR}")
    uvicorn.run(app, host="127.0.0.1", port=port, log_level="error")