gradia 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gradia/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
gradia/cli/__init__.py ADDED
File without changes
gradia/cli/main.py ADDED
@@ -0,0 +1,91 @@
1
+ import typer
2
+ import threading
3
+ import time
4
+ import os
5
+ import webbrowser
6
+ from pathlib import Path
7
+ from rich.console import Console
8
+ from ..core.inspector import Inspector
9
+ from ..core.scenario import ScenarioInferrer
10
+ from ..core.config import ConfigManager
11
+ from ..trainer.engine import Trainer
12
+ from ..viz import server
13
+
14
+ app = typer.Typer()
15
+ console = Console()
16
+
17
@app.callback()
def callback():
    """
    gradia: Local-first ML training visualization.
    """
    # NOTE: Typer surfaces this docstring as the CLI's top-level --help text,
    # so the wording above is user-facing.
22
+
23
@app.command()
def run(
    ctx: typer.Context,
    path: str = typer.Argument(".", help="Path to data directory"),
    target: str = typer.Option(None, help="Manually specify target column"),
    port: int = typer.Option(8000, help="Port for visualization server")
):
    """
    Starts the gradia training and visualization session.

    Workflow: scan `path` for datasets, infer the ML scenario, create an
    isolated per-session log directory, then hand off to the web UI server
    (which spawns the training thread on API request).
    """
    console.rule("[bold blue]gradia v1.0.0[/bold blue]")

    # 1. Inspect the data directory for candidate dataset files.
    path = Path(path).resolve()
    inspector = Inspector(path)
    datasets = inspector.find_datasets()

    if not datasets:
        console.print(f"[red]No .csv or .parquet files found in {path}[/red]")
        raise typer.Exit(code=1)

    # MVP: use the first dataset found.
    dataset = datasets[0]
    console.print(f"[green]Found dataset:[/green] {dataset.name}")

    # 2. Infer scenario up front; the user confirms/configures it in the UI.
    with console.status("Inferring scenario..."):
        inferrer = ScenarioInferrer()
        scenario = inferrer.infer(str(dataset), target_override=target)

    console.print(f"Target: [bold]{scenario.target_column}[/bold] | Task: [bold]{scenario.task_type}[/bold]")

    # Session isolation: each run gets a unique directory. It lives under the
    # *data* path so logs and config always sit together (previously the
    # session dir was created relative to the CWD while an earlier, discarded
    # ConfigManager pointed at the data path).
    session_id = int(time.time())
    run_dir = path / ".gradia_logs" / f"run_{session_id}"
    run_dir.mkdir(parents=True, exist_ok=True)

    # Single config load for the session (the duplicate load before scenario
    # inference was dead work whose result was immediately overwritten).
    config_mgr = ConfigManager(run_dir)
    config = config_mgr.load_or_create()

    # Apply the smart model recommendation derived from the dataset shape.
    config['model']['type'] = scenario.recommended_model
    console.print(f"[cyan]Smart Suggestion:[/cyan] Using [bold]{scenario.recommended_model}[/bold] for this dataset.")

    console.print("[bold green]Configuration moved to Web UI[/bold green]")
    console.print(f"Visualization running at http://localhost:{port}")
    console.print(f"Logs: {run_dir.resolve()}")

    # 3. Launch server — inject session state into the server module first.
    server.SCENARIO = scenario
    server.CONFIG_MGR = config_mgr
    server.RUN_DIR = run_dir
    server.DEFAULT_CONFIG = config

    # Open the browser after a short delay so the server has time to bind.
    threading.Timer(1.5, lambda: webbrowser.open(f"http://localhost:{port}/configure")).start()

    # Blocking the main thread is fine: the training thread is spawned by the
    # server upon API request, not here.
    server.start_server(str(run_dir), port)

if __name__ == "__main__":
    app()
gradia/core/config.py ADDED
@@ -0,0 +1,56 @@
1
+ import yaml
2
+ from pathlib import Path
3
+ from typing import Any, Dict
4
+
5
class ConfigManager:
    """Manages gradia configuration for a run directory.

    Precedence (lowest to highest): DEFAULT_CONFIG, a `gradia.yaml` in the
    current working directory, then explicit `user_overrides`.
    """

    DEFAULT_CONFIG = {
        'model': {
            'type': 'auto',  # auto, linear, random_forest
            'params': {}
        },
        'training': {
            'test_split': 0.2,
            'random_seed': 42,
            'shuffle': True
        },
        'scenario': {
            'target': None,  # Auto-detect
            'task': None     # Auto-detect
        }
    }

    def __init__(self, run_dir: str = ".gradia_logs"):
        self.run_dir = Path(run_dir)
        self.config_path = self.run_dir / "config.yaml"

    def load_or_create(self, user_overrides: Dict[str, Any] = None) -> Dict[str, Any]:
        """Build an effective config dict; never mutates DEFAULT_CONFIG.

        Loads `gradia.yaml` from the working directory when present, then
        applies `user_overrides` on top.
        """
        from copy import deepcopy
        # deepcopy, not dict.copy(): the defaults contain nested dicts, and a
        # shallow copy would let _update_recursive (and callers mutating e.g.
        # config['model']['type']) silently corrupt the shared class default
        # for every subsequent call.
        config = deepcopy(self.DEFAULT_CONFIG)

        # If a gradia.yaml exists in the working directory, layer it on.
        root_config = Path("gradia.yaml")
        if root_config.exists():
            with open(root_config, 'r') as f:
                user_config = yaml.safe_load(f)
            self._update_recursive(config, user_config)

        if user_overrides:
            self._update_recursive(config, user_overrides)

        return config

    def save(self, config: Dict[str, Any]):
        """Persist `config` as YAML inside the run directory."""
        # parents=True: the run dir may be nested (e.g. .gradia_logs/run_123).
        self.run_dir.mkdir(parents=True, exist_ok=True)
        with open(self.config_path, 'w') as f:
            yaml.dump(config, f)

    def _update_recursive(self, base: Dict, update: Dict):
        """Deep-merge `update` into `base` in place (dicts merge, rest replace)."""
        for k, v in update.items():
            if k in base and isinstance(base[k], dict) and isinstance(v, dict):
                self._update_recursive(base[k], v)
            else:
                base[k] = v
@@ -0,0 +1,37 @@
1
+ import os
2
+ from pathlib import Path
3
+ from typing import List, Optional
4
+
5
class Inspector:
    """Scans a working directory for potential dataset files."""

    SUPPORTED_EXTENSIONS = {'.csv', '.parquet'}

    def __init__(self, root_dir: str = "."):
        self.root_dir = Path(root_dir)

    def find_datasets(self) -> List[Path]:
        """Return all supported dataset files in the root directory, sorted."""
        found = [
            candidate
            for ext in self.SUPPORTED_EXTENSIONS
            for candidate in self.root_dir.glob(f"*{ext}")
        ]
        return sorted(found)

    def detect_split_layout(self):
        """
        Detect conventional 'train'/'val'/'test' split folders.

        Returns a dict mapping split name -> directory Path for every split
        folder that exists and contains at least one supported file, or None
        when no such folder is found.
        """
        layout = {}
        for split in ('train', 'val', 'validation', 'test'):
            candidate = self.root_dir / split
            if not (candidate.exists() and candidate.is_dir()):
                continue
            # Only count the folder if it actually holds dataset files.
            contents = [
                f
                for ext in self.SUPPORTED_EXTENSIONS
                for f in candidate.glob(f"*{ext}")
            ]
            if contents:
                layout[split] = candidate
        return layout or None
@@ -0,0 +1,118 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from dataclasses import dataclass, field
4
+ from typing import Optional, List, Any
5
+
6
@dataclass
class Scenario:
    """Describes the inferred ML problem for a single dataset."""

    dataset_path: str
    target_column: str
    task_type: str  # 'classification' or 'regression'
    # Classification details (left at defaults for regression tasks).
    is_multiclass: bool = False
    class_count: int = 0
    # All non-target columns, in dataset order.
    features: List[str] = field(default_factory=list)
    # Model type suggested by ScenarioInferrer heuristics.
    recommended_model: str = "random_forest"
15
+
16
+ class ScenarioInferrer:
17
+ """Infers the ML scenario (Task type, Target) from a dataset."""
18
+
19
+ POSSIBLE_TARGET_NAMES = ['target', 'label', 'y', 'class', 'outcome', 'price', 'score']
20
+
21
+ def infer(self, file_path: str, target_override: Optional[str] = None) -> Scenario:
22
+ # Load a sample to infer types
23
+ df = self._load_sample(file_path)
24
+
25
+ target = target_override
26
+ if not target:
27
+ target = self._guess_target(df)
28
+
29
+ if not target:
30
+ raise ValueError(f"Could not infer target column for {file_path}. Please name one of {self.POSSIBLE_TARGET_NAMES} or provide config.")
31
+
32
+ task_type, is_multiclass, count = self._infer_task_type(df[target])
33
+ features = [c for c in df.columns if c != target]
34
+
35
+ recommended_model = self._infer_model_recommendation(features)
36
+
37
+ return Scenario(
38
+ dataset_path=str(file_path),
39
+ target_column=target,
40
+ task_type=task_type,
41
+ is_multiclass=is_multiclass,
42
+ class_count=count,
43
+ features=features,
44
+ recommended_model=recommended_model
45
+ )
46
+
47
+ def _infer_model_recommendation(self, features: List[str]) -> str:
48
+ # Heuristic 1: Check for pixel data (Fashion MNIST, MNIST, etc.)
49
+ # If > 100 features and names contain 'pixel'
50
+ if len(features) > 100:
51
+ pixel_cols = [f for f in features if 'pixel' in f.lower()]
52
+ if len(pixel_cols) > len(features) * 0.5:
53
+ return "cnn"
54
+
55
+ # Heuristic 2: Tabular default
56
+ return "random_forest"
57
+
58
+ def _load_sample(self, path: str, n_rows: int = 1000) -> pd.DataFrame:
59
+ if path.endswith('.csv'):
60
+ return pd.read_csv(path, nrows=n_rows)
61
+ elif path.endswith('.parquet'):
62
+ # Parquet doesn't support 'nrows' efficiently same as csv sometimes,
63
+ # but pandas read_parquet usually loads full. For large files we might need pyarrow.
64
+ # For MVP assume fits in memory or use logic to limits.
65
+ return pd.read_parquet(path).head(n_rows)
66
+ else:
67
+ raise ValueError("Unsupported format")
68
+
69
+ def _guess_target(self, df: pd.DataFrame) -> Optional[str]:
70
+ # 1. Exact Name match
71
+ for name in self.POSSIBLE_TARGET_NAMES:
72
+ if name in df.columns:
73
+ return name
74
+ if name.upper() in df.columns:
75
+ return name.upper()
76
+
77
+ # 2. Heuristic: Avoid ID/Date columns
78
+ candidates = []
79
+ for col in df.columns:
80
+ lower = col.lower()
81
+ if not any(x in lower for x in ['id', 'date', 'time', 'created_at', 'uuid', 'index']):
82
+ candidates.append(col)
83
+
84
+ if candidates:
85
+ return candidates[-1]
86
+
87
+ # 3. Last column fallback
88
+ return df.columns[-1]
89
+
90
+ def _infer_task_type(self, series: pd.Series):
91
+ """
92
+ Returns (task_type, is_multiclass, class_count)
93
+ """
94
+ # Heuristics:
95
+ # If string/object -> Classification
96
+ # If float -> Regression (unless low cardinality?)
97
+ # If int -> Check cardinality. Low (<20) -> Classification. High -> Regression.
98
+
99
+ unique_count = series.nunique()
100
+ dtype = series.dtype
101
+
102
+ if pd.api.types.is_string_dtype(dtype) or pd.api.types.is_object_dtype(dtype):
103
+ return 'classification', unique_count > 2, unique_count
104
+
105
+ if pd.api.types.is_float_dtype(dtype):
106
+ # If floats are actually integers (e.g. 1.0, 0.0), check that
107
+ if series.apply(float.is_integer).all() and unique_count < 20:
108
+ return 'classification', unique_count > 2, unique_count
109
+ return 'regression', False, 0
110
+
111
+ if pd.api.types.is_integer_dtype(dtype):
112
+ if unique_count < 20: # Arbitrary threshold for MVP
113
+ return 'classification', unique_count > 2, unique_count
114
+ else:
115
+ return 'regression', False, 0
116
+
117
+ # Fallback
118
+ return 'regression', False, 0
gradia/models/base.py ADDED
@@ -0,0 +1,39 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Dict, Optional
3
+ import numpy as np
4
+
5
class GradiaModel(ABC):
    """Abstract interface every Gradia model implementation must satisfy."""

    @abstractmethod
    def fit(self, X, y, **kwargs):
        """Train the model fully."""

    def partial_fit(self, X, y, **kwargs):
        """Train on a batch or single epoch (optional capability)."""
        raise NotImplementedError("This model does not support iterative training.")

    @property
    def supports_iterative(self) -> bool:
        """Whether partial_fit is usable; defaults to False for batch models."""
        return False

    @abstractmethod
    def predict(self, X) -> np.ndarray:
        """Make predictions."""

    @abstractmethod
    def predict_proba(self, X) -> Optional[np.ndarray]:
        """Make probability predictions (if applicable)."""

    @abstractmethod
    def get_feature_importance(self) -> Optional[Dict[str, float]]:
        """Return feature importance map if available."""

    @abstractmethod
    def get_params(self) -> Dict[str, Any]:
        """Return model hyperparameters."""
@@ -0,0 +1,114 @@
1
+ from typing import Any, Dict, Optional
2
+ import numpy as np
3
+ from sklearn.linear_model import LogisticRegression, LinearRegression, SGDClassifier, SGDRegressor
4
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
5
+ from .base import GradiaModel
6
+
7
class SklearnWrapper(GradiaModel):
    """Adapts any sklearn-style estimator to the GradiaModel interface."""

    def __init__(self, model, feature_names=None):
        self.model = model
        self.feature_names = feature_names

    def fit(self, X, y, **kwargs):
        """Fully train the wrapped estimator."""
        self.model.fit(X, y)
        self._capture_feature_names(X)

    def partial_fit(self, X, y, **kwargs):
        """
        Train incrementally.

        Uses the estimator's native partial_fit when available; otherwise
        simulates iterative training for warm-start ensembles by adding one
        estimator per call and refitting.

        Raises:
            NotImplementedError: if the estimator supports neither mechanism.
                (Previously this case was a silent no-op, so training appeared
                to run without ever updating the model.)
        """
        if hasattr(self.model, "partial_fit"):
            # True incremental learners (SGD family).
            classes = kwargs.get('classes')
            if classes is not None:
                self.model.partial_fit(X, y, classes=classes)
            else:
                self.model.partial_fit(X, y)
        elif hasattr(self.model, "warm_start") and self.model.warm_start:
            # Warm-start simulation: grow the ensemble by one step and refit.
            self.model.n_estimators += 1
            self.model.fit(X, y)
        else:
            raise NotImplementedError("This model does not support iterative training.")

        self._capture_feature_names(X)

    def _capture_feature_names(self, X):
        # Remember DataFrame column names for feature-importance reporting.
        if hasattr(X, "columns"):
            self.feature_names = list(X.columns)

    @property
    def supports_iterative(self) -> bool:
        """True when either native partial_fit or warm-start growth works."""
        return hasattr(self.model, "partial_fit") or (hasattr(self.model, "warm_start") and self.model.warm_start)

    def predict(self, X) -> np.ndarray:
        return self.model.predict(X)

    def predict_proba(self, X) -> Optional[np.ndarray]:
        if hasattr(self.model, "predict_proba"):
            return self.model.predict_proba(X)
        return None

    def get_feature_importance(self) -> Optional[Dict[str, float]]:
        """Map feature name -> importance, or None if unknown."""
        if not self.feature_names:
            return None

        importances = None
        if hasattr(self.model, "coef_"):
            # Linear models: coefficient magnitude, averaged over classes.
            importances = np.abs(self.model.coef_)
            if importances.ndim > 1:
                importances = importances.mean(axis=0)  # Multiclass avg
        elif hasattr(self.model, "feature_importances_"):
            # Tree-based models expose importances directly.
            importances = self.model.feature_importances_

        if importances is not None:
            return dict(zip(self.feature_names, importances))
        return None

    def get_params(self) -> Dict[str, Any]:
        return self.model.get_params()
67
+
68
class ModelFactory:
    """Builds a wrapped model from a (model_type, task_type) pair."""

    @staticmethod
    def create(model_type: str, task_type: str, params: Dict[str, Any] = None) -> GradiaModel:
        """
        Create a GradiaModel for the requested model and task type.

        Args:
            model_type: 'linear', 'random_forest', 'sgd', 'mlp', 'cnn'
                (unknown values fall back to a random forest).
            task_type: 'classification' or 'regression'.
            params: estimator keyword arguments (sklearn-compatible).
        """
        # The original signature used a mutable default (`params={}`) and then
        # mutated it (params['warm_start'] = True), polluting the shared
        # default — and the caller's dict — across calls. Copy defensively.
        params = dict(params) if params else {}

        # Standard Linear
        if model_type == 'linear':
            if task_type == 'classification':
                return SklearnWrapper(LogisticRegression(**params))
            else:
                return SklearnWrapper(LinearRegression(**params))

        # Random Forest
        elif model_type == 'random_forest':
            # Enable warm_start for iterative viz unless explicitly set.
            params.setdefault('warm_start', True)
            if task_type == 'classification':
                return SklearnWrapper(RandomForestClassifier(**params))
            else:
                return SklearnWrapper(RandomForestRegressor(**params))

        # SGD (Iterative Linear)
        elif model_type == 'sgd':
            # UI params (e.g. 'lr') are assumed already normalized to sklearn
            # names ('eta0', 'learning_rate') by the caller for the MVP.
            if task_type == 'classification':
                return SklearnWrapper(SGDClassifier(**params))
            else:
                return SklearnWrapper(SGDRegressor(**params))

        # MLP / CNN (basic neural net; 'cnn' maps to an MLP for the MVP)
        elif model_type in ['mlp', 'cnn']:
            from sklearn.neural_network import MLPClassifier, MLPRegressor
            if task_type == 'classification':
                # Sensible default for simple MNIST-like data.
                params.setdefault('hidden_layer_sizes', (100, 50))
                return SklearnWrapper(MLPClassifier(warm_start=True, **params))
            else:
                return SklearnWrapper(MLPRegressor(warm_start=True, **params))

        # Default fallback: random forest for the task type.
        if task_type == 'classification':
            return SklearnWrapper(RandomForestClassifier(warm_start=True, **params))
        else:
            return SklearnWrapper(RandomForestRegressor(warm_start=True, **params))
@@ -0,0 +1,48 @@
1
+ from typing import Dict, Any, List
2
+ import json
3
+ import time
4
+ import threading
5
+ import os
6
+ from pathlib import Path
7
+
8
+ # Shared lock for writing to the log file from multiple threads (Trainer vs SystemMonitor)
9
+ log_lock = threading.Lock()
10
+
11
class Callback:
    """Training lifecycle hooks; subclasses override the events they need."""

    def on_train_begin(self, logs: Dict[str, Any] = {}):
        pass

    def on_epoch_end(self, epoch: int, logs: Dict[str, Any] = {}):
        pass

    def on_train_end(self, logs: Dict[str, Any] = {}):
        pass
15
+
16
class EventLogger(Callback):
    """
    Appends training events as JSON lines to a file the UI server tails.
    """
    def __init__(self, log_dir: str):
        self.log_path = Path(log_dir) / "events.jsonl"
        self.log_path.parent.mkdir(parents=True, exist_ok=True)
        # Start every session with a fresh log file.
        if self.log_path.exists():
            with log_lock:
                # Re-check under the lock in case another logger raced us
                # to the unlink (rare, but cheap to guard).
                if self.log_path.exists():
                    self.log_path.unlink()

    def _emit(self, event_type: str, data: Dict[str, Any]):
        """Serialize one event and append it under the shared log lock."""
        record = {
            "timestamp": time.time(),
            "type": event_type,
            "data": data
        }
        line = json.dumps(record) + "\n"
        with log_lock:
            with open(self.log_path, "a") as sink:
                sink.write(line)

    def on_train_begin(self, logs={}):
        self._emit("train_begin", logs)

    def on_epoch_end(self, epoch: int, logs={}):
        self._emit("epoch_end", {"epoch": epoch, **logs})

    def on_train_end(self, logs={}):
        self._emit("train_end", logs)