randomstatsmodels 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- randomstatsmodels-0.1.0/LICENSE +21 -0
- randomstatsmodels-0.1.0/PKG-INFO +89 -0
- randomstatsmodels-0.1.0/README.md +45 -0
- randomstatsmodels-0.1.0/pyproject.toml +41 -0
- randomstatsmodels-0.1.0/randomstatsmodels/__init__.py +39 -0
- randomstatsmodels-0.1.0/randomstatsmodels/benchmarking/__init__.py +1 -0
- randomstatsmodels-0.1.0/randomstatsmodels/benchmarking/benchmarking.py +304 -0
- randomstatsmodels-0.1.0/randomstatsmodels/metrics/__init__.py +1 -0
- randomstatsmodels-0.1.0/randomstatsmodels/metrics/metrics.py +27 -0
- randomstatsmodels-0.1.0/randomstatsmodels/models/__init__.py +15 -0
- randomstatsmodels-0.1.0/randomstatsmodels/models/model_utils.py +47 -0
- randomstatsmodels-0.1.0/randomstatsmodels/models/models.py +3263 -0
- randomstatsmodels-0.1.0/randomstatsmodels.egg-info/PKG-INFO +89 -0
- randomstatsmodels-0.1.0/randomstatsmodels.egg-info/SOURCES.txt +17 -0
- randomstatsmodels-0.1.0/randomstatsmodels.egg-info/dependency_links.txt +1 -0
- randomstatsmodels-0.1.0/randomstatsmodels.egg-info/requires.txt +1 -0
- randomstatsmodels-0.1.0/randomstatsmodels.egg-info/top_level.txt +1 -0
- randomstatsmodels-0.1.0/setup.cfg +13 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Jacob
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to do so, subject to the following
|
|
10
|
+
conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: randomstatsmodels
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Tools for benchmarking, metrics, and models.
|
|
5
|
+
Author: Jacob Wright
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Jacob
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to do so, subject to the following
|
|
15
|
+
conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in
|
|
18
|
+
all copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
Keywords: statistics,machine learning,metrics,models
|
|
28
|
+
Classifier: Development Status :: 3 - Alpha
|
|
29
|
+
Classifier: Intended Audience :: Science/Research
|
|
30
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
31
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
32
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
33
|
+
Classifier: Programming Language :: Python :: 3
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
38
|
+
Classifier: Operating System :: OS Independent
|
|
39
|
+
Requires-Python: >=3.9
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
License-File: LICENSE
|
|
42
|
+
Requires-Dist: numpy>=1.24
|
|
43
|
+
Dynamic: license-file
|
|
44
|
+
|
|
45
|
+
# randomstatsmodels
|
|
46
|
+
|
|
47
|
+
A tiny, modern Python package skeleton for experimenting with forecasting and statistics utilities.
|
|
48
|
+
|
|
49
|
+
## Quick start
|
|
50
|
+
```bash
|
|
51
|
+
# from the project root
|
|
52
|
+
python -m venv .venv && source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
53
|
+
pip install -e ".[dev]" # install in editable mode with dev extras
|
|
54
|
+
randomstatsmodels --version
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Usage
|
|
58
|
+
```python
|
|
59
|
+
from randomstatsmodels.metrics import mae, rmse
|
|
60
|
+
|
|
61
|
+
y_true = [1, 2, 3]
|
|
62
|
+
y_pred = [1.1, 1.9, 3.2]
|
|
63
|
+
print(mae(y_true, y_pred))
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Testing
|
|
67
|
+
```bash
|
|
68
|
+
pytest -q
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Using your models
|
|
72
|
+
Put your custom models in `randomstatsmodels/user_models.py`.
|
|
73
|
+
|
|
74
|
+
### Python API
|
|
75
|
+
```python
|
|
76
|
+
from randomstatsmodels.api import predict
|
|
77
|
+
# Model ref can be 'randomstatsmodels.user_models:MyModel' or an object/callable
|
|
78
|
+
yhat = predict("randomstatsmodels.user_models:MyModel", X_dataframe)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### CLI
|
|
82
|
+
```bash
|
|
83
|
+
randomstatsmodels predict --model randomstatsmodels.user_models:MyModel --input data.csv --output preds.csv
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
The adapter accepts:
|
|
87
|
+
- Objects with `.predict(X, **kwargs)`
|
|
88
|
+
- Callables like `def f(X): ...`
|
|
89
|
+
- Classes that can be instantiated without args and have `.predict(X)`
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# randomstatsmodels
|
|
2
|
+
|
|
3
|
+
A tiny, modern Python package skeleton for experimenting with forecasting and statistics utilities.
|
|
4
|
+
|
|
5
|
+
## Quick start
|
|
6
|
+
```bash
|
|
7
|
+
# from the project root
|
|
8
|
+
python -m venv .venv && source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
9
|
+
pip install -e ".[dev]" # install in editable mode with dev extras
|
|
10
|
+
randomstatsmodels --version
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
```python
|
|
15
|
+
from randomstatsmodels.metrics import mae, rmse
|
|
16
|
+
|
|
17
|
+
y_true = [1, 2, 3]
|
|
18
|
+
y_pred = [1.1, 1.9, 3.2]
|
|
19
|
+
print(mae(y_true, y_pred))
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Testing
|
|
23
|
+
```bash
|
|
24
|
+
pytest -q
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Using your models
|
|
28
|
+
Put your custom models in `randomstatsmodels/user_models.py`.
|
|
29
|
+
|
|
30
|
+
### Python API
|
|
31
|
+
```python
|
|
32
|
+
from randomstatsmodels.api import predict
|
|
33
|
+
# Model ref can be 'randomstatsmodels.user_models:MyModel' or an object/callable
|
|
34
|
+
yhat = predict("randomstatsmodels.user_models:MyModel", X_dataframe)
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### CLI
|
|
38
|
+
```bash
|
|
39
|
+
randomstatsmodels predict --model randomstatsmodels.user_models:MyModel --input data.csv --output preds.csv
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
The adapter accepts:
|
|
43
|
+
- Objects with `.predict(X, **kwargs)`
|
|
44
|
+
- Callables like `def f(X): ...`
|
|
45
|
+
- Classes that can be instantiated without args and have `.predict(X)`
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=69", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "randomstatsmodels"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Tools for benchmarking, metrics, and models."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Jacob Wright" }
|
|
13
|
+
]
|
|
14
|
+
requires-python = ">=3.9"
|
|
15
|
+
dependencies = [
|
|
16
|
+
"numpy>=1.24"
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
keywords = ["statistics", "machine learning", "metrics", "models"]
|
|
20
|
+
|
|
21
|
+
classifiers = [
|
|
22
|
+
"Development Status :: 3 - Alpha",
|
|
23
|
+
"Intended Audience :: Science/Research",
|
|
24
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
26
|
+
"License :: OSI Approved :: MIT License",
|
|
27
|
+
"Programming Language :: Python :: 3",
|
|
28
|
+
"Programming Language :: Python :: 3.9",
|
|
29
|
+
"Programming Language :: Python :: 3.10",
|
|
30
|
+
"Programming Language :: Python :: 3.11",
|
|
31
|
+
"Programming Language :: Python :: 3.12",
|
|
32
|
+
"Operating System :: OS Independent"
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[tool.setuptools.packages.find]
|
|
36
|
+
where = ["."]
|
|
37
|
+
include = ["randomstatsmodels*"]
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.package-data]
|
|
40
|
+
# if you ever add CSVs or py.typed, include them here
|
|
41
|
+
randomstatsmodels = []
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# randomstatsmodels/__init__.py
|
|
2
|
+
# This makes the folder a Python package.
|
|
3
|
+
|
|
4
|
+
from .metrics.metrics import mae, mape, smape, rmse
|
|
5
|
+
from .models.models import (
|
|
6
|
+
AutoHybridForecaster,
|
|
7
|
+
AutoKNN,
|
|
8
|
+
AutoMELD,
|
|
9
|
+
AutoNEO,
|
|
10
|
+
AutoPALF,
|
|
11
|
+
AutoThetaAR,
|
|
12
|
+
AutoPolymath,
|
|
13
|
+
AutoSeasonalAR,
|
|
14
|
+
AutoFourier,
|
|
15
|
+
AutoRollingMedian,
|
|
16
|
+
AutoTrimmedMean,
|
|
17
|
+
AutoWindow,
|
|
18
|
+
AutoRankInsertion,
|
|
19
|
+
)
|
|
20
|
+
from .benchmarking.benchmarking import benchmark_model, benchmark_models
|
|
21
|
+
|
|
22
|
+
__version__ = "0.1.0"
|
|
23
|
+
__all__ = [
|
|
24
|
+
"__version__",
|
|
25
|
+
"mae",
|
|
26
|
+
"mape",
|
|
27
|
+
"smape",
|
|
28
|
+
"rmse",
|
|
29
|
+
"AutoHybridForecaster",
|
|
30
|
+
"AutoKNN",
|
|
31
|
+
"AutoMELD",
|
|
32
|
+
"AutoNEO",
|
|
33
|
+
"AutoPALF",
|
|
34
|
+
"AutoThetaAR",
|
|
35
|
+
"AutoPolymath",
|
|
36
|
+
"AutoSeasonalAR",
|
|
37
|
+
"benchmark_models",
|
|
38
|
+
"benchmark_model",
|
|
39
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .benchmarking import benchmark_model, benchmark_models
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import math
|
|
3
|
+
import numpy as np
|
|
4
|
+
from ..metrics import mae, rmse, mape, smape
|
|
5
|
+
|
|
6
|
+
import time
|
|
7
|
+
import numpy as np
|
|
8
|
+
from ..metrics import mae, rmse, mape, smape
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def benchmark_model(model_class, data, iterations=1, h=7):
    """
    Benchmark the training + prediction speed of a time series model,
    and compute MAE, RMSE, MAPE, sMAPE on the last ``h`` points.

    Parameters
    ----------
    model_class : class
        The model class to instantiate fresh on every run (e.g., AutoNEO).
        It must expose ``fit(y)`` and ``predict(h)``.
    data : array-like
        The time series data; the last ``h`` points are held out as truth.
    iterations : int, default=1
        Number of times to run the benchmark.
    h : int, default=7
        Forecast horizon.

    Returns
    -------
    results : dict
        Averages ("avg_fit_time_s", "avg_predict_time_s", "avg_total_time_s",
        "avg_mae", "avg_rmse", "avg_mape", "avg_smape") plus
        "per_iteration", a list of per-run timing/metric dicts.

    Raises
    ------
    ValueError
        If ``data`` is not strictly longer than ``h``.
    """
    data = np.asarray(data)
    # Validate with a real exception: a bare ``assert`` disappears under -O.
    if len(data) <= h:
        raise ValueError("Data length must be greater than forecast horizon h.")

    model_name = model_class.__name__  # invariant; hoisted out of the loop
    y_true = data[-h:]
    per_iter = []
    for _ in range(iterations):
        model = model_class()  # fresh model each run

        # perf_counter is monotonic and higher-resolution than time.time,
        # so it is the appropriate clock for interval timing.
        t0 = time.perf_counter()
        model.fit(data[:-h])
        fit_time = time.perf_counter() - t0

        t1 = time.perf_counter()
        y_pred = model.predict(h)
        predict_time = time.perf_counter() - t1

        total_time = fit_time + predict_time

        # Unwrap the AutoETS wrapper shape *before* array coercion: its
        # predict() is expected to look like [{'mean': array([...])}, ...],
        # which would otherwise become an object array after reshape.
        if model_name == "AutoETS":
            try:
                y_pred = y_pred[0]["mean"]
            except (KeyError, IndexError, TypeError):
                pass
        # Ensure a flat, length-h forecast vector.
        y_pred = np.asarray(y_pred).reshape(-1)[:h]

        iter_metrics = {
            "fit_time_s": fit_time,
            "predict_time_s": predict_time,
            "total_time_s": total_time,
            "mae": float(mae(y_true, y_pred)),
            "rmse": float(rmse(y_true, y_pred)),
            "mape": float(mape(y_true, y_pred)),
            "smape": float(smape(y_true, y_pred)),
        }
        per_iter.append(iter_metrics)

    # Averages across runs.
    avg_total = float(np.mean([x["total_time_s"] for x in per_iter]))
    avg_fit = float(np.mean([x["fit_time_s"] for x in per_iter]))
    avg_predict = float(np.mean([x["predict_time_s"] for x in per_iter]))
    avg_mae_ = float(np.mean([x["mae"] for x in per_iter]))
    avg_rmse_ = float(np.mean([x["rmse"] for x in per_iter]))
    avg_mape_ = float(np.mean([x["mape"] for x in per_iter]))
    avg_smape_ = float(np.mean([x["smape"] for x in per_iter]))

    print(
        f"\nAverages over {iterations} runs --> "
        f"fit: {avg_fit:.4f}s | predict: {avg_predict:.4f}s | total: {avg_total:.4f}s | "
        f"MAE: {avg_mae_:.4f} | RMSE: {avg_rmse_:.4f} | MAPE: {avg_mape_:.4f} | sMAPE: {avg_smape_:.4f}"
    )

    return {
        "avg_total_time_s": round(avg_total, 2),
        "avg_fit_time_s": round(avg_fit, 2),
        "avg_predict_time_s": round(avg_predict, 2),
        "avg_mae": round(avg_mae_, 3),
        "avg_rmse": round(avg_rmse_, 3),
        "avg_mape": round(avg_mape_, 3),
        "avg_smape": round(avg_smape_, 3),
        "per_iteration": per_iter,
    }
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
import time
|
|
121
|
+
import numpy as np
|
|
122
|
+
from ..metrics import mae, rmse, mape, smape
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _coerce_forecast(yp, h, model_name):
|
|
126
|
+
"""
|
|
127
|
+
Coerce various model outputs to a 1D np.ndarray of length h.
|
|
128
|
+
Handles special cases like AutoETS structure.
|
|
129
|
+
"""
|
|
130
|
+
# Special-case: your AutoETS wrapper shape
|
|
131
|
+
if model_name == "AutoETS":
|
|
132
|
+
# expected like [{'mean': np.array([...])}, ...] or similar
|
|
133
|
+
try:
|
|
134
|
+
yp = yp["mean"]
|
|
135
|
+
|
|
136
|
+
except Exception:
|
|
137
|
+
pass
|
|
138
|
+
|
|
139
|
+
if yp.shape[0] != h:
|
|
140
|
+
raise ValueError(f"{model_name}.predict({h}) returned length {yp.shape[0]} (expected {h}).")
|
|
141
|
+
return yp
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _metrics_dict(y_true, y_pred):
    """Score one forecast: MAE/RMSE/MAPE/sMAPE as plain Python floats."""
    scores = {}
    for label, metric in (("mae", mae), ("rmse", rmse), ("mape", mape), ("smape", smape)):
        scores[label] = float(metric(y_true, y_pred))
    return scores
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _avg_block(per_iter):
|
|
154
|
+
return {
|
|
155
|
+
"avg_total_time_s": (
|
|
156
|
+
round(float(np.mean([x["total_time_s"] for x in per_iter])), 2)
|
|
157
|
+
if per_iter and "total_time_s" in per_iter[0]
|
|
158
|
+
else None
|
|
159
|
+
),
|
|
160
|
+
"avg_fit_time_s": (
|
|
161
|
+
round(float(np.mean([x["fit_time_s"] for x in per_iter])), 2)
|
|
162
|
+
if per_iter and "fit_time_s" in per_iter[0]
|
|
163
|
+
else None
|
|
164
|
+
),
|
|
165
|
+
"avg_predict_time_s": (
|
|
166
|
+
round(float(np.mean([x["predict_time_s"] for x in per_iter])), 2)
|
|
167
|
+
if per_iter and "predict_time_s" in per_iter[0]
|
|
168
|
+
else None
|
|
169
|
+
),
|
|
170
|
+
"avg_mae": round(float(np.mean([x["mae"] for x in per_iter])), 3) if per_iter else None,
|
|
171
|
+
"avg_rmse": round(float(np.mean([x["rmse"] for x in per_iter])), 3) if per_iter else None,
|
|
172
|
+
"avg_mape": round(float(np.mean([x["mape"] for x in per_iter])), 3) if per_iter else None,
|
|
173
|
+
"avg_smape": round(float(np.mean([x["smape"] for x in per_iter])), 3) if per_iter else None,
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _safe_fmt(x, fmt=".3f"):
|
|
178
|
+
if x is None:
|
|
179
|
+
return "—"
|
|
180
|
+
try:
|
|
181
|
+
if isinstance(x, float) and (math.isnan(x) or math.isinf(x)):
|
|
182
|
+
return "—"
|
|
183
|
+
return format(x, fmt)
|
|
184
|
+
except Exception:
|
|
185
|
+
return "—"
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def benchmark_models(
    model_classes,
    data,
    iterations=1,
    h=7,
    ensembles=("mean", "median"),
    exclude_from_ensemble=None,
):
    """
    Benchmark several forecasting model classes on one series and score
    simple cross-model ensembles.

    Each model is re-instantiated every iteration, fit on ``data[:-h]``,
    and asked for an ``h``-step forecast; the last ``h`` points are the
    truth.  Models that raise or return NaNs are skipped (and reported).

    Parameters
    ----------
    model_classes : iterable of class
        Classes exposing ``fit(y)`` and ``predict(h)``.
    data : array-like
        The full time series.
    iterations : int, default=1
        Number of benchmark repetitions.
    h : int, default=7
        Forecast horizon.
    ensembles : tuple of str, default=("mean", "median")
        Which cross-model ensembles to score (case-insensitive; only
        "mean" and "median" are recognized).
    exclude_from_ensemble : iterable of class or str, optional
        Models (classes or class names) whose forecasts are scored
        individually but left out of the ensembles.

    Returns
    -------
    dict
        ``{"meta": {...}, "models": {name: summary}, "ensembles": {...}}``
        where each summary is ``_avg_block`` output plus "per_iteration".
        Ensembles carry metric averages only — no timing.
    """
    data = np.asarray(data)
    assert len(data) > h, "Data length must be greater than forecast horizon h."
    y_true = data[-h:]
    model_classes = list(model_classes)

    if exclude_from_ensemble is None:
        exclude_from_ensemble = []
    # Normalize exclusions to a set of class-name strings.
    exclude_names = {(cls.__name__ if not isinstance(cls, str) else cls) for cls in exclude_from_ensemble}

    results = {
        "meta": {"iterations": iterations, "h": h, "n_models": len(model_classes)},
        "models": {},
        "ensembles": {},
    }

    # Prepare per-model storage
    per_model_iters = {cls.__name__: [] for cls in model_classes}
    failed_models = set()

    # Per-iteration: collect predictions for ensembles
    ens_iters_preds = []  # list per iteration: 2D array [n_models_used x h]

    for i in range(iterations):
        iter_preds = []
        for cls in model_classes:
            model_name = cls.__name__
            try:
                # fresh model each run; construction is counted as fit time
                t0 = time.time()
                model = cls()
                model.fit(data[:-h])
                fit_time = time.time() - t0

                t1 = time.time()
                y_pred = model.predict(h)
                predict_time = time.time() - t1

                # Flatten/validate the forecast (AutoETS special case inside).
                y_pred = _coerce_forecast(y_pred, h, model_name)
                if np.isnan(y_pred).any():
                    print(f"Skipping model {model_name}: NaN in predictions")
                    failed_models.add(model_name)
                    continue

                total_time = fit_time + predict_time

                # metrics
                m = _metrics_dict(y_true, y_pred)

                per_model_iters[model_name].append(
                    {
                        "fit_time_s": float(fit_time),
                        "predict_time_s": float(predict_time),
                        "total_time_s": float(total_time),
                        **m,
                    }
                )

                # include in ensemble only if not excluded
                if model_name not in exclude_names:
                    iter_preds.append(y_pred)

            except Exception as e:
                # Any failure (construction, fit, predict, coercion) skips
                # just this model for this iteration.
                print(f"Skipping model {model_name}: {e}")
                failed_models.add(model_name)
                continue

        # Store stacked predictions for ensembles this iteration
        if iter_preds:
            ens_iters_preds.append(np.vstack(iter_preds))  # shape: (n_used_models, h)

    # Aggregate per-model (only include models with at least one valid iteration)
    for model_name, per_iter in per_model_iters.items():
        if not per_iter:
            # Do not include empty models to avoid None in summary formatting
            continue
        results["models"][model_name] = {**_avg_block(per_iter), "per_iteration": per_iter}

    # Compute ensembles (metrics only; no timing)
    valid_ens = set([e.lower() for e in ensembles]) if ensembles else set()
    for ens_type in ("mean", "median"):
        if ens_type in valid_ens and ens_iters_preds:
            per_iter_metrics = []
            for stacked in ens_iters_preds:
                if stacked.size == 0:
                    continue
                # nan-aware reductions tolerate stray NaNs in member forecasts
                if ens_type == "mean":
                    y_ens = np.nanmean(stacked, axis=0)
                else:  # median
                    y_ens = np.nanmedian(stacked, axis=0)
                per_iter_metrics.append(_metrics_dict(y_true, y_ens))

            if per_iter_metrics:
                results["ensembles"][ens_type] = {
                    "avg_mae": round(float(np.mean([x["mae"] for x in per_iter_metrics])), 3),
                    "avg_rmse": round(float(np.mean([x["rmse"] for x in per_iter_metrics])), 3),
                    "avg_mape": round(float(np.mean([x["mape"] for x in per_iter_metrics])), 3),
                    "avg_smape": round(float(np.mean([x["smape"] for x in per_iter_metrics])), 3),
                    "per_iteration": per_iter_metrics,
                }

    # Optional: brief console summary (use safe formatting)
    print(f"\nBenchmark over {iterations} runs (h={h})")
    if failed_models:
        print(f"(Some models were skipped due to errors/NaNs: {sorted(failed_models)})")

    return results
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .metrics import mae, rmse, mape, smape
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def mae(y_true, y_pred):
    """Mean absolute error between two equally-shaped sequences."""
    residual = np.asarray(y_true, float) - np.asarray(y_pred, float)
    return np.mean(np.abs(residual))
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def rmse(y_true, y_pred):
    """Root mean squared error of ``y_pred`` against ``y_true``."""
    residual = np.asarray(y_true, float) - np.asarray(y_pred, float)
    return np.sqrt(np.mean(residual * residual))
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def mape(y_true, y_pred, epsilon=1e-8):
    """Mean absolute percentage error, in percent.

    ``|y_true|`` is floored at ``epsilon`` so zero actuals cannot divide
    by zero.
    """
    actual = np.asarray(y_true, float)
    forecast = np.asarray(y_pred, float)
    floor = np.maximum(np.abs(actual), epsilon)
    pct_err = np.abs(actual - forecast) / floor
    return np.mean(pct_err) * 100.0
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def smape(y_true, y_pred, epsilon=1e-8):
    """Symmetric mean absolute percentage error, in percent.

    The denominator is the mean of ``|y_true|`` and ``|y_pred|``, floored
    at ``epsilon`` to avoid division by zero.
    """
    actual = np.asarray(y_true, float)
    forecast = np.asarray(y_pred, float)
    scale = np.maximum(0.5 * (np.abs(actual) + np.abs(forecast)), epsilon)
    return np.mean(np.abs(actual - forecast) / scale) * 100.0
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _weighted_quantile(values, weights, q):
|
|
5
|
+
values = np.asarray(values, float)
|
|
6
|
+
weights = np.asarray(weights, float)
|
|
7
|
+
srt = np.argsort(values)
|
|
8
|
+
v, w = values[srt], weights[srt]
|
|
9
|
+
cw = np.cumsum(w) / np.sum(w)
|
|
10
|
+
idx = np.searchsorted(cw, q, side="left")
|
|
11
|
+
idx = np.clip(idx, 0, len(v) - 1)
|
|
12
|
+
return float(v[idx])
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _golden_section_minimize(f, a, b, tol=1e-6, max_iter=200):
|
|
16
|
+
phi = (1 + 5**0.5) / 2
|
|
17
|
+
invphi = 1 / phi
|
|
18
|
+
c = b - invphi * (b - a)
|
|
19
|
+
d = a + invphi * (b - a)
|
|
20
|
+
fc = f(c)
|
|
21
|
+
fd = f(d)
|
|
22
|
+
for _ in range(max_iter):
|
|
23
|
+
if abs(b - a) < tol:
|
|
24
|
+
break
|
|
25
|
+
if fc < fd:
|
|
26
|
+
b, d, fd = d, c, fc
|
|
27
|
+
c = b - invphi * (b - a)
|
|
28
|
+
fc = f(c)
|
|
29
|
+
else:
|
|
30
|
+
a, c, fc = c, d, fd
|
|
31
|
+
d = a + invphi * (b - a)
|
|
32
|
+
fd = f(d)
|
|
33
|
+
return (a + b) / 2
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _penalty_value(r, kind="l2", delta=1.0, tau=0.5):
|
|
37
|
+
if kind == "l2":
|
|
38
|
+
return 0.5 * r * r
|
|
39
|
+
elif kind == "l1":
|
|
40
|
+
return np.abs(r)
|
|
41
|
+
elif kind == "huber":
|
|
42
|
+
a = np.abs(r)
|
|
43
|
+
return np.where(a <= delta, 0.5 * r * r, delta * (a - 0.5 * delta))
|
|
44
|
+
elif kind == "pinball":
|
|
45
|
+
return np.where(r >= 0, tau * r, (tau - 1.0) * r)
|
|
46
|
+
else:
|
|
47
|
+
raise ValueError("Unknown penalty")
|