PyPI - jano - Versions diffs - 0.3.1__py3-none-any.whl - Mend

jano 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

jano/__init__.py +57 -0
jano/_version.py +3 -0
jano/describe.py +5 -0
jano/engines.py +196 -0
jano/io.py +37 -0
jano/jano.py +16 -0
jano/mcp_server.py +180 -0
jano/mcp_tools.py +309 -0
jano/planning.py +294 -0
jano/policies.py +558 -0
jano/reporting.py +626 -0
jano/simulation.py +223 -0
jano/slicing.py +101 -0
jano/splits.py +105 -0
jano/splitters.py +401 -0
jano/types.py +146 -0
jano/validation.py +107 -0
jano/workflows.py +403 -0
jano-0.3.1.dist-info/METADATA +522 -0
jano-0.3.1.dist-info/RECORD +24 -0
jano-0.3.1.dist-info/WHEEL +5 -0
jano-0.3.1.dist-info/entry_points.txt +2 -0
jano-0.3.1.dist-info/licenses/LICENSE.txt +20 -0
jano-0.3.1.dist-info/top_level.txt +1 -0

jano/__init__.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""Public package interface for Jano."""
+from ._version import __version__
+from .engines import PartitionEngineMetadata
+from .planning import PartitionPlan, PlannedFold, SimulationPlan
+from .policies import (
+    PerformanceDecayPolicy,
+    PerformanceDecayResult,
+    TrainGrowthPolicy,
+    TrainGrowthResult,
+)
+from .reporting import SimulationChartData, SimulationSummary
+from .simulation import SimulationResult, TemporalSimulation
+from .splitters import TemporalBacktestSplitter
+from .splits import TimeSplit
+from .types import (
+    FeatureLookbackSpec,
+    SegmentBoundaries,
+    SizeSpec,
+    TemporalPartitionSpec,
+    TemporalSemanticsSpec,
+)
+from .workflows import (
+    DriftMonitoringPolicy,
+    RollingTrainHistoryPolicy,
+    RollingTrainHistoryResult,
+    TrainHistoryPolicy,
+    WalkForwardPolicy,
+)
+# Public names of the package; every entry is imported above from its submodule
+# (or from ._version for ``__version__``).
+__all__ = [
+    "FeatureLookbackSpec",
+    "DriftMonitoringPolicy",
+    "PartitionPlan",
+    "PartitionEngineMetadata",
+    "PerformanceDecayPolicy",
+    "PerformanceDecayResult",
+    "PlannedFold",
+    "RollingTrainHistoryPolicy",
+    "RollingTrainHistoryResult",
+    "SegmentBoundaries",
+    "SimulationChartData",
+    "SimulationPlan",
+    "SimulationResult",
+    "SimulationSummary",
+    "SizeSpec",
+    "TemporalSimulation",
+    "TemporalBacktestSplitter",
+    "TemporalPartitionSpec",
+    "TemporalSemanticsSpec",
+    "TrainHistoryPolicy",
+    "TrainGrowthPolicy",
+    "TrainGrowthResult",
+    "TimeSplit",
+    "WalkForwardPolicy",
+    "__version__",
+]

jano/_version.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Package version metadata."""
+# Release version string; jano/__init__.py imports it and lists it in ``__all__``.
+__version__ = "0.3.1"

jano/describe.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Backward-compatible import surface for simulation descriptions."""
+from .reporting import SimulationSummary
+# Only re-export: SimulationSummary itself is imported from .reporting above.
+__all__ = ["SimulationSummary"]

jano/engines.py ADDED Viewed

@@ -0,0 +1,196 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Iterable
+import numpy as np
+import pandas as pd
+try:
+    import polars as pl
+except ImportError:  # pragma: no cover - exercised when polars is not installed
+    pl = None
+from .io import coerce_tabular_input
+from .types import ColumnRef
+@dataclass(frozen=True)
+class PartitionEngineMetadata:
+    """Execution metadata for the internal partition engine.
+    Attributes:
+        engine: Internal engine selected to compute temporal boundaries and indices.
+        input_backend: Backend detected from the user-provided input.
+        converted: Whether the full dataset was converted before planning.
+    """
+    engine: str
+    input_backend: str
+    converted: bool
+    def to_dict(self) -> dict[str, object]:
+        """Return metadata as a serializable dictionary."""
+        return {
+            "engine": self.engine,
+            "input_backend": self.input_backend,
+            "converted": self.converted,
+        }
+class PartitionEngine:
+    """Thin internal adapter used by the splitter to avoid unnecessary conversions."""
+    def __init__(
+        self,
+        data: Any,
+        *,
+        engine: str,
+        input_backend: str,
+        converted: bool = False,
+    ) -> None:
+        self.data = data
+        self.metadata = PartitionEngineMetadata(
+            engine=engine,
+            input_backend=input_backend,
+            converted=converted,
+        )
+        self.columns = self._resolve_columns()
+        self.total_rows = self._resolve_total_rows()
+    @classmethod
+    def from_input(cls, X: Any, prefer: str = "auto") -> "PartitionEngine":
+        """Select the safest available internal engine for ``X``."""
+        if prefer not in {"auto", "pandas", "polars", "numpy"}:
+            raise ValueError("engine must be one of 'auto', 'pandas', 'polars' or 'numpy'")
+        input_backend = detect_backend(X)
+        selected = input_backend if prefer == "auto" else prefer
+        if selected == "pandas":
+            data = coerce_tabular_input(X)
+            return cls(
+                data,
+                engine="pandas",
+                input_backend=input_backend,
+                converted=input_backend != "pandas",
+            )
+        if selected == "polars":
+            if pl is None:
+                raise ImportError("Polars engine requires the optional 'polars' dependency")
+            if input_backend == "polars":
+                return cls(X, engine="polars", input_backend=input_backend)
+            if input_backend == "pandas":
+                return cls(
+                    pl.from_pandas(X),
+                    engine="polars",
+                    input_backend=input_backend,
+                    converted=True,
+                )
+            raise ValueError("Polars engine can only be forced for pandas or polars inputs")
+        if selected == "numpy":
+            if input_backend == "numpy":
+                return cls(X, engine="numpy", input_backend=input_backend)
+            if input_backend == "pandas":
+                return cls(
+                    X.to_numpy(),
+                    engine="numpy",
+                    input_backend=input_backend,
+                    converted=True,
+                )
+            raise ValueError("NumPy engine can only be forced for pandas or numpy inputs")
+        # Unknown tabular-like objects are normalized through the stable pandas path.
+        data = coerce_tabular_input(X)
+        return cls(
+            data,
+            engine="pandas",
+            input_backend=input_backend,
+            converted=input_backend != "pandas",
+        )
+    @property
+    def empty(self) -> bool:
+        return self.total_rows == 0
+    def column_values(self, ref: ColumnRef) -> np.ndarray:
+        """Return one column as a NumPy array without converting the whole dataset."""
+        if self.metadata.engine == "pandas":
+            return self.data[self._resolve_column_ref(ref)].to_numpy()
+        if self.metadata.engine == "polars":
+            return self.data[self._resolve_column_ref(ref)].to_numpy()
+        if self.metadata.engine == "numpy":
+            if self.data.dtype.names is not None:
+                return self.data[self._resolve_column_ref(ref)]
+            return self.data[:, self._resolve_column_ref(ref)]
+        raise RuntimeError(f"Unsupported partition engine '{self.metadata.engine}'")
+    def to_pandas(self) -> pd.DataFrame:
+        """Materialize the full dataset as pandas for reporting or user-facing slices."""
+        return coerce_tabular_input(self.data)
+    def _resolve_columns(self) -> list[object]:
+        if isinstance(self.data, pd.DataFrame):
+            return list(self.data.columns)
+        if pl is not None and isinstance(self.data, pl.DataFrame):
+            return list(self.data.columns)
+        if isinstance(self.data, np.ndarray):
+            if self.data.dtype.names is not None:
+                return list(self.data.dtype.names)
+            if self.data.ndim == 1:
+                return [0]
+            return list(range(self.data.shape[1]))
+        return list(coerce_tabular_input(self.data).columns)
+    def _resolve_total_rows(self) -> int:
+        if isinstance(self.data, pd.DataFrame):
+            return len(self.data)
+        if pl is not None and isinstance(self.data, pl.DataFrame):
+            return self.data.height
+        if isinstance(self.data, np.ndarray):
+            if self.data.ndim == 0:
+                raise TypeError("NumPy scalar inputs are not supported; provide a tabular array")
+            return int(self.data.shape[0])
+        return len(coerce_tabular_input(self.data))
+    def _resolve_column_ref(self, ref: ColumnRef) -> object:
+        if isinstance(ref, int):
+            if ref < 0 or ref >= len(self.columns):
+                raise ValueError(f"Column position {ref} is out of bounds")
+            if self.metadata.engine == "numpy" and self.data.dtype.names is None:
+                return ref
+            return self.columns[ref]
+        if ref not in self.columns:
+            raise ValueError(f"Column '{ref}' was not found in the dataset")
+        return ref
+def detect_backend(X: Any) -> str:
+    """Return the input backend name used for engine selection metadata."""
+    if isinstance(X, pd.DataFrame):
+        return "pandas"
+    if isinstance(X, np.ndarray):
+        return "numpy"
+    if pl is not None and isinstance(X, pl.DataFrame):
+        return "polars"
+    module_name = getattr(type(X), "__module__", "")
+    if module_name.startswith("polars"):
+        raise ImportError(
+            "Polars input support requires the optional 'polars' dependency to be installed"
+        )
+    return "pandas"
+def missing_columns(columns: Iterable[ColumnRef], available: Iterable[object]) -> list[object]:
+    """Return missing named columns while treating integer refs as positional."""
+    available_list = list(available)
+    missing: list[object] = []
+    for column in columns:
+        if isinstance(column, int):
+            if column < 0 or column >= len(available_list):
+                missing.append(column)
+        elif column not in available_list:
+            missing.append(column)
+    return missing

jano/io.py ADDED Viewed

@@ -0,0 +1,37 @@
+from __future__ import annotations
+from typing import Any
+import numpy as np
+import pandas as pd
+try:
+    import polars as pl
+except ImportError:  # pragma: no cover - exercised when polars is not installed
+    pl = None
+def coerce_tabular_input(X: Any) -> pd.DataFrame:
+    """Normalize supported tabular inputs into a pandas DataFrame."""
+    if isinstance(X, pd.DataFrame):
+        return X
+    if isinstance(X, np.ndarray):
+        if X.ndim == 0:
+            raise TypeError("NumPy scalar inputs are not supported; provide a tabular array")
+        if X.dtype.names is not None:
+            return pd.DataFrame.from_records(X)
+        return pd.DataFrame(X)
+    if pl is not None and isinstance(X, pl.DataFrame):
+        return pd.DataFrame(X.to_dict(as_series=False))
+    module_name = getattr(type(X), "__module__", "")
+    if module_name.startswith("polars"):
+        raise ImportError(
+            "Polars input support requires the optional 'polars' dependency to be installed"
+        )
+    raise TypeError(
+        "TemporalBacktestSplitter expects a pandas DataFrame, NumPy ndarray or polars DataFrame"
+    )

jano/jano.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Backward-compatible import surface for legacy users."""
+from .splitters import TemporalBacktestSplitter
+from .reporting import SimulationSummary
+from .splits import TimeSplit
+from .types import SegmentBoundaries, SizeSpec, TemporalPartitionSpec, TemporalSemanticsSpec
+# Legacy import surface: each name below is imported above from its current submodule.
+__all__ = [
+    "SegmentBoundaries",
+    "SimulationSummary",
+    "SizeSpec",
+    "TemporalBacktestSplitter",
+    "TemporalPartitionSpec",
+    "TemporalSemanticsSpec",
+    "TimeSplit",
+]

jano/mcp_server.py ADDED Viewed

@@ -0,0 +1,180 @@
+from __future__ import annotations
+from .mcp_tools import plan_walk_forward, preview_dataset, run_walk_forward
+def build_server():
+    """Build the Jano MCP server lazily so importing jano does not require MCP."""
+    # FastMCP is imported inside the function; any exception raised by that import
+    # (not only ImportError) is re-raised as a RuntimeError with install instructions.
+    try:
+        from mcp.server.fastmcp import FastMCP
+    except Exception as exc:  # pragma: no cover - exercised through runtime usage
+        raise RuntimeError(
+            "The Jano MCP server requires the optional MCP dependency. "
+            "Install it with `pip install \"jano[mcp]\"` in a Python 3.10+ environment."
+        ) from exc
+    mcp = FastMCP(
+        "Jano",
+        instructions=(
+            "Jano exposes temporal planning and simulation tools for time-aware "
+            "machine learning evaluation. Prefer planning before materializing folds "
+            "when the user wants to inspect iteration geometry or exclude date windows."
+        ),
+    )
+    # NOTE(review): the functions below are registered through ``@mcp.tool()``;
+    # presumably their signatures and docstrings are what MCP clients see, so they
+    # should not be reworded casually - confirm against the FastMCP documentation.
+    # Tool 1: thin wrapper that forwards to mcp_tools.preview_dataset.
+    @mcp.tool()
+    def preview_local_dataset(
+        dataset_path: str,
+        dataset_format: str = "auto",
+        sample_rows: int = 5,
+    ) -> dict:
+        """Preview a local tabular dataset before building temporal policies.
+        Args:
+            dataset_path: Local path to a CSV, Parquet or ZIP-with-CSV dataset.
+            dataset_format: Explicit format or ``"auto"``.
+            sample_rows: Number of rows to include in the preview.
+        """
+        return preview_dataset(
+            dataset_path,
+            dataset_format=dataset_format,
+            sample_rows=sample_rows,
+        )
+    # Tool 2: thin wrapper that forwards every argument to mcp_tools.plan_walk_forward.
+    @mcp.tool()
+    def plan_walk_forward_simulation(
+        dataset_path: str,
+        partition: dict,
+        step: str,
+        time_col: str,
+        strategy: str = "rolling",
+        allow_partial: bool = False,
+        engine: str = "auto",
+        start_at: str | None = None,
+        end_at: str | None = None,
+        max_folds: int | None = None,
+        dataset_format: str = "auto",
+        order_col: str | None = None,
+        train_time_col: str | None = None,
+        validation_time_col: str | None = None,
+        test_time_col: str | None = None,
+        title: str | None = None,
+        preview_rows: int = 20,
+    ) -> dict:
+        """Precompute a walk-forward plan and return fold boundaries plus row counts.
+        Args:
+            dataset_path: Local path to a CSV, Parquet or ZIP-with-CSV dataset.
+            partition: Object accepted by ``TemporalPartitionSpec``. Example:
+                ``{"layout": "train_test", "train_size": "7D", "test_size": "1D"}``.
+            step: Step size such as ``"1D"``.
+            time_col: Timeline column used to anchor the simulation.
+            strategy: Movement strategy: ``"single"``, ``"rolling"`` or ``"expanding"``.
+            allow_partial: Whether to keep a final partial fold.
+            engine: Internal partition engine preference: ``"auto"``, ``"pandas"``,
+                ``"polars"`` or ``"numpy"``.
+            start_at: Optional lower timestamp bound.
+            end_at: Optional upper timestamp bound.
+            max_folds: Optional maximum number of folds.
+            dataset_format: Explicit format or ``"auto"``.
+            order_col: Optional column used to sort the dataset.
+            train_time_col: Optional timestamp column used to assign train rows.
+            validation_time_col: Optional timestamp column used to assign validation rows.
+            test_time_col: Optional timestamp column used to assign test rows.
+            title: Optional report title.
+            preview_rows: Number of planned folds returned in the preview.
+        """
+        return plan_walk_forward(
+            dataset_path,
+            partition=partition,
+            step=step,
+            time_col=time_col,
+            strategy=strategy,
+            allow_partial=allow_partial,
+            engine=engine,
+            start_at=start_at,
+            end_at=end_at,
+            max_folds=max_folds,
+            dataset_format=dataset_format,
+            order_col=order_col,
+            train_time_col=train_time_col,
+            validation_time_col=validation_time_col,
+            test_time_col=test_time_col,
+            title=title,
+            preview_rows=preview_rows,
+        )
+    # Tool 3: same parameter list as the planning tool, forwarded to
+    # mcp_tools.run_walk_forward instead.
+    @mcp.tool()
+    def run_walk_forward_simulation(
+        dataset_path: str,
+        partition: dict,
+        step: str,
+        time_col: str,
+        strategy: str = "rolling",
+        allow_partial: bool = False,
+        engine: str = "auto",
+        start_at: str | None = None,
+        end_at: str | None = None,
+        max_folds: int | None = None,
+        dataset_format: str = "auto",
+        order_col: str | None = None,
+        train_time_col: str | None = None,
+        validation_time_col: str | None = None,
+        test_time_col: str | None = None,
+        title: str | None = None,
+        preview_rows: int = 20,
+    ) -> dict:
+        """Run a walk-forward simulation and return a compact summary plus HTML.
+        Args:
+            dataset_path: Local path to a CSV, Parquet or ZIP-with-CSV dataset.
+            partition: Object accepted by ``TemporalPartitionSpec``.
+            step: Step size such as ``"1D"``.
+            time_col: Timeline column used to anchor the simulation.
+            strategy: Movement strategy: ``"single"``, ``"rolling"`` or ``"expanding"``.
+            allow_partial: Whether to keep a final partial fold.
+            engine: Internal partition engine preference: ``"auto"``, ``"pandas"``,
+                ``"polars"`` or ``"numpy"``.
+            start_at: Optional lower timestamp bound.
+            end_at: Optional upper timestamp bound.
+            max_folds: Optional maximum number of folds.
+            dataset_format: Explicit format or ``"auto"``.
+            order_col: Optional column used to sort the dataset.
+            train_time_col: Optional timestamp column used to assign train rows.
+            validation_time_col: Optional timestamp column used to assign validation rows.
+            test_time_col: Optional timestamp column used to assign test rows.
+            title: Optional report title.
+            preview_rows: Number of summary rows returned in the preview.
+        """
+        return run_walk_forward(
+            dataset_path,
+            partition=partition,
+            step=step,
+            time_col=time_col,
+            strategy=strategy,
+            allow_partial=allow_partial,
+            engine=engine,
+            start_at=start_at,
+            end_at=end_at,
+            max_folds=max_folds,
+            dataset_format=dataset_format,
+            order_col=order_col,
+            train_time_col=train_time_col,
+            validation_time_col=validation_time_col,
+            test_time_col=test_time_col,
+            title=title,
+            preview_rows=preview_rows,
+        )
+    # The configured server is returned without being started; main() calls run().
+    return mcp
+def main() -> None:
+    """Run Jano as a local stdio MCP server."""
+    build_server().run()
+# Start the server only when this module is executed as a script, not on import.
+if __name__ == "__main__":
+    main()