jano 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jano/__init__.py ADDED
@@ -0,0 +1,57 @@
1
+ """Public package interface for Jano."""
2
+
3
+ from ._version import __version__
4
+ from .engines import PartitionEngineMetadata
5
+ from .planning import PartitionPlan, PlannedFold, SimulationPlan
6
+ from .policies import (
7
+ PerformanceDecayPolicy,
8
+ PerformanceDecayResult,
9
+ TrainGrowthPolicy,
10
+ TrainGrowthResult,
11
+ )
12
+ from .reporting import SimulationChartData, SimulationSummary
13
+ from .simulation import SimulationResult, TemporalSimulation
14
+ from .splitters import TemporalBacktestSplitter
15
+ from .splits import TimeSplit
16
+ from .types import (
17
+ FeatureLookbackSpec,
18
+ SegmentBoundaries,
19
+ SizeSpec,
20
+ TemporalPartitionSpec,
21
+ TemporalSemanticsSpec,
22
+ )
23
+ from .workflows import (
24
+ DriftMonitoringPolicy,
25
+ RollingTrainHistoryPolicy,
26
+ RollingTrainHistoryResult,
27
+ TrainHistoryPolicy,
28
+ WalkForwardPolicy,
29
+ )
30
+
31
# Names exported by ``from jano import *``; each entry is imported above in this module.
__all__ = [
    "FeatureLookbackSpec",
    "DriftMonitoringPolicy",
    "PartitionPlan",
    "PartitionEngineMetadata",
    "PerformanceDecayPolicy",
    "PerformanceDecayResult",
    "PlannedFold",
    "RollingTrainHistoryPolicy",
    "RollingTrainHistoryResult",
    "SegmentBoundaries",
    "SimulationChartData",
    "SimulationPlan",
    "SimulationResult",
    "SimulationSummary",
    "SizeSpec",
    "TemporalSimulation",
    "TemporalBacktestSplitter",
    "TemporalPartitionSpec",
    "TemporalSemanticsSpec",
    "TrainHistoryPolicy",
    "TrainGrowthPolicy",
    "TrainGrowthResult",
    "TimeSplit",
    "WalkForwardPolicy",
    "__version__",
]
jano/_version.py ADDED
@@ -0,0 +1,3 @@
1
+ """Package version metadata."""
2
+
3
# Package version string; re-exported by the package ``__init__`` as ``jano.__version__``.
__version__ = "0.3.1"
jano/describe.py ADDED
@@ -0,0 +1,5 @@
1
+ """Backward-compatible import surface for simulation descriptions."""
2
+
3
+ from .reporting import SimulationSummary
4
+
5
# Only ``SimulationSummary`` (imported from ``.reporting``) is exported from this shim.
__all__ = ["SimulationSummary"]
jano/engines.py ADDED
@@ -0,0 +1,196 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Iterable
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+ try:
10
+ import polars as pl
11
+ except ImportError: # pragma: no cover - exercised when polars is not installed
12
+ pl = None
13
+
14
+ from .io import coerce_tabular_input
15
+ from .types import ColumnRef
16
+
17
+
18
@dataclass(frozen=True)
class PartitionEngineMetadata:
    """Immutable record describing how the internal partition engine was set up.

    Attributes:
        engine: Internal engine selected to compute temporal boundaries and indices.
        input_backend: Backend detected from the user-provided input.
        converted: Whether the full dataset was converted before planning.
    """

    engine: str
    input_backend: str
    converted: bool

    def to_dict(self) -> dict[str, object]:
        """Return the three metadata fields as a plain ``dict`` keyed by field name."""
        exported = ("engine", "input_backend", "converted")
        return {field_name: getattr(self, field_name) for field_name in exported}
39
+
40
+
41
class PartitionEngine:
    """Thin internal adapter used by the splitter to avoid unnecessary conversions.

    The adapter keeps the wrapped dataset in ``data``, the execution metadata in
    ``metadata`` and caches the resolved column labels (``columns``) and the row
    count (``total_rows``) when it is constructed.
    """

    def __init__(
        self,
        data: Any,
        *,
        engine: str,
        input_backend: str,
        converted: bool = False,
    ) -> None:
        """Wrap ``data`` and derive metadata, column labels and the row count.

        Args:
            data: Dataset object handled by the selected engine.
            engine: Name of the internal engine (``"pandas"``, ``"polars"`` or ``"numpy"``).
            input_backend: Backend name detected from the original user input.
            converted: Whether the full dataset was converted to reach ``engine``.
        """
        self.data = data
        self.metadata = PartitionEngineMetadata(
            engine=engine,
            input_backend=input_backend,
            converted=converted,
        )
        self.columns = self._resolve_columns()
        self.total_rows = self._resolve_total_rows()

    @classmethod
    def from_input(cls, X: Any, prefer: str = "auto") -> "PartitionEngine":
        """Select the safest available internal engine for ``X``.

        Args:
            X: User-provided dataset.
            prefer: ``"auto"`` to follow the detected backend, or an explicit
                engine name (``"pandas"``, ``"polars"`` or ``"numpy"``).

        Returns:
            A ``PartitionEngine`` wrapping ``X`` (or a converted copy of it).

        Raises:
            ValueError: If ``prefer`` is not a known engine name, or the forced
                engine cannot be built from the detected input backend.
            ImportError: If the polars engine is requested but polars is not installed.
        """
        if prefer not in {"auto", "pandas", "polars", "numpy"}:
            raise ValueError("engine must be one of 'auto', 'pandas', 'polars' or 'numpy'")

        input_backend = detect_backend(X)
        selected = input_backend if prefer == "auto" else prefer

        if selected == "pandas":
            data = coerce_tabular_input(X)
            return cls(
                data,
                engine="pandas",
                input_backend=input_backend,
                converted=input_backend != "pandas",
            )

        if selected == "polars":
            if pl is None:
                raise ImportError("Polars engine requires the optional 'polars' dependency")
            if input_backend == "polars":
                return cls(X, engine="polars", input_backend=input_backend)
            if input_backend == "pandas":
                return cls(
                    pl.from_pandas(X),
                    engine="polars",
                    input_backend=input_backend,
                    converted=True,
                )
            raise ValueError("Polars engine can only be forced for pandas or polars inputs")

        if selected == "numpy":
            if input_backend == "numpy":
                return cls(X, engine="numpy", input_backend=input_backend)
            if input_backend == "pandas":
                return cls(
                    X.to_numpy(),
                    engine="numpy",
                    input_backend=input_backend,
                    converted=True,
                )
            raise ValueError("NumPy engine can only be forced for pandas or numpy inputs")

        # Unknown tabular-like objects are normalized through the stable pandas path.
        data = coerce_tabular_input(X)
        return cls(
            data,
            engine="pandas",
            input_backend=input_backend,
            converted=input_backend != "pandas",
        )

    @property
    def empty(self) -> bool:
        """Whether the wrapped dataset has zero rows."""
        return self.total_rows == 0

    def column_values(self, ref: ColumnRef) -> np.ndarray:
        """Return one column as a NumPy array without converting the whole dataset.

        Args:
            ref: Column label, or an integer treated as a column position.

        Returns:
            The values of the referenced column.

        Raises:
            ValueError: If ``ref`` is out of bounds or not found among the columns.
            RuntimeError: If the configured engine name is not supported.
        """
        if self.metadata.engine == "pandas":
            return self.data[self._resolve_column_ref(ref)].to_numpy()
        if self.metadata.engine == "polars":
            return self.data[self._resolve_column_ref(ref)].to_numpy()
        if self.metadata.engine == "numpy":
            key = self._resolve_column_ref(ref)
            if self.data.dtype.names is not None:
                return self.data[key]
            if self.data.ndim == 1:
                # ``_resolve_columns`` exposes a 1-D array as the single column 0;
                # ``data[:, 0]`` would raise IndexError, so the array itself is the column.
                return self.data
            return self.data[:, key]
        raise RuntimeError(f"Unsupported partition engine '{self.metadata.engine}'")

    def to_pandas(self) -> pd.DataFrame:
        """Materialize the full dataset as pandas for reporting or user-facing slices."""
        return coerce_tabular_input(self.data)

    def _resolve_columns(self) -> list[object]:
        """Return the column labels of ``data`` for the supported container types."""
        if isinstance(self.data, pd.DataFrame):
            return list(self.data.columns)
        if pl is not None and isinstance(self.data, pl.DataFrame):
            return list(self.data.columns)
        if isinstance(self.data, np.ndarray):
            if self.data.dtype.names is not None:
                # Structured arrays are addressed by their field names.
                return list(self.data.dtype.names)
            if self.data.ndim == 1:
                # A plain 1-D array is treated as one positional column.
                return [0]
            return list(range(self.data.shape[1]))
        return list(coerce_tabular_input(self.data).columns)

    def _resolve_total_rows(self) -> int:
        """Return the number of rows in ``data``.

        Raises:
            TypeError: If ``data`` is a zero-dimensional NumPy array.
        """
        if isinstance(self.data, pd.DataFrame):
            return len(self.data)
        if pl is not None and isinstance(self.data, pl.DataFrame):
            return self.data.height
        if isinstance(self.data, np.ndarray):
            if self.data.ndim == 0:
                raise TypeError("NumPy scalar inputs are not supported; provide a tabular array")
            return int(self.data.shape[0])
        return len(coerce_tabular_input(self.data))

    def _resolve_column_ref(self, ref: ColumnRef) -> object:
        """Validate ``ref`` and translate it into a key usable to index ``data``.

        Integer refs are positional: they stay integers for unstructured NumPy
        data and are mapped to the label at that position otherwise. Any other
        ref must be one of the known column labels.

        Raises:
            ValueError: If the position is out of bounds or the label is unknown.
        """
        if isinstance(ref, int):
            if ref < 0 or ref >= len(self.columns):
                raise ValueError(f"Column position {ref} is out of bounds")
            if self.metadata.engine == "numpy" and self.data.dtype.names is None:
                return ref
            return self.columns[ref]
        if ref not in self.columns:
            raise ValueError(f"Column '{ref}' was not found in the dataset")
        return ref
168
+
169
+
170
def detect_backend(X: Any) -> str:
    """Classify ``X`` as ``"pandas"``, ``"numpy"`` or ``"polars"`` for engine metadata.

    Objects that match none of the known containers are reported as ``"pandas"``.

    Raises:
        ImportError: If the type of ``X`` comes from a ``polars`` module but polars
            could not be imported.
    """
    for container_type, label in ((pd.DataFrame, "pandas"), (np.ndarray, "numpy")):
        if isinstance(X, container_type):
            return label

    polars_available = pl is not None
    if polars_available and isinstance(X, pl.DataFrame):
        return "polars"

    origin = getattr(type(X), "__module__", "")
    if not origin.startswith("polars"):
        return "pandas"
    raise ImportError(
        "Polars input support requires the optional 'polars' dependency to be installed"
    )
184
+
185
+
186
def missing_columns(columns: Iterable[ColumnRef], available: Iterable[object]) -> list[object]:
    """List the entries of ``columns`` that cannot be resolved against ``available``.

    Integer entries are positions and are missing when they fall outside
    ``[0, len(available))``; every other entry is missing when it is not a
    member of ``available``. The input order is preserved.
    """
    pool = list(available)
    width = len(pool)

    def _is_absent(candidate: object) -> bool:
        if isinstance(candidate, int):
            return not 0 <= candidate < width
        return candidate not in pool

    return [candidate for candidate in columns if _is_absent(candidate)]
jano/io.py ADDED
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ try:
9
+ import polars as pl
10
+ except ImportError: # pragma: no cover - exercised when polars is not installed
11
+ pl = None
12
+
13
+
14
def coerce_tabular_input(X: Any) -> pd.DataFrame:
    """Convert a supported tabular input into a pandas DataFrame.

    A pandas DataFrame is returned as-is. NumPy arrays are wrapped (structured
    arrays via ``DataFrame.from_records``) and polars DataFrames are rebuilt
    from their column dictionary.

    Raises:
        TypeError: If ``X`` is a zero-dimensional NumPy array or an unsupported type.
        ImportError: If the type of ``X`` comes from a ``polars`` module but polars
            could not be imported.
    """
    if isinstance(X, pd.DataFrame):
        return X

    if isinstance(X, np.ndarray):
        if X.ndim == 0:
            raise TypeError("NumPy scalar inputs are not supported; provide a tabular array")
        is_structured = X.dtype.names is not None
        return pd.DataFrame.from_records(X) if is_structured else pd.DataFrame(X)

    if pl is not None and isinstance(X, pl.DataFrame):
        column_data = X.to_dict(as_series=False)
        return pd.DataFrame(column_data)

    origin = getattr(type(X), "__module__", "")
    if origin.startswith("polars"):
        raise ImportError(
            "Polars input support requires the optional 'polars' dependency to be installed"
        )

    raise TypeError(
        "TemporalBacktestSplitter expects a pandas DataFrame, NumPy ndarray or polars DataFrame"
    )
jano/jano.py ADDED
@@ -0,0 +1,16 @@
1
+ """Backward-compatible import surface for legacy users."""
2
+
3
+ from .splitters import TemporalBacktestSplitter
4
+ from .reporting import SimulationSummary
5
+ from .splits import TimeSplit
6
+ from .types import SegmentBoundaries, SizeSpec, TemporalPartitionSpec, TemporalSemanticsSpec
7
+
8
# Names exported by this legacy import shim; each one is imported above in this module.
__all__ = [
    "SegmentBoundaries",
    "SimulationSummary",
    "SizeSpec",
    "TemporalBacktestSplitter",
    "TemporalPartitionSpec",
    "TemporalSemanticsSpec",
    "TimeSplit",
]
jano/mcp_server.py ADDED
@@ -0,0 +1,180 @@
1
+ from __future__ import annotations
2
+
3
+ from .mcp_tools import plan_walk_forward, preview_dataset, run_walk_forward
4
+
5
+
6
def build_server():
    """Build the Jano MCP server lazily so importing jano does not require MCP."""

    # The MCP package is imported inside the function so that merely importing
    # this module does not need the optional dependency.
    try:
        from mcp.server.fastmcp import FastMCP
    # NOTE(review): the broad ``Exception`` looks deliberate (the message mentions
    # Python 3.10+, so failures other than ImportError may be expected) - confirm.
    except Exception as exc:  # pragma: no cover - exercised through runtime usage
        raise RuntimeError(
            "The Jano MCP server requires the optional MCP dependency. "
            "Install it with `pip install \"jano[mcp]\"` in a Python 3.10+ environment."
        ) from exc

    mcp = FastMCP(
        "Jano",
        instructions=(
            "Jano exposes temporal planning and simulation tools for time-aware "
            "machine learning evaluation. Prefer planning before materializing folds "
            "when the user wants to inspect iteration geometry or exclude date windows."
        ),
    )

    # Each tool below is a thin wrapper that forwards its arguments unchanged to
    # the matching function imported from ``.mcp_tools``.
    @mcp.tool()
    def preview_local_dataset(
        dataset_path: str,
        dataset_format: str = "auto",
        sample_rows: int = 5,
    ) -> dict:
        """Preview a local tabular dataset before building temporal policies.

        Args:
            dataset_path: Local path to a CSV, Parquet or ZIP-with-CSV dataset.
            dataset_format: Explicit format or ``"auto"``.
            sample_rows: Number of rows to include in the preview.
        """
        return preview_dataset(
            dataset_path,
            dataset_format=dataset_format,
            sample_rows=sample_rows,
        )

    @mcp.tool()
    def plan_walk_forward_simulation(
        dataset_path: str,
        partition: dict,
        step: str,
        time_col: str,
        strategy: str = "rolling",
        allow_partial: bool = False,
        engine: str = "auto",
        start_at: str | None = None,
        end_at: str | None = None,
        max_folds: int | None = None,
        dataset_format: str = "auto",
        order_col: str | None = None,
        train_time_col: str | None = None,
        validation_time_col: str | None = None,
        test_time_col: str | None = None,
        title: str | None = None,
        preview_rows: int = 20,
    ) -> dict:
        """Precompute a walk-forward plan and return fold boundaries plus row counts.

        Args:
            dataset_path: Local path to a CSV, Parquet or ZIP-with-CSV dataset.
            partition: Object accepted by ``TemporalPartitionSpec``. Example:
                ``{"layout": "train_test", "train_size": "7D", "test_size": "1D"}``.
            step: Step size such as ``"1D"``.
            time_col: Timeline column used to anchor the simulation.
            strategy: Movement strategy: ``"single"``, ``"rolling"`` or ``"expanding"``.
            allow_partial: Whether to keep a final partial fold.
            engine: Internal partition engine preference: ``"auto"``, ``"pandas"``,
                ``"polars"`` or ``"numpy"``.
            start_at: Optional lower timestamp bound.
            end_at: Optional upper timestamp bound.
            max_folds: Optional maximum number of folds.
            dataset_format: Explicit format or ``"auto"``.
            order_col: Optional column used to sort the dataset.
            train_time_col: Optional timestamp column used to assign train rows.
            validation_time_col: Optional timestamp column used to assign validation rows.
            test_time_col: Optional timestamp column used to assign test rows.
            title: Optional report title.
            preview_rows: Number of planned folds returned in the preview.
        """
        return plan_walk_forward(
            dataset_path,
            partition=partition,
            step=step,
            time_col=time_col,
            strategy=strategy,
            allow_partial=allow_partial,
            engine=engine,
            start_at=start_at,
            end_at=end_at,
            max_folds=max_folds,
            dataset_format=dataset_format,
            order_col=order_col,
            train_time_col=train_time_col,
            validation_time_col=validation_time_col,
            test_time_col=test_time_col,
            title=title,
            preview_rows=preview_rows,
        )

    @mcp.tool()
    def run_walk_forward_simulation(
        dataset_path: str,
        partition: dict,
        step: str,
        time_col: str,
        strategy: str = "rolling",
        allow_partial: bool = False,
        engine: str = "auto",
        start_at: str | None = None,
        end_at: str | None = None,
        max_folds: int | None = None,
        dataset_format: str = "auto",
        order_col: str | None = None,
        train_time_col: str | None = None,
        validation_time_col: str | None = None,
        test_time_col: str | None = None,
        title: str | None = None,
        preview_rows: int = 20,
    ) -> dict:
        """Run a walk-forward simulation and return a compact summary plus HTML.

        Args:
            dataset_path: Local path to a CSV, Parquet or ZIP-with-CSV dataset.
            partition: Object accepted by ``TemporalPartitionSpec``.
            step: Step size such as ``"1D"``.
            time_col: Timeline column used to anchor the simulation.
            strategy: Movement strategy: ``"single"``, ``"rolling"`` or ``"expanding"``.
            allow_partial: Whether to keep a final partial fold.
            engine: Internal partition engine preference: ``"auto"``, ``"pandas"``,
                ``"polars"`` or ``"numpy"``.
            start_at: Optional lower timestamp bound.
            end_at: Optional upper timestamp bound.
            max_folds: Optional maximum number of folds.
            dataset_format: Explicit format or ``"auto"``.
            order_col: Optional column used to sort the dataset.
            train_time_col: Optional timestamp column used to assign train rows.
            validation_time_col: Optional timestamp column used to assign validation rows.
            test_time_col: Optional timestamp column used to assign test rows.
            title: Optional report title.
            preview_rows: Number of summary rows returned in the preview.
        """
        return run_walk_forward(
            dataset_path,
            partition=partition,
            step=step,
            time_col=time_col,
            strategy=strategy,
            allow_partial=allow_partial,
            engine=engine,
            start_at=start_at,
            end_at=end_at,
            max_folds=max_folds,
            dataset_format=dataset_format,
            order_col=order_col,
            train_time_col=train_time_col,
            validation_time_col=validation_time_col,
            test_time_col=test_time_col,
            title=title,
            preview_rows=preview_rows,
        )

    # Hand the configured server back to the caller; running it is the caller's job.
    return mcp
171
+
172
+
173
def main() -> None:
    """Build the Jano MCP server and start it with its default run settings."""

    server = build_server()
    server.run()
177
+
178
+
179
# Start the server only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()