juniper-recurrence-model 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- juniper_recurrence_model/__init__.py +27 -0
- juniper_recurrence_model/_version.py +7 -0
- juniper_recurrence_model/data.py +103 -0
- juniper_recurrence_model/model.py +261 -0
- juniper_recurrence_model/units/__init__.py +9 -0
- juniper_recurrence_model/units/lmu_varstep.py +237 -0
- juniper_recurrence_model-0.1.0.dist-info/METADATA +133 -0
- juniper_recurrence_model-0.1.0.dist-info/RECORD +10 -0
- juniper_recurrence_model-0.1.0.dist-info/WHEEL +5 -0
- juniper_recurrence_model-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""juniper-recurrence-model — the model-specific core for the juniper-recurrence app.
|
|
2
|
+
|
|
3
|
+
The selected model is **P3-C (LMU + Approach-C)**: a closed-form, variable-Δt Legendre
|
|
4
|
+
Memory Unit discretisation (C1-clean, irregular-Δt-native). This package ships the Δt-native
|
|
5
|
+
memory unit (:class:`VariableStepLMUMemory`), the fixed-order LMU regressor
|
|
6
|
+
(:class:`LMURegressor`) implementing juniper-model-core's ``TrainableModel`` interface, and a
|
|
7
|
+
lean loader (:func:`load_sequence_npz`) for the WS-1 3-D sequence NPZ contract.
|
|
8
|
+
|
|
9
|
+
See the design of record ``notes/JUNIPER_RECURRENCE_MODEL_DETAILED_DESIGN_2026-06-14.md`` and
|
|
10
|
+
the WS-4 build plan ``notes/JUNIPER_RECURRENCE_WS4_MODEL_BUILD_PLAN_2026-06-15.md`` (juniper-ml).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from juniper_recurrence_model._version import __version__
|
|
14
|
+
from juniper_recurrence_model.data import SequenceData, load_sequence_npz, sequence_data_from_arrays
|
|
15
|
+
from juniper_recurrence_model.model import LMURegressor, LMUSerializer
|
|
16
|
+
from juniper_recurrence_model.units import VariableStepLMUMemory, lmu_matrices
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"__version__",
|
|
20
|
+
"LMURegressor",
|
|
21
|
+
"LMUSerializer",
|
|
22
|
+
"SequenceData",
|
|
23
|
+
"load_sequence_npz",
|
|
24
|
+
"sequence_data_from_arrays",
|
|
25
|
+
"VariableStepLMUMemory",
|
|
26
|
+
"lmu_matrices",
|
|
27
|
+
]
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Load a 3-D sequence NPZ artifact (the WS-1 contract) into arrays for the regressor.
|
|
2
|
+
|
|
3
|
+
The authoritative, full-contract validator is juniper-data-client's
|
|
4
|
+
``validate_npz_contract`` — the juniper-recurrence *app* calls it on the data-fetch path.
|
|
5
|
+
This module is the lean, **numpy-only model-side reader**: it pulls the per-split arrays
|
|
6
|
+
:class:`~juniper_recurrence_model.LMURegressor` consumes (``X`` / ``y`` / ``dt`` /
|
|
7
|
+
``target_dt`` / ``seq_lengths``) out of the NPZ key layout (per-split suffixes
|
|
8
|
+
``_train`` / ``_test`` / ``_full``) and applies the minimal ``dt`` rules the model relies on.
|
|
9
|
+
It deliberately takes **no** juniper-data-client dependency, keeping this package numpy-only.
|
|
10
|
+
|
|
11
|
+
The WS-1 3-D contract (juniper-data#168; ``DELTA_T_HANDLING`` §6): ``X_{split}`` is ``(W, L, F)``;
|
|
12
|
+
``dt_{split}`` is ``(W, L)`` with ``dt[:, 0] == 0`` and ``dt >= 0`` (or absolute ``t_{split}``,
|
|
13
|
+
from which ``dt`` is derived); ``y_reg_{split}`` is the regression target (one per window);
|
|
14
|
+
``target_dt_{split}`` (horizon) and ``seq_lengths_{split}`` (valid step count) are optional.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
|
|
24
|
+
__all__ = ["SequenceData", "load_sequence_npz", "sequence_data_from_arrays"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(frozen=True)
class SequenceData:
    """Arrays for a single split of a 3-D sequence artifact, shaped for :class:`LMURegressor`.

    ``X`` is ``(W, L, F)``, ``y`` is ``(W, output_dim)`` and ``dt`` is ``(W, L)`` with
    ``dt[:, 0] == 0``. The per-window ``target_dt`` ``(W,)`` and ``seq_lengths`` ``(W,)``
    arrays are optional and default to ``None``.
    """

    X: np.ndarray
    y: np.ndarray
    dt: np.ndarray
    target_dt: np.ndarray | None = None
    seq_lengths: np.ndarray | None = None

    def fit_kwargs(self) -> dict[str, Any]:
        """Return the auxiliary-array keywords for ``LMURegressor.fit`` / ``predict`` (the D3 contract).

        ``dt`` is always present; ``target_dt`` and ``seq_lengths`` are included only when
        they are not ``None``. Key order is ``dt``, ``target_dt``, ``seq_lengths``.
        """
        optional = (("target_dt", self.target_dt), ("seq_lengths", self.seq_lengths))
        present = {name: value for name, value in optional if value is not None}
        return {"dt": self.dt, **present}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def load_sequence_npz(path: Any, split: str = "train") -> SequenceData:
    """Read one ``split`` (``"train"`` / ``"test"`` / ``"full"``) of a 3-D sequence ``.npz``.

    Every array in the archive is materialised into a plain dict (pickled payloads are
    refused via ``allow_pickle=False``) and handed to :func:`sequence_data_from_arrays`,
    which selects and validates the keys for ``split``.
    """
    with np.load(path, allow_pickle=False) as archive:
        loaded = {}
        for name in archive.files:
            loaded[name] = archive[name]
    return sequence_data_from_arrays(loaded, split)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def sequence_data_from_arrays(arrays: dict[str, np.ndarray], split: str = "train") -> SequenceData:
    """Build a :class:`SequenceData` from an in-memory NPZ array mapping.

    Reads ``X_{split}`` (required, 3-D), the regression target ``y_reg_{split}`` (preferred;
    falls back to ``y_{split}``), and the timing channel ``dt_{split}`` (or derives it from
    ``t_{split}``). ``target_dt_{split}`` / ``seq_lengths_{split}`` are read when present.
    Applies the minimal model-side ``dt`` checks (a strict subset of ``validate_npz_contract``).

    Parameters
    ----------
    arrays:
        Mapping from NPZ key to array.
    split:
        Key suffix selecting the split (``"train"`` by default).

    Returns
    -------
    SequenceData
        ``X`` as given, ``y`` promoted to 2-D when it was 1-D, ``dt`` as float, and the
        optional ``target_dt`` / ``seq_lengths`` reshaped to ``(W,)`` (``None`` if absent).

    Raises
    ------
    ValueError
        If a required key is missing, ``X`` is not 3-D, the timing array (``dt`` or ``t``)
        is not shaped ``(W, L)``, a gap is negative, or ``dt[:, 0]`` is non-zero.
    """
    x_key = f"X_{split}"
    if x_key not in arrays:
        raise ValueError(f"NPZ artifact is missing required key 'X_{split}'")
    X = np.asarray(arrays[x_key])
    if X.ndim != 3:
        raise ValueError(f"X_{split} must be 3-D (W, L, F) for a sequence artifact; got {X.ndim}-D")
    n_windows, lookback = int(X.shape[0]), int(X.shape[1])
    expected_shape = (n_windows, lookback)

    # Regression target: prefer y_reg, fall back to y.
    if f"y_reg_{split}" in arrays:
        y = np.asarray(arrays[f"y_reg_{split}"])
    elif f"y_{split}" in arrays:
        y = np.asarray(arrays[f"y_{split}"])
    else:
        raise ValueError(f"missing regression target: neither 'y_reg_{split}' nor 'y_{split}' present")
    if y.ndim == 1:
        y = y[:, None]

    # Timing: dt directly, or derived from absolute t (matches the contract's t/dt consistency).
    dt_key, t_key = f"dt_{split}", f"t_{split}"
    if dt_key in arrays:
        dt = np.asarray(arrays[dt_key], dtype=float)
        if dt.shape != expected_shape:
            raise ValueError(f"{dt_key} shape {dt.shape} != {expected_shape}")
    elif t_key in arrays:
        t = np.asarray(arrays[t_key], dtype=float)
        # Validate *before* differencing: a non-2-D ``t`` would otherwise escape as an
        # opaque IndexError from the ``dt[:, 1:]`` slice, and the message should name the
        # key that is actually wrong.
        if t.shape != expected_shape:
            raise ValueError(f"{t_key} shape {t.shape} != {expected_shape}")
        dt = np.zeros_like(t)
        dt[:, 1:] = np.diff(t, axis=1)
    else:
        raise ValueError(f"a 3-D artifact needs at least one of 'dt_{split}' / 't_{split}'")
    if np.any(dt < 0):
        raise ValueError(f"{dt_key} has negative gaps")
    # ``dt.size`` is zero when either W or L is zero, so column 0 is only indexed when it exists.
    if dt.size and np.any(dt[:, 0] != 0):
        raise ValueError(f"{dt_key}[:, 0] must be 0 by convention")

    target_dt = np.asarray(arrays[f"target_dt_{split}"]).reshape(n_windows) if f"target_dt_{split}" in arrays else None
    seq_lengths = np.asarray(arrays[f"seq_lengths_{split}"]).reshape(n_windows) if f"seq_lengths_{split}" in arrays else None

    return SequenceData(X=X, y=y, dt=dt, target_dt=target_dt, seq_lengths=seq_lengths)
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"""Fixed-order Δt-native LMU regressor — ``juniper-model-core`` ``TrainableModel``.
|
|
2
|
+
|
|
3
|
+
This is the WS-4 model layer: a standalone, fixed-order, irregular-Δt-native
|
|
4
|
+
Legendre Memory Unit **regressor** that satisfies ``juniper_model_core.TrainableModel``.
|
|
5
|
+
It wraps the fixed :class:`~juniper_recurrence_model.units.VariableStepLMUMemory` cell
|
|
6
|
+
with the only trained surface — a closed-form least-squares **readout**.
|
|
7
|
+
|
|
8
|
+
Design (ratified decisions D-WS4-1…3, plan
|
|
9
|
+
``notes/JUNIPER_RECURRENCE_WS4_MODEL_BUILD_PLAN_2026-06-15.md`` in juniper-ml):
|
|
10
|
+
|
|
11
|
+
* **D-WS4-1 — per-feature identity read-in.** Each of the ``F`` input features drives its
|
|
12
|
+
own order-``d`` memory through the *same* fixed ``A``/``B``/θ (no trained projection); the
|
|
13
|
+
per-window memory state is the concatenation ``M ∈ ℝ^{F·d}``. Only the readout is trained.
|
|
14
|
+
* **D-WS4-2 — ``target_dt`` as a readout feature.** When supplied, the irregular forecast
|
|
15
|
+
horizon is concatenated to the readout design matrix.
|
|
16
|
+
* **D-WS4-3 — standalone.** No cascor cascade head; this regressor has its own readout.
|
|
17
|
+
|
|
18
|
+
Because the memory matrices are fixed (never differentiated) and the readout is linear, the
|
|
19
|
+
whole model is a closed-form ``lstsq`` solve over an LMU-memory feature map — **numpy-only,
|
|
20
|
+
no autodiff framework**. This is the structural twin of ``juniper_model_core``'s
|
|
21
|
+
``ReferenceLinearModel`` with its ``_flatten(X)`` feature map replaced by a dt-aware
|
|
22
|
+
LMU-memory rollout. (A trained projection read-in / nonlinear readout — the point at which
|
|
23
|
+
torch would enter — is a deferred increment.)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import os
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
import numpy as np
|
|
33
|
+
from juniper_model_core import ModelSerializer, TaskType, Topology, TrainableModel, TrainingEvent, TrainResult
|
|
34
|
+
|
|
35
|
+
from juniper_recurrence_model.units.lmu_varstep import VariableStepLMUMemory
|
|
36
|
+
|
|
37
|
+
__all__ = ["LMURegressor", "LMUSerializer"]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _regression_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict[str, float]:
    """Compute the canonical regression metrics (``REGRESSION_METRIC_KEYS``); never ``accuracy`` (RK-6).

    Returns ``mse``, ``rmse``, ``mae``, ``r2`` and ``loss`` (equal to ``mse``). ``r2`` is
    reported as ``0.0`` when the total sum of squares is not positive.
    """
    residual = y_pred - y_true
    squared = residual**2
    mse = float(np.mean(squared))
    ss_res = float(np.sum(squared))
    centred = y_true - y_true.mean(axis=0)
    ss_tot = float(np.sum(centred**2))
    if ss_tot > 0:
        r2 = 1.0 - ss_res / ss_tot
    else:
        r2 = 0.0
    mae = float(np.mean(np.abs(residual)))
    return {"mse": mse, "rmse": mse**0.5, "mae": mae, "r2": r2, "loss": mse}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class LMURegressor(TrainableModel):
    """Fixed-order Δt-native LMU regressor (per-feature identity read-in + linear readout).

    Parameters
    ----------
    d:
        LMU memory order (Legendre coefficients per feature). Practical range ~4..64.
    theta:
        Memory window length, in the same real-time units as ``dt`` (e.g. calendar days).
        ``None`` (default) resolves it data-drivenly at ``fit``: the median per-window total
        elapsed time ``median(sum(dt, axis=1))``, falling back to the window length ``T`` when
        ``dt`` is absent or non-positive. A data-driven theta is re-resolved on *every*
        ``fit`` call, so refitting on new data never reuses a theta derived from old data.
    ridge:
        L2 penalty on the readout (the bias column is never penalised). ``0.0`` (default)
        uses a plain min-norm least-squares solve — which lets the readout memorise a tiny
        set exactly (the overfit-tiny guarantee) and mirrors the reference model.
    time_unit:
        Declared real-time unit of ``dt`` / ``theta`` (carried in the topology meta).
    random_seed:
        Stored for the contract; the closed-form fit is deterministic regardless.
    """

    def __init__(self, d: int = 16, theta: float | None = None, *, ridge: float = 0.0, time_unit: str = "steps", random_seed: int | None = 0) -> None:
        """Store the hyperparameters and, when ``theta`` is pinned, build the fixed memory."""
        self.task_type: TaskType = "regression"
        self.random_seed = random_seed
        self.d = int(d)
        self.theta: float | None = None if theta is None else float(theta)
        self.ridge = float(ridge)
        self.time_unit = str(time_unit)
        # ``self.theta`` is overwritten with the resolved value in fit(), so remember
        # separately that it was requested as data-driven; fit() re-resolves it each time.
        self._theta_is_data_driven: bool = self.theta is None
        # When theta is data-driven (None) the fixed memory is built in fit(); see fit().
        self._memory = None if self.theta is None else VariableStepLMUMemory(self.d, self.theta)
        self._coef: np.ndarray | None = None
        self._in_shape: tuple[int, ...] = ()
        self._out_shape: tuple[int, ...] = ()
        self._n_features: int | None = None
        self._uses_target_dt: bool = False
        self._metrics: dict[str, float] = {}

    # ----- feature map (shared by fit and predict) -----------------------------------
    @staticmethod
    def _readout_index(n: int, n_steps: int, readout_mask: np.ndarray | None, seq_lengths: np.ndarray | None) -> np.ndarray:
        """Per-sample index of the readout step (the last valid step, many-to-one).

        ``seq_lengths`` takes precedence: the index is ``seq_lengths - 1`` clipped to
        ``[0, n_steps - 1]``. Otherwise the last ``True`` of each ``readout_mask`` row is
        used (rows with no ``True`` fall back to the final step). With neither, every
        sample reads out at ``n_steps - 1``.
        """
        if seq_lengths is not None:
            return np.clip(np.asarray(seq_lengths, dtype=int) - 1, 0, n_steps - 1)
        if readout_mask is not None:
            mask = np.asarray(readout_mask, dtype=bool)
            # argmax on the time-reversed mask finds the first True from the end.
            reversed_mask = mask[:, ::-1]
            has_true = reversed_mask.any(axis=1)
            last_true = n_steps - 1 - np.argmax(reversed_mask, axis=1)
            return np.where(has_true, last_true, n_steps - 1)
        return np.full(n, n_steps - 1, dtype=int)

    def _features(self, X: np.ndarray, dt: np.ndarray | None, target_dt: np.ndarray | None, readout_mask: np.ndarray | None, seq_lengths: np.ndarray | None) -> np.ndarray:
        """Build the readout design matrix: memory state, optional horizon column, bias.

        Raises ``ValueError`` when ``X`` is not 3-D or its feature count differs from the
        one recorded at fit time.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 3:
            raise ValueError(f"X must be 3-D (n, T, F); got shape {X.shape}")
        n, n_steps, n_features = X.shape
        if self._n_features is not None and n_features != self._n_features:
            raise ValueError(f"expected F={self._n_features} features, got {n_features}")
        if dt is None:
            dt = np.zeros((n, n_steps))
            dt[:, 1:] = 1.0  # uniform unit-spacing fallback (bare predict(X) — no timing supplied)
        trajectory = self._memory.rollout_batch(X, dt)  # (n, T, F, d)
        idx = self._readout_index(n, n_steps, readout_mask, seq_lengths)
        memory_state = trajectory[np.arange(n), idx].reshape(n, n_features * self.d)  # (n, F*d)
        columns = [memory_state]
        if self._uses_target_dt:
            # A model fit with target_dt keeps the column even if predict omits it (zeros).
            horizon = np.zeros(n) if target_dt is None else np.asarray(target_dt, dtype=float).reshape(n)
            columns.append(horizon[:, None])
        columns.append(np.ones((n, 1)))  # bias
        return np.concatenate(columns, axis=1)

    # ----- TrainableModel contract ---------------------------------------------------
    def fit(self, X: np.ndarray, y: np.ndarray, *, X_val: np.ndarray | None = None, y_val: np.ndarray | None = None, on_event: Any = None, **kw: Any) -> TrainResult:
        """Fit the linear readout in closed form over the LMU-memory feature map.

        ``X`` must be ``(n, T, F)`` and ``y`` ``(n,)`` or ``(n, output_dim)``. The keywords
        ``dt``, ``target_dt``, ``readout_mask`` and ``seq_lengths`` are read from ``**kw``.
        ``X_val`` / ``y_val`` are accepted but not used by this method. When ``on_event``
        is given it receives ``training_start``, ``epoch_end`` and ``training_end`` events.

        Raises ``ValueError`` for a non-3-D ``X`` and ``NotImplementedError`` for a 3-D ``y``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 3:
            raise ValueError(f"X must be 3-D (n, T, F); got shape {X.shape}")
        if y.ndim == 3:
            raise NotImplementedError("dense many-to-many readout is a deferred WS-4 increment; supply one target per window (y of shape (n,) or (n, output_dim))")
        if y.ndim == 1:
            y = y[:, None]
        n, n_steps, n_features = X.shape
        self._in_shape = (n_steps, n_features)
        self._n_features = n_features
        self._out_shape = (int(y.shape[1]),)
        self._uses_target_dt = kw.get("target_dt") is not None
        # Resolve a data-driven theta (median per-window elapsed time) when not pinned,
        # then build the fixed LMU memory. A pinned theta is used as-is. The flag (rather
        # than ``self.theta is None``) is what makes a *re*-fit resolve theta again: after
        # the first fit ``self.theta`` already holds the previously resolved value.
        data_driven = getattr(self, "_theta_is_data_driven", False) or self.theta is None
        if data_driven:
            window_dt = kw.get("dt")
            theta = float(np.median(np.sum(np.asarray(window_dt, dtype=float), axis=1))) if window_dt is not None else float(n_steps)
            self.theta = theta if theta > 0 else float(n_steps)
        if data_driven or self._memory is None:
            self._memory = VariableStepLMUMemory(self.d, self.theta)

        seq = 0
        if on_event is not None:
            on_event(TrainingEvent("training_start", {"n_samples": int(n)}, seq))
            seq += 1

        design = self._features(X, kw.get("dt"), kw.get("target_dt"), kw.get("readout_mask"), kw.get("seq_lengths"))
        if self.ridge > 0.0:
            gram = design.T @ design
            penalty = self.ridge * np.eye(gram.shape[0])
            penalty[-1, -1] = 0.0  # never regularise the bias column
            coef = np.linalg.solve(gram + penalty, design.T @ y)
        else:
            coef, *_ = np.linalg.lstsq(design, y, rcond=None)
        self._coef = coef
        self._metrics = _regression_metrics(y, design @ coef)

        if on_event is not None:
            on_event(TrainingEvent("epoch_end", {"epoch": 0, "metrics": dict(self._metrics)}, seq))
            seq += 1
            on_event(TrainingEvent("training_end", {"metrics": dict(self._metrics)}, seq))
        return TrainResult(final_metrics=dict(self._metrics), n_epochs=1, history=[dict(self._metrics)], stopped_reason="converged")

    def predict(self, X: np.ndarray, *, dt: np.ndarray | None = None, target_dt: np.ndarray | None = None, readout_mask: np.ndarray | None = None, seq_lengths: np.ndarray | None = None) -> np.ndarray:
        """Continuous predictions for ``X``.

        The signature widens the ``TrainableModel.predict(X)`` contract with *optional*
        sequence keywords (the ABC checks the method name, not the signature). When ``dt``
        is omitted — as the conformance kit calls it — a uniform unit grid is assumed; real
        callers pass ``dt`` (and ``target_dt`` when the model was fit with one) to engage the
        Δt path. Never returns an ``argmax`` (RK-6 — collapsing to labels is classification-only).

        Raises ``RuntimeError`` when the model has not been fitted.
        """
        if self._coef is None:
            raise RuntimeError("model is not fitted")
        X = np.asarray(X, dtype=float)
        design = self._features(X, dt, target_dt, readout_mask, seq_lengths)
        return (design @ self._coef).reshape((X.shape[0], *self._out_shape))

    def metrics(self) -> dict[str, float]:
        """Return a copy of the metrics recorded by the last ``fit`` (empty before fitting)."""
        return dict(self._metrics)

    def describe_topology(self) -> Topology:
        """Describe the fixed input → memory (self-recurrent) → output graph and its meta."""
        return {
            "model_type": "lmu",
            "nodes": [
                {"id": "input", "kind": "input", "frozen": True},
                {"id": "memory", "kind": "memory", "frozen": True},
                {"id": "output", "kind": "output", "frozen": False},
            ],
            "edges": [
                {"src": "input", "dst": "memory", "recurrent": False},
                {"src": "memory", "dst": "memory", "recurrent": True},
                {"src": "memory", "dst": "output", "recurrent": False},
            ],
            "meta": {
                "n_units": 0,
                "task_type": self.task_type,
                "theta": self.theta,
                "d": self.d,
                "time_unit": self.time_unit,
                "n_features": self._n_features,
            },
        }

    @property
    def input_shape(self) -> tuple[int, ...]:
        """``(T, F)`` of the data last passed to ``fit``; ``()`` before fitting."""
        return self._in_shape

    @property
    def output_shape(self) -> tuple[int, ...]:
        """``(output_dim,)`` of the targets last passed to ``fit``; ``()`` before fitting."""
        return self._out_shape
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class LMUSerializer(ModelSerializer):
    """Lossless ``.npz`` + JSON serializer for :class:`LMURegressor`.

    The archive holds the trained readout coefficients (``coef``) and a JSON string of the
    hyperparameters and fit-time bookkeeping (``meta``). The fixed memory eigendecomposition
    is not stored: it is recomputed from ``d``/θ on load (deterministic), so reloaded
    predictions are bit-identical (the conformance kit's lossless-round-trip assertion).
    """

    def save(self, model: TrainableModel, path: str | os.PathLike[str]) -> None:
        """Write ``model`` to ``path``.

        Raises ``TypeError`` for anything other than an :class:`LMURegressor` and
        ``RuntimeError`` for an unfitted one.
        """
        if not isinstance(model, LMURegressor):
            raise TypeError("LMUSerializer only serializes LMURegressor")
        if model._coef is None:
            raise RuntimeError("cannot serialize an unfitted model")
        payload = dict(
            d=model.d,
            theta=model.theta,
            ridge=model.ridge,
            time_unit=model.time_unit,
            random_seed=model.random_seed,
            task_type=model.task_type,
            in_shape=list(model._in_shape),
            out_shape=list(model._out_shape),
            n_features=model._n_features,
            uses_target_dt=model._uses_target_dt,
            metrics=model._metrics,
        )
        np.savez(os.fspath(path), coef=model._coef, meta=json.dumps(payload))

    def load(self, path: str | os.PathLike[str]) -> LMURegressor:
        """Rebuild an :class:`LMURegressor` from an archive written by :meth:`save`.

        ``.npz`` is appended to ``path`` when it is missing, so the same ``path`` given to
        :meth:`save` can be passed here.
        """
        target = os.fspath(path)
        target = target if target.endswith(".npz") else target + ".npz"
        with np.load(target, allow_pickle=False) as archive:
            coef = archive["coef"]
            meta = json.loads(str(archive["meta"]))
        hyperparameters = {name: meta[name] for name in ("d", "theta", "ridge", "time_unit", "random_seed")}
        restored = LMURegressor(**hyperparameters)
        restored._coef = coef
        restored._in_shape = tuple(meta["in_shape"])
        restored._out_shape = tuple(meta["out_shape"])
        restored._n_features = meta["n_features"]
        restored._uses_target_dt = bool(meta["uses_target_dt"])
        restored._metrics = {name: float(score) for name, score in meta["metrics"].items()}
        return restored
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Recurrent / continuous-time memory units for juniper-recurrence.
|
|
2
|
+
|
|
3
|
+
Currently exposes the Δt-native Legendre Memory Unit (Approach-C). Additional unit
|
|
4
|
+
kinds (e.g. a self-recurrent RCC candidate, P1) may be added as the framework grows.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from juniper_recurrence_model.units.lmu_varstep import VariableStepLMUMemory, lmu_matrices
|
|
8
|
+
|
|
9
|
+
__all__ = ["VariableStepLMUMemory", "lmu_matrices"]
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""Δt-native Legendre Memory Unit (Approach-C).
|
|
2
|
+
|
|
3
|
+
A Legendre Memory Unit (LMU; Voelker, Kajić & Eliasmith 2019, NeurIPS) maintains a
|
|
4
|
+
continuous-time representation of an input's recent history by orthogonalising it onto
|
|
5
|
+
the Legendre polynomial basis over a sliding window of length ``theta``. Its linear
|
|
6
|
+
memory state obeys
|
|
7
|
+
|
|
8
|
+
theta * m'(t) = A @ m(t) + B * u(t)
|
|
9
|
+
|
|
10
|
+
with **fixed, closed-form** matrices ``A`` (``d x d``) and ``B`` (``d x 1``) — the
|
|
11
|
+
HiPPO-LegT operator (Gu et al. 2020). Because the system is *linear*, its exact
|
|
12
|
+
discretisation is a matrix exponential — no numerical ODE solver is required.
|
|
13
|
+
|
|
14
|
+
**Approach C — the irregular-Δt win.** Standard LMU implementations bake the discrete
|
|
15
|
+
``Abar``/``Bbar`` as constants for one fixed step. The only change needed for
|
|
16
|
+
irregularly-sampled data is to evaluate them at the *actual* per-step gap ``dt_k`` — i.e.
|
|
17
|
+
the dataset's ``dt`` channel *is* the discretisation step (the same role the per-step
|
|
18
|
+
``Delta`` parameter plays in S4/Mamba). This is done in closed form via a one-time
|
|
19
|
+
eigendecomposition of the fixed ``A``, so each step costs only ``d`` scalar exponentials.
|
|
20
|
+
|
|
21
|
+
**C1-clean (first-principles).** No ODE solver, no autodiff-through-solver — only scalar
|
|
22
|
+
exponentials of the eigenvalues of a FIXED, closed-form matrix. ``A`` and ``B`` are not
|
|
23
|
+
learned and not data-dependent; only the read-in (features -> drive ``u``) and the readout
|
|
24
|
+
(memory -> output) are trained, and they live outside this module.
|
|
25
|
+
|
|
26
|
+
Verified reference: the numerics here match ``util/ad-hoc/verify_delta_t_reference_code.py``
|
|
27
|
+
in juniper-ml (numpy 2.4.4 / Python 3.13): ``A`` (d=16) max eigenvalue real part = -6.49
|
|
28
|
+
(stable); delayed-sinusoid reconstruction ``e_reg`` ≈ 0.035 and grid-invariant
|
|
29
|
+
``e_irr`` ≈ 0.039–0.043 (≈1.15×). See
|
|
30
|
+
``notes/JUNIPER_RECURSE_DELTA_T_HANDLING_2026-06-05.md`` §8 and
|
|
31
|
+
``notes/JUNIPER_RECURRENCE_MODEL_DETAILED_DESIGN_2026-06-14.md`` Part 3.
|
|
32
|
+
|
|
33
|
+
References
|
|
34
|
+
---------
|
|
35
|
+
- Voelker, Kajić & Eliasmith (2019). Legendre Memory Units. NeurIPS.
|
|
36
|
+
- Gu, Dao, Ermon, Rudra & Ré (2020). HiPPO. NeurIPS; arXiv:2008.07669.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
from __future__ import annotations
|
|
40
|
+
|
|
41
|
+
import numpy as np
|
|
42
|
+
from numpy.polynomial.legendre import Legendre
|
|
43
|
+
|
|
44
|
+
__all__ = ["lmu_matrices", "VariableStepLMUMemory"]
|
|
45
|
+
|
|
46
|
+
# Below this magnitude an eigenvalue is treated as zero for the removable
|
|
47
|
+
# singularity in (exp(z) - 1) / lambda. LegT's A has no zero eigenvalue, but the
|
|
48
|
+
# guard keeps the code correct for hygiene / other bases.
|
|
49
|
+
_LAMBDA_ZERO_TOL = 1e-12
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def lmu_matrices(d: int) -> tuple[np.ndarray, np.ndarray]:
    """Build the fixed, closed-form Legendre (HiPPO-LegT) state matrices.

    Parameters
    ----------
    d:
        Memory order (number of Legendre coefficients). Practical range ~4..64;
        the eigenvector matrix of ``A`` becomes ill-conditioned for large ``d``.

    Returns
    -------
    (A, B):
        ``A`` of shape ``(d, d)`` and ``B`` of shape ``(d, 1)``. These depend only
        on ``d`` — they are not learned and not data-dependent.

    Raises
    ------
    ValueError
        If ``d < 1``.
    """
    if d < 1:
        raise ValueError(f"order d must be >= 1, got {d}")
    orders = range(d)
    # Row i carries the common scale (2i + 1); the sign is -1 above the diagonal and
    # alternates as (-1)^(i - j + 1) on and below it.
    A = np.array([[(2 * i + 1) * (-1.0 if i < j else (-1.0) ** (i - j + 1)) for j in orders] for i in orders])
    B = np.array([[float((2 * i + 1) * ((-1) ** i))] for i in orders])
    return A, B
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class VariableStepLMUMemory:
|
|
79
|
+
"""Irregular-Δt-native LMU memory (Approach-C).
|
|
80
|
+
|
|
81
|
+
The linear LMU memory, exactly discretised at an arbitrary per-step real gap
|
|
82
|
+
``dt`` via the zero-order-hold update
|
|
83
|
+
|
|
84
|
+
m_{k+1} = Abar(dt) @ m_k + Bbar(dt) * u_k
|
|
85
|
+
|
|
86
|
+
where ``Abar``/``Bbar`` are computed from a one-time eigendecomposition of the
|
|
87
|
+
fixed matrix ``A``::
|
|
88
|
+
|
|
89
|
+
z_i = lambda_i * dt / theta
|
|
90
|
+
Abar(dt) = V @ diag(exp(z_i)) @ Vinv
|
|
91
|
+
Bbar(dt) = V @ diag(expm1(z_i) / lambda_i) @ (Vinv @ B)
|
|
92
|
+
|
|
93
|
+
``expm1`` (not ``exp(z) - 1``) avoids catastrophic cancellation at small ``z``.
|
|
94
|
+
Stability is automatic for ``dt > 0`` because every eigenvalue has negative real
|
|
95
|
+
part, so ``|exp(z_i)| < 1`` and ``Abar`` is a contraction.
|
|
96
|
+
|
|
97
|
+
Parameters
|
|
98
|
+
----------
|
|
99
|
+
d:
|
|
100
|
+
Memory order (see :func:`lmu_matrices`).
|
|
101
|
+
theta:
|
|
102
|
+
Memory window length, in the *same real-time units* as ``dt`` (e.g. calendar
|
|
103
|
+
days for the equities use-case).
|
|
104
|
+
"""
|
|
105
|
+
|
|
106
|
+
def __init__(self, d: int, theta: float) -> None:
|
|
107
|
+
if theta <= 0:
|
|
108
|
+
raise ValueError(f"theta must be > 0, got {theta}")
|
|
109
|
+
self.d = int(d)
|
|
110
|
+
self.theta = float(theta)
|
|
111
|
+
A, B = lmu_matrices(self.d)
|
|
112
|
+
lam, V = np.linalg.eig(A)
|
|
113
|
+
# Precomputed once; depend only on (d, theta).
|
|
114
|
+
self.lam = lam
|
|
115
|
+
self.V = V
|
|
116
|
+
self.Vinv = np.linalg.inv(V)
|
|
117
|
+
self.VinvB = self.Vinv @ B
|
|
118
|
+
|
|
119
|
+
def step_matrices(self, dt: float) -> tuple[np.ndarray, np.ndarray]:
    """Return ``(Abar, Bbar)`` for a single real gap ``dt`` (both real-valued).

    Both are assembled in the eigenbasis: ``Abar`` scales eigenvector ``i`` by
    ``exp(z_i)`` and ``Bbar`` by ``expm1(z_i) / lam_i``, with ``z = lam * dt / theta``.
    Only the real parts of the products are returned.
    """
    scaled_gap = dt / self.theta
    z = self.lam * scaled_gap
    state_map = (self.V * np.exp(z)) @ self.Vinv
    # Eigenvalues below the tolerance take the limiting value dt / theta, so the
    # division below may hit zero for them without consequence — hence the errstate.
    near_zero = np.abs(self.lam) < _LAMBDA_ZERO_TOL
    with np.errstate(divide="ignore", invalid="ignore"):
        ratio = np.expm1(z) / self.lam
    input_gain = np.where(near_zero, scaled_gap, ratio)
    input_map = (self.V * input_gain) @ self.VinvB
    return state_map.real, input_map.real
|
|
128
|
+
|
|
129
|
+
def rollout(self, u: np.ndarray, dt: np.ndarray) -> np.ndarray:
    """Roll the memory over a 1-D input ``u`` with per-step gaps ``dt``.

    Zero-order-hold convention: ``u[k-1]`` is held constant across the interval
    ``(t[k-1], t[k]]`` of length ``dt[k]``. ``dt[0]`` is unused (empty window);
    the returned ``out[0]`` is the zero initial state.

    Parameters
    ----------
    u:
        Scalar drive per step, shape ``(n,)``.
    dt:
        Per-step elapsed real time, shape ``(n,)``; ``dt[k] > 0`` for ``k >= 1``.

    Returns
    -------
    np.ndarray
        Memory trajectory of shape ``(n, d)``.

    Raises
    ------
    ValueError
        If ``u`` and ``dt`` are not 1-D arrays of equal length, or a gap at
        ``k >= 1`` is not strictly positive.
    """
    drive = np.asarray(u, dtype=float)
    gaps = np.asarray(dt, dtype=float)
    well_formed = drive.ndim == 1 and gaps.ndim == 1 and drive.shape[0] == gaps.shape[0]
    if not well_formed:
        raise ValueError(f"u and dt must be 1-D and equal length; got {drive.shape}, {gaps.shape}")
    trajectory = np.zeros((drive.shape[0], self.d))
    state = np.zeros((self.d, 1))
    # Row 0 stays at the zero initial state; step k consumes gap k and the previous drive.
    for k, gap in enumerate(gaps[1:], start=1):
        if gap <= 0:
            raise ValueError(f"dt[{k}]={gap} must be > 0 for k >= 1")
        Abar, Bbar = self.step_matrices(float(gap))
        state = Abar @ state + Bbar * drive[k - 1]
        trajectory[k] = state[:, 0]
    return trajectory
|
|
162
|
+
|
|
163
|
+
def rollout_batch(self, u: np.ndarray, dt: np.ndarray) -> np.ndarray:
    """Batched, multi-channel zero-order-hold rollout carried out in the eigenbasis.

    ``F`` independent input channels are pushed through this *same* fixed LMU
    memory operator for a batch of ``n`` sequences, using per-(sequence, step)
    real gaps ``dt``. Channel ``f`` of sequence ``i`` evolves exactly as
    :meth:`rollout` would for the 1-D drive ``u[i, :, f]`` (each feature drives
    its own memory).

    Working in the eigenbasis of the fixed matrix ``A`` turns one step into an
    elementwise scaling by ``exp(z)`` instead of a per-sequence ``d×d`` matmul.
    The memory matrices are never differentiated; plain arrays are returned.

    Parameters
    ----------
    u:
        Per-step channel drives, shape ``(n, T, F)``. A 2-D ``(n, T)`` input is
        accepted and treated as a single channel (``F == 1``).
    dt:
        Per-step elapsed real time, shape ``(n, T)``. ``dt[:, 0]`` is not used
        by the recurrence (empty initial window). Gaps must be ``>= 0``;
        ``dt == 0`` is a *no-op* step (state held, that step's drive ignored),
        so zero-padded tails pass through harmlessly.

    Returns
    -------
    np.ndarray
        Real memory trajectory of shape ``(n, T, F, d)``; ``out[:, 0]`` is the
        zero initial state.

    Raises
    ------
    ValueError
        If ``u`` is not 2-D or 3-D, if ``dt`` does not have shape ``(n, T)``,
        or if any entry of ``dt`` is negative.

    Notes
    -----
    The full trajectory is returned; a many-to-one consumer keeps only its
    readout step. Caching ``exp(z)`` / ``expm1(z)/λ`` per distinct ``dt`` value
    is a possible future optimisation when ``dt`` is quantised.
    """
    drives = np.asarray(u, dtype=float)
    if drives.ndim == 2:
        drives = drives[:, :, None]
    if drives.ndim != 3:
        raise ValueError(f"u must be (n, T, F) or (n, T); got shape {drives.shape}")
    gaps = np.asarray(dt, dtype=float)
    n, n_steps, n_channels = drives.shape
    if gaps.shape != (n, n_steps):
        raise ValueError(f"dt must have shape {(n, n_steps)} to match u; got {gaps.shape}")
    if np.any(gaps < 0):
        raise ValueError("dt must be >= 0 everywhere (dt == 0 is a held/no-op step)")

    eigvals = self.lam[None, :]  # (1, d)
    input_proj = self.VinvB[:, 0]  # (d,) B expressed in eigen-coordinates
    # Step-independent quantities, computed once outside the time loop.
    near_zero = np.abs(self.lam)[None, :] < _LAMBDA_ZERO_TOL  # (1, d)
    scaled_gaps = gaps / self.theta  # (n, T)

    # Complex eigen-coordinate state: coeffs[i, :, f] belongs to memory f of sequence i.
    coeffs = np.zeros((n, self.d, n_channels), dtype=np.complex128)
    trajectory = np.zeros((n, n_steps, n_channels, self.d), dtype=float)
    for k in range(1, n_steps):
        step = scaled_gaps[:, k][:, None]  # (n, 1)
        exponent = eigvals * step  # (n, d)
        decay = np.exp(exponent)
        # 0/0 for a vanishing eigenvalue is patched by the np.where below.
        with np.errstate(divide="ignore", invalid="ignore"):
            raw_gain = np.expm1(exponent) / eigvals
        mode_gain = np.where(near_zero, step, raw_gain)
        gain = mode_gain * input_proj[None, :]  # (n, d) per-eigenmode input gain
        held = drives[:, k - 1, :][:, None, :]  # (n, 1, F) sample held over this gap
        coeffs = decay[:, :, None] * coeffs + gain[:, :, None] * held
        # Map back from eigen-coordinates and reorder (n, d, F) -> (n, F, d).
        projected = np.einsum("ij,njf->nif", self.V, coeffs).real
        trajectory[:, k] = projected.transpose(0, 2, 1)
    return trajectory
|
|
229
|
+
|
|
230
|
+
def decode_weights(self, rho: float) -> np.ndarray:
    """Return readout weights that reconstruct the input ``rho * theta`` in the past.

    The weights are shifted-Legendre evaluations, ``w[i] = P_i(2*rho - 1)`` for
    ``i = 0 .. d-1``. ``rho`` is meant to lie in ``[0, 1]``: 0 is "now", 1 is a
    full window ago.

    Parameters
    ----------
    rho:
        Relative delay into the window.

    Returns
    -------
    np.ndarray
        Weight vector of length ``self.d``.
    """
    # Map rho in [0, 1] onto the Legendre domain [-1, 1].
    point = 2.0 * float(rho) - 1.0
    basis_polys = (Legendre.basis(order) for order in range(self.d))
    return np.array([poly(point) for poly in basis_polys])
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: juniper-recurrence-model
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Δt-native Legendre Memory Unit (Approach-C) and recurrent model core for the juniper-recurrence application
|
|
5
|
+
Author: Paul Calnon
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/pcalnon/juniper-recurrence
|
|
8
|
+
Project-URL: Repository, https://github.com/pcalnon/juniper-recurrence
|
|
9
|
+
Project-URL: Issues, https://github.com/pcalnon/juniper-recurrence/issues
|
|
10
|
+
Keywords: juniper,recurrence,lmu,legendre,state-space,time-series,irregular-dt
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.12
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: numpy>=1.24
|
|
23
|
+
Requires-Dist: juniper-model-core<0.2.0,>=0.1.0
|
|
24
|
+
Provides-Extra: test
|
|
25
|
+
Requires-Dist: pytest>=8.0; extra == "test"
|
|
26
|
+
Requires-Dist: pytest-cov>=5.0; extra == "test"
|
|
27
|
+
|
|
28
|
+
# juniper-recurrence-model
|
|
29
|
+
|
|
30
|
+
The model-specific core for the [juniper-recurrence](https://github.com/pcalnon/juniper-recurrence)
|
|
31
|
+
application — the selected model **P3-C (LMU + Approach-C)**.
|
|
32
|
+
|
|
33
|
+
This package ships the **Δt-native Legendre Memory Unit (Approach-C)** — a closed-form,
|
|
34
|
+
variable-step LMU discretisation that is the only first-principles-clean ("C1") option natively
|
|
35
|
+
handling irregularly sampled time series — **and** `FixedOrderLMURegressor`, the recurrent model
|
|
36
|
+
implementing the shared [`juniper-model-core`](https://github.com/pcalnon/juniper-ml)
|
|
37
|
+
`TrainableModel` interface (now that the package has landed). The regressor keeps the LMU memory
|
|
38
|
+
**fixed** and trains only a linear readout in **closed form** (least squares — no BPTT, fully
|
|
39
|
+
deterministic); it passes model-core's conformance kit unchanged, making it the WS-4 refactor
|
|
40
|
+
template (a non-cascor model on the shared model seam).
|
|
41
|
+
|
|
42
|
+
Design of record (in juniper-ml):
|
|
43
|
+
[`notes/JUNIPER_RECURRENCE_MODEL_DETAILED_DESIGN_2026-06-14.md`](https://github.com/pcalnon/juniper-ml/blob/main/notes/JUNIPER_RECURRENCE_MODEL_DETAILED_DESIGN_2026-06-14.md).
|
|
44
|
+
|
|
45
|
+
## Why Approach-C
|
|
46
|
+
|
|
47
|
+
An LMU's linear memory obeys `theta * m'(t) = A·m(t) + B·u(t)` with **fixed, closed-form** matrices.
|
|
48
|
+
Because the system is linear, its *exact* discretisation is a matrix exponential — **no ODE solver,
|
|
49
|
+
no autodiff-through-solver**. For irregular sampling, the discrete update is simply evaluated at the
|
|
50
|
+
real per-step gap `dt`: the dataset's `dt` channel *is* the discretisation step. `A`/`B` are never
|
|
51
|
+
trained; only the read-in/readout are. That is the entire C1-clean, irregular-Δt-native story.
|
|
52
|
+
|
|
53
|
+
## Install
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install juniper-recurrence-model # once published
|
|
57
|
+
pip install -e ".[test]" # local development
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
numpy-only at the core (the memory is a fixed linear recurrence requiring no autodiff).
|
|
61
|
+
|
|
62
|
+
## Quick start
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
import numpy as np
|
|
66
|
+
from juniper_recurrence_model import VariableStepLMUMemory
|
|
67
|
+
|
|
68
|
+
mem = VariableStepLMUMemory(d=16, theta=1.0) # order 16, window 1.0 (same unit as dt)
|
|
69
|
+
|
|
70
|
+
# Irregularly-sampled input: u driven on a non-uniform time grid
|
|
71
|
+
t = np.cumsum(np.r_[0.0, np.random.default_rng(0).uniform(0.02, 0.08, 239)])
|
|
72
|
+
dt = np.empty_like(t); dt[0] = 0.0; dt[1:] = np.diff(t)
|
|
73
|
+
u = np.sin(2.0 * t)
|
|
74
|
+
|
|
75
|
+
m = mem.rollout(u, dt) # (240, 16) memory trajectory
|
|
76
|
+
w = mem.decode_weights(rho=1.0) # read the input one full window ago
|
|
77
|
+
reconstruction = m @ w
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Trainable model (`FixedOrderLMURegressor`)
|
|
81
|
+
|
|
82
|
+
The package also exposes `FixedOrderLMURegressor`, a `juniper-model-core` `TrainableModel`. The
|
|
83
|
+
LMU memory is fixed; only a linear readout is fit, in closed form (least squares — no BPTT, fully
|
|
84
|
+
deterministic). It is Δt-native: pass per-step gaps `dt` (`(n, T)`) and an optional `readout_mask`
|
|
85
|
+
to `fit` / `predict`; both default to uniform gaps and the final step, so the bare ABC
|
|
86
|
+
`predict(X)` works too. It reports canonical regression metrics (`mse`, `rmse`, `mae`, `r2`).
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
import numpy as np
|
|
90
|
+
from juniper_recurrence_model import FixedOrderLMURegressor, LMURegressorSerializer
|
|
91
|
+
|
|
92
|
+
n, T, F = 48, 6, 3
|
|
93
|
+
X = np.random.default_rng(0).normal(size=(n, T, F))
|
|
94
|
+
y = X.reshape(n, -1) @ np.random.default_rng(1).normal(size=(T * F, 1))
|
|
95
|
+
dt = np.zeros((n, T)); dt[:, 1:] = np.random.default_rng(2).integers(1, 4, size=(n, T - 1))
|
|
96
|
+
|
|
97
|
+
model = FixedOrderLMURegressor(d=6) # theta resolved data-driven from dt at fit time
|
|
98
|
+
result = model.fit(X, y, dt=dt) # closed-form readout solve
|
|
99
|
+
preds = model.predict(X, dt=dt) # (n, 1)
|
|
100
|
+
print(result.final_metrics["r2"], model.describe_topology()["model_type"])
|
|
101
|
+
|
|
102
|
+
LMURegressorSerializer().save(model, "/tmp/lmu") # writes /tmp/lmu.npz (lossless round-trip)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
`FixedOrderLMURegressor` passes model-core's conformance kit unchanged
|
|
106
|
+
(`tests/test_lmu_conformance.py`), proving the WS-4 refactor template.
|
|
107
|
+
|
|
108
|
+
## Verified behaviour
|
|
109
|
+
|
|
110
|
+
| Check | Result |
|
|
111
|
+
|---|---|
|
|
112
|
+
| `A` (d=16) max eigenvalue real part | **−6.49** (< 0 → stable) |
|
|
113
|
+
| Reconstruction RMSE `e_reg` (regular grid) | **≈ 0.035** (< 0.05) |
|
|
114
|
+
| Grid-invariance `e_irr` (irregular grid) | **≈ 0.039–0.043** (≈1.15× `e_reg`; < 3·`e_reg` + 0.02) |
|
|
115
|
+
|
|
116
|
+
Pinned by `tests/test_lmu_grid_invariance.py`. Numerics match the reference
|
|
117
|
+
`util/ad-hoc/verify_delta_t_reference_code.py` in juniper-ml.
|
|
118
|
+
|
|
119
|
+
## Numerical guardrails
|
|
120
|
+
|
|
121
|
+
- Keep `d ≲ 64` — the eigenvector matrix of `A` becomes ill-conditioned for large `d`
|
|
122
|
+
(Padé scaling-and-squaring is the documented fallback for larger orders).
|
|
123
|
+
- Stability is automatic for `dt > 0` (`Re(λ) < 0 ⇒ |e^z| < 1`).
|
|
124
|
+
- `dt` may be quantised (e.g. integer calendar-day gaps) and `Abar`/`Bbar` cached per bucket.
|
|
125
|
+
|
|
126
|
+
## Versioning
|
|
127
|
+
|
|
128
|
+
PEP 440 + [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). Consumers should pin
|
|
129
|
+
`juniper-recurrence-model>=A.B,<A+1`. See [`CHANGELOG.md`](./CHANGELOG.md).
|
|
130
|
+
|
|
131
|
+
## License
|
|
132
|
+
|
|
133
|
+
MIT — see [LICENSE](https://github.com/pcalnon/juniper-recurrence/blob/main/LICENSE).
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
juniper_recurrence_model/__init__.py,sha256=kXarPjfWwsYocqN3jmZEaQ2UGPJ285nXzb446YKy744,1231
|
|
2
|
+
juniper_recurrence_model/_version.py,sha256=fg_FQVJvTdOUQfPfpolQ1-cWCD1f2983FcSYCau5mwI,253
|
|
3
|
+
juniper_recurrence_model/data.py,sha256=wzSxdFWFlm4qzVFE2dqyNCpZum8tVZz6rAcm5BH69dU,4835
|
|
4
|
+
juniper_recurrence_model/model.py,sha256=1loA0fBbtXznhN1r2qPyDsDLZR45SBE1T-XWw7QKNM4,13108
|
|
5
|
+
juniper_recurrence_model/units/__init__.py,sha256=0iKyCzag1nsqfg7d8FDax6vpcP3dY5BXvsgSr46rwsQ,387
|
|
6
|
+
juniper_recurrence_model/units/lmu_varstep.py,sha256=JtUXX1xRqGioZWpF_-dL0FqRJ06ZV1-KQ0m7qOJpY-Q,10459
|
|
7
|
+
juniper_recurrence_model-0.1.0.dist-info/METADATA,sha256=IAyMGWAFvTNYMI7KXaQR0jz05kvtcpR8XXMi411Tdsc,6221
|
|
8
|
+
juniper_recurrence_model-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
9
|
+
juniper_recurrence_model-0.1.0.dist-info/top_level.txt,sha256=8n0jDVpLTQtGiyJNhg4sq7Voz4wbKrY-74or_KcTMvs,25
|
|
10
|
+
juniper_recurrence_model-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
juniper_recurrence_model
|