certflow 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
certflow/cert.py ADDED
@@ -0,0 +1,1186 @@
1
+ """CERT main loop: the 8-step replanning round of spec section 4.3.
2
+
3
+ Integrates graphcore (dual incremental searches), conformal (certificate
4
+ substrate), and sensing (route-critical observation selection). The planner
5
+ never sees true costs; it interacts with the world only through observe().
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import math
10
+ import random
11
+ from dataclasses import dataclass
12
+
13
+ from certflow.conformal import (
14
+ ACITracker,
15
+ AgeBinnedScorer,
16
+ ConformalScorer,
17
+ path_alpha_edge,
18
+ path_confidence,
19
+ )
20
+ from certflow.fastgraph import FastDStarLite, FlatGraph
21
+ from certflow.sensing import baseline_select, near_optimal_alternatives, path_edges, select_observation
22
+ from certflow.types import Certificate, Edge, EdgeBelief, Node, World
23
+
24
# Sentinel upper cost standing in for "+inf". An edge that was never observed,
# or a warm-up u-cost evaluated at q=inf, has no coverage theorem behind its
# upper bound, so that bound is conceptually unbounded. D* Lite only keeps its
# g/rhs invariants with strictly finite, positive costs, hence a large finite
# cap. The value has to exceed every real path cost (a capped edge is picked
# only when there is no way around it) while staying far from float overflow
# once summed along a path.
_UB_CAP = 1.0e9
31
+
32
+
33
@dataclass
class PlannerConfig:
    """Tunable parameters of the CERT planner.

    Every field has a default. The block comments group related knobs and
    record the reasoning behind each one.
    """

    # --- core certificate / conformal parameters ---------------------------
    epsilon: float = 5.0  # certificate gap the planner aims for
    alpha_prime: float = 0.1  # miscoverage target at the path level
    rho_w: float = 0.99  # decay of conformal weights per unit time
    eps_tv: float = 0.0  # TV-Lipschitz rate of A2 (0 = exchangeable claim)
    gamma_aci: float = 0.005  # step size used by ACI
    delta: float = 1.0  # sensing period: time units elapsed per round
    rho_hat_over_rho: float = 1.0  # drift misspecification factor (A1 sweep)
    sense_cost: float = 0.1  # m_e, uniform over edges in v1
    k_alternatives: int = 3
    delta_subopt: float = 0.1
    backstop_slack: float = 1.5  # backstop_age = slack * L * delta
    cost_floor: float = 1e-3

    # --- maintenance sensing while certified --------------------------------
    # T2': certification is sustained by a sensing rate rather than achieved
    # once. `maintenance_lookahead` triggers sensing when the projected gap
    # would cross epsilon within that many rounds; `maintenance_every` is the
    # calibration-freshness floor (sense at least once per that many
    # certified rounds).
    maintenance_lookahead: float = 2.0
    maintenance_every: int = 5

    # --- kappa corridor hysteresis (Design 1) -------------------------------
    # Among incumbent candidates whose u-cost lies within
    # kappa_slack_frac*epsilon of the tightest UB, execute the candidate with
    # the highest mean edge-conductivity rather than the raw argmin. UB is
    # always the tightest bound, so the certificate is unaffected, and
    # execution suboptimality stays bounded by gap + slack. kappa is
    # reinforced on the executed incumbent and decays geometrically per round.
    use_kappa: bool = False
    kappa_decay: float = 0.95
    kappa_slack_frac: float = 0.5

    # --- sensing policy -----------------------------------------------------
    # "cert" is the contribution (gap-shrink VOI + backstop). The Tier-2
    # baselines are "random", "max_age" (global freshness round-robin),
    # "max_width" (global info-gain proxy, not route-critical) and "none".
    sensing_policy: str = "cert"

    # --- unknown-terrain start (Tier-2) -------------------------------------
    # With initial_survey=False the t0 survey is skipped: every edge begins at
    # a weak prior with a large age, so intervals start wide and the sensing
    # allocation actually matters.
    initial_survey: bool = True
    prior_cost: float = 1.0
    prior_age: float = 200.0

    # --- margin factor lambda (paper/theory.tex) ----------------------------
    # 1.0 gives observable-coverage semantics (T1a, the empirically validated
    # default); 2.0 gives provable latent-cost coverage (T1b). The factor
    # scales the conformal quantile everywhere.
    latent_margin: float = 1.0

    # --- thinned calibration (theory.tex, honest accounting item 1) ---------
    # Consecutive scores on one edge share a noise draw (one-dependent).
    # Thinning keeps only scores from disjoint observation pairs (the 2nd,
    # 4th, ... observation of each edge), which restores independence at a
    # factor-2 sample cost. Belongs to the provable mode together with
    # latent_margin=2.
    thinned_scores: bool = False

    # --- adaptive conformal inference ---------------------------------------
    # ACI adapts the working alpha from realized edge-coverage events. It
    # CANCELS static margins (with lambda=2 the errors vanish, alpha climbs
    # and q shrinks until errors return to target), so the provable T1b mode
    # has to freeze it: use_aci=False pins the working level at alpha_prime
    # and the quantile is the raw weighted-conformal quantile the theorem
    # assumes.
    use_aci: bool = True

    # --- sum-aware upper certificate (theory.tex T4) ------------------------
    # Replaces the incumbent's Bonferroni UB value (sum of per-edge u_e,
    # margin ~ L*q_{a'/L}) by sum(c_hat) + a block-quantile margin at level
    # alpha' (~ sqrt(L)*q) + sum(rho*a). UB side only: the LB must hold
    # uniformly over paths and keeps its per-edge construction. Tightens the
    # gap and the T2' floor.
    sum_aware_ub: bool = False

    # --- alpha annealing ----------------------------------------------------
    # Report the best currently supportable claim instead of INVALID during
    # warm-up. The effective sample size m floors the per-edge level at
    # 1/(m+1); the path level anneals from coarse towards the target as
    # evidence accrues. Claims always state the annealed (weaker) level, so
    # nothing is overclaimed. Certification additionally requires the claim
    # to have reached min_certify_confidence (never stop sensing on a weak
    # claim).
    anneal_alpha: bool = True
    min_certify_confidence: float = 0.5

    # --- adaptive sensing rate (T2') ----------------------------------------
    # Sense k <= max_sense_per_round edges per round, choosing k so that the
    # sustained gap floor 2*L*q + rho*Delta*L*(L-1)/k meets epsilon when
    # possible. Also focuses sensing on P_lb (gap decomposition) and adapts
    # the pre-widening horizon B so the cache spends at most
    # prewiden_slack_frac of the epsilon-slack on width (at high drift B
    # drops to 0: exact metrics, slower, certifiable). Off by default because
    # it changes spend and latency semantics.
    adaptive_rate: bool = False
    max_sense_per_round: int = 4
    prewiden_slack_frac: float = 0.25

    # --- objective-matched sensing ------------------------------------------
    # When T2' says epsilon is unattainable at the current rate,
    # certificate-gap sensing buys nothing, so observations go to the
    # EXPECTED-best route instead (VOI), which is what determines departure
    # quality; gap-directed sensing resumes once epsilon is attainable.
    # Measured: matches the VOI baseline's regret (5x better than pure gap
    # sensing in unattainable regimes) while keeping certificate behavior
    # where certification is possible.
    hybrid_sensing: bool = False

    # --- online drift-rate estimation ---------------------------------------
    # rho_mode="online" frees the planner from a world-supplied rho. Rate
    # samples |obs - c_hat_prev| / age from re-observations are pooled (noise
    # inflates them, which is conservative) and rho_hat is their
    # rho_online_quantile. Until the estimator warms up rho ~ 0 and the
    # conformal layer absorbs unmodeled drift into the scores (validated on
    # real traffic at up to 49% A1-violation rates: a width cost, not a
    # coverage cost).
    rho_mode: str = "given"  # "given" | "online"

    # --- stabilized sensing target (the P_lb-churn factor) ------------------
    # Under drift the optimistic path is a moving target and focused sensing
    # chases it, leaving realized gaps ~2x above the T2' floor. Keep sensing
    # the SAME path while its ell-cost stays within (1+tol) of LB; the gap
    # bound pays at most tol*LB extra (u(P_s) - ell(P_s) + [ell(P_s) - LB])
    # and the ages on the stable target obey the round-robin analysis.
    stabilize_sensing: bool = False
    sense_path_tol: float = 0.1

    # --- churn-aware certification (T7) -------------------------------------
    # The optimistic path hops over a CHURN SET of K >= L edges under drift.
    # The T2' floor and the sensing target must use K rather than the
    # instantaneous path length, otherwise realized gaps run ~K/L above the
    # floor (the measured ~1.6x residual). K is tracked over a sliding window
    # and reported; adaptive k solves the K-floor.
    churn_window: int = 50

    # --- refine-after-certify -----------------------------------------------
    # Certification stops gap-sensing, yet the certified incumbent can be far
    # from optimal WITHIN epsilon (measured in lifelong runs: memory-carried
    # incumbents certify at regret 0.4-0.6 versus 0.025 for fresh
    # exploration). When enabled, certified rounds keep sensing the
    # EXPECTED-best route (VOI) to improve the incumbent; the certificate is
    # untouched because sensing only ever tightens it.
    refine_after_certify: bool = False

    # --- strict LB level (theory GAP-A) -------------------------------------
    # The lower bound must cover the UNKNOWN optimum's edges, whose count can
    # exceed |P_lb|; the airtight per-edge level divides by (|V|-1), not L.
    # Off by default (the deployed planner operates at alpha'/L, validated
    # against ground truth at 1.000 across all conditions, with the
    # conservatism slack absorbing the difference); ON in the provable recipe,
    # where every constant must be theorem-exact.
    strict_lb_alpha: bool = False

    # --- decision-uniform certificates --------------------------------------
    # Per-round claims are marginal. A robot that ACTS whenever certified
    # peeks every round, and across T rounds the chance that SOME acted-on
    # certificate failed exceeds alpha'. Full per-round time-uniformity is
    # quantifiably impractical (stitched-DKW needs n >~ 63k scores at
    # Bonferroni levels, theory.tex T6), but the certificate is only USED at
    # decision instants (stop sensing, depart): alpha-spending over a decision
    # budget gives simultaneous validity of ALL decisions at level alpha' for
    # the width cost of alpha'/N_dec.
    decision_uniform: bool = False
    max_decisions: int = 5

    # --- predictor mode (spatial-predictor study) ---------------------------
    # When a point predictor is handed to the planner, edges older than
    # predictor_age_gate*delta use (predicted center, LEARNED age-binned
    # conformal width) instead of (last obs, q + rho*age). Per-edge fallback
    # chain: prediction available AND its age-bin quantile supportable, else
    # the model-based path. Calibration buffers are separate per model, so
    # scores never mix regimes (assumption A4': within-bin exchangeability).
    # Bin edges are expressed in delta units.
    predictor_age_gate: float = 12.0
    predictor_bins: tuple = (6.0, 12.0, 24.0, 48.0)
    rho_online_quantile: float = 0.9
    rho_online_min_samples: int = 10

    # --- lazy pre-widening (T3 locality) ------------------------------------
    # Edge metrics are cached at age + B*delta so they remain valid
    # (conservatively wide) for B rounds and D* Lite repair touches ~|E|/B
    # edges per round instead of all of them. Soundness: cached ell <= true
    # ell and cached u >= true u throughout the window. Width cost:
    # +2*rho*B*delta per edge. 0 disables the cache (exact, slow).
    prewiden_rounds: int = 10

    # --- staggered pre-widening (predictor-free vectorized path) ------------
    # Per-edge horizon factors are drawn uniformly from
    # [stagger_lo, stagger_hi] so cache expiries spread across rounds instead
    # of all firing on one synchronized round. Soundness does not depend on
    # the spread, since each entry's width is computed at ITS OWN horizon;
    # this is purely a latency-smoothing knob.
    stagger_lo: float = 0.75
    stagger_hi: float = 1.25
208
+
209
+
210
def recommended_config(**overrides) -> "PlannerConfig":
    """Build the best-known configuration from the ablation/benchmark program.

    Switches on online drift estimation (coverage-neutral, 1.7-2.4x tighter
    gaps), objective-matched hybrid sensing (regret matches/beats the
    strongest baseline while keeping the certificate), kappa hysteresis
    (-70% churn), adaptive rate + adaptive pre-widening, and the gated
    sum-aware UB. Annealing is already the default, and decision_uniform is
    left alone because it is a claim-semantics choice.

    Keyword arguments are forwarded to ``PlannerConfig`` and take precedence
    over the recommended values.
    """
    settings = {
        "rho_mode": "online",
        "hybrid_sensing": True,
        "use_kappa": True,
        "adaptive_rate": True,
        "sum_aware_ub": True,
    }
    # caller-supplied values win over the recommended ones
    settings.update(overrides)
    return PlannerConfig(**settings)
226
+
227
+
228
+ class CertPlanner:
229
+ """Holds beliefs, the two D* Lite instances, and the certificate state."""
230
+
231
def __init__(
    self,
    world: World,
    start: Node,
    goal: Node,
    config: PlannerConfig,
    t0: float = 0.0,
    predictor=None,
) -> None:
    """Set up beliefs, calibration state, metric caches and both searches.

    Args:
        world: environment handle; this constructor uses ``world.edges()``,
            ``world.observe(e, t)``, ``world.graph`` and (through
            ``_rho_hat``) ``world.rho_true(e)``.
        start: start node of the route.
        goal: goal node of the route.
        config: planner configuration, stored as ``self.cfg``.
        t0: initial planner time.
        predictor: optional point predictor; ``_pred_interval`` calls it as
            ``predictor(e, t, beliefs)``. ``None`` selects the vectorized
            predictor-free refresh path in ``_refresh_metrics``.
    """
    self.cfg = config
    self.world = world
    # Without annealing, the weight decay caps the effective sample size at
    # 1/(1 - rho_w); warn when that cap is below 1/alpha_prime.
    if not config.anneal_alpha and config.rho_w < 1.0:
        ess_cap = 1.0 / (1.0 - config.rho_w)
        if ess_cap < 1.0 / config.alpha_prime:
            import warnings
            warnings.warn(
                f"rho_w={config.rho_w} caps effective sample size at "
                f"~{ess_cap:.0f} < 1/alpha_prime={1/config.alpha_prime:.0f}: "
                "without annealing the certificate may never become valid",
                stacklevel=2,
            )
    self.start = start
    self.goal = goal
    self.t = t0

    # Initial survey: one observation per edge at t0 (spec: warm-up phase;
    # the certificate stays INVALID until the calibration buffer fills).
    # With initial_survey=False (unknown terrain), edges start at a weak
    # prior with a large age instead.
    self.beliefs: dict[Edge, EdgeBelief] = {}
    for e in world.edges():
        if config.initial_survey:
            # observed cost is clamped from below at cost_floor
            c0, t_obs0, seen = max(world.observe(e, t0), config.cost_floor), t0, True
        else:
            c0, t_obs0, seen = config.prior_cost, t0 - config.prior_age, False
        self.beliefs[e] = EdgeBelief(
            c_hat=c0,
            t_obs=t_obs0,
            rho=self._rho_hat(e),
            sense_cost=config.sense_cost,
            observed=seen,
        )

    self.scorer = ConformalScorer(rho_w=config.rho_w, eps_tv=config.eps_tv)
    self.predictor = predictor
    # predictor_bins are given in delta units; scale them to absolute ages
    self.binned = AgeBinnedScorer(
        bin_edges=tuple(b * config.delta for b in config.predictor_bins),
        rho_w=config.rho_w, eps_tv=config.eps_tv,
    )
    self.pred_used_rounds = 0  # diagnostic: edges priced by the predictor
    self._edge_alpha_extra: dict[Edge, float] = {}  # per-bin annealing charge
    self.aci = ACITracker(alpha_target=config.alpha_prime, gamma=config.gamma_aci)
    self.sense_spend = 0.0
    self._round_idx = 0
    self._obs_count: dict[Edge, int] = {}  # real observations per edge
    self._rate_samples: list[float] = []  # online rho: |dc|/age samples
    self._rho_online = 1e-9
    self._last_gap = math.inf  # gap-stall feedback for k
    self._stall = 0
    self._churn_seen: dict[Edge, int] = {}  # edge -> last round on P_lb
    self.cal_rho_a_max = 0.0  # max rho_e*a_e among pushed scores (pi_cal diagnostic)

    # lazy pre-widening cache (see PlannerConfig.prewiden_rounds)
    self._cache_lo: dict[Edge, float] = {}
    self._cache_up: dict[Edge, float] = {}
    self._cache_due: dict[Edge, float] = {}  # absolute expiry time
    self._cache_q: float = -1.0  # q the cache was built with

    # certified snapshot oracle (snapshot.py): built on point estimates
    # when the certificate proves the map tight; O(1) queries thereafter
    self._oracle = None
    self._oracle_chat_snap = None
    self._flat_mid = None
    self._beliefs_version = 0

    # kappa corridor hysteresis state (see PlannerConfig.use_kappa)
    self._p_sense: list[Node] = []  # stabilized sensing target path
    self._kappa: dict[Edge, float] = {}
    self._prev_incumbent: list[Node] = []
    self._incumbent_since = t0  # when the incumbent edge-set last changed
    self._rng = random.Random(0)  # baseline sensing policies only

    # node set = every key of world.graph plus every neighbour listed under it
    nodes = set(world.graph) | {v for n in world.graph for v in world.graph[n]}
    lo, up = self._metrics(q=math.inf)  # warm-up: q=inf -> ell at floor, u at inf
    # D* Lite needs finite costs; cap warm-up upper costs.
    up = {e: min(c, _UB_CAP) for e, c in up.items()}
    adj_lo = self._to_adj(nodes, lo)
    adj_up = self._to_adj(nodes, up)
    self._flat_lo = FlatGraph(adj_lo, extra_nodes=(start, goal))
    self._flat_up = FlatGraph(adj_up, extra_nodes=(start, goal))
    self.sp_lower = FastDStarLite(adj_lo, start, goal, flat=self._flat_lo)
    self.sp_upper = FastDStarLite(adj_up, start, goal, flat=self._flat_up)
    self._graph_lower_cache = adj_lo
    # fixed edge order + CSR slots for vectorized cache->flat cost sync
    # (the shared-flat constructor does NOT read costs from the adjacency,
    # so scratch rebuilds must write the cache into the arrays themselves)
    import numpy as _np
    self._edge_order = list(self.beliefs)
    ix_lo, ix_up = self._flat_lo.index_of, self._flat_up.index_of
    self._slots_lo = _np.array(
        [self._flat_lo.slot_of(ix_lo[u], ix_lo[v]) for u, v in self._edge_order],
        dtype=_np.int64)
    self._slots_up = _np.array(
        [self._flat_up.slot_of(ix_up[u], ix_up[v]) for u, v in self._edge_order],
        dtype=_np.int64)
    # belief arrays in edge order (vectorized full-refresh: fast_metrics)
    self._edge_idx = {e: i for i, e in enumerate(self._edge_order)}
    self._arr_chat = _np.array(
        [self.beliefs[e].c_hat for e in self._edge_order])
    self._arr_tobs = _np.array(
        [self.beliefs[e].t_obs for e in self._edge_order])
    self._arr_rho = _np.array(
        [self.beliefs[e].rho for e in self._edge_order])
    self._arr_obs = _np.array(
        [self.beliefs[e].observed for e in self._edge_order], dtype=bool)
    # staggered pre-widening horizons (audit seam 3): synchronized expiry
    # made one round per cycle pay a full-|E| refresh loop; per-edge
    # horizon factors in [config.stagger_lo, config.stagger_hi] (defaults
    # 0.75 and 1.25) spread expiries across rounds. The generator is seeded
    # with 0, so the factors are reproducible.
    # Soundness: each entry's width is computed at ITS OWN horizon.
    self._arr_stagger = _np.random.default_rng(0).uniform(
        config.stagger_lo, config.stagger_hi, len(self._edge_order))
    # _arr_due is the expiry clock for the VECTORIZED (predictor-free) refresh
    # path ONLY; it is the array twin of _cache_due there. When a predictor is
    # supplied the dict path drives expiry off _cache_due alone and never
    # reads _arr_due, so the two intentionally do not track each other in that
    # mode (the array view is simply unused). ingest_observation expires both.
    # Initialised to -inf so every entry counts as already expired.
    self._arr_due = _np.full(len(self._edge_order), -_np.inf)
358
+
359
def _adaptive_B(self, q_eff: float) -> int:
    """Return the pre-widening horizon (in rounds) to use right now.

    With ``adaptive_rate`` off, or a non-positive configured horizon, the
    configured value is returned clamped at zero. Otherwise the horizon is
    capped so the width spent on pre-widening stays within
    ``prewiden_slack_frac`` of the epsilon-slack ``epsilon - 2*L*q_eff``.
    When there is no positive slack, drift rate or quantile to budget
    against (warm-up, unattainable epsilon), the configured horizon is kept.
    """
    settings = self.cfg
    configured = settings.prewiden_rounds
    if configured <= 0 or not settings.adaptive_rate:
        return max(configured, 0)

    # path length of the last round, defaulting to 1 before any round ran
    path_len = max(getattr(self, "_last_L", 1), 1)
    worst_rho = max(
        (belief.rho for belief in self.beliefs.values()), default=0.0)
    eps_slack = settings.epsilon - 2 * path_len * q_eff
    if not (worst_rho > 0 and eps_slack > 0 and q_eff > 0):
        return configured

    cap = int(settings.prewiden_slack_frac * eps_slack
              / (2 * worst_rho * path_len * settings.delta))
    return max(0, min(configured, cap))
375
+
376
def _rebuild_searches(self) -> None:
    """Replace both D* Lite instances with fresh ones built from the cache.

    Used when a change touches most edges: incremental repair of ~|E|
    inconsistent vertices is strictly slower than one scratch compute.
    """
    import numpy as _np

    edges = self._edge_order
    n_edges = len(edges)

    # The cached metrics must land in the flat cost arrays BEFORE the engines
    # are constructed: the shared-flat constructor keeps existing costs and
    # the scratch-rebuild path skips update_edges, so without this write the
    # engines resurrect stale costs (a divergence the degenerate ablation
    # caught).
    lower_costs = _np.fromiter(
        (self._cache_lo[edge] for edge in edges),
        dtype=_np.float64, count=n_edges)
    self._flat_lo.cost[self._slots_lo] = lower_costs
    upper_costs = _np.fromiter(
        (min(self._cache_up[edge], _UB_CAP) for edge in edges),
        dtype=_np.float64, count=n_edges)
    self._flat_up.cost[self._slots_up] = upper_costs

    # The FlatGraphs are reused (CSR stays built, numba kernel stays warm);
    # a structure-only adjacency is enough because engines read flat.cost.
    structure = self._graph_lower_cache
    self.sp_lower = FastDStarLite(
        structure, self.start, self.goal, flat=self._flat_lo)
    self.sp_upper = FastDStarLite(
        structure, self.start, self.goal, flat=self._flat_up)
397
+
398
def _rho_hat(self, e: Edge) -> float:
    """Return the planner's assumed drift rate for edge ``e``.

    In ``rho_mode == "online"`` this is the 1e-9 placeholder (the estimator
    warms from observed rates, see round()). Otherwise it is the world's rate
    scaled by ``rho_hat_over_rho`` and floored at 1e-9; a non-finite world
    rate is treated as 0 (off-model worlds: the planner assumes its A1 model
    anyway).
    """
    floor = 1e-9
    if self.cfg.rho_mode == "online":
        return floor
    world_rate = self.world.rho_true(e)
    base = world_rate if math.isfinite(world_rate) else 0.0
    return max(base * self.cfg.rho_hat_over_rho, floor)
405
+
406
def _update_online_rho(self) -> None:
    """Re-estimate the pooled drift rate from re-observation rate samples.

    Does nothing unless ``rho_mode == "online"`` and at least
    ``rho_online_min_samples`` samples (and at least one) are available.
    The estimate is the ``rho_online_quantile`` order statistic of the
    pooled samples, floored at 1e-9, and is recomputed only once the sample
    set has grown by 10% since the last estimate. On a material change
    (more than 5% relative) the new rate is written to every belief and to
    ``_arr_rho``, and ``_cache_q`` is reset so the next metric refresh is a
    full rebuild.
    """
    cfg = self.cfg
    if cfg.rho_mode != "online":
        return
    n_samples = len(self._rate_samples)
    # An empty pool has no order statistic: bail out even when the configured
    # minimum is 0 (indexing the empty sorted list would raise IndexError).
    if n_samples == 0 or n_samples < cfg.rho_online_min_samples:
        return
    if n_samples < 1.1 * getattr(self, "_rho_sorted_at", 0):
        return  # re-estimate only when the sample set grew 10%
    self._rho_sorted_at = n_samples
    rates = sorted(self._rate_samples)
    # Clamp the rank so a quantile outside [0, 1] selects the nearest end
    # instead of raising or wrapping around to the wrong end of the list.
    rank = int(cfg.rho_online_quantile * (n_samples - 1))
    rank = min(max(rank, 0), n_samples - 1)
    rho = max(rates[rank], 1e-9)
    if abs(rho - self._rho_online) > 0.05 * max(self._rho_online, 1e-9):
        self._rho_online = rho
        for b in self.beliefs.values():
            b.rho = rho
        self._arr_rho[:] = rho
        self._cache_q = -1.0  # rho changed everywhere: full metric rebuild
423
+
424
def _to_adj(self, nodes, costs: dict[Edge, float]) -> dict[Node, dict[Node, float]]:
    """Turn an edge-cost map into a nested adjacency dict.

    Every node in ``nodes`` gets an entry (possibly empty); each
    ``(u, v) -> c`` in ``costs`` becomes ``adj[u][v] = c``.
    """
    table: dict[Node, dict[Node, float]] = {}
    for node in nodes:
        table[node] = {}
    for edge, cost in costs.items():
        tail, head = edge
        table[tail][head] = cost
    return table
429
+
430
def _pred_interval(self, e: Edge, age: float) -> tuple[float, float] | None:
    """Price edge ``e`` through the predictor, or return None to fall back.

    Returns ``(center, halfwidth)`` only when a predictor was supplied, the
    age is past ``predictor_age_gate * delta``, the predictor yields a value
    for ``e``, and the age-bin quantile is supportable at the current
    per-edge level. On success the per-bin annealing charge for ``e`` is
    recorded in ``_edge_alpha_extra``.
    """
    cfg = self.cfg
    if self.predictor is None:
        return None
    if age < cfg.predictor_age_gate * cfg.delta:
        return None
    center = self.predictor(e, self.t, self.beliefs)
    if center is None:
        return None

    edge_level = getattr(self, "_last_alpha_edge", self._alpha_prime_eff)
    # Per-bin annealing: the bin is queried at the level it can support and
    # the weakening is charged to the claim (weakest-link accounting, summed
    # over the certifying path's predictor-priced edges in round()).
    support = self.binned.effective_mass(self.t, age)
    if support <= 0.0:
        return None
    bin_level = max(edge_level, (1.0 + 1e-9) / (support + 1.0))
    if bin_level >= 0.5:
        return None  # bin too immature to be worth a claim
    halfwidth = self.binned.quantile(bin_level, self.t, age)
    if not math.isfinite(halfwidth):
        return None

    self._edge_alpha_extra[e] = max(0.0, bin_level - edge_level)
    return max(center, cfg.cost_floor), cfg.latent_margin * halfwidth
455
+
456
def _metrics(self, q: float) -> tuple[dict[Edge, float], dict[Edge, float]]:
    """Compute exact (non-pre-widened) lower and upper edge costs at ``self.t``.

    Args:
        q: conformal quantile to widen by; a non-finite value means warm-up.

    Returns:
        ``(lo, up)`` dicts keyed by edge. Unknown edges (never observed, or
        any edge while ``q`` is non-finite) get ``cost_floor`` /
        ``_UB_CAP``. Observed edges use the predictor interval when
        ``_pred_interval`` supplies one, else the belief's own
        ``lower``/``upper``.
    """
    floor = self.cfg.cost_floor
    priced = math.isfinite(q)
    lo: dict[Edge, float] = {}
    up: dict[Edge, float] = {}
    for e, b in self.beliefs.items():
        if not b.observed or not priced:
            # An unobserved edge is UNKNOWN: the prior is not an observation
            # and no coverage theorem prices it, so ell sits at the floor (it
            # could be cheap) and u is unbounded (it could be awful).
            # Certification therefore requires a fully-OBSERVED path: exactly
            # the Traversing-Mars 'prove the path' semantics, which T2's
            # degenerate corollary claims (and a noise-free test exposed: the
            # prior-centered interval was a soundness hole masked by noise
            # everywhere else). Warm-up (q not finite) is priced the same way.
            lo[e] = floor
            up[e] = _UB_CAP
            continue
        pi = self._pred_interval(e, b.age(self.t))
        if pi is not None:
            c, h = pi
            lo[e] = max(floor, c - h)
            up[e] = max(floor, c + h)
        else:
            # Model-based fallback: drop any annealing charge left over from
            # a round in which the predictor priced this edge, as
            # _refresh_metrics does on its own fallback branch.
            self._edge_alpha_extra.pop(e, None)
            lo[e] = b.lower(self.t, q, floor)
            up[e] = b.upper(self.t, q, floor)
    return lo, up
483
+
484
def _refresh_metrics(self, q_eff: float) -> None:
    """Maintain the pre-widened metric cache; push only changed edges to
    the two searches. Soundness: entries are computed at age + B*delta,
    so cached ell <= true ell and cached u >= true u until expiry; a grown
    quantile forces a full rebuild (a cached-too-small q would be unsound,
    a cached-too-large q is only conservative).

    Two implementations live here. With no predictor, a vectorized path
    works on the belief arrays and ``_arr_due``; with a predictor, a
    per-edge dict path works on ``self.beliefs`` and ``_cache_due``.

    Args:
        q_eff: effective quantile (already scaled by the caller) used to
            widen every observed edge's interval.
    """
    cfg = self.cfg
    B = cfg.prewiden_rounds
    if self.predictor is None:
        # vectorized full-refresh fast path (fast_metrics): exact mode
        # recomputes everything every round, and full rebuilds touch all
        # edges — both were a Python per-edge loop (~15ms at 14k edges)
        import numpy as _np
        from certflow.fastgraph import fast_metrics
        # Full rebuild when: caching is disabled, the cache is empty, the
        # quantile grew past the cached one (unsound to keep), or the cached
        # quantile is more than 30% looser than needed.
        full_now = (
            B <= 0
            or not self._cache_lo
            or q_eff > self._cache_q + 1e-12
            or self._cache_q > 1.30 * q_eff + 1e-12
        )
        if full_now:
            horizon = 0.0 if B <= 0 else self._adaptive_B(q_eff) * cfg.delta
            # 15% quantile headroom whenever caching is configured
            q_used = q_eff if B <= 0 else 1.15 * q_eff
            lo_a, up_a = fast_metrics(
                self._arr_chat, self._arr_tobs, self._arr_rho,
                self.t, q_used, cfg.cost_floor)
            if horizon > 0.0:
                # per-edge staggered horizons: widen each entry to cover
                # its OWN expiry time (linear in age, so additive here)
                h = horizon * self._arr_stagger
                widen = self._arr_rho * h
                lo_a = _np.maximum(lo_a - widen, cfg.cost_floor)
                up_a = up_a + widen
                dues = self.t + h
            else:
                # zero horizon: every entry is due again at the current time
                dues = _np.full(len(self._edge_order), self.t)
            # never-observed edges: floor / cap regardless of the arrays
            unobs = ~self._arr_obs
            lo_a[unobs] = cfg.cost_floor
            up_a[unobs] = _UB_CAP
            _np.minimum(up_a, _UB_CAP, out=up_a)
            self._cache_lo = dict(zip(self._edge_order, lo_a.tolist()))
            self._cache_up = dict(zip(self._edge_order, up_a.tolist()))
            self._cache_due = dict(zip(self._edge_order, dues.tolist()))
            self._arr_due = dues
            if B > 0:
                self._cache_q = q_used
            # write costs straight into the flat arrays, then start both
            # searches from scratch on the shared FlatGraphs
            self._flat_lo.cost[self._slots_lo] = lo_a
            self._flat_up.cost[self._slots_up] = up_a
            self.sp_lower = FastDStarLite(
                self._graph_lower_cache, self.start, self.goal,
                flat=self._flat_lo)
            self.sp_upper = FastDStarLite(
                self._graph_lower_cache, self.start, self.goal,
                flat=self._flat_up)
            # adjacency VALUES are consumed only by the alternatives
            # helper, which refreshes them on demand (_graph_lower_with);
            # the engines read costs from the flat arrays — skip the
            # O(|E|) dict-of-dicts rebuild here
            return
        # vectorized staggered due-subset (small by construction)
        mask = self._arr_due <= self.t
        if mask.any():
            idx = _np.nonzero(mask)[0]
            B_eff = self._adaptive_B(q_eff)
            h = B_eff * cfg.delta * self._arr_stagger[idx]
            # drift widening over the current age plus the per-edge horizon
            widen = self._arr_rho[idx] * (
                (self.t - self._arr_tobs[idx]) + h)
            # keep the quantile the cache was built with
            q_used = self._cache_q
            lo_sub = _np.maximum(
                self._arr_chat[idx] - q_used - widen, cfg.cost_floor)
            up_sub = _np.minimum(
                self._arr_chat[idx] + q_used + widen, _UB_CAP)
            unobs = ~self._arr_obs[idx]
            lo_sub[unobs] = cfg.cost_floor
            up_sub[unobs] = _UB_CAP
            self._arr_due[idx] = self.t + h
            # collect only the entries whose cached value actually changed
            lo_chg, up_chg = {}, {}
            for j, li, ui in zip(idx.tolist(), lo_sub.tolist(),
                                 up_sub.tolist()):
                e = self._edge_order[j]
                self._cache_due[e] = self._arr_due[j]
                if li != self._cache_lo.get(e):
                    lo_chg[e] = self._cache_lo[e] = li
                if ui != self._cache_up.get(e):
                    up_chg[e] = self._cache_up[e] = ui
            # more than 30% of lower costs changed: scratch rebuild,
            # otherwise incremental repair of the changed edges
            if len(lo_chg) > 0.3 * len(self.beliefs):
                self._rebuild_searches()
            elif lo_chg or up_chg:
                if lo_chg:
                    self.sp_lower.update_edges(lo_chg)
                if up_chg:
                    self.sp_upper.update_edges(up_chg)
        return
    # ---- predictor supplied: per-edge dict path ----
    if cfg.adaptive_rate and B > 0:
        B = self._adaptive_B(q_eff)
    if B <= 0:
        # exact mode: recompute every edge without pre-widening
        lo, up = self._metrics(q_eff)
        self._cache_lo, self._cache_up = lo, up
        self._rebuild_searches()  # all edges changed: scratch beats repair
        return

    full = (
        not self._cache_lo
        or q_eff > self._cache_q + 1e-12  # unsound to keep: rebuild
        or self._cache_q > 1.30 * q_eff + 1e-12  # too loose: rebuild
    )
    # headroom 1.15: a growing quantile forces a full rebuild, and on
    # large graphs each rebuild is an O(|E|) + scratch-search event (the
    # p95 spikes); more headroom = fewer events at ~15% width on the
    # noise term only (the drift term dominates under drift anyway)
    q_used = 1.15 * q_eff if full else self._cache_q
    horizon = B * cfg.delta
    lo_chg: dict[Edge, float] = {}
    up_chg: dict[Edge, float] = {}
    for e, b in self.beliefs.items():
        # skip entries that are still valid unless this is a full rebuild
        if not (full or self.t >= self._cache_due.get(e, -math.inf)):
            continue
        if not b.observed:
            # unknown edge (see _metrics): floor / unbounded until seen
            lo_v, up_v = cfg.cost_floor, _UB_CAP
            if lo_v != self._cache_lo.get(e):
                lo_chg[e] = self._cache_lo[e] = lo_v
            if up_v != self._cache_up.get(e):
                up_chg[e] = self._cache_up[e] = up_v
            self._cache_due[e] = self.t + horizon
            continue
        a_pre = b.age(self.t) + horizon
        pi = self._pred_interval(e, a_pre)  # pre-widened age: conservative
        if pi is not None:
            c_pi, h_pi = pi
            self.pred_used_rounds += 1
            lo_v = max(cfg.cost_floor, c_pi - h_pi)
            up_v = max(cfg.cost_floor, c_pi + h_pi)
        else:
            # model-based fallback: clear any stale per-bin annealing charge
            self._edge_alpha_extra.pop(e, None)
            lo_v = max(cfg.cost_floor, b.c_hat - q_used - b.rho * a_pre)
            up_v = max(cfg.cost_floor, b.c_hat + q_used + b.rho * a_pre)
        if lo_v != self._cache_lo.get(e):
            lo_chg[e] = self._cache_lo[e] = lo_v
        if up_v != self._cache_up.get(e):
            up_chg[e] = self._cache_up[e] = up_v
        self._cache_due[e] = self.t + horizon
    if full:
        self._cache_q = q_used
    # When most of the graph changed (full rebuilds, B=0 mode), repairing
    # ~|E| inconsistencies through the priority queue costs far more than
    # one fresh compute — rebuild the search instances from scratch
    # instead (measured: p95 spikes 33-96 ms -> scratch cost ~1-5 ms).
    if len(lo_chg) > 0.3 * len(self.beliefs):
        self._rebuild_searches()
        return
    if lo_chg:
        self.sp_lower.update_edges(lo_chg)
    if up_chg:
        # upper costs are capped before they reach the search engine
        self.sp_upper.update_edges({e: min(c, _UB_CAP) for e, c in up_chg.items()})
639
+
640
@property
def _alpha_prime_eff(self) -> float:
    """Claim level: alpha'/N_dec under decision-uniform alpha-spending.

    Without ``decision_uniform`` this is ``alpha_prime`` itself; with it,
    ``alpha_prime`` is divided by ``max_decisions`` (treated as at least 1).
    """
    cfg = self.cfg
    if not cfg.decision_uniform:
        return cfg.alpha_prime
    budget = max(cfg.max_decisions, 1)
    return cfg.alpha_prime / budget
646
+
647
def _q(self, path_len: int) -> float:
    """Return the conformal quantile for a path of ``path_len`` edges.

    The path-level alpha is the ACI working alpha (or the claim level when
    ACI is off), optionally raised to the level the calibration buffer can
    support (annealing). It is then split per edge via ``path_alpha_edge``
    and handed to the scorer. Side effects: sets ``_alpha_claim`` (the level
    the certificate may state) and ``_last_alpha_edge``.
    """
    cfg = self.cfg
    if cfg.use_aci:
        working = self.aci.working_alpha()
    else:
        working = self._alpha_prime_eff

    n_edges = max(path_len, 1)
    if cfg.strict_lb_alpha:
        # GAP-A: cover the unknown optimum's edges too, so divide by the
        # max possible simple-path length rather than the current path's
        n_edges = max(n_edges, len(self._graph_lower_cache) - 1)

    # annealing floor: the smallest per-edge level the buffer supports
    self._alpha_claim = self._alpha_prime_eff
    if cfg.anneal_alpha:
        mass = self.scorer.effective_mass(self.t)
        if mass <= 0.0:
            self._alpha_claim = 1.0  # empty buffer: nothing supportable
        else:
            floor_level = (1.0 + 1e-9) / (mass + 1.0)
            reachable = min(1.0, n_edges * floor_level)
            working = max(working, reachable)
            self._alpha_claim = max(self._alpha_prime_eff, reachable)

    per_edge = path_alpha_edge(working, n_edges)
    self._last_alpha_edge = per_edge
    return self.scorer.quantile(per_edge, self.t)
670
+
671
def round(self) -> tuple[Certificate, Edge | None]:
    """Run one replanning round and return ``(certificate, sensed_edge)``.

    The round follows the inline step comments:

    * Steps 1-2: compute the quantile ``q`` from the last known optimistic
      path length, refresh the edge metrics, query the lower-bound search,
      and repeat once if the path length differs from the guess.
    * Step 3: collect upper-bound candidates (u-cost of the optimistic
      path, of the conservative shortest path, of the trimmed previous
      incumbent and, when stabilised, of the sensing target), optionally
      tighten the standing incumbent's bound (``cfg.sum_aware_ub``), and
      choose the incumbent.
    * Step 4: compute the certifiability floor and the confidence, then
      build the ``Certificate``.
    * Steps 5-6: pick up to ``n_sense`` edges, observe each through
      ``ingest_observation``, feed coverage back to ACI and add the
      sensing cost to ``sense_spend``.

    Side effects include updates to ``_last_L``, ``_p_sense``, ``_kappa``,
    ``_incumbent_since``, ``_prev_incumbent``, ``_churn_seen``, ``_stall``,
    ``_last_gap`` and ``_round_idx``; ``self.t`` advances by ``cfg.delta``
    before returning.

    Returns:
        The certificate built this round and the first edge sensed during
        it (``None`` if nothing was sensed).
    """
    # NOTE(review): block nesting in this method was reconstructed from a
    # listing that had lost its leading whitespace; spots where more than
    # one nesting is plausible are flagged inline — confirm against the
    # packaged source.
    cfg = self.cfg

    # Step 1-2: iterate q <-> path-length coupling once (L feeds Bonferroni).
    # Start from last known L or a Dijkstra-free guess of 1.
    self._update_online_rho()
    L_guess = getattr(self, "_last_L", 1)
    q = self._q(L_guess)
    q_eff = (q if math.isfinite(q) else 0.0) * cfg.latent_margin
    # warm-up: intervals exist but the certificate is INVALID via confidence
    self._refresh_metrics(q_eff)

    sum_aware_L = 0
    p_lb, lb = self.sp_lower.shortest_path()
    lb_edges = path_edges(p_lb)
    L = max(len(lb_edges), 1)
    if L != L_guess:  # one refinement pass with the right Bonferroni level
        q = self._q(L)
        q_eff = (q if math.isfinite(q) else 0.0) * cfg.latent_margin
        self._refresh_metrics(q_eff)
        p_lb, lb = self.sp_lower.shortest_path()
        lb_edges = path_edges(p_lb)
        L = max(len(lb_edges), 1)
    self._last_L = L
    lo, up = self._cache_lo, self._cache_up

    # stabilized sensing target (see PlannerConfig.stabilize_sensing)
    sense_path = p_lb
    if cfg.stabilize_sensing and p_lb is not None:
        ps = self._p_sense
        # keep the previous target while it still joins start to goal and
        # its lower cost stays within the tolerance of the current LB
        if (
            ps
            and ps[0] == self.start
            and ps[-1] == self.goal
            and sum(lo[e] for e in path_edges(ps))
            <= (1.0 + cfg.sense_path_tol) * lb
        ):
            sense_path = ps
        self._p_sense = list(sense_path)
    sense_edges = path_edges(sense_path) if sense_path else lb_edges

    # Step 3: UB = min over (u-cost of optimistic path, u-cost of
    # conservative shortest path); any path's u-cost upper-bounds OPT.
    p_ub, _ = self.sp_upper.shortest_path()
    ub_edges = path_edges(p_ub) if p_ub is not None else []
    ub_candidates = []
    if p_lb is not None:
        ub_candidates.append((sum(up[e] for e in lb_edges), p_lb))
    if p_ub is not None:
        ub_candidates.append((sum(up[e] for e in ub_edges), p_ub))
    prev = self._trimmed_prev_incumbent()
    if prev is not None:
        ub_candidates.append(
            (sum(up[e] for e in path_edges(prev)), prev)
        )
    if cfg.stabilize_sensing and sense_path is not None and sense_path is not p_lb:
        # the stabilized sensing target's edges are the fresh ones; its
        # u-cost completes the gap bound u(P_s) - LB <= width(P_s) + tol*LB
        ub_candidates.append(
            (sum(up[e] for e in sense_edges), sense_path)
        )
    if not ub_candidates:
        ub, incumbent = math.inf, []
    else:
        if cfg.sum_aware_ub and math.isfinite(q) and prev is not None:
            # T4: tighter UB on the standing incumbent ONLY, gated on
            # freshness — every edge re-observed since this path became
            # the incumbent. Post-selection observations are independent
            # of the selection event, so the fixed-path theorem applies
            # conditionally; without the gate the winner's curse breaks
            # coverage (measured: 0.823 in the noise-dominated regime).
            pe = path_edges(prev)
            fresh = pe and all(
                self.beliefs[e].t_obs >= self._incumbent_since for e in pe
            )
            if fresh:
                # NOTE(review): this reads cfg.alpha_prime, whereas _q()
                # reads self._alpha_prime_eff — confirm that is intended
                # when cfg.decision_uniform is set.
                alpha_path = (
                    self.aci.working_alpha() if cfg.use_aci else cfg.alpha_prime
                )
                m = self.scorer.block_quantile(alpha_path, self.t, len(pe))
                if math.isfinite(m):
                    sum_aware_L = len(pe)
                    c_sum = (
                        sum(self.beliefs[e].c_hat for e in pe)
                        + cfg.latent_margin * m
                        + sum(self.beliefs[e].rho * self.beliefs[e].age(self.t)
                              for e in pe)
                    )
                    # only the entry that IS the previous incumbent (by
                    # identity) is tightened; the others are kept as-is
                    ub_candidates = [
                        (min(c, c_sum), p) if p is prev else (c, p)
                        for c, p in ub_candidates
                    ]
        # the certificate always reports the tightest bound
        ub = min(c for c, _ in ub_candidates)
        if cfg.use_kappa:
            # among candidates within the slack of the best bound, prefer
            # the one with the highest mean kappa
            slack = cfg.kappa_slack_frac * cfg.epsilon
            eligible = [p for c, p in ub_candidates if c <= ub + slack]
            incumbent = max(eligible, key=self._kappa_score)
        else:
            incumbent = min(ub_candidates, key=lambda x: x[0])[1]
    incumbent_edges = path_edges(incumbent)
    if cfg.use_kappa:
        # decay every stored kappa, then reinforce the incumbent's edges
        decay = cfg.kappa_decay
        for e in self._kappa:
            self._kappa[e] *= decay
        for e in incumbent_edges:
            self._kappa[e] = self._kappa.get(e, 0.0) + 1.0
    # NOTE(review): assumed to run regardless of cfg.use_kappa — confirm.
    if set(incumbent_edges) != set(path_edges(self._prev_incumbent)):
        self._incumbent_since = self.t  # freshness gate resets (T4)
    self._prev_incumbent = list(incumbent) if incumbent else []

    # Churn set (T7): edges recently on the optimistic path; the floor
    # and the sensing rotation must cover this set, not just today's path
    for e in lb_edges:
        self._churn_seen[e] = self._round_idx
    cutoff = self._round_idx - cfg.churn_window
    self._churn_seen = {
        e: r for e, r in self._churn_seen.items() if r >= cutoff
    }
    churn_edges = list(self._churn_seen)
    K = max(len(churn_edges), L, 1)

    # Step 4: churn-aware T2' certifiability floor (T7): round-robin over
    # the K-edge churn set at rate k bounds every current-path age by
    # (K-1)*Delta/k, so the sustainable floor uses K, not L
    rho_bar = max((self.beliefs[e].rho for e in lb_edges), default=0.0)
    k_now = 1
    eps_floor = 2 * L * q_eff + 2 * rho_bar * cfg.delta * L * (K - 1) / k_now
    attainable = cfg.epsilon >= eps_floor and math.isfinite(q)

    # Confidence: 1 - alpha_claim - sum of Delta_stale over the certifying
    # path. The CLAIM is the annealed level (>= alpha_prime; equals it
    # once the buffer supports the target) — never ACI's working alpha,
    # which only modulates interval width.
    d_stale = self.scorer.delta_stale(self.t)
    stale_total = L * d_stale
    if sum_aware_L:
        # T4's UB-side staleness is the BLOCK-level term (audit GAP-B;
        # block_delta_stale was dead code): charge the larger of the two
        # accountings — conservative, hence sound
        stale_total = max(
            stale_total,
            self.cfg.latent_margin
            * self.scorer.block_delta_stale(self.t, sum_aware_L),
        )
    alpha_claim = getattr(self, "_alpha_claim", self.cfg.alpha_prime)
    alpha_claim += sum(self._edge_alpha_extra.get(e, 0.0) for e in lb_edges)
    # a non-finite q (warm-up) forces the confidence to zero
    confidence = (
        max(0.0, 1.0 - alpha_claim - stale_total)
        if math.isfinite(q)
        else 0.0
    )

    cert = Certificate(
        lb=lb if p_lb is not None else math.inf,
        ub=ub,
        confidence=confidence,
        path=incumbent or [],
        epsilon_attainable=attainable,
        epsilon_floor=eps_floor,
    )

    # Step 5-6: sense unless certified; certified rounds still perform
    # maintenance sensing (projected-expiry + calibration-freshness floor),
    # otherwise the buffer ages and the claim self-extinguishes even in a
    # static world (observed in Tier-0).
    sensed: Edge | None = None
    # certification requires the claim to have annealed past the floor:
    # a gap <= epsilon at confidence 0.1 must not stop sensing
    certified = (
        cert.valid
        and cert.gap <= cfg.epsilon
        and cert.confidence >= cfg.min_certify_confidence
    )
    maintain = False
    if certified and p_lb is not None:
        growth = 2.0 * cfg.delta * sum(self.beliefs[e].rho for e in lb_edges)
        expiring = cert.gap + cfg.maintenance_lookahead * growth > cfg.epsilon
        cal_floor = self._round_idx % max(cfg.maintenance_every, 1) == 0
        maintain = expiring or cal_floor or cfg.refine_after_certify
    # Adaptive rate (T2'): choose k so the sustainable floor
    # 2*L*q + rho*Delta*L*(L-1)/k meets epsilon when possible.
    n_sense = 1
    if (
        cfg.adaptive_rate
        and math.isfinite(q)
        and not certified
        and cfg.sensing_policy == "cert"
    ):
        noise_floor = 2 * L * q_eff
        if cfg.epsilon > noise_floor and rho_bar > 0:
            k_needed = math.ceil(
                2 * rho_bar * cfg.delta * L * (K - 1)
                / max(cfg.epsilon - noise_floor, 1e-9)
            )
            if k_needed <= cfg.max_sense_per_round:
                n_sense = max(1, k_needed)
            # else: epsilon unattainable even at max rate — do not burn
            # budget chasing it (T2' says no rate <= max can sustain it)
        # gap-stall feedback: the floor formula assumes a fixed path, but
        # optimism attracts the LB to the stalest region and the target
        # churns; when the gap visibly stalls above epsilon, raise the
        # rate (bounded by max_sense_per_round)
        # NOTE(review): assumed to sit inside the adaptive-rate branch —
        # confirm.
        if cert.gap >= self._last_gap - 1e-9:
            self._stall += 1
        else:
            self._stall = 0
        n_sense = min(
            cfg.max_sense_per_round, n_sense + self._stall // 5
        )
    # NOTE(review): assumed to run every round (outside the branch above)
    # — confirm. A non-finite gap leaves the stored value untouched.
    self._last_gap = cert.gap if math.isfinite(cert.gap) else self._last_gap

    sensed_list: list[Edge] = []
    alt: set[Edge] | None = None
    for i in range(n_sense):
        pick: Edge | None = None
        if cfg.sensing_policy != "cert":
            # baseline policies sense only while uncertified
            if not certified:
                mean_graph = None
                if cfg.sensing_policy == "voi":
                    mean_graph = self._mean_graph()
                pick = baseline_select(
                    cfg.sensing_policy, self.beliefs, self.t, self._rng,
                    mean_graph=mean_graph, start=self.start, goal=self.goal,
                )
        elif (not certified or maintain) and p_lb is not None and sense_edges:
            if not math.isfinite(q):
                # Warm-up: alternate MAPPING (round-robin the optimistic
                # path) with CALIBRATION (re-observe the oldest already-
                # observed edge — only repeat observations form scores).
                # Without the alternation, unknown-terrain warm-up chases
                # the churning P_lb onto first-touch edges and the buffer
                # starves (measured: 26 scores from 120 observations).
                seen = [
                    e for e, b in self.beliefs.items() if b.observed
                ]
                if (self._round_idx + i) % 2 == 1 and seen:
                    pick = max(seen, key=lambda e: self.beliefs[e].age(self.t))
                else:
                    pick = sense_edges[(self._round_idx + i) % len(sense_edges)]
            elif (cfg.hybrid_sensing and not attainable) or (
                    cfg.refine_after_certify and certified):
                # objective-matched: epsilon unattainable -> VOI on the
                # expected-best route (departure quality is the objective)
                if alt is None:  # latch the mean graph once per round
                    self._mean_graph_round = self._mean_graph()
                    alt = set()
                mean_graph = self._mean_graph_round
                pick = baseline_select(
                    "voi", self.beliefs, self.t, self._rng,
                    mean_graph=mean_graph, start=self.start, goal=self.goal,
                )
            else:
                if cfg.adaptive_rate:
                    # Focused mode, churn-measured (T7): focused sensing
                    # SUPPRESSES churn (measured: K 59 -> 11 ~ L) — far
                    # better than rotating over the churn set, which
                    # spreads observations thin (same cert%, +20% spend).
                    # K still feeds the floor and the rate honestly.
                    pick = select_observation(
                        self.beliefs, sense_edges, [], set(),
                        q_eff, self.t,
                        backstop_age=cfg.backstop_slack * L * cfg.delta,
                    )
                else:
                    # alternatives are computed once and reused for the
                    # remaining picks of this round
                    if alt is None:
                        alt = near_optimal_alternatives(
                            self._graph_lower_with(lo), self.start,
                            self.goal, lb, k=cfg.k_alternatives,
                            delta_subopt=cfg.delta_subopt,
                        )
                    pick = select_observation(
                        self.beliefs, sense_edges, ub_edges, alt,
                        q_eff, self.t,
                        backstop_age=cfg.backstop_slack * L * cfg.delta,
                    )
            # NOTE(review): assumed to belong to the "cert" branch, where
            # sense_edges is known to be non-empty — confirm.
            if pick is None and maintain:
                # static-world maintenance: zero gap-recovery, but the
                # calibration buffer still needs fresh residuals
                pick = max(
                    sense_edges, key=lambda e: self.beliefs[e].age(self.t)
                )
        if pick is None:
            break
        # Observe, score, ACI feedback, belief update. The err event uses
        # the UNCLIPPED interval (T1a observable semantics): the cost-floor
        # clip is justified by latent positivity (c > 0) and is sound
        # inside the search metrics, but the observable y = c + eta can be
        # negative under heavy-tailed noise — testing observables against
        # clipped bounds manufactures spurious miscoverage.
        b_pre = self.beliefs[pick]
        # captured before ingest_observation() overwrites the belief
        was_observed = b_pre.observed
        half = q_eff + b_pre.rho * b_pre.age(self.t)
        lo_obs, up_obs = b_pre.c_hat - half, b_pre.c_hat + half
        obs = self.ingest_observation(pick)
        covered = lo_obs - 1e-12 <= obs <= up_obs + 1e-12
        # ACI is updated only for repeat observations made with a finite q
        if math.isfinite(q) and was_observed:
            self.aci.update(err=not covered)
        self.sense_spend += self.beliefs[pick].sense_cost
        sensed_list.append(pick)
    # NOTE(review): assumed to advance once per round, after the sensing
    # loop (the loop offsets by ``i`` itself) — confirm.
    self._round_idx += 1
    sensed = sensed_list[0] if sensed_list else None

    self.t += cfg.delta
    return cert, sensed
977
+
978
def _mean_graph(self) -> dict[Node, dict[Node, float]]:
    """Build (or reuse) the point-estimate adjacency used by VOI sensing.

    Each edge cost is ``max(c_hat, cfg.cost_floor)``. The result is cached
    against ``self._beliefs_version`` and rebuilt only when that version
    differs from the one recorded at the last build, because constructing
    the dict-of-dicts walks every edge.
    """
    version = self._beliefs_version
    if (getattr(self, "_mean_graph_version", None) == version
            and getattr(self, "_mean_graph_cache", None) is not None):
        return self._mean_graph_cache

    min_cost = self.cfg.cost_floor
    beliefs = self.beliefs
    adjacency: dict[Node, dict[Node, float]] = {
        tail: {head: max(beliefs[(tail, head)].c_hat, min_cost) for head in heads}
        for tail, heads in self._graph_lower_cache.items()
    }
    self._mean_graph_cache = adjacency
    self._mean_graph_version = version
    return adjacency
993
+
994
def _graph_lower_with(self, lo: dict[Edge, float]) -> dict[Node, dict[Node, float]]:
    """Write the lower costs in ``lo`` into the cached adjacency and return it.

    The cache ``self._graph_lower_cache`` is updated in place; the returned
    object is that same dict, not a copy.
    """
    adjacency = self._graph_lower_cache
    for edge, cost in lo.items():
        tail, head = edge
        adjacency[tail][head] = cost
    return adjacency
998
+
999
def ingest_observation(self, e: Edge) -> float:
    """Observe edge ``e`` at the current time and absorb the result.

    In order: optionally score the predictor's residual into the age-binned
    buffer, bump the per-edge observation count, push a drift-adjusted
    nonconformity score (the deterministic ``rho * age`` widening is
    subtracted so scores stay ~exchangeable under A1), then overwrite the
    belief with the observation clipped at ``cfg.cost_floor`` and expire
    the edge's cached metrics. Used by sensing (paid) and by traversal
    (free observation while moving).

    Returns:
        The raw observation returned by ``self.world.observe``.
    """
    cfg = self.cfg
    now = self.t
    belief = self.beliefs[e]
    obs = self.world.observe(e, now)

    # predictor residuals are only scored for edges seen before
    if self.predictor is not None and belief.observed:
        predicted = self.predictor(e, now, self.beliefs)
        if predicted is not None:
            self.binned.push(abs(obs - predicted), now, belief.age(now))

    n_obs = self._obs_count.get(e, 1 if belief.observed else 0) + 1
    self._obs_count[e] = n_obs

    # A score is only a valid noise-pair score when a real previous
    # observation exists (never against a prior). Thinned mode keeps only
    # disjoint pairs: the 2nd, 4th, ... observation of each edge.
    if belief.observed and (not cfg.thinned_scores or n_obs % 2 == 0):
        residual = obs - belief.c_hat
        age = belief.age(now)
        drift_allowance = belief.rho * age
        self.scorer.push(abs(residual) - drift_allowance, now)
        self.scorer.push_signed(residual, now)
        self.cal_rho_a_max = max(self.cal_rho_a_max, drift_allowance)
        # NOTE(review): the rate-sample step is assumed to sit inside this
        # scored branch (nesting rebuilt from a whitespace-stripped
        # listing) — confirm.
        if cfg.rho_mode == "online" and age >= cfg.delta:
            samples = self._rate_samples
            samples.append(abs(residual) / age)
            if len(samples) > 2000:
                del samples[0]

    # belief update, mirrored into the flat arrays
    belief.c_hat = max(obs, cfg.cost_floor)
    belief.t_obs = now
    belief.observed = True
    slot = self._edge_idx[e]
    self._arr_chat[slot] = belief.c_hat
    self._arr_tobs[slot] = belief.t_obs
    self._arr_obs[slot] = True
    self._arr_due[slot] = now  # expire alongside _cache_due
    self._beliefs_version += 1  # invalidates the cached snapshot gate
    self._cache_due[e] = now  # expire: fresh metric next round
    return obs
1039
+
1040
def _kappa_score(self, path: list[Node]) -> float:
    """Return the mean kappa (conductivity) over the edges of ``path``.

    The mean is used rather than the sum so that a longer path is not
    preferred merely for containing more reinforced edges. Edges with no
    stored kappa count as 0.0; a path without edges scores 0.0.
    """
    hops = path_edges(path)
    if hops:
        kappa = self._kappa
        return sum(kappa.get(hop, 0.0) for hop in hops) / len(hops)
    return 0.0
1047
+
1048
def _trimmed_prev_incumbent(self) -> list[Node] | None:
    """Return the previous incumbent cut down to start at ``self.start``.

    ``None`` is returned when there is no previous incumbent, when it does
    not end at ``self.goal``, when ``self.start`` is not on it (it is then
    not a valid s-g path), or when the remaining suffix has fewer than two
    nodes.
    """
    route = self._prev_incumbent
    if not route:
        return None
    if route[-1] != self.goal:
        return None
    try:
        cut = route.index(self.start)
    except ValueError:
        return None
    suffix = route[cut:]
    if len(suffix) < 2:
        return None
    return suffix
1060
+
1061
def snapshot_query(self, s: Node, g: Node, tau: float):
    """Certified O(1) route query via certificate-gated preprocessing.

    Gate: for every edge, the CURRENT interval must fit inside the snapshot
    estimate +/- tau (|c_hat_now - c_hat_snap| + lambda*q + rho*a <= tau).
    On the coverage event this puts every true cost within tau of the
    snapshot costs, so the returned (snapshot-optimal) path's true cost is
    within |P|*tau of its reported cost and within 2|P|*tau of the true
    optimum.

    Returns:
        The result of ``_answer_query`` when the gate is open, or ``None``
        when it is closed (the oracle then needs a rebuild or the map is
        genuinely too uncertain — fall back to ``round()``).
    """
    import numpy as _np
    from certflow.fastgraph import FlatGraph
    from certflow.snapshot import SnapshotOracle

    now = self.t

    # the gate verdict is constant within a planner round: cache it
    stamp = (self._round_idx, now, tau, self._beliefs_version)
    if getattr(self, "_gate_stamp", None) == stamp:
        return self._answer_query(s, g, tau) if self._gate_ok else None

    # re-anneal at query time: weighted mass decays between rounds, so
    # the stored per-edge level can fall just below the supportable floor
    mass = self.scorer.effective_mass(now)
    if mass <= 0:
        return None
    stored_level = getattr(self, "_last_alpha_edge", self._alpha_prime_eff)
    alpha_edge_q = max(stored_level, (1.0 + 1e-6) / (mass + 1.0))
    q = self.scorer.quantile(alpha_edge_q, now)
    if not math.isfinite(q):
        return None

    # per-edge half-width of the current interval
    half = self.cfg.latent_margin * q + self._arr_rho * (now - self._arr_tobs)

    # an unobserved edge anywhere closes the gate
    if not self._arr_obs.all():
        self._gate_stamp = stamp
        self._gate_ok = False
        return None

    fits_now = half <= tau
    if self._oracle is None or self._oracle_chat_snap is None:
        # fresh build: snap == now
        drift_ok = fits_now
    else:
        drift = _np.abs(self._arr_chat - self._oracle_chat_snap)
        drift_ok = (drift + half) <= tau

    if not bool(drift_ok.all()):
        # widths/drift exceed tau on some edge: snapshot (if any) expires
        if self._oracle is not None:
            self._oracle.invalidate()
        self._oracle_chat_snap = None
        # rebuild is allowed only when the CURRENT map fits the gate
        if not bool(fits_now.all()):
            self._gate_stamp = stamp
            self._gate_ok = False
            return None

    if self._oracle is None or not self._oracle.ready:
        if self._flat_mid is None:
            # first build: create the flat graph and its edge-slot index
            self._flat_mid = FlatGraph(
                self._graph_lower_cache,
                extra_nodes=(self.start, self.goal),
            )
            index = self._flat_mid.index_of
            self._slots_mid = _np.array(
                [
                    self._flat_mid.slot_of(index[u], index[v])
                    for u, v in self._edge_order
                ],
                dtype=_np.int64,
            )
        self._flat_mid.cost[self._slots_mid] = self._arr_chat
        self._oracle = self._oracle or SnapshotOracle(self._flat_mid)
        self._oracle.build(now)
        self._oracle_chat_snap = self._arr_chat.copy()

    # gate open: record the verdict and the levels the answer will quote
    self._gate_stamp = stamp
    self._gate_ok = True
    self._gate_alpha_edge = alpha_edge_q
    self._gate_dstale = self.scorer.delta_stale(now)
    return self._answer_query(s, g, tau)
1135
+
1136
def _answer_query(self, s: Node, g: Node, tau: float):
    """Look up the snapshot path from ``s`` to ``g`` and package the answer.

    Returns ``None`` when either endpoint is missing from the flat graph's
    index or the oracle has no path. Otherwise returns a dict with keys
    ``path``, ``cost``, ``slack`` (``L * tau``), ``opt_slack``
    (``2 * L * tau``) and ``confidence``, where ``L`` is the number of
    edges on the path.
    """
    flat = self._flat_mid
    lookup = flat.index_of
    src, dst = lookup.get(s), lookup.get(g)
    if src is None or dst is None:
        return None

    hops = self._oracle.path(src, dst)
    if hops is None:
        return None

    path = [flat.node_of(k) for k in hops]
    cost = self._oracle.cost(src, dst)
    n_edges = len(path) - 1

    # claim at least the stored level, and at least what this path's
    # length implies at the gate's per-edge level (capped at 1.0)
    stored_claim = getattr(self, "_alpha_claim", self._alpha_prime_eff)
    alpha_claim = max(stored_claim, min(1.0, n_edges * self._gate_alpha_edge))

    return {
        "path": path,
        "cost": cost,
        "slack": n_edges * tau,
        "opt_slack": 2 * n_edges * tau,
        "confidence": path_confidence(
            alpha_claim, [self._gate_dstale] * max(n_edges, 1)
        ),
    }
1157
+
1158
def retarget(self, start: Node, goal: Node) -> None:
    """Switch to a new mission (start/goal) in the same environment.

    Learned memory — beliefs, calibration buffer, ACI state, kappa — is
    left alone. Mission-specific state (incumbent, sensing target,
    gap-stall counters, last path length) is dropped. The searches are
    rebuilt from scratch at the new endpoints, since the change is global
    and scratch beats repair.
    """
    self.start = start
    self.goal = goal

    # forget everything tied to the old mission
    self._prev_incumbent, self._p_sense = [], []
    self._incumbent_since = self.t
    self._last_gap, self._stall = math.inf, 0
    if hasattr(self, "_last_L"):
        del self._last_L

    if not self._cache_lo:
        # retarget before any round: warm-up metrics, fresh engines
        adjacency = self._graph_lower_cache
        capped = {edge: _UB_CAP for edge in self.beliefs}
        self.sp_lower = FastDStarLite(
            adjacency, start, goal, flat=self._flat_lo)
        self.sp_upper = FastDStarLite(
            self._to_adj(set(adjacency), capped),
            start, goal, flat=self._flat_up)
        return
    self._rebuild_searches()
1181
+
1182
def advance_start(self, node: Node) -> None:
    """Record that the robot moved to ``node``.

    Updates ``self.start`` and forwards the new start to both the
    lower-bound and upper-bound searches via ``set_start`` (D* Lite km
    offset).
    """
    self.start = node
    for search in (self.sp_lower, self.sp_upper):
        search.set_start(node)