certflow 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- certflow/__init__.py +40 -0
- certflow/baselines.py +279 -0
- certflow/cert.py +1186 -0
- certflow/ch.py +1348 -0
- certflow/conformal.py +275 -0
- certflow/drift.py +472 -0
- certflow/egraph.py +371 -0
- certflow/episodes.py +176 -0
- certflow/fastgraph.py +1032 -0
- certflow/graphcore.py +271 -0
- certflow/harness.py +534 -0
- certflow/movingai.py +673 -0
- certflow/oracle.py +222 -0
- certflow/realworld.py +375 -0
- certflow/roadnet.py +585 -0
- certflow/sensing.py +121 -0
- certflow/snapshot.py +167 -0
- certflow/types.py +119 -0
- certflow-1.0.1.dist-info/METADATA +199 -0
- certflow-1.0.1.dist-info/RECORD +22 -0
- certflow-1.0.1.dist-info/WHEEL +4 -0
- certflow-1.0.1.dist-info/licenses/LICENSE +21 -0
certflow/cert.py
ADDED
|
@@ -0,0 +1,1186 @@
|
|
|
1
|
+
"""CERT main loop: the 8-step replanning round of spec section 4.3.
|
|
2
|
+
|
|
3
|
+
Integrates graphcore (dual incremental searches), conformal (certificate
|
|
4
|
+
substrate), and sensing (route-critical observation selection). The planner
|
|
5
|
+
never sees true costs; it interacts with the world only through observe().
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import math
|
|
10
|
+
import random
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
from certflow.conformal import (
|
|
14
|
+
ACITracker,
|
|
15
|
+
AgeBinnedScorer,
|
|
16
|
+
ConformalScorer,
|
|
17
|
+
path_alpha_edge,
|
|
18
|
+
path_confidence,
|
|
19
|
+
)
|
|
20
|
+
from certflow.fastgraph import FastDStarLite, FlatGraph
|
|
21
|
+
from certflow.sensing import baseline_select, near_optimal_alternatives, path_edges, select_observation
|
|
22
|
+
from certflow.types import Certificate, Edge, EdgeBelief, Node, World
|
|
23
|
+
|
|
24
|
+
# Finite upper-cost cap for unbounded edges. An UNOBSERVED edge (or warm-up
# u-cost at q=inf) has no coverage theorem pricing its upper bound, so it is
# conceptually +inf; D* Lite needs strictly finite, positive costs to maintain
# its g/rhs invariants, so we cap at this sentinel. It must dominate any real
# path cost (so capped edges are never chosen unless unavoidable) yet stay well
# below float overflow when summed over a path.
# Applied wherever an upper cost is produced or written to a search: in
# CertPlanner.__init__, _metrics, _rebuild_searches and _refresh_metrics.
_UB_CAP = 1e9
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class PlannerConfig:
    """Tunable settings for CertPlanner.

    Every field has a default, so ``PlannerConfig()`` is a valid
    configuration. Field ORDER is part of the interface (the dataclass
    accepts positional arguments), so new fields should be appended rather
    than inserted. ``recommended_config()`` returns an instance with the
    optional feature switches from the benchmark program turned on.
    """

    epsilon: float = 5.0  # target certificate gap
    alpha_prime: float = 0.1  # path-level miscoverage target
    rho_w: float = 0.99  # conformal weight decay per unit time
    eps_tv: float = 0.0  # A2 TV-Lipschitz rate (0 = exchangeable claim)
    gamma_aci: float = 0.005  # ACI step size
    delta: float = 1.0  # sensing period (time units per round)
    rho_hat_over_rho: float = 1.0  # drift misspecification factor (A1 sweep)
    sense_cost: float = 0.1  # uniform m_e for v1
    # NOTE(review): presumably the number of near-optimal alternative paths
    # considered by sensing (cf. the near_optimal_alternatives import) and the
    # sub-optimality tolerance that defines "near-optimal" — confirm against
    # the round() implementation.
    k_alternatives: int = 3
    delta_subopt: float = 0.1
    backstop_slack: float = 1.5  # backstop_age = slack * L * delta
    # Minimum edge cost: survey observations, predictor centers and lower
    # bounds are all clamped to this floor.
    cost_floor: float = 1e-3
    # Maintenance sensing while certified (T2': certification is sustained by
    # a sensing rate, not achieved once). lookahead: sense when the projected
    # gap crosses epsilon within this many rounds. every: calibration-freshness
    # floor, sense at least once per this many certified rounds.
    maintenance_lookahead: float = 2.0
    maintenance_every: int = 5
    # kappa corridor hysteresis (Design 1): among incumbent candidates whose
    # u-cost is within kappa_slack_frac*epsilon of the tightest UB, execute the
    # one with the highest mean edge-conductivity instead of the raw argmin.
    # UB itself is always the tightest bound, so the certificate is untouched;
    # execution suboptimality stays bounded by gap + slack. kappa is reinforced
    # on the executed incumbent and decays geometrically each round.
    use_kappa: bool = False
    kappa_decay: float = 0.95
    kappa_slack_frac: float = 0.5

    # Sensing policy: "cert" (gap-shrink VOI + backstop, the contribution),
    # or Tier-2 baselines: "random", "max_age" (global freshness round-robin),
    # "max_width" (global info-gain proxy, not route-critical), "none".
    sensing_policy: str = "cert"

    # Unknown-terrain start (Tier-2): skip the t0 survey; every edge begins
    # at a weak prior with a large age, so intervals start wide and sensing
    # allocation actually matters. With initial_survey=False each edge starts
    # at cost prior_cost, last-observed time t0 - prior_age, and is flagged
    # as not observed.
    initial_survey: bool = True
    prior_cost: float = 1.0
    prior_age: float = 200.0

    # Margin factor lambda (paper/theory.tex): 1.0 = observable-coverage
    # semantics (T1a, the empirically-validated default); 2.0 = provable
    # latent-cost coverage (T1b). Scales the conformal quantile everywhere.
    latent_margin: float = 1.0

    # Thinned calibration (theory.tex, honest accounting item 1): consecutive
    # scores on the same edge share a noise draw (one-dependent). Thinning
    # keeps only scores from disjoint observation pairs (2nd, 4th, ... obs of
    # each edge), restoring independence at a factor-2 sample cost. Part of
    # the provable mode together with latent_margin=2.
    thinned_scores: bool = False

    # ACI adapts the working alpha from realized edge-coverage events. It
    # CANCELS static margins (with lambda=2, errs vanish, alpha climbs, q
    # shrinks until errs return to target) — so the provable T1b mode must
    # freeze it: use_aci=False pins the working level at alpha_prime and the
    # quantile is the raw weighted-conformal quantile the theorem assumes.
    use_aci: bool = True

    # Sum-aware upper certificate (theory.tex T4): replace the incumbent's
    # Bonferroni UB value (sum of per-edge u_e, margin ~ L*q_{a'/L}) with
    # sum(c_hat) + block-quantile margin at level alpha' (~ sqrt(L)*q) +
    # sum(rho*a). UB side only — the LB must hold uniformly over paths and
    # keeps its per-edge construction. Tightens the gap and the T2' floor.
    sum_aware_ub: bool = False

    # Alpha annealing: report the best currently-supportable claim instead of
    # INVALID during warm-up. The effective sample size m floors the per-edge
    # level at 1/(m+1); the path level anneals from coarse to the target as
    # evidence accrues. Claims always state the annealed (weaker) level, so
    # nothing is overclaimed; certification additionally requires the claim
    # to have reached min_certify_confidence (never stop sensing on a weak
    # claim).
    anneal_alpha: bool = True
    min_certify_confidence: float = 0.5

    # Adaptive sensing rate (T2'): sense k <= max_sense_per_round edges per
    # round, with k chosen so the sustained gap floor 2*L*q + rho*Delta*
    # L*(L-1)/k meets epsilon when possible. Also focuses sensing on P_lb
    # (gap decomposition) and adapts the pre-widening horizon B so the cache
    # spends at most prewiden_slack_frac of the epsilon-slack on width
    # (at high drift B drops to 0: exact metrics, slower, certifiable).
    # Off by default (changes spend and latency semantics).
    adaptive_rate: bool = False
    max_sense_per_round: int = 4
    prewiden_slack_frac: float = 0.25

    # Objective-matched sensing: when T2' says epsilon is unattainable at the
    # current rate, certificate-gap sensing buys nothing — spend observations
    # on the EXPECTED-best route instead (VOI), which is what determines
    # departure quality; switch back to gap-directed sensing when epsilon is
    # attainable. Measured: matches the VOI baseline's regret (5x better than
    # pure gap sensing in unattainable regimes) while keeping certificate
    # behavior where certification is possible.
    hybrid_sensing: bool = False

    # Online drift-rate estimation: rho_mode="online" frees the planner from
    # a world-supplied rho. Pooled rate samples |obs - c_hat_prev| / age from
    # re-observations (noise inflates them -> conservative); rho_hat is their
    # rho_online_quantile. Until the estimator warms, rho ~ 0 and the
    # conformal layer absorbs unmodeled drift into the scores (validated on
    # real traffic at up to 49% A1-violation rates: width cost, not coverage).
    # The estimator's own knobs (rho_online_quantile, rho_online_min_samples)
    # are declared further down, after the predictor fields.
    rho_mode: str = "given"  # "given" | "online"

    # Stabilized sensing target (the P_lb-churn factor): under drift the
    # optimistic path is a moving target and focused sensing chases it,
    # leaving realized gaps ~2x above the T2' floor. Keep sensing the SAME
    # path while its ell-cost stays within (1+tol) of LB; the gap bound pays
    # at most tol*LB extra (u(P_s) - ell(P_s) + [ell(P_s) - LB]) and the
    # ages on the stable target obey the round-robin analysis.
    stabilize_sensing: bool = False
    sense_path_tol: float = 0.1

    # Churn-aware certification (T7): the optimistic path hops over a CHURN
    # SET of K >= L edges under drift; the T2' floor and the sensing target
    # must use K, not the instantaneous path length, or realized gaps run
    # ~K/L above the floor (the measured ~1.6x residual). K is tracked over
    # a sliding window and reported; adaptive k solves the K-floor.
    churn_window: int = 50

    # Refine-after-certify: certification stops gap-sensing, but the
    # certified incumbent can be far from optimal WITHIN epsilon (measured
    # in lifelong runs: memory-carried incumbents certify at regret 0.4-0.6
    # vs 0.025 for fresh exploration). When on, certified rounds keep
    # sensing the EXPECTED-best route (VOI) to improve the incumbent; the
    # certificate is untouched (sensing only ever tightens it).
    refine_after_certify: bool = False

    # Strict LB level (theory GAP-A): the lower bound must cover the UNKNOWN
    # optimum's edges, whose count can exceed |P_lb|; the airtight per-edge
    # level divides by (|V|-1), not L. Off by default (the deployed planner
    # operates at alpha'/L, validated against ground truth at 1.000 across
    # all conditions — the conservatism slack absorbs the difference); ON in
    # the provable recipe, where every constant must be theorem-exact.
    strict_lb_alpha: bool = False

    # Decision-uniform certificates: per-round claims are marginal; a robot
    # that ACTS whenever certified peeks every round, and across T rounds the
    # chance that SOME acted-on certificate failed exceeds alpha'. Full
    # per-round time-uniformity is quantifiably impractical (stitched-DKW
    # needs n >~ 63k scores at Bonferroni levels — theory.tex T6), but the
    # certificate is only USED at decision instants (stop sensing, depart):
    # alpha-spending over a decision budget gives simultaneous validity of
    # ALL decisions at level alpha' for the width cost of alpha'/N_dec.
    decision_uniform: bool = False
    max_decisions: int = 5

    # Predictor mode (spatial-predictor study): when a point predictor is
    # supplied to the planner, edges older than predictor_age_gate*delta use
    # (predicted center, LEARNED age-binned conformal width) instead of
    # (last obs, q + rho*age). Per-edge fallback chain: prediction available
    # AND its age-bin quantile supportable, else the model-based path —
    # separate calibration buffers per model, so scores never mix regimes
    # (assumption A4': within-bin exchangeability). Bin edges in delta units.
    predictor_age_gate: float = 12.0
    predictor_bins: tuple = (6.0, 12.0, 24.0, 48.0)
    # Online drift-rate estimator knobs (used when rho_mode="online"; they are
    # declared here, not next to rho_mode, and must stay in this position to
    # keep the positional field order): the pooled rate quantile reported as
    # rho_hat, and the minimum number of rate samples before any estimate.
    rho_online_quantile: float = 0.9
    rho_online_min_samples: int = 10

    # Lazy pre-widening (T3 locality): cache edge metrics at age + B*delta so
    # they stay valid (conservatively wide) for B rounds and D* Lite repair
    # touches ~|E|/B edges per round instead of all of them. Soundness:
    # cached ell <= true ell and cached u >= true u throughout the window.
    # Width cost: +2*rho*B*delta per edge. 0 disables (exact, slow).
    prewiden_rounds: int = 10

    # Staggered pre-widening (predictor-free vectorized path): per-edge horizon
    # factors are drawn uniformly from [stagger_lo, stagger_hi] to spread cache
    # expiries across rounds instead of all firing on one synchronized round.
    # Soundness is independent of the spread — each entry's width is computed at
    # ITS OWN horizon — so this is purely a latency-smoothing knob.
    stagger_lo: float = 0.75
    stagger_hi: float = 1.25
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def recommended_config(**overrides) -> "PlannerConfig":
    """Return a PlannerConfig with the benchmark-preferred switches enabled.

    Turns on online drift estimation (``rho_mode="online"``), hybrid
    (objective-matched) sensing, kappa hysteresis, the adaptive sensing
    rate (which also adapts pre-widening) and the sum-aware upper bound.
    Alpha annealing is already on by default, and ``decision_uniform`` is
    left at its default because it is a claim-semantics choice.

    Any keyword in ``overrides`` is forwarded to ``PlannerConfig`` and takes
    precedence over the preset value of the same name.
    """
    preset = {
        "rho_mode": "online",
        "hybrid_sensing": True,
        "use_kappa": True,
        "adaptive_rate": True,
        "sum_aware_ub": True,
    }
    # Later entries win, so caller overrides replace preset values.
    return PlannerConfig(**{**preset, **overrides})
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class CertPlanner:
|
|
229
|
+
"""Holds beliefs, the two D* Lite instances, and the certificate state."""
|
|
230
|
+
|
|
231
|
+
def __init__(
    self,
    world: World,
    start: Node,
    goal: Node,
    config: PlannerConfig,
    t0: float = 0.0,
    predictor=None,
) -> None:
    """Build beliefs, calibration state and the two shortest-path searches.

    Args:
        world: environment handle. This constructor calls ``world.edges()``,
            reads ``world.graph`` and, when ``config.initial_survey`` is
            set, calls ``world.observe(e, t0)`` once per edge; ``_rho_hat``
            additionally calls ``world.rho_true(e)`` unless
            ``config.rho_mode == "online"``.
        start: source node of the planning query.
        goal: target node of the planning query.
        config: planner settings, stored as ``self.cfg``.
        t0: initial planner time.
        predictor: optional callable, invoked by ``_pred_interval`` as
            ``predictor(e, t, beliefs)``; ``None`` disables predictor pricing.

    Emits a ``warnings.warn`` when annealing is off and ``rho_w`` caps the
    effective sample size below ``1/alpha_prime``.
    """
    # cfg and world must be set first: _rho_hat() (called while building
    # beliefs below) reads both.
    self.cfg = config
    self.world = world
    if not config.anneal_alpha and config.rho_w < 1.0:
        # geometric weights with decay rho_w sum to at most 1/(1-rho_w)
        ess_cap = 1.0 / (1.0 - config.rho_w)
        if ess_cap < 1.0 / config.alpha_prime:
            import warnings
            warnings.warn(
                f"rho_w={config.rho_w} caps effective sample size at "
                f"~{ess_cap:.0f} < 1/alpha_prime={1/config.alpha_prime:.0f}: "
                "without annealing the certificate may never become valid",
                stacklevel=2,
            )
    self.start = start
    self.goal = goal
    self.t = t0

    # Initial survey: one observation per edge at t0 (spec: warm-up phase;
    # the certificate stays INVALID until the calibration buffer fills).
    # With initial_survey=False (unknown terrain), edges start at a weak
    # prior with a large age instead.
    self.beliefs: dict[Edge, EdgeBelief] = {}
    for e in world.edges():
        if config.initial_survey:
            c0, t_obs0, seen = max(world.observe(e, t0), config.cost_floor), t0, True
        else:
            c0, t_obs0, seen = config.prior_cost, t0 - config.prior_age, False
        self.beliefs[e] = EdgeBelief(
            c_hat=c0,
            t_obs=t_obs0,
            rho=self._rho_hat(e),
            sense_cost=config.sense_cost,
            observed=seen,
        )

    self.scorer = ConformalScorer(rho_w=config.rho_w, eps_tv=config.eps_tv)
    # predictor must be set before the _metrics() call further down
    self.predictor = predictor
    # predictor_bins are given in delta units; convert to time units here
    self.binned = AgeBinnedScorer(
        bin_edges=tuple(b * config.delta for b in config.predictor_bins),
        rho_w=config.rho_w, eps_tv=config.eps_tv,
    )
    self.pred_used_rounds = 0  # diagnostic: edges priced by the predictor
    self._edge_alpha_extra: dict[Edge, float] = {}  # per-bin annealing charge
    self.aci = ACITracker(alpha_target=config.alpha_prime, gamma=config.gamma_aci)
    self.sense_spend = 0.0
    self._round_idx = 0
    self._obs_count: dict[Edge, int] = {}  # real observations per edge
    self._rate_samples: list[float] = []  # online rho: |dc|/age samples
    self._rho_online = 1e-9  # same floor value _rho_hat returns in online mode
    self._last_gap = math.inf  # gap-stall feedback for k
    self._stall = 0
    self._churn_seen: dict[Edge, int] = {}  # edge -> last round on P_lb
    self.cal_rho_a_max = 0.0  # max rho_e*a_e among pushed scores (pi_cal diagnostic)

    # lazy pre-widening cache (see PlannerConfig.prewiden_rounds)
    self._cache_lo: dict[Edge, float] = {}
    self._cache_up: dict[Edge, float] = {}
    self._cache_due: dict[Edge, float] = {}  # absolute expiry time
    self._cache_q: float = -1.0  # q the cache was built with

    # certified snapshot oracle (snapshot.py): built on point estimates
    # when the certificate proves the map tight; O(1) queries thereafter
    self._oracle = None
    self._oracle_chat_snap = None
    self._flat_mid = None
    self._beliefs_version = 0

    # stabilized sensing target (see PlannerConfig.stabilize_sensing)
    self._p_sense: list[Node] = []  # stabilized sensing target path
    # kappa corridor hysteresis state (see PlannerConfig.use_kappa)
    self._kappa: dict[Edge, float] = {}
    self._prev_incumbent: list[Node] = []
    self._incumbent_since = t0  # when the incumbent edge-set last changed
    self._rng = random.Random(0)  # baseline sensing policies only

    # node set = every key of world.graph plus every neighbor it lists
    nodes = set(world.graph) | {v for n in world.graph for v in world.graph[n]}
    lo, up = self._metrics(q=math.inf)  # warm-up: q=inf -> ell at floor, u at inf
    # D* Lite needs finite costs; cap warm-up upper costs.
    # NOTE(review): _metrics already returns _UB_CAP for every edge at
    # q=inf, so this cap is a defensive no-op here.
    up = {e: min(c, _UB_CAP) for e, c in up.items()}
    adj_lo = self._to_adj(nodes, lo)
    adj_up = self._to_adj(nodes, up)
    self._flat_lo = FlatGraph(adj_lo, extra_nodes=(start, goal))
    self._flat_up = FlatGraph(adj_up, extra_nodes=(start, goal))
    self.sp_lower = FastDStarLite(adj_lo, start, goal, flat=self._flat_lo)
    self.sp_upper = FastDStarLite(adj_up, start, goal, flat=self._flat_up)
    # kept as the structure-only adjacency reused by later engine rebuilds
    self._graph_lower_cache = adj_lo
    # fixed edge order + CSR slots for vectorized cache->flat cost sync
    # (the shared-flat constructor does NOT read costs from the adjacency,
    # so scratch rebuilds must write the cache into the arrays themselves)
    import numpy as _np
    self._edge_order = list(self.beliefs)
    ix_lo, ix_up = self._flat_lo.index_of, self._flat_up.index_of
    self._slots_lo = _np.array(
        [self._flat_lo.slot_of(ix_lo[u], ix_lo[v]) for u, v in self._edge_order],
        dtype=_np.int64)
    self._slots_up = _np.array(
        [self._flat_up.slot_of(ix_up[u], ix_up[v]) for u, v in self._edge_order],
        dtype=_np.int64)
    # belief arrays in edge order (vectorized full-refresh: fast_metrics)
    self._edge_idx = {e: i for i, e in enumerate(self._edge_order)}
    self._arr_chat = _np.array(
        [self.beliefs[e].c_hat for e in self._edge_order])
    self._arr_tobs = _np.array(
        [self.beliefs[e].t_obs for e in self._edge_order])
    self._arr_rho = _np.array(
        [self.beliefs[e].rho for e in self._edge_order])
    self._arr_obs = _np.array(
        [self.beliefs[e].observed for e in self._edge_order], dtype=bool)
    # staggered pre-widening horizons (audit seam 3): synchronized expiry
    # made one round per cycle pay a full-|E| refresh loop; per-edge
    # horizon factors in [config.stagger_lo, config.stagger_hi] (defaults
    # 0.75 and 1.25) spread expiries across rounds. The generator is seeded
    # with 0, so the factors are reproducible across runs.
    # Soundness: each entry's width is computed at ITS OWN horizon.
    self._arr_stagger = _np.random.default_rng(0).uniform(
        config.stagger_lo, config.stagger_hi, len(self._edge_order))
    # _arr_due is the expiry clock for the VECTORIZED (predictor-free) refresh
    # path ONLY; it is the array twin of _cache_due there. When a predictor is
    # supplied the dict path drives expiry off _cache_due alone and never
    # reads _arr_due, so the two intentionally do not track each other in that
    # mode (the array view is simply unused). ingest_observation expires both.
    # Initialized to -inf so every entry counts as already expired.
    self._arr_due = _np.full(len(self._edge_order), -_np.inf)
|
|
358
|
+
|
|
359
|
+
def _adaptive_B(self, q_eff: float) -> int:
|
|
360
|
+
"""Pre-widening horizon: cap width spend at a fraction of the
|
|
361
|
+
epsilon-slack when certification is in play; keep the configured
|
|
362
|
+
latency-optimal horizon during warm-up or unattainable epsilon."""
|
|
363
|
+
cfg = self.cfg
|
|
364
|
+
B = cfg.prewiden_rounds
|
|
365
|
+
if not cfg.adaptive_rate or B <= 0:
|
|
366
|
+
return max(B, 0)
|
|
367
|
+
L_b = max(getattr(self, "_last_L", 1), 1)
|
|
368
|
+
rho_b = max((self.beliefs[e].rho for e in self.beliefs), default=0.0)
|
|
369
|
+
slack = cfg.epsilon - 2 * L_b * q_eff
|
|
370
|
+
if rho_b > 0 and slack > 0 and q_eff > 0:
|
|
371
|
+
b_cap = int(cfg.prewiden_slack_frac * slack
|
|
372
|
+
/ (2 * rho_b * L_b * cfg.delta))
|
|
373
|
+
B = max(0, min(B, b_cap))
|
|
374
|
+
return B
|
|
375
|
+
|
|
376
|
+
def _rebuild_searches(self) -> None:
    """Replace both D* Lite engines with fresh ones built from the cache.

    Intended for changes that touch most edges, where one scratch compute
    beats incremental repair of ~|E| inconsistent vertices. Reads
    ``_cache_lo`` / ``_cache_up`` for every edge in ``_edge_order`` and
    overwrites the corresponding slots of the two flat cost arrays.
    """
    import numpy as _np

    edge_count = len(self._edge_order)

    # The cached costs have to land in the flat arrays BEFORE the engines
    # are constructed: the shared-flat constructor keeps whatever costs are
    # already there and this path never calls update_edges, so skipping the
    # write would resurrect stale costs.
    lower_costs = _np.fromiter(
        (self._cache_lo[edge] for edge in self._edge_order),
        dtype=_np.float64, count=edge_count)
    self._flat_lo.cost[self._slots_lo] = lower_costs

    upper_costs = _np.fromiter(
        (min(self._cache_up[edge], _UB_CAP) for edge in self._edge_order),
        dtype=_np.float64, count=edge_count)
    self._flat_up.cost[self._slots_up] = upper_costs

    # The FlatGraphs are reused (CSR stays built, numba kernel stays warm).
    # Both engines get the same structure-only adjacency because they read
    # their costs from flat.cost, not from the adjacency values.
    structure = self._graph_lower_cache
    self.sp_lower = FastDStarLite(structure, self.start, self.goal,
                                  flat=self._flat_lo)
    self.sp_upper = FastDStarLite(structure, self.start, self.goal,
                                  flat=self._flat_up)
|
|
397
|
+
|
|
398
|
+
def _rho_hat(self, e: Edge) -> float:
|
|
399
|
+
if self.cfg.rho_mode == "online":
|
|
400
|
+
return 1e-9 # estimator warms from observed rates (see round())
|
|
401
|
+
rho_true = self.world.rho_true(e)
|
|
402
|
+
if not math.isfinite(rho_true):
|
|
403
|
+
rho_true = 0.0 # off-model worlds: planner assumes its A1 model anyway
|
|
404
|
+
return max(rho_true * self.cfg.rho_hat_over_rho, 1e-9)
|
|
405
|
+
|
|
406
|
+
def _update_online_rho(self) -> None:
|
|
407
|
+
"""Pooled drift-rate estimate from re-observation rate samples; on a
|
|
408
|
+
material change, update all beliefs and force a metric rebuild."""
|
|
409
|
+
cfg = self.cfg
|
|
410
|
+
if cfg.rho_mode != "online" or len(self._rate_samples) < cfg.rho_online_min_samples:
|
|
411
|
+
return
|
|
412
|
+
if len(self._rate_samples) < 1.1 * getattr(self, "_rho_sorted_at", 0):
|
|
413
|
+
return # re-estimate only when the sample set grew 10%
|
|
414
|
+
self._rho_sorted_at = len(self._rate_samples)
|
|
415
|
+
rates = sorted(self._rate_samples)
|
|
416
|
+
rho = max(rates[int(cfg.rho_online_quantile * (len(rates) - 1))], 1e-9)
|
|
417
|
+
if abs(rho - self._rho_online) > 0.05 * max(self._rho_online, 1e-9):
|
|
418
|
+
self._rho_online = rho
|
|
419
|
+
for b in self.beliefs.values():
|
|
420
|
+
b.rho = rho
|
|
421
|
+
self._arr_rho[:] = rho
|
|
422
|
+
self._cache_q = -1.0 # rho changed everywhere: full metric rebuild
|
|
423
|
+
|
|
424
|
+
def _to_adj(self, nodes, costs: dict[Edge, float]) -> dict[Node, dict[Node, float]]:
|
|
425
|
+
adj: dict[Node, dict[Node, float]] = {n: {} for n in nodes}
|
|
426
|
+
for (u, v), c in costs.items():
|
|
427
|
+
adj[u][v] = c
|
|
428
|
+
return adj
|
|
429
|
+
|
|
430
|
+
def _pred_interval(self, e: Edge, age: float) -> tuple[float, float] | None:
|
|
431
|
+
"""(center, halfwidth) from the predictor path, or None to fall back.
|
|
432
|
+
Requires: predictor supplied, age past the gate, a prediction for e,
|
|
433
|
+
and a supportable age-bin quantile at the current per-edge level."""
|
|
434
|
+
cfg = self.cfg
|
|
435
|
+
if self.predictor is None or age < cfg.predictor_age_gate * cfg.delta:
|
|
436
|
+
return None
|
|
437
|
+
pred = self.predictor(e, self.t, self.beliefs)
|
|
438
|
+
if pred is None:
|
|
439
|
+
return None
|
|
440
|
+
alpha_edge = getattr(self, "_last_alpha_edge", self._alpha_prime_eff)
|
|
441
|
+
# per-bin annealing: query at the bin's supportable level and charge
|
|
442
|
+
# the weakening to the claim (weakest-link accounting, summed over
|
|
443
|
+
# the certifying path's predictor-priced edges in round())
|
|
444
|
+
mass = self.binned.effective_mass(self.t, age)
|
|
445
|
+
if mass <= 0.0:
|
|
446
|
+
return None
|
|
447
|
+
alpha_bin = max(alpha_edge, (1.0 + 1e-9) / (mass + 1.0))
|
|
448
|
+
if alpha_bin >= 0.5:
|
|
449
|
+
return None # bin too immature to be worth a claim
|
|
450
|
+
qb = self.binned.quantile(alpha_bin, self.t, age)
|
|
451
|
+
if not math.isfinite(qb):
|
|
452
|
+
return None
|
|
453
|
+
self._edge_alpha_extra[e] = max(0.0, alpha_bin - alpha_edge)
|
|
454
|
+
return max(pred, cfg.cost_floor), cfg.latent_margin * qb
|
|
455
|
+
|
|
456
|
+
def _metrics(self, q: float) -> tuple[dict[Edge, float], dict[Edge, float]]:
|
|
457
|
+
lo, up = {}, {}
|
|
458
|
+
for e, b in self.beliefs.items():
|
|
459
|
+
if not b.observed:
|
|
460
|
+
# an unobserved edge is UNKNOWN: the prior is not an
|
|
461
|
+
# observation and no coverage theorem prices it — ell at the
|
|
462
|
+
# floor (it could be cheap), u unbounded (it could be awful).
|
|
463
|
+
# Certification therefore requires a fully-OBSERVED path:
|
|
464
|
+
# exactly the Traversing-Mars 'prove the path' semantics,
|
|
465
|
+
# which T2's degenerate corollary claims (and a noise-free
|
|
466
|
+
# test exposed: the prior-centered interval was a soundness
|
|
467
|
+
# hole masked by noise everywhere else).
|
|
468
|
+
lo[e] = self.cfg.cost_floor
|
|
469
|
+
up[e] = _UB_CAP
|
|
470
|
+
elif math.isfinite(q):
|
|
471
|
+
pi = self._pred_interval(e, b.age(self.t))
|
|
472
|
+
if pi is not None:
|
|
473
|
+
c, h = pi
|
|
474
|
+
lo[e] = max(self.cfg.cost_floor, c - h)
|
|
475
|
+
up[e] = max(self.cfg.cost_floor, c + h)
|
|
476
|
+
else:
|
|
477
|
+
lo[e] = b.lower(self.t, q, self.cfg.cost_floor)
|
|
478
|
+
up[e] = b.upper(self.t, q, self.cfg.cost_floor)
|
|
479
|
+
else:
|
|
480
|
+
lo[e] = self.cfg.cost_floor
|
|
481
|
+
up[e] = _UB_CAP
|
|
482
|
+
return lo, up
|
|
483
|
+
|
|
484
|
+
def _refresh_metrics(self, q_eff: float) -> None:
    """Maintain the pre-widened metric cache; push only changed edges to
    the two searches. Soundness: entries are computed at age + B*delta,
    so cached ell <= true ell and cached u >= true u until expiry; a grown
    quantile forces a full rebuild (a cached-too-small q would be unsound,
    a cached-too-large q is only conservative).

    Args:
        q_eff: effective conformal half-width for this round (the caller
            passes the quantile scaled by cfg.latent_margin, 0.0 during
            warm-up).

    Side effects: updates self._cache_lo / _cache_up / _cache_due (and
    self._arr_due / self._cache_q where applicable) and either repairs
    self.sp_lower / self.sp_upper via update_edges or replaces them.
    Two code paths exist: a vectorized one when no predictor is installed
    and a per-edge Python loop when self.predictor is set.
    """
    cfg = self.cfg
    B = cfg.prewiden_rounds
    if self.predictor is None:
        # vectorized full-refresh fast path (fast_metrics): exact mode
        # recomputes everything every round, and full rebuilds touch all
        # edges — both were a Python per-edge loop (~15ms at 14k edges)
        import numpy as _np
        from certflow.fastgraph import fast_metrics
        # Full rebuild when pre-widening is off, the cache is empty, the
        # quantile grew past the cached one, or the cached one is >30% loose.
        full_now = (
            B <= 0
            or not self._cache_lo
            or q_eff > self._cache_q + 1e-12
            or self._cache_q > 1.30 * q_eff + 1e-12
        )
        if full_now:
            horizon = 0.0 if B <= 0 else self._adaptive_B(q_eff) * cfg.delta
            # 15% quantile headroom only when entries are cached across rounds
            q_used = q_eff if B <= 0 else 1.15 * q_eff
            lo_a, up_a = fast_metrics(
                self._arr_chat, self._arr_tobs, self._arr_rho,
                self.t, q_used, cfg.cost_floor)
            if horizon > 0.0:
                # per-edge staggered horizons: widen each entry to cover
                # its OWN expiry time (linear in age, so additive here)
                h = horizon * self._arr_stagger
                widen = self._arr_rho * h
                lo_a = _np.maximum(lo_a - widen, cfg.cost_floor)
                up_a = up_a + widen
                dues = self.t + h
            else:
                # no pre-widening: every entry is due again immediately
                dues = _np.full(len(self._edge_order), self.t)
            # never-observed edges: floor / cap regardless of the prior
            unobs = ~self._arr_obs
            lo_a[unobs] = cfg.cost_floor
            up_a[unobs] = _UB_CAP
            _np.minimum(up_a, _UB_CAP, out=up_a)
            self._cache_lo = dict(zip(self._edge_order, lo_a.tolist()))
            self._cache_up = dict(zip(self._edge_order, up_a.tolist()))
            self._cache_due = dict(zip(self._edge_order, dues.tolist()))
            self._arr_due = dues
            if B > 0:
                self._cache_q = q_used
            # write costs straight into the flat arrays and start both
            # searches from scratch on them
            self._flat_lo.cost[self._slots_lo] = lo_a
            self._flat_up.cost[self._slots_up] = up_a
            self.sp_lower = FastDStarLite(
                self._graph_lower_cache, self.start, self.goal,
                flat=self._flat_lo)
            self.sp_upper = FastDStarLite(
                self._graph_lower_cache, self.start, self.goal,
                flat=self._flat_up)
            # adjacency VALUES are consumed only by the alternatives
            # helper, which refreshes them on demand (_graph_lower_with);
            # the engines read costs from the flat arrays — skip the
            # O(|E|) dict-of-dicts rebuild here
            return
        # vectorized staggered due-subset (small by construction)
        mask = self._arr_due <= self.t
        if mask.any():
            idx = _np.nonzero(mask)[0]
            B_eff = self._adaptive_B(q_eff)
            h = B_eff * cfg.delta * self._arr_stagger[idx]
            # drift widening at the pre-widened age (current age + horizon)
            widen = self._arr_rho[idx] * (
                (self.t - self._arr_tobs[idx]) + h)
            # reuse the cached quantile so all cached entries share one q
            q_used = self._cache_q
            lo_sub = _np.maximum(
                self._arr_chat[idx] - q_used - widen, cfg.cost_floor)
            up_sub = _np.minimum(
                self._arr_chat[idx] + q_used + widen, _UB_CAP)
            unobs = ~self._arr_obs[idx]
            lo_sub[unobs] = cfg.cost_floor
            up_sub[unobs] = _UB_CAP
            self._arr_due[idx] = self.t + h
            lo_chg, up_chg = {}, {}
            for j, li, ui in zip(idx.tolist(), lo_sub.tolist(),
                                 up_sub.tolist()):
                e = self._edge_order[j]
                self._cache_due[e] = self._arr_due[j]
                # record only entries whose value actually changed
                if li != self._cache_lo.get(e):
                    lo_chg[e] = self._cache_lo[e] = li
                if ui != self._cache_up.get(e):
                    up_chg[e] = self._cache_up[e] = ui
            # many changes: rebuild from scratch; few: incremental repair
            if len(lo_chg) > 0.3 * len(self.beliefs):
                self._rebuild_searches()
            elif lo_chg or up_chg:
                if lo_chg:
                    self.sp_lower.update_edges(lo_chg)
                if up_chg:
                    self.sp_upper.update_edges(up_chg)
        return
    # ---- predictor installed: per-edge Python path ----
    if cfg.adaptive_rate and B > 0:
        B = self._adaptive_B(q_eff)
    if B <= 0:
        lo, up = self._metrics(q_eff)
        self._cache_lo, self._cache_up = lo, up
        self._rebuild_searches()  # all edges changed: scratch beats repair
        return

    full = (
        not self._cache_lo
        or q_eff > self._cache_q + 1e-12  # unsound to keep: rebuild
        or self._cache_q > 1.30 * q_eff + 1e-12  # too loose: rebuild
    )
    # headroom 1.15: a growing quantile forces a full rebuild, and on
    # large graphs each rebuild is an O(|E|) + scratch-search event (the
    # p95 spikes); more headroom = fewer events at ~15% width on the
    # noise term only (the drift term dominates under drift anyway)
    q_used = 1.15 * q_eff if full else self._cache_q
    horizon = B * cfg.delta
    lo_chg: dict[Edge, float] = {}
    up_chg: dict[Edge, float] = {}
    for e, b in self.beliefs.items():
        # skip entries that are still within their pre-widened validity
        if not (full or self.t >= self._cache_due.get(e, -math.inf)):
            continue
        if not b.observed:
            # unknown edge (see _metrics): floor / unbounded until seen
            lo_v, up_v = cfg.cost_floor, _UB_CAP
            if lo_v != self._cache_lo.get(e):
                lo_chg[e] = self._cache_lo[e] = lo_v
            if up_v != self._cache_up.get(e):
                up_chg[e] = self._cache_up[e] = up_v
            self._cache_due[e] = self.t + horizon
            continue
        a_pre = b.age(self.t) + horizon
        pi = self._pred_interval(e, a_pre)  # pre-widened age: conservative
        if pi is not None:
            c_pi, h_pi = pi
            # NOTE(review): incremented once per edge that uses a predictor
            # interval, not once per round as the name suggests — confirm
            self.pred_used_rounds += 1
            lo_v = max(cfg.cost_floor, c_pi - h_pi)
            up_v = max(cfg.cost_floor, c_pi + h_pi)
        else:
            # no predictor interval: drop any extra alpha charge recorded
            # for this edge and fall back to c_hat +/- (q + rho * age)
            self._edge_alpha_extra.pop(e, None)
            lo_v = max(cfg.cost_floor, b.c_hat - q_used - b.rho * a_pre)
            up_v = max(cfg.cost_floor, b.c_hat + q_used + b.rho * a_pre)
        if lo_v != self._cache_lo.get(e):
            lo_chg[e] = self._cache_lo[e] = lo_v
        if up_v != self._cache_up.get(e):
            up_chg[e] = self._cache_up[e] = up_v
        self._cache_due[e] = self.t + horizon
    if full:
        self._cache_q = q_used
    # When most of the graph changed (full rebuilds, B=0 mode), repairing
    # ~|E| inconsistencies through the priority queue costs far more than
    # one fresh compute — rebuild the search instances from scratch
    # instead (measured: p95 spikes 33-96 ms -> scratch cost ~1-5 ms).
    if len(lo_chg) > 0.3 * len(self.beliefs):
        self._rebuild_searches()
        return
    if lo_chg:
        self.sp_lower.update_edges(lo_chg)
    if up_chg:
        # the cache keeps the uncapped value; the search sees it capped
        self.sp_upper.update_edges({e: min(c, _UB_CAP) for e, c in up_chg.items()})
|
|
639
|
+
|
|
640
|
+
@property
def _alpha_prime_eff(self) -> float:
    """Effective claim level.

    With decision-uniform alpha-spending enabled the configured
    ``alpha_prime`` is split evenly over ``max_decisions`` (treated as at
    least 1); otherwise ``alpha_prime`` is returned unchanged.
    """
    cfg = self.cfg
    if not cfg.decision_uniform:
        return cfg.alpha_prime
    n_decisions = max(cfg.max_decisions, 1)
    return cfg.alpha_prime / n_decisions
|
|
646
|
+
|
|
647
|
+
def _q(self, path_len: int) -> float:
    """Return the per-edge conformal quantile for a path of ``path_len`` edges.

    The path-level alpha (ACI working alpha when ``cfg.use_aci``, else the
    effective claim level) is split over the path by ``path_alpha_edge``.
    As side effects the method stores the claim level in
    ``self._alpha_claim`` and the per-edge level in
    ``self._last_alpha_edge``.
    """
    cfg = self.cfg
    claim_level = self._alpha_prime_eff
    if cfg.use_aci:
        path_level = self.aci.working_alpha()
    else:
        path_level = claim_level

    n_edges = max(path_len, 1)
    if cfg.strict_lb_alpha:
        # GAP-A: the unknown optimum's edges must be covered as well, so
        # split over the longest possible simple path, not the current one
        n_edges = max(n_edges, len(self._graph_lower_cache) - 1)

    # annealing floor: the smallest per-edge level the buffer supports
    self._alpha_claim = claim_level
    if cfg.anneal_alpha:
        mass = self.scorer.effective_mass(self.t)
        if mass <= 0.0:
            # empty buffer: nothing supportable
            self._alpha_claim = 1.0
        else:
            edge_floor = (1.0 + 1e-9) / (mass + 1.0)
            supportable = min(1.0, n_edges * edge_floor)
            path_level = max(path_level, supportable)
            self._alpha_claim = max(claim_level, supportable)

    per_edge = path_alpha_edge(path_level, n_edges)
    self._last_alpha_edge = per_edge
    return self.scorer.quantile(per_edge, self.t)
|
|
670
|
+
|
|
671
|
+
def round(self) -> tuple[Certificate, Edge | None]:
    """One replanning round. Returns the certificate and the sensed edge.

    Sequence: refresh the interval metrics at the current quantile, compute
    the optimistic (LB) and conservative (UB) shortest paths, choose the
    incumbent, build the Certificate, then sense up to ``n_sense`` edges.

    Returns:
        ``(cert, sensed)`` where ``sensed`` is the FIRST edge observed in
        this round, or None when nothing was sensed.

    Side effects: advances ``self.t`` by ``cfg.delta`` and
    ``self._round_idx`` by one; updates incumbent, kappa, churn and
    gap-stall state; ingests observations (see ingest_observation).
    """
    cfg = self.cfg

    # Step 1-2: iterate q <-> path-length coupling once (L feeds Bonferroni).
    # Start from last known L or a Dijkstra-free guess of 1.
    self._update_online_rho()
    L_guess = getattr(self, "_last_L", 1)
    q = self._q(L_guess)
    # an infinite quantile (warm-up) contributes zero width here
    q_eff = (q if math.isfinite(q) else 0.0) * cfg.latent_margin
    # warm-up: intervals exist but the certificate is INVALID via confidence
    self._refresh_metrics(q_eff)

    sum_aware_L = 0  # set to the incumbent length when the T4 bound is used
    p_lb, lb = self.sp_lower.shortest_path()
    lb_edges = path_edges(p_lb)
    L = max(len(lb_edges), 1)
    if L != L_guess:  # one refinement pass with the right Bonferroni level
        q = self._q(L)
        q_eff = (q if math.isfinite(q) else 0.0) * cfg.latent_margin
        self._refresh_metrics(q_eff)
        p_lb, lb = self.sp_lower.shortest_path()
        lb_edges = path_edges(p_lb)
        L = max(len(lb_edges), 1)
    self._last_L = L
    lo, up = self._cache_lo, self._cache_up

    # stabilized sensing target (see PlannerConfig.stabilize_sensing)
    sense_path = p_lb
    if cfg.stabilize_sensing and p_lb is not None:
        ps = self._p_sense
        # keep the previous target while it still joins start to goal and
        # its optimistic cost is within sense_path_tol of the current LB
        if (
            ps
            and ps[0] == self.start
            and ps[-1] == self.goal
            and sum(lo[e] for e in path_edges(ps))
            <= (1.0 + cfg.sense_path_tol) * lb
        ):
            sense_path = ps
        self._p_sense = list(sense_path)
    sense_edges = path_edges(sense_path) if sense_path else lb_edges

    # Step 3: UB = min over (u-cost of optimistic path, u-cost of
    # conservative shortest path); any path's u-cost upper-bounds OPT.
    p_ub, _ = self.sp_upper.shortest_path()
    ub_edges = path_edges(p_ub) if p_ub is not None else []
    ub_candidates = []  # (u-cost, path) pairs
    if p_lb is not None:
        ub_candidates.append((sum(up[e] for e in lb_edges), p_lb))
    if p_ub is not None:
        ub_candidates.append((sum(up[e] for e in ub_edges), p_ub))
    prev = self._trimmed_prev_incumbent()
    if prev is not None:
        ub_candidates.append(
            (sum(up[e] for e in path_edges(prev)), prev)
        )
    if cfg.stabilize_sensing and sense_path is not None and sense_path is not p_lb:
        # the stabilized sensing target's edges are the fresh ones; its
        # u-cost completes the gap bound u(P_s) - LB <= width(P_s) + tol*LB
        ub_candidates.append(
            (sum(up[e] for e in sense_edges), sense_path)
        )
    if not ub_candidates:
        ub, incumbent = math.inf, []
    else:
        if cfg.sum_aware_ub and math.isfinite(q) and prev is not None:
            # T4: tighter UB on the standing incumbent ONLY, gated on
            # freshness — every edge re-observed since this path became
            # the incumbent. Post-selection observations are independent
            # of the selection event, so the fixed-path theorem applies
            # conditionally; without the gate the winner's curse breaks
            # coverage (measured: 0.823 in the noise-dominated regime).
            pe = path_edges(prev)
            fresh = pe and all(
                self.beliefs[e].t_obs >= self._incumbent_since for e in pe
            )
            if fresh:
                # NOTE(review): this uses cfg.alpha_prime while _q uses
                # self._alpha_prime_eff (alpha'/N under decision_uniform)
                # — confirm the block quantile level is intended.
                alpha_path = (
                    self.aci.working_alpha() if cfg.use_aci else cfg.alpha_prime
                )
                m = self.scorer.block_quantile(alpha_path, self.t, len(pe))
                if math.isfinite(m):
                    sum_aware_L = len(pe)
                    # sum of point estimates + one block-level noise term
                    # + per-edge drift widening
                    c_sum = (
                        sum(self.beliefs[e].c_hat for e in pe)
                        + cfg.latent_margin * m
                        + sum(self.beliefs[e].rho * self.beliefs[e].age(self.t)
                              for e in pe)
                    )
                    # tighten only the entry that IS the previous incumbent
                    ub_candidates = [
                        (min(c, c_sum), p) if p is prev else (c, p)
                        for c, p in ub_candidates
                    ]
        # the certificate always reports the tightest bound
        ub = min(c for c, _ in ub_candidates)
        if cfg.use_kappa:
            # among near-tightest candidates prefer the best-reinforced path
            slack = cfg.kappa_slack_frac * cfg.epsilon
            eligible = [p for c, p in ub_candidates if c <= ub + slack]
            incumbent = max(eligible, key=self._kappa_score)
        else:
            incumbent = min(ub_candidates, key=lambda x: x[0])[1]
    incumbent_edges = path_edges(incumbent)
    if cfg.use_kappa:
        # decay every conductivity, then reinforce the incumbent's edges
        decay = cfg.kappa_decay
        for e in self._kappa:
            self._kappa[e] *= decay
        for e in incumbent_edges:
            self._kappa[e] = self._kappa.get(e, 0.0) + 1.0
    if set(incumbent_edges) != set(path_edges(self._prev_incumbent)):
        self._incumbent_since = self.t  # freshness gate resets (T4)
    self._prev_incumbent = list(incumbent) if incumbent else []

    # Churn set (T7): edges recently on the optimistic path; the floor
    # and the sensing rotation must cover this set, not just today's path
    for e in lb_edges:
        self._churn_seen[e] = self._round_idx
    cutoff = self._round_idx - cfg.churn_window
    self._churn_seen = {
        e: r for e, r in self._churn_seen.items() if r >= cutoff
    }
    churn_edges = list(self._churn_seen)
    K = max(len(churn_edges), L, 1)

    # Step 4: churn-aware T2' certifiability floor (T7): round-robin over
    # the K-edge churn set at rate k bounds every current-path age by
    # (K-1)*Delta/k, so the sustainable floor uses K, not L
    rho_bar = max((self.beliefs[e].rho for e in lb_edges), default=0.0)
    k_now = 1  # the floor is reported at a sensing rate of one edge/round
    eps_floor = 2 * L * q_eff + 2 * rho_bar * cfg.delta * L * (K - 1) / k_now
    attainable = cfg.epsilon >= eps_floor and math.isfinite(q)

    # Confidence: 1 - alpha_claim - sum of Delta_stale over the certifying
    # path. The CLAIM is the annealed level (>= alpha_prime; equals it
    # once the buffer supports the target) — never ACI's working alpha,
    # which only modulates interval width.
    d_stale = self.scorer.delta_stale(self.t)
    stale_total = L * d_stale
    if sum_aware_L:
        # T4's UB-side staleness is the BLOCK-level term (audit GAP-B;
        # block_delta_stale was dead code): charge the larger of the two
        # accountings — conservative, hence sound
        stale_total = max(
            stale_total,
            self.cfg.latent_margin
            * self.scorer.block_delta_stale(self.t, sum_aware_L),
        )
    alpha_claim = getattr(self, "_alpha_claim", self.cfg.alpha_prime)
    # add any per-edge extra alpha recorded for edges on the LB path
    alpha_claim += sum(self._edge_alpha_extra.get(e, 0.0) for e in lb_edges)
    confidence = (
        max(0.0, 1.0 - alpha_claim - stale_total)
        if math.isfinite(q)
        else 0.0
    )

    cert = Certificate(
        lb=lb if p_lb is not None else math.inf,
        ub=ub,
        confidence=confidence,
        path=incumbent or [],
        epsilon_attainable=attainable,
        epsilon_floor=eps_floor,
    )

    # Step 5-6: sense unless certified; certified rounds still perform
    # maintenance sensing (projected-expiry + calibration-freshness floor),
    # otherwise the buffer ages and the claim self-extinguishes even in a
    # static world (observed in Tier-0).
    sensed: Edge | None = None
    # certification requires the claim to have annealed past the floor:
    # a gap <= epsilon at confidence 0.1 must not stop sensing
    certified = (
        cert.valid
        and cert.gap <= cfg.epsilon
        and cert.confidence >= cfg.min_certify_confidence
    )
    maintain = False
    if certified and p_lb is not None:
        # projected gap growth per round from drift on the LB path
        growth = 2.0 * cfg.delta * sum(self.beliefs[e].rho for e in lb_edges)
        expiring = cert.gap + cfg.maintenance_lookahead * growth > cfg.epsilon
        cal_floor = self._round_idx % max(cfg.maintenance_every, 1) == 0
        maintain = expiring or cal_floor or cfg.refine_after_certify
    # Adaptive rate (T2'): choose k so the sustainable floor
    # 2*L*q + rho*Delta*L*(L-1)/k meets epsilon when possible.
    n_sense = 1
    if (
        cfg.adaptive_rate
        and math.isfinite(q)
        and not certified
        and cfg.sensing_policy == "cert"
    ):
        noise_floor = 2 * L * q_eff
        if cfg.epsilon > noise_floor and rho_bar > 0:
            k_needed = math.ceil(
                2 * rho_bar * cfg.delta * L * (K - 1)
                / max(cfg.epsilon - noise_floor, 1e-9)
            )
            if k_needed <= cfg.max_sense_per_round:
                n_sense = max(1, k_needed)
            # else: epsilon unattainable even at max rate — do not burn
            # budget chasing it (T2' says no rate <= max can sustain it)
        # gap-stall feedback: the floor formula assumes a fixed path, but
        # optimism attracts the LB to the stalest region and the target
        # churns; when the gap visibly stalls above epsilon, raise the
        # rate (bounded by max_sense_per_round)
        # NOTE(review): nesting of this stall block under the adaptive-rate
        # condition was reconstructed from a flattened listing — confirm.
        if cert.gap >= self._last_gap - 1e-9:
            self._stall += 1
        else:
            self._stall = 0
        n_sense = min(
            cfg.max_sense_per_round, n_sense + self._stall // 5
        )
        self._last_gap = cert.gap if math.isfinite(cert.gap) else self._last_gap

    sensed_list: list[Edge] = []
    alt: set[Edge] | None = None  # per-round latch for alternatives / VOI graph
    for i in range(n_sense):
        pick: Edge | None = None
        if cfg.sensing_policy != "cert":
            # baseline policies sense only while uncertified
            if not certified:
                mean_graph = None
                if cfg.sensing_policy == "voi":
                    mean_graph = self._mean_graph()
                pick = baseline_select(
                    cfg.sensing_policy, self.beliefs, self.t, self._rng,
                    mean_graph=mean_graph, start=self.start, goal=self.goal,
                )
        elif (not certified or maintain) and p_lb is not None and sense_edges:
            if not math.isfinite(q):
                # Warm-up: alternate MAPPING (round-robin the optimistic
                # path) with CALIBRATION (re-observe the oldest already-
                # observed edge — only repeat observations form scores).
                # Without the alternation, unknown-terrain warm-up chases
                # the churning P_lb onto first-touch edges and the buffer
                # starves (measured: 26 scores from 120 observations).
                seen = [
                    e for e, b in self.beliefs.items() if b.observed
                ]
                if (self._round_idx + i) % 2 == 1 and seen:
                    pick = max(seen, key=lambda e: self.beliefs[e].age(self.t))
                else:
                    pick = sense_edges[(self._round_idx + i) % len(sense_edges)]
            elif (cfg.hybrid_sensing and not attainable) or (
                    cfg.refine_after_certify and certified):
                # objective-matched: epsilon unattainable -> VOI on the
                # expected-best route (departure quality is the objective)
                if alt is None:  # latch the mean graph once per round
                    self._mean_graph_round = self._mean_graph()
                    alt = set()
                mean_graph = self._mean_graph_round
                pick = baseline_select(
                    "voi", self.beliefs, self.t, self._rng,
                    mean_graph=mean_graph, start=self.start, goal=self.goal,
                )
            else:
                if cfg.adaptive_rate:
                    # Focused mode, churn-measured (T7): focused sensing
                    # SUPPRESSES churn (measured: K 59 -> 11 ~ L) — far
                    # better than rotating over the churn set, which
                    # spreads observations thin (same cert%, +20% spend).
                    # K still feeds the floor and the rate honestly.
                    pick = select_observation(
                        self.beliefs, sense_edges, [], set(),
                        q_eff, self.t,
                        backstop_age=cfg.backstop_slack * L * cfg.delta,
                    )
                else:
                    # near-optimal alternatives are computed once per round
                    if alt is None:
                        alt = near_optimal_alternatives(
                            self._graph_lower_with(lo), self.start,
                            self.goal, lb, k=cfg.k_alternatives,
                            delta_subopt=cfg.delta_subopt,
                        )
                    pick = select_observation(
                        self.beliefs, sense_edges, ub_edges, alt,
                        q_eff, self.t,
                        backstop_age=cfg.backstop_slack * L * cfg.delta,
                    )
                # NOTE(review): placement of this fallback inside the final
                # else-branch was reconstructed from a flattened listing —
                # confirm it is not meant to cover the VOI branch as well.
                if pick is None and maintain:
                    # static-world maintenance: zero gap-recovery, but the
                    # calibration buffer still needs fresh residuals
                    pick = max(
                        sense_edges, key=lambda e: self.beliefs[e].age(self.t)
                    )
        if pick is None:
            break
        # Observe, score, ACI feedback, belief update. The err event uses
        # the UNCLIPPED interval (T1a observable semantics): the cost-floor
        # clip is justified by latent positivity (c > 0) and is sound
        # inside the search metrics, but the observable y = c + eta can be
        # negative under heavy-tailed noise — testing observables against
        # clipped bounds manufactures spurious miscoverage.
        b_pre = self.beliefs[pick]
        # capture the pre-observation interval before ingest mutates b_pre
        was_observed = b_pre.observed
        half = q_eff + b_pre.rho * b_pre.age(self.t)
        lo_obs, up_obs = b_pre.c_hat - half, b_pre.c_hat + half
        obs = self.ingest_observation(pick)
        covered = lo_obs - 1e-12 <= obs <= up_obs + 1e-12
        # ACI feedback only for calibrated intervals on re-observed edges
        if math.isfinite(q) and was_observed:
            self.aci.update(err=not covered)
        self.sense_spend += self.beliefs[pick].sense_cost
        sensed_list.append(pick)
    self._round_idx += 1
    sensed = sensed_list[0] if sensed_list else None

    self.t += cfg.delta
    return cert, sensed
|
|
977
|
+
|
|
978
|
+
def _mean_graph(self) -> dict[Node, dict[Node, float]]:
    """Build (or reuse) the point-estimate adjacency used by VOI sensing.

    Every edge weight is ``max(c_hat, cost_floor)``. The result is memoised
    against ``self._beliefs_version``: while that version is unchanged the
    previously built dict-of-dicts is returned as-is, since constructing
    it is O(|E|).
    """
    version = self._beliefs_version
    cached = getattr(self, "_mean_graph_cache", None)
    if cached is not None and getattr(self, "_mean_graph_version", None) == version:
        return cached

    min_cost = self.cfg.cost_floor
    estimates = self.beliefs
    adjacency: dict[Node, dict[Node, float]] = {
        tail: {
            head: max(estimates[(tail, head)].c_hat, min_cost)
            for head in heads
        }
        for tail, heads in self._graph_lower_cache.items()
    }
    self._mean_graph_cache = adjacency
    self._mean_graph_version = version
    return adjacency
|
|
993
|
+
|
|
994
|
+
def _graph_lower_with(self, lo: dict[Edge, float]) -> dict[Node, dict[Node, float]]:
    """Write the lower-bound costs in ``lo`` into the shared adjacency.

    ``self._graph_lower_cache`` is updated in place, one entry per
    ``(u, v)`` key of ``lo``, and that same object is returned.
    """
    adjacency = self._graph_lower_cache
    for edge, cost in lo.items():
        tail, head = edge
        adjacency[tail][head] = cost
    return adjacency
|
|
998
|
+
|
|
999
|
+
def ingest_observation(self, e: Edge) -> float:
    """Observe edge e now and absorb it: drift-adjusted nonconformity
    score into the calibration buffer (theory note: the deterministic
    widening is removed so scores stay ~exchangeable under A1), belief
    update projected into the feasible set, metric-cache expiry. Used by
    sensing (paid) and by traversal (free observation while moving).

    Args:
        e: the edge to observe.

    Returns:
        The raw observation returned by ``self.world.observe(e, self.t)``
        (NOT clipped to the cost floor; only the stored c_hat is clipped).
    """
    b = self.beliefs[e]
    obs = self.world.observe(e, self.t)
    # predictor residuals are scored only against a real prior observation
    if self.predictor is not None and b.observed:
        pred = self.predictor(e, self.t, self.beliefs)
        if pred is not None:
            self.binned.push(abs(obs - pred), self.t, b.age(self.t))
    # an edge already observed but not yet counted starts from 1
    old_count = self._obs_count.get(e, 1 if b.observed else 0)
    self._obs_count[e] = old_count + 1
    # A score is only a valid noise-pair score when a real previous
    # observation exists (never against a prior). Thinned mode keeps only
    # disjoint pairs: the 2nd, 4th, ... observation of each edge.
    if b.observed and (
        not self.cfg.thinned_scores or self._obs_count[e] % 2 == 0
    ):
        # residual against the PREVIOUS estimate, minus the drift allowance
        score = abs(obs - b.c_hat) - b.rho * b.age(self.t)
        self.scorer.push(score, self.t)
        self.scorer.push_signed(obs - b.c_hat, self.t)
        self.cal_rho_a_max = max(self.cal_rho_a_max, b.rho * b.age(self.t))
    # NOTE(review): this rate-sample block is placed outside the
    # `b.observed` guard above; the nesting was reconstructed from a
    # flattened listing — confirm it should also run for first observations.
    age = b.age(self.t)
    if self.cfg.rho_mode == "online" and age >= self.cfg.delta:
        # empirical drift-rate sample: change per unit of elapsed time
        self._rate_samples.append(abs(obs - b.c_hat) / age)
        if len(self._rate_samples) > 2000:
            del self._rate_samples[0]  # keep at most 2000 samples (drop oldest)
    # belief update happens only AFTER all scores used the old c_hat / age
    b.c_hat = max(obs, self.cfg.cost_floor)
    b.t_obs = self.t
    b.observed = True
    # mirror the belief into the flat arrays used by the vectorized paths
    i = self._edge_idx[e]
    self._arr_chat[i] = b.c_hat
    self._arr_tobs[i] = b.t_obs
    self._arr_obs[i] = True
    self._arr_due[i] = self.t  # expire alongside _cache_due
    self._beliefs_version += 1  # invalidates the cached snapshot gate
    self._cache_due[e] = self.t  # expire: fresh metric next round
    return obs
|
|
1039
|
+
|
|
1040
|
+
def _kappa_score(self, path: list[Node]) -> float:
    """Average conductivity of the edges along ``path``.

    The mean (not the sum) is used so that a longer path gains no
    advantage simply from containing more reinforced edges. Edges without
    a recorded conductivity count as 0.0; a path with no edges scores 0.0.
    """
    hops = path_edges(path)
    if hops:
        conductivity = self._kappa
        total = sum(conductivity.get(hop, 0.0) for hop in hops)
        return total / len(hops)
    return 0.0
|
|
1047
|
+
|
|
1048
|
+
def _trimmed_prev_incumbent(self) -> list[Node] | None:
    """Return the previous incumbent cut down to begin at the current start.

    None is returned when there is no previous incumbent, when it does not
    end at the goal, when the current start does not lie on it (so it is
    no longer a valid s-g path), or when fewer than two nodes remain.
    """
    route = self._prev_incumbent
    ends_at_goal = bool(route) and route[-1] == self.goal
    if not ends_at_goal:
        return None
    try:
        cut = route.index(self.start)
    except ValueError:
        return None
    if len(route) - cut < 2:
        return None
    return route[cut:]
|
|
1060
|
+
|
|
1061
|
+
def snapshot_query(self, s: Node, g: Node, tau: float):
    """Certified O(1) route query via certificate-gated preprocessing.

    Gate: for every edge, the CURRENT interval fits inside the snapshot
    estimate +/- tau (|c_hat_now - c_hat_snap| + lambda*q + rho*a <= tau).
    On the coverage event this puts every true cost within tau of the
    snapshot costs, so the returned (snapshot-optimal) path's true cost
    is within |P|*tau of its reported cost and within 2|P|*tau of the
    true optimum. Returns dict(path, cost, slack, confidence) or None
    when the gate is closed (the oracle then needs a rebuild or the map
    is genuinely too uncertain — fall back to round()).

    Args:
        s: query source node.
        g: query target node.
        tau: per-edge tolerance of the gate.

    Side effects: may build / invalidate ``self._oracle`` and caches the
    gate verdict in ``self._gate_stamp`` / ``self._gate_ok``.
    """
    import numpy as _np
    from certflow.fastgraph import FlatGraph
    from certflow.snapshot import SnapshotOracle

    # the gate verdict is constant within a planner round: cache it
    stamp = (self._round_idx, self.t, tau, self._beliefs_version)
    if getattr(self, "_gate_stamp", None) == stamp:
        if not self._gate_ok:
            return None
        return self._answer_query(s, g, tau)

    # re-anneal at query time: weighted mass decays between rounds, so
    # the stored per-edge level can fall just below the supportable floor
    mass = self.scorer.effective_mass(self.t)
    if mass <= 0:
        return None  # empty calibration buffer (verdict not cached)
    alpha_edge_q = max(
        getattr(self, "_last_alpha_edge", self._alpha_prime_eff),
        (1.0 + 1e-6) / (mass + 1.0),
    )
    q = self.scorer.quantile(alpha_edge_q, self.t)
    if not math.isfinite(q):
        return None
    # per-edge interval half-width: noise term + drift term (array-valued)
    half = (self.cfg.latent_margin * q
            + self._arr_rho * (self.t - self._arr_tobs))
    if not self._arr_obs.all():
        # any never-observed edge closes the gate for this stamp
        self._gate_stamp = stamp
        self._gate_ok = False
        return None
    if self._oracle is None or self._oracle_chat_snap is None:
        drift_ok = half <= tau  # fresh build: snap == now
    else:
        drift_ok = (_np.abs(self._arr_chat - self._oracle_chat_snap)
                    + half) <= tau
    if not bool(drift_ok.all()):
        # widths/drift exceed tau on some edge: snapshot (if any) expires
        if self._oracle is not None:
            self._oracle.invalidate()
        self._oracle_chat_snap = None
        # rebuild is allowed only when the CURRENT map fits the gate
        if not bool((half <= tau).all()):
            self._gate_stamp = stamp
            self._gate_ok = False
            return None
    if self._oracle is None or not self._oracle.ready:
        if self._flat_mid is None:
            # first use: build the flat graph and the edge -> slot mapping
            self._flat_mid = FlatGraph(
                self._graph_lower_cache,
                extra_nodes=(self.start, self.goal))
            ix = self._flat_mid.index_of
            self._slots_mid = _np.array(
                [self._flat_mid.slot_of(ix[u], ix[v])
                 for u, v in self._edge_order], dtype=_np.int64)
        # snapshot costs are the current point estimates
        self._flat_mid.cost[self._slots_mid] = self._arr_chat
        self._oracle = self._oracle or SnapshotOracle(self._flat_mid)
        self._oracle.build(self.t)
        self._oracle_chat_snap = self._arr_chat.copy()
    # gate open: remember the verdict and the levels the answer will report
    self._gate_stamp = stamp
    self._gate_ok = True
    self._gate_alpha_edge = alpha_edge_q
    self._gate_dstale = self.scorer.delta_stale(self.t)
    return self._answer_query(s, g, tau)
|
|
1135
|
+
|
|
1136
|
+
def _answer_query(self, s: Node, g: Node, tau: float):
    """Answer a snapshot query once the gate is known to be open.

    Returns None when either endpoint is unknown to the flat graph or the
    oracle has no path between them. Otherwise returns a dict with the
    snapshot path, its cost, the ``|P|*tau`` cost slack, the ``2|P|*tau``
    optimality slack and the confidence from ``path_confidence``.
    """
    flat = self._flat_mid
    lookup = flat.index_of
    src = lookup.get(s)
    dst = lookup.get(g)
    if src is None or dst is None:
        return None

    hops = self._oracle.path(src, dst)
    if hops is None:
        return None

    nodes = [flat.node_of(i) for i in hops]
    total = self._oracle.cost(src, dst)
    n_edges = len(nodes) - 1

    # claim the larger of the stored round-level claim and the level
    # implied by this path's length at the gate's per-edge alpha
    stored_claim = getattr(self, "_alpha_claim", self._alpha_prime_eff)
    path_level = min(1.0, n_edges * self._gate_alpha_edge)
    alpha_claim = max(stored_claim, path_level)

    stale_terms = [self._gate_dstale] * max(n_edges, 1)
    return {
        "path": nodes,
        "cost": total,
        "slack": n_edges * tau,
        "opt_slack": 2 * n_edges * tau,
        "confidence": path_confidence(alpha_claim, stale_terms),
    }
|
|
1157
|
+
|
|
1158
|
+
def retarget(self, start: Node, goal: Node) -> None:
    """Begin a new mission in the same environment (lifelong operation).

    Learned memory (beliefs, calibration buffer, ACI state, kappa) is left
    untouched; mission-specific state (incumbent, sensing target,
    gap-stall counters, last path length) is reset. Both searches are
    recreated at the new endpoints, since this is a global change where
    starting from scratch beats repair.
    """
    self.start = start
    self.goal = goal

    # forget everything tied to the previous start/goal pair
    self._prev_incumbent, self._p_sense = [], []
    self._incumbent_since = self.t
    self._stall = 0
    self._last_gap = math.inf
    if hasattr(self, "_last_L"):
        del self._last_L

    if self._cache_lo:
        self._rebuild_searches()
        return

    # retarget before any round: warm-up metrics, fresh engines
    adjacency = self._graph_lower_cache
    self.sp_lower = FastDStarLite(adjacency, start, goal, flat=self._flat_lo)
    capped = dict.fromkeys(self.beliefs, _UB_CAP)
    upper_adjacency = self._to_adj(set(adjacency), capped)
    self.sp_upper = FastDStarLite(
        upper_adjacency, start, goal, flat=self._flat_up)
|
|
1181
|
+
|
|
1182
|
+
def advance_start(self, node: Node) -> None:
    """Record that the robot moved to ``node``.

    The planner's start is updated and the new start is forwarded to the
    lower and then the upper search (D* Lite km offset).
    """
    self.start = node
    for engine in (self.sp_lower, self.sp_upper):
        engine.set_start(node)
|