odin-engine 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. benchmarks/__init__.py +17 -17
  2. benchmarks/datasets.py +284 -284
  3. benchmarks/metrics.py +275 -275
  4. benchmarks/run_ablation.py +279 -279
  5. benchmarks/run_npll_benchmark.py +270 -270
  6. npll/__init__.py +10 -10
  7. npll/bootstrap.py +474 -474
  8. npll/core/__init__.py +33 -33
  9. npll/core/knowledge_graph.py +308 -308
  10. npll/core/logical_rules.py +496 -496
  11. npll/core/mln.py +474 -474
  12. npll/inference/__init__.py +40 -40
  13. npll/inference/e_step.py +419 -419
  14. npll/inference/elbo.py +434 -434
  15. npll/inference/m_step.py +576 -576
  16. npll/npll_model.py +631 -631
  17. npll/scoring/__init__.py +42 -42
  18. npll/scoring/embeddings.py +441 -441
  19. npll/scoring/probability.py +402 -402
  20. npll/scoring/scoring_module.py +369 -369
  21. npll/training/__init__.py +24 -24
  22. npll/training/evaluation.py +496 -496
  23. npll/training/npll_trainer.py +520 -520
  24. npll/utils/__init__.py +47 -47
  25. npll/utils/batch_utils.py +492 -492
  26. npll/utils/config.py +144 -144
  27. npll/utils/math_utils.py +338 -338
  28. odin/__init__.py +21 -20
  29. odin/engine.py +264 -264
  30. odin/schema.py +210 -0
  31. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/METADATA +503 -456
  32. odin_engine-0.2.0.dist-info/RECORD +63 -0
  33. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/licenses/LICENSE +21 -21
  34. retrieval/__init__.py +50 -50
  35. retrieval/adapters.py +140 -140
  36. retrieval/adapters_arango.py +1418 -1418
  37. retrieval/aggregators.py +707 -707
  38. retrieval/beam.py +127 -127
  39. retrieval/budget.py +60 -60
  40. retrieval/cache.py +159 -159
  41. retrieval/confidence.py +88 -88
  42. retrieval/eval.py +49 -49
  43. retrieval/linker.py +87 -87
  44. retrieval/metrics.py +105 -105
  45. retrieval/metrics_motifs.py +36 -36
  46. retrieval/orchestrator.py +571 -571
  47. retrieval/ppr/__init__.py +12 -12
  48. retrieval/ppr/anchors.py +41 -41
  49. retrieval/ppr/bippr.py +61 -61
  50. retrieval/ppr/engines.py +257 -257
  51. retrieval/ppr/global_pr.py +76 -76
  52. retrieval/ppr/indexes.py +78 -78
  53. retrieval/ppr.py +156 -156
  54. retrieval/ppr_cache.py +25 -25
  55. retrieval/scoring.py +294 -294
  56. retrieval/utils/pii_redaction.py +36 -36
  57. retrieval/writers/__init__.py +9 -9
  58. retrieval/writers/arango_writer.py +28 -28
  59. retrieval/writers/base.py +21 -21
  60. retrieval/writers/janus_writer.py +36 -36
  61. odin_engine-0.1.0.dist-info/RECORD +0 -62
  62. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/WHEEL +0 -0
  63. {odin_engine-0.1.0.dist-info → odin_engine-0.2.0.dist-info}/top_level.txt +0 -0
retrieval/orchestrator.py CHANGED
@@ -1,571 +1,571 @@
(The removed and added versions of this file render identically, so the contents are shown once below.)

from __future__ import annotations
import os
from dataclasses import dataclass, field, replace
from typing import Dict, List, Tuple, Optional, Callable, Any
from collections import defaultdict

from .adapters import GraphAccessor, NodeId
from .budget import SearchBudget, BudgetTracker
from .ppr.engines import PushPPREngine, MonteCarloPPREngine, PPRParams
from .ppr.bippr import BiPPREngine
from .ppr.indexes import RandomWalkIndex
from .beam import beam_search, BeamParams
from .confidence import EdgeConfidenceProvider, ConstantConfidence, NPLLConfidence
from .ppr_cache import PPRCache, _key as ppr_cache_key
from .scoring import (
    PathScoreConfig,
    InsightScoreConfig,
    score_paths_and_insight,
)
from .metrics import Timer, MetricsLogger, RetrievalMetrics
from .adapters import OverlayAccessor
from .linker import CoherenceLinker, LinkerConfig, Mention
from .utils.pii_redaction import redact_dict
from .writers.base import PersistenceWriter

# NEW: triage-ready aggregators (with guards & priors surprise)
from .aggregators import build_opportunity_features, decompose_insight_score


@dataclass
class OrchestratorParams:
    community_id: str
    # PPR
    alpha: float = 0.15
    eps: float = 1e-4
    num_walks: int = 5000
    walk_len: int = 40
    topn: int = 200
    # Beam/paths
    hop_limit: int = 3
    beam_width: int = 64
    max_paths: int = 200
    # Scoring
    path_cfg: PathScoreConfig = field(default_factory=PathScoreConfig)
    insight_cfg: InsightScoreConfig = field(default_factory=InsightScoreConfig)


class RetrievalOrchestrator:
    """
    Orchestrates:
      1) Mixed PPR (push + MC) with optional anchor personalization & cache
      2) Beam path enumeration with edge confidences (NPLL if configured)
      3) Path & insight scoring
      4) Aggregation → motifs / relation shares / priors surprise / anchors
      5) Optional baseline (shifted time window) for deltas
      6) Triage score + ICS decomposition for explainability

    Quality guards:
      - Requires NPLL if ODIN_REQUIRE_NPLL=true
      - Normalizes raw path edge schema for aggregators
      - Suppresses low-support metrics automatically
      - Optional single-step dynamic scaling when support is low
    """

    def __init__(
        self,
        accessor: GraphAccessor,
        ppr_cache: Optional[PPRCache] = None,
        edge_confidence: Optional[EdgeConfidenceProvider] = None,
        edge_timestamp_lookup: Optional[Callable[[NodeId, str, NodeId], Optional[float]]] = None,
        walk_index: Optional[RandomWalkIndex] = None,
        redact_pii_in_trace: bool = True,
    ):
        self.A = accessor
        self.cache = ppr_cache or PPRCache()
        self.conf = edge_confidence or ConstantConfidence(0.8)

        # Enforce NPLL when requested by env
        if os.getenv("ODIN_REQUIRE_NPLL", "false").lower() in ("true", "1", "yes"):
            if not isinstance(self.conf, NPLLConfidence):
                raise RuntimeError(
                    "ODIN_REQUIRE_NPLL is enabled but edge_confidence is not NPLLConfidence. "
                    "Provide NPLLConfidence(npll_model)."
                )

        self.ts_lookup = edge_timestamp_lookup
        self.walk_index = walk_index
        self.redact_pii_in_trace = redact_pii_in_trace

    # ----------------------------
    # Public API
    # ----------------------------

    def retrieve(
        self,
        seeds: List[NodeId],
        params: OrchestratorParams,
        budget: Optional[SearchBudget] = None,
        now_ts: Optional[float] = None,
        include_baseline: bool = False,
        anchor_prior: Optional[Dict[NodeId, float]] = None,
        beam_override: Optional[BeamParams] = None,
        dynamic_scale: bool = True,
        min_paths_for_confidence: int = 8,
    ) -> Dict[str, object]:
        """
        Core retrieval + aggregation + triage pipeline.
        Optionally performs a single dynamic scale-up pass if support is low.
        """
        primary = self._retrieve_once(
            seeds=seeds,
            params=params,
            budget=budget,
            now_ts=now_ts,
            include_baseline=include_baseline,
            anchor_prior=anchor_prior,
            beam_override=beam_override,
        )

        # Single-step dynamic expansion if needed (low support or no paths)
        need_scale = dynamic_scale and (
            len(primary.get("paths", [])) < min_paths_for_confidence
            or (primary.get("aggregates", {}).get("summary", {}).get("low_support") is True)
        )

        if not need_scale:
            return primary

        # Expand hop/beam a notch, reuse same PPR
        scaled_params = replace(
            params,
            hop_limit=min(params.hop_limit + 1, 4),
            beam_width=min(params.beam_width * 2, 256),
            max_paths=min(params.max_paths * 2, 400),
        )

        scaled = self._retrieve_once(
            seeds=seeds,
            params=scaled_params,
            budget=budget,
            now_ts=now_ts,
            include_baseline=include_baseline,
            anchor_prior=anchor_prior,
            beam_override=beam_override,
        )

        # Choose better result by (insight_score, then #paths)
        def _key(r: Dict[str, Any]) -> Tuple[float, int]:
            return (float(r.get("insight_score", 0.0)), len(r.get("paths", [])))

        return scaled if _key(scaled) > _key(primary) else primary

    def score_candidates(
        self,
        source: NodeId,
        targets: List[NodeId],
        params: OrchestratorParams,
        budget: Optional[SearchBudget] = None,
    ) -> Dict[str, object]:
        """
        Efficiently score a given candidate set via BiPPR.
        """
        bt = BudgetTracker(budget or SearchBudget())
        bippr = BiPPREngine(graph=self.A, alpha=params.alpha, rmax=params.eps)
        walks = max(1000, min(params.num_walks, bt.left().max_edges))
        scores = bippr.score(source=source, targets=targets, walks=walks)
        return {
            "candidate_scores": scores,
            "used_budget": bt.usage.__dict__,
            "trace": {"engine": "bippr", "walks": walks},
        }

    def link_and_retrieve(
        self,
        mentions: List[Mention],
        base_accessor: GraphAccessor,
        params: OrchestratorParams,
        seeds_from_linked: int = 3,
        budget: Optional[SearchBudget] = None,
        now_ts: Optional[float] = None,
        persistence_writer: Optional[PersistenceWriter] = None,
    ) -> Dict[str, object]:
        linker_cfg = getattr(self, "linker_cfg", LinkerConfig())
        linker = CoherenceLinker(linker_cfg)
        linked = linker.link(mentions)

        overlay = OverlayAccessor(base_accessor, params.community_id)
        seeds: List[NodeId] = []
        for m in mentions:
            if m.mention_id in linked:
                ent = linked[m.mention_id]["entity_id"]
                conf = linked[m.mention_id]["link_confidence"]
                overlay.add_edge(m.surface, "mentions", ent, conf)
                seeds.append(ent)
        seeds = list(dict.fromkeys(seeds))[: seeds_from_linked]

        prev_A = self.A
        try:
            self.A = overlay
            res = self.retrieve(seeds=seeds, params=params, budget=budget, now_ts=now_ts)
            res["trace"]["linked_entities"] = linked

            if persistence_writer is not None:
                for m in mentions:
                    if m.mention_id in linked:
                        ent = linked[m.mention_id]["entity_id"]
                        conf = linked[m.mention_id]["link_confidence"]
                        try:
                            persistence_writer.maybe_write_link(
                                src_entity=ent,
                                rel="linked_from_text",
                                dst_entity=ent,
                                confidence=conf,
                                metadata={"mention_id": m.mention_id},
                            )
                        except Exception:
                            pass

            return res
        finally:
            self.A = prev_A

    # ----------------------------
    # Internal helpers
    # ----------------------------

    def _retrieve_once(
        self,
        seeds: List[NodeId],
        params: OrchestratorParams,
        budget: Optional[SearchBudget],
        now_ts: Optional[float],
        include_baseline: bool,
        anchor_prior: Optional[Dict[NodeId, float]],
        beam_override: Optional[BeamParams],
    ) -> Dict[str, object]:
        bt = BudgetTracker(budget or SearchBudget(max_paths=params.max_paths))
        tm = Timer()

        # Personalization vector (seeds + optional anchors)
        personalization: Dict[NodeId, float] = defaultdict(float)
        for s in seeds:
            personalization[s] += 1.0
        if anchor_prior:
            for n, p in anchor_prior.items():
                personalization[n] += float(p)

        total_mass = sum(personalization.values())
        if total_mass > 0:
            personalization = {n: v / total_mass for n, v in personalization.items()}
        else:
            # Fallback: uniform over community if no seeds/anchors
            all_nodes = list(self.A.nodes(params.community_id))
            if not all_nodes:
                return self._empty_result(bt, tm)
            uniform = 1.0 / len(all_nodes)
            personalization = {n: uniform for n in all_nodes}

        # ---- PPR (with cache) ----
        prior_hash = str(hash(frozenset(anchor_prior.items()))) if anchor_prior else ""
        key = ppr_cache_key(params.community_id, seeds, params.alpha, engine="push+mc", prior_hash=prior_hash)
        cached = self.cache.get(key)

        if cached is not None:
            ppr_scores = cached
            ppr_trace = {"engine": "cache", "cache_hit": True}
            tm.mark("ppr_done")
        else:
            push = PushPPREngine(self.A, params.community_id)
            mc = MonteCarloPPREngine(self.A, params.community_id, walk_index=self.walk_index)
            pp = PPRParams(alpha=params.alpha, eps=params.eps, num_walks=0, walk_len=0, topn=params.topn)
            mm = PPRParams(alpha=params.alpha, eps=params.eps, num_walks=params.num_walks, walk_len=params.walk_len, topn=params.topn)

            pr_push = push.run(list(personalization.keys()), pp, bt.left(), personalization=personalization)
            pr_mc = mc.run(list(personalization.keys()), mm, bt.left(), personalization=personalization)

            push_mass = max(pr_push.mass, 1e-12)
            resid = max(1.0 - push_mass, 0.0)

            combined: Dict[NodeId, float] = {}
            for n, p in pr_push.scores:
                combined[n] = combined.get(n, 0.0) + p
            for n, p in pr_mc.scores:
                combined[n] = combined.get(n, 0.0) + p * resid

            Z = sum(combined.values()) or 1.0
            ppr_scores = sorted(((n, v / Z) for n, v in combined.items()), key=lambda kv: kv[1], reverse=True)[: params.topn]
            ppr_trace = {"engine": "push+mc", "push": pr_push.trace, "mc": pr_mc.trace, "cache_hit": False}
            self.cache.put(key, ppr_scores)
            tm.mark("ppr_done")

        # ---- Beam search (paths) ----
        beam = beam_override or BeamParams(hop_limit=params.hop_limit, beam_width=params.beam_width, max_paths=params.max_paths)
        path_out = beam_search(
            accessor=self.A,
            community_id=params.community_id,
            seeds=seeds,
            ppr_scores=ppr_scores,
            budget=bt.left(),
            beam_params=beam,
            conf_provider=self.conf,
        )
        tm.mark("beam_done")

        # ---- Scoring ----
        candidate_paths = [[(e["u"], e["rel"], e["v"]) for e in p.get("edges", [])] for p in path_out.get("paths", [])]
        scored = score_paths_and_insight(
            accessor=self.A,
            community_id=params.community_id,
            seeds=seeds,
            node_ppr_scores=ppr_scores,
            candidate_paths=candidate_paths,
            conf_provider=self.conf,
            now_ts=now_ts,
            edge_timestamp_lookup=self.ts_lookup,
            path_cfg=params.path_cfg,
            insight_cfg=params.insight_cfg,
        )
        tm.mark("scoring_done")

        # ---- Normalize edges for aggregators (add relation/labels/ts/provenance keys) ----
        paths_norm = self._normalize_paths_for_aggregators(scored.get("paths", []))

        # ---- Baseline retrieval (optional; shifted window) ----
        baseline_paths_norm: Optional[List[Dict[str, Any]]] = None
        if include_baseline and getattr(self.A, "time_window", None):
            baseline_paths_norm = self._get_shifted_baseline_paths(
                seeds=seeds,
                params=params,
                scorer_output_like=scored,
            )

        # ---- Priors (if accessor exposes them) ----
        try:
            priors: Dict[str, float] = getattr(self.A, "get_edge_type_priors")(params.community_id)  # type: ignore
        except Exception:
            priors = {}

        # ---- Half-life (~1.5x window length clamped 14–60) ----
        half_life_days = self._half_life_from_accessor_window(default=30.0)

        # ---- Aggregates + triage (includes motif density, priors surprise, anchors, low-support flags) ----
        features = build_opportunity_features(
            paths=paths_norm,
            baseline_paths=baseline_paths_norm,
            priors=priors,
            half_life_days=half_life_days,
            controllability=1.0,  # default; Discovery can override per-op
        )

        # ---- Insight score decomposition for explainability (uses scored metrics) ----
        ics_decomposed = decompose_insight_score(
            paths=paths_norm,
            evidence_strength=float(scored.get("evidence_strength", 0.0)),
            community_relevance=float(scored.get("community_relevance", 0.0)),
            insight_score=float(scored.get("insight_score", 0.0)),
        )

        result = {
            "topk_ppr": ppr_scores,
            "paths": paths_norm,  # normalized, aggregator-friendly
            "evidence_strength": scored.get("evidence_strength", 0.0),
            "community_relevance": scored.get("community_relevance", 0.0),
            "insight_score": scored.get("insight_score", 0.0),
            "aggregates": features["aggregates"],
            "triage": features["triage"],
            "ics": ics_decomposed,
            "used_budget": bt.usage.__dict__ | {"beam": path_out.get("used_budget", {})},
            "trace": {
                "ppr": ppr_trace,
                "beam": path_out.get("trace", {}),
                "params": params,
                "timings_ms": tm.marks | {"total": tm.elapsed_ms()},
                "linker_cfg": getattr(self, "linker_cfg", None),
            },
        }

        # Optional external metrics logger
        if hasattr(self, "metrics_logger") and isinstance(self.metrics_logger, MetricsLogger):
            esr = path_out.get("trace", {}).get("early_stop_reason")
            self.metrics_logger.log(
                RetrievalMetrics(
                    query_id=None,
                    community_id=params.community_id,
                    seeds_count=len(seeds),
                    ppr_mass=sum(p for _, p in ppr_scores),
                    topk=len(ppr_scores),
                    used_budget=result["used_budget"],
                    latency_ms=tm.elapsed_ms(),
                    early_stop_reason=esr,
                    engine=ppr_trace.get("engine", "unknown"),
                    notes={"timings": tm.marks},
                )
            )

        # PII redaction for trace
        if self.redact_pii_in_trace:
            result["trace"] = redact_dict(result["trace"])

        return result

    # ---- schema normalization & utilities ----

    def _normalize_paths_for_aggregators(self, paths: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Ensure each edge has aggregator-friendly fields:
        relation, u_label, v_label, confidence, created_at/timestamp, provenance{document_id?}
        """
        out: List[Dict[str, Any]] = []

        def _label(n: Any) -> Optional[str]:
            try:
                # Accessor MAY expose label(...) or node_label(...)
                if hasattr(self.A, "label"):
                    return self.A.label(n)  # type: ignore
                if hasattr(self.A, "node_label"):
                    return self.A.node_label(n)  # type: ignore
            except Exception:
                pass
            return None

        for p in paths:
            norm_edges: List[Dict[str, Any]] = []
            for e in p.get("edges", []):
                u = e.get("u")
                v = e.get("v")
                rel = e.get("rel") or e.get("relation") or e.get("relationship")

                # Timestamp: prefer provided; else ts_lookup fallback (seconds)
                ts = e.get("created_at", e.get("timestamp"))
                if ts is None and self.ts_lookup and (u is not None and v is not None and rel is not None):
                    try:
                        ts_lookup_val = self.ts_lookup(u, str(rel), v)
                        ts = ts_lookup_val
                    except Exception:
                        ts = None

                # Provenance
                prov = e.get("provenance")
                if prov is None:
                    # Common alternates
                    doc_id = e.get("source_doc") or e.get("doc_id")
                    if doc_id:
                        prov = {"document_id": doc_id}

                norm_edges.append(
                    {
                        "u": u,
                        "v": v,
                        "relation": rel,
                        "u_label": e.get("u_label") or _label(u),
                        "v_label": e.get("v_label") or _label(v),
                        "confidence": e.get("confidence", e.get("weight", 1.0)),
                        "created_at": ts,  # ISO or epoch; aggregator can parse both
                        "provenance": prov,
                        "_id": e.get("_id", e.get("id")),
                    }
                )

            out.append({"id": p.get("id"), "score": p.get("score"), "edges": norm_edges})

        return out

    def _get_shifted_baseline_paths(
        self,
        seeds: List[NodeId],
        params: OrchestratorParams,
        scorer_output_like: Dict[str, Any],
    ) -> List[Dict[str, Any]]:
        """
        Run a light baseline retrieval by shifting self.A.time_window back by its duration.
        Returns normalized paths (aggregator schema).
        """
        original_window = getattr(self.A, "time_window", None)
        if not original_window:
            return []

        start_ts, end_ts = original_window
        start_f = self._coerce_ts(start_ts)
        end_f = self._coerce_ts(end_ts)
        if start_f is None or end_f is None or end_f <= start_f:
            return []

        duration = end_f - start_f
        baseline_window = (max(0.0, start_f - duration), start_f)

        # Temporarily swap window
        self.A.time_window = baseline_window  # type: ignore[attr-defined]
        try:
            baseline_budget = SearchBudget(max_paths=max(20, params.max_paths // 10), max_edges=1000)
            # Slim params for baseline
            base_params = replace(params, max_paths=baseline_budget.max_paths, beam_width=max(16, params.beam_width // 2))
            baseline_res = self._retrieve_once(
                seeds=seeds,
                params=base_params,
                budget=baseline_budget,
                now_ts=None,
                include_baseline=False,
                anchor_prior=None,
                beam_override=None,
            )
            return baseline_res.get("paths", [])
        finally:
            self.A.time_window = original_window  # type: ignore[attr-defined]

    def _half_life_from_accessor_window(self, default: float = 30.0) -> float:
        """
        ~1.5× window length in days, clamped to [14, 60].
        Falls back to `default` if no valid window on accessor.
        """
        w = getattr(self.A, "time_window", None)
        if not w:
            return default
        s, e = w
        s_f = self._coerce_ts(s)
        e_f = self._coerce_ts(e)
        if s_f is None or e_f is None or e_f <= s_f:
            return default
        days = (e_f - s_f) / 86400.0
        hl = max(14.0, min(60.0, 1.5 * days))
        return hl

    @staticmethod
    def _coerce_ts(ts: Any) -> Optional[float]:
        """
        Coerce ISO string or numeric epoch into float epoch seconds.
        """
        if ts is None:
            return None
        if isinstance(ts, (int, float)):
            return float(ts)
        if isinstance(ts, str):
            try:
                # Allow trailing Z
                from datetime import datetime, timezone
                dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
                return dt.timestamp()
            except Exception:
                return None
        return None

    @staticmethod
    def _empty_result(bt: BudgetTracker, tm: Timer) -> Dict[str, object]:
        return {
            "topk_ppr": [],
            "paths": [],
            "evidence_strength": 0.0,
            "community_relevance": 0.0,
            "insight_score": 0.0,
            "aggregates": {
                "motifs": [],
                "relation_share": {},
                "snippet_anchors": [],
                "summary": {
                    "total_paths": 0,
                    "unique_motifs": 0,
                    "unique_relations": 0,
                    "total_edges": 0,
                    "provenance": 0.0,
                    "recency": 0.0,
                    "label_coverage": 0.0,
                    "motif_density": 0.0,
                    "has_baseline": False,
                    "low_support": True,
                },
            },
            "triage": {"score": 0, "components": {}, "dominant_relation": {}},
            "ics": {"value": 0.0, "label": "Low", "drivers": {}, "quality_gate": {"meets_evidence_floor": False, "has_strong_paths": False, "recommendation": "gather_more_evidence"}},
            "used_budget": bt.usage.__dict__,
            "trace": {"timings_ms": tm.marks | {"total": tm.elapsed_ms()}},
        }
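
A note on the `push+mc` branch of `_retrieve_once` above: Monte-Carlo scores are down-weighted by the residual probability mass the push pass did not settle, and the blend is renormalized before truncation to `topn`. In the code's own symbols:

\[
\mathrm{score}(n) = \frac{p_{\mathrm{push}}(n) + (1 - m_{\mathrm{push}})\,p_{\mathrm{mc}}(n)}{Z},
\qquad
Z = \sum_{n'}\bigl[p_{\mathrm{push}}(n') + (1 - m_{\mathrm{push}})\,p_{\mathrm{mc}}(n')\bigr]
\]

where \(m_{\mathrm{push}}\) is `pr_push.mass` floored at \(10^{-12}\). When the push pass already accounts for nearly all of the mass, the Monte-Carlo term contributes almost nothing to the final ranking.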
+ "topk_ppr": [],
546
+ "paths": [],
547
+ "evidence_strength": 0.0,
548
+ "community_relevance": 0.0,
549
+ "insight_score": 0.0,
550
+ "aggregates": {
551
+ "motifs": [],
552
+ "relation_share": {},
553
+ "snippet_anchors": [],
554
+ "summary": {
555
+ "total_paths": 0,
556
+ "unique_motifs": 0,
557
+ "unique_relations": 0,
558
+ "total_edges": 0,
559
+ "provenance": 0.0,
560
+ "recency": 0.0,
561
+ "label_coverage": 0.0,
562
+ "motif_density": 0.0,
563
+ "has_baseline": False,
564
+ "low_support": True,
565
+ },
566
+ },
567
+ "triage": {"score": 0, "components": {}, "dominant_relation": {}},
568
+ "ics": {"value": 0.0, "label": "Low", "drivers": {}, "quality_gate": {"meets_evidence_floor": False, "has_strong_paths": False, "recommendation": "gather_more_evidence"}},
569
+ "used_budget": bt.usage.__dict__,
570
+ "trace": {"timings_ms": tm.marks | {"total": tm.elapsed_ms()}},
571
+ }
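
One more sketch, covering the ODIN_REQUIRE_NPLL guard in `__init__`. Again hypothetical: `npll_model` stands for whatever trained model `NPLLConfidence` wraps; the error message in the source names `NPLLConfidence(npll_model)` as the expected wiring.

import os
from retrieval.orchestrator import RetrievalOrchestrator
from retrieval.confidence import NPLLConfidence

os.environ["ODIN_REQUIRE_NPLL"] = "true"  # "true", "1", or "yes" all enable the guard

# With the guard on, construction raises RuntimeError unless edge confidences
# come from an NPLL-backed provider:
orch = RetrievalOrchestrator(
    accessor=graph,  # hypothetical GraphAccessor, as above
    edge_confidence=NPLLConfidence(npll_model),
)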