agentfootprint 6.25.0 → 6.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +194 -48
  2. package/dist/esm/lib/context-bisect/ablation.js +183 -0
  3. package/dist/esm/lib/context-bisect/ablation.js.map +1 -0
  4. package/dist/esm/lib/context-bisect/bisect.js +129 -0
  5. package/dist/esm/lib/context-bisect/bisect.js.map +1 -0
  6. package/dist/esm/lib/context-bisect/index.js +22 -0
  7. package/dist/esm/lib/context-bisect/index.js.map +1 -0
  8. package/dist/esm/lib/context-bisect/llmEdgeWeigher.js +0 -0
  9. package/dist/esm/lib/context-bisect/llmEdgeWeigher.js.map +1 -0
  10. package/dist/esm/lib/context-bisect/localize.js +555 -0
  11. package/dist/esm/lib/context-bisect/localize.js.map +1 -0
  12. package/dist/esm/lib/context-bisect/types.js +56 -0
  13. package/dist/esm/lib/context-bisect/types.js.map +1 -0
  14. package/dist/esm/lib/tool-lint/cli.js +6 -1
  15. package/dist/esm/lib/tool-lint/cli.js.map +1 -1
  16. package/dist/esm/observe.js +7 -0
  17. package/dist/esm/observe.js.map +1 -1
  18. package/dist/lib/context-bisect/ablation.js +192 -0
  19. package/dist/lib/context-bisect/ablation.js.map +1 -0
  20. package/dist/lib/context-bisect/bisect.js +133 -0
  21. package/dist/lib/context-bisect/bisect.js.map +1 -0
  22. package/dist/lib/context-bisect/index.js +40 -0
  23. package/dist/lib/context-bisect/index.js.map +1 -0
  24. package/dist/lib/context-bisect/llmEdgeWeigher.js +0 -0
  25. package/dist/lib/context-bisect/llmEdgeWeigher.js.map +1 -0
  26. package/dist/lib/context-bisect/localize.js +563 -0
  27. package/dist/lib/context-bisect/localize.js.map +1 -0
  28. package/dist/lib/context-bisect/types.js +59 -0
  29. package/dist/lib/context-bisect/types.js.map +1 -0
  30. package/dist/lib/tool-lint/cli.js +30 -2
  31. package/dist/lib/tool-lint/cli.js.map +1 -1
  32. package/dist/observe.js +42 -20
  33. package/dist/observe.js.map +1 -1
  34. package/dist/types/lib/context-bisect/ablation.d.ts +97 -0
  35. package/dist/types/lib/context-bisect/ablation.d.ts.map +1 -0
  36. package/dist/types/lib/context-bisect/bisect.d.ts +76 -0
  37. package/dist/types/lib/context-bisect/bisect.d.ts.map +1 -0
  38. package/dist/types/lib/context-bisect/index.d.ts +22 -0
  39. package/dist/types/lib/context-bisect/index.d.ts.map +1 -0
  40. package/dist/types/lib/context-bisect/llmEdgeWeigher.d.ts +125 -0
  41. package/dist/types/lib/context-bisect/llmEdgeWeigher.d.ts.map +1 -0
  42. package/dist/types/lib/context-bisect/localize.d.ts +119 -0
  43. package/dist/types/lib/context-bisect/localize.d.ts.map +1 -0
  44. package/dist/types/lib/context-bisect/types.d.ts +356 -0
  45. package/dist/types/lib/context-bisect/types.d.ts.map +1 -0
  46. package/dist/types/lib/tool-lint/cli.d.ts.map +1 -1
  47. package/dist/types/observe.d.ts +1 -0
  48. package/dist/types/observe.d.ts.map +1 -1
  49. package/package.json +1 -1
package/README.md CHANGED
@@ -1,17 +1,23 @@
1
1
 
2
+
3
+ <h1 align="center">Agentfootprint</h1>
4
+
2
5
  <p align="center">
3
- <picture>
4
- <source media="(prefers-color-scheme: dark)" srcset="docs/assets/hero-dark.svg">
5
- <source media="(prefers-color-scheme: light)" srcset="docs/assets/hero-light.svg">
6
- <img alt="agentfootprint mascot composing context flavors (Skills, Steering, Guardrails, RAG, Tool APIs, Memory) into three structured LLM slots (system, messages, tools) — the central abstraction, visualized." src="docs/assets/hero-light.svg" width="100%"/>
7
- </picture>
6
+ <strong>Your agent picked the wrong tool, gave a wrong answer — and the logs can't tell you why.<br/>Agentfootprint can.</strong>
8
7
  </p>
9
8
 
10
- <h1 align="center">Agentfootprint</h1>
9
+ <p align="center">
10
+ The explainable agent framework: every read, write, decision, and tool call becomes
11
+ <strong>connected evidence</strong> as your agent runs. When something goes wrong, you don't grep logs — you ask.
12
+ </p>
11
13
 
12
14
  <p align="center">
13
- <strong>We abstract context engineering — and hand back the trace.</strong><br/>
14
- <strong>Live</strong> to develop · <strong>offline</strong> to monitor · <strong>detailed</strong> to improve.
15
+ <a href="https://footprintjs.github.io/agentThinkingUI/">
16
+ <img src="docs/assets/hero-atui.png" alt="An agent run replayed in AgentThinkingUI: the LLM 'brain' calls the Flight-search tool, the step inspector shows the tool's raw output and the brain's reasoning about it, and the timeline scrubs every step of the run." width="100%">
17
+ </a>
18
+ </p>
19
+ <p align="center">
20
+ <sub>A real run, replayed — rendered with <a href="https://github.com/footprintjs/agentThinkingUI"><b>AgentThinkingUI</b></a> (<code>npm i agentthinkingui</code>). Every frame is generated from the run's own trace; <a href="https://footprintjs.github.io/agentThinkingUI/">▶ watch it live</a>.</sub>
15
21
  </p>
16
22
 
17
23
  <p align="center">
@@ -26,7 +32,176 @@
26
32
 
27
33
  ---
28
34
 
29
- ## 1. What we abstract
35
+ ## The new error class
36
+
37
+ For decades, software had two kinds of errors — and developers never needed deep
38
+ domain knowledge to fix either:
39
+
40
+ | Error class | Where the bug lives | How you find it |
41
+ |---|---|---|
42
+ | **Infrastructure** — crash, timeout, 500 | the system | infra logs, monitoring |
43
+ | **Business logic** — wrong branch, wrong math | the code | stack trace, debugger, `console.log` |
44
+ | **Contextual** — wrong tool chosen, wrong fact believed, stale memory trusted | **what the model was given** | **nothing. Until now.** |
45
+
46
+ Agents introduced the third class. The code is correct, the infra is healthy — and
47
+ the run is still wrong, because two tool descriptions read alike, or an injected
48
+ fact was misleading, or memory carried last week's truth. Classical logs can't
49
+ explain it: **they record what the code did, never what the context did.**
50
+
51
+ ## The idea
52
+
53
+ If contextual errors live in what the model was given, then the run itself must be
54
+ structured so context is **evidence** — every injection, read, write, decision, and
55
+ tool call recorded *connected*, the moment it happens. Not logs you grep. Evidence
56
+ you ask.
57
+
58
+ ## How — we abstract context engineering
59
+
60
+ Every piece of context enters the LLM through one of **3 slots** (`system` ·
61
+ `messages` · `tools`), under one of **4 triggers** — skills, steering, RAG, facts,
62
+ memory, guardrails are all the same move: `Injection = slot × trigger × cache`.
63
+
64
+ **Because the framework owns that injection point, every piece of context is born
65
+ tracked.** Tracking isn't an add-on you wire up — it's a consequence of the
66
+ abstraction. [The full model ↓](#the-model--what-we-abstract)
67
+
68
+ <p align="center">
69
+ <picture>
70
+ <source media="(prefers-color-scheme: dark)" srcset="docs/assets/hero-dark.svg">
71
+ <source media="(prefers-color-scheme: light)" srcset="docs/assets/hero-light.svg">
72
+ <img alt="agentfootprint mascot composing context flavors (Skills, Steering, Guardrails, RAG, Tool APIs, Memory) into three structured LLM slots (system, messages, tools) — the central abstraction, visualized." src="docs/assets/hero-light.svg" width="100%"/>
73
+ </picture>
74
+ </p>
75
+
76
+ ## What tracking buys you
77
+
78
+ **See it in 30 seconds** — four questions logs can't answer, each answered by code in this repo from a real run:
79
+
80
+ ```text
81
+ Q: Why did the model pick refund_full instead of refund_partial?
82
+ A: margin 0.02 — ⚠ NARROW: the two tool descriptions read nearly identical
83
+ (toolChoiceRecorder — and the catalog lint flags the pair before you ever run)
84
+
85
+ Q: Why was this loan declined?
86
+ A: decision ← [control: "DTI above the 0.43 affordability ceiling"] ← dti 0.52 ← monthlyDebt / income
87
+ (decide() evidence + the causal slice — every hop is a real recorded edge)
88
+
89
+ Q: Which piece of context made the answer wrong?
90
+ A: CAUSAL: ablating fact 'vip-override' flipped the outcome in 3/3 seeded reruns
91
+ (localizeContextBug — ranked proxies, counterfactual proof)
92
+
93
+ Q: Prove nobody edited this run's record.
94
+ A: verifyAuditBundle → valid: false, brokenAt: #16 — the tampered record, named
95
+ (hash-chained audit export, offline verification)
96
+ ```
97
+
98
+ And you don't have to read the trace yourself — **we provide the tools for an LLM to track it for you**: the trace toolpack let a debugger model find a planted bug while reading **9.5% of the trace** ([guide](docs/guides/trace-debugging.md)).
99
+
100
+ ---
101
+
102
+ ## Pick your door
103
+
104
+ | 🔧 Building an agent? | 🐛 Agent misbehaving? | 🏛️ Need audit / compliance? |
105
+ |---|---|---|
106
+ | Typed agents with skills, steering, RAG, memory, guardrails — and the trace for free. | Lint your tool catalog in 5 minutes — works on **any** framework's tool list (plain JSON / MCP / OpenAI / Anthropic shapes). Then causal slices, context bisection, and the debugger-LLM toolpack. | Hash-chained, tamper-evident run records with an offline verifier — record-keeping in the EU-AI-Act shape. |
107
+ | [→ Quick start](#quick-start--runs-offline-no-api-key) | [→ Tool-catalog lint](docs/guides/tool-catalog-lint.md) · [→ Trace debugging](docs/guides/trace-debugging.md) | [→ Tamper-evident audit](docs/guides/security.md) |
108
+
109
+ ---
110
+
111
+ ## Quick start — runs offline, no API key
112
+
113
+ ```bash
114
+ npm install agentfootprint footprintjs
115
+ ```
116
+
117
+ ```typescript
118
+ import { Agent, defineTool, mock } from 'agentfootprint';
119
+
120
+ const weather = defineTool({
121
+ name: 'weather',
122
+ description: 'Get current weather for a city.',
123
+ inputSchema: {
124
+ type: 'object',
125
+ properties: { city: { type: 'string' } },
126
+ required: ['city'],
127
+ },
128
+ execute: async ({ city }: { city: string }) => `${city}: 72°F, sunny`,
129
+ });
130
+
131
+ const agent = Agent.create({
132
+ provider: mock({ reply: 'I checked: it is 72°F and sunny.' }),
133
+ model: 'mock',
134
+ })
135
+ .system('You answer weather questions using the weather tool.')
136
+ .tool(weather)
137
+ .build();
138
+
139
+ const result = await agent.run({ message: 'Weather in Paris?' });
140
+ console.log(result); // → "I checked: it is 72°F and sunny."
141
+ ```
142
+
143
+ For production, import a real provider from `agentfootprint/llm-providers` and swap it in — `anthropic(...)` / `openai(...)` / `bedrock(...)` / `ollama(...)`. Only the import line changes; the agent code stays the same. (The vendor-SDK providers live on the `agentfootprint/llm-providers` subpath so the main `agentfootprint` barrel stays free of optional peer-dep requires; `mock`, `browserAnthropic`, and `browserOpenai` are on the main barrel.)
144
+
145
+ ### Then add context
146
+
147
+ A real agent carries more than one prompt and one tool: facts about the user, always-on rules, skills that unlock on demand. Declare each piece — the framework decides **when** it fires and **which slot** it lands in, and every piece is born tracked:
148
+
149
+ ```typescript
150
+ import { defineFact, defineSteering, defineSkill } from 'agentfootprint';
151
+
152
+ const agent = Agent.create({ provider, model })
153
+ .system('You are a support agent.')
154
+ .fact(defineFact({ // data the model should know — always on
155
+ id: 'user-profile',
156
+ data: 'Name: Maya · Plan: Pro · Customer since 2022',
157
+ }))
158
+ .steering(defineSteering({ // rules the model must follow — always on
159
+ id: 'refund-policy',
160
+ prompt: 'Never promise a refund before checking the policy tool.',
161
+ }))
162
+ .skill(defineSkill({ // guidance + tools — unlocks when the LLM asks
163
+ id: 'billing',
164
+ description: 'Use for refunds, charges, billing questions.',
165
+ body: 'When handling billing: confirm identity first, then…',
166
+ tools: [refundTool],
167
+ }))
168
+ .build();
169
+ ```
170
+
171
+ Same shape for `.instruction()` / `.memory()` / `.rag()` / raw `.injection()` — they're all the one primitive, `Injection = slot × trigger × cache`. [The full model ↓](#the-model--what-we-abstract)
172
+
173
+ ### Then compose control flow
174
+
175
+ One agent is a `Runner`. So is every composition of agents — four control-flow primitives, and anything that runs composes into anything else:
176
+
177
+ ```typescript
178
+ import { Sequence, Parallel, Conditional } from 'agentfootprint';
179
+
180
+ const pipeline = Sequence.create()
181
+ .step('classify', classifyAgent) // sequence: step → step
182
+ .step('review',
183
+ Parallel.create() // parallel: fan out, then merge
184
+ .branch('legal', legalAgent)
185
+ .branch('ethics', ethicsAgent)
186
+ .mergeWithLLM({ provider, model, prompt: 'Synthesize:' })
187
+ .build())
188
+ .step('respond',
189
+ Conditional.create() // conditional: one branch runs
190
+ .when('urgent', (i) => i.message.startsWith('URGENT'), urgentAgent)
191
+ .otherwise('normal', normalAgent)
192
+ .build())
193
+ .build();
194
+
195
+ await pipeline.run({ message: 'URGENT: refund dispute on order #4411' });
196
+ ```
197
+
198
+ The fourth primitive is `Loop` — `Loop.repeat(agent).until(guard).times(5)`, with a mandatory budget guard. And the named patterns from the research literature ship pre-composed from the same four: `selfConsistency` · `reflection` · `debate` · `mapReduce` · `tot` · `swarm`. Because every composition is a flowchart, the structure you wrote is the structure you see in the UI — and the trace spans the whole pipeline, not one agent at a time. [Designing systems of agents ↓](#how-do-i-design-my-agent-or-system-of-agents)
199
+
200
+ ---
201
+
202
+ ## The model — what we abstract
203
+
204
+
30
205
 
31
206
  When you build an Agentic Application, you collect domain-specific data and instructions, then wire them up based on what your system receives.
32
207
 
@@ -70,7 +245,7 @@ That's the whole model: `Injection = slot × trigger × cache`.
70
245
 
71
246
  ---
72
247
 
73
- ## 2. Why we chose this abstraction
248
+ ## Why we chose this abstraction
74
249
 
75
250
  The agent space has many credible primary abstractions:
76
251
 
@@ -143,7 +318,7 @@ And a fourth, novel: **the agent can read its own trace.** Six months after the
143
318
 
144
319
  ---
145
320
 
146
- ## 3. How do I design my agent or system of agents?
321
+ ## How do I design my agent or system of agents?
147
322
 
148
323
  Two scales — same alphabet. Four control flows are the entire vocabulary.
149
324
 
@@ -305,7 +480,14 @@ Same trick as Beat 1: instead of N libraries for N patterns, we found the M buil
305
480
 
306
481
  ---
307
482
 
308
- ## 4. How do I see what my agent did?
483
+ ## How do I see what my agent did?
484
+
485
+ <p align="center">
486
+ <img src="docs/assets/lens-run.png" alt="A real agent run in the Lens: the conversation (with live PII redaction), the executed path lit on the merge-tree flowchart, the WHAT-HAPPENED timeline of every iteration/context/LLM turn/route, run stats, and the step inspector — all generated from the run's own trace." width="100%">
487
+ </p>
488
+ <p align="center">
489
+ <sub>One real run, fully explained — the <a href="https://github.com/footprintjs/agentfootprint-lens"><b>Lens</b></a> (<code>npm i agentfootprint-lens</code>): conversation · executed path · per-step timeline · stats, every pixel from the trace.</sub>
490
+ </p>
309
491
 
310
492
  Because we own the loop (Beat 2), every decision and execution is captured during traversal — not bolted on. The default capture is the **causal trace**: every stage, read, write, and decision evidence, as a JSON-portable, scrubbable, queryable, exportable artifact. Beyond the default, wire custom recorders for cost, latency, or quality scoring — any observation hook fires on the same stream.
311
493
 
@@ -427,42 +609,6 @@ off the hot path.
427
609
 
428
610
  ---
429
611
 
430
- ## Quick start — runs offline, no API key
431
-
432
- ```bash
433
- npm install agentfootprint footprintjs
434
- ```
435
-
436
- ```typescript
437
- import { Agent, defineTool, mock } from 'agentfootprint';
438
-
439
- const weather = defineTool({
440
- name: 'weather',
441
- description: 'Get current weather for a city.',
442
- inputSchema: {
443
- type: 'object',
444
- properties: { city: { type: 'string' } },
445
- required: ['city'],
446
- },
447
- execute: async ({ city }: { city: string }) => `${city}: 72°F, sunny`,
448
- });
449
-
450
- const agent = Agent.create({
451
- provider: mock({ reply: 'I checked: it is 72°F and sunny.' }),
452
- model: 'mock',
453
- })
454
- .system('You answer weather questions using the weather tool.')
455
- .tool(weather)
456
- .build();
457
-
458
- const result = await agent.run({ message: 'Weather in Paris?' });
459
- console.log(result); // → "I checked: it is 72°F and sunny."
460
- ```
461
-
462
- For production, import a real provider from `agentfootprint/llm-providers` and swap it in — `anthropic(...)` / `openai(...)` / `bedrock(...)` / `ollama(...)`. Only the import line changes; the agent code stays the same. (The vendor-SDK providers live on the `agentfootprint/llm-providers` subpath so the main `agentfootprint` barrel stays free of optional peer-dep requires; `mock`, `browserAnthropic`, and `browserOpenai` are on the main barrel.)
463
-
464
- ---
465
-
466
612
  ## Mocks first, production second
467
613
 
468
614
  Build the entire app against in-memory mocks with **zero API cost**, then swap real infrastructure one boundary at a time.
@@ -0,0 +1,183 @@
1
+ /**
2
+ * Ablation — the counterfactual seam (RFC-003 Part B, D8 stage 4 + the
3
+ * D9 stats engine).
4
+ *
5
+ * Three pieces:
6
+ *
7
+ * 1. **Adapters** — `ablationForSuspect` maps a classified suspect to
8
+ * the spec that removes it (tool → drop from catalog; injection /
9
+ * fact / skill → exclude the `Injection.id`; memory → filter the
10
+ * `MemoryEntry.id`; arg → consumer-override note).
11
+ *
12
+ * 2. **The seam** — `applyAblations` filters the inputs an agent is
13
+ * BUILT from. Documented here because the seam did not previously
14
+ * exist: `AgentOptions` has no `ignoredTools` runtime kill-switch, so
15
+ * tool ablation happens at construction (the consumer's
16
+ * `AblationRunner` rebuilds the agent from filtered inputs). Same for
17
+ * injections and memory entries.
18
+ *
19
+ * 3. **The probe engine** — `runAblationProbe` calls the consumer's
20
+ * runner N seeded times, measures embedding similarity to the
21
+ * original output, counts outcome flips, and returns variance —
22
+ * never a single-run verdict (D9 discipline).
23
+ *
24
+ * §B2: only `runAblationProbe`-derived verdicts are causal claims; every
25
+ * score elsewhere is a correlational proxy.
26
+ */
27
+ import { cosineSimilarity } from '../../memory/embedding/cosine.js';
28
+ import { CONTEXT_BISECT_DEFAULTS } from './types.js';
29
+ // ─── Adapters: suspect → spec ────────────────────────────────────────
30
/**
 * Map a classified suspect to the ablation spec that removes it.
 *
 * - `'tool'`      → `{ kind: 'tool', ignoredTools: [detail.toolName] }`
 * - `'injection'` → `{ kind: 'injection', excludeInjectionIds: [detail.injectionId] }`
 * - `'memory'`    → `{ kind: 'memory', excludeMemoryIds: [detail.injectionId] }`
 * - `'arg'`       → `{ kind: 'arg', source, note }` (a note for the runner;
 *                   nothing the library itself can filter)
 *
 * @param suspect the suspect; `kind`, `source` and the optional `detail`
 *   object are the only fields read here.
 * @returns the spec, or `undefined` when the suspect is a `'stage'`, has a
 *   kind not listed above, or lacks the identifier its kind needs.
 */
export function ablationForSuspect(suspect) {
    const { kind } = suspect;
    if (kind === 'tool') {
        const toolName = suspect.detail?.toolName;
        if (toolName === undefined) {
            return undefined;
        }
        return { kind: 'tool', ignoredTools: [toolName] };
    }
    if (kind === 'injection' || kind === 'memory') {
        // Both kinds are keyed by the same `detail.injectionId` field.
        const id = suspect.detail?.injectionId;
        if (id === undefined) {
            return undefined;
        }
        return kind === 'injection'
            ? { kind: 'injection', excludeInjectionIds: [id] }
            : { kind: 'memory', excludeMemoryIds: [id] };
    }
    if (kind === 'arg') {
        const { source } = suspect;
        return {
            kind: 'arg',
            source,
            note: `step ${source} consumed untracked run input ($getArgs()/env) — the runner must override the input itself; the library cannot filter it.`,
        };
    }
    // 'stage' (and anything unrecognized) has no removable input.
    return undefined;
}
59
/**
 * Filter the inputs an agent is constructed from according to a list of
 * ablation specs. Only the fields read below are required of the items,
 * so the concrete tool / injection / memory-entry types are not imported.
 *
 * Specs of any other kind (e.g. `'arg'`) are ignored here.
 *
 * @param specs   iterable of ablation specs; `'tool'` specs contribute
 *   `ignoredTools`, `'injection'` specs `excludeInjectionIds`, `'memory'`
 *   specs `excludeMemoryIds`.
 * @param targets `{ tools?, injections?, memoryEntries? }`; a missing list
 *   is treated as empty. Tools are matched on `tool.schema.name`,
 *   injections and memory entries on `.id`.
 * @returns `{ tools, injections, memoryEntries }` — new arrays holding the
 *   items that survived the filter, in their original order.
 *
 * @example inside an AblationRunner
 * ```ts
 * const { tools, injections } = applyAblations(specs, {
 *   tools: ALL_TOOLS, injections: ALL_FACTS,
 * });
 * const agent = Agent.create({ provider: freshProvider(), model })
 *   .tools([...tools]);
 * for (const inj of injections) agent.fact(inj);
 * ```
 */
export function applyAblations(specs, targets) {
    const droppedToolNames = new Set();
    const droppedInjectionIds = new Set();
    const droppedMemoryIds = new Set();
    const addAll = (bucket, items) => {
        for (const item of items) {
            bucket.add(item);
        }
    };
    for (const spec of specs) {
        switch (spec.kind) {
            case 'tool':
                addAll(droppedToolNames, spec.ignoredTools);
                break;
            case 'injection':
                addAll(droppedInjectionIds, spec.excludeInjectionIds);
                break;
            case 'memory':
                addAll(droppedMemoryIds, spec.excludeMemoryIds);
                break;
            default:
                // Other kinds (e.g. 'arg') carry nothing to filter here.
                break;
        }
    }
    const candidateTools = targets.tools ?? [];
    const candidateInjections = targets.injections ?? [];
    const candidateMemory = targets.memoryEntries ?? [];
    return {
        tools: candidateTools.filter((tool) => !droppedToolNames.has(tool.schema.name)),
        injections: candidateInjections.filter((injection) => !droppedInjectionIds.has(injection.id)),
        memoryEntries: candidateMemory.filter((entry) => !droppedMemoryIds.has(entry.id)),
    };
}
98
+ // ─── The probe engine (D9 stats) ─────────────────────────────────────
99
/**
 * Summarize a list of similarity scores.
 *
 * @param values array of numbers.
 * @returns `{ mean, min, max, stdev }`; `stdev` is the population standard
 *   deviation (divides by `values.length`). An empty list yields all zeros.
 */
function similarityStats(values) {
    const count = values.length;
    if (count === 0) {
        return { mean: 0, min: 0, max: 0, stdev: 0 };
    }
    let total = 0;
    let lowest = Infinity;
    let highest = -Infinity;
    for (const value of values) {
        total = total + value;
        lowest = Math.min(lowest, value);
        highest = Math.max(highest, value);
    }
    const mean = total / count;
    let squaredDeviation = 0;
    for (const value of values) {
        squaredDeviation = squaredDeviation + (value - mean) ** 2;
    }
    const stdev = Math.sqrt(squaredDeviation / count);
    return { mean, min: lowest, max: highest, stdev };
}
111
/**
 * Build the default outcome comparator.
 *
 * @param embedder      object whose `embed({ text })` resolves to the vector
 *   handed to `cosineSimilarity`.
 * @param flipThreshold similarity cut-off.
 * @returns an async `(original, ablated) => boolean` that embeds both texts
 *   concurrently and resolves to `true` when their cosine similarity is
 *   strictly below `flipThreshold`.
 */
export function defaultOutcomeComparator(embedder, flipThreshold) {
    return async (original, ablated) => {
        const pendingOriginal = embedder.embed({ text: original });
        const pendingAblated = embedder.embed({ text: ablated });
        const vectors = await Promise.all([pendingOriginal, pendingAblated]);
        const similarity = cosineSimilarity(vectors[0], vectors[1]);
        return similarity < flipThreshold;
    };
}
121
/**
 * Run ONE probe: call the consumer's runner with `specs` once per seed
 * (0..samples-1), measure each output's embedding similarity to the
 * original output, and count outcome flips.
 *
 * The sample count is normalized before use so the reported `samples`
 * always equals the number of reruns actually performed:
 *   - a missing value falls back to `CONTEXT_BISECT_DEFAULTS.samples`;
 *   - a non-finite value (`NaN`, `±Infinity`) also falls back to the
 *     default — otherwise `NaN` would run zero reruns and `Infinity`
 *     would never terminate;
 *   - a fractional value is rounded up (the seed loop already ran that
 *     many iterations; only the reported count was off);
 *   - the result is clamped to ≥ 2 — never a single-run verdict.
 *
 * @param config `{ embedder, rerun }` where `embedder.embed({ text })`
 *   resolves to a vector and `rerun` carries `originalOutput`, `runner`,
 *   and the optional `samples`, `flipThreshold`, `outcomeChanged`.
 * @param specs  ablation specs passed through to `rerun.runner(specs, { seed })`.
 * @returns `{ samples, flips, similarity }` — the number of reruns, how many
 *   of them changed outcome, and the summary from `similarityStats`.
 */
export async function runAblationProbe(config, specs) {
    const requestedSamples = Number(config.rerun.samples ?? CONTEXT_BISECT_DEFAULTS.samples);
    const usableSamples = Number.isFinite(requestedSamples)
        ? requestedSamples
        : CONTEXT_BISECT_DEFAULTS.samples;
    const samples = Math.max(2, Math.ceil(usableSamples));
    const flipThreshold = config.rerun.flipThreshold ?? CONTEXT_BISECT_DEFAULTS.flipThreshold;
    const outcomeChanged = config.rerun.outcomeChanged ?? defaultOutcomeComparator(config.embedder, flipThreshold);
    const similarities = [];
    let flips = 0;
    // Embed the original once; every rerun is compared against this vector.
    const originalVec = await config.embedder.embed({ text: config.rerun.originalOutput });
    for (let seed = 0; seed < samples; seed++) {
        const output = await config.rerun.runner(specs, { seed });
        const outputVec = await config.embedder.embed({ text: output });
        similarities.push(cosineSimilarity(originalVec, outputVec));
        if (await outcomeChanged(config.rerun.originalOutput, output)) {
            flips++;
        }
    }
    return { samples, flips, similarity: similarityStats(similarities) };
}
144
/**
 * Majority-flip rule: a probe counts as flipped only when strictly more
 * than half of its reruns changed outcome.
 *
 * @param stats probe result; only `flips` and `samples` are read.
 * @returns `true` when `flips * 2 > samples`.
 */
export function probeFlipped(stats) {
    const { flips, samples } = stats;
    return samples < flips * 2;
}
148
/**
 * Turn probe evidence into a verdict plus a human-readable claim.
 *
 * Decision order:
 *   1. `baselineStable` is falsy → `'inconclusive'` (the un-ablated
 *      scenario itself changed outcome, so no ablation verdict is trusted);
 *   2. `probeFlipped(stats)`     → `'confirmed'`;
 *   3. `stats.flips > 0`         → `'inconclusive'` (flips below majority);
 *   4. otherwise                 → `'not-confirmed'`.
 *
 * @param label          text naming the ablated suspect, interpolated into the claim.
 * @param stats          probe result: `flips`, `samples`, and — read only on the
 *   confirmed path — `similarity.mean` / `similarity.stdev`.
 * @param baselineStable whether the un-ablated baseline kept its outcome.
 * @returns `{ verdict, claim }`.
 */
export function verdictFor(label, stats, baselineStable) {
    const inconclusive = (claim) => ({ verdict: 'inconclusive', claim });
    if (!baselineStable) {
        return inconclusive([
            `INCONCLUSIVE: the un-ablated baseline itself changed outcome across seeded reruns — `,
            `no ablation verdict for ${label} is trustworthy on an unstable scenario.`,
        ].join(''));
    }
    if (probeFlipped(stats)) {
        const meanText = stats.similarity.mean.toFixed(3);
        const stdevText = stats.similarity.stdev.toFixed(3);
        return {
            verdict: 'confirmed',
            claim: [
                `CAUSAL: ablating ${label} flipped the outcome in ${stats.flips}/${stats.samples} `,
                `seeded reruns (mean similarity to original ${meanText} `,
                `± ${stdevText}).`,
            ].join(''),
        };
    }
    if (stats.flips > 0) {
        return inconclusive([
            `INCONCLUSIVE: ablating ${label} flipped only ${stats.flips}/${stats.samples} seeded `,
            `reruns — below majority; raise samples or check scenario stability.`,
        ].join(''));
    }
    return {
        verdict: 'not-confirmed',
        claim: [
            `NOT CONFIRMED: ablating ${label} did not change the outcome in ${stats.samples} seeded `,
            `reruns — its ranking remains a correlational proxy only.`,
        ].join(''),
    };
}
183
+ //# sourceMappingURL=ablation.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ablation.js","sourceRoot":"","sources":["../../../../src/lib/context-bisect/ablation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AAWpE,OAAO,EAAE,uBAAuB,EAAE,MAAM,YAAY,CAAC;AAErD,wEAAwE;AAExE;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAgB;IACjD,QAAQ,OAAO,CAAC,IAAI,EAAE,CAAC;QACrB,KAAK,MAAM;YACT,OAAO,OAAO,CAAC,MAAM,EAAE,QAAQ,KAAK,SAAS;gBAC3C,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE;gBAC3D,CAAC,CAAC,SAAS,CAAC;QAChB,KAAK,WAAW;YACd,OAAO,OAAO,CAAC,MAAM,EAAE,WAAW,KAAK,SAAS;gBAC9C,CAAC,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,mBAAmB,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE;gBAC1E,CAAC,CAAC,SAAS,CAAC;QAChB,KAAK,QAAQ;YACX,OAAO,OAAO,CAAC,MAAM,EAAE,WAAW,KAAK,SAAS;gBAC9C,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,gBAAgB,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE;gBACpE,CAAC,CAAC,SAAS,CAAC;QAChB,KAAK,KAAK;YACR,OAAO;gBACL,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,IAAI,EACF,QAAQ,OAAO,CAAC,MAAM,mDAAmD;oBACzE,0EAA0E;aAC7E,CAAC;QACJ,KAAK,OAAO;YACV,OAAO,SAAS,CAAC;IACrB,CAAC;AACH,CAAC;AAwBD;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,UAAU,cAAc,CAK5B,KAA8B,EAC9B,OAAyD;IAMzD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;IACvC,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC7C,MAAM,cAAc,GAAG,IAAI,GAAG,EAAU,CAAC;IACzC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM;YAAE,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,YAAY;gBAAE,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACvF,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW;YAC3B,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,mBAAmB;gBAAE,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACxE,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ;YAAE,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,gBAAgB;gBAAE,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC7F,CAAC;IACD,OAAO;QACL,KAAK,EAAE,CAAC,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAClF,UAAU,EAAE,CAAC,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,MAAM,CAC3C,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE
,CAAC,CACrD;QACD,aAAa,EAAE,CAAC,OAAO,CAAC,aAAa,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;KAC9F,CAAC;AACJ,CAAC;AAED,wEAAwE;AAExE,SAAS,eAAe,CAAC,MAAyB;IAChD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IACtE,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACnE,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACrF,OAAO;QACL,IAAI;QACJ,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC;QACxB,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC;QACxB,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC;KAC3B,CAAC;AACJ,CAAC;AAED,wEAAwE;AACxE,MAAM,UAAU,wBAAwB,CACtC,QAAkB,EAClB,aAAqB;IAErB,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,EAAE;QACjC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC/B,QAAQ,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;YAClC,QAAQ,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;SAClC,CAAC,CAAC;QACH,OAAO,gBAAgB,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC;IAChD,CAAC,CAAC;AACJ,CAAC;AAQD;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,MAAmB,EACnB,KAA8B;IAE9B,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO,IAAI,uBAAuB,CAAC,OAAO,CAAC,CAAC;IACrF,MAAM,aAAa,GAAG,MAAM,CAAC,KAAK,CAAC,aAAa,IAAI,uBAAuB,CAAC,aAAa,CAAC;IAC1F,MAAM,cAAc,GAClB,MAAM,CAAC,KAAK,CAAC,cAAc,IAAI,wBAAwB,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;IAE1F,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC,CAAC;IACvF,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC;QAC1C,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAChE,YA
AY,CAAC,IAAI,CAAC,gBAAgB,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC,CAAC;QAC5D,IAAI,MAAM,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,MAAM,CAAC;YAAE,KAAK,EAAE,CAAC;IACzE,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,eAAe,CAAC,YAAY,CAAC,EAAE,CAAC;AACvE,CAAC;AAED,8DAA8D;AAC9D,MAAM,UAAU,YAAY,CAAC,KAAuB;IAClD,OAAO,KAAK,CAAC,KAAK,GAAG,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC;AACzC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,UAAU,CACxB,KAAa,EACb,KAAuB,EACvB,cAAuB;IAEvB,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO;YACL,OAAO,EAAE,cAAc;YACvB,KAAK,EACH,sFAAsF;gBACtF,2BAA2B,KAAK,0CAA0C;SAC7E,CAAC;IACJ,CAAC;IACD,IAAI,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO;YACL,OAAO,EAAE,WAAW;YACpB,KAAK,EACH,oBAAoB,KAAK,2BAA2B,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,GAAG;gBACnF,8CAA8C,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;gBACjF,KAAK,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;SAC7C,CAAC;IACJ,CAAC;IACD,IAAI,KAAK,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;QACpB,OAAO;YACL,OAAO,EAAE,cAAc;YACvB,KAAK,EACH,0BAA0B,KAAK,iBAAiB,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,UAAU;gBACtF,qEAAqE;SACxE,CAAC;IACJ,CAAC;IACD,OAAO;QACL,OAAO,EAAE,eAAe;QACxB,KAAK,EACH,2BAA2B,KAAK,kCAAkC,KAAK,CAAC,OAAO,UAAU;YACzF,0DAA0D;KAC7D,CAAC;AACJ,CAAC"}
@@ -0,0 +1,129 @@
1
+ /**
2
+ * bisectCulprits — multi-culprit bisection over the ranked suspect set
3
+ * (RFC-003 Part B, block D9). The "git bisect" of the localizer.
4
+ *
5
+ * When single-suspect ablations don't flip the outcome — redundant causes
6
+ * (two facts that EACH justify the wrong answer), or interacting ones —
7
+ * the culprit is a SET. This harness finds a minimal culprit set by
8
+ * recursive halving over the ranked suspects (delta-debugging style,
9
+ * Zeller's ddmin specialized to two-way splits), then keeps searching the
10
+ * remainder for INDEPENDENT culprits until the remainder stops flipping.
11
+ *
12
+ * Probe semantics (the D9 discipline):
13
+ * - every probe = N seeded reruns of the consumer's `AblationRunner`
14
+ * with the probe's combined specs; "flipped" = MAJORITY of runs
15
+ * changed outcome; similarity mean ± spread is always reported —
16
+ * never single-run verdicts;
17
+ * - probe 0 is the BASELINE (no ablation): if ANY baseline run flips
18
+ * (zero tolerance, not the majority rule) the scenario itself is unstable and the result is honestly `'inconclusive'`;
19
+ * - probes are cached by spec-set, and budgeted (`maxProbes`) — running
20
+ * out of budget yields `'inconclusive'`, never a partial claim
21
+ * dressed up as a finding.
22
+ *
23
+ * §B2: the returned `verdict`/`culprits` are CAUSAL claims — they rest
24
+ * exclusively on counterfactual reruns. The input ranking only chooses
25
+ * the SEARCH ORDER (better ranking = fewer probes), it never decides the
26
+ * outcome.
27
+ */
28
+ import { probeFlipped, runAblationProbe } from './ablation.js';
29
+ import { CONTEXT_BISECT_DEFAULTS } from './types.js';
30
+ import { suspectLabel } from './localize.js';
31
+ // ─── The harness ─────────────────────────────────────────────────────
32
/**
 * Internal sentinel error for the bisection harness: thrown by the probe
 * helper once the number of recorded probes reaches the `maxProbes` budget,
 * and caught by `bisectCulprits`, which converts it into an
 * `'inconclusive'` verdict. Any other error type is rethrown there.
 */
class ProbeBudgetExceeded extends Error {
    constructor() {
        super('probe budget exceeded');
        // Identify the subclass in stack traces / logs instead of a bare "Error".
        this.name = 'ProbeBudgetExceeded';
    }
}
37
/**
 * Search the ranked suspects for minimal culprit set(s) using seeded
 * counterfactual probes.
 *
 * Flow:
 *   1. Baseline probe with no ablations. A single flipped run marks the
 *      scenario unstable and ends the search with `'inconclusive'`.
 *   2. Reproduction gate: all ablatable suspects together must flip the
 *      outcome, otherwise the verdict is `'not-reproducible'`.
 *   3. Up to `maxCulprits` rounds: minimize the remaining suspects to one
 *      culprit set, drop it from the remainder, and continue only while the
 *      remainder still flips on its own.
 *
 * @param options
 *   - `suspects`: ranked suspects; entries without an `ablation`, or whose
 *     ablation `kind` is `'arg'`, are ignored.
 *   - `maxProbes` / `maxCulprits`: optional limits, falling back to
 *     `CONTEXT_BISECT_DEFAULTS` when null/undefined.
 *   - `rerun`, `embedder`: forwarded unchanged to `runAblationProbe`.
 * @returns `{ verdict, culprits, probes, runsUsed }` where `verdict` is
 *   `'confirmed'`, `'not-reproducible'` or `'inconclusive'`; `culprits` is
 *   empty unless the verdict is `'confirmed'`. Exhausting the probe budget
 *   yields `'inconclusive'` with no culprits.
 */
export async function bisectCulprits(options) {
    const candidates = options.suspects.filter((entry) => {
        if (entry.ablation === undefined)
            return false;
        return entry.ablation.kind !== 'arg';
    });
    const probeBudget = options.maxProbes ?? CONTEXT_BISECT_DEFAULTS.maxProbes;
    const roundLimit = options.maxCulprits ?? CONTEXT_BISECT_DEFAULTS.maxCulprits;
    const runnerConfig = { rerun: options.rerun, embedder: options.embedder };

    const probes = [];
    const flipCache = new Map();
    let runsUsed = 0;

    const labelsOf = (group) => group.map((entry) => suspectLabel(entry));
    // Order-insensitive cache key for a group of suspects.
    const keyOf = (group) => labelsOf(group).sort().join('|');
    const inconclusive = () => ({ verdict: 'inconclusive', culprits: [], probes, runsUsed });

    /** Run (or recall from cache) the probe that ablates every suspect in `group`. */
    const probe = async (group) => {
        const key = keyOf(group);
        const known = flipCache.get(key);
        if (known !== undefined) {
            return known;
        }
        // Cache hits are free; only fresh probes count against the budget.
        if (probes.length >= probeBudget) {
            throw new ProbeBudgetExceeded();
        }
        const specs = [];
        for (const entry of group) {
            if (entry.ablation !== undefined) {
                specs.push(entry.ablation);
            }
        }
        const stats = await runAblationProbe(runnerConfig, specs);
        runsUsed += stats.samples;
        const flipped = probeFlipped(stats);
        probes.push({ ablated: labelsOf(group), stats, flipped });
        flipCache.set(key, flipped);
        return flipped;
    };

    /**
     * Shrink `group` to a subset that, combined with `context`, still flips
     * the outcome. Callers must already know that `group` plus `context`
     * flips. Halves are tried in ranked order; if neither half suffices on
     * its own, each half is minimized with the other one held as context.
     */
    const minimize = async (group, context) => {
        if (group.length <= 1) {
            return [...group];
        }
        const cut = Math.ceil(group.length / 2);
        const likelier = group.slice(0, cut); // ranked order: the likelier half first
        const others = group.slice(cut);
        for (const half of [likelier, others]) {
            if (await probe([...half, ...context])) {
                return minimize(half, context);
            }
        }
        // Interference: parts of BOTH halves are needed jointly.
        const neededFromLikelier = await minimize(likelier, [...others, ...context]);
        const neededFromOthers = await minimize(others, [...neededFromLikelier, ...context]);
        return [...neededFromLikelier, ...neededFromOthers];
    };

    try {
        // Baseline is zero-tolerance: one un-ablated flip means the scenario
        // is unstable, even though the cached value follows the majority rule.
        const baselineStats = await runAblationProbe(runnerConfig, []);
        runsUsed += baselineStats.samples;
        const unstable = baselineStats.flips > 0;
        probes.push({ ablated: [], stats: baselineStats, flipped: unstable });
        flipCache.set(keyOf([]), probeFlipped(baselineStats));
        if (unstable) {
            return inconclusive();
        }

        // Reproduction gate: the full ranked set must flip at all.
        const reproduced = candidates.length !== 0 && (await probe(candidates));
        if (!reproduced) {
            return { verdict: 'not-reproducible', culprits: [], probes, runsUsed };
        }

        // Peel off one minimal set per round while the remainder keeps flipping.
        const culprits = [];
        let remaining = candidates;
        let round = 0;
        while (round < roundLimit) {
            const found = await minimize(remaining, []);
            culprits.push(found);
            const foundLabels = new Set(labelsOf(found));
            remaining = remaining.filter((entry) => !foundLabels.has(suspectLabel(entry)));
            if (remaining.length === 0) {
                break;
            }
            if (!(await probe(remaining))) {
                break;
            }
            round += 1;
        }
        return { verdict: 'confirmed', culprits, probes, runsUsed };
    }
    catch (error) {
        if (!(error instanceof ProbeBudgetExceeded)) {
            throw error;
        }
        return inconclusive();
    }
}
129
+ //# sourceMappingURL=bisect.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bisect.js","sourceRoot":"","sources":["../../../../src/lib/context-bisect/bisect.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAoB,MAAM,eAAe,CAAC;AAEjF,OAAO,EAAE,uBAAuB,EAAE,MAAM,YAAY,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAiD7C,wEAAwE;AAExE,MAAM,mBAAoB,SAAQ,KAAK;IACrC;QACE,KAAK,CAAC,uBAAuB,CAAC,CAAC;IACjC,CAAC;CACF;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,OAA8B;IACjE,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CACxC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,KAAK,SAAS,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,KAAK,KAAK,CAC/E,CAAC;IACF,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,uBAAuB,CAAC,SAAS,CAAC;IACzE,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,uBAAuB,CAAC,WAAW,CAAC;IAC/E,MAAM,MAAM,GAAgB,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE,CAAC;IAEjF,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAmB,CAAC;IACzC,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,MAAM,KAAK,GAAG,CAAC,GAAuB,EAAU,EAAE,CAChD,GAAG;SACA,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;SACvC,IAAI,EAAE;SACN,IAAI,CAAC,GAAG,CAAC,CAAC;IAEf,KAAK,UAAU,KAAK,CAAC,GAAuB;QAC1C,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;QACvB,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,MAAM,KAAK,SAAS;YAAE,OAAO,MAAM,CAAC;QACxC,IAAI,MAAM,CAAC,MAAM,IAAI,SAAS;YAAE,MAAM,IAAI,mBAAmB,EAAE,CAAC;QAChE,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAClC,MAAM,EACN,GAAG,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CACrF,CAAC;QACF,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC;QAC1B,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;QACtF,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QACxB,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;;;OAKG;IACH,KAAK,UAAU,QAAQ,CACrB,GAAuB,EACvB,OAA2B;QAE3B,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC;YAAE,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC;QACrC,MAAM,GAAG,
GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACtC,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,wCAAwC;QACvE,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,MAAM,KAAK,CAAC,CAAC,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,CAAC;YAAE,OAAO,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QACrE,IAAI,MAAM,KAAK,CAAC,CAAC,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC,CAAC;YAAE,OAAO,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACvE,yDAAyD;QACzD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,GAAG,EAAE,CAAC,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,GAAG,OAAO,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,OAAO,EAAE,GAAG,QAAQ,CAAC,CAAC;IACnC,CAAC;IAED,IAAI,CAAC;QACH,oEAAoE;QACpE,wEAAwE;QACxE,wEAAwE;QACxE,uEAAuE;QACvE,yEAAyE;QACzE,CAAC;YACC,MAAM,aAAa,GAAG,MAAM,gBAAgB,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;YACzD,QAAQ,IAAI,aAAa,CAAC,OAAO,CAAC;YAClC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,GAAG,CAAC,CAAC;YACzC,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;YACtE,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC;YAClD,IAAI,QAAQ,EAAE,CAAC;gBACb,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;YACrE,CAAC;QACH,CAAC;QACD,2DAA2D;QAC3D,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC;YAC1D,OAAO,EAAE,OAAO,EAAE,kBAAkB,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;QACzE,CAAC;QAED,2DAA2D;QAC3D,gDAAgD;QAChD,MAAM,QAAQ,GAAgB,EAAE,CAAC;QACjC,IAAI,SAAS,GAAG,UAAU,CAAC;QAC3B,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,WAAW,EAAE,KAAK,EAAE,EAAE,CAAC;YACjD,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YACzE,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,SAAS,CAAC,CA
AC;gBAAE,MAAM;QACjE,CAAC;QACD,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;IAC9D,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,mBAAmB,EAAE,CAAC;YACzC,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;QACrE,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * context-bisect — RFC-003 Part B: the contextual-bug LOCALIZER,
3
+ * "git bisect for context".
4
+ *
5
+ * Assembly over shipped pieces: footprintjs 9.8.0's complete causal DAG
6
+ * (control edges, honesty markers, `EdgeWeigher` hook) × influence-core
7
+ * scoring (D6) × consumer-run counterfactual ablation.
8
+ *
9
+ * D7 — `llmEdgeWeigher` influence-weighted LLM-call slice edges
10
+ * D8 — `localizeContextBug` trigger → slice → ranked suspects → ablation
11
+ * D9 — `bisectCulprits` seeded multi-culprit bisection + variance
12
+ *
13
+ * §B2 claim tiers (spelled out on every type): weights/scores are
14
+ * embedding-geometry PROXIES; ablation verdicts are the ONLY causal
15
+ * claims; slice completeness is bounded by tracking — and says so.
16
+ */
17
+ export { llmEdgeWeigher, stepOutputText, } from './llmEdgeWeigher.js';
18
+ export { defaultSuspectClassifier, formatContextBugReport, llmCallIdsFromEvents, localizeContextBug, suspectLabel, } from './localize.js';
19
+ export { ablationForSuspect, applyAblations, defaultOutcomeComparator, probeFlipped, runAblationProbe, verdictFor, } from './ablation.js';
20
+ export { bisectCulprits, } from './bisect.js';
21
+ export { CONTEXT_BISECT_DEFAULTS, } from './types.js';
22
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/lib/context-bisect/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EACL,cAAc,EACd,cAAc,GAIf,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,wBAAwB,EACxB,sBAAsB,EACtB,oBAAoB,EACpB,kBAAkB,EAClB,YAAY,GAKb,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,kBAAkB,EAClB,cAAc,EACd,wBAAwB,EACxB,YAAY,EACZ,gBAAgB,EAChB,UAAU,GAGX,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,cAAc,GAIf,MAAM,aAAa,CAAC;AAErB,OAAO,EACL,uBAAuB,GAoBxB,MAAM,YAAY,CAAC"}