agentfootprint 6.24.0 → 6.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -0
- package/bin/agentfootprint-lint-tools.mjs +14 -0
- package/dist/esm/lib/context-bisect/ablation.js +183 -0
- package/dist/esm/lib/context-bisect/ablation.js.map +1 -0
- package/dist/esm/lib/context-bisect/bisect.js +129 -0
- package/dist/esm/lib/context-bisect/bisect.js.map +1 -0
- package/dist/esm/lib/context-bisect/index.js +22 -0
- package/dist/esm/lib/context-bisect/index.js.map +1 -0
- package/dist/esm/lib/context-bisect/llmEdgeWeigher.js +0 -0
- package/dist/esm/lib/context-bisect/llmEdgeWeigher.js.map +1 -0
- package/dist/esm/lib/context-bisect/localize.js +555 -0
- package/dist/esm/lib/context-bisect/localize.js.map +1 -0
- package/dist/esm/lib/context-bisect/types.js +56 -0
- package/dist/esm/lib/context-bisect/types.js.map +1 -0
- package/dist/esm/lib/tool-lint/analyze.js +235 -0
- package/dist/esm/lib/tool-lint/analyze.js.map +1 -0
- package/dist/esm/lib/tool-lint/cli.js +198 -0
- package/dist/esm/lib/tool-lint/cli.js.map +1 -0
- package/dist/esm/lib/tool-lint/format.js +61 -0
- package/dist/esm/lib/tool-lint/format.js.map +1 -0
- package/dist/esm/lib/tool-lint/index.js +23 -0
- package/dist/esm/lib/tool-lint/index.js.map +1 -0
- package/dist/esm/lib/tool-lint/rules.js +249 -0
- package/dist/esm/lib/tool-lint/rules.js.map +1 -0
- package/dist/esm/lib/tool-lint/types.js +25 -0
- package/dist/esm/lib/tool-lint/types.js.map +1 -0
- package/dist/esm/observe.js +20 -0
- package/dist/esm/observe.js.map +1 -1
- package/dist/esm/recorders/observability/ToolChoiceRecorder.js +261 -0
- package/dist/esm/recorders/observability/ToolChoiceRecorder.js.map +1 -0
- package/dist/lib/context-bisect/ablation.js +192 -0
- package/dist/lib/context-bisect/ablation.js.map +1 -0
- package/dist/lib/context-bisect/bisect.js +133 -0
- package/dist/lib/context-bisect/bisect.js.map +1 -0
- package/dist/lib/context-bisect/index.js +40 -0
- package/dist/lib/context-bisect/index.js.map +1 -0
- package/dist/lib/context-bisect/llmEdgeWeigher.js +0 -0
- package/dist/lib/context-bisect/llmEdgeWeigher.js.map +1 -0
- package/dist/lib/context-bisect/localize.js +563 -0
- package/dist/lib/context-bisect/localize.js.map +1 -0
- package/dist/lib/context-bisect/types.js +59 -0
- package/dist/lib/context-bisect/types.js.map +1 -0
- package/dist/lib/tool-lint/analyze.js +242 -0
- package/dist/lib/tool-lint/analyze.js.map +1 -0
- package/dist/lib/tool-lint/cli.js +203 -0
- package/dist/lib/tool-lint/cli.js.map +1 -0
- package/dist/lib/tool-lint/format.js +65 -0
- package/dist/lib/tool-lint/format.js.map +1 -0
- package/dist/lib/tool-lint/index.js +43 -0
- package/dist/lib/tool-lint/index.js.map +1 -0
- package/dist/lib/tool-lint/rules.js +256 -0
- package/dist/lib/tool-lint/rules.js.map +1 -0
- package/dist/lib/tool-lint/types.js +26 -0
- package/dist/lib/tool-lint/types.js.map +1 -0
- package/dist/observe.js +56 -1
- package/dist/observe.js.map +1 -1
- package/dist/recorders/observability/ToolChoiceRecorder.js +266 -0
- package/dist/recorders/observability/ToolChoiceRecorder.js.map +1 -0
- package/dist/types/lib/context-bisect/ablation.d.ts +97 -0
- package/dist/types/lib/context-bisect/ablation.d.ts.map +1 -0
- package/dist/types/lib/context-bisect/bisect.d.ts +76 -0
- package/dist/types/lib/context-bisect/bisect.d.ts.map +1 -0
- package/dist/types/lib/context-bisect/index.d.ts +22 -0
- package/dist/types/lib/context-bisect/index.d.ts.map +1 -0
- package/dist/types/lib/context-bisect/llmEdgeWeigher.d.ts +125 -0
- package/dist/types/lib/context-bisect/llmEdgeWeigher.d.ts.map +1 -0
- package/dist/types/lib/context-bisect/localize.d.ts +119 -0
- package/dist/types/lib/context-bisect/localize.d.ts.map +1 -0
- package/dist/types/lib/context-bisect/types.d.ts +356 -0
- package/dist/types/lib/context-bisect/types.d.ts.map +1 -0
- package/dist/types/lib/tool-lint/analyze.d.ts +84 -0
- package/dist/types/lib/tool-lint/analyze.d.ts.map +1 -0
- package/dist/types/lib/tool-lint/cli.d.ts +44 -0
- package/dist/types/lib/tool-lint/cli.d.ts.map +1 -0
- package/dist/types/lib/tool-lint/format.d.ts +19 -0
- package/dist/types/lib/tool-lint/format.d.ts.map +1 -0
- package/dist/types/lib/tool-lint/index.d.ts +24 -0
- package/dist/types/lib/tool-lint/index.d.ts.map +1 -0
- package/dist/types/lib/tool-lint/rules.d.ts +86 -0
- package/dist/types/lib/tool-lint/rules.d.ts.map +1 -0
- package/dist/types/lib/tool-lint/types.d.ts +156 -0
- package/dist/types/lib/tool-lint/types.d.ts.map +1 -0
- package/dist/types/observe.d.ts +3 -0
- package/dist/types/observe.d.ts.map +1 -1
- package/dist/types/recorders/observability/ToolChoiceRecorder.d.ts +165 -0
- package/dist/types/recorders/observability/ToolChoiceRecorder.d.ts.map +1 -0
- package/package.json +4 -2
package/README.md
CHANGED
|
@@ -394,6 +394,37 @@ allocates no queue.
|
|
|
394
394
|
> 📖 Full semantics (capture policies, backpressure, `'block'` overflow):
|
|
395
395
|
> [footprintjs deferred-observers guide](https://github.com/footprintjs/footPrint/blob/main/docs/guides/observers-deferred.md)
|
|
396
396
|
|
|
397
|
+
### Lint your tool catalog — before the model picks the wrong twin
|
|
398
|
+
|
|
399
|
+
Tool routing is an LLM decision driven by names + descriptions — so lint the
|
|
400
|
+
catalog like code and gate it in CI. **Zero stack buy-in**: works on any
|
|
401
|
+
OpenAI / Anthropic / MCP / plain tool list, no agentfootprint runtime needed.
|
|
402
|
+
|
|
403
|
+
```bash
|
|
404
|
+
npx agentfootprint-lint-tools tools.json --threshold 0.94 --strict
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
```
|
|
408
|
+
✗ CONFUSABLE 0.9445 get_fcns_database <> influx_get_fcns_database
|
|
409
|
+
hint: names differ only by 'influx' — make the descriptions say WHEN to choose each
|
|
410
|
+
~ warn [enum-in-prose] influx_get_port_ranking.metric
|
|
411
|
+
suggest: "enum": ["avg_iops","peak_iops","mbps"]
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
Pairwise confusability over what the model reads (embedder pluggable,
|
|
415
|
+
content-hash cached) plus a pluggable structural rule pack
|
|
416
|
+
(missing/short descriptions, says-WHAT-not-WHEN, enums hiding in prose,
|
|
417
|
+
undocumented optional params). The runtime counterpart, `toolChoiceRecorder`
|
|
418
|
+
(`agentfootprint/observe`), scores each live LLM call's tool choice against
|
|
419
|
+
the same geometry and flags narrow margins and proxy disagreements — lazily,
|
|
420
|
+
off the hot path.
|
|
421
|
+
|
|
422
|
+
> 📖 **[Tool-catalog lint guide](docs/guides/tool-catalog-lint.md)** — 5 minutes
|
|
423
|
+
> from a tools.json to a gated CI check ·
|
|
424
|
+
> [`examples/observability/02`](examples/observability/02-lint-confusable-catalog.ts) ·
|
|
425
|
+
> [`03`](examples/observability/03-lint-fix-and-pass.ts) ·
|
|
426
|
+
> [`04`](examples/observability/04-tool-choice-margins.ts)
|
|
427
|
+
|
|
397
428
|
---
|
|
398
429
|
|
|
399
430
|
## Quick start — runs offline, no API key
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* agentfootprint-lint-tools — CI gate for tool-catalog confusability
|
|
4
|
+
* (RFC-002 C3). Humble shell: all behavior lives in src/lib/tool-lint/cli.ts
|
|
5
|
+
* (unit-tested there); this wrapper only resolves the built module and maps
|
|
6
|
+
* the returned code onto process.exitCode.
|
|
7
|
+
*
|
|
8
|
+
* npx agentfootprint-lint-tools tools.json
|
|
9
|
+
* npx agentfootprint-lint-tools tools.json --threshold 0.94 --strict
|
|
10
|
+
*
|
|
11
|
+
* Guide: docs/guides/tool-catalog-lint.md
|
|
12
|
+
*/
|
|
13
|
+
// Load the built CLI implementation relative to this wrapper script.
const { runToolLintCli } = await import('../dist/esm/lib/tool-lint/cli.js');
// Forward everything after `node <script>`; the code the CLI resolves to
// becomes this process's exit code.
const cliArgs = process.argv.slice(2);
process.exitCode = await runToolLintCli(cliArgs);
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ablation — the counterfactual seam (RFC-003 Part B, D8 stage 4 + the
|
|
3
|
+
* D9 stats engine).
|
|
4
|
+
*
|
|
5
|
+
* Three pieces:
|
|
6
|
+
*
|
|
7
|
+
* 1. **Adapters** — `ablationForSuspect` maps a classified suspect to
|
|
8
|
+
* the spec that removes it (tool → drop from catalog; injection /
|
|
9
|
+
* fact / skill → exclude the `Injection.id`; memory → filter the
|
|
10
|
+
* `MemoryEntry.id`; arg → consumer-override note).
|
|
11
|
+
*
|
|
12
|
+
* 2. **The seam** — `applyAblations` filters the inputs an agent is
|
|
13
|
+
* BUILT from. Documented here because the seam did not previously
|
|
14
|
+
* exist: `AgentOptions` has no `ignoredTools` runtime kill-switch, so
|
|
15
|
+
* tool ablation happens at construction (the consumer's
|
|
16
|
+
* `AblationRunner` rebuilds the agent from filtered inputs). Same for
|
|
17
|
+
* injections and memory entries.
|
|
18
|
+
*
|
|
19
|
+
* 3. **The probe engine** — `runAblationProbe` calls the consumer's
|
|
20
|
+
* runner N seeded times, measures embedding similarity to the
|
|
21
|
+
* original output, counts outcome flips, and returns variance —
|
|
22
|
+
* never a single-run verdict (D9 discipline).
|
|
23
|
+
*
|
|
24
|
+
* §B2: only `runAblationProbe`-derived verdicts are causal claims; every
|
|
25
|
+
* score elsewhere is a correlational proxy.
|
|
26
|
+
*/
|
|
27
|
+
import { cosineSimilarity } from '../../memory/embedding/cosine.js';
|
|
28
|
+
import { CONTEXT_BISECT_DEFAULTS } from './types.js';
|
|
29
|
+
// ─── Adapters: suspect → spec ────────────────────────────────────────
|
|
30
|
+
/**
 * Map one classified suspect to the ablation spec that removes it.
 *
 * - `'tool'`      → `{ kind: 'tool', ignoredTools: [detail.toolName] }`
 * - `'injection'` → `{ kind: 'injection', excludeInjectionIds: [detail.injectionId] }`
 * - `'memory'`    → `{ kind: 'memory', excludeMemoryIds: [detail.injectionId] }`
 *   (memory suspects carry their id in `detail.injectionId` as well)
 * - `'arg'`       → a note-only spec telling the runner to override the input
 * - `'stage'` / any other kind → `undefined` (plain pipeline stages have no
 *   removable input; re-rank or refactor).
 *
 * @param suspect the suspect to translate; reads `kind`, `detail`, `source`.
 * @returns the removing spec, or `undefined` when the suspect has no
 *   removable input or the identifying detail field is missing.
 */
export function ablationForSuspect(suspect) {
    const { kind, detail } = suspect;
    if (kind === 'tool') {
        const toolName = detail?.toolName;
        return toolName === undefined ? undefined : { kind: 'tool', ignoredTools: [toolName] };
    }
    if (kind === 'injection' || kind === 'memory') {
        const id = detail?.injectionId;
        if (id === undefined) {
            return undefined;
        }
        return kind === 'injection'
            ? { kind: 'injection', excludeInjectionIds: [id] }
            : { kind: 'memory', excludeMemoryIds: [id] };
    }
    if (kind === 'arg') {
        const { source } = suspect;
        const note = [
            `step ${source} consumed untracked run input ($getArgs()/env) — `,
            `the runner must override the input itself; the library cannot filter it.`,
        ].join('');
        return { kind: 'arg', source, note };
    }
    // 'stage' and anything unrecognised: nothing to remove.
    return undefined;
}
|
|
59
|
+
/**
 * Filter the inputs an agent is constructed from according to a list of
 * ablation specs — the documented counterfactual seam of this module.
 *
 * Tools are matched on `tool.schema.name`, injections and memory entries on
 * their `id`. A missing target list is treated as empty, so all three keys
 * are always present in the result.
 *
 * Specs of any other kind (notably `'arg'`) are ignored here: run input
 * belongs to the consumer's runner (`spec.note` says so).
 *
 * @param specs ablation specs; `'tool'`, `'injection'` and `'memory'` specs
 *   contribute the names/ids to remove.
 * @param targets optional `tools`, `injections`, `memoryEntries` arrays.
 * @returns `{ tools, injections, memoryEntries }` — new arrays without the
 *   ablated items; the input arrays are not modified.
 *
 * @example inside an AblationRunner
 * ```ts
 * const { tools, injections } = applyAblations(specs, {
 *   tools: ALL_TOOLS, injections: ALL_FACTS,
 * });
 * const agent = Agent.create({ provider: freshProvider(), model })
 *   .tools([...tools]);
 * for (const inj of injections) agent.fact(inj);
 * ```
 */
export function applyAblations(specs, targets) {
    // One removal set per handled spec kind, plus the spec field listing its ids.
    const removals = new Map([
        ['tool', { field: 'ignoredTools', ids: new Set() }],
        ['injection', { field: 'excludeInjectionIds', ids: new Set() }],
        ['memory', { field: 'excludeMemoryIds', ids: new Set() }],
    ]);
    for (const spec of specs) {
        const removal = removals.get(spec.kind);
        if (removal === undefined) {
            continue; // e.g. 'arg' — not filterable here
        }
        for (const id of spec[removal.field]) {
            removal.ids.add(id);
        }
    }
    const keepUnlisted = (items, kind, idOf) => {
        const { ids } = removals.get(kind);
        return (items ?? []).filter((item) => !ids.has(idOf(item)));
    };
    return {
        tools: keepUnlisted(targets.tools, 'tool', (tool) => tool.schema.name),
        injections: keepUnlisted(targets.injections, 'injection', (injection) => injection.id),
        memoryEntries: keepUnlisted(targets.memoryEntries, 'memory', (entry) => entry.id),
    };
}
|
|
98
|
+
// ─── The probe engine (D9 stats) ─────────────────────────────────────
|
|
99
|
+
/**
 * Summarise a list of similarity values as mean / min / max / population
 * standard deviation (variance is divided by the count, not count - 1).
 * An empty list yields all zeros.
 */
function similarityStats(values) {
    const count = values.length;
    if (count === 0) {
        return { mean: 0, min: 0, max: 0, stdev: 0 };
    }
    let total = 0;
    for (const value of values) {
        total += value;
    }
    const mean = total / count;
    let squaredDeviations = 0;
    for (const value of values) {
        squaredDeviations += (value - mean) ** 2;
    }
    const lowest = Math.min(...values);
    const highest = Math.max(...values);
    return {
        mean,
        min: lowest,
        max: highest,
        stdev: Math.sqrt(squaredDeviations / count),
    };
}
|
|
111
|
+
/**
 * Build the default outcome comparator.
 *
 * The returned async function embeds both texts (the two `embed` calls are
 * started together and awaited jointly) and reports an outcome change when
 * their cosine similarity is strictly below `flipThreshold`.
 *
 * @param embedder object exposing `embed({ text })`.
 * @param flipThreshold similarity below which the outcome counts as changed.
 * @returns `(original, ablated) => Promise<boolean>`.
 */
export function defaultOutcomeComparator(embedder, flipThreshold) {
    return async (original, ablated) => {
        const pendingVectors = [original, ablated].map((text) => embedder.embed({ text }));
        const [originalVec, ablatedVec] = await Promise.all(pendingVectors);
        const similarity = cosineSimilarity(originalVec, ablatedVec);
        return similarity < flipThreshold;
    };
}
|
|
121
|
+
/**
 * Run ONE probe: call the consumer's runner with `specs` once per seed
 * (0..samples-1), measure each output's embedding similarity to the
 * original, and count outcome flips. Variance is always reported.
 *
 * `samples` is clamped to an integer ≥ 2 — D9: never single-run verdicts.
 * A fractional request is rounded up (that many reruns were always
 * executed, so the reported denominator now matches); a non-finite request
 * (NaN / ±Infinity) falls back to the library default instead of silently
 * running zero times or never terminating.
 *
 * @param config `{ rerun, embedder }`. Reads `rerun.runner`,
 *   `rerun.originalOutput` and the optional `rerun.samples`,
 *   `rerun.flipThreshold`, `rerun.outcomeChanged`; `embedder.embed({ text })`
 *   supplies the vectors.
 * @param specs ablation specs handed unchanged to `rerun.runner`.
 * @returns `{ samples, flips, similarity }` where `similarity` is the
 *   mean/min/max/stdev summary over all reruns.
 */
export async function runAblationProbe(config, specs) {
    const { rerun, embedder } = config;
    // Math.max(2, NaN) is NaN, so normalise BEFORE clamping.
    const requestedSamples = Math.ceil(Number(rerun.samples ?? CONTEXT_BISECT_DEFAULTS.samples));
    const samples = Math.max(2, Number.isFinite(requestedSamples) ? requestedSamples : CONTEXT_BISECT_DEFAULTS.samples);
    const flipThreshold = rerun.flipThreshold ?? CONTEXT_BISECT_DEFAULTS.flipThreshold;
    const outcomeChanged = rerun.outcomeChanged ?? defaultOutcomeComparator(embedder, flipThreshold);
    const similarities = [];
    let flips = 0;
    // The original output is embedded once and reused for every rerun.
    const originalVec = await embedder.embed({ text: rerun.originalOutput });
    // Reruns are sequential on purpose: the seed is the only varying input.
    for (let seed = 0; seed < samples; seed++) {
        const output = await rerun.runner(specs, { seed });
        const outputVec = await embedder.embed({ text: output });
        similarities.push(cosineSimilarity(originalVec, outputVec));
        if (await outcomeChanged(rerun.originalOutput, output)) {
            flips++;
        }
    }
    return { samples, flips, similarity: similarityStats(similarities) };
}
|
|
144
|
+
/**
 * Majority-flip rule shared by D8 verdicts and D9 probes: a probe counts as
 * flipped only when strictly more than half of its reruns changed outcome
 * (an exact tie is NOT a flip).
 *
 * @param stats probe statistics; reads `flips` and `samples`.
 * @returns `true` when `flips * 2` exceeds `samples`.
 */
export function probeFlipped(stats) {
    const { flips, samples } = stats;
    return samples < flips * 2;
}
|
|
148
|
+
/**
 * Turn probe evidence into a verdict plus a human-readable claim — the ONLY
 * causal claim tier (§B2).
 *
 * Decision order:
 *  1. `baselineStable` falsy → `'inconclusive'` (the un-ablated scenario
 *     itself flipped, so no ablation verdict is trustworthy); `stats` is
 *     not read in this case.
 *  2. majority of reruns flipped → `'confirmed'`.
 *  3. some, but not a majority, flipped → `'inconclusive'`.
 *  4. no flips → `'not-confirmed'`.
 *
 * @param label display label of the ablated suspect, embedded in the claim.
 * @param stats probe statistics (`flips`, `samples`, `similarity`).
 * @param baselineStable whether the un-ablated baseline kept its outcome.
 * @returns `{ verdict, claim }`.
 */
export function verdictFor(label, stats, baselineStable) {
    const conclude = (verdict, ...claimParts) => ({ verdict, claim: claimParts.join('') });
    if (!baselineStable) {
        return conclude('inconclusive', `INCONCLUSIVE: the un-ablated baseline itself changed outcome across seeded reruns — `, `no ablation verdict for ${label} is trustworthy on an unstable scenario.`);
    }
    if (probeFlipped(stats)) {
        const { mean, stdev } = stats.similarity;
        return conclude('confirmed', `CAUSAL: ablating ${label} flipped the outcome in ${stats.flips}/${stats.samples} `, `seeded reruns (mean similarity to original ${mean.toFixed(3)} `, `± ${stdev.toFixed(3)}).`);
    }
    if (stats.flips > 0) {
        return conclude('inconclusive', `INCONCLUSIVE: ablating ${label} flipped only ${stats.flips}/${stats.samples} seeded `, `reruns — below majority; raise samples or check scenario stability.`);
    }
    return conclude('not-confirmed', `NOT CONFIRMED: ablating ${label} did not change the outcome in ${stats.samples} seeded `, `reruns — its ranking remains a correlational proxy only.`);
}
|
|
183
|
+
//# sourceMappingURL=ablation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ablation.js","sourceRoot":"","sources":["../../../../src/lib/context-bisect/ablation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,kCAAkC,CAAC;AAWpE,OAAO,EAAE,uBAAuB,EAAE,MAAM,YAAY,CAAC;AAErD,wEAAwE;AAExE;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAgB;IACjD,QAAQ,OAAO,CAAC,IAAI,EAAE,CAAC;QACrB,KAAK,MAAM;YACT,OAAO,OAAO,CAAC,MAAM,EAAE,QAAQ,KAAK,SAAS;gBAC3C,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE;gBAC3D,CAAC,CAAC,SAAS,CAAC;QAChB,KAAK,WAAW;YACd,OAAO,OAAO,CAAC,MAAM,EAAE,WAAW,KAAK,SAAS;gBAC9C,CAAC,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,mBAAmB,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE;gBAC1E,CAAC,CAAC,SAAS,CAAC;QAChB,KAAK,QAAQ;YACX,OAAO,OAAO,CAAC,MAAM,EAAE,WAAW,KAAK,SAAS;gBAC9C,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,gBAAgB,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE;gBACpE,CAAC,CAAC,SAAS,CAAC;QAChB,KAAK,KAAK;YACR,OAAO;gBACL,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,IAAI,EACF,QAAQ,OAAO,CAAC,MAAM,mDAAmD;oBACzE,0EAA0E;aAC7E,CAAC;QACJ,KAAK,OAAO;YACV,OAAO,SAAS,CAAC;IACrB,CAAC;AACH,CAAC;AAwBD;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,UAAU,cAAc,CAK5B,KAA8B,EAC9B,OAAyD;IAMzD,MAAM,YAAY,GAAG,IAAI,GAAG,EAAU,CAAC;IACvC,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC7C,MAAM,cAAc,GAAG,IAAI,GAAG,EAAU,CAAC;IACzC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM;YAAE,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,YAAY;gBAAE,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACvF,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW;YAC3B,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,mBAAmB;gBAAE,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACxE,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ;YAAE,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,gBAAgB;gBAAE,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC7F,CAAC;IACD,OAAO;QACL,KAAK,EAAE,CAAC,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAClF,UAAU,EAAE,CAAC,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,MAAM,CAC3C,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,C
AAC,CACrD;QACD,aAAa,EAAE,CAAC,OAAO,CAAC,aAAa,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;KAC9F,CAAC;AACJ,CAAC;AAED,wEAAwE;AAExE,SAAS,eAAe,CAAC,MAAyB;IAChD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IACtE,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACnE,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IACrF,OAAO;QACL,IAAI;QACJ,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC;QACxB,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC;QACxB,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC;KAC3B,CAAC;AACJ,CAAC;AAED,wEAAwE;AACxE,MAAM,UAAU,wBAAwB,CACtC,QAAkB,EAClB,aAAqB;IAErB,OAAO,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,EAAE;QACjC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC/B,QAAQ,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;YAClC,QAAQ,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;SAClC,CAAC,CAAC;QACH,OAAO,gBAAgB,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC;IAChD,CAAC,CAAC;AACJ,CAAC;AAQD;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,MAAmB,EACnB,KAA8B;IAE9B,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,OAAO,IAAI,uBAAuB,CAAC,OAAO,CAAC,CAAC;IACrF,MAAM,aAAa,GAAG,MAAM,CAAC,KAAK,CAAC,aAAa,IAAI,uBAAuB,CAAC,aAAa,CAAC;IAC1F,MAAM,cAAc,GAClB,MAAM,CAAC,KAAK,CAAC,cAAc,IAAI,wBAAwB,CAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;IAE1F,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC,CAAC;IACvF,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC;QAC1C,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAChE,YAAY
,CAAC,IAAI,CAAC,gBAAgB,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC,CAAC;QAC5D,IAAI,MAAM,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,MAAM,CAAC;YAAE,KAAK,EAAE,CAAC;IACzE,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,eAAe,CAAC,YAAY,CAAC,EAAE,CAAC;AACvE,CAAC;AAED,8DAA8D;AAC9D,MAAM,UAAU,YAAY,CAAC,KAAuB;IAClD,OAAO,KAAK,CAAC,KAAK,GAAG,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC;AACzC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,UAAU,CACxB,KAAa,EACb,KAAuB,EACvB,cAAuB;IAEvB,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO;YACL,OAAO,EAAE,cAAc;YACvB,KAAK,EACH,sFAAsF;gBACtF,2BAA2B,KAAK,0CAA0C;SAC7E,CAAC;IACJ,CAAC;IACD,IAAI,YAAY,CAAC,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO;YACL,OAAO,EAAE,WAAW;YACpB,KAAK,EACH,oBAAoB,KAAK,2BAA2B,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,GAAG;gBACnF,8CAA8C,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;gBACjF,KAAK,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;SAC7C,CAAC;IACJ,CAAC;IACD,IAAI,KAAK,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;QACpB,OAAO;YACL,OAAO,EAAE,cAAc;YACvB,KAAK,EACH,0BAA0B,KAAK,iBAAiB,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,UAAU;gBACtF,qEAAqE;SACxE,CAAC;IACJ,CAAC;IACD,OAAO;QACL,OAAO,EAAE,eAAe;QACxB,KAAK,EACH,2BAA2B,KAAK,kCAAkC,KAAK,CAAC,OAAO,UAAU;YACzF,0DAA0D;KAC7D,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* bisectCulprits — multi-culprit bisection over the ranked suspect set
|
|
3
|
+
* (RFC-003 Part B, block D9). The "git bisect" of the localizer.
|
|
4
|
+
*
|
|
5
|
+
* When single-suspect ablations don't flip the outcome — redundant causes
|
|
6
|
+
* (two facts that EACH justify the wrong answer), or interacting ones —
|
|
7
|
+
* the culprit is a SET. This harness finds a minimal culprit set by
|
|
8
|
+
* recursive halving over the ranked suspects (delta-debugging style,
|
|
9
|
+
* Zeller's ddmin specialized to two-way splits), then keeps searching the
|
|
10
|
+
* remainder for INDEPENDENT culprits until the remainder stops flipping.
|
|
11
|
+
*
|
|
12
|
+
* Probe semantics (the D9 discipline):
|
|
13
|
+
* - every probe = N seeded reruns of the consumer's `AblationRunner`
|
|
14
|
+
* with the probe's combined specs; "flipped" = MAJORITY of runs
|
|
15
|
+
* changed outcome; similarity mean ± spread is always reported —
|
|
16
|
+
* never single-run verdicts;
|
|
17
|
+
* - probe 0 is the BASELINE (no ablation): if it flips, the scenario
|
|
18
|
+
* itself is unstable and the result is honestly `'inconclusive'`;
|
|
19
|
+
* - probes are cached by spec-set, and budgeted (`maxProbes`) — running
|
|
20
|
+
* out of budget yields `'inconclusive'`, never a partial claim
|
|
21
|
+
* dressed up as a finding.
|
|
22
|
+
*
|
|
23
|
+
* §B2: the returned `verdict`/`culprits` are CAUSAL claims — they rest
|
|
24
|
+
* exclusively on counterfactual reruns. The input ranking only chooses
|
|
25
|
+
* the SEARCH ORDER (better ranking = fewer probes), it never decides the
|
|
26
|
+
* outcome.
|
|
27
|
+
*/
|
|
28
|
+
import { probeFlipped, runAblationProbe } from './ablation.js';
|
|
29
|
+
import { CONTEXT_BISECT_DEFAULTS } from './types.js';
|
|
30
|
+
import { suspectLabel } from './localize.js';
|
|
31
|
+
// ─── The harness ─────────────────────────────────────────────────────
|
|
32
|
+
/**
 * Internal signal raised by the probe helper once the number of recorded
 * probes has reached `maxProbes`. `bisectCulprits` catches it and reports
 * `'inconclusive'`; any other error is rethrown.
 */
class ProbeBudgetExceeded extends Error {
    constructor() {
        super('probe budget exceeded');
        // Without this the instance reports the generic name 'Error' in logs
        // and stack traces.
        this.name = 'ProbeBudgetExceeded';
    }
}
|
|
37
|
+
/**
 * Find minimal culprit set(s) by seeded counterfactual bisection.
 *
 * Flow:
 *  1. Baseline probe with no ablation. A single flipped rerun marks the
 *     scenario unstable → `'inconclusive'`.
 *  2. Reproduction gate: ablating ALL candidates must flip the outcome,
 *     otherwise → `'not-reproducible'`.
 *  3. Up to `maxCulprits` rounds: minimise the remaining candidates to one
 *     culprit set, remove it, and continue only while the remainder still
 *     flips on its own.
 *
 * Candidates are the suspects that carry an ablation spec of a kind other
 * than `'arg'`. Probes are cached by the sorted suspect labels and limited
 * to `maxProbes` recorded probes (the baseline counts); running out of
 * budget yields `'inconclusive'` with no culprits.
 *
 * @param options reads `suspects`, `rerun`, `embedder` and the optional
 *   `maxProbes` / `maxCulprits` (library defaults otherwise).
 * @returns `{ verdict, culprits, probes, runsUsed }`.
 */
export async function bisectCulprits(options) {
    const { suspects, rerun, embedder } = options;
    const candidates = suspects.filter(({ ablation }) => ablation !== undefined && ablation.kind !== 'arg');
    const maxProbes = options.maxProbes ?? CONTEXT_BISECT_DEFAULTS.maxProbes;
    const maxCulprits = options.maxCulprits ?? CONTEXT_BISECT_DEFAULTS.maxCulprits;
    const config = { rerun, embedder };
    const probes = [];
    const flipCache = new Map();
    let runsUsed = 0;
    const labelsOf = (set) => set.map((suspect) => suspectLabel(suspect));
    // Order-insensitive cache key for a suspect set.
    const keyOf = (set) => labelsOf(set).sort().join('|');
    // Every exit path reports the same evidence alongside its verdict.
    const report = (verdict, culprits) => ({ verdict, culprits, probes, runsUsed });
    /** Probe one suspect set (cached, budgeted); resolves to "majority flipped". */
    const probe = async (set) => {
        const key = keyOf(set);
        if (flipCache.has(key)) {
            return flipCache.get(key);
        }
        if (probes.length >= maxProbes) {
            throw new ProbeBudgetExceeded();
        }
        const specs = set.flatMap(({ ablation }) => (ablation === undefined ? [] : [ablation]));
        const stats = await runAblationProbe(config, specs);
        runsUsed += stats.samples;
        const flipped = probeFlipped(stats);
        probes.push({ ablated: labelsOf(set), stats, flipped });
        flipCache.set(key, flipped);
        return flipped;
    };
    /**
     * Minimal subset of `set` that — together with `context` — flips the
     * outcome. Precondition: probe(set ∪ context) flipped.
     * Two-way ddmin: try each half in ranked order; on interference (neither
     * half alone suffices) minimise each half with the other as context.
     */
    const minimize = async (set, context) => {
        if (set.length <= 1) {
            return [...set];
        }
        const mid = Math.ceil(set.length / 2);
        const top = set.slice(0, mid); // ranked order: the likelier half first
        const rest = set.slice(mid);
        for (const half of [top, rest]) {
            if (await probe([...half, ...context])) {
                return minimize(half, context);
            }
        }
        // Interference: parts of BOTH halves are needed jointly.
        const fromTop = await minimize(top, [...rest, ...context]);
        const fromRest = await minimize(rest, [...fromTop, ...context]);
        return [...fromTop, ...fromRest];
    };
    try {
        // Baseline: an unstable scenario invalidates everything downstream.
        // ZERO-TOLERANCE: one un-ablated flip is enough — the majority rule
        // would let a 1-in-3-flaky scenario through to a 'confirmed' CAUSAL
        // verdict, which violates the §B2 honest-claims discipline.
        const baselineStats = await runAblationProbe(config, []);
        runsUsed += baselineStats.samples;
        const unstable = baselineStats.flips > 0;
        probes.push({ ablated: [], stats: baselineStats, flipped: unstable });
        flipCache.set(keyOf([]), probeFlipped(baselineStats));
        if (unstable) {
            return report('inconclusive', []);
        }
        // Reproduction gate: the full ranked set must flip at all.
        const reproduced = candidates.length > 0 && (await probe(candidates));
        if (!reproduced) {
            return report('not-reproducible', []);
        }
        // Find minimal sets; then keep searching the remainder for
        // INDEPENDENT culprits until it stops flipping.
        const culprits = [];
        let remaining = candidates;
        let round = 0;
        while (round < maxCulprits) {
            const found = await minimize(remaining, []);
            culprits.push(found);
            const foundLabels = new Set(labelsOf(found));
            remaining = remaining.filter((suspect) => !foundLabels.has(suspectLabel(suspect)));
            if (remaining.length === 0) {
                break;
            }
            if (!(await probe(remaining))) {
                break;
            }
            round += 1;
        }
        return report('confirmed', culprits);
    }
    catch (error) {
        if (!(error instanceof ProbeBudgetExceeded)) {
            throw error;
        }
        // Out of budget: never a partial claim dressed up as a finding.
        return report('inconclusive', []);
    }
}
|
|
129
|
+
//# sourceMappingURL=bisect.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bisect.js","sourceRoot":"","sources":["../../../../src/lib/context-bisect/bisect.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAoB,MAAM,eAAe,CAAC;AAEjF,OAAO,EAAE,uBAAuB,EAAE,MAAM,YAAY,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAiD7C,wEAAwE;AAExE,MAAM,mBAAoB,SAAQ,KAAK;IACrC;QACE,KAAK,CAAC,uBAAuB,CAAC,CAAC;IACjC,CAAC;CACF;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,OAA8B;IACjE,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,MAAM,CACxC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,QAAQ,KAAK,SAAS,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,KAAK,KAAK,CAC/E,CAAC;IACF,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,uBAAuB,CAAC,SAAS,CAAC;IACzE,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,uBAAuB,CAAC,WAAW,CAAC;IAC/E,MAAM,MAAM,GAAgB,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE,CAAC;IAEjF,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAmB,CAAC;IACzC,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,MAAM,KAAK,GAAG,CAAC,GAAuB,EAAU,EAAE,CAChD,GAAG;SACA,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;SACvC,IAAI,EAAE;SACN,IAAI,CAAC,GAAG,CAAC,CAAC;IAEf,KAAK,UAAU,KAAK,CAAC,GAAuB;QAC1C,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;QACvB,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,MAAM,KAAK,SAAS;YAAE,OAAO,MAAM,CAAC;QACxC,IAAI,MAAM,CAAC,MAAM,IAAI,SAAS;YAAE,MAAM,IAAI,mBAAmB,EAAE,CAAC;QAChE,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAClC,MAAM,EACN,GAAG,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CACrF,CAAC;QACF,QAAQ,IAAI,KAAK,CAAC,OAAO,CAAC;QAC1B,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;QACtF,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QACxB,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;;;;OAKG;IACH,KAAK,UAAU,QAAQ,CACrB,GAAuB,EACvB,OAA2B;QAE3B,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC;YAAE,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC;QACrC,MAAM,GAAG,GA
AG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACtC,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,wCAAwC;QACvE,MAAM,IAAI,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC5B,IAAI,MAAM,KAAK,CAAC,CAAC,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,CAAC;YAAE,OAAO,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;QACrE,IAAI,MAAM,KAAK,CAAC,CAAC,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC,CAAC;YAAE,OAAO,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACvE,yDAAyD;QACzD,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,GAAG,EAAE,CAAC,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC,GAAG,OAAO,EAAE,GAAG,OAAO,CAAC,CAAC,CAAC;QAChE,OAAO,CAAC,GAAG,OAAO,EAAE,GAAG,QAAQ,CAAC,CAAC;IACnC,CAAC;IAED,IAAI,CAAC;QACH,oEAAoE;QACpE,wEAAwE;QACxE,wEAAwE;QACxE,uEAAuE;QACvE,yEAAyE;QACzE,CAAC;YACC,MAAM,aAAa,GAAG,MAAM,gBAAgB,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;YACzD,QAAQ,IAAI,aAAa,CAAC,OAAO,CAAC;YAClC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,GAAG,CAAC,CAAC;YACzC,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;YACtE,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC;YAClD,IAAI,QAAQ,EAAE,CAAC;gBACb,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;YACrE,CAAC;QACH,CAAC;QACD,2DAA2D;QAC3D,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC;YAC1D,OAAO,EAAE,OAAO,EAAE,kBAAkB,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;QACzE,CAAC;QAED,2DAA2D;QAC3D,gDAAgD;QAChD,MAAM,QAAQ,GAAgB,EAAE,CAAC;QACjC,IAAI,SAAS,GAAG,UAAU,CAAC;QAC3B,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,WAAW,EAAE,KAAK,EAAE,EAAE,CAAC;YACjD,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;YAC5C,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YACzE,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YACjF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,SAAS,CAAC,CAAC
;gBAAE,MAAM;QACjE,CAAC;QACD,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;IAC9D,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,mBAAmB,EAAE,CAAC;YACzC,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;QACrE,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* context-bisect — RFC-003 Part B: the contextual-bug LOCALIZER,
|
|
3
|
+
* "git bisect for context".
|
|
4
|
+
*
|
|
5
|
+
* Assembly over shipped pieces: footprintjs 9.8.0's complete causal DAG
|
|
6
|
+
* (control edges, honesty markers, `EdgeWeigher` hook) × influence-core
|
|
7
|
+
* scoring (D6) × consumer-run counterfactual ablation.
|
|
8
|
+
*
|
|
9
|
+
* D7 — `llmEdgeWeigher` influence-weighted LLM-call slice edges
|
|
10
|
+
* D8 — `localizeContextBug` trigger → slice → ranked suspects → ablation
|
|
11
|
+
* D9 — `bisectCulprits` seeded multi-culprit bisection + variance
|
|
12
|
+
*
|
|
13
|
+
* §B2 claim tiers (spelled out on every type): weights/scores are
|
|
14
|
+
* embedding-geometry PROXIES; ablation verdicts are the ONLY causal
|
|
15
|
+
* claims; slice completeness is bounded by tracking — and says so.
|
|
16
|
+
*/
|
|
17
|
+
export { llmEdgeWeigher, stepOutputText, } from './llmEdgeWeigher.js';
|
|
18
|
+
export { defaultSuspectClassifier, formatContextBugReport, llmCallIdsFromEvents, localizeContextBug, suspectLabel, } from './localize.js';
|
|
19
|
+
export { ablationForSuspect, applyAblations, defaultOutcomeComparator, probeFlipped, runAblationProbe, verdictFor, } from './ablation.js';
|
|
20
|
+
export { bisectCulprits, } from './bisect.js';
|
|
21
|
+
export { CONTEXT_BISECT_DEFAULTS, } from './types.js';
|
|
22
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../src/lib/context-bisect/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EACL,cAAc,EACd,cAAc,GAIf,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,wBAAwB,EACxB,sBAAsB,EACtB,oBAAoB,EACpB,kBAAkB,EAClB,YAAY,GAKb,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,kBAAkB,EAClB,cAAc,EACd,wBAAwB,EACxB,YAAY,EACZ,gBAAgB,EAChB,UAAU,GAGX,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,cAAc,GAIf,MAAM,aAAa,CAAC;AAErB,OAAO,EACL,uBAAuB,GAoBxB,MAAM,YAAY,CAAC"}
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"llmEdgeWeigher.js","sourceRoot":"","sources":["../../../../src/lib/context-bisect/llmEdgeWeigher.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiDG;AAIH,OAAO,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAEpE,OAAO,EACL,cAAc,GAIf,MAAM,4BAA4B,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,YAAY,CAAC;AAmErD,wEAAwE;AAExE,yEAAyE;AACzE,SAAS,eAAe,CAAC,SAAkC;IACzD,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC;IACnF,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,GAAG,CAAC,IAAY,EAAE,QAAgB;IACzC,OAAO,IAAI,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACjE,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,cAAc,CAC5B,SAAkC,EAClC,SAA8B,EAC9B,cAAsB,EACtB,QAAgB;IAEhB,MAAM,GAAG,GAAG,SAAS,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;IAC1C,IAAI,GAAG,KAAK,SAAS;QAAE,OAAO,SAAS,CAAC;IACxC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAChC,KAAK,MAAM,MAAM,IAAI,SAAS,EAAE,CAAC;QAC/B,IAAI,MAAM,CAAC,cAAc,KAAK,cAAc;YAAE,SAAS;QACvD,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,KAAK;YAAE,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1D,CAAC;IACD,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IACvC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,IAAI,aAAa,CAAC,aAAa,CAAC,SAA2B,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC;IAChG,CAAC;IACD,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9B,OAAO,IAAI,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC7D,CAAC;AAED,4EAA4E;AAC5E,SAAS,WAAW,CAClB,SAAkC,EAClC,SAA8B,EAC9B,cAAsB,EACtB,GAAW,EACX,QAAgB;IAEhB,MAAM,GAAG,GAAG,SAAS,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;IAC1C,IAAI,GAAG,KAAK,SAAS;QAAE,OAAO,SAAS,CAAC;IACxC,MAAM,KAAK,GAAG,aAAa,CAAC,SAA2B,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;IACnE,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,SAAS,CAAC;IAC1C,OAAO,GAAG,CAAC,aAAa,CAAC,KAAK
,CAAC,EAAE,QAAQ,CAAC,CAAC;AAC7C,CAAC;AAED,wEAAwE;AAExE;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,OAA8B;IAC3D,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IAC/C,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,uBAAuB,CAAC,YAAY,CAAC;IAClF,MAAM,SAAS,GAAG,eAAe,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAErD,2DAA2D;IAC3D,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC3C,2EAA2E;IAC3E,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkE,CAAC;IAC5F,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,MAAM,OAAO,GAAG,CAAC,OAAe,EAAE,QAAgB,EAAE,GAAW,EAAU,EAAE,CACzE,GAAG,OAAO,IAAI,QAAQ,IAAI,GAAG,EAAE,CAAC;IAElC,MAAM,SAAS,GAAG,CAAC,EAAU,EAAsB,EAAE,CACnD,OAAO,CAAC,WAAW;QACjB,CAAC,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,CAAC;QACzB,CAAC,CAAC,cAAc,CAAC,OAAO,CAAC,SAAS,EAAE,SAAS,EAAE,EAAE,EAAE,YAAY,CAAC,CAAC;IAErE,MAAM,UAAU,GAAG,CAAC,EAAU,EAAE,GAAW,EAAsB,EAAE,CACjE,OAAO,CAAC,YAAY;QAClB,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,EAAE,GAAG,CAAC;QAC/B,CAAC,CAAC,WAAW,CAAC,OAAO,CAAC,SAAS,EAAE,SAAS,EAAE,EAAE,EAAE,GAAG,EAAE,YAAY,CAAC,CAAC;IAEvE,KAAK,UAAU,KAAK,CAAC,IAAgB;QACnC,qEAAqE;QACrE,sEAAsE;QACtE,gDAAgD;QAChD,KAAK,MAAM,IAAI,IAAI,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1C,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC;gBAAE,SAAS;YACnD,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAC9C,IAAI,MAAM,KAAK,SAAS;gBAAE,SAAS;YAEnC,MAAM,QAAQ,GAAoB,EAAE,CAAC;YACrC,MAAM,YAAY,GAA2D,EAAE,CAAC;YAChF,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;gBACpC,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,IAAI,IAAI,CAAC,GAAG,KAAK,SAAS;oBAAE,SAAS;gBAC7D,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC9E,IAAI,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBAAE,SAAS,CAAC,8BAA8B;gBAC9D,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;gBAC9D,IAAI,IAAI,KAAK,SAAS;oBAAE,SAAS;gBACjC,QAAQ,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,aAAa,EAAE,EAAE,EAAE,CAAC,CAAC;gBAC/C,YAAY,CAAC,IAAI,CAAC;oBAChB,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc;oBACpC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;oBAChC,GAAG,EAAE,IAAI,CAAC,GAAG;iBACd,CAAC,CAAC;YACL,CAAC;YACD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;gBA
AE,SAAS;YAEpC,QAAQ,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;YAC5D,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC;gBAClC,QAAQ;gBACR,eAAe,EAAE,MAAM;gBACvB,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,GAAG,CAAC,OAAO,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACvE,CAAC,CAAC;YACH,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACvE,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;YACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACzC,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;gBACrD,8DAA8D;gBAC9D,mEAAmE;gBACnE,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC;gBAClE,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;YACD,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAgB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE;QACtD,IAAI,IAAI,KAAK,MAAM,IAAI,GAAG,KAAK,SAAS;YAAE,OAAO,SAAS,CAAC;QAC3D,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC;YAAE,OAAO,SAAS,CAAC;QAC5D,OAAO,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,MAAM,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC,CAAC;IACjF,CAAC,CAAC;IAEF,SAAS,aAAa,CAAC,SAAiB;QACtC,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvC,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,CAAC;QACtB,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YAClC,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,MAAM,EAAE,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;SACvE,CAAC,CAAC,CAAC;QACJ,uEAAuE;QACvE,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC;IACpD,CAAC;IAED,OAAO;QACL,KAAK;QACL,KAAK;QACL,aAAa;QA
Cb,aAAa,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,QAAQ,CAAC;KACnC,CAAC;AACJ,CAAC"}
|