agentfootprint 6.28.1 → 6.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -4
- package/dist/esm/lib/context-bisect/ablation.js +20 -8
- package/dist/esm/lib/context-bisect/ablation.js.map +1 -1
- package/dist/esm/lib/context-bisect/index.js +3 -0
- package/dist/esm/lib/context-bisect/index.js.map +1 -1
- package/dist/esm/lib/context-bisect/localize.js +85 -3
- package/dist/esm/lib/context-bisect/localize.js.map +1 -1
- package/dist/esm/lib/context-bisect/missingContext.js +63 -0
- package/dist/esm/lib/context-bisect/missingContext.js.map +1 -0
- package/dist/esm/lib/context-bisect/restoration.js +39 -0
- package/dist/esm/lib/context-bisect/restoration.js.map +1 -0
- package/dist/esm/lib/context-bisect/types.js.map +1 -1
- package/dist/esm/lib/influence-core/attributability.js +136 -0
- package/dist/esm/lib/influence-core/attributability.js.map +1 -0
- package/dist/esm/lib/influence-core/index.js +2 -1
- package/dist/esm/lib/influence-core/index.js.map +1 -1
- package/dist/esm/lib/influence-core/types.js +27 -0
- package/dist/esm/lib/influence-core/types.js.map +1 -1
- package/dist/esm/observe.js +2 -2
- package/dist/esm/observe.js.map +1 -1
- package/dist/lib/context-bisect/ablation.js +22 -8
- package/dist/lib/context-bisect/ablation.js.map +1 -1
- package/dist/lib/context-bisect/index.js +6 -1
- package/dist/lib/context-bisect/index.js.map +1 -1
- package/dist/lib/context-bisect/localize.js +85 -3
- package/dist/lib/context-bisect/localize.js.map +1 -1
- package/dist/lib/context-bisect/missingContext.js +67 -0
- package/dist/lib/context-bisect/missingContext.js.map +1 -0
- package/dist/lib/context-bisect/restoration.js +43 -0
- package/dist/lib/context-bisect/restoration.js.map +1 -0
- package/dist/lib/context-bisect/types.js.map +1 -1
- package/dist/lib/influence-core/attributability.js +142 -0
- package/dist/lib/influence-core/attributability.js.map +1 -0
- package/dist/lib/influence-core/index.js +8 -1
- package/dist/lib/influence-core/index.js.map +1 -1
- package/dist/lib/influence-core/types.js +28 -1
- package/dist/lib/influence-core/types.js.map +1 -1
- package/dist/observe.js +10 -2
- package/dist/observe.js.map +1 -1
- package/dist/types/lib/context-bisect/ablation.d.ts +10 -2
- package/dist/types/lib/context-bisect/ablation.d.ts.map +1 -1
- package/dist/types/lib/context-bisect/index.d.ts +3 -1
- package/dist/types/lib/context-bisect/index.d.ts.map +1 -1
- package/dist/types/lib/context-bisect/localize.d.ts +14 -0
- package/dist/types/lib/context-bisect/localize.d.ts.map +1 -1
- package/dist/types/lib/context-bisect/missingContext.d.ts +72 -0
- package/dist/types/lib/context-bisect/missingContext.d.ts.map +1 -0
- package/dist/types/lib/context-bisect/restoration.d.ts +40 -0
- package/dist/types/lib/context-bisect/restoration.d.ts.map +1 -0
- package/dist/types/lib/context-bisect/types.d.ts +27 -1
- package/dist/types/lib/context-bisect/types.d.ts.map +1 -1
- package/dist/types/lib/influence-core/attributability.d.ts +73 -0
- package/dist/types/lib/influence-core/attributability.d.ts.map +1 -0
- package/dist/types/lib/influence-core/index.d.ts +3 -2
- package/dist/types/lib/influence-core/index.d.ts.map +1 -1
- package/dist/types/lib/influence-core/types.d.ts +95 -0
- package/dist/types/lib/influence-core/types.d.ts.map +1 -1
- package/dist/types/observe.d.ts +2 -2
- package/dist/types/observe.d.ts.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* missingContext — interface #3: find context that was AVAILABLE but never
|
|
4
|
+
* reached the model (RFC-003).
|
|
5
|
+
*
|
|
6
|
+
* The localizer's influence ranking (#1) + ablation (#2) handle culprits that
|
|
7
|
+
* are PRESENT in the context. They are blind to the opposite failure: a needed
|
|
8
|
+
* unit that was *dropped* — truncated out of the window, or never selected —
|
|
9
|
+
* so the model never saw it. You cannot ablate what isn't there.
|
|
10
|
+
*
|
|
11
|
+
* This finder is the cheap, exact, deterministic half of that case: a SET
|
|
12
|
+
* DIFFERENCE over unit ids. The library tracks context as identified units
|
|
13
|
+
* (each injection / memory entry / tool result has a stable id), so "what got
|
|
14
|
+
* dropped" is `available − sent` — no embeddings, no LLM, O(n).
|
|
15
|
+
*
|
|
16
|
+
* Causal confirmation is the MIRROR of ablation: RESTORATION. Add a dropped
|
|
17
|
+
* unit back, re-run, and an outcome flip is the causal proof. Like ablation,
|
|
18
|
+
* the re-run is consumer-supplied (the library doesn't own your agent loop);
|
|
19
|
+
* see `findDroppedContext` docs + example 10 for the pattern.
|
|
20
|
+
*
|
|
21
|
+
* Honest claim: a dropped unit is a CANDIDATE missing-context culprit, never a
|
|
22
|
+
* confirmed cause — most dropped context is correctly dropped. Only restoration
|
|
23
|
+
* makes a causal claim.
|
|
24
|
+
*/
|
|
25
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
+
exports.findDroppedContext = void 0;
|
|
27
|
+
/**
 * Report which available context units never made it to the model: the
 * id-wise set difference `available − sent`.
 *
 * Matching is by `id` only. A repeated id inside `available` is counted once
 * and the first occurrence is the one reported; ids that appear only in `sent`
 * have no effect on `dropped`. Each reported unit is a fresh object carrying
 * `id`, plus `content` when the source unit defined one.
 *
 * A dropped unit is only a CANDIDATE cause. Restoring it and re-running is
 * what turns the candidate into a causal claim.
 *
 * @param available Units that were assembled for the turn.
 * @param sent      Units that were actually delivered to the model.
 * @returns `dropped` (in `available` order), `availableCount` and `sentCount`
 *          (distinct ids on each side), `anyDropped`, and a human-readable
 *          `reason`.
 *
 * @example
 * const { dropped, anyDropped } = findDroppedContext(assembled, sentToModel);
 * if (anyDropped) {
 *   for (const unit of dropped) {
 *     if (await rerunWith(unit).outcomeFlips()) report(unit); // restoration = causal
 *   }
 * }
 */
function findDroppedContext(available, sent) {
    const deliveredIds = new Set([...sent].map((unit) => unit.id));
    // A Map keeps insertion order, so the first unit seen for an id wins and
    // the output follows the order of `available`.
    const firstById = new Map();
    for (const unit of available) {
        if (!firstById.has(unit.id)) {
            firstById.set(unit.id, unit);
        }
    }
    const dropped = [];
    for (const [id, unit] of firstById) {
        if (deliveredIds.has(id)) {
            continue;
        }
        // Leave the `content` key off entirely when the unit has none.
        dropped.push(unit.content === undefined ? { id } : { id, content: unit.content });
    }
    const availableCount = firstById.size;
    const sentCount = deliveredIds.size;
    const anyDropped = dropped.length > 0;
    let reason;
    if (anyDropped) {
        reason = `${dropped.length} of ${availableCount} available unit(s) never reached the model — candidate(s) for a missing-context bug (truncation / dilution). Confirm by RESTORATION: add a unit back and re-run; an outcome flip is the causal proof (mirror of ablation). Most dropped context is correctly dropped — only restoration confirms.`;
    }
    else {
        reason = `All ${availableCount} available unit(s) reached the model — no missing-context bug here (nothing was dropped).`;
    }
    return { dropped, availableCount, sentCount, anyDropped, reason };
}
|
|
66
|
+
exports.findDroppedContext = findDroppedContext;
|
|
67
|
+
//# sourceMappingURL=missingContext.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"missingContext.js","sourceRoot":"","sources":["../../../src/lib/context-bisect/missingContext.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;;;AAkCH;;;;;;;;;;;;;;;;;GAiBG;AACH,SAAgB,kBAAkB,CAChC,SAAiC,EACjC,IAA4B;IAE5B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,IAAI;QAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAExC,MAAM,OAAO,GAAkB,EAAE,CAAC;IAClC,MAAM,aAAa,GAAG,IAAI,GAAG,EAAU,CAAC;IACxC,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1B,IAAI,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAAE,SAAS,CAAC,2BAA2B;QAClE,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACxB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;IAClH,CAAC;IAED,MAAM,cAAc,GAAG,aAAa,CAAC,IAAI,CAAC;IAC1C,MAAM,SAAS,GAAG,OAAO,CAAC,IAAI,CAAC;IAC/B,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;IACtC,MAAM,MAAM,GAAG,UAAU;QACvB,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,OAAO,cAAc,mSAAmS;QAC3U,CAAC,CAAC,OAAO,cAAc,2FAA2F,CAAC;IAErH,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC;AACpE,CAAC;AAvBD,gDAuBC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runRestorationProbe = void 0;
|
|
4
|
+
/**
|
|
5
|
+
* restoration — RFC-003 Part B: the causal tier for the missing-context finder
|
|
6
|
+
* (interface #3), the MIRROR of ablation (D8's restoration half).
|
|
7
|
+
*
|
|
8
|
+
* Ablation confirms a PRESENT culprit by removing it and watching the outcome
|
|
9
|
+
* flip. Restoration confirms an ABSENT culprit (a unit `findDroppedContext`
|
|
10
|
+
* surfaced) by adding it BACK and watching the outcome flip. Same seeded-rerun
|
|
11
|
+
* discipline, same verdict rule (`verdictFor(..., 'restoring')`), same honest
|
|
12
|
+
* baseline check — only the intervention is inverted.
|
|
13
|
+
*
|
|
14
|
+
* The re-run is consumer-owned (the library doesn't own your agent loop), just
|
|
15
|
+
* like `AblationRunner`. `RestorationRunner` receives the units to add back
|
|
16
|
+
* (`[]` = the un-restored baseline) plus a seed, and returns the run's output.
|
|
17
|
+
*/
|
|
18
|
+
const cosine_js_1 = require("../../memory/embedding/cosine.js");
|
|
19
|
+
const ablation_js_1 = require("./ablation.js");
|
|
20
|
+
const types_js_1 = require("./types.js");
|
|
21
|
+
/**
 * Execute a single restoration probe.
 *
 * The consumer's `config.rerun.runner` is invoked once per seed (seeds count
 * up from 0) with `units` as the context to add back; pass `[]` for the
 * un-restored baseline. Every re-run output is embedded and compared with the
 * embedding of `config.rerun.originalOutput`, and is also passed to the
 * outcome comparator, which decides whether that run counts as a flip.
 *
 * The comparator is `config.rerun.outcomeChanged` when supplied; otherwise one
 * is built from the embedder and the flip threshold (`config.rerun.flipThreshold`
 * or the package default).
 *
 * Runs are awaited one after another, in seed order.
 *
 * @param config Probe configuration: `embedder` plus the `rerun` settings.
 * @param units  Units to restore for this probe.
 * @returns `samples` (number of runs), `flips` (runs the comparator flagged),
 *          and `similarity` (stats over the per-run cosine similarities).
 */
async function runRestorationProbe(config, units) {
    const { rerun, embedder } = config;
    const sampleCount = (0, ablation_js_1.resolveSamples)(rerun.samples);
    const flipThreshold = rerun.flipThreshold ?? types_js_1.CONTEXT_BISECT_DEFAULTS.flipThreshold;
    const hasFlipped = rerun.outcomeChanged ?? (0, ablation_js_1.defaultOutcomeComparator)(embedder, flipThreshold);
    const perRunSimilarity = [];
    let flips = 0;
    // The original output is embedded once and reused for every seed.
    const baselineVec = await embedder.embed({ text: rerun.originalOutput });
    let seed = 0;
    while (seed < sampleCount) {
        const rerunOutput = await rerun.runner(units, { seed });
        const rerunVec = await embedder.embed({ text: rerunOutput });
        perRunSimilarity.push((0, cosine_js_1.cosineSimilarity)(baselineVec, rerunVec));
        const flipped = await hasFlipped(rerun.originalOutput, rerunOutput);
        if (flipped) {
            flips += 1;
        }
        seed += 1;
    }
    return {
        samples: sampleCount,
        flips,
        similarity: (0, ablation_js_1.similarityStats)(perRunSimilarity),
    };
}
|
|
42
|
+
exports.runRestorationProbe = runRestorationProbe;
|
|
43
|
+
//# sourceMappingURL=restoration.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"restoration.js","sourceRoot":"","sources":["../../../src/lib/context-bisect/restoration.ts"],"names":[],"mappings":";;;AAAA;;;;;;;;;;;;;GAaG;AACH,gEAAoE;AAEpE,+CAA0F;AAG1F,yCAAqD;AAoCrD;;;;GAIG;AACI,KAAK,UAAU,mBAAmB,CACvC,MAA8B,EAC9B,KAA6B;IAE7B,MAAM,OAAO,GAAG,IAAA,4BAAc,EAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACrD,MAAM,aAAa,GAAG,MAAM,CAAC,KAAK,CAAC,aAAa,IAAI,kCAAuB,CAAC,aAAa,CAAC;IAC1F,MAAM,cAAc,GAClB,MAAM,CAAC,KAAK,CAAC,cAAc,IAAI,IAAA,sCAAwB,EAAC,MAAM,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;IAE1F,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC,CAAC;IACvF,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,OAAO,EAAE,IAAI,EAAE,EAAE,CAAC;QAC1C,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC;QAC1D,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAChE,YAAY,CAAC,IAAI,CAAC,IAAA,4BAAgB,EAAC,WAAW,EAAE,SAAS,CAAC,CAAC,CAAC;QAC5D,IAAI,MAAM,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,MAAM,CAAC;YAAE,KAAK,EAAE,CAAC;IACzE,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,IAAA,6BAAe,EAAC,YAAY,CAAC,EAAE,CAAC;AACvE,CAAC;AAnBD,kDAmBC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/context-bisect/types.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmCG;;;
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/context-bisect/types.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAmCG;;;AAoVH,wEAAwE;AAE3D,QAAA,uBAAuB,GAAG;IACrC,uDAAuD;IACvD,QAAQ,EAAE,EAAE;IACZ,sDAAsD;IACtD,QAAQ,EAAE,EAAE;IACZ,0CAA0C;IAC1C,WAAW,EAAE,EAAE;IACf,4DAA4D;IAC5D,YAAY,EAAE,IAAI;IAClB,yEAAyE;IACzE,OAAO,EAAE,CAAC;IACV,mEAAmE;IACnE,aAAa,EAAE,GAAG;IAClB,mDAAmD;IACnD,SAAS,EAAE,EAAE;IACb,8DAA8D;IAC9D,WAAW,EAAE,CAAC;CACN,CAAC"}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.rankingConfidence = exports.ratioStrategy = exports.marginStrategy = void 0;
|
|
4
|
+
const types_js_1 = require("./types.js");
|
|
5
|
+
/**
 * Validate that a numeric option is zero or greater and hand it back.
 *
 * @param label Prefix used in the error message to identify the option.
 * @param x     Value to validate.
 * @returns `x`, unchanged, when it passes.
 * @throws Error when `x` is negative or NaN.
 */
const nonNegative = (label, x) => {
    // Phrased as a positive `x >= 0` test: NaN fails every comparison, so it
    // is rejected here together with negatives.
    if (x >= 0) {
        return x;
    }
    throw new Error(`${label} must be >= 0 (got ${x})`);
};
|
|
11
|
+
/**
|
|
12
|
+
* Default strategy: ABSOLUTE top-2 gap `s0 − s1 >= threshold`. Simple and
|
|
13
|
+
* interpretable, but embedder-relative (the gap scale depends on the embedding
|
|
14
|
+
* geometry). Use `ratioStrategy` for cross-embedder transfer.
|
|
15
|
+
*/
|
|
16
|
+
function marginStrategy(threshold = types_js_1.DEFAULT_CLEAR_WINNER_MARGIN) {
|
|
17
|
+
nonNegative('marginStrategy: threshold', threshold);
|
|
18
|
+
return {
|
|
19
|
+
name: `margin>=${threshold}`,
|
|
20
|
+
isClearWinner: (s) => s.length >= 2 && s[0] - s[1] >= threshold,
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
exports.marginStrategy = marginStrategy;
|
|
24
|
+
/**
|
|
25
|
+
* Scale-invariant strategy: top-2 gap as a FRACTION of the top score,
|
|
26
|
+
* `(s0 − s1) / |s0| >= threshold`. Transfers across embedders / answer lengths
|
|
27
|
+
* where the absolute margin does not. A zero (or all-equal) top is never a
|
|
28
|
+
* clear winner.
|
|
29
|
+
*/
|
|
30
|
+
function ratioStrategy(threshold = types_js_1.DEFAULT_CLEAR_WINNER_RATIO) {
|
|
31
|
+
nonNegative('ratioStrategy: threshold', threshold);
|
|
32
|
+
return {
|
|
33
|
+
name: `ratio>=${threshold}`,
|
|
34
|
+
isClearWinner: (s) => {
|
|
35
|
+
if (s.length < 2)
|
|
36
|
+
return false;
|
|
37
|
+
const denom = Math.abs(s[0]);
|
|
38
|
+
if (denom === 0)
|
|
39
|
+
return false; // flat at zero → no clear winner (avoid div-by-zero)
|
|
40
|
+
return (s[0] - s[1]) / denom >= threshold;
|
|
41
|
+
},
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
exports.ratioStrategy = ratioStrategy;
|
|
45
|
+
/**
 * Read an entry's score for ranking purposes.
 *
 * A finite score is returned as-is. Anything else (NaN, +Infinity, −Infinity)
 * is treated as malformed and mapped to −Infinity, so the entry sinks to the
 * bottom of a descending sort instead of disturbing it. +Infinity is demoted
 * on purpose: a meaningless score must never rank first.
 */
const finiteScore = (s) => {
    const { score } = s;
    if (!Number.isFinite(score)) {
        return -Infinity;
    }
    return score;
};
|
|
49
|
+
/** Total, NaN-free comparator (descending) — correctness does not rest on the
|
|
50
|
+
* engine's handling of a NaN comparator return for the all-malformed case. */
|
|
51
|
+
const byScoreDesc = (a, b) => {
|
|
52
|
+
const x = finiteScore(a);
|
|
53
|
+
const y = finiteScore(b);
|
|
54
|
+
return x > y ? -1 : x < y ? 1 : 0;
|
|
55
|
+
};
|
|
56
|
+
/**
|
|
57
|
+
* Assess whether an influence ranking has a clear winner to trust as a lead,
|
|
58
|
+
* or is too close to call and should be confirmed by ablation.
|
|
59
|
+
*
|
|
60
|
+
* Guarantees (relied on by the localizer): the returned `shortlist` always
|
|
61
|
+
* contains `lead` when there is one, and — when there is NO clear winner and
|
|
62
|
+
* there are ≥2 suspects — always contains the runner-up too (so ablation over
|
|
63
|
+
* the shortlist covers the real culprit even if it ranked below an innocent).
|
|
64
|
+
*
|
|
65
|
+
* @param scores `scoreInfluence` output (any order — re-sorted defensively).
|
|
66
|
+
* Ids are assumed unique (as `scoreInfluence` enforces); the
|
|
67
|
+
* shortlist is de-duplicated defensively regardless.
|
|
68
|
+
* @throws Error on negative or NaN options.
|
|
69
|
+
*/
|
|
70
|
+
function rankingConfidence(scores, options = {}) {
|
|
71
|
+
// strategy WINS over clearWinnerMargin; the default builds a margin strategy
|
|
72
|
+
// (which validates its own threshold).
|
|
73
|
+
const strategy = options.strategy ?? marginStrategy(options.clearWinnerMargin ?? types_js_1.DEFAULT_CLEAR_WINNER_MARGIN);
|
|
74
|
+
const shortlistBand = nonNegative('rankingConfidence: shortlistBand', options.shortlistBand ?? types_js_1.DEFAULT_SHORTLIST_BAND);
|
|
75
|
+
if (scores.length === 0) {
|
|
76
|
+
return { clearWinner: false, margin: undefined, lead: undefined, shortlist: [], reason: 'No suspects to rank.' };
|
|
77
|
+
}
|
|
78
|
+
const ranked = [...scores].sort(byScoreDesc);
|
|
79
|
+
const top = ranked[0];
|
|
80
|
+
const topScore = finiteScore(top);
|
|
81
|
+
if (ranked.length === 1) {
|
|
82
|
+
return {
|
|
83
|
+
clearWinner: true,
|
|
84
|
+
margin: undefined,
|
|
85
|
+
lead: top.id,
|
|
86
|
+
shortlist: [top.id],
|
|
87
|
+
reason: `Only one suspect "${top.id}" — clear by default (nothing to compare against); confirm by ablation for a causal claim.`,
|
|
88
|
+
};
|
|
89
|
+
}
|
|
90
|
+
const secondScore = finiteScore(ranked[1]);
|
|
91
|
+
// Clear winner, robust to malformed scores (framework invariants, NOT the
|
|
92
|
+
// strategy's concern):
|
|
93
|
+
// - top itself malformed (e.g. all-malformed) → no clear winner, no margin.
|
|
94
|
+
// - clean finite top, malformed runner-up → unambiguous lead → clear winner
|
|
95
|
+
// (the inverse of suppressing it); no meaningful finite gap to report.
|
|
96
|
+
// - both finite → the pluggable STRATEGY decides, over all finite scores.
|
|
97
|
+
let clearWinner;
|
|
98
|
+
let margin;
|
|
99
|
+
if (!Number.isFinite(topScore)) {
|
|
100
|
+
clearWinner = false;
|
|
101
|
+
margin = undefined;
|
|
102
|
+
}
|
|
103
|
+
else if (!Number.isFinite(secondScore)) {
|
|
104
|
+
clearWinner = true;
|
|
105
|
+
margin = undefined;
|
|
106
|
+
}
|
|
107
|
+
else {
|
|
108
|
+
margin = topScore - secondScore;
|
|
109
|
+
const finiteRanked = ranked.map(finiteScore).filter((x) => Number.isFinite(x));
|
|
110
|
+
clearWinner = strategy.isClearWinner(finiteRanked);
|
|
111
|
+
}
|
|
112
|
+
// Shortlist = the band of FINITE scores within shortlistBand of a finite top.
|
|
113
|
+
// Then enforce the guarantees: lead always present; when there is no clear
|
|
114
|
+
// winner with ≥2 suspects, the runner-up is present too.
|
|
115
|
+
const shortlist = [];
|
|
116
|
+
const seen = new Set();
|
|
117
|
+
const add = (id) => {
|
|
118
|
+
if (!seen.has(id)) {
|
|
119
|
+
seen.add(id);
|
|
120
|
+
shortlist.push(id);
|
|
121
|
+
}
|
|
122
|
+
};
|
|
123
|
+
if (Number.isFinite(topScore)) {
|
|
124
|
+
for (const s of ranked) {
|
|
125
|
+
const sc = finiteScore(s);
|
|
126
|
+
if (Number.isFinite(sc) && topScore - sc <= shortlistBand)
|
|
127
|
+
add(s.id);
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
add(top.id); // guarantee: lead always in the shortlist
|
|
131
|
+
if (!clearWinner)
|
|
132
|
+
add(ranked[1].id); // guarantee: no-clear-winner shortlist covers the runner-up
|
|
133
|
+
const gap = margin === undefined ? 'n/a' : margin.toFixed(3);
|
|
134
|
+
const reason = clearWinner
|
|
135
|
+
? margin === undefined
|
|
136
|
+
? `Clear winner [${strategy.name}]: "${top.id}" leads clearly (runner-up score unavailable). A clear lead is a similarity PROXY, not a proven cause — confirm by ablation.`
|
|
137
|
+
: `Clear winner [${strategy.name}]: "${top.id}" leads (top-2 margin ${gap}). A clear lead is a similarity PROXY, not a proven cause — confirm by ablation.`
|
|
138
|
+
: `Too close to call [${strategy.name}]: top-2 margin ${gap} — no suspect stands out by output similarity. Double-check the ${shortlist.length} shortlisted suspect(s) by ABLATION. Similarity scoring is blind to absence/crowding bugs (history truncation, context dilution), where the culprit need not resemble the answer; a flat top can also mean genuinely co-equal sources.`;
|
|
139
|
+
return { clearWinner, margin, lead: top.id, shortlist, reason };
|
|
140
|
+
}
|
|
141
|
+
exports.rankingConfidence = rankingConfidence;
|
|
142
|
+
//# sourceMappingURL=attributability.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"attributability.js","sourceRoot":"","sources":["../../../src/lib/influence-core/attributability.ts"],"names":[],"mappings":";;;AA2BA,yCAA6G;AAE7G,MAAM,WAAW,GAAG,CAAC,KAAa,EAAE,CAAS,EAAU,EAAE;IACvD,+EAA+E;IAC/E,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,GAAG,KAAK,sBAAsB,CAAC,GAAG,CAAC,CAAC;IACnE,OAAO,CAAC,CAAC;AACX,CAAC,CAAC;AAEF;;;;GAIG;AACH,SAAgB,cAAc,CAAC,YAAoB,sCAA2B;IAC5E,WAAW,CAAC,2BAA2B,EAAE,SAAS,CAAC,CAAC;IACpD,OAAO;QACL,IAAI,EAAE,WAAW,SAAS,EAAE;QAC5B,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,SAAS;KAChE,CAAC;AACJ,CAAC;AAND,wCAMC;AAED;;;;;GAKG;AACH,SAAgB,aAAa,CAAC,YAAoB,qCAA0B;IAC1E,WAAW,CAAC,0BAA0B,EAAE,SAAS,CAAC,CAAC;IACnD,OAAO;QACL,IAAI,EAAE,UAAU,SAAS,EAAE;QAC3B,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE;YACnB,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC;YAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7B,IAAI,KAAK,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC,CAAC,qDAAqD;YACpF,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,IAAI,SAAS,CAAC;QAC5C,CAAC;KACF,CAAC;AACJ,CAAC;AAXD,sCAWC;AAoBD;;mFAEmF;AACnF,MAAM,WAAW,GAAG,CAAC,CAAiB,EAAU,EAAE,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;AAEpG;+EAC+E;AAC/E,MAAM,WAAW,GAAG,CAAC,CAAiB,EAAE,CAAiB,EAAU,EAAE;IACnE,MAAM,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;IACzB,MAAM,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;IACzB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACpC,CAAC,CAAC;AAEF;;;;;;;;;;;;;GAaG;AACH,SAAgB,iBAAiB,CAC/B,MAAiC,EACjC,UAAoC,EAAE;IAEtC,6EAA6E;IAC7E,uCAAuC;IACvC,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,cAAc,CAAC,OAAO,CAAC,iBAAiB,IAAI,sCAA2B,CAAC,CAAC;IAC9G,MAAM,aAAa,GAAG,WAAW,CAAC,kCAAkC,EAAE,OAAO,CAAC,aAAa,IAAI,iCAAsB,CAAC,CAAC;IAEvH,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,SAAS,EAAE,S
AAS,EAAE,EAAE,EAAE,MAAM,EAAE,sBAAsB,EAAE,CAAC;IACnH,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACtB,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAElC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO;YACL,WAAW,EAAE,IAAI;YACjB,MAAM,EAAE,SAAS;YACjB,IAAI,EAAE,GAAG,CAAC,EAAE;YACZ,SAAS,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC;YACnB,MAAM,EAAE,qBAAqB,GAAG,CAAC,EAAE,4FAA4F;SAChI,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3C,0EAA0E;IAC1E,uBAAuB;IACvB,6EAA6E;IAC7E,6EAA6E;IAC7E,0EAA0E;IAC1E,2EAA2E;IAC3E,IAAI,WAAoB,CAAC;IACzB,IAAI,MAA0B,CAAC;IAC/B,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC/B,WAAW,GAAG,KAAK,CAAC;QACpB,MAAM,GAAG,SAAS,CAAC;IACrB,CAAC;SAAM,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QACzC,WAAW,GAAG,IAAI,CAAC;QACnB,MAAM,GAAG,SAAS,CAAC;IACrB,CAAC;SAAM,CAAC;QACN,MAAM,GAAG,QAAQ,GAAG,WAAW,CAAC;QAChC,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/E,WAAW,GAAG,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;IACrD,CAAC;IAED,8EAA8E;IAC9E,2EAA2E;IAC3E,yDAAyD;IACzD,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,GAAG,GAAG,CAAC,EAAU,EAAE,EAAE;QACzB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YAClB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACb,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrB,CAAC;IACH,CAAC,CAAC;IACF,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC9B,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;YACvB,MAAM,EAAE,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;YAC1B,IAAI,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,QAAQ,GAAG,EAAE,IAAI,aAAa;gBAAE,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;IACD,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,0CAA0C;IACvD,IAAI,CAAC,WAAW;QAAE,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,4DAA4D;IAEjG,MAAM,GAAG,GAAG,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAC7D,MAAM,MAAM,GAAG,WAAW;QACxB,CAAC,CAAC,MAAM,KAAK,SAAS;YA
CpB,CAAC,CAAC,iBAAiB,QAAQ,CAAC,IAAI,OAAO,GAAG,CAAC,EAAE,8HAA8H;YAC3K,CAAC,CAAC,iBAAiB,QAAQ,CAAC,IAAI,OAAO,GAAG,CAAC,EAAE,yBAAyB,GAAG,kFAAkF;QAC7J,CAAC,CAAC,sBAAsB,QAAQ,CAAC,IAAI,mBAAmB,GAAG,mEAAmE,SAAS,CAAC,MAAM,wOAAwO,CAAC;IAEzX,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC;AAClE,CAAC;AA7ED,8CA6EC"}
|
|
@@ -26,11 +26,18 @@
|
|
|
26
26
|
* and never causal attribution.
|
|
27
27
|
*/
|
|
28
28
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
29
|
-
exports.scoreMargin = exports.pairwiseSimilarity = exports.structuralProximity = exports.scoreInfluence = exports.persistence = exports.finalAnswerSimilarity = exports.compositeScore = exports.averageRelevancy = exports.adaptWeights = exports.embeddingCache = exports.EmbeddingCache = exports.contentHash = exports.DEFAULT_PERSISTENCE_THRESHOLD = exports.DEFAULT_MARGIN_THRESHOLD = exports.DEFAULT_INFLUENCE_WEIGHTS = void 0;
|
|
29
|
+
exports.scoreMargin = exports.pairwiseSimilarity = exports.structuralProximity = exports.scoreInfluence = exports.persistence = exports.finalAnswerSimilarity = exports.compositeScore = exports.averageRelevancy = exports.adaptWeights = exports.embeddingCache = exports.EmbeddingCache = exports.contentHash = exports.ratioStrategy = exports.rankingConfidence = exports.marginStrategy = exports.DEFAULT_SHORTLIST_BAND = exports.DEFAULT_PERSISTENCE_THRESHOLD = exports.DEFAULT_MARGIN_THRESHOLD = exports.DEFAULT_INFLUENCE_WEIGHTS = exports.DEFAULT_CLEAR_WINNER_RATIO = exports.DEFAULT_CLEAR_WINNER_MARGIN = void 0;
|
|
30
30
|
var types_js_1 = require("./types.js");
|
|
31
|
+
Object.defineProperty(exports, "DEFAULT_CLEAR_WINNER_MARGIN", { enumerable: true, get: function () { return types_js_1.DEFAULT_CLEAR_WINNER_MARGIN; } });
|
|
32
|
+
Object.defineProperty(exports, "DEFAULT_CLEAR_WINNER_RATIO", { enumerable: true, get: function () { return types_js_1.DEFAULT_CLEAR_WINNER_RATIO; } });
|
|
31
33
|
Object.defineProperty(exports, "DEFAULT_INFLUENCE_WEIGHTS", { enumerable: true, get: function () { return types_js_1.DEFAULT_INFLUENCE_WEIGHTS; } });
|
|
32
34
|
Object.defineProperty(exports, "DEFAULT_MARGIN_THRESHOLD", { enumerable: true, get: function () { return types_js_1.DEFAULT_MARGIN_THRESHOLD; } });
|
|
33
35
|
Object.defineProperty(exports, "DEFAULT_PERSISTENCE_THRESHOLD", { enumerable: true, get: function () { return types_js_1.DEFAULT_PERSISTENCE_THRESHOLD; } });
|
|
36
|
+
Object.defineProperty(exports, "DEFAULT_SHORTLIST_BAND", { enumerable: true, get: function () { return types_js_1.DEFAULT_SHORTLIST_BAND; } });
|
|
37
|
+
var attributability_js_1 = require("./attributability.js");
|
|
38
|
+
Object.defineProperty(exports, "marginStrategy", { enumerable: true, get: function () { return attributability_js_1.marginStrategy; } });
|
|
39
|
+
Object.defineProperty(exports, "rankingConfidence", { enumerable: true, get: function () { return attributability_js_1.rankingConfidence; } });
|
|
40
|
+
Object.defineProperty(exports, "ratioStrategy", { enumerable: true, get: function () { return attributability_js_1.ratioStrategy; } });
|
|
34
41
|
var cache_js_1 = require("./cache.js");
|
|
35
42
|
Object.defineProperty(exports, "contentHash", { enumerable: true, get: function () { return cache_js_1.contentHash; } });
|
|
36
43
|
Object.defineProperty(exports, "EmbeddingCache", { enumerable: true, get: function () { return cache_js_1.EmbeddingCache; } });
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/influence-core/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;;;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/lib/influence-core/index.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;;;AAmBH,uCAOoB;AANlB,uHAAA,2BAA2B,OAAA;AAC3B,sHAAA,0BAA0B,OAAA;AAC1B,qHAAA,yBAAyB,OAAA;AACzB,oHAAA,wBAAwB,OAAA;AACxB,yHAAA,6BAA6B,OAAA;AAC7B,kHAAA,sBAAsB,OAAA;AAGxB,2DAK8B;AAJ5B,oHAAA,cAAc,OAAA;AACd,uHAAA,iBAAiB,OAAA;AACjB,mHAAA,aAAa,OAAA;AAIf,uCAMoB;AALlB,uGAAA,WAAW,OAAA;AACX,0GAAA,cAAc,OAAA;AACd,0GAAA,cAAc,OAAA;AAKhB,2CASsB;AARpB,0GAAA,YAAY,OAAA;AACZ,8GAAA,gBAAgB,OAAA;AAChB,4GAAA,cAAc,OAAA;AACd,mHAAA,qBAAqB,OAAA;AACrB,yGAAA,WAAW,OAAA;AACX,4GAAA,cAAc,OAAA;AACd,iHAAA,mBAAmB,OAAA;AAIrB,iDAAkF;AAAzE,mHAAA,kBAAkB,OAAA;AAE3B,yCAAgE;AAAvD,wGAAA,WAAW,OAAA"}
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
* live.
|
|
24
24
|
*/
|
|
25
25
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
26
|
-
exports.DEFAULT_MARGIN_THRESHOLD = exports.DEFAULT_PERSISTENCE_THRESHOLD = exports.DEFAULT_INFLUENCE_WEIGHTS = void 0;
|
|
26
|
+
exports.DEFAULT_CLEAR_WINNER_RATIO = exports.DEFAULT_SHORTLIST_BAND = exports.DEFAULT_CLEAR_WINNER_MARGIN = exports.DEFAULT_MARGIN_THRESHOLD = exports.DEFAULT_PERSISTENCE_THRESHOLD = exports.DEFAULT_INFLUENCE_WEIGHTS = void 0;
|
|
27
27
|
/** Paper defaults: α=0.40, β=0.30, γ=0.20, δ=0.10 (sum to 1.0). */
|
|
28
28
|
exports.DEFAULT_INFLUENCE_WEIGHTS = Object.freeze({
|
|
29
29
|
fa: 0.4,
|
|
@@ -35,4 +35,31 @@ exports.DEFAULT_INFLUENCE_WEIGHTS = Object.freeze({
|
|
|
35
35
|
exports.DEFAULT_PERSISTENCE_THRESHOLD = 0.3;
|
|
36
36
|
/** RFC-002 §4 default: margins below this flag the choice as `narrow`. */
|
|
37
37
|
exports.DEFAULT_MARGIN_THRESHOLD = 0.05;
|
|
38
|
+
/**
|
|
39
|
+
* RFC-003 default: an influence ranking whose top-1 vs top-2 score margin is
|
|
40
|
+
* below this has NO clear winner — a shortlist, not a verdict. Escalate to
|
|
41
|
+
* ablation.
|
|
42
|
+
*
|
|
43
|
+
* UNCALIBRATED proxy starting point, chosen for interpretability. `margin`
|
|
44
|
+
* is an ABSOLUTE difference on the same scale as `scoreInfluence`'s composite
|
|
45
|
+
* (S ∈ ≈[−0.7, 1]), so this threshold is EMBEDDER-RELATIVE — recalibrate by
|
|
46
|
+
* sweeping clear-winner vs flat rankings on your embedder. The numeric
|
|
47
|
+
* coincidence with `DEFAULT_MARGIN_THRESHOLD` is NOT a shared derivation: that
|
|
48
|
+
* one measures `scoreMargin`'s chosen-vs-not-chosen distribution, a different
|
|
49
|
+
* statistic.
|
|
50
|
+
*/
|
|
51
|
+
exports.DEFAULT_CLEAR_WINNER_MARGIN = 0.05;
|
|
52
|
+
/**
|
|
53
|
+
* RFC-003 default: when there is no clear winner, suspects scoring within this
|
|
54
|
+
* band of the top form the shortlist ablation should COVER (the culprit may be
|
|
55
|
+
* any of them — or, for absence bugs, none). UNCALIBRATED proxy; embedder-
|
|
56
|
+
* relative (see `DEFAULT_CLEAR_WINNER_MARGIN`).
|
|
57
|
+
*/
|
|
58
|
+
exports.DEFAULT_SHORTLIST_BAND = 0.1;
|
|
59
|
+
/**
|
|
60
|
+
* RFC-003 default for `ratioStrategy`: the top-2 gap as a FRACTION of the top
|
|
61
|
+
* score `(s0 − s1) / |s0|`. Unlike the absolute margin this is scale-invariant,
|
|
62
|
+
* so it transfers across embedders / answer lengths. UNCALIBRATED proxy.
|
|
63
|
+
*/
|
|
64
|
+
exports.DEFAULT_CLEAR_WINNER_RATIO = 0.05;
|
|
38
65
|
//# sourceMappingURL=types.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/influence-core/types.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;;;AA0BH,mEAAmE;AACtD,QAAA,yBAAyB,GAAqB,MAAM,CAAC,MAAM,CAAC;IACvE,EAAE,EAAE,GAAG;IACP,GAAG,EAAE,GAAG;IACR,OAAO,EAAE,GAAG;IACZ,KAAK,EAAE,GAAG;CACX,CAAC,CAAC;AAEH,yDAAyD;AAC5C,QAAA,6BAA6B,GAAG,GAAG,CAAC;AAEjD,0EAA0E;AAC7D,QAAA,wBAAwB,GAAG,IAAI,CAAC"}
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/influence-core/types.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;;;AA0BH,mEAAmE;AACtD,QAAA,yBAAyB,GAAqB,MAAM,CAAC,MAAM,CAAC;IACvE,EAAE,EAAE,GAAG;IACP,GAAG,EAAE,GAAG;IACR,OAAO,EAAE,GAAG;IACZ,KAAK,EAAE,GAAG;CACX,CAAC,CAAC;AAEH,yDAAyD;AAC5C,QAAA,6BAA6B,GAAG,GAAG,CAAC;AAEjD,0EAA0E;AAC7D,QAAA,wBAAwB,GAAG,IAAI,CAAC;AAE7C;;;;;;;;;;;;GAYG;AACU,QAAA,2BAA2B,GAAG,IAAI,CAAC;AAEhD;;;;;GAKG;AACU,QAAA,sBAAsB,GAAG,GAAG,CAAC;AAE1C;;;;GAIG;AACU,QAAA,0BAA0B,GAAG,IAAI,CAAC"}
|
package/dist/observe.js
CHANGED
|
@@ -32,8 +32,8 @@
|
|
|
32
32
|
* directly; Tier 3 dashboards are opt-in.
|
|
33
33
|
*/
|
|
34
34
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
35
|
-
exports.
|
|
36
|
-
exports.toolChoiceRecorder = exports.buildChoiceContext = exports.saysWhatNotWhenRule = exports.runToolLintCli = exports.optionalParamRule = exports.MOCK_EMBEDDER_CALIBRATION = exports.formatToolCatalogReport = exports.enumInProseRule = exports.differentiationHint = exports.descriptionRule = exports.defaultStructuralRules = exports.DEFAULT_WHEN_CUES = exports.DEFAULT_WATCH_BAND = exports.DEFAULT_OMISSION_CUES = exports.DEFAULT_CONFUSABILITY_THRESHOLD = exports.confusabilityText = exports.coerceCatalog = exports.catalogFromTools = exports.analyzeToolCatalog = exports.toBacktrackTrace = exports.verdictFor = exports.suspectLabel = exports.stepOutputText = exports.runAblationProbe = exports.probeFlipped = exports.localizeContextBug = exports.llmEdgeWeigher = exports.llmCallIdsFromEvents = exports.formatContextBugReport = exports.defaultSuspectClassifier = exports.defaultOutcomeComparator = exports.CONTEXT_BISECT_DEFAULTS = exports.bisectCulprits = exports.applyAblations = exports.ablationForSuspect = exports.traceDebugAgent = exports.SelfExplainBinding = exports.buildSelfExplainToolProvider = void 0;
|
|
35
|
+
exports.structuralProximity = exports.scoreMargin = exports.scoreInfluence = exports.ratioStrategy = exports.rankingConfidence = exports.persistence = exports.pairwiseSimilarity = exports.marginStrategy = exports.finalAnswerSimilarity = exports.embeddingCache = exports.EmbeddingCache = exports.DEFAULT_SHORTLIST_BAND = exports.DEFAULT_PERSISTENCE_THRESHOLD = exports.DEFAULT_MARGIN_THRESHOLD = exports.DEFAULT_INFLUENCE_WEIGHTS = exports.DEFAULT_CLEAR_WINNER_RATIO = exports.DEFAULT_CLEAR_WINNER_MARGIN = exports.contentHash = exports.compositeScore = exports.averageRelevancy = exports.adaptWeights = exports.typedEmit = exports.agentThinkingTrace = exports.toolLineageRecorder = exports.attachStatus = exports.LoggingDomains = exports.attachLogging = exports.skillRecorder = exports.permissionRecorder = exports.memoryRecorder = exports.evalRecorder = exports.contextEvaluatedRecorder = exports.toolsRecorder = exports.costRecorder = exports.LiveAgentTurnTracker = exports.LiveToolTracker = exports.LiveLLMTracker = exports.LiveStateRecorder = exports.liveStateRecorder = exports.buildStepGraph = exports.attachFlowchart = exports.runStepRecorder = exports.RunStepRecorder = exports.buildRunSteps = exports.BoundaryRecorder = exports.boundaryRecorder = exports.agentRecorder = exports.compositionRecorder = exports.streamRecorder = exports.ContextRecorder = void 0;
|
|
36
|
+
exports.toolChoiceRecorder = exports.buildChoiceContext = exports.saysWhatNotWhenRule = exports.runToolLintCli = exports.optionalParamRule = exports.MOCK_EMBEDDER_CALIBRATION = exports.formatToolCatalogReport = exports.enumInProseRule = exports.differentiationHint = exports.descriptionRule = exports.defaultStructuralRules = exports.DEFAULT_WHEN_CUES = exports.DEFAULT_WATCH_BAND = exports.DEFAULT_OMISSION_CUES = exports.DEFAULT_CONFUSABILITY_THRESHOLD = exports.confusabilityText = exports.coerceCatalog = exports.catalogFromTools = exports.analyzeToolCatalog = exports.toBacktrackTrace = exports.verdictFor = exports.suspectLabel = exports.stepOutputText = exports.runRestorationProbe = exports.runAblationProbe = exports.probeFlipped = exports.localizeContextBug = exports.llmEdgeWeigher = exports.llmCallIdsFromEvents = exports.formatContextBugReport = exports.findDroppedContext = exports.defaultSuspectClassifier = exports.defaultOutcomeComparator = exports.CONTEXT_BISECT_DEFAULTS = exports.bisectCulprits = exports.applyAblations = exports.ablationForSuspect = exports.traceDebugAgent = exports.SelfExplainBinding = exports.buildSelfExplainToolProvider = exports.buildSelfExplainSkill = exports.traceToolpack = exports.TOOLPACK_HARD_CAPS = exports.NO_COMPLETED_RUN_MESSAGE = exports.lazyTraceToolpack = exports.callTraceTool = void 0;
|
|
37
37
|
// Tier 1 — context + stream
|
|
38
38
|
var ContextRecorder_js_1 = require("./recorders/core/ContextRecorder.js");
|
|
39
39
|
Object.defineProperty(exports, "ContextRecorder", { enumerable: true, get: function () { return ContextRecorder_js_1.ContextRecorder; } });
|
|
@@ -103,14 +103,20 @@ Object.defineProperty(exports, "adaptWeights", { enumerable: true, get: function
|
|
|
103
103
|
Object.defineProperty(exports, "averageRelevancy", { enumerable: true, get: function () { return index_js_1.averageRelevancy; } });
|
|
104
104
|
Object.defineProperty(exports, "compositeScore", { enumerable: true, get: function () { return index_js_1.compositeScore; } });
|
|
105
105
|
Object.defineProperty(exports, "contentHash", { enumerable: true, get: function () { return index_js_1.contentHash; } });
|
|
106
|
+
Object.defineProperty(exports, "DEFAULT_CLEAR_WINNER_MARGIN", { enumerable: true, get: function () { return index_js_1.DEFAULT_CLEAR_WINNER_MARGIN; } });
|
|
107
|
+
Object.defineProperty(exports, "DEFAULT_CLEAR_WINNER_RATIO", { enumerable: true, get: function () { return index_js_1.DEFAULT_CLEAR_WINNER_RATIO; } });
|
|
106
108
|
Object.defineProperty(exports, "DEFAULT_INFLUENCE_WEIGHTS", { enumerable: true, get: function () { return index_js_1.DEFAULT_INFLUENCE_WEIGHTS; } });
|
|
107
109
|
Object.defineProperty(exports, "DEFAULT_MARGIN_THRESHOLD", { enumerable: true, get: function () { return index_js_1.DEFAULT_MARGIN_THRESHOLD; } });
|
|
108
110
|
Object.defineProperty(exports, "DEFAULT_PERSISTENCE_THRESHOLD", { enumerable: true, get: function () { return index_js_1.DEFAULT_PERSISTENCE_THRESHOLD; } });
|
|
111
|
+
Object.defineProperty(exports, "DEFAULT_SHORTLIST_BAND", { enumerable: true, get: function () { return index_js_1.DEFAULT_SHORTLIST_BAND; } });
|
|
109
112
|
Object.defineProperty(exports, "EmbeddingCache", { enumerable: true, get: function () { return index_js_1.EmbeddingCache; } });
|
|
110
113
|
Object.defineProperty(exports, "embeddingCache", { enumerable: true, get: function () { return index_js_1.embeddingCache; } });
|
|
111
114
|
Object.defineProperty(exports, "finalAnswerSimilarity", { enumerable: true, get: function () { return index_js_1.finalAnswerSimilarity; } });
|
|
115
|
+
Object.defineProperty(exports, "marginStrategy", { enumerable: true, get: function () { return index_js_1.marginStrategy; } });
|
|
112
116
|
Object.defineProperty(exports, "pairwiseSimilarity", { enumerable: true, get: function () { return index_js_1.pairwiseSimilarity; } });
|
|
113
117
|
Object.defineProperty(exports, "persistence", { enumerable: true, get: function () { return index_js_1.persistence; } });
|
|
118
|
+
Object.defineProperty(exports, "rankingConfidence", { enumerable: true, get: function () { return index_js_1.rankingConfidence; } });
|
|
119
|
+
Object.defineProperty(exports, "ratioStrategy", { enumerable: true, get: function () { return index_js_1.ratioStrategy; } });
|
|
114
120
|
Object.defineProperty(exports, "scoreInfluence", { enumerable: true, get: function () { return index_js_1.scoreInfluence; } });
|
|
115
121
|
Object.defineProperty(exports, "scoreMargin", { enumerable: true, get: function () { return index_js_1.scoreMargin; } });
|
|
116
122
|
Object.defineProperty(exports, "structuralProximity", { enumerable: true, get: function () { return index_js_1.structuralProximity; } });
|
|
@@ -145,12 +151,14 @@ Object.defineProperty(exports, "bisectCulprits", { enumerable: true, get: functi
|
|
|
145
151
|
Object.defineProperty(exports, "CONTEXT_BISECT_DEFAULTS", { enumerable: true, get: function () { return index_js_4.CONTEXT_BISECT_DEFAULTS; } });
|
|
146
152
|
Object.defineProperty(exports, "defaultOutcomeComparator", { enumerable: true, get: function () { return index_js_4.defaultOutcomeComparator; } });
|
|
147
153
|
Object.defineProperty(exports, "defaultSuspectClassifier", { enumerable: true, get: function () { return index_js_4.defaultSuspectClassifier; } });
|
|
154
|
+
Object.defineProperty(exports, "findDroppedContext", { enumerable: true, get: function () { return index_js_4.findDroppedContext; } });
|
|
148
155
|
Object.defineProperty(exports, "formatContextBugReport", { enumerable: true, get: function () { return index_js_4.formatContextBugReport; } });
|
|
149
156
|
Object.defineProperty(exports, "llmCallIdsFromEvents", { enumerable: true, get: function () { return index_js_4.llmCallIdsFromEvents; } });
|
|
150
157
|
Object.defineProperty(exports, "llmEdgeWeigher", { enumerable: true, get: function () { return index_js_4.llmEdgeWeigher; } });
|
|
151
158
|
Object.defineProperty(exports, "localizeContextBug", { enumerable: true, get: function () { return index_js_4.localizeContextBug; } });
|
|
152
159
|
Object.defineProperty(exports, "probeFlipped", { enumerable: true, get: function () { return index_js_4.probeFlipped; } });
|
|
153
160
|
Object.defineProperty(exports, "runAblationProbe", { enumerable: true, get: function () { return index_js_4.runAblationProbe; } });
|
|
161
|
+
Object.defineProperty(exports, "runRestorationProbe", { enumerable: true, get: function () { return index_js_4.runRestorationProbe; } });
|
|
154
162
|
Object.defineProperty(exports, "stepOutputText", { enumerable: true, get: function () { return index_js_4.stepOutputText; } });
|
|
155
163
|
Object.defineProperty(exports, "suspectLabel", { enumerable: true, get: function () { return index_js_4.suspectLabel; } });
|
|
156
164
|
Object.defineProperty(exports, "verdictFor", { enumerable: true, get: function () { return index_js_4.verdictFor; } });
|
package/dist/observe.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"observe.js","sourceRoot":"","sources":["../src/observe.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;;;;AAEH,4BAA4B;AAC5B,0EAAmG;AAA1F,qHAAA,eAAe,OAAA;AACxB,wEAAgG;AAAvF,mHAAA,cAAc,OAAA;AAEvB,+BAA+B;AAC/B,kFAGiD;AAF/C,6HAAA,mBAAmB,OAAA;AAGrB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,qFAkBuD;AAjBrD,uHAAA,gBAAgB,OAAA;AAChB,uHAAA,gBAAgB,OAAA;AAiBlB,mFAWsD;AAVpD,mHAAA,aAAa,OAAA;AACb,qHAAA,eAAe,OAAA;AACf,qHAAA,eAAe,OAAA;AASjB,uFAUwD;AATtD,uHAAA,eAAe,OAAA;AACf,sHAAA,cAAc,OAAA;AAShB,uFAUwD;AATtD,yHAAA,iBAAiB,OAAA;AACjB,yHAAA,iBAAiB,OAAA;AACjB,sHAAA,cAAc,OAAA;AACd,uHAAA,eAAe,OAAA;AACf,4HAAA,oBAAoB,OAAA;AAOtB,6BAA6B;AAC7B,oEAA0F;AAAjF,+GAAA,YAAY,OAAA;AACrB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,4FAGsD;AAFpD,uIAAA,wBAAwB,OAAA;AAG1B,oEAA0F;AAAjF,+GAAA,YAAY,OAAA;AACrB,wEAAgG;AAAvF,mHAAA,cAAc,OAAA;AACvB,gFAGgD;AAF9C,2HAAA,kBAAkB,OAAA;AAGpB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,mFAMsD;AALpD,mHAAA,aAAa,OAAA;AACb,oHAAA,cAAc,OAAA;AAKhB,iFAIqD;AAHnD,iHAAA,YAAY,OAAA;AAId,4EAA4E;AAC5E,gFAAgF;AAChF,2FAO0D;AANxD,6HAAA,mBAAmB,OAAA;AAOrB,gFAAgF;AAChF,gFAAgF;AAChF,yGASiE;AAR/D,mIAAA,kBAAkB,OAAA;AAUpB,uDAAuD;AACvD,8DAA0D;AAAjD,yGAAA,SAAS,OAAA;AAElB,uEAAuE;AACvE,uEAAuE;AACvE,uEAAuE;AACvE,oEAAoE;AACpE,oEAAoE;AACpE,kEAAkE;AAClE,eAAe;AACf,
|
|
1
|
+
{"version":3,"file":"observe.js","sourceRoot":"","sources":["../src/observe.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;;;;AAEH,4BAA4B;AAC5B,0EAAmG;AAA1F,qHAAA,eAAe,OAAA;AACxB,wEAAgG;AAAvF,mHAAA,cAAc,OAAA;AAEvB,+BAA+B;AAC/B,kFAGiD;AAF/C,6HAAA,mBAAmB,OAAA;AAGrB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,qFAkBuD;AAjBrD,uHAAA,gBAAgB,OAAA;AAChB,uHAAA,gBAAgB,OAAA;AAiBlB,mFAWsD;AAVpD,mHAAA,aAAa,OAAA;AACb,qHAAA,eAAe,OAAA;AACf,qHAAA,eAAe,OAAA;AASjB,uFAUwD;AATtD,uHAAA,eAAe,OAAA;AACf,sHAAA,cAAc,OAAA;AAShB,uFAUwD;AATtD,yHAAA,iBAAiB,OAAA;AACjB,yHAAA,iBAAiB,OAAA;AACjB,sHAAA,cAAc,OAAA;AACd,uHAAA,eAAe,OAAA;AACf,4HAAA,oBAAoB,OAAA;AAOtB,6BAA6B;AAC7B,oEAA0F;AAAjF,+GAAA,YAAY,OAAA;AACrB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,4FAGsD;AAFpD,uIAAA,wBAAwB,OAAA;AAG1B,oEAA0F;AAAjF,+GAAA,YAAY,OAAA;AACrB,wEAAgG;AAAvF,mHAAA,cAAc,OAAA;AACvB,gFAGgD;AAF9C,2HAAA,kBAAkB,OAAA;AAGpB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,mFAMsD;AALpD,mHAAA,aAAa,OAAA;AACb,oHAAA,cAAc,OAAA;AAKhB,iFAIqD;AAHnD,iHAAA,YAAY,OAAA;AAId,4EAA4E;AAC5E,gFAAgF;AAChF,2FAO0D;AANxD,6HAAA,mBAAmB,OAAA;AAOrB,gFAAgF;AAChF,gFAAgF;AAChF,yGASiE;AAR/D,mIAAA,kBAAkB,OAAA;AAUpB,uDAAuD;AACvD,8DAA0D;AAAjD,yGAAA,SAAS,OAAA;AAElB,uEAAuE;AACvE,uEAAuE;AACvE,uEAAuE;AACvE,oEAAoE;AACpE,oEAAoE;AACpE,kEAAkE;AAClE,eAAe;AACf,0DAyCuC;AAxCrC,wGAAA,YAAY,OAAA;AACZ,4GAAA,gBAAgB,OAAA;AAChB,0GAAA,cAAc,OAAA;AACd,uGAAA,WAAW,OAAA;AACX,uHAAA,2BAA2B,OAAA;AAC3B,sHAAA,0BAA0B,OAAA;AAC1B,qHAAA,yBAAyB,OAAA;AACzB,oHAAA,wBAAwB,OAAA;AACxB,yHAAA,6BAA6B,OAAA;AAC7B,kHAAA,sBAAsB,OAAA;AACtB,0GAAA,cAAc,OAAA;AACd,0GAAA,cAAc,OAAA;AACd,iHAAA,qBAAqB,OAAA;AACrB,0GAAA,cAAc,OAAA;AACd,8GAAA,kBAAkB,OAAA;AAClB,uGAAA,WAAW,OAAA;AACX,6GAAA,iBAAiB,OAAA;AACjB,yGAAA,aAAa,OAAA;AACb,0GAAA,cAAc,OAAA;AACd,uGAAA,WAAW,OAAA;AACX,+GAAA,mBAAmB,OAAA;AAqBrB,uEAAuE;AACvE,2EAA2E;AAC3E,mEAAmE;AACnE,0DAQuC;AAPrC,yGAAA,aAAa,OAAA;AACb,6GAAA,iBAAiB,OAAA;AACjB,oHAAA,wBAAwB,OAAA;AACxB,8GAAA,kBAAkB,OAAA;AAClB,yGAAA,aAAa,OAAA;AAIf,uEAAuE;AACvE,yEAAyE;AACzE,0EAA0E;AAC1E,kEAAkE;AAClE,0DAOuC;AANrC,iHAAA,qBAAqB,OAAA;A
ACrB,wHAAA,4BAA4B,OAAA;AAC5B,8GAAA,kBAAkB,OAAA;AAClB,2GAAA,eAAe,OAAA;AAIjB,qEAAqE;AACrE,sEAAsE;AACtE,sEAAsE;AACtE,+DAA+D;AAC/D,oEAAoE;AACpE,mEAAmE;AACnE,0DAuDuC;AAtDrC,8GAAA,kBAAkB,OAAA;AAClB,0GAAA,cAAc,OAAA;AACd,0GAAA,cAAc,OAAA;AACd,mHAAA,uBAAuB,OAAA;AACvB,oHAAA,wBAAwB,OAAA;AACxB,oHAAA,wBAAwB,OAAA;AACxB,8GAAA,kBAAkB,OAAA;AAClB,kHAAA,sBAAsB,OAAA;AACtB,gHAAA,oBAAoB,OAAA;AACpB,0GAAA,cAAc,OAAA;AACd,8GAAA,kBAAkB,OAAA;AAClB,wGAAA,YAAY,OAAA;AACZ,4GAAA,gBAAgB,OAAA;AAChB,+GAAA,mBAAmB,OAAA;AACnB,0GAAA,cAAc,OAAA;AACd,wGAAA,YAAY,OAAA;AACZ,sGAAA,UAAU,OAAA;AAuCZ,sEAAsE;AACtE,uEAAuE;AACvE,mEAAmE;AACnE,0DAQuC;AAPrC,4GAAA,gBAAgB,OAAA;AAQlB,wEAAwE;AACxE,8EAA8E;AAC9E,qEAAqE;AACrE,wEAAwE;AACxE,uEAAuE;AACvE,gEAAgE;AAChE,gDAAgD;AAChD,qDAgCkC;AA/BhC,8GAAA,kBAAkB,OAAA;AAClB,4GAAA,gBAAgB,OAAA;AAChB,yGAAA,aAAa,OAAA;AACb,6GAAA,iBAAiB,OAAA;AACjB,2HAAA,+BAA+B,OAAA;AAC/B,iHAAA,qBAAqB,OAAA;AACrB,8GAAA,kBAAkB,OAAA;AAClB,6GAAA,iBAAiB,OAAA;AACjB,kHAAA,sBAAsB,OAAA;AACtB,2GAAA,eAAe,OAAA;AACf,+GAAA,mBAAmB,OAAA;AACnB,2GAAA,eAAe,OAAA;AACf,mHAAA,uBAAuB,OAAA;AACvB,qHAAA,yBAAyB,OAAA;AACzB,6GAAA,iBAAiB,OAAA;AACjB,0GAAA,cAAc,OAAA;AACd,+GAAA,mBAAmB,OAAA;AAgBrB,sEAAsE;AACtE,uEAAuE;AACvE,qEAAqE;AACrE,oEAAoE;AACpE,yFASyD;AARvD,2HAAA,kBAAkB,OAAA;AAClB,2HAAA,kBAAkB,OAAA"}
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
* score elsewhere is a correlational proxy.
|
|
26
26
|
*/
|
|
27
27
|
import type { Embedder } from '../influence-core/index.js';
|
|
28
|
-
import type { AblationRerun, AblationRunStats, AblationSpec, AblationVerdict, OutcomeComparator, Suspect } from './types.js';
|
|
28
|
+
import type { AblationRerun, AblationRunStats, AblationSpec, AblationVerdict, OutcomeComparator, SimilarityStats, Suspect } from './types.js';
|
|
29
29
|
/**
|
|
30
30
|
* The spec that removes one suspect — or `undefined` for kind `'stage'`
|
|
31
31
|
* (plain pipeline stages have no removable input; re-rank or refactor).
|
|
@@ -69,6 +69,10 @@ export declare function applyAblations<TTool extends NamedTool, TInjection exten
|
|
|
69
69
|
injections: readonly TInjection[];
|
|
70
70
|
memoryEntries: readonly TMemoryEntry[];
|
|
71
71
|
};
|
|
72
|
+
/** Resolve the seeded-rerun count: default on non-finite, floor, clamp to >= 2
|
|
73
|
+
* (no single-run verdicts — D9). Shared by the ablation + restoration probes. */
|
|
74
|
+
export declare function resolveSamples(samples: number | undefined): number;
|
|
75
|
+
export declare function similarityStats(values: readonly number[]): SimilarityStats;
|
|
72
76
|
/** The default comparator: embedding similarity below the threshold. */
|
|
73
77
|
export declare function defaultOutcomeComparator(embedder: Embedder, flipThreshold: number): OutcomeComparator;
|
|
74
78
|
/** Resolved probe configuration shared by D8 and D9. */
|
|
@@ -92,6 +96,10 @@ export declare function probeFlipped(stats: AblationRunStats): boolean;
|
|
|
92
96
|
* forces `'inconclusive'`: no ablation verdict is trustworthy on an
|
|
93
97
|
* unstable baseline.
|
|
94
98
|
*/
|
|
95
|
-
export declare function verdictFor(label: string, stats: AblationRunStats, baselineStable: boolean
|
|
99
|
+
export declare function verdictFor(label: string, stats: AblationRunStats, baselineStable: boolean,
|
|
100
|
+
/** The counterfactual intervention. `'ablating'` (default) for present
|
|
101
|
+
* suspects; `'restoring'` for missing-context candidates (interface #3).
|
|
102
|
+
* Default keeps every claim string byte-identical to before. */
|
|
103
|
+
action?: 'ablating' | 'restoring'): AblationVerdict;
|
|
96
104
|
export {};
|
|
97
105
|
//# sourceMappingURL=ablation.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ablation.d.ts","sourceRoot":"","sources":["../../../../src/lib/context-bisect/ablation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAGH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,KAAK,EACV,aAAa,EACb,gBAAgB,EAChB,YAAY,EACZ,eAAe,EACf,iBAAiB,
|
|
1
|
+
{"version":3,"file":"ablation.d.ts","sourceRoot":"","sources":["../../../../src/lib/context-bisect/ablation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAGH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,4BAA4B,CAAC;AAC3D,OAAO,KAAK,EACV,aAAa,EACb,gBAAgB,EAChB,YAAY,EACZ,eAAe,EACf,iBAAiB,EACjB,eAAe,EACf,OAAO,EACR,MAAM,YAAY,CAAC;AAKpB;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,OAAO,GAAG,YAAY,GAAG,SAAS,CAyB7E;AAID,0EAA0E;AAC1E,UAAU,UAAU;IAClB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;CACrB;AAED,gEAAgE;AAChE,UAAU,SAAS;IACjB,QAAQ,CAAC,MAAM,EAAE;QAAE,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;CAC5C;AAED,MAAM,WAAW,eAAe,CAC9B,KAAK,SAAS,SAAS,GAAG,SAAS,EACnC,UAAU,SAAS,UAAU,GAAG,UAAU,EAC1C,YAAY,SAAS,UAAU,GAAG,UAAU;IAE5C,QAAQ,CAAC,KAAK,CAAC,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,UAAU,EAAE,CAAC;IAC5C,QAAQ,CAAC,aAAa,CAAC,EAAE,SAAS,YAAY,EAAE,CAAC;CAClD;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,cAAc,CAC5B,KAAK,SAAS,SAAS,EACvB,UAAU,SAAS,UAAU,EAC7B,YAAY,SAAS,UAAU,EAE/B,KAAK,EAAE,SAAS,YAAY,EAAE,EAC9B,OAAO,EAAE,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,YAAY,CAAC,GACxD;IACD,KAAK,EAAE,SAAS,KAAK,EAAE,CAAC;IACxB,UAAU,EAAE,SAAS,UAAU,EAAE,CAAC;IAClC,aAAa,EAAE,SAAS,YAAY,EAAE,CAAC;CACxC,CAiBA;AAID;kFACkF;AAClF,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,GAAG,MAAM,CAGlE;AAED,wBAAgB,eAAe,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,GAAG,eAAe,CAU1E;AAED,wEAAwE;AACxE,wBAAgB,wBAAwB,CACtC,QAAQ,EAAE,QAAQ,EAClB,aAAa,EAAE,MAAM,GACpB,iBAAiB,CAQnB;AAED,wDAAwD;AACxD,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,KAAK,EAAE,aAAa,CAAC;IAC9B,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC;CAC7B;AAED;;;;;;GAMG;AACH,wBAAsB,gBAAgB,CACpC,MAAM,EAAE,WAAW,EACnB,KAAK,EAAE,SAAS,YAAY,EAAE,GAC7B,OAAO,CAAC,gBAAgB,CAAC,CAgB3B;AAED,8DAA8D;AAC9D,wBAAgB,YAAY,CAAC,KAAK,EAAE,gBAAgB,GAAG,OAAO,CAE7D;AAED;;;;;GAKG;AACH,wBAAgB,UAAU,CACxB,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,gBAAgB,EACvB,cAAc,EAAE,OAAO;AACvB;;iEAEiE;AACjE,MAAM,GAAE,UAAU,GAAG,WAAwB,GAC5C,eAAe,CAkCjB"}
|
|
@@ -15,9 +15,11 @@
|
|
|
15
15
|
* claims; slice completeness is bounded by tracking — and says so.
|
|
16
16
|
*/
|
|
17
17
|
export { llmEdgeWeigher, stepOutputText, type LlmEdgeWeigherHandle, type LlmEdgeWeigherOptions, type RankedParentEdge, } from './llmEdgeWeigher.js';
|
|
18
|
+
export { findDroppedContext, type ContextUnit, type DroppedUnit, type MissingContextResult, } from './missingContext.js';
|
|
19
|
+
export { runRestorationProbe, type RestorationProbeConfig, type RestorationRerun, type RestorationRunner, } from './restoration.js';
|
|
18
20
|
export { defaultSuspectClassifier, formatContextBugReport, llmCallIdsFromEvents, localizeContextBug, suspectLabel, type ClassifyContext, type LocalizeContextBugOptions, type SuspectClassifier, type SuspectSeed, } from './localize.js';
|
|
19
21
|
export { toBacktrackTrace, type BacktrackCustodyHop, type BacktrackHop, type BacktrackSuspectCard, type BacktrackTrace, type BacktrackTrail, type ToBacktrackTraceOptions, } from './toBacktrackTrace.js';
|
|
20
22
|
export { ablationForSuspect, applyAblations, defaultOutcomeComparator, probeFlipped, runAblationProbe, verdictFor, type AblationTargets, type ProbeConfig, } from './ablation.js';
|
|
21
23
|
export { bisectCulprits, type BisectCulpritsOptions, type BisectionProbe, type BisectionResult, } from './bisect.js';
|
|
22
|
-
export { CONTEXT_BISECT_DEFAULTS, type AblationRerun, type AblationRunner, type AblationRunStats, type AblationSpec, type AblationVerdict, type AblationVerdictKind, type CapturedEventLike, type ContextBugArtifacts, type ContextBugReport, type EdgePathStep, type HonestyFlag, type HonestyFlagKind, type OutcomeComparator, type QualityTriggerLookup, type SimilarityStats, type SliceStats, type Suspect, type SuspectDetail, type SuspectKind, } from './types.js';
|
|
24
|
+
export { CONTEXT_BISECT_DEFAULTS, type AblationRerun, type AblationRunner, type AblationRunStats, type AblationSpec, type AblationVerdict, type AblationVerdictKind, type CapturedEventLike, type ContextBugArtifacts, type ContextBugReport, type EdgePathStep, type HonestyFlag, type HonestyFlagKind, type OutcomeComparator, type QualityTriggerLookup, type RestoredCandidate, type SimilarityStats, type SliceStats, type Suspect, type SuspectDetail, type SuspectKind, } from './types.js';
|
|
23
25
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/context-bisect/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EACL,cAAc,EACd,cAAc,EACd,KAAK,oBAAoB,EACzB,KAAK,qBAAqB,EAC1B,KAAK,gBAAgB,GACtB,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/lib/context-bisect/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EACL,cAAc,EACd,cAAc,EACd,KAAK,oBAAoB,EACzB,KAAK,qBAAqB,EAC1B,KAAK,gBAAgB,GACtB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,kBAAkB,EAClB,KAAK,WAAW,EAChB,KAAK,WAAW,EAChB,KAAK,oBAAoB,GAC1B,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EACL,mBAAmB,EACnB,KAAK,sBAAsB,EAC3B,KAAK,gBAAgB,EACrB,KAAK,iBAAiB,GACvB,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EACL,wBAAwB,EACxB,sBAAsB,EACtB,oBAAoB,EACpB,kBAAkB,EAClB,YAAY,EACZ,KAAK,eAAe,EACpB,KAAK,yBAAyB,EAC9B,KAAK,iBAAiB,EACtB,KAAK,WAAW,GACjB,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,gBAAgB,EAChB,KAAK,mBAAmB,EACxB,KAAK,YAAY,EACjB,KAAK,oBAAoB,EACzB,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,uBAAuB,GAC7B,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,kBAAkB,EAClB,cAAc,EACd,wBAAwB,EACxB,YAAY,EACZ,gBAAgB,EAChB,UAAU,EACV,KAAK,eAAe,EACpB,KAAK,WAAW,GACjB,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,cAAc,EACd,KAAK,qBAAqB,EAC1B,KAAK,cAAc,EACnB,KAAK,eAAe,GACrB,MAAM,aAAa,CAAC;AAErB,OAAO,EACL,uBAAuB,EACvB,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,KAAK,eAAe,EACpB,KAAK,mBAAmB,EACxB,KAAK,iBAAiB,EACtB,KAAK,mBAAmB,EACxB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,KAAK,WAAW,EAChB,KAAK,eAAe,EACpB,KAAK,iBAAiB,EACtB,KAAK,oBAAoB,EACzB,KAAK,iBAAiB,EACtB,KAAK,eAAe,EACpB,KAAK,UAAU,EACf,KAAK,OAAO,EACZ,KAAK,aAAa,EAClB,KAAK,WAAW,GACjB,MAAM,YAAY,CAAC"}
|
|
@@ -30,6 +30,8 @@
|
|
|
30
30
|
*/
|
|
31
31
|
import type { CausalNode } from 'footprintjs/trace';
|
|
32
32
|
import { type Embedder } from '../influence-core/index.js';
|
|
33
|
+
import { type ContextUnit } from './missingContext.js';
|
|
34
|
+
import { type RestorationRerun } from './restoration.js';
|
|
33
35
|
import type { AblationRerun, ContextBugArtifacts, ContextBugReport, Suspect, SuspectDetail, SuspectKind } from './types.js';
|
|
34
36
|
/**
|
|
35
37
|
* Extract LLM-call step ids from captured typed events: the
|
|
@@ -90,6 +92,18 @@ export interface LocalizeContextBugOptions {
|
|
|
90
92
|
* ranking, marked `mode: 'correlational'`.
|
|
91
93
|
*/
|
|
92
94
|
readonly rerun?: AblationRerun;
|
|
95
|
+
/**
|
|
96
|
+
* Interface #3 — the MISSING-context tier. Supply what was `available` for
|
|
97
|
+
* the turn and what was `sent` to the model; the report's `dropped` lists the
|
|
98
|
+
* units that never reached the model (`available − sent`). Add a `rerun` and
|
|
99
|
+
* each dropped candidate gets a RESTORATION verdict (the mirror of ablation:
|
|
100
|
+
* restoring it flips the outcome → causal). Absent → no `dropped` section.
|
|
101
|
+
*/
|
|
102
|
+
readonly missingContext?: {
|
|
103
|
+
readonly available: readonly ContextUnit[];
|
|
104
|
+
readonly sent: readonly ContextUnit[];
|
|
105
|
+
readonly rerun?: RestorationRerun;
|
|
106
|
+
};
|
|
93
107
|
/** Slice depth budget. Default 12. */
|
|
94
108
|
readonly maxDepth?: number;
|
|
95
109
|
/** Slice node budget. Default 80. */
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"localize.d.ts","sourceRoot":"","sources":["../../../../src/lib/context-bisect/localize.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAGpD,OAAO,EAAkB,KAAK,QAAQ,EAAsB,MAAM,4BAA4B,CAAC;AAG/F,OAAO,KAAK,EACV,aAAa,
|
|
1
|
+
{"version":3,"file":"localize.d.ts","sourceRoot":"","sources":["../../../../src/lib/context-bisect/localize.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAGpD,OAAO,EAAkB,KAAK,QAAQ,EAAsB,MAAM,4BAA4B,CAAC;AAG/F,OAAO,EAAsB,KAAK,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAC3E,OAAO,EAAuB,KAAK,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAC9E,OAAO,KAAK,EACV,aAAa,EAEb,mBAAmB,EACnB,gBAAgB,EAKhB,OAAO,EACP,aAAa,EACb,WAAW,EACZ,MAAM,YAAY,CAAC;AAKpB;;;;;GAKG;AACH,wBAAgB,oBAAoB,CAClC,MAAM,EAAE,SAAS;IAAE,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,IAAI,EAAE;QAAE,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAA;KAAE,CAAA;CAAE,EAAE,GAC/F,MAAM,EAAE,CAWV;AAID,kEAAkE;AAClE,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC;IAC3B,QAAQ,CAAC,MAAM,CAAC,EAAE,aAAa,CAAC;CACjC;AAED,iDAAiD;AACjD,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC;IAC1B,gCAAgC;IAChC,QAAQ,CAAC,WAAW,EAAE,SAAS,MAAM,EAAE,CAAC;IACxC,+DAA+D;IAC/D,QAAQ,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC;CAC5C;AAED;;;;;GAKG;AACH,MAAM,MAAM,iBAAiB,GAAG,CAAC,GAAG,EAAE,eAAe,KAAK,SAAS,WAAW,EAAE,GAAG,SAAS,CAAC;AAuB7F;;;;;;;;;;;GAWG;AACH,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,eAAe,GAAG,SAAS,WAAW,EAAE,CAsDrF;AAmGD,MAAM,WAAW,yBAAyB;IACxC,QAAQ,CAAC,SAAS,EAAE,mBAAmB,CAAC;IACxC,8DAA8D;IAC9D,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC;IAC5B,oEAAoE;IACpE,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,mEAAmE;IACnE,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,mBAAmB,KAAK,MAAM,GAAG,SAAS,CAAC;IAC1E;;;;OAIG;IACH,QAAQ,CAAC,KAAK,CAAC,EAAE,aAAa,CAAC;IAC/B;;;;;;OAMG;IACH,QAAQ,CAAC,cAAc,CAAC,EAAE;QACxB,QAAQ,CAAC,SAAS,EAAE,SAAS,WAAW,EAAE,CAAC;QAC3C,QAAQ,CAAC,IAAI,EAAE,SAAS,WAAW,EAAE,CAAC;QACtC,QAAQ,CAAC,KAAK,CAAC,EAAE,gBAAgB,CAAC;KACnC,CAAC;IACF,sCAAsC;IACtC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,qCAAqC;IACrC,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,sDAAsD;IACtD,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,gDAAgD;IAChD,QAAQ,CAAC,QAAQ,CAAC,EAAE,iBAAiB,CAAC;CACvC;AAID;;;;;;;;GAQG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,gBAAgB,CAAC,CA2P3B;AA0JD,wBA
AgB,YAAY,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM,CAGrD;AAID;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,gBAAgB,GAAG,MAAM,CA2GvE"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* missingContext — interface #3: find context that was AVAILABLE but never
|
|
3
|
+
* reached the model (RFC-003).
|
|
4
|
+
*
|
|
5
|
+
* The localizer's influence ranking (#1) + ablation (#2) handle culprits that
|
|
6
|
+
* are PRESENT in the context. They are blind to the opposite failure: a needed
|
|
7
|
+
* unit that was *dropped* — truncated out of the window, or never selected —
|
|
8
|
+
* so the model never saw it. You cannot ablate what isn't there.
|
|
9
|
+
*
|
|
10
|
+
* This finder is the cheap, exact, deterministic half of that case: a SET
|
|
11
|
+
* DIFFERENCE over unit ids. The library tracks context as identified units
|
|
12
|
+
* (each injection / memory entry / tool result has a stable id), so "what got
|
|
13
|
+
* dropped" is `available − sent` — no embeddings, no LLM, O(n).
|
|
14
|
+
*
|
|
15
|
+
* Causal confirmation is the MIRROR of ablation: RESTORATION. Add a dropped
|
|
16
|
+
* unit back, re-run, and an outcome flip is the causal proof. Like ablation,
|
|
17
|
+
* the re-run is consumer-supplied (the library doesn't own your agent loop);
|
|
18
|
+
* see `findDroppedContext` docs + example 10 for the pattern.
|
|
19
|
+
*
|
|
20
|
+
* Honest claim: a dropped unit is a CANDIDATE missing-context culprit, never a
|
|
21
|
+
* confirmed cause — most dropped context is correctly dropped. Only restoration
|
|
22
|
+
* makes a causal claim.
|
|
23
|
+
*/
|
|
24
|
+
/** One unit of context, identified by a stable id (injection id, memory id,
|
|
25
|
+
* tool-result id, ...). `content` is optional — carried through for display
|
|
26
|
+
* and for the restoration re-run. */
|
|
27
|
+
export interface ContextUnit {
|
|
28
|
+
readonly id: string;
|
|
29
|
+
readonly content?: string;
|
|
30
|
+
}
|
|
31
|
+
/** A unit that was available for the turn but never reached the model. */
|
|
32
|
+
export interface DroppedUnit {
|
|
33
|
+
readonly id: string;
|
|
34
|
+
readonly content?: string;
|
|
35
|
+
}
|
|
36
|
+
export interface MissingContextResult {
|
|
37
|
+
/**
|
|
38
|
+
* Units available for the turn that did NOT reach the model (`available −
|
|
39
|
+
* sent`, matched by id, input order preserved). Each is a CANDIDATE
|
|
40
|
+
* missing-context culprit — confirm by restoration, never assume.
|
|
41
|
+
*/
|
|
42
|
+
readonly dropped: readonly DroppedUnit[];
|
|
43
|
+
/** Distinct available units considered. */
|
|
44
|
+
readonly availableCount: number;
|
|
45
|
+
/** Distinct sent units that reached the model. */
|
|
46
|
+
readonly sentCount: number;
|
|
47
|
+
/** True when anything was dropped — a missing-context bug is possible. */
|
|
48
|
+
readonly anyDropped: boolean;
|
|
49
|
+
/** Human-readable explanation. PRESENTATION ONLY — read `dropped` /
|
|
50
|
+
* `anyDropped` as data, never parse this string. */
|
|
51
|
+
readonly reason: string;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Find context that was available for a turn but never reached the model —
|
|
55
|
+
* `available − sent` by id. Pure, deterministic, O(n); no model or embedder.
|
|
56
|
+
*
|
|
57
|
+
* Ids are assumed stable and unique per side (duplicates are de-duplicated,
|
|
58
|
+
* first occurrence wins). Units in `sent` but not `available` are ignored.
|
|
59
|
+
*
|
|
60
|
+
* Confirm a candidate causally by RESTORATION (the mirror of ablation): add the
|
|
61
|
+
* dropped unit back into the context and re-run; an outcome flip is the proof.
|
|
62
|
+
*
|
|
63
|
+
* @example
|
|
64
|
+
* const { dropped, anyDropped } = findDroppedContext(assembled, sentToModel);
|
|
65
|
+
* if (anyDropped) {
|
|
66
|
+
* for (const unit of dropped) {
|
|
67
|
+
* if ((await rerunWith(unit)).outcomeFlips()) report(unit); // restoration = causal
|
|
68
|
+
* }
|
|
69
|
+
* }
|
|
70
|
+
*/
|
|
71
|
+
export declare function findDroppedContext(available: readonly ContextUnit[], sent: readonly ContextUnit[]): MissingContextResult;
|
|
72
|
+
//# sourceMappingURL=missingContext.d.ts.map
|