@inbrowser/agent 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/diagnostics/index.d.ts +5 -0
- package/dist/diagnostics/index.d.ts.map +1 -0
- package/dist/diagnostics/index.js +3 -0
- package/dist/diagnostics/index.js.map +1 -0
- package/dist/diagnostics/timing.d.ts +48 -0
- package/dist/diagnostics/timing.d.ts.map +1 -0
- package/dist/diagnostics/timing.js +85 -0
- package/dist/diagnostics/timing.js.map +1 -0
- package/dist/diagnostics/truthfulness.d.ts +36 -0
- package/dist/diagnostics/truthfulness.d.ts.map +1 -0
- package/dist/diagnostics/truthfulness.js +180 -0
- package/dist/diagnostics/truthfulness.js.map +1 -0
- package/dist/dispatch-memoization.d.ts +84 -0
- package/dist/dispatch-memoization.d.ts.map +1 -0
- package/dist/dispatch-memoization.js +197 -0
- package/dist/dispatch-memoization.js.map +1 -0
- package/dist/eval/comparison-report.d.ts +164 -0
- package/dist/eval/comparison-report.d.ts.map +1 -0
- package/dist/eval/comparison-report.js +316 -0
- package/dist/eval/comparison-report.js.map +1 -0
- package/dist/eval/fixture.d.ts +74 -0
- package/dist/eval/fixture.d.ts.map +1 -0
- package/dist/eval/fixture.js +217 -0
- package/dist/eval/fixture.js.map +1 -0
- package/dist/eval/index.d.ts +13 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +7 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/load-node.d.ts +16 -0
- package/dist/eval/load-node.d.ts.map +1 -0
- package/dist/eval/load-node.js +58 -0
- package/dist/eval/load-node.js.map +1 -0
- package/dist/eval/metric-collector.d.ts +209 -0
- package/dist/eval/metric-collector.d.ts.map +1 -0
- package/dist/eval/metric-collector.js +293 -0
- package/dist/eval/metric-collector.js.map +1 -0
- package/dist/eval/run-record.d.ts +76 -0
- package/dist/eval/run-record.d.ts.map +1 -0
- package/dist/eval/run-record.js +32 -0
- package/dist/eval/run-record.js.map +1 -0
- package/dist/eval/runner.d.ts +140 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/runner.js +310 -0
- package/dist/eval/runner.js.map +1 -0
- package/dist/eval/spec-framework.d.ts +113 -0
- package/dist/eval/spec-framework.d.ts.map +1 -0
- package/dist/eval/spec-framework.js +100 -0
- package/dist/eval/spec-framework.js.map +1 -0
- package/dist/eval/spec-helpers.d.ts +245 -0
- package/dist/eval/spec-helpers.d.ts.map +1 -0
- package/dist/eval/spec-helpers.js +605 -0
- package/dist/eval/spec-helpers.js.map +1 -0
- package/dist/index.d.ts +24 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -1
- package/dist/index.js.map +1 -1
- package/dist/node.d.ts +1 -0
- package/dist/node.d.ts.map +1 -1
- package/dist/node.js +1 -0
- package/dist/node.js.map +1 -1
- package/dist/planner-executor.d.ts +132 -0
- package/dist/planner-executor.d.ts.map +1 -0
- package/dist/planner-executor.js +274 -0
- package/dist/planner-executor.js.map +1 -0
- package/dist/skill-catalog.d.ts +81 -0
- package/dist/skill-catalog.d.ts.map +1 -0
- package/dist/skill-catalog.js +388 -0
- package/dist/skill-catalog.js.map +1 -0
- package/dist/skill-router.d.ts +95 -0
- package/dist/skill-router.d.ts.map +1 -0
- package/dist/skill-router.js +130 -0
- package/dist/skill-router.js.map +1 -0
- package/dist/strategy.d.ts +20 -1
- package/dist/strategy.d.ts.map +1 -1
- package/dist/strategy.js +333 -13
- package/dist/strategy.js.map +1 -1
- package/dist/tools.d.ts +15 -1
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +18 -0
- package/dist/tools.js.map +1 -1
- package/dist/types/strategy.d.ts +48 -0
- package/dist/types/strategy.d.ts.map +1 -1
- package/dist/types/tools.d.ts +18 -0
- package/dist/types/tools.d.ts.map +1 -1
- package/dist/types/trace.d.ts +59 -9
- package/dist/types/trace.d.ts.map +1 -1
- package/dist/types/trace.js +5 -3
- package/dist/types/trace.js.map +1 -1
- package/package.json +1 -1
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 David East
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export type { TruthfulnessFlag, TruthfulnessFlagCategory, TruthfulnessReport, } from './truthfulness.js';
|
|
2
|
+
export { analyzeTruthfulness } from './truthfulness.js';
|
|
3
|
+
export type { TurnTimingRow } from './timing.js';
|
|
4
|
+
export { turnTimingTable } from './timing.js';
|
|
5
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/diagnostics/index.ts"],"names":[],"mappings":"AAAA,YAAY,EACV,gBAAgB,EAChB,wBAAwB,EACxB,kBAAkB,GACnB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAExD,YAAY,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/diagnostics/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAGxD,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wall-clock split helper. Consumes a captured `TraceEvent[]` and
|
|
3
|
+
* returns one `TurnTimingRow` per ReAct iteration, with the
|
|
4
|
+
* iteration's language-model time and tool-dispatch time as
|
|
5
|
+
* separate columns.
|
|
6
|
+
*
|
|
7
|
+
* Pairing rule: events are matched by `requestId`. An iteration
|
|
8
|
+
* with only an `llm_request` (e.g. mid-stream error before
|
|
9
|
+
* `llm_response`) yields a row with `llmMs: undefined` and
|
|
10
|
+
* `dispatchMs: undefined`. An iteration with `llm_request` +
|
|
11
|
+
* `llm_response` but no `turn_dispatch_complete` (the final
|
|
12
|
+
* assistant turn that emits no tool calls) yields a numeric `llmMs` and
|
|
13
|
+
* `dispatchMs: undefined`. Missing endpoints never throw.
|
|
14
|
+
*
|
|
15
|
+
* `totalMs` is the iteration's full wall-clock from request
|
|
16
|
+
* dispatch through the tool-dispatch close — i.e. `dispatchEndMs -
|
|
17
|
+
* requestStartMs`. It is `undefined` whenever either endpoint is
|
|
18
|
+
* missing.
|
|
19
|
+
*/
|
|
20
|
+
import type { TraceEvent } from '../types/trace.js';
|
|
21
|
+
export interface TurnTimingRow {
|
|
22
|
+
/** Session-scoped turn id. Multiple rows share a `turnId` when
|
|
23
|
+
* the turn ran multiple ReAct iterations. */
|
|
24
|
+
turnId: string;
|
|
25
|
+
/** 0-indexed ReAct iteration within the turn. Unique together
|
|
26
|
+
* with `turnId`; identical to the iteration index encoded in
|
|
27
|
+
* `requestId`. */
|
|
28
|
+
iteration: number;
|
|
29
|
+
/** Stable id for the row. Matches
|
|
30
|
+
* `LlmRequestTrace.requestId` / `LlmResponseTrace.requestId` /
|
|
31
|
+
* `TurnDispatchCompleteTrace.requestId`. */
|
|
32
|
+
requestId: string;
|
|
33
|
+
/** Wall-clock ms spent in the language model: response timestamp
|
|
34
|
+
* minus request timestamp. `undefined` if either endpoint is
|
|
35
|
+
* missing (e.g. mid-stream error). */
|
|
36
|
+
llmMs: number | undefined;
|
|
37
|
+
/** Wall-clock ms spent in tool dispatch: turn-dispatch-complete
|
|
38
|
+
* timestamp minus response timestamp. `undefined` for the final
|
|
39
|
+
* assistant turn (no tool calls → no dispatch event) or when
|
|
40
|
+
* the response endpoint is missing. */
|
|
41
|
+
dispatchMs: number | undefined;
|
|
42
|
+
/** Wall-clock ms across the full iteration: turn-dispatch-complete
|
|
43
|
+
* timestamp minus request timestamp. `undefined` when either
|
|
44
|
+
* endpoint is missing. */
|
|
45
|
+
totalMs: number | undefined;
|
|
46
|
+
}
|
|
47
|
+
export declare function turnTimingTable(events: readonly TraceEvent[]): TurnTimingRow[];
|
|
48
|
+
//# sourceMappingURL=timing.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"timing.d.ts","sourceRoot":"","sources":["../../src/diagnostics/timing.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,MAAM,WAAW,aAAa;IAC5B;kDAC8C;IAC9C,MAAM,EAAE,MAAM,CAAC;IACf;;uBAEmB;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB;;iDAE6C;IAC7C,SAAS,EAAE,MAAM,CAAC;IAClB;;2CAEuC;IACvC,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;IAC1B;;;4CAGwC;IACxC,UAAU,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;+BAE2B;IAC3B,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC;CAC7B;AAaD,wBAAgB,eAAe,CAAC,MAAM,EAAE,SAAS,UAAU,EAAE,GAAG,aAAa,EAAE,CAsE9E"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wall-clock split helper. Consumes a captured `TraceEvent[]` and
|
|
3
|
+
* returns one `TurnTimingRow` per ReAct iteration, with the
|
|
4
|
+
* iteration's language-model time and tool-dispatch time as
|
|
5
|
+
* separate columns.
|
|
6
|
+
*
|
|
7
|
+
* Pairing rule: events are matched by `requestId`. An iteration
|
|
8
|
+
* with only an `llm_request` (e.g. mid-stream error before
|
|
9
|
+
* `llm_response`) yields a row with `llmMs: undefined` and
|
|
10
|
+
* `dispatchMs: undefined`. An iteration with `llm_request` +
|
|
11
|
+
* `llm_response` but no `turn_dispatch_complete` (the final
|
|
12
|
+
* assistant turn that emits no tool calls) yields a numeric `llmMs` and
|
|
13
|
+
* `dispatchMs: undefined`. Missing endpoints never throw.
|
|
14
|
+
*
|
|
15
|
+
* `totalMs` is the iteration's full wall-clock from request
|
|
16
|
+
* dispatch through the tool-dispatch close — i.e. `dispatchEndMs -
|
|
17
|
+
* requestStartMs`. It is `undefined` whenever either endpoint is
|
|
18
|
+
* missing.
|
|
19
|
+
*/
|
|
20
|
+
export function turnTimingTable(events) {
|
|
21
|
+
const byRequestId = new Map();
|
|
22
|
+
const touch = (requestId) => {
|
|
23
|
+
let acc = byRequestId.get(requestId);
|
|
24
|
+
if (!acc) {
|
|
25
|
+
acc = {
|
|
26
|
+
turnId: undefined,
|
|
27
|
+
iteration: undefined,
|
|
28
|
+
requestTs: undefined,
|
|
29
|
+
responseTs: undefined,
|
|
30
|
+
dispatchTs: undefined,
|
|
31
|
+
firstSeenAt: byRequestId.size,
|
|
32
|
+
};
|
|
33
|
+
byRequestId.set(requestId, acc);
|
|
34
|
+
}
|
|
35
|
+
return acc;
|
|
36
|
+
};
|
|
37
|
+
for (const ev of events) {
|
|
38
|
+
if (ev.kind === 'llm_request') {
|
|
39
|
+
const acc = touch(ev.data.requestId);
|
|
40
|
+
acc.turnId = ev.data.turnId;
|
|
41
|
+
acc.iteration = ev.data.iteration;
|
|
42
|
+
acc.requestTs = ev.data.ts;
|
|
43
|
+
}
|
|
44
|
+
else if (ev.kind === 'llm_response') {
|
|
45
|
+
const acc = touch(ev.data.requestId);
|
|
46
|
+
acc.responseTs = ev.data.ts;
|
|
47
|
+
}
|
|
48
|
+
else if (ev.kind === 'turn_dispatch_complete') {
|
|
49
|
+
const acc = touch(ev.data.requestId);
|
|
50
|
+
// `llm_request` is the canonical source for turnId/iteration,
|
|
51
|
+
// but fall through to the dispatch event so an out-of-order or
|
|
52
|
+
// truncated trace still produces a useful row.
|
|
53
|
+
acc.turnId = acc.turnId ?? ev.data.turnId;
|
|
54
|
+
acc.iteration = acc.iteration ?? ev.data.iteration;
|
|
55
|
+
acc.dispatchTs = ev.data.ts;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
const rows = [];
|
|
59
|
+
for (const [requestId, acc] of byRequestId) {
|
|
60
|
+
const llmMs = acc.requestTs !== undefined && acc.responseTs !== undefined
|
|
61
|
+
? acc.responseTs - acc.requestTs
|
|
62
|
+
: undefined;
|
|
63
|
+
const dispatchMs = acc.responseTs !== undefined && acc.dispatchTs !== undefined
|
|
64
|
+
? acc.dispatchTs - acc.responseTs
|
|
65
|
+
: undefined;
|
|
66
|
+
const totalMs = acc.requestTs !== undefined && acc.dispatchTs !== undefined
|
|
67
|
+
? acc.dispatchTs - acc.requestTs
|
|
68
|
+
: undefined;
|
|
69
|
+
rows.push({
|
|
70
|
+
turnId: acc.turnId ?? 'turn-anon',
|
|
71
|
+
iteration: acc.iteration ?? 0,
|
|
72
|
+
requestId,
|
|
73
|
+
llmMs,
|
|
74
|
+
dispatchMs,
|
|
75
|
+
totalMs,
|
|
76
|
+
});
|
|
77
|
+
}
|
|
78
|
+
rows.sort((a, b) => {
|
|
79
|
+
const accA = byRequestId.get(a.requestId);
|
|
80
|
+
const accB = byRequestId.get(b.requestId);
|
|
81
|
+
return accA.firstSeenAt - accB.firstSeenAt;
|
|
82
|
+
});
|
|
83
|
+
return rows;
|
|
84
|
+
}
|
|
85
|
+
//# sourceMappingURL=timing.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"timing.js","sourceRoot":"","sources":["../../src/diagnostics/timing.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AA0CH,MAAM,UAAU,eAAe,CAAC,MAA6B;IAC3D,MAAM,WAAW,GAAG,IAAI,GAAG,EAAiC,CAAC;IAE7D,MAAM,KAAK,GAAG,CAAC,SAAiB,EAAyB,EAAE;QACzD,IAAI,GAAG,GAAG,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACrC,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,GAAG,GAAG;gBACJ,MAAM,EAAE,SAAS;gBACjB,SAAS,EAAE,SAAS;gBACpB,SAAS,EAAE,SAAS;gBACpB,UAAU,EAAE,SAAS;gBACrB,UAAU,EAAE,SAAS;gBACrB,WAAW,EAAE,WAAW,CAAC,IAAI;aAC9B,CAAC;YACF,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAClC,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC,CAAC;IAEF,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;QACxB,IAAI,EAAE,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACrC,GAAG,CAAC,MAAM,GAAG,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC;YAC5B,GAAG,CAAC,SAAS,GAAG,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC;YAClC,GAAG,CAAC,SAAS,GAAG,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC7B,CAAC;aAAM,IAAI,EAAE,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACrC,GAAG,CAAC,UAAU,GAAG,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC9B,CAAC;aAAM,IAAI,EAAE,CAAC,IAAI,KAAK,wBAAwB,EAAE,CAAC;YAChD,MAAM,GAAG,GAAG,KAAK,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACrC,8DAA8D;YAC9D,+DAA+D;YAC/D,+CAA+C;YAC/C,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC;YAC1C,GAAG,CAAC,SAAS,GAAG,GAAG,CAAC,SAAS,IAAI,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC;YACnD,GAAG,CAAC,UAAU,GAAG,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC9B,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAoB,EAAE,CAAC;IACjC,KAAK,MAAM,CAAC,SAAS,EAAE,GAAG,CAAC,IAAI,WAAW,EAAE,CAAC;QAC3C,MAAM,KAAK,GACT,GAAG,CAAC,SAAS,KAAK,SAAS,IAAI,GAAG,CAAC,UAAU,KAAK,SAAS;YACzD,CAAC,CAAC,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC,SAAS;YAChC,CAAC,CAAC,SAAS,CAAC;QAChB,MAAM,UAAU,GACd,GAAG,CAAC,UAAU,KAAK,SAAS,IAAI,GAAG,CAAC,UAAU,KAAK,SAAS;YAC1D,CAAC,CAAC,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC,UAAU;YACjC,CAAC,CAAC,SAAS,CAAC;QAChB,MAAM,OAAO,GACX,GAAG,CAAC,SAAS,KAAK,SAAS,IAAI,GAAG,CAAC,UAAU,KAAK,SAAS;YACzD,CAAC,CAAC,GAAG,CAA
C,UAAU,GAAG,GAAG,CAAC,SAAS;YAChC,CAAC,CAAC,SAAS,CAAC;QAChB,IAAI,CAAC,IAAI,CAAC;YACR,MAAM,EAAE,GAAG,CAAC,MAAM,IAAI,WAAW;YACjC,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,CAAC;YAC7B,SAAS;YACT,KAAK;YACL,UAAU;YACV,OAAO;SACR,CAAC,CAAC;IACL,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACjB,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,CAAE,CAAC;QAC3C,MAAM,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,CAAE,CAAC;QAC3C,OAAO,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-hoc truthfulness detector for agent traces.
|
|
3
|
+
*
|
|
4
|
+
* Walks a list of `TraceEvent`s, pairs each `llm_request` with its
|
|
5
|
+
* matching `llm_response` (or derives the response text from the next
|
|
6
|
+
* request's appended assistant message), extracts candidate factual
|
|
7
|
+
* claims from the assistant text, and flags claims that do not appear
|
|
8
|
+
* in the grounding corpus visible to the model at that moment.
|
|
9
|
+
*
|
|
10
|
+
* The grounding corpus is the union of the system prompt, every
|
|
11
|
+
* message text in the request, and every tool result JSON in the
|
|
12
|
+
* request. Verification is literal substring match — case sensitive.
|
|
13
|
+
*
|
|
14
|
+
* The implementation plan's phase zero calls for an intentionally
|
|
15
|
+
* simple first version. False positives are acceptable. False
|
|
16
|
+
* negatives (missed fabrications) are the failure mode the eval
|
|
17
|
+
* harness will surface later via golden tasks.
|
|
18
|
+
*/
|
|
19
|
+
import type { TraceEvent } from '../types/trace.js';
|
|
20
|
+
export type TruthfulnessFlagCategory = 'firestore-path' | 'quoted-identifier';
|
|
21
|
+
export interface TruthfulnessFlag {
|
|
22
|
+
requestId: string;
|
|
23
|
+
turnId: string;
|
|
24
|
+
iteration: number;
|
|
25
|
+
claim: string;
|
|
26
|
+
category: TruthfulnessFlagCategory;
|
|
27
|
+
context: string;
|
|
28
|
+
}
|
|
29
|
+
export interface TruthfulnessReport {
|
|
30
|
+
totalAssistantTurns: number;
|
|
31
|
+
totalFlags: number;
|
|
32
|
+
flags: TruthfulnessFlag[];
|
|
33
|
+
violationRate: number;
|
|
34
|
+
}
|
|
35
|
+
export declare function analyzeTruthfulness(events: readonly TraceEvent[]): TruthfulnessReport;
|
|
36
|
+
//# sourceMappingURL=truthfulness.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"truthfulness.d.ts","sourceRoot":"","sources":["../../src/diagnostics/truthfulness.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAGH,OAAO,KAAK,EAAqC,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEvF,MAAM,MAAM,wBAAwB,GAAG,gBAAgB,GAAG,mBAAmB,CAAC;AAE9E,MAAM,WAAW,gBAAgB;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,wBAAwB,CAAC;IACnC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,kBAAkB;IACjC,mBAAmB,EAAE,MAAM,CAAC;IAC5B,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,gBAAgB,EAAE,CAAC;IAC1B,aAAa,EAAE,MAAM,CAAC;CACvB;AAqDD,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,SAAS,UAAU,EAAE,GAAG,kBAAkB,CAgCrF"}
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-hoc truthfulness detector for agent traces.
|
|
3
|
+
*
|
|
4
|
+
* Walks a list of `TraceEvent`s, pairs each `llm_request` with its
|
|
5
|
+
* matching `llm_response` (or derives the response text from the next
|
|
6
|
+
* request's appended assistant message), extracts candidate factual
|
|
7
|
+
* claims from the assistant text, and flags claims that do not appear
|
|
8
|
+
* in the grounding corpus visible to the model at that moment.
|
|
9
|
+
*
|
|
10
|
+
* The grounding corpus is the union of the system prompt, every
|
|
11
|
+
* message text in the request, and every tool result JSON in the
|
|
12
|
+
* request. Verification is literal substring match — case sensitive.
|
|
13
|
+
*
|
|
14
|
+
* The implementation plan's phase zero calls for an intentionally
|
|
15
|
+
* simple first version. False positives are acceptable. False
|
|
16
|
+
* negatives (missed fabrications) are the failure mode the eval
|
|
17
|
+
* harness will surface later via golden tasks.
|
|
18
|
+
*/
|
|
19
|
+
const PATH_PATTERN = /(?<![:/\w.])([A-Za-z][\w-]*(?:\/[\w\-{}$]+){1,})/g;
|
|
20
|
+
const QUOTED_PATTERN = /`([A-Za-z_][\w/.\-{}$]{2,})`/g;
|
|
21
|
+
const STOPWORDS = new Set([
|
|
22
|
+
'true',
|
|
23
|
+
'false',
|
|
24
|
+
'null',
|
|
25
|
+
'undefined',
|
|
26
|
+
'object',
|
|
27
|
+
'string',
|
|
28
|
+
'number',
|
|
29
|
+
'boolean',
|
|
30
|
+
'array',
|
|
31
|
+
'function',
|
|
32
|
+
'request',
|
|
33
|
+
'resource',
|
|
34
|
+
'response',
|
|
35
|
+
'auth',
|
|
36
|
+
'context',
|
|
37
|
+
'database',
|
|
38
|
+
'document',
|
|
39
|
+
'collection',
|
|
40
|
+
'subcollection',
|
|
41
|
+
'firestore',
|
|
42
|
+
'firebase',
|
|
43
|
+
'permission-denied',
|
|
44
|
+
'not-found',
|
|
45
|
+
'unauthenticated',
|
|
46
|
+
'invalid-argument',
|
|
47
|
+
'failed-precondition',
|
|
48
|
+
'already-exists',
|
|
49
|
+
'resource-exhausted',
|
|
50
|
+
'deadline-exceeded',
|
|
51
|
+
'out-of-range',
|
|
52
|
+
'aborted',
|
|
53
|
+
'unavailable',
|
|
54
|
+
'data-loss',
|
|
55
|
+
'internal',
|
|
56
|
+
'cancelled',
|
|
57
|
+
'unknown',
|
|
58
|
+
'unimplemented',
|
|
59
|
+
'getauth',
|
|
60
|
+
'getfirestore',
|
|
61
|
+
'getdatabase',
|
|
62
|
+
'doc',
|
|
63
|
+
'query',
|
|
64
|
+
'where',
|
|
65
|
+
'orderby',
|
|
66
|
+
'limit',
|
|
67
|
+
]);
|
|
68
|
+
export function analyzeTruthfulness(events) {
|
|
69
|
+
const pairs = pairEvents(events);
|
|
70
|
+
const flags = [];
|
|
71
|
+
for (const pair of pairs) {
|
|
72
|
+
if (!pair.responseText)
|
|
73
|
+
continue;
|
|
74
|
+
const corpus = buildGroundingCorpus(pair.request);
|
|
75
|
+
const seen = new Set();
|
|
76
|
+
for (const candidate of extractCandidates(pair.responseText)) {
|
|
77
|
+
if (isStopword(candidate.claim))
|
|
78
|
+
continue;
|
|
79
|
+
if (corpus.includes(candidate.claim))
|
|
80
|
+
continue;
|
|
81
|
+
const dedupeKey = `${candidate.category}::${candidate.claim}`;
|
|
82
|
+
if (seen.has(dedupeKey))
|
|
83
|
+
continue;
|
|
84
|
+
seen.add(dedupeKey);
|
|
85
|
+
flags.push({
|
|
86
|
+
requestId: pair.request.requestId,
|
|
87
|
+
turnId: pair.request.turnId,
|
|
88
|
+
iteration: pair.request.iteration,
|
|
89
|
+
claim: candidate.claim,
|
|
90
|
+
category: candidate.category,
|
|
91
|
+
context: candidate.context,
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
const totalAssistantTurns = pairs.filter((p) => p.responseText.length > 0).length;
|
|
96
|
+
return {
|
|
97
|
+
totalAssistantTurns,
|
|
98
|
+
totalFlags: flags.length,
|
|
99
|
+
flags,
|
|
100
|
+
violationRate: totalAssistantTurns === 0 ? 0 : flags.length / totalAssistantTurns,
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
function isStopword(claim) {
|
|
104
|
+
if (STOPWORDS.has(claim.toLowerCase()))
|
|
105
|
+
return true;
|
|
106
|
+
if (claim.includes('{') && claim.includes('}'))
|
|
107
|
+
return true;
|
|
108
|
+
return false;
|
|
109
|
+
}
|
|
110
|
+
function pairEvents(events) {
|
|
111
|
+
const requestOrder = [];
|
|
112
|
+
const responses = new Map();
|
|
113
|
+
for (const ev of events) {
|
|
114
|
+
if (ev.kind === 'llm_request') {
|
|
115
|
+
requestOrder.push(ev.data);
|
|
116
|
+
}
|
|
117
|
+
else if (ev.kind === 'llm_response') {
|
|
118
|
+
responses.set(ev.data.requestId, ev.data);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
const pairs = [];
|
|
122
|
+
for (let i = 0; i < requestOrder.length; i++) {
|
|
123
|
+
const req = requestOrder[i];
|
|
124
|
+
if (!req)
|
|
125
|
+
continue;
|
|
126
|
+
const resp = responses.get(req.requestId);
|
|
127
|
+
let responseText = '';
|
|
128
|
+
if (resp) {
|
|
129
|
+
responseText = resp.text;
|
|
130
|
+
}
|
|
131
|
+
else {
|
|
132
|
+
const next = requestOrder[i + 1];
|
|
133
|
+
if (next) {
|
|
134
|
+
const derived = trailingAssistantText(req.messages, next.messages);
|
|
135
|
+
if (derived)
|
|
136
|
+
responseText = derived;
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
pairs.push({ request: req, responseText });
|
|
140
|
+
}
|
|
141
|
+
return pairs;
|
|
142
|
+
}
|
|
143
|
+
function trailingAssistantText(prev, next) {
|
|
144
|
+
for (let i = prev.length; i < next.length; i++) {
|
|
145
|
+
const m = next[i];
|
|
146
|
+
if (m && m.role === 'assistant' && m.text)
|
|
147
|
+
return m.text;
|
|
148
|
+
}
|
|
149
|
+
return undefined;
|
|
150
|
+
}
|
|
151
|
+
function buildGroundingCorpus(req) {
|
|
152
|
+
const parts = [req.systemPrompt];
|
|
153
|
+
for (const m of req.messages) {
|
|
154
|
+
if (m.text)
|
|
155
|
+
parts.push(m.text);
|
|
156
|
+
if (m.resultJson)
|
|
157
|
+
parts.push(m.resultJson);
|
|
158
|
+
}
|
|
159
|
+
return parts.join('\n');
|
|
160
|
+
}
|
|
161
|
+
function extractCandidates(text) {
|
|
162
|
+
const out = [];
|
|
163
|
+
collect(text, PATH_PATTERN, 'firestore-path', out);
|
|
164
|
+
collect(text, QUOTED_PATTERN, 'quoted-identifier', out);
|
|
165
|
+
return out;
|
|
166
|
+
}
|
|
167
|
+
function collect(text, pattern, category, out) {
|
|
168
|
+
const re = new RegExp(pattern.source, pattern.flags);
|
|
169
|
+
let m = re.exec(text);
|
|
170
|
+
while (m !== null) {
|
|
171
|
+
const claim = m[1] ?? m[0];
|
|
172
|
+
if (claim) {
|
|
173
|
+
const start = Math.max(0, m.index - 32);
|
|
174
|
+
const end = Math.min(text.length, m.index + claim.length + 32);
|
|
175
|
+
out.push({ claim, category, context: text.slice(start, end) });
|
|
176
|
+
}
|
|
177
|
+
m = re.exec(text);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
//# sourceMappingURL=truthfulness.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"truthfulness.js","sourceRoot":"","sources":["../../src/diagnostics/truthfulness.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAuBH,MAAM,YAAY,GAAG,mDAAmD,CAAC;AACzE,MAAM,cAAc,GAAG,+BAA+B,CAAC;AAEvD,MAAM,SAAS,GAAwB,IAAI,GAAG,CAAC;IAC7C,MAAM;IACN,OAAO;IACP,MAAM;IACN,WAAW;IACX,QAAQ;IACR,QAAQ;IACR,QAAQ;IACR,SAAS;IACT,OAAO;IACP,UAAU;IACV,SAAS;IACT,UAAU;IACV,UAAU;IACV,MAAM;IACN,SAAS;IACT,UAAU;IACV,UAAU;IACV,YAAY;IACZ,eAAe;IACf,WAAW;IACX,UAAU;IACV,mBAAmB;IACnB,WAAW;IACX,iBAAiB;IACjB,kBAAkB;IAClB,qBAAqB;IACrB,gBAAgB;IAChB,oBAAoB;IACpB,mBAAmB;IACnB,cAAc;IACd,SAAS;IACT,aAAa;IACb,WAAW;IACX,UAAU;IACV,WAAW;IACX,SAAS;IACT,eAAe;IACf,SAAS;IACT,cAAc;IACd,aAAa;IACb,KAAK;IACL,OAAO;IACP,OAAO;IACP,SAAS;IACT,OAAO;CACR,CAAC,CAAC;AAEH,MAAM,UAAU,mBAAmB,CAAC,MAA6B;IAC/D,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;IACjC,MAAM,KAAK,GAAuB,EAAE,CAAC;IAErC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,YAAY;YAAE,SAAS;QACjC,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAClD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAC/B,KAAK,MAAM,SAAS,IAAI,iBAAiB,CAAC,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC;YAC7D,IAAI,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC;gBAAE,SAAS;YAC1C,IAAI,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,KAAK,CAAC;gBAAE,SAAS;YAC/C,MAAM,SAAS,GAAG,GAAG,SAAS,CAAC,QAAQ,KAAK,SAAS,CAAC,KAAK,EAAE,CAAC;YAC9D,IAAI,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC;gBAAE,SAAS;YAClC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YACpB,KAAK,CAAC,IAAI,CAAC;gBACT,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,SAAS;gBACjC,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,MAAM;gBAC3B,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,SAAS;gBACjC,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,QAAQ,EAAE,SAAS,CAAC,QAAQ;gBAC5B,OAAO,EAAE,SAAS,CAAC,OAAO;aAC3B,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,MAAM,mBAAmB,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;IAClF,OAAO;QACL,mBAAmB;QACnB,UAAU,EAAE,KAAK,CAAC,MAAM;QACxB,KAAK;QACL,aAAa,EAAE,mBAAmB,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,mBAAmB;KAClF,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,
IAAI,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;QAAE,OAAO,IAAI,CAAC;IACpD,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5D,OAAO,KAAK,CAAC;AACf,CAAC;AAOD,SAAS,UAAU,CAAC,MAA6B;IAC/C,MAAM,YAAY,GAAsB,EAAE,CAAC;IAC3C,MAAM,SAAS,GAAG,IAAI,GAAG,EAA4B,CAAC;IACtD,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;QACxB,IAAI,EAAE,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YAC9B,YAAY,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC;aAAM,IAAI,EAAE,CAAC,IAAI,KAAK,cAAc,EAAE,CAAC;YACtC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IACD,MAAM,KAAK,GAAmB,EAAE,CAAC;IACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,CAAC,GAAG;YAAE,SAAS;QACnB,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QAC1C,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,IAAI,EAAE,CAAC;YACT,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC;QAC3B,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACjC,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,OAAO,GAAG,qBAAqB,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACnE,IAAI,OAAO;oBAAE,YAAY,GAAG,OAAO,CAAC;YACtC,CAAC;QACH,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,GAAG,EAAE,YAAY,EAAE,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,qBAAqB,CAC5B,IAAkC,EAClC,IAAkC;IAElC,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/C,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,CAAC,CAAC,IAAI;YAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IAC3D,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAS,oBAAoB,CAAC,GAAoB;IAChD,MAAM,KAAK,GAAa,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAC3C,KAAK,MAAM,CAAC,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC7B,IAAI,CAAC,CAAC,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,CAAC,CAAC,UAAU;YAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAQD,SAAS,iBAAiB,CAAC,IAAY;IAC
rC,MAAM,GAAG,GAAgB,EAAE,CAAC;IAC5B,OAAO,CAAC,IAAI,EAAE,YAAY,EAAE,gBAAgB,EAAE,GAAG,CAAC,CAAC;IACnD,OAAO,CAAC,IAAI,EAAE,cAAc,EAAE,mBAAmB,EAAE,GAAG,CAAC,CAAC;IACxD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,OAAO,CACd,IAAY,EACZ,OAAe,EACf,QAAkC,EAClC,GAAgB;IAEhB,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IACrD,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtB,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;QAClB,MAAM,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC,CAAC;YACxC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC;YAC/D,GAAG,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACjE,CAAC;QACD,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACpB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Content-addressed memoization layer over `createDispatch`.
|
|
3
|
+
*
|
|
4
|
+
* `createMemoizedDispatch(registry, options?)` returns a `ToolDispatch`-shaped
|
|
5
|
+
* object that caches `ToolResult`s keyed on `(toolName, argsHash,
|
|
6
|
+
* workspaceHash, runtimeHash)`. The cache is consulted only for handlers
|
|
7
|
+
* tagged `pure` (see `isPure` in `./tools.ts`). Non-pure handlers bypass
|
|
8
|
+
* the cache entirely and always execute. Errors from the underlying
|
|
9
|
+
* dispatch propagate; they are NOT cached, since they may be transient.
|
|
10
|
+
*
|
|
11
|
+
* The returned object is structurally a `ToolDispatch` — strategies and
|
|
12
|
+
* downstream code that already accept `ToolDispatch` use it transparently.
|
|
13
|
+
* The one addition is `stats()`, which returns the running counters for
|
|
14
|
+
* hits / misses / bypassed calls. The cache lives for the lifetime of
|
|
15
|
+
* one `MemoizedDispatch` instance; there is no global state.
|
|
16
|
+
*
|
|
17
|
+
* Design notes:
|
|
18
|
+
*
|
|
19
|
+
* - Hashing uses FNV-1a 32-bit over a stable-stringified JSON
|
|
20
|
+
* representation. The cache is for short-running test loops; a
|
|
21
|
+
* cryptographic hash is overkill. Collisions are tolerable at our
|
|
22
|
+
* cache sizes, and the cost of a missed hit is at worst a recomputation.
|
|
23
|
+
* - Argument keys are sorted at every level via `stableStringify` so two
|
|
24
|
+
* structurally-equal arg objects produce the same key regardless of
|
|
25
|
+
* property insertion order.
|
|
26
|
+
* - Workspace hash covers `presetId`, `rules`, `code`, and `appSource`.
|
|
27
|
+
* `stitch` is excluded per the brief — pure tools don't read from it.
|
|
28
|
+
* - Runtime hash is included only when `keyComponents` contains `'runtime'`.
|
|
29
|
+
* The default is `['workspace']`; making runtime opt-in keeps the
|
|
30
|
+
* default key small for the dominant pure-tool population.
|
|
31
|
+
* - No eviction in v1. Eval runs are bounded; one instance per harness
|
|
32
|
+
* trial keeps cache growth bounded too.
|
|
33
|
+
*/
|
|
34
|
+
import type { ToolDispatch, ToolRegistry } from './types/tools.js';
|
|
35
|
+
/** Which `ctx` fields contribute to the cache key. */
|
|
36
|
+
export type MemoKeyComponent = 'workspace' | 'runtime';
|
|
37
|
+
export interface MemoOptions {
|
|
38
|
+
/**
|
|
39
|
+
* Which `ctx` fields contribute to the cache key. Defaults to
|
|
40
|
+
* `['workspace']`. Some pure tools depend on runtime; opt-in keeps
|
|
41
|
+
* the default key small for tools that are workspace-determined.
|
|
42
|
+
*/
|
|
43
|
+
keyComponents?: MemoKeyComponent[];
|
|
44
|
+
}
|
|
45
|
+
export interface MemoStats {
|
|
46
|
+
/** Pure tool dispatched and a cached result was served. */
|
|
47
|
+
hits: number;
|
|
48
|
+
/** Pure tool dispatched, cache missed, underlying handler ran. */
|
|
49
|
+
misses: number;
|
|
50
|
+
/** Non-pure tool dispatched; cache layer was bypassed. */
|
|
51
|
+
bypassed: number;
|
|
52
|
+
}
|
|
53
|
+
export interface MemoizedDispatch extends ToolDispatch {
|
|
54
|
+
/** Snapshot of the running counters. Returns a fresh object on every call. */
|
|
55
|
+
stats(): MemoStats;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Wrap a registry in a memoizing dispatcher. The wrapper holds its own
|
|
59
|
+
* cache; the underlying dispatch is the standard `createDispatch(registry)`.
|
|
60
|
+
*
|
|
61
|
+
* Non-pure handlers (including unknown-tool errors) bypass the cache and
|
|
62
|
+
* are dispatched directly; `bypassed` is incremented for those calls.
|
|
63
|
+
*/
|
|
64
|
+
export declare function createMemoizedDispatch(registry: ToolRegistry, options?: MemoOptions): MemoizedDispatch;
|
|
65
|
+
/**
|
|
66
|
+
* Stable JSON serialization: object keys are sorted alphabetically at
|
|
67
|
+
* every nesting level. Arrays preserve order (they are positional).
|
|
68
|
+
* Functions, symbols, `undefined` properties are omitted (standard
|
|
69
|
+
* JSON behaviour). `null` is preserved.
|
|
70
|
+
*
|
|
71
|
+
* This is intentionally not `JSON.stringify(value)` — that emits keys
|
|
72
|
+
* in insertion order, so two structurally-equal arg objects assembled
|
|
73
|
+
* differently would produce different cache keys.
|
|
74
|
+
*/
|
|
75
|
+
export declare function stableStringify(value: unknown): string;
|
|
76
|
+
/**
|
|
77
|
+
* FNV-1a 32-bit hash. Returns the lowercase hex string. Fast,
|
|
78
|
+
* dependency-free, and collision-tolerable at our cache sizes.
|
|
79
|
+
* Iterates the UTF-16 code units of the input; sufficient for our
|
|
80
|
+
* stably-stringified JSON payloads, which only contain ASCII control
|
|
81
|
+
* characters and JSON syntax tokens plus user-supplied string data.
|
|
82
|
+
*/
|
|
83
|
+
export declare function hashFnv1a32(input: string): string;
|
|
84
|
+
//# sourceMappingURL=dispatch-memoization.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dispatch-memoization.d.ts","sourceRoot":"","sources":["../src/dispatch-memoization.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAIH,OAAO,KAAK,EAGV,YAAY,EAEZ,YAAY,EAEb,MAAM,kBAAkB,CAAC;AAG1B,sDAAsD;AACtD,MAAM,MAAM,gBAAgB,GAAG,WAAW,GAAG,SAAS,CAAC;AAEvD,MAAM,WAAW,WAAW;IAC1B;;;;OAIG;IACH,aAAa,CAAC,EAAE,gBAAgB,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,SAAS;IACxB,2DAA2D;IAC3D,IAAI,EAAE,MAAM,CAAC;IACb,kEAAkE;IAClE,MAAM,EAAE,MAAM,CAAC;IACf,0DAA0D;IAC1D,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,gBAAiB,SAAQ,YAAY;IACpD,8EAA8E;IAC9E,KAAK,IAAI,SAAS,CAAC;CACpB;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,YAAY,EACtB,OAAO,CAAC,EAAE,WAAW,GACpB,gBAAgB,CA6ClB;AAoDD;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAEtD;AA8BD;;;;;;GAMG;AACH,wBAAgB,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAQjD"}
|