agentfootprint 6.24.0 → 6.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +31 -0
  2. package/bin/agentfootprint-lint-tools.mjs +14 -0
  3. package/dist/esm/lib/tool-lint/analyze.js +235 -0
  4. package/dist/esm/lib/tool-lint/analyze.js.map +1 -0
  5. package/dist/esm/lib/tool-lint/cli.js +198 -0
  6. package/dist/esm/lib/tool-lint/cli.js.map +1 -0
  7. package/dist/esm/lib/tool-lint/format.js +61 -0
  8. package/dist/esm/lib/tool-lint/format.js.map +1 -0
  9. package/dist/esm/lib/tool-lint/index.js +23 -0
  10. package/dist/esm/lib/tool-lint/index.js.map +1 -0
  11. package/dist/esm/lib/tool-lint/rules.js +249 -0
  12. package/dist/esm/lib/tool-lint/rules.js.map +1 -0
  13. package/dist/esm/lib/tool-lint/types.js +25 -0
  14. package/dist/esm/lib/tool-lint/types.js.map +1 -0
  15. package/dist/esm/observe.js +13 -0
  16. package/dist/esm/observe.js.map +1 -1
  17. package/dist/esm/recorders/observability/ToolChoiceRecorder.js +261 -0
  18. package/dist/esm/recorders/observability/ToolChoiceRecorder.js.map +1 -0
  19. package/dist/lib/tool-lint/analyze.js +242 -0
  20. package/dist/lib/tool-lint/analyze.js.map +1 -0
  21. package/dist/lib/tool-lint/cli.js +203 -0
  22. package/dist/lib/tool-lint/cli.js.map +1 -0
  23. package/dist/lib/tool-lint/format.js +65 -0
  24. package/dist/lib/tool-lint/format.js.map +1 -0
  25. package/dist/lib/tool-lint/index.js +43 -0
  26. package/dist/lib/tool-lint/index.js.map +1 -0
  27. package/dist/lib/tool-lint/rules.js +256 -0
  28. package/dist/lib/tool-lint/rules.js.map +1 -0
  29. package/dist/lib/tool-lint/types.js +26 -0
  30. package/dist/lib/tool-lint/types.js.map +1 -0
  31. package/dist/observe.js +34 -1
  32. package/dist/observe.js.map +1 -1
  33. package/dist/recorders/observability/ToolChoiceRecorder.js +266 -0
  34. package/dist/recorders/observability/ToolChoiceRecorder.js.map +1 -0
  35. package/dist/types/lib/tool-lint/analyze.d.ts +84 -0
  36. package/dist/types/lib/tool-lint/analyze.d.ts.map +1 -0
  37. package/dist/types/lib/tool-lint/cli.d.ts +44 -0
  38. package/dist/types/lib/tool-lint/cli.d.ts.map +1 -0
  39. package/dist/types/lib/tool-lint/format.d.ts +19 -0
  40. package/dist/types/lib/tool-lint/format.d.ts.map +1 -0
  41. package/dist/types/lib/tool-lint/index.d.ts +24 -0
  42. package/dist/types/lib/tool-lint/index.d.ts.map +1 -0
  43. package/dist/types/lib/tool-lint/rules.d.ts +86 -0
  44. package/dist/types/lib/tool-lint/rules.d.ts.map +1 -0
  45. package/dist/types/lib/tool-lint/types.d.ts +156 -0
  46. package/dist/types/lib/tool-lint/types.d.ts.map +1 -0
  47. package/dist/types/observe.d.ts +2 -0
  48. package/dist/types/observe.d.ts.map +1 -1
  49. package/dist/types/recorders/observability/ToolChoiceRecorder.d.ts +165 -0
  50. package/dist/types/recorders/observability/ToolChoiceRecorder.d.ts.map +1 -0
  51. package/package.json +4 -2
@@ -0,0 +1,249 @@
1
+ /**
2
+ * Structural lint rules (RFC-002 block C2) — the PLUGGABLE RULE PACK.
3
+ *
4
+ * Pattern: Strategy list — each rule is a plain `{ id, check }` object;
5
+ * `defaultStructuralRules` is OUR pack, and consumers add /
6
+ * remove / replace freely via `AnalyzeToolCatalogOptions.rules`.
7
+ * Parameterizable rules ship as FACTORIES (`descriptionRule`,
8
+ * `saysWhatNotWhenRule`, …) returning a configured `LintRule`.
9
+ * Role: `src/lib/tool-lint/` leaf. Pure functions over `CatalogTool`;
10
+ * no embedder, no I/O.
11
+ *
12
+ * Every rule encodes a FIELD FINDING from real catalogs (the Neo SAN
13
+ * triage agent's 29-tool catalog was the seed corpus):
14
+ *
15
+ * 1. description-missing-or-short — the model can only guess from a name.
16
+ * 2. says-what-not-when — describes WHAT the tool returns but gives the
17
+ * model no cue for WHEN to pick it over a sibling (the #1 cause of
18
+ * twin-tool confusion: 'get_fcns_database' vs 'influx_get_fcns_database').
19
+ * 3. enum-in-prose — string params whose legal values are listed in prose
20
+ * ("avg_iops | peak_iops | mbps") instead of a JSON-Schema `enum` the
21
+ * model (and validators, see #9 tool-args validation) can act on.
22
+ * 4. optional-param-undocumented — optional params whose omission has
23
+ * meaning (fabric-wide sweep vs one switch) but whose schema never
24
+ * says so; the model can't reason about leaving them out.
25
+ *
26
+ * Honest claim: these are token/regex HEURISTICS. They flag review
27
+ * prompts, not certainties — expect (rare) false positives and tune via
28
+ * the factory options instead of deleting the rule.
29
+ */
/**
 * Extract the usable parts of a tool's JSON-Schema-ish `inputSchema`.
 *
 * Tolerates absent or malformed shapes so that rules never throw on them:
 * anything that is not a non-null object is dropped from `properties`, and
 * anything that is not a string is dropped from `required`.
 *
 * @param {{ inputSchema?: unknown }} tool - catalog tool to inspect.
 * @returns {{ properties: Array<[string, object]>, required: Set<string> }}
 *   `[paramName, paramSchema]` pairs plus the set of required param names.
 */
function readObjectSchema(tool) {
    const isObject = (value) => typeof value === 'object' && value !== null;
    const { inputSchema } = tool;
    const declaredProps = inputSchema?.properties;
    const declaredRequired = inputSchema?.required;

    const properties = [];
    if (isObject(declaredProps)) {
        for (const entry of Object.entries(declaredProps)) {
            // Keep only params whose schema is itself an object.
            if (isObject(entry[1])) {
                properties.push(entry);
            }
        }
    }

    const required = new Set();
    if (Array.isArray(declaredRequired)) {
        for (const paramName of declaredRequired) {
            if (typeof paramName === 'string') {
                required.add(paramName);
            }
        }
    }

    return { properties, required };
}
/**
 * Case-insensitive whole-word (or whole-phrase) test.
 *
 * `word` counts as present when it appears in `text` delimited on both
 * sides by the start/end of the text or by a character outside `[a-z0-9]`.
 * Any whitespace run inside a multi-word cue ('if not', 'when not')
 * matches any whitespace run in the text, so descriptions wrapped across
 * lines or padded with extra spaces still hit.
 *
 * @param {string} text - the description being scanned.
 * @param {string} word - cue token or phrase; regex metacharacters are
 *   treated literally.
 * @returns {boolean} true when the cue occurs as a whole word/phrase.
 */
function hasWholeWord(text, word) {
    // Escape first (escaping never adds whitespace), then relax the
    // literal spaces of the cue into `\s+`.
    const cuePattern = escapeRegExp(word.toLowerCase()).replace(/\s+/g, '\\s+');
    return new RegExp(`(?:^|[^a-z0-9])${cuePattern}(?:[^a-z0-9]|$)`).test(text.toLowerCase());
}
/**
 * Backslash-escape every regex metacharacter in `s` so it can be embedded
 * in a `RegExp` source and match literally.
 *
 * @param {string} s - raw text.
 * @returns {string} the escaped text.
 */
function escapeRegExp(s) {
    return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
/**
 * Factory for the `description-missing-or-short` rule.
 *
 * Missing description → `error` (the model can only guess from the
 * name). Present but shorter than `minChars` → `warn` (too short to
 * differentiate from siblings).
 *
 * A description that is not a string is treated as missing instead of
 * throwing, so a malformed catalog entry still produces a finding.
 *
 * @param {{ minChars?: number }} [options] - `minChars` is the minimum
 *   trimmed description length that passes without a warning (default 40).
 * @returns {{ id: string, check: (tool: object) => object[] }} the rule.
 */
export function descriptionRule(options = {}) {
    const minChars = options.minChars ?? 40;
    const ruleId = 'description-missing-or-short';
    return {
        id: ruleId,
        check(tool) {
            // Guard on the type: `?.` alone only protects against
            // null/undefined and would still call `.trim` on a number.
            const description = typeof tool.description === 'string' ? tool.description.trim() : '';
            if (description.length === 0) {
                return [
                    {
                        rule: ruleId,
                        tool: tool.name,
                        severity: 'error',
                        message: 'tool has no description — the model can only guess from the name',
                    },
                ];
            }
            if (description.length < minChars) {
                return [
                    {
                        rule: ruleId,
                        tool: tool.name,
                        severity: 'warn',
                        message: `description is ${description.length} chars (< ${minChars}) — too short to differentiate this tool from its siblings`,
                    },
                ];
            }
            return [];
        },
    };
}
// ── Rule 2 — says WHAT, not WHEN ─────────────────────────────────────
/**
 * Default cue tokens for the says-what-not-when heuristic (RFC-002 C2):
 * temporal/conditional words whose presence suggests the description
 * tells the model WHEN to use the tool.
 */
export const DEFAULT_WHEN_CUES = ['for', 'when', 'after', 'first', 'fallback', 'only'];
/**
 * Factory for the `says-what-not-when` rule.
 *
 * A description with NO temporal/conditional cue token usually describes
 * WHAT the tool returns but never WHEN to pick it — the #1 cause of
 * twin-tool confusion. Heuristic by design: tune `cueTokens` rather than
 * dropping the rule.
 *
 * Tools with no usable description (missing, blank, or not a string) are
 * skipped — that case is the description rule's finding — and never make
 * this rule throw.
 *
 * @param {{ cueTokens?: readonly string[] }} [options] - cue words that
 *   count as a WHEN signal (default `DEFAULT_WHEN_CUES`).
 * @returns {{ id: string, check: (tool: object) => object[] }} the rule.
 */
export function saysWhatNotWhenRule(options = {}) {
    const cues = options.cueTokens ?? DEFAULT_WHEN_CUES;
    const ruleId = 'says-what-not-when';
    return {
        id: ruleId,
        check(tool) {
            // Guard on the type: `?.` alone only protects against
            // null/undefined and would still call `.trim` on a number.
            const description = typeof tool.description === 'string' ? tool.description.trim() : '';
            if (description.length === 0) {
                return [];
            }
            if (cues.some((cue) => hasWholeWord(description, cue))) {
                return [];
            }
            return [
                {
                    rule: ruleId,
                    tool: tool.name,
                    severity: 'warn',
                    message: 'description says WHAT the tool returns but gives no cue for WHEN to use it ' +
                        `(no ${cues.map((c) => `'${c}'`).join('/')}) — add the choice condition, ` +
                        'e.g. "Use when …" / "Call FIRST" / "FALLBACK if …"',
                },
            ];
        },
    };
}
// ── Rule 3 — enum described in prose ─────────────────────────────────
/** Regex source for one literal value: a letter, then letters, digits, `_`, `.` or `-`. */
const IDENT = '[A-Za-z][A-Za-z0-9_.-]*';
/** `avg_iops | peak_iops | mbps` — two or more pipe-separated literals. */
const PIPE_LIST = new RegExp(`(${IDENT})(?:\\s*\\|\\s*(?:${IDENT}))+`);
/** `one of: red, green, blue` — comma lists only behind an explicit
 *  values marker, so free-form examples ("e.g. 1h, 24h") don't flag. */
const COMMA_LIST = new RegExp(`(?:one of|allowed values?|valid values?|options|values)\\s*:?\\s*(${IDENT}(?:\\s*,\\s*${IDENT})+)`, 'i');
/**
 * Factory for the `enum-in-prose` rule.
 *
 * A string param whose description enumerates its legal values in prose
 * (pipe-separated literals, or comma lists behind "one of"/"allowed
 * values") should declare a JSON-Schema `enum` instead — the model picks
 * reliably from enums, and arg validators (#9) can enforce them. The
 * field case: Neo's `influx_get_port_ranking.metric` =
 * `"avg_iops | peak_iops | mbps"`.
 *
 * Params that already declare `enum`, or declare a `type` other than
 * `'string'`, are skipped.
 *
 * @returns {{ id: string, check: (tool: object) => object[] }} the rule;
 *   each finding carries a ready-to-paste `suggestion`.
 */
export function enumInProseRule() {
    return {
        id: 'enum-in-prose',
        check(tool) {
            const findings = [];
            const { properties } = readObjectSchema(tool);
            for (const [param, prop] of properties) {
                if (prop.enum !== undefined) {
                    continue;
                }
                if (prop.type !== undefined && prop.type !== 'string') {
                    continue;
                }
                const description = typeof prop.description === 'string' ? prop.description : '';
                if (description.length === 0) {
                    continue;
                }
                const literals = extractProseLiterals(description);
                if (literals === undefined) {
                    continue;
                }
                findings.push({
                    rule: 'enum-in-prose',
                    tool: tool.name,
                    severity: 'warn',
                    param,
                    message: `param '${param}' lists its legal values in prose ("${description.slice(0, 80)}") — declare them as a JSON-Schema enum so the model picks reliably`,
                    suggestion: `"enum": ${JSON.stringify(literals)}`,
                });
            }
            return findings;
        },
    };
}
/**
 * Find a prose-enumerated value list in a param description.
 *
 * Pipe lists are tried first, then marker-introduced comma lists.
 *
 * @param {string} description - the param's description text.
 * @returns {string[] | undefined} the listed literals, or `undefined`
 *   when the description contains no such list.
 */
function extractProseLiterals(description) {
    const pipe = PIPE_LIST.exec(description);
    if (pipe) {
        return splitLiterals(pipe[0], '|');
    }
    const comma = COMMA_LIST.exec(description);
    if (comma) {
        return splitLiterals(comma[1], ',');
    }
    return undefined;
}
/**
 * Split a matched list into clean, unique literals.
 *
 * `IDENT` permits '.', so a sentence-ending full stop is captured as part
 * of the last value ("mbps."); strip trailing dots so the suggested enum
 * holds the real value. Duplicates are dropped (first occurrence wins)
 * because JSON-Schema `enum` members should be unique.
 */
function splitLiterals(list, separator) {
    const cleaned = list.split(separator).map((value) => value.trim().replace(/\.+$/, ''));
    return [...new Set(cleaned)];
}
// ── Rule 4 — optional param whose omission is undocumented ───────────
/**
 * Default cue words/phrases for the optional-param rule: their presence
 * signals that a param description DOES say what omitting the param means.
 */
export const DEFAULT_OMISSION_CUES = [
    'optional', 'default', 'defaults',
    'omit', 'omitted',
    'if not', 'when not', 'absent',
    'all', 'entire', 'every',
    'fallback',
];
/**
 * Factory for the `optional-param-undocumented` rule.
 *
 * Omitting an optional param usually MEANS something (Neo:
 * `influx_get_interface_counters` without `switch_name` = fabric-wide
 * sweep), but the model can only reason about leaving a param out when
 * the description says so. Every non-required param with no description,
 * or with a description containing none of the omission cues, gets a
 * `warn` finding.
 *
 * @param {{ omissionCues?: readonly string[] }} [options] - words/phrases
 *   that count as documenting omission (default `DEFAULT_OMISSION_CUES`).
 * @returns {{ id: string, check: (tool: object) => object[] }} the rule.
 */
export function optionalParamRule(options = {}) {
    const omissionCues = options.omissionCues ?? DEFAULT_OMISSION_CUES;
    const ruleId = 'optional-param-undocumented';
    const mentionsOmission = (text) => omissionCues.some((cue) => hasWholeWord(text, cue));
    return {
        id: ruleId,
        check(tool) {
            const { properties, required } = readObjectSchema(tool);
            return properties.flatMap(([param, prop]) => {
                if (required.has(param)) {
                    return [];
                }
                const text = typeof prop.description === 'string' ? prop.description.trim() : '';
                let message;
                if (text.length === 0) {
                    message = `optional param '${param}' has no description — say what happens when it is omitted (a default? a broader scope?)`;
                }
                else if (!mentionsOmission(text)) {
                    message = `optional param '${param}' is described but never says what omission means — add e.g. "optional — defaults to …" / "omit for all …"`;
                }
                else {
                    // Described AND says what omission means: nothing to report.
                    return [];
                }
                return [{ rule: ruleId, tool: tool.name, severity: 'warn', param, message }];
            });
        },
    };
}
// ── The default pack ─────────────────────────────────────────────────
/**
 * The library's own rule pack: each rule factory invoked once with its
 * default options, in a fixed order. Compose your own:
 *
 *   rules: [...defaultStructuralRules, myRule]                 // add
 *   rules: defaultStructuralRules.filter(r => r.id !== '…')    // remove
 *   rules: [descriptionRule({ minChars: 80 }), …]              // re-tune
 */
export const defaultStructuralRules = [
    descriptionRule,
    saysWhatNotWhenRule,
    enumInProseRule,
    optionalParamRule,
].map((makeRule) => makeRule());
249
+ //# sourceMappingURL=rules.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rules.js","sourceRoot":"","sources":["../../../../src/lib/tool-lint/rules.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAYH;sEACsE;AACtE,SAAS,gBAAgB,CAAC,IAAiB;IAIzC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC;IAChC,MAAM,KAAK,GAAG,MAAM,EAAE,UAAU,CAAC;IACjC,MAAM,UAAU,GACd,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ;QACzC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,KAAgC,CAAC,CAAC,MAAM,CACrD,CAAC,KAAK,EAAqC,EAAE,CAC3C,KAAK,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,QAAQ,CACpD;QACH,CAAC,CAAC,EAAE,CAAC;IACT,MAAM,QAAQ,GAAG,IAAI,GAAG,CACtB,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAC5F,CAAC;IACF,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;AAClC,CAAC;AAED,SAAS,YAAY,CAAC,IAAY,EAAE,IAAY;IAC9C,OAAO,IAAI,MAAM,CAAC,kBAAkB,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,iBAAiB,CAAC,CAAC,IAAI,CACzF,IAAI,CAAC,WAAW,EAAE,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,YAAY,CAAC,CAAS;IAC7B,OAAO,CAAC,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;AAClD,CAAC;AASD;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,UAAkC,EAAE;IAClE,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IACxC,OAAO;QACL,EAAE,EAAE,8BAA8B;QAClC,KAAK,CAAC,IAAI;YACR,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YACnD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC7B,OAAO;oBACL;wBACE,IAAI,EAAE,8BAA8B;wBACpC,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,kEAAkE;qBAC5E;iBACF,CAAC;YACJ,CAAC;YACD,IAAI,WAAW,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;gBAClC,OAAO;oBACL;wBACE,IAAI,EAAE,8BAA8B;wBACpC,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,QAAQ,EAAE,MAAM;wBAChB,OAAO,EAAE,kBAAkB,WAAW,CAAC,MAAM,aAAa,QAAQ,4DAA4D;qBAC/H;iBACF,CAAC;YACJ,CAAC;YACD,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,wEAAwE;AAExE;mEACmE;AACnE,MAAM,CAAC,MAAM,iBAAiB,GAAsB;IAClD,KAAK;IACL,MAAM;IACN,OAAO;IACP,OAAO;IACP,UAAU;IACV,MAAM;CACP,CAAC;AAOF;;;;;GAKG;AACH,MAAM,UAAU,mBAAmB,CAAC,UAAsC,EAAE;IAC1E,MAAM,IAAI,GAAG,OAAO,CAAC,SAAS,IAAI,iBAAiB,CAAC;IACpD,OAAO;QACL,EAAE,EAA
E,oBAAoB;QACxB,KAAK,CAAC,IAAI;YACR,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YACnD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YACxC,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,YAAY,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;gBAAE,OAAO,EAAE,CAAC;YAClE,OAAO;gBACL;oBACE,IAAI,EAAE,oBAAoB;oBAC1B,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,QAAQ,EAAE,MAAM;oBAChB,OAAO,EACL,6EAA6E;wBAC7E,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,gCAAgC;wBAC1E,oDAAoD;iBACvD;aACF,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,wEAAwE;AAExE,MAAM,KAAK,GAAG,yBAAyB,CAAC;AACxC,2EAA2E;AAC3E,MAAM,SAAS,GAAG,IAAI,MAAM,CAAC,IAAI,KAAK,qBAAqB,KAAK,KAAK,CAAC,CAAC;AACvE;wEACwE;AACxE,MAAM,UAAU,GAAG,IAAI,MAAM,CAC3B,qEAAqE,KAAK,eAAe,KAAK,KAAK,EACnG,GAAG,CACJ,CAAC;AAEF;;;;;;;GAOG;AACH,MAAM,UAAU,eAAe;IAC7B,OAAO;QACL,EAAE,EAAE,eAAe;QACnB,KAAK,CAAC,IAAI;YACR,MAAM,QAAQ,GAAwB,EAAE,CAAC;YACzC,MAAM,EAAE,UAAU,EAAE,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;YAC9C,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,UAAU,EAAE,CAAC;gBACvC,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS;oBAAE,SAAS;gBACtC,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ;oBAAE,SAAS;gBAChE,MAAM,WAAW,GAAG,OAAO,IAAI,CAAC,WAAW,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC;gBACjF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;oBAAE,SAAS;gBACvC,MAAM,QAAQ,GAAG,oBAAoB,CAAC,WAAW,CAAC,CAAC;gBACnD,IAAI,QAAQ,KAAK,SAAS;oBAAE,SAAS;gBACrC,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,eAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,QAAQ,EAAE,MAAM;oBAChB,KAAK;oBACL,OAAO,EAAE,UAAU,KAAK,uCAAuC,WAAW,CAAC,KAAK,CAC9E,CAAC,EACD,EAAE,CACH,qEAAqE;oBACtE,UAAU,EAAE,WAAW,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE;iBAClD,CAAC,CAAC;YACL,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC;KACF,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAAC,WAAmB;IAC/C,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACzC,IAAI,IAAI,EAAE,CAAC;QACT,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACjD,CAAC;IACD,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC
,WAAW,CAAC,CAAC;IAC3C,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,wEAAwE;AAExE,sEAAsE;AACtE,MAAM,CAAC,MAAM,qBAAqB,GAAsB;IACtD,UAAU;IACV,SAAS;IACT,UAAU;IACV,MAAM;IACN,SAAS;IACT,QAAQ;IACR,UAAU;IACV,QAAQ;IACR,KAAK;IACL,QAAQ;IACR,OAAO;IACP,UAAU;CACX,CAAC;AAOF;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,UAAoC,EAAE;IACtE,MAAM,IAAI,GAAG,OAAO,CAAC,YAAY,IAAI,qBAAqB,CAAC;IAC3D,OAAO;QACL,EAAE,EAAE,6BAA6B;QACjC,KAAK,CAAC,IAAI;YACR,MAAM,QAAQ,GAAwB,EAAE,CAAC;YACzC,MAAM,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;YACxD,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,UAAU,EAAE,CAAC;gBACvC,IAAI,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC;oBAAE,SAAS;gBAClC,MAAM,WAAW,GAAG,OAAO,IAAI,CAAC,WAAW,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBACxF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBAC7B,QAAQ,CAAC,IAAI,CAAC;wBACZ,IAAI,EAAE,6BAA6B;wBACnC,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,QAAQ,EAAE,MAAM;wBAChB,KAAK;wBACL,OAAO,EAAE,mBAAmB,KAAK,0FAA0F;qBAC5H,CAAC,CAAC;gBACL,CAAC;qBAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,YAAY,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC;oBAC/D,QAAQ,CAAC,IAAI,CAAC;wBACZ,IAAI,EAAE,6BAA6B;wBACnC,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,QAAQ,EAAE,MAAM;wBAChB,KAAK;wBACL,OAAO,EAAE,mBAAmB,KAAK,4GAA4G;qBAC9I,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC;KACF,CAAC;AACJ,CAAC;AAED,wEAAwE;AAExE;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAwB;IACzD,eAAe,EAAE;IACjB,mBAAmB,EAAE;IACrB,eAAe,EAAE;IACjB,iBAAiB,EAAE;CACpB,CAAC"}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * tool-lint types — the tool-catalog confusability lint contract
3
+ * (RFC-002 tier 1, blocks C1–C3).
4
+ *
5
+ * Pattern: Strategy seam (the plug-and-play meta-pattern) — the frame
6
+ * and rule engine are the library's; the embedder, thresholds,
7
+ * and structural rule pack are all consumer-injected, with our
8
+ * defaults. Exactly like NarrativeFormatter / reliability /
9
+ * permission / commentary strategies.
10
+ * Role: `src/lib/` leaf module. ZERO stack buy-in: input is a plain
11
+ * `{ name, description?, inputSchema? }[]` — any OpenAI /
12
+ * Anthropic / LangChain / MCP tool list normalizes to it
13
+ * (see `coerceCatalog`). The library's own `Tool[]` adapts via
14
+ * `catalogFromTools`.
15
+ *
16
+ * ## Honest claim (RFC-002 §2)
17
+ *
18
+ * Confusability here is embedding geometry over what the model READS
19
+ * (tool name + description) — a deterministic heuristic for "could the
20
+ * model mix these up", never a measurement of any model's actual
21
+ * selection function. Tier 3 (choice-entropy sampling) validates the
22
+ * proxy; until then treat verdicts as review prompts, not ground truth.
23
+ */
24
+ export {};
25
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/lib/tool-lint/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG"}
@@ -70,4 +70,17 @@ export { adaptWeights, averageRelevancy, compositeScore, contentHash, DEFAULT_IN
70
70
  // exposed as TOOLS a debugging LLM calls over a COMPLETED run's artifacts.
71
71
  // Bounded, honest (⚠ markers), redaction-respecting, id-navigable.
72
72
  export { callTraceTool, TOOLPACK_HARD_CAPS, traceToolpack, } from './lib/trace-toolpack/index.js';
73
+ // Tool-catalog confusability lint (RFC-002 tier 1, C1–C3) — build-time,
74
+ // CI-gateable, framework-agnostic: plain { name, description?, inputSchema? }
75
+ // tools in (OpenAI/Anthropic/MCP lists coerce via coerceCatalog; the
76
+ // library's Tool[] via catalogFromTools), a report with a gateable `ok`
77
+ // out. Pluggable structural rule pack; thresholds + embedder consumer-
78
+ // injected with our defaults. Bin: `agentfootprint-lint-tools`.
79
+ // Front door: docs/guides/tool-catalog-lint.md.
80
+ export { analyzeToolCatalog, catalogFromTools, coerceCatalog, confusabilityText, DEFAULT_CONFUSABILITY_THRESHOLD, DEFAULT_OMISSION_CUES, DEFAULT_WATCH_BAND, DEFAULT_WHEN_CUES, defaultStructuralRules, descriptionRule, differentiationHint, enumInProseRule, formatToolCatalogReport, MOCK_EMBEDDER_CALIBRATION, optionalParamRule, runToolLintCli, saysWhatNotWhenRule, } from './lib/tool-lint/index.js';
81
+ // Tool-choice margin recorder (RFC-002 tier 2, C4–C6) — per LLM call,
82
+ // ranks the OFFERED catalog against the choice context (user message +
83
+ // latest assistant reasoning) via influence-core scoreMargin; embeds
84
+ // LAZILY on first read; flags narrow margins + proxy disagreements.
85
+ export { buildChoiceContext, toolChoiceRecorder, } from './recorders/observability/ToolChoiceRecorder.js';
73
86
  //# sourceMappingURL=observe.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"observe.js","sourceRoot":"","sources":["../../src/observe.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAEH,4BAA4B;AAC5B,OAAO,EAAE,eAAe,EAA+B,MAAM,qCAAqC,CAAC;AACnG,OAAO,EAAE,cAAc,EAA8B,MAAM,oCAAoC,CAAC;AAEhG,+BAA+B;AAC/B,OAAO,EACL,mBAAmB,GAEpB,MAAM,yCAAyC,CAAC;AACjD,OAAO,EAAE,aAAa,EAA6B,MAAM,mCAAmC,CAAC;AAC7F,OAAO,EACL,gBAAgB,EAChB,gBAAgB,GAgBjB,MAAM,+CAA+C,CAAC;AACvD,OAAO,EACL,aAAa,EACb,eAAe,EACf,eAAe,GAQhB,MAAM,8CAA8C,CAAC;AACtD,OAAO,EACL,eAAe,EACf,cAAc,GAQf,MAAM,gDAAgD,CAAC;AACxD,OAAO,EACL,iBAAiB,EACjB,iBAAiB,EACjB,cAAc,EACd,eAAe,EACf,oBAAoB,GAKrB,MAAM,gDAAgD,CAAC;AAExD,6BAA6B;AAC7B,OAAO,EAAE,YAAY,EAA4B,MAAM,kCAAkC,CAAC;AAC1F,OAAO,EAAE,aAAa,EAA6B,MAAM,mCAAmC,CAAC;AAC7F,OAAO,EACL,wBAAwB,GAEzB,MAAM,8CAA8C,CAAC;AACtD,OAAO,EAAE,YAAY,EAA4B,MAAM,kCAAkC,CAAC;AAC1F,OAAO,EAAE,cAAc,EAA8B,MAAM,oCAAoC,CAAC;AAChG,OAAO,EACL,kBAAkB,GAEnB,MAAM,wCAAwC,CAAC;AAChD,OAAO,EAAE,aAAa,EAA6B,MAAM,mCAAmC,CAAC;AAC7F,OAAO,EACL,aAAa,EACb,cAAc,GAIf,MAAM,8CAA8C,CAAC;AACtD,OAAO,EACL,YAAY,GAGb,MAAM,6CAA6C,CAAC;AACrD,4EAA4E;AAC5E,gFAAgF;AAChF,OAAO,EACL,mBAAmB,GAMpB,MAAM,kDAAkD,CAAC;AAC1D,gFAAgF;AAChF,gFAAgF;AAChF,OAAO,EACL,kBAAkB,GAQnB,MAAM,yDAAyD,CAAC;AAEjE,uDAAuD;AACvD,OAAO,EAAE,SAAS,EAAE,MAAM,+BAA+B,CAAC;AAE1D,uEAAuE;AACvE,uEAAuE;AACvE,uEAAuE;AACvE,oEAAoE;AACpE,oEAAoE;AACpE,kEAAkE;AAClE,eAAe;AACf,OAAO,EACL,YAAY,EACZ,gBAAgB,EAChB,cAAc,EACd,WAAW,EACX,yBAAyB,EACzB,wBAAwB,EACxB,6BAA6B,EAC7B,cAAc,EACd,cAAc,EACd,qBAAqB,EACrB,kBAAkB,EAClB,WAAW,EACX,cAAc,EACd,WAAW,EACX,mBAAmB,GAiBpB,MAAM,+BAA+B,CAAC;AACvC,uEAAuE;AACvE,2EAA2E;AAC3E,mEAAmE;AACnE,OAAO,EACL,aAAa,EACb,kBAAkB,EAClB,aAAa,GAGd,MAAM,+BAA+B,CAAC"}
1
+ {"version":3,"file":"observe.js","sourceRoot":"","sources":["../../src/observe.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAEH,4BAA4B;AAC5B,OAAO,EAAE,eAAe,EAA+B,MAAM,qCAAqC,CAAC;AACnG,OAAO,EAAE,cAAc,EAA8B,MAAM,oCAAoC,CAAC;AAEhG,+BAA+B;AAC/B,OAAO,EACL,mBAAmB,GAEpB,MAAM,yCAAyC,CAAC;AACjD,OAAO,EAAE,aAAa,EAA6B,MAAM,mCAAmC,CAAC;AAC7F,OAAO,EACL,gBAAgB,EAChB,gBAAgB,GAgBjB,MAAM,+CAA+C,CAAC;AACvD,OAAO,EACL,aAAa,EACb,eAAe,EACf,eAAe,GAQhB,MAAM,8CAA8C,CAAC;AACtD,OAAO,EACL,eAAe,EACf,cAAc,GAQf,MAAM,gDAAgD,CAAC;AACxD,OAAO,EACL,iBAAiB,EACjB,iBAAiB,EACjB,cAAc,EACd,eAAe,EACf,oBAAoB,GAKrB,MAAM,gDAAgD,CAAC;AAExD,6BAA6B;AAC7B,OAAO,EAAE,YAAY,EAA4B,MAAM,kCAAkC,CAAC;AAC1F,OAAO,EAAE,aAAa,EAA6B,MAAM,mCAAmC,CAAC;AAC7F,OAAO,EACL,wBAAwB,GAEzB,MAAM,8CAA8C,CAAC;AACtD,OAAO,EAAE,YAAY,EAA4B,MAAM,kCAAkC,CAAC;AAC1F,OAAO,EAAE,cAAc,EAA8B,MAAM,oCAAoC,CAAC;AAChG,OAAO,EACL,kBAAkB,GAEnB,MAAM,wCAAwC,CAAC;AAChD,OAAO,EAAE,aAAa,EAA6B,MAAM,mCAAmC,CAAC;AAC7F,OAAO,EACL,aAAa,EACb,cAAc,GAIf,MAAM,8CAA8C,CAAC;AACtD,OAAO,EACL,YAAY,GAGb,MAAM,6CAA6C,CAAC;AACrD,4EAA4E;AAC5E,gFAAgF;AAChF,OAAO,EACL,mBAAmB,GAMpB,MAAM,kDAAkD,CAAC;AAC1D,gFAAgF;AAChF,gFAAgF;AAChF,OAAO,EACL,kBAAkB,GAQnB,MAAM,yDAAyD,CAAC;AAEjE,uDAAuD;AACvD,OAAO,EAAE,SAAS,EAAE,MAAM,+BAA+B,CAAC;AAE1D,uEAAuE;AACvE,uEAAuE;AACvE,uEAAuE;AACvE,oEAAoE;AACpE,oEAAoE;AACpE,kEAAkE;AAClE,eAAe;AACf,OAAO,EACL,YAAY,EACZ,gBAAgB,EAChB,cAAc,EACd,WAAW,EACX,yBAAyB,EACzB,wBAAwB,EACxB,6BAA6B,EAC7B,cAAc,EACd,cAAc,EACd,qBAAqB,EACrB,kBAAkB,EAClB,WAAW,EACX,cAAc,EACd,WAAW,EACX,mBAAmB,GAiBpB,MAAM,+BAA+B,CAAC;AACvC,uEAAuE;AACvE,2EAA2E;AAC3E,mEAAmE;AACnE,OAAO,EACL,aAAa,EACb,kBAAkB,EAClB,aAAa,GAGd,MAAM,+BAA+B,CAAC;AACvC,wEAAwE;AACxE,8EAA8E;AAC9E,qEAAqE;AACrE,wEAAwE;AACxE,uEAAuE;AACvE,gEAAgE;AAChE,gDAAgD;AAChD,OAAO,EACL,kBAAkB,EAClB,gBAAgB,EAChB,aAAa,EACb,iBAAiB,EACjB,+BAA+B,EAC/B,qBAAqB,EACrB,kBAAkB,EAClB,iBAAiB,EACjB,sBAAsB,EACtB,eAAe,EACf,mBAAmB,EACnB,eAAe,EACf,uBAAuB,EACvB,yBAAyB,EACzB,iBAAiB,EACjB,cAAc,EACd,mBAAmB,GAepB,MAAM,0BAA0B,CAAC;AAClC,sEAAsE;AACtE,uEAAuE;
AACvE,qEAAqE;AACrE,oEAAoE;AACpE,OAAO,EACL,kBAAkB,EAClB,kBAAkB,GAOnB,MAAM,iDAAiD,CAAC"}
@@ -0,0 +1,261 @@
1
+ /**
2
+ * toolChoiceRecorder — runtime tool-choice margins (RFC-002 tier 2,
3
+ * blocks C4–C6).
4
+ *
5
+ * Per LLM call that OFFERED tools, this recorder captures the menu the
6
+ * model saw (`stream.llm_start.tools`), what it actually invoked
7
+ * (`stream.tool_start`), and the choice context — then, LAZILY on first
8
+ * read, ranks the offered candidates against that context via
9
+ * influence-core's `scoreMargin` (C4):
10
+ *
11
+ * margin = score(best chosen) − score(best non-chosen)
12
+ *
13
+ * Small margin (`narrow`, < `marginThreshold`, default 0.05) = the
14
+ * choice was a close call under the proxy. Top-scored candidate not
15
+ * among the chosen (`proxyDisagreement`) is ALWAYS flagged — either a
16
+ * proxy miss or a genuinely surprising model choice; both are exactly
17
+ * what a debugger wants surfaced.
18
+ *
19
+ * ## The choice context (C4 — what is embedded, precisely)
20
+ *
21
+ * `buildChoiceContext` assembles the SAME two slots the model's
22
+ * tool-selection reasoning ran on:
23
+ *
24
+ * INCLUDED
25
+ * 1. the user message of the current turn (`agent.turn_start.userPrompt`)
26
+ * — the task the model is choosing a tool FOR (first
27
+ * `maxSlotChars` chars: the head states the task);
28
+ * 2. the latest assistant reasoning text — the most recent
29
+ * `stream.llm_end.content` of this turn, when present (last
30
+ * `maxSlotChars` chars: the tail is where "what next" lives).
31
+ * Iteration 1 has no assistant text; the slot is omitted.
32
+ *
33
+ * EXCLUDED (deliberately)
34
+ * - the system prompt: constant across every call of the run — zero
35
+ * per-call discrimination, it only dilutes the embedding;
36
+ * - older history turns: recency dominates tool choice, and the full
37
+ * transcript grows the embedding cost linearly with run length;
38
+ * - raw tool results: the model reads them, but their distilled
39
+ * effect on the NEXT choice is the assistant's own reasoning text,
40
+ * which IS included; raw payloads skew the embedding toward data
41
+ * vocabulary (the honest-proxy discipline: mirror the
42
+ * decision-relevant text, not every visible byte);
43
+ * - tool schemas: those are the CANDIDATES being ranked, not context.
44
+ *
45
+ * Candidate text per offered tool = `confusabilityText` (tokenized name
46
+ * + description) — the SAME construction the catalog lint (C1) embeds,
47
+ * so build-time confusability and runtime margins measure one geometry.
48
+ *
49
+ * ## Laziness (C5)
50
+ *
51
+ * Event hooks only RECORD (string copies into a KeyedStore). The
52
+ * embedder runs on the first `getCalls()` / `getFlagged()` /
53
+ * `getSummary()` — embedding I/O NEVER rides the hot path, even when
54
+ * the recorder is attached inline. Scores memoize per entry. Attach
55
+ * with `{ delivery: 'deferred' }` (footprintjs RFC-001) to move the
56
+ * bookkeeping off the hot path too — it is a normal CombinedRecorder.
57
+ *
58
+ * Pattern: CombinedRecorder (Convention 1 — single purpose: tool-choice
59
+ * margin evidence). Owns a `KeyedStore<ToolChoiceEntry>` keyed
60
+ * by the LLM call's `runtimeStageId`. Convention 4: resets on a
61
+ * new `runId` via `FlowRecorder.onRunStart`.
62
+ * Role: Tier-3 /observe recorder. Attach via `Agent.create(...)
63
+ * .recorder(handle)` or `executor.attachCombinedRecorder`.
64
+ *
65
+ * Honest claim (RFC-002 §2): margins are embedding geometry between the
66
+ * context and tool descriptions — a deterministic PROXY for the model's
67
+ * selection function, never "the model chose because". Tier 3
68
+ * (choice-entropy sampling) validates the proxy.
69
+ */
70
+ import { KeyedStore } from 'footprintjs/trace';
71
+ import { DEFAULT_MARGIN_THRESHOLD, scoreMargin, } from '../../lib/influence-core/index.js';
72
+ import { confusabilityText } from '../../lib/tool-lint/analyze.js';
/**
 * C4: the precise choice-context construction (see module JSDoc for the
 * include/exclude rationale). Exported so consumers can reproduce it.
 *
 * Builds `user: <head of the user prompt>` and, when there is non-blank
 * assistant text, appends `assistant: <tail of that text>` after a blank
 * line. Each slot is limited to `maxSlotChars` characters.
 *
 * @param {{ userPrompt: string, latestAssistantText?: string, maxSlotChars?: number }} args
 *   `maxSlotChars` defaults to 2000; zero or negative budgets are treated
 *   as zero (both slots empty).
 * @returns {string} the text to embed as the choice context.
 */
export function buildChoiceContext(args) {
    const requested = args.maxSlotChars ?? 2000;
    // Clamp: a negative budget would otherwise make the two slices cut
    // from the wrong ends.
    const max = requested > 0 ? requested : 0;
    const user = `user: ${args.userPrompt.slice(0, max)}`;
    const assistant = args.latestAssistantText?.trim();
    if (assistant === undefined || assistant.length === 0) {
        return user;
    }
    // `slice(-0)` is `slice(0)` — the WHOLE string — so a zero budget
    // must be handled explicitly rather than via a negative index.
    const tail = max > 0 ? assistant.slice(-max) : '';
    return `${user}\n\nassistant: ${tail}`;
}
83
+ /** Build the tool-choice margin recorder (C5). */
84
/**
 * Build a recorder that tracks, per LLM call that offered tools, which tools
 * were offered and which were chosen, and lazily scores each closed call with
 * `scoreMargin`.
 *
 * Nothing is scored while events arrive; scoring only happens when one of the
 * async readers (`getCalls`, `getFlagged`, `getSummary`) is awaited.
 *
 * @param {object} options
 * @param {*} options.embedder Passed through unchanged to `scoreMargin`.
 * @param {number} [options.marginThreshold] Passed to `scoreMargin`; falls
 *   back to `DEFAULT_MARGIN_THRESHOLD` when null/undefined.
 * @param {number} [options.maxSlotChars=2000] Passed to `buildChoiceContext`.
 * @param {string} [options.id='tool-choice'] Recorder id.
 * @returns {object} Recorder exposing `id`, the hooks `onEmit`, `onRunStart`,
 *   `onRunEnd`, `onRunFailed`, the async readers `getCalls`, `getFlagged`,
 *   `getSummary`, and `clear`.
 */
export function toolChoiceRecorder(options) {
    const marginThreshold = options.marginThreshold ?? DEFAULT_MARGIN_THRESHOLD;
    const maxSlotChars = options.maxSlotChars ?? 2000;
    const store = new KeyedStore();
    let lastRunId;
    let userPrompt = '';
    let lastAssistantText;
    // Key of the entry still collecting tool_start events, if any.
    let openKey;
    // Tail of the scoring queue. It never rejects, so one failed pass cannot
    // block the passes queued behind it.
    let scoreQueue = Promise.resolve();

    /** Mark the currently open entry (if any) as closed, making it scorable. */
    const closeOpen = () => {
        if (openKey === undefined) {
            return;
        }
        const open = store.get(openKey);
        if (open) {
            open.closed = true;
        }
        openKey = undefined;
    };

    /** Drop every recorded entry and the per-turn context slots. */
    const reset = () => {
        store.clear();
        userPrompt = '';
        lastAssistantText = undefined;
        openKey = undefined;
    };

    /** One scoring pass — the ONLY place the embedder runs. */
    const scorePending = async () => {
        for (const entry of store.getMap().values()) {
            // Skip entries that are still open or already resolved either way.
            if (!entry.closed || entry.margin !== undefined || entry.skipped !== undefined) {
                continue;
            }
            if (entry.chosen.length === 0) {
                entry.skipped = 'nothing-chosen';
                continue;
            }
            const offeredNames = new Set(entry.offered.map((tool) => tool.name));
            if (!entry.chosen.every((name) => offeredNames.has(name))) {
                entry.skipped = 'chosen-not-offered';
                continue;
            }
            entry.margin = await scoreMargin({
                candidates: entry.offered.map((tool) => ({
                    name: tool.name,
                    text: confusabilityText(tool),
                })),
                contextText: entry.contextText,
                chosen: entry.chosen,
                embedder: options.embedder,
                marginThreshold,
            });
        }
    };

    /**
     * Lazy scoring, serialized. Passes run one after another, so overlapping
     * readers (e.g. `Promise.all([getCalls(), getSummary()])`) never score the
     * same entry twice: the later pass finds `margin` already set. Each caller
     * still gets its own full pass, so entries closed in the meantime are
     * picked up, and a rejection reaches only the caller whose pass failed.
     */
    const ensureScored = () => {
        const pass = scoreQueue.then(scorePending);
        scoreQueue = pass.catch(() => undefined);
        return pass;
    };

    /** Project an internal entry onto the public call shape (arrays copied). */
    const toCall = (entry) => ({
        runtimeStageId: entry.runtimeStageId,
        iteration: entry.iteration,
        offered: entry.offered,
        chosen: [...entry.chosen],
        toolCallIds: [...entry.toolCallIds],
        contextText: entry.contextText,
        ...(entry.margin !== undefined ? { margin: entry.margin } : {}),
        ...(entry.skipped !== undefined ? { skipped: entry.skipped } : {}),
    });

    /** True when a scored entry carries at least one margin flag. */
    const isFlagged = (entry) => entry.margin !== undefined &&
        (entry.margin.flags.narrow || entry.margin.flags.proxyDisagreement);

    return {
        id: options.id ?? 'tool-choice',
        onEmit(event) {
            const payload = event.payload;
            if (payload === null || typeof payload !== 'object') {
                return; // redacted or foreign
            }
            const p = payload;
            switch (event.name) {
                case 'agentfootprint.agent.turn_start': {
                    // New turn on the same run: fresh context slots.
                    closeOpen();
                    userPrompt = typeof p.userPrompt === 'string' ? p.userPrompt : '';
                    lastAssistantText = undefined;
                    break;
                }
                case 'agentfootprint.stream.llm_start': {
                    closeOpen();
                    const tools = Array.isArray(p.tools) ? p.tools : [];
                    if (tools.length === 0) {
                        break; // no menu — nothing to confuse
                    }
                    store.set(event.runtimeStageId, {
                        runtimeStageId: event.runtimeStageId,
                        iteration: Number(p.iteration ?? 0),
                        offered: tools.map((tool) => ({
                            name: tool.name,
                            ...(tool.description !== undefined ? { description: tool.description } : {}),
                        })),
                        contextText: buildChoiceContext({
                            userPrompt,
                            ...(lastAssistantText !== undefined
                                ? { latestAssistantText: lastAssistantText }
                                : {}),
                            maxSlotChars,
                        }),
                        chosen: [],
                        toolCallIds: [],
                        closed: false,
                    });
                    openKey = event.runtimeStageId;
                    break;
                }
                case 'agentfootprint.stream.llm_end': {
                    // Only non-empty text replaces the remembered assistant text.
                    if (typeof p.content === 'string' && p.content.length > 0) {
                        lastAssistantText = p.content;
                    }
                    break;
                }
                case 'agentfootprint.stream.tool_start': {
                    if (openKey === undefined) {
                        break;
                    }
                    const entry = store.get(openKey);
                    if (entry === undefined) {
                        break;
                    }
                    const name = String(p.toolName ?? 'unknown');
                    // `chosen` holds distinct names; `toolCallIds` gets one id per call.
                    if (!entry.chosen.includes(name)) {
                        entry.chosen.push(name);
                    }
                    entry.toolCallIds.push(String(p.toolCallId ?? ''));
                    break;
                }
                case 'agentfootprint.agent.turn_end': {
                    closeOpen();
                    break;
                }
                default:
                    break;
            }
        },
        /** Convention 4 — a new runId means a new run: reset accumulation so
         *  runtimeStageId keys (which restart per run) cannot collide. The
         *  executor also calls `clear()` before each `run()` — this hook is
         *  the detection that works regardless of attach surface. Same-
         *  executor `resume()` fires `onResume` (not `onRunStart`) and skips
         *  `clear()`, so pre-pause entries SURVIVE a resume by design. */
        onRunStart(event) {
            const runId = event.traversalContext?.runId;
            if (runId !== undefined && runId !== lastRunId) {
                reset();
                lastRunId = runId;
            }
        },
        onRunEnd() {
            closeOpen();
        },
        onRunFailed() {
            closeOpen();
        },
        /** Score pending entries, then return every recorded call. */
        async getCalls() {
            await ensureScored();
            return [...store.getMap().values()].map(toCall);
        },
        /** Score pending entries, then return only calls with a margin flag set. */
        async getFlagged() {
            await ensureScored();
            return [...store.getMap().values()].filter(isFlagged).map(toCall);
        },
        /** Score pending entries, then return aggregate counts over all entries. */
        async getSummary() {
            await ensureScored();
            const entries = [...store.getMap().values()];
            const scored = entries.filter((entry) => entry.margin !== undefined);
            const narrow = scored.filter((entry) => entry.margin?.flags.narrow).length;
            const proxyDisagreement = scored.filter((entry) => entry.margin?.flags.proxyDisagreement).length;
            const flagged = scored.filter((entry) => entry.margin?.flags.narrow || entry.margin?.flags.proxyDisagreement).length;
            return {
                llmCallsWithTools: entries.length,
                choices: entries.filter((entry) => entry.chosen.length > 0).length,
                scored: scored.length,
                flagged,
                narrow,
                proxyDisagreement,
                skipped: entries.filter((entry) => entry.skipped !== undefined).length,
            };
        },
        clear() {
            reset();
        },
    };
}
261
+ //# sourceMappingURL=ToolChoiceRecorder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ToolChoiceRecorder.js","sourceRoot":"","sources":["../../../../src/recorders/observability/ToolChoiceRecorder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoEG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EACL,wBAAwB,EACxB,WAAW,GAGZ,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AA2FnE;0EAC0E;AAC1E,MAAM,UAAU,kBAAkB,CAAC,IAIlC;IACC,MAAM,GAAG,GAAG,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC;IACtC,MAAM,IAAI,GAAG,SAAS,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;IACtD,MAAM,SAAS,GAAG,IAAI,CAAC,mBAAmB,EAAE,IAAI,EAAE,CAAC;IACnD,IAAI,SAAS,KAAK,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACnE,OAAO,GAAG,IAAI,kBAAkB,SAAS,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;AAC1D,CAAC;AAgBD,kDAAkD;AAClD,MAAM,UAAU,kBAAkB,CAAC,OAAkC;IACnE,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,IAAI,wBAAwB,CAAC;IAC5E,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC;IAClD,MAAM,KAAK,GAAG,IAAI,UAAU,EAAmB,CAAC;IAEhD,IAAI,SAA6B,CAAC;IAClC,IAAI,UAAU,GAAG,EAAE,CAAC;IACpB,IAAI,iBAAqC,CAAC;IAC1C,IAAI,OAA2B,CAAC;IAEhC,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,IAAI,OAAO,KAAK,SAAS;YAAE,OAAO;QAClC,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAChC,IAAI,IAAI;YAAE,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;QAC7B,OAAO,GAAG,SAAS,CAAC;IACtB,CAAC,CAAC;IAEF,MAAM,KAAK,GAAG,GAAS,EAAE;QACvB,KAAK,CAAC,KAAK,EAAE,CAAC;QACd,UAAU,GAAG,EAAE,CAAC;QAChB,iBAAiB,GAAG,SAAS,CAAC;QAC9B,OAAO,GAAG,SAAS,CAAC;IACtB,CAAC,CAAC;IAEF,4DAA4D;IAC5D,MAAM,YAAY,GAAG,KAAK,IAAmB,EAAE;QAC7C,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC;YAC5C,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,KAAK,SAAS,IAAI,KAAK,CAAC,OAAO,KAAK,SAAS;gBAAE,SAAS;YACzF,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC9B,KAAK,CAAC,OAAO,GAAG,gBAAgB,CAAC;gBACjC,SAAS;YACX,CAAC;YACD,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;YACrE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,IAA
I,CAAC,CAAC,EAAE,CAAC;gBAC1D,KAAK,CAAC,OAAO,GAAG,oBAAoB,CAAC;gBACrC,SAAS;YACX,CAAC;YACD,KAAK,CAAC,MAAM,GAAG,MAAM,WAAW,CAAC;gBAC/B,UAAU,EAAE,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;oBACvC,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,IAAI,EAAE,iBAAiB,CAAC,IAAI,CAAC;iBAC9B,CAAC,CAAC;gBACH,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,eAAe;aAChB,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC;IAEF,MAAM,MAAM,GAAG,CAAC,KAAsB,EAAkB,EAAE,CAAC,CAAC;QAC1D,cAAc,EAAE,KAAK,CAAC,cAAc;QACpC,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,MAAM,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC;QACzB,WAAW,EAAE,CAAC,GAAG,KAAK,CAAC,WAAW,CAAC;QACnC,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC/D,GAAG,CAAC,KAAK,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACnE,CAAC,CAAC;IAEH,OAAO;QACL,EAAE,EAAE,OAAO,CAAC,EAAE,IAAI,aAAa;QAE/B,MAAM,CAAC,KAAK;YACV,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC;YAC9B,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,OAAO,KAAK,QAAQ;gBAAE,OAAO,CAAC,sBAAsB;YACnF,MAAM,CAAC,GAAG,OAAkC,CAAC;YAC7C,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;gBACnB,KAAK,iCAAiC,CAAC,CAAC,CAAC;oBACvC,iDAAiD;oBACjD,SAAS,EAAE,CAAC;oBACZ,UAAU,GAAG,OAAO,CAAC,CAAC,UAAU,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;oBAClE,iBAAiB,GAAG,SAAS,CAAC;oBAC9B,MAAM;gBACR,CAAC;gBACD,KAAK,iCAAiC,CAAC,CAAC,CAAC;oBACvC,SAAS,EAAE,CAAC;oBACZ,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,KAAuB,CAAC,CAAC,CAAC,EAAE,CAAC;oBACvE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;wBAAE,MAAM,CAAC,+BAA+B;oBAC9D,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,cAAc,EAAE;wBAC9B,cAAc,EAAE,KAAK,CAAC,cAAc;wBACpC,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC;wBACnC,OAAO,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;4BAC5B,IAAI,EAAE,IAAI,CAAC,IAAI;4BACf,GAAG,CAAC,IAAI,CAAC,WAAW,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,
EAAE,CAAC;yBAC7E,CAAC,CAAC;wBACH,WAAW,EAAE,kBAAkB,CAAC;4BAC9B,UAAU;4BACV,GAAG,CAAC,iBAAiB,KAAK,SAAS;gCACjC,CAAC,CAAC,EAAE,mBAAmB,EAAE,iBAAiB,EAAE;gCAC5C,CAAC,CAAC,EAAE,CAAC;4BACP,YAAY;yBACb,CAAC;wBACF,MAAM,EAAE,EAAE;wBACV,WAAW,EAAE,EAAE;wBACf,MAAM,EAAE,KAAK;qBACd,CAAC,CAAC;oBACH,OAAO,GAAG,KAAK,CAAC,cAAc,CAAC;oBAC/B,MAAM;gBACR,CAAC;gBACD,KAAK,+BAA+B,CAAC,CAAC,CAAC;oBACrC,IAAI,OAAO,CAAC,CAAC,OAAO,KAAK,QAAQ,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC1D,iBAAiB,GAAG,CAAC,CAAC,OAAO,CAAC;oBAChC,CAAC;oBACD,MAAM;gBACR,CAAC;gBACD,KAAK,kCAAkC,CAAC,CAAC,CAAC;oBACxC,IAAI,OAAO,KAAK,SAAS;wBAAE,MAAM;oBACjC,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;oBACjC,IAAI,KAAK,KAAK,SAAS;wBAAE,MAAM;oBAC/B,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,QAAQ,IAAI,SAAS,CAAC,CAAC;oBAC7C,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC;wBAAE,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAC1D,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,CAAC;oBACnD,MAAM;gBACR,CAAC;gBACD,KAAK,+BAA+B,CAAC,CAAC,CAAC;oBACrC,SAAS,EAAE,CAAC;oBACZ,MAAM;gBACR,CAAC;gBACD;oBACE,MAAM;YACV,CAAC;QACH,CAAC;QAED;;;;;0EAKkE;QAClE,UAAU,CAAC,KAAK;YACd,MAAM,KAAK,GAAG,KAAK,CAAC,gBAAgB,EAAE,KAAK,CAAC;YAC5C,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC/C,KAAK,EAAE,CAAC;gBACR,SAAS,GAAG,KAAK,CAAC;YACpB,CAAC;QACH,CAAC;QAED,QAAQ;YACN,SAAS,EAAE,CAAC;QACd,CAAC;QAED,WAAW;YACT,SAAS,EAAE,CAAC;QACd,CAAC;QAED,KAAK,CAAC,QAAQ;YACZ,MAAM,YAAY,EAAE,CAAC;YACrB,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAClD,CAAC;QAED,KAAK,CAAC,UAAU;YACd,MAAM,YAAY,EAAE,CAAC;YACrB,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC;iBAChC,MAAM,CACL,CAAC,KAAK,EAAE,EAAE,CACR,KAAK,CAAC,MAAM,KAAK,SAAS;gBAC1B,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,iBAAiB,CAAC,CACtE;iBACA,GAAG,CAAC,MAAM,CAAC,CAAC;QACjB,CAAC;QAED,KAAK,CAAC,UAAU;YACd,MAAM,YAAY,EAAE,CAAC;YACrB,MAAM,OAAO,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC;YAC7C,MAAM,MAAM,GAAG,OAAO,C
AAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;YAC3E,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CACrC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,iBAAiB,CACjD,CAAC,MAAM,CAAC;YACT,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAC3B,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,iBAAiB,CAC/E,CAAC,MAAM,CAAC;YACT,OAAO;gBACL,iBAAiB,EAAE,OAAO,CAAC,MAAM;gBACjC,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM;gBAClE,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,OAAO;gBACP,MAAM;gBACN,iBAAiB;gBACjB,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,MAAM;aACvE,CAAC;QACJ,CAAC;QAED,KAAK;YACH,KAAK,EAAE,CAAC;QACV,CAAC;KACF,CAAC;AACJ,CAAC"}