agentfootprint 6.24.0 → 6.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/README.md +31 -0
  2. package/bin/agentfootprint-lint-tools.mjs +14 -0
  3. package/dist/esm/lib/context-bisect/ablation.js +183 -0
  4. package/dist/esm/lib/context-bisect/ablation.js.map +1 -0
  5. package/dist/esm/lib/context-bisect/bisect.js +129 -0
  6. package/dist/esm/lib/context-bisect/bisect.js.map +1 -0
  7. package/dist/esm/lib/context-bisect/index.js +22 -0
  8. package/dist/esm/lib/context-bisect/index.js.map +1 -0
  9. package/dist/esm/lib/context-bisect/llmEdgeWeigher.js +0 -0
  10. package/dist/esm/lib/context-bisect/llmEdgeWeigher.js.map +1 -0
  11. package/dist/esm/lib/context-bisect/localize.js +555 -0
  12. package/dist/esm/lib/context-bisect/localize.js.map +1 -0
  13. package/dist/esm/lib/context-bisect/types.js +56 -0
  14. package/dist/esm/lib/context-bisect/types.js.map +1 -0
  15. package/dist/esm/lib/tool-lint/analyze.js +235 -0
  16. package/dist/esm/lib/tool-lint/analyze.js.map +1 -0
  17. package/dist/esm/lib/tool-lint/cli.js +198 -0
  18. package/dist/esm/lib/tool-lint/cli.js.map +1 -0
  19. package/dist/esm/lib/tool-lint/format.js +61 -0
  20. package/dist/esm/lib/tool-lint/format.js.map +1 -0
  21. package/dist/esm/lib/tool-lint/index.js +23 -0
  22. package/dist/esm/lib/tool-lint/index.js.map +1 -0
  23. package/dist/esm/lib/tool-lint/rules.js +249 -0
  24. package/dist/esm/lib/tool-lint/rules.js.map +1 -0
  25. package/dist/esm/lib/tool-lint/types.js +25 -0
  26. package/dist/esm/lib/tool-lint/types.js.map +1 -0
  27. package/dist/esm/observe.js +20 -0
  28. package/dist/esm/observe.js.map +1 -1
  29. package/dist/esm/recorders/observability/ToolChoiceRecorder.js +261 -0
  30. package/dist/esm/recorders/observability/ToolChoiceRecorder.js.map +1 -0
  31. package/dist/lib/context-bisect/ablation.js +192 -0
  32. package/dist/lib/context-bisect/ablation.js.map +1 -0
  33. package/dist/lib/context-bisect/bisect.js +133 -0
  34. package/dist/lib/context-bisect/bisect.js.map +1 -0
  35. package/dist/lib/context-bisect/index.js +40 -0
  36. package/dist/lib/context-bisect/index.js.map +1 -0
  37. package/dist/lib/context-bisect/llmEdgeWeigher.js +0 -0
  38. package/dist/lib/context-bisect/llmEdgeWeigher.js.map +1 -0
  39. package/dist/lib/context-bisect/localize.js +563 -0
  40. package/dist/lib/context-bisect/localize.js.map +1 -0
  41. package/dist/lib/context-bisect/types.js +59 -0
  42. package/dist/lib/context-bisect/types.js.map +1 -0
  43. package/dist/lib/tool-lint/analyze.js +242 -0
  44. package/dist/lib/tool-lint/analyze.js.map +1 -0
  45. package/dist/lib/tool-lint/cli.js +203 -0
  46. package/dist/lib/tool-lint/cli.js.map +1 -0
  47. package/dist/lib/tool-lint/format.js +65 -0
  48. package/dist/lib/tool-lint/format.js.map +1 -0
  49. package/dist/lib/tool-lint/index.js +43 -0
  50. package/dist/lib/tool-lint/index.js.map +1 -0
  51. package/dist/lib/tool-lint/rules.js +256 -0
  52. package/dist/lib/tool-lint/rules.js.map +1 -0
  53. package/dist/lib/tool-lint/types.js +26 -0
  54. package/dist/lib/tool-lint/types.js.map +1 -0
  55. package/dist/observe.js +56 -1
  56. package/dist/observe.js.map +1 -1
  57. package/dist/recorders/observability/ToolChoiceRecorder.js +266 -0
  58. package/dist/recorders/observability/ToolChoiceRecorder.js.map +1 -0
  59. package/dist/types/lib/context-bisect/ablation.d.ts +97 -0
  60. package/dist/types/lib/context-bisect/ablation.d.ts.map +1 -0
  61. package/dist/types/lib/context-bisect/bisect.d.ts +76 -0
  62. package/dist/types/lib/context-bisect/bisect.d.ts.map +1 -0
  63. package/dist/types/lib/context-bisect/index.d.ts +22 -0
  64. package/dist/types/lib/context-bisect/index.d.ts.map +1 -0
  65. package/dist/types/lib/context-bisect/llmEdgeWeigher.d.ts +125 -0
  66. package/dist/types/lib/context-bisect/llmEdgeWeigher.d.ts.map +1 -0
  67. package/dist/types/lib/context-bisect/localize.d.ts +119 -0
  68. package/dist/types/lib/context-bisect/localize.d.ts.map +1 -0
  69. package/dist/types/lib/context-bisect/types.d.ts +356 -0
  70. package/dist/types/lib/context-bisect/types.d.ts.map +1 -0
  71. package/dist/types/lib/tool-lint/analyze.d.ts +84 -0
  72. package/dist/types/lib/tool-lint/analyze.d.ts.map +1 -0
  73. package/dist/types/lib/tool-lint/cli.d.ts +44 -0
  74. package/dist/types/lib/tool-lint/cli.d.ts.map +1 -0
  75. package/dist/types/lib/tool-lint/format.d.ts +19 -0
  76. package/dist/types/lib/tool-lint/format.d.ts.map +1 -0
  77. package/dist/types/lib/tool-lint/index.d.ts +24 -0
  78. package/dist/types/lib/tool-lint/index.d.ts.map +1 -0
  79. package/dist/types/lib/tool-lint/rules.d.ts +86 -0
  80. package/dist/types/lib/tool-lint/rules.d.ts.map +1 -0
  81. package/dist/types/lib/tool-lint/types.d.ts +156 -0
  82. package/dist/types/lib/tool-lint/types.d.ts.map +1 -0
  83. package/dist/types/observe.d.ts +3 -0
  84. package/dist/types/observe.d.ts.map +1 -1
  85. package/dist/types/recorders/observability/ToolChoiceRecorder.d.ts +165 -0
  86. package/dist/types/recorders/observability/ToolChoiceRecorder.d.ts.map +1 -0
  87. package/package.json +4 -2
@@ -0,0 +1,256 @@
1
+ "use strict";
2
+ /**
3
+ * Structural lint rules (RFC-002 block C2) — the PLUGGABLE RULE PACK.
4
+ *
5
+ * Pattern: Strategy list — each rule is a plain `{ id, check }` object;
6
+ * `defaultStructuralRules` is OUR pack, and consumers add /
7
+ * remove / replace freely via `AnalyzeToolCatalogOptions.rules`.
8
+ * Parameterizable rules ship as FACTORIES (`descriptionRule`,
9
+ * `saysWhatNotWhenRule`, …) returning a configured `LintRule`.
10
+ * Role: `src/lib/tool-lint/` leaf. Pure functions over `CatalogTool`;
11
+ * no embedder, no I/O.
12
+ *
13
+ * Every rule encodes a FIELD FINDING from real catalogs (the Neo SAN
14
+ * triage agent's 29-tool catalog was the seed corpus):
15
+ *
16
+ * 1. description-missing-or-short — the model can only guess from a name.
17
+ * 2. says-what-not-when — describes WHAT the tool returns but gives the
18
+ * model no cue for WHEN to pick it over a sibling (the #1 cause of
19
+ * twin-tool confusion: 'get_fcns_database' vs 'influx_get_fcns_database').
20
+ * 3. enum-in-prose — string params whose legal values are listed in prose
21
+ * ("avg_iops | peak_iops | mbps") instead of a JSON-Schema `enum` the
22
+ * model (and validators, see #9 tool-args validation) can act on.
23
+ * 4. optional-param-undocumented — optional params whose omission has
24
+ * meaning (fabric-wide sweep vs one switch) but whose schema never
25
+ * says so; the model can't reason about leaving them out.
26
+ *
27
+ * Honest claim: these are token/regex HEURISTICS. They flag review
28
+ * prompts, not certainties — expect (rare) false positives and tune via
29
+ * the factory options instead of deleting the rule.
30
+ */
31
+ Object.defineProperty(exports, "__esModule", { value: true });
32
+ exports.defaultStructuralRules = exports.optionalParamRule = exports.DEFAULT_OMISSION_CUES = exports.enumInProseRule = exports.saysWhatNotWhenRule = exports.DEFAULT_WHEN_CUES = exports.descriptionRule = void 0;
33
/**
 * Pull the usable parts out of a tool's JSON-Schema-ish `inputSchema`:
 * the `[name, schema]` pairs of `properties` whose schema is an object,
 * and the string entries of `required` as a Set. A missing or malformed
 * `inputSchema` / `properties` / `required` degrades to an empty list or
 * empty Set rather than throwing.
 */
function readObjectSchema(tool) {
    const isObjectLike = (value) => value !== null && typeof value === 'object';
    const inputSchema = tool.inputSchema;
    const declared = inputSchema?.properties;
    // Keep only the entries whose per-param schema is itself an object.
    let properties = [];
    if (isObjectLike(declared)) {
        properties = Object.entries(declared).filter(([, paramSchema]) => isObjectLike(paramSchema));
    }
    // `required` only counts when it is an array; non-string members are dropped.
    const requiredNames = [];
    if (Array.isArray(inputSchema?.required)) {
        for (const candidate of inputSchema.required) {
            if (typeof candidate === 'string') {
                requiredNames.push(candidate);
            }
        }
    }
    return { properties, required: new Set(requiredNames) };
}
44
/**
 * Case-insensitive whole-word (or whole-phrase) test.
 *
 * `word` matches when it appears in `text` delimited on both sides by a
 * character outside `[a-z0-9]` or by a string edge, so 'when' matches
 * "Use when …" but not "whenever".
 *
 * Edge cases:
 *  - An empty / whitespace-only cue never matches. Without this guard the
 *    pattern collapses to "any boundary", which matches most punctuated
 *    text and would silently mute the calling rule.
 *  - A non-string `text` or `word` yields `false` instead of throwing.
 *  - Whitespace inside a multi-word cue ('if not') matches any whitespace
 *    run, so the cue still hits across a line break or a double space.
 *
 * @param {string} text - the description to search.
 * @param {string} word - the cue word or phrase.
 * @returns {boolean} whether the cue occurs as a whole word/phrase.
 */
function hasWholeWord(text, word) {
    if (typeof text !== 'string' || typeof word !== 'string') {
        return false;
    }
    const tokens = word
        .toLowerCase()
        .split(/\s+/)
        .filter((token) => token.length > 0);
    if (tokens.length === 0) {
        return false;
    }
    const phrase = tokens.map((token) => escapeRegExp(token)).join('\\s+');
    return new RegExp(`(?:^|[^a-z0-9])${phrase}(?:[^a-z0-9]|$)`).test(text.toLowerCase());
}

/**
 * Backslash-escape every regex metacharacter in `s` so it can be embedded
 * in a `RegExp` source and match literally.
 *
 * @param {string} s - raw text.
 * @returns {string} `s` with metacharacters escaped.
 */
function escapeRegExp(s) {
    return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
50
/**
 * Rule 1 factory — `description-missing-or-short`.
 *
 * Missing description → `error` (the model can only guess from the
 * name). Present but shorter than `minChars` after trimming → `warn`
 * (too short to differentiate from siblings).
 *
 * A description that is not a string is treated as missing rather than
 * throwing, matching the `typeof` guard `optionalParamRule` applies to
 * param descriptions.
 *
 * @param {{ minChars?: number }} [options] - `minChars` defaults to 40.
 * @returns {{ id: string, check: Function }} rule whose `check(tool)`
 *   returns an array holding zero or one finding.
 */
function descriptionRule(options = {}) {
    const minChars = options.minChars ?? 40;
    const ruleId = 'description-missing-or-short';
    return {
        id: ruleId,
        check(tool) {
            // Non-string descriptions count as absent; calling .trim() on them would throw.
            const description = typeof tool.description === 'string' ? tool.description.trim() : '';
            if (description.length === 0) {
                return [
                    {
                        rule: ruleId,
                        tool: tool.name,
                        severity: 'error',
                        message: 'tool has no description — the model can only guess from the name',
                    },
                ];
            }
            if (description.length < minChars) {
                return [
                    {
                        rule: ruleId,
                        tool: tool.name,
                        severity: 'warn',
                        message: `description is ${description.length} chars (< ${minChars}) — too short to differentiate this tool from its siblings`,
                    },
                ];
            }
            return [];
        },
    };
}
85
+ exports.descriptionRule = descriptionRule;
86
+ // ── Rule 2 — says WHAT, not WHEN ─────────────────────────────────────
87
/**
 * Default cue tokens for the says-what-not-when heuristic (RFC-002 C2):
 * temporal/conditional words whose presence suggests the description
 * says WHEN to use the tool.
 */
exports.DEFAULT_WHEN_CUES = ['for', 'when', 'after', 'first', 'fallback', 'only'];
97
/**
 * Rule 2 factory — `says-what-not-when`.
 *
 * A description with NO temporal/conditional cue token usually describes
 * WHAT the tool returns but never WHEN to pick it — the #1 cause of
 * twin-tool confusion. Heuristic by design: tune `cueTokens` rather than
 * dropping the rule.
 *
 * Tools with no description are skipped (that is rule 1's finding). A
 * description that is not a string is treated the same way instead of
 * throwing on `.trim()`.
 *
 * @param {{ cueTokens?: string[] }} [options] - `cueTokens` defaults to
 *   `DEFAULT_WHEN_CUES`.
 * @returns {{ id: string, check: Function }} rule whose `check(tool)`
 *   returns an array holding zero or one `warn` finding.
 */
function saysWhatNotWhenRule(options = {}) {
    const cues = options.cueTokens ?? exports.DEFAULT_WHEN_CUES;
    return {
        id: 'says-what-not-when',
        check(tool) {
            // Non-string descriptions count as absent; rule 1 reports them.
            const description = typeof tool.description === 'string' ? tool.description.trim() : '';
            if (description.length === 0) {
                return [];
            }
            if (cues.some((cue) => hasWholeWord(description, cue))) {
                return [];
            }
            return [
                {
                    rule: 'says-what-not-when',
                    tool: tool.name,
                    severity: 'warn',
                    message: 'description says WHAT the tool returns but gives no cue for WHEN to use it ' +
                        `(no ${cues.map((c) => `'${c}'`).join('/')}) — add the choice condition, ` +
                        'e.g. "Use when …" / "Call FIRST" / "FALLBACK if …"',
                },
            ];
        },
    };
}
126
+ exports.saysWhatNotWhenRule = saysWhatNotWhenRule;
127
+ // ── Rule 3 — enum described in prose ─────────────────────────────────
128
/** One literal token: starts with a letter, may contain `_`, `.` and `-`
 * inside, but must END on a letter, digit or underscore — so sentence
 * punctuation after a list ("… | mbps." / "…, blue.") is not captured
 * into the suggested enum value. */
const IDENT = '[A-Za-z][A-Za-z0-9_]*(?:[.-]+[A-Za-z0-9_]+)*';
/** `avg_iops | peak_iops | mbps` — two or more pipe-separated literals. */
const PIPE_LIST = new RegExp(`(${IDENT})(?:\\s*\\|\\s*(?:${IDENT}))+`);
/** `one of: red, green, blue` — comma lists only behind an explicit
 * values marker, so free-form examples ("e.g. 1h, 24h") don't flag. */
const COMMA_LIST = new RegExp(`(?:one of|allowed values?|valid values?|options|values)\\s*:?\\s*(${IDENT}(?:\\s*,\\s*${IDENT})+)`, 'i');
134
/**
 * Rule 3 factory — `enum-in-prose`.
 *
 * A string param whose description enumerates its legal values in prose
 * (pipe-separated literals, or comma lists behind "one of"/"allowed
 * values") should declare a JSON-Schema `enum` instead — the model picks
 * reliably from enums, and arg validators (#9) can enforce them. The
 * field case: Neo's `influx_get_port_ranking.metric` =
 * `"avg_iops | peak_iops | mbps"`.
 *
 * A param is inspected when it declares no `enum` and its `type` is
 * absent, `'string'`, or an array of types that includes `'string'`
 * (JSON Schema allows e.g. `["string", "null"]`).
 *
 * @returns {{ id: string, check: Function }} rule whose `check(tool)`
 *   returns one `warn` finding per offending param, each carrying a
 *   ready-to-paste `suggestion`.
 */
function enumInProseRule() {
    return {
        id: 'enum-in-prose',
        check(tool) {
            const findings = [];
            const { properties } = readObjectSchema(tool);
            for (const [param, prop] of properties) {
                // Already a declared enum — nothing to suggest.
                if (prop.enum !== undefined) {
                    continue;
                }
                const declaredType = prop.type;
                const allowsString = declaredType === undefined ||
                    declaredType === 'string' ||
                    (Array.isArray(declaredType) && declaredType.includes('string'));
                if (!allowsString) {
                    continue;
                }
                const description = typeof prop.description === 'string' ? prop.description : '';
                if (description.length === 0) {
                    continue;
                }
                const literals = extractProseLiterals(description);
                if (literals === undefined) {
                    continue;
                }
                findings.push({
                    rule: 'enum-in-prose',
                    tool: tool.name,
                    severity: 'warn',
                    param,
                    // Only the first 80 chars of the description are quoted in the message.
                    message: `param '${param}' lists its legal values in prose ("${description.slice(0, 80)}") — declare them as a JSON-Schema enum so the model picks reliably`,
                    suggestion: `"enum": ${JSON.stringify(literals)}`,
                });
            }
            return findings;
        },
    };
}
172
+ exports.enumInProseRule = enumInProseRule;
173
/**
 * Find a prose-enumerated value list in `description` and return its
 * literals, trimmed. A pipe-separated list is tried first; otherwise a
 * comma list behind a values marker. Returns `undefined` when neither
 * pattern matches.
 */
function extractProseLiterals(description) {
    const splitTrimmed = (listText, separator) => listText.split(separator).map((part) => part.trim());
    const pipeMatch = PIPE_LIST.exec(description);
    if (pipeMatch !== null) {
        // The whole match is the list itself.
        return splitTrimmed(pipeMatch[0], '|');
    }
    const commaMatch = COMMA_LIST.exec(description);
    // Capture group 1 is the list after the marker ("one of:", "values:" …).
    return commaMatch !== null ? splitTrimmed(commaMatch[1], ',') : undefined;
}
184
+ // ── Rule 4 — optional param whose omission is undocumented ───────────
185
+ /** Words that signal the description DOES say what omission means. */
186
+ exports.DEFAULT_OMISSION_CUES = [
187
+ 'optional',
188
+ 'default',
189
+ 'defaults',
190
+ 'omit',
191
+ 'omitted',
192
+ 'if not',
193
+ 'when not',
194
+ 'absent',
195
+ 'all',
196
+ 'entire',
197
+ 'every',
198
+ 'fallback',
199
+ ];
200
/**
 * Rule 4 factory — `optional-param-undocumented`.
 *
 * Leaving an optional param out usually MEANS something (Neo:
 * `influx_get_interface_counters` without `switch_name` = fabric-wide
 * sweep), but the model can only reason about that if the description
 * says so. Every non-required param gets a `warn` when its description
 * is missing/blank, or when it contains none of the omission cues.
 * `options.omissionCues` overrides `DEFAULT_OMISSION_CUES`.
 */
function optionalParamRule(options = {}) {
    const cues = options.omissionCues ?? exports.DEFAULT_OMISSION_CUES;
    const ruleId = 'optional-param-undocumented';
    return {
        id: ruleId,
        check(tool) {
            const { properties, required } = readObjectSchema(tool);
            const warn = (param, message) => ({
                rule: ruleId,
                tool: tool.name,
                severity: 'warn',
                param,
                message,
            });
            return properties.flatMap(([param, prop]) => {
                // Required params are out of scope for this rule.
                if (required.has(param)) {
                    return [];
                }
                const description = typeof prop.description === 'string' ? prop.description.trim() : '';
                if (description.length === 0) {
                    return [
                        warn(param, `optional param '${param}' has no description — say what happens when it is omitted (a default? a broader scope?)`),
                    ];
                }
                const mentionsOmission = cues.some((cue) => hasWholeWord(description, cue));
                if (mentionsOmission) {
                    return [];
                }
                return [
                    warn(param, `optional param '${param}' is described but never says what omission means — add e.g. "optional — defaults to …" / "omit for all …"`),
                ];
            });
        },
    };
}
241
+ exports.optionalParamRule = optionalParamRule;
242
+ // ── The default pack ─────────────────────────────────────────────────
243
+ /**
244
+ * OUR rule pack, built with default options. Compose your own:
245
+ *
246
+ * rules: [...defaultStructuralRules, myRule] // add
247
+ * rules: defaultStructuralRules.filter(r => r.id !== '…') // remove
248
+ * rules: [descriptionRule({ minChars: 80 }), …] // re-tune
249
+ */
250
+ exports.defaultStructuralRules = [
251
+ descriptionRule(),
252
+ saysWhatNotWhenRule(),
253
+ enumInProseRule(),
254
+ optionalParamRule(),
255
+ ];
256
+ //# sourceMappingURL=rules.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rules.js","sourceRoot":"","sources":["../../../src/lib/tool-lint/rules.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;;;AAYH;sEACsE;AACtE,SAAS,gBAAgB,CAAC,IAAiB;IAIzC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC;IAChC,MAAM,KAAK,GAAG,MAAM,EAAE,UAAU,CAAC;IACjC,MAAM,UAAU,GACd,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ;QACzC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,KAAgC,CAAC,CAAC,MAAM,CACrD,CAAC,KAAK,EAAqC,EAAE,CAC3C,KAAK,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,QAAQ,CACpD;QACH,CAAC,CAAC,EAAE,CAAC;IACT,MAAM,QAAQ,GAAG,IAAI,GAAG,CACtB,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAC5F,CAAC;IACF,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;AAClC,CAAC;AAED,SAAS,YAAY,CAAC,IAAY,EAAE,IAAY;IAC9C,OAAO,IAAI,MAAM,CAAC,kBAAkB,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,iBAAiB,CAAC,CAAC,IAAI,CACzF,IAAI,CAAC,WAAW,EAAE,CACnB,CAAC;AACJ,CAAC;AAED,SAAS,YAAY,CAAC,CAAS;IAC7B,OAAO,CAAC,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;AAClD,CAAC;AASD;;;;GAIG;AACH,SAAgB,eAAe,CAAC,UAAkC,EAAE;IAClE,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IACxC,OAAO;QACL,EAAE,EAAE,8BAA8B;QAClC,KAAK,CAAC,IAAI;YACR,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YACnD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC7B,OAAO;oBACL;wBACE,IAAI,EAAE,8BAA8B;wBACpC,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,kEAAkE;qBAC5E;iBACF,CAAC;YACJ,CAAC;YACD,IAAI,WAAW,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;gBAClC,OAAO;oBACL;wBACE,IAAI,EAAE,8BAA8B;wBACpC,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,QAAQ,EAAE,MAAM;wBAChB,OAAO,EAAE,kBAAkB,WAAW,CAAC,MAAM,aAAa,QAAQ,4DAA4D;qBAC/H;iBACF,CAAC;YACJ,CAAC;YACD,OAAO,EAAE,CAAC;QACZ,CAAC;KACF,CAAC;AACJ,CAAC;AA7BD,0CA6BC;AAED,wEAAwE;AAExE;mEACmE;AACtD,QAAA,iBAAiB,GAAsB;IAClD,KAAK;IACL,MAAM;IACN,OAAO;IACP,OAAO;IACP,UAAU;IACV,MAAM;CACP,CAAC;AAOF;;;;;GAKG;AACH,SAAgB,mBAAmB,CAAC,UAAsC,EAAE;IAC1E,MAAM,IAAI,GAAG,OAAO,CAAC,SAAS,IAAI,yBAAiB,CAAC;IACpD,OAAO;QACL,EAAE,EAAE,oBA
AoB;QACxB,KAAK,CAAC,IAAI;YACR,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YACnD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YACxC,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,YAAY,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC;gBAAE,OAAO,EAAE,CAAC;YAClE,OAAO;gBACL;oBACE,IAAI,EAAE,oBAAoB;oBAC1B,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,QAAQ,EAAE,MAAM;oBAChB,OAAO,EACL,6EAA6E;wBAC7E,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,gCAAgC;wBAC1E,oDAAoD;iBACvD;aACF,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AArBD,kDAqBC;AAED,wEAAwE;AAExE,MAAM,KAAK,GAAG,yBAAyB,CAAC;AACxC,2EAA2E;AAC3E,MAAM,SAAS,GAAG,IAAI,MAAM,CAAC,IAAI,KAAK,qBAAqB,KAAK,KAAK,CAAC,CAAC;AACvE;wEACwE;AACxE,MAAM,UAAU,GAAG,IAAI,MAAM,CAC3B,qEAAqE,KAAK,eAAe,KAAK,KAAK,EACnG,GAAG,CACJ,CAAC;AAEF;;;;;;;GAOG;AACH,SAAgB,eAAe;IAC7B,OAAO;QACL,EAAE,EAAE,eAAe;QACnB,KAAK,CAAC,IAAI;YACR,MAAM,QAAQ,GAAwB,EAAE,CAAC;YACzC,MAAM,EAAE,UAAU,EAAE,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;YAC9C,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,UAAU,EAAE,CAAC;gBACvC,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS;oBAAE,SAAS;gBACtC,IAAI,IAAI,CAAC,IAAI,KAAK,SAAS,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ;oBAAE,SAAS;gBAChE,MAAM,WAAW,GAAG,OAAO,IAAI,CAAC,WAAW,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC;gBACjF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;oBAAE,SAAS;gBACvC,MAAM,QAAQ,GAAG,oBAAoB,CAAC,WAAW,CAAC,CAAC;gBACnD,IAAI,QAAQ,KAAK,SAAS;oBAAE,SAAS;gBACrC,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,eAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,QAAQ,EAAE,MAAM;oBAChB,KAAK;oBACL,OAAO,EAAE,UAAU,KAAK,uCAAuC,WAAW,CAAC,KAAK,CAC9E,CAAC,EACD,EAAE,CACH,qEAAqE;oBACtE,UAAU,EAAE,WAAW,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE;iBAClD,CAAC,CAAC;YACL,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC;KACF,CAAC;AACJ,CAAC;AA5BD,0CA4BC;AAED,SAAS,oBAAoB,CAAC,WAAmB;IAC/C,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACzC,IAAI,IAAI,EAAE,CAAC;QACT,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACjD,CAAC;IACD,MAAM,KAAK,GAAG,UA
AU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC3C,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,wEAAwE;AAExE,sEAAsE;AACzD,QAAA,qBAAqB,GAAsB;IACtD,UAAU;IACV,SAAS;IACT,UAAU;IACV,MAAM;IACN,SAAS;IACT,QAAQ;IACR,UAAU;IACV,QAAQ;IACR,KAAK;IACL,QAAQ;IACR,OAAO;IACP,UAAU;CACX,CAAC;AAOF;;;;;;GAMG;AACH,SAAgB,iBAAiB,CAAC,UAAoC,EAAE;IACtE,MAAM,IAAI,GAAG,OAAO,CAAC,YAAY,IAAI,6BAAqB,CAAC;IAC3D,OAAO;QACL,EAAE,EAAE,6BAA6B;QACjC,KAAK,CAAC,IAAI;YACR,MAAM,QAAQ,GAAwB,EAAE,CAAC;YACzC,MAAM,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;YACxD,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,UAAU,EAAE,CAAC;gBACvC,IAAI,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC;oBAAE,SAAS;gBAClC,MAAM,WAAW,GAAG,OAAO,IAAI,CAAC,WAAW,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBACxF,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBAC7B,QAAQ,CAAC,IAAI,CAAC;wBACZ,IAAI,EAAE,6BAA6B;wBACnC,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,QAAQ,EAAE,MAAM;wBAChB,KAAK;wBACL,OAAO,EAAE,mBAAmB,KAAK,0FAA0F;qBAC5H,CAAC,CAAC;gBACL,CAAC;qBAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,YAAY,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC;oBAC/D,QAAQ,CAAC,IAAI,CAAC;wBACZ,IAAI,EAAE,6BAA6B;wBACnC,IAAI,EAAE,IAAI,CAAC,IAAI;wBACf,QAAQ,EAAE,MAAM;wBAChB,KAAK;wBACL,OAAO,EAAE,mBAAmB,KAAK,4GAA4G;qBAC9I,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC;KACF,CAAC;AACJ,CAAC;AA/BD,8CA+BC;AAED,wEAAwE;AAExE;;;;;;GAMG;AACU,QAAA,sBAAsB,GAAwB;IACzD,eAAe,EAAE;IACjB,mBAAmB,EAAE;IACrB,eAAe,EAAE;IACjB,iBAAiB,EAAE;CACpB,CAAC"}
@@ -0,0 +1,26 @@
1
+ "use strict";
2
+ /**
3
+ * tool-lint types — the tool-catalog confusability lint contract
4
+ * (RFC-002 tier 1, blocks C1–C3).
5
+ *
6
+ * Pattern: Strategy seam (the plug-and-play meta-pattern) — the frame
7
+ * and rule engine are the library's; the embedder, thresholds,
8
+ * and structural rule pack are all consumer-injected, with our
9
+ * defaults. Exactly like NarrativeFormatter / reliability /
10
+ * permission / commentary strategies.
11
+ * Role: `src/lib/` leaf module. ZERO stack buy-in: input is a plain
12
+ * `{ name, description?, inputSchema? }[]` — any OpenAI /
13
+ * Anthropic / LangChain / MCP tool list normalizes to it
14
+ * (see `coerceCatalog`). The library's own `Tool[]` adapts via
15
+ * `catalogFromTools`.
16
+ *
17
+ * ## Honest claim (RFC-002 §2)
18
+ *
19
+ * Confusability here is embedding geometry over what the model READS
20
+ * (tool name + description) — a deterministic heuristic for "could the
21
+ * model mix these up", never a measurement of any model's actual
22
+ * selection function. Tier 3 (choice-entropy sampling) validates the
23
+ * proxy; until then treat verdicts as review prompts, not ground truth.
24
+ */
25
+ Object.defineProperty(exports, "__esModule", { value: true });
26
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/tool-lint/types.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG"}
package/dist/observe.js CHANGED
@@ -32,7 +32,8 @@
32
32
  * directly; Tier 3 dashboards are opt-in.
33
33
  */
34
34
  Object.defineProperty(exports, "__esModule", { value: true });
35
- exports.traceToolpack = exports.TOOLPACK_HARD_CAPS = exports.callTraceTool = exports.structuralProximity = exports.scoreMargin = exports.scoreInfluence = exports.persistence = exports.pairwiseSimilarity = exports.finalAnswerSimilarity = exports.embeddingCache = exports.EmbeddingCache = exports.DEFAULT_PERSISTENCE_THRESHOLD = exports.DEFAULT_MARGIN_THRESHOLD = exports.DEFAULT_INFLUENCE_WEIGHTS = exports.contentHash = exports.compositeScore = exports.averageRelevancy = exports.adaptWeights = exports.typedEmit = exports.agentThinkingTrace = exports.toolLineageRecorder = exports.attachStatus = exports.LoggingDomains = exports.attachLogging = exports.skillRecorder = exports.permissionRecorder = exports.memoryRecorder = exports.evalRecorder = exports.contextEvaluatedRecorder = exports.toolsRecorder = exports.costRecorder = exports.LiveAgentTurnTracker = exports.LiveToolTracker = exports.LiveLLMTracker = exports.LiveStateRecorder = exports.liveStateRecorder = exports.buildStepGraph = exports.attachFlowchart = exports.runStepRecorder = exports.RunStepRecorder = exports.buildRunSteps = exports.BoundaryRecorder = exports.boundaryRecorder = exports.agentRecorder = exports.compositionRecorder = exports.streamRecorder = exports.ContextRecorder = void 0;
35
+ exports.bisectCulprits = exports.applyAblations = exports.ablationForSuspect = exports.traceToolpack = exports.TOOLPACK_HARD_CAPS = exports.callTraceTool = exports.structuralProximity = exports.scoreMargin = exports.scoreInfluence = exports.persistence = exports.pairwiseSimilarity = exports.finalAnswerSimilarity = exports.embeddingCache = exports.EmbeddingCache = exports.DEFAULT_PERSISTENCE_THRESHOLD = exports.DEFAULT_MARGIN_THRESHOLD = exports.DEFAULT_INFLUENCE_WEIGHTS = exports.contentHash = exports.compositeScore = exports.averageRelevancy = exports.adaptWeights = exports.typedEmit = exports.agentThinkingTrace = exports.toolLineageRecorder = exports.attachStatus = exports.LoggingDomains = exports.attachLogging = exports.skillRecorder = exports.permissionRecorder = exports.memoryRecorder = exports.evalRecorder = exports.contextEvaluatedRecorder = exports.toolsRecorder = exports.costRecorder = exports.LiveAgentTurnTracker = exports.LiveToolTracker = exports.LiveLLMTracker = exports.LiveStateRecorder = exports.liveStateRecorder = exports.buildStepGraph = exports.attachFlowchart = exports.runStepRecorder = exports.RunStepRecorder = exports.buildRunSteps = exports.BoundaryRecorder = exports.boundaryRecorder = exports.agentRecorder = exports.compositionRecorder = exports.streamRecorder = exports.ContextRecorder = void 0;
36
+ exports.toolChoiceRecorder = exports.buildChoiceContext = exports.saysWhatNotWhenRule = exports.runToolLintCli = exports.optionalParamRule = exports.MOCK_EMBEDDER_CALIBRATION = exports.formatToolCatalogReport = exports.enumInProseRule = exports.differentiationHint = exports.descriptionRule = exports.defaultStructuralRules = exports.DEFAULT_WHEN_CUES = exports.DEFAULT_WATCH_BAND = exports.DEFAULT_OMISSION_CUES = exports.DEFAULT_CONFUSABILITY_THRESHOLD = exports.confusabilityText = exports.coerceCatalog = exports.catalogFromTools = exports.analyzeToolCatalog = exports.verdictFor = exports.suspectLabel = exports.stepOutputText = exports.runAblationProbe = exports.probeFlipped = exports.localizeContextBug = exports.llmEdgeWeigher = exports.llmCallIdsFromEvents = exports.formatContextBugReport = exports.defaultSuspectClassifier = exports.defaultOutcomeComparator = exports.CONTEXT_BISECT_DEFAULTS = void 0;
36
37
  // Tier 1 — context + stream
37
38
  var ContextRecorder_js_1 = require("./recorders/core/ContextRecorder.js");
38
39
  Object.defineProperty(exports, "ContextRecorder", { enumerable: true, get: function () { return ContextRecorder_js_1.ContextRecorder; } });
@@ -120,4 +121,58 @@ var index_js_2 = require("./lib/trace-toolpack/index.js");
120
121
  Object.defineProperty(exports, "callTraceTool", { enumerable: true, get: function () { return index_js_2.callTraceTool; } });
121
122
  Object.defineProperty(exports, "TOOLPACK_HARD_CAPS", { enumerable: true, get: function () { return index_js_2.TOOLPACK_HARD_CAPS; } });
122
123
  Object.defineProperty(exports, "traceToolpack", { enumerable: true, get: function () { return index_js_2.traceToolpack; } });
124
+ // Contextual-bug localizer (RFC-003 Part B, D7–D9) — "git bisect for
125
+ // context". Assembly: footprintjs causal DAG (control edges + honesty
126
+ // markers + EdgeWeigher) × influence-core scoring (D6) × consumer-run
127
+ // counterfactual ablation. §B2 claim tiers: scores/weights are
128
+ // embedding-geometry PROXIES; ablation verdicts are the ONLY causal
129
+ // claims; slice completeness is bounded by tracking — and says so.
130
+ var index_js_3 = require("./lib/context-bisect/index.js");
131
+ Object.defineProperty(exports, "ablationForSuspect", { enumerable: true, get: function () { return index_js_3.ablationForSuspect; } });
132
+ Object.defineProperty(exports, "applyAblations", { enumerable: true, get: function () { return index_js_3.applyAblations; } });
133
+ Object.defineProperty(exports, "bisectCulprits", { enumerable: true, get: function () { return index_js_3.bisectCulprits; } });
134
+ Object.defineProperty(exports, "CONTEXT_BISECT_DEFAULTS", { enumerable: true, get: function () { return index_js_3.CONTEXT_BISECT_DEFAULTS; } });
135
+ Object.defineProperty(exports, "defaultOutcomeComparator", { enumerable: true, get: function () { return index_js_3.defaultOutcomeComparator; } });
136
+ Object.defineProperty(exports, "defaultSuspectClassifier", { enumerable: true, get: function () { return index_js_3.defaultSuspectClassifier; } });
137
+ Object.defineProperty(exports, "formatContextBugReport", { enumerable: true, get: function () { return index_js_3.formatContextBugReport; } });
138
+ Object.defineProperty(exports, "llmCallIdsFromEvents", { enumerable: true, get: function () { return index_js_3.llmCallIdsFromEvents; } });
139
+ Object.defineProperty(exports, "llmEdgeWeigher", { enumerable: true, get: function () { return index_js_3.llmEdgeWeigher; } });
140
+ Object.defineProperty(exports, "localizeContextBug", { enumerable: true, get: function () { return index_js_3.localizeContextBug; } });
141
+ Object.defineProperty(exports, "probeFlipped", { enumerable: true, get: function () { return index_js_3.probeFlipped; } });
142
+ Object.defineProperty(exports, "runAblationProbe", { enumerable: true, get: function () { return index_js_3.runAblationProbe; } });
143
+ Object.defineProperty(exports, "stepOutputText", { enumerable: true, get: function () { return index_js_3.stepOutputText; } });
144
+ Object.defineProperty(exports, "suspectLabel", { enumerable: true, get: function () { return index_js_3.suspectLabel; } });
145
+ Object.defineProperty(exports, "verdictFor", { enumerable: true, get: function () { return index_js_3.verdictFor; } });
146
+ // Tool-catalog confusability lint (RFC-002 tier 1, C1–C3) — build-time,
147
+ // CI-gateable, framework-agnostic: plain { name, description?, inputSchema? }
148
+ // tools in (OpenAI/Anthropic/MCP lists coerce via coerceCatalog; the
149
+ // library's Tool[] via catalogFromTools), a report with a gateable `ok`
150
+ // out. Pluggable structural rule pack; thresholds + embedder consumer-
151
+ // injected with our defaults. Bin: `agentfootprint-lint-tools`.
152
+ // Front door: docs/guides/tool-catalog-lint.md.
153
+ var index_js_4 = require("./lib/tool-lint/index.js");
154
+ Object.defineProperty(exports, "analyzeToolCatalog", { enumerable: true, get: function () { return index_js_4.analyzeToolCatalog; } });
155
+ Object.defineProperty(exports, "catalogFromTools", { enumerable: true, get: function () { return index_js_4.catalogFromTools; } });
156
+ Object.defineProperty(exports, "coerceCatalog", { enumerable: true, get: function () { return index_js_4.coerceCatalog; } });
157
+ Object.defineProperty(exports, "confusabilityText", { enumerable: true, get: function () { return index_js_4.confusabilityText; } });
158
+ Object.defineProperty(exports, "DEFAULT_CONFUSABILITY_THRESHOLD", { enumerable: true, get: function () { return index_js_4.DEFAULT_CONFUSABILITY_THRESHOLD; } });
159
+ Object.defineProperty(exports, "DEFAULT_OMISSION_CUES", { enumerable: true, get: function () { return index_js_4.DEFAULT_OMISSION_CUES; } });
160
+ Object.defineProperty(exports, "DEFAULT_WATCH_BAND", { enumerable: true, get: function () { return index_js_4.DEFAULT_WATCH_BAND; } });
161
+ Object.defineProperty(exports, "DEFAULT_WHEN_CUES", { enumerable: true, get: function () { return index_js_4.DEFAULT_WHEN_CUES; } });
162
+ Object.defineProperty(exports, "defaultStructuralRules", { enumerable: true, get: function () { return index_js_4.defaultStructuralRules; } });
163
+ Object.defineProperty(exports, "descriptionRule", { enumerable: true, get: function () { return index_js_4.descriptionRule; } });
164
+ Object.defineProperty(exports, "differentiationHint", { enumerable: true, get: function () { return index_js_4.differentiationHint; } });
165
+ Object.defineProperty(exports, "enumInProseRule", { enumerable: true, get: function () { return index_js_4.enumInProseRule; } });
166
+ Object.defineProperty(exports, "formatToolCatalogReport", { enumerable: true, get: function () { return index_js_4.formatToolCatalogReport; } });
167
+ Object.defineProperty(exports, "MOCK_EMBEDDER_CALIBRATION", { enumerable: true, get: function () { return index_js_4.MOCK_EMBEDDER_CALIBRATION; } });
168
+ Object.defineProperty(exports, "optionalParamRule", { enumerable: true, get: function () { return index_js_4.optionalParamRule; } });
169
+ Object.defineProperty(exports, "runToolLintCli", { enumerable: true, get: function () { return index_js_4.runToolLintCli; } });
170
+ Object.defineProperty(exports, "saysWhatNotWhenRule", { enumerable: true, get: function () { return index_js_4.saysWhatNotWhenRule; } });
171
+ // Tool-choice margin recorder (RFC-002 tier 2, C4–C6) — per LLM call,
172
+ // ranks the OFFERED catalog against the choice context (user message +
173
+ // latest assistant reasoning) via influence-core scoreMargin; embeds
174
+ // LAZILY on first read; flags narrow margins + proxy disagreements.
175
+ var ToolChoiceRecorder_js_1 = require("./recorders/observability/ToolChoiceRecorder.js");
176
+ Object.defineProperty(exports, "buildChoiceContext", { enumerable: true, get: function () { return ToolChoiceRecorder_js_1.buildChoiceContext; } });
177
+ Object.defineProperty(exports, "toolChoiceRecorder", { enumerable: true, get: function () { return ToolChoiceRecorder_js_1.toolChoiceRecorder; } });
123
178
  //# sourceMappingURL=observe.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"observe.js","sourceRoot":"","sources":["../src/observe.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;;;AAEH,4BAA4B;AAC5B,0EAAmG;AAA1F,qHAAA,eAAe,OAAA;AACxB,wEAAgG;AAAvF,mHAAA,cAAc,OAAA;AAEvB,+BAA+B;AAC/B,kFAGiD;AAF/C,6HAAA,mBAAmB,OAAA;AAGrB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,qFAkBuD;AAjBrD,uHAAA,gBAAgB,OAAA;AAChB,uHAAA,gBAAgB,OAAA;AAiBlB,mFAWsD;AAVpD,mHAAA,aAAa,OAAA;AACb,qHAAA,eAAe,OAAA;AACf,qHAAA,eAAe,OAAA;AASjB,uFAUwD;AATtD,uHAAA,eAAe,OAAA;AACf,sHAAA,cAAc,OAAA;AAShB,uFAUwD;AATtD,yHAAA,iBAAiB,OAAA;AACjB,yHAAA,iBAAiB,OAAA;AACjB,sHAAA,cAAc,OAAA;AACd,uHAAA,eAAe,OAAA;AACf,4HAAA,oBAAoB,OAAA;AAOtB,6BAA6B;AAC7B,oEAA0F;AAAjF,+GAAA,YAAY,OAAA;AACrB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,4FAGsD;AAFpD,uIAAA,wBAAwB,OAAA;AAG1B,oEAA0F;AAAjF,+GAAA,YAAY,OAAA;AACrB,wEAAgG;AAAvF,mHAAA,cAAc,OAAA;AACvB,gFAGgD;AAF9C,2HAAA,kBAAkB,OAAA;AAGpB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,mFAMsD;AALpD,mHAAA,aAAa,OAAA;AACb,oHAAA,cAAc,OAAA;AAKhB,iFAIqD;AAHnD,iHAAA,YAAY,OAAA;AAId,4EAA4E;AAC5E,gFAAgF;AAChF,2FAO0D;AANxD,6HAAA,mBAAmB,OAAA;AAOrB,gFAAgF;AAChF,gFAAgF;AAChF,yGASiE;AAR/D,mIAAA,kBAAkB,OAAA;AAUpB,uDAAuD;AACvD,8DAA0D;AAAjD,yGAAA,SAAS,OAAA;AAElB,uEAAuE;AACvE,uEAAuE;AACvE,uEAAuE;AACvE,oEAAoE;AACpE,oEAAoE;AACpE,kEAAkE;AAClE,eAAe;AACf,0DAgCuC;AA/BrC,wGAAA,YAAY,OAAA;AACZ,4GAAA,gBAAgB,OAAA;AAChB,0GAAA,cAAc,OAAA;AACd,uGAAA,WAAW,OAAA;AACX,qHAAA,yBAAyB,OAAA;AACzB,oHAAA,wBAAwB,OAAA;AACxB,yHAAA,6BAA6B,OAAA;AAC7B,0GAAA,cAAc,OAAA;AACd,0GAAA,cAAc,OAAA;AACd,iHAAA,qBAAqB,OAAA;AACrB,8GAAA,kBAAkB,OAAA;AAClB,uGAAA,WAAW,OAAA;AACX,0GAAA,cAAc,OAAA;AACd,uGAAA,WAAW,OAAA;AACX,+GAAA,mBAAmB,OAAA;AAkBrB,uEAAuE;AACvE,2EAA2E;AAC3E,mEAAmE;AACnE,0DAMuC;AALrC,yGAAA,aAAa,OAAA;AACb,8GAAA,kBAAkB,OAAA;AAClB,yGAAA,aAAa,OAAA"}
1
+ {"version":3,"file":"observe.js","sourceRoot":"","sources":["../src/observe.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;;;;AAEH,4BAA4B;AAC5B,0EAAmG;AAA1F,qHAAA,eAAe,OAAA;AACxB,wEAAgG;AAAvF,mHAAA,cAAc,OAAA;AAEvB,+BAA+B;AAC/B,kFAGiD;AAF/C,6HAAA,mBAAmB,OAAA;AAGrB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,qFAkBuD;AAjBrD,uHAAA,gBAAgB,OAAA;AAChB,uHAAA,gBAAgB,OAAA;AAiBlB,mFAWsD;AAVpD,mHAAA,aAAa,OAAA;AACb,qHAAA,eAAe,OAAA;AACf,qHAAA,eAAe,OAAA;AASjB,uFAUwD;AATtD,uHAAA,eAAe,OAAA;AACf,sHAAA,cAAc,OAAA;AAShB,uFAUwD;AATtD,yHAAA,iBAAiB,OAAA;AACjB,yHAAA,iBAAiB,OAAA;AACjB,sHAAA,cAAc,OAAA;AACd,uHAAA,eAAe,OAAA;AACf,4HAAA,oBAAoB,OAAA;AAOtB,6BAA6B;AAC7B,oEAA0F;AAAjF,+GAAA,YAAY,OAAA;AACrB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,4FAGsD;AAFpD,uIAAA,wBAAwB,OAAA;AAG1B,oEAA0F;AAAjF,+GAAA,YAAY,OAAA;AACrB,wEAAgG;AAAvF,mHAAA,cAAc,OAAA;AACvB,gFAGgD;AAF9C,2HAAA,kBAAkB,OAAA;AAGpB,sEAA6F;AAApF,iHAAA,aAAa,OAAA;AACtB,mFAMsD;AALpD,mHAAA,aAAa,OAAA;AACb,oHAAA,cAAc,OAAA;AAKhB,iFAIqD;AAHnD,iHAAA,YAAY,OAAA;AAId,4EAA4E;AAC5E,gFAAgF;AAChF,2FAO0D;AANxD,6HAAA,mBAAmB,OAAA;AAOrB,gFAAgF;AAChF,gFAAgF;AAChF,yGASiE;AAR/D,mIAAA,kBAAkB,OAAA;AAUpB,uDAAuD;AACvD,8DAA0D;AAAjD,yGAAA,SAAS,OAAA;AAElB,uEAAuE;AACvE,uEAAuE;AACvE,uEAAuE;AACvE,oEAAoE;AACpE,oEAAoE;AACpE,kEAAkE;AAClE,eAAe;AACf,0DAgCuC;AA/BrC,wGAAA,YAAY,OAAA;AACZ,4GAAA,gBAAgB,OAAA;AAChB,0GAAA,cAAc,OAAA;AACd,uGAAA,WAAW,OAAA;AACX,qHAAA,yBAAyB,OAAA;AACzB,oHAAA,wBAAwB,OAAA;AACxB,yHAAA,6BAA6B,OAAA;AAC7B,0GAAA,cAAc,OAAA;AACd,0GAAA,cAAc,OAAA;AACd,iHAAA,qBAAqB,OAAA;AACrB,8GAAA,kBAAkB,OAAA;AAClB,uGAAA,WAAW,OAAA;AACX,0GAAA,cAAc,OAAA;AACd,uGAAA,WAAW,OAAA;AACX,+GAAA,mBAAmB,OAAA;AAkBrB,uEAAuE;AACvE,2EAA2E;AAC3E,mEAAmE;AACnE,0DAMuC;AALrC,yGAAA,aAAa,OAAA;AACb,8GAAA,kBAAkB,OAAA;AAClB,yGAAA,aAAa,OAAA;AAIf,qEAAqE;AACrE,sEAAsE;AACtE,sEAAsE;AACtE,+DAA+D;AAC/D,oEAAoE;AACpE,mEAAmE;AACnE,0DA8CuC;AA7CrC,8GAAA,kBAAkB,OAAA;AAClB,0GAAA,cAAc,OAAA;AACd,0GAAA,cAAc,OAAA;AACd,mHAAA,uBAAuB,OAAA;AACvB,oHAAA,wBAAwB,OAAA;AACxB,oHAAA,wBAAwB,OAAA;AACxB,kHAAA,sBAAsB,OAAA;AACtB,gHAAA,oBAAoB,
OAAA;AACpB,0GAAA,cAAc,OAAA;AACd,8GAAA,kBAAkB,OAAA;AAClB,wGAAA,YAAY,OAAA;AACZ,4GAAA,gBAAgB,OAAA;AAChB,0GAAA,cAAc,OAAA;AACd,wGAAA,YAAY,OAAA;AACZ,sGAAA,UAAU,OAAA;AAgCZ,wEAAwE;AACxE,8EAA8E;AAC9E,qEAAqE;AACrE,wEAAwE;AACxE,uEAAuE;AACvE,gEAAgE;AAChE,gDAAgD;AAChD,qDAgCkC;AA/BhC,8GAAA,kBAAkB,OAAA;AAClB,4GAAA,gBAAgB,OAAA;AAChB,yGAAA,aAAa,OAAA;AACb,6GAAA,iBAAiB,OAAA;AACjB,2HAAA,+BAA+B,OAAA;AAC/B,iHAAA,qBAAqB,OAAA;AACrB,8GAAA,kBAAkB,OAAA;AAClB,6GAAA,iBAAiB,OAAA;AACjB,kHAAA,sBAAsB,OAAA;AACtB,2GAAA,eAAe,OAAA;AACf,+GAAA,mBAAmB,OAAA;AACnB,2GAAA,eAAe,OAAA;AACf,mHAAA,uBAAuB,OAAA;AACvB,qHAAA,yBAAyB,OAAA;AACzB,6GAAA,iBAAiB,OAAA;AACjB,0GAAA,cAAc,OAAA;AACd,+GAAA,mBAAmB,OAAA;AAgBrB,sEAAsE;AACtE,uEAAuE;AACvE,qEAAqE;AACrE,oEAAoE;AACpE,yFASyD;AARvD,2HAAA,kBAAkB,OAAA;AAClB,2HAAA,kBAAkB,OAAA"}
@@ -0,0 +1,266 @@
1
+ "use strict";
2
+ /**
3
+ * toolChoiceRecorder — runtime tool-choice margins (RFC-002 tier 2,
4
+ * blocks C4–C6).
5
+ *
6
+ * Per LLM call that OFFERED tools, this recorder captures the menu the
7
+ * model saw (`stream.llm_start.tools`), what it actually invoked
8
+ * (`stream.tool_start`), and the choice context — then, LAZILY on first
9
+ * read, ranks the offered candidates against that context via
10
+ * influence-core's `scoreMargin` (C4):
11
+ *
12
+ * margin = score(best chosen) − score(best non-chosen)
13
+ *
14
+ * Small margin (`narrow`, < `marginThreshold`, default 0.05) = the
15
+ * choice was a close call under the proxy. Top-scored candidate not
16
+ * among the chosen (`proxyDisagreement`) is ALWAYS flagged — either a
17
+ * proxy miss or a genuinely surprising model choice; both are exactly
18
+ * what a debugger wants surfaced.
19
+ *
20
+ * ## The choice context (C4 — what is embedded, precisely)
21
+ *
22
+ * `buildChoiceContext` assembles the SAME two slots the model's
23
+ * tool-selection reasoning ran on:
24
+ *
25
+ * INCLUDED
26
+ * 1. the user message of the current turn (`agent.turn_start.userPrompt`)
27
+ * — the task the model is choosing a tool FOR (first
28
+ * `maxSlotChars` chars: the head states the task);
29
+ * 2. the latest assistant reasoning text — the most recent
30
+ * `stream.llm_end.content` of this turn, when present (last
31
+ * `maxSlotChars` chars: the tail is where "what next" lives).
32
+ * Iteration 1 has no assistant text; the slot is omitted.
33
+ *
34
+ * EXCLUDED (deliberately)
35
+ * - the system prompt: constant across every call of the run — zero
36
+ * per-call discrimination, it only dilutes the embedding;
37
+ * - older history turns: recency dominates tool choice, and the full
38
+ * transcript grows the embedding cost linearly with run length;
39
+ * - raw tool results: the model reads them, but their distilled
40
+ * effect on the NEXT choice is the assistant's own reasoning text,
41
+ * which IS included; raw payloads skew the embedding toward data
42
+ * vocabulary (the honest-proxy discipline: mirror the
43
+ * decision-relevant text, not every visible byte);
44
+ * - tool schemas: those are the CANDIDATES being ranked, not context.
45
+ *
46
+ * Candidate text per offered tool = `confusabilityText` (tokenized name
47
+ * + description) — the SAME construction the catalog lint (C1) embeds,
48
+ * so build-time confusability and runtime margins measure one geometry.
49
+ *
50
+ * ## Laziness (C5)
51
+ *
52
+ * Event hooks only RECORD (string copies into a KeyedStore). The
53
+ * embedder runs on the first `getCalls()` / `getFlagged()` /
54
+ * `getSummary()` — embedding I/O NEVER rides the hot path, even when
55
+ * the recorder is attached inline. Scores memoize per entry. Attach
56
+ * with `{ delivery: 'deferred' }` (footprintjs RFC-001) to move the
57
+ * bookkeeping off the hot path too — it is a normal CombinedRecorder.
58
+ *
59
+ * Pattern: CombinedRecorder (Convention 1 — single purpose: tool-choice
60
+ * margin evidence). Owns a `KeyedStore<ToolChoiceEntry>` keyed
61
+ * by the LLM call's `runtimeStageId`. Convention 4: resets on a
62
+ * new `runId` via `FlowRecorder.onRunStart`.
63
+ * Role: Tier-3 /observe recorder. Attach via `Agent.create(...)
64
+ * .recorder(handle)` or `executor.attachCombinedRecorder`.
65
+ *
66
+ * Honest claim (RFC-002 §2): margins are embedding geometry between the
67
+ * context and tool descriptions — a deterministic PROXY for the model's
68
+ * selection function, never "the model chose because". Tier 3
69
+ * (choice-entropy sampling) validates the proxy.
70
+ */
71
+ Object.defineProperty(exports, "__esModule", { value: true });
72
+ exports.toolChoiceRecorder = exports.buildChoiceContext = void 0;
73
+ const trace_1 = require("footprintjs/trace");
74
+ const index_js_1 = require("../../lib/influence-core/index.js");
75
+ const analyze_js_1 = require("../../lib/tool-lint/analyze.js");
76
/**
 * C4: the precise choice-context construction. Exported so consumers can
 * reproduce exactly the text that is embedded for a tool-choice margin.
 *
 * Two slots are assembled:
 *   1. `user: <head of userPrompt>` — the first `maxSlotChars` characters;
 *   2. `assistant: <tail of latestAssistantText>` — the last `maxSlotChars`
 *      characters of the trimmed text, appended after a blank line.
 *
 * The assistant slot is omitted when the text is absent, blank after
 * trimming, or when the character budget leaves nothing of it.
 *
 * @param {object} args
 * @param {string} args.userPrompt - User message of the current turn.
 * @param {string} [args.latestAssistantText] - Most recent assistant text.
 * @param {number} [args.maxSlotChars=2000] - Per-slot character budget.
 *   Negative and NaN budgets are treated as 0.
 * @returns {string} The context text to embed.
 */
function buildChoiceContext(args) {
    // Clamp the budget: a negative end index would make `slice(0, max)` count
    // from the END of the prompt instead of limiting its head. `Math.max`
    // propagates NaN, which both slots below then treat as "no budget".
    const max = Math.max(0, args.maxSlotChars ?? 2000);
    const user = `user: ${args.userPrompt.slice(0, max)}`;
    const assistant = args.latestAssistantText?.trim();
    if (assistant === undefined || assistant.length === 0) {
        return user;
    }
    // `slice(-0)` is `slice(0)` — the WHOLE string — so a zero budget must be
    // special-cased rather than passed through as a negative start index.
    const tail = max > 0 ? assistant.slice(-max) : '';
    if (tail.length === 0) {
        return user;
    }
    return `${user}\n\nassistant: ${tail}`;
}
86
+ exports.buildChoiceContext = buildChoiceContext;
87
/**
 * Build the tool-choice margin recorder (C5).
 *
 * The event hooks only record: per `stream.llm_start` that offered tools, an
 * entry is stored under the call's `runtimeStageId` holding the offered menu,
 * the choice context text, and the tools subsequently started. The embedder is
 * invoked only from the async read methods (`getCalls`, `getFlagged`,
 * `getSummary`), never from an event hook, and a scored entry is never scored
 * again.
 *
 * @param {object} options
 * @param {*} options.embedder - Passed through unchanged to `scoreMargin`.
 * @param {number} [options.marginThreshold] - Defaults to
 *   `DEFAULT_MARGIN_THRESHOLD` from influence-core.
 * @param {number} [options.maxSlotChars=2000] - Per-slot budget handed to
 *   `buildChoiceContext`.
 * @param {string} [options.id='tool-choice'] - Recorder id.
 * @returns {object} Recorder with `id`, the `onEmit` / `onRunStart` /
 *   `onRunEnd` / `onRunFailed` hooks, the async `getCalls` / `getFlagged` /
 *   `getSummary` readers, and `clear`.
 */
function toolChoiceRecorder(options) {
    const marginThreshold = options.marginThreshold ?? index_js_1.DEFAULT_MARGIN_THRESHOLD;
    const maxSlotChars = options.maxSlotChars ?? 2000;
    const store = new trace_1.KeyedStore();
    let lastRunId;
    let userPrompt = '';
    let lastAssistantText;
    let openKey;
    // Tail of the serialized scoring passes (see `ensureScored`). It never
    // rejects: failures are delivered to the caller of the failing pass only.
    let scoringQueue = Promise.resolve();
    /** Mark the currently open LLM-call entry (if any) as complete. */
    const closeOpen = () => {
        if (openKey === undefined) {
            return;
        }
        const open = store.get(openKey);
        if (open) {
            open.closed = true;
        }
        openKey = undefined;
    };
    /** Drop all accumulated entries and per-turn context slots. */
    const reset = () => {
        store.clear();
        userPrompt = '';
        lastAssistantText = undefined;
        openKey = undefined;
    };
    /** One scoring pass — the ONLY place the embedder runs. Entries that are
     *  still open, already scored, or already skipped are left untouched. */
    const scorePending = async () => {
        for (const entry of store.getMap().values()) {
            if (!entry.closed || entry.margin !== undefined || entry.skipped !== undefined) {
                continue;
            }
            if (entry.chosen.length === 0) {
                entry.skipped = 'nothing-chosen';
                continue;
            }
            const offeredNames = new Set(entry.offered.map((tool) => tool.name));
            if (!entry.chosen.every((name) => offeredNames.has(name))) {
                entry.skipped = 'chosen-not-offered';
                continue;
            }
            entry.margin = await (0, index_js_1.scoreMargin)({
                candidates: entry.offered.map((tool) => ({
                    name: tool.name,
                    text: (0, analyze_js_1.confusabilityText)(tool),
                })),
                contextText: entry.contextText,
                chosen: entry.chosen,
                embedder: options.embedder,
                marginThreshold,
            });
        }
    };
    /** Lazy scoring entry point. Passes are serialized: an entry only gains
     *  its `margin` after an `await`, so two overlapping passes would both see
     *  it as unscored and embed it twice. Queuing each pass behind the previous
     *  one keeps the memoization effective under concurrent reads, while every
     *  caller still gets a pass that covers entries closed before it ran. */
    const ensureScored = () => {
        const pass = scoringQueue.then(scorePending);
        // Swallow the rejection on the queue only; `pass` itself still rejects
        // for the caller that awaits it, and later passes are not blocked.
        scoringQueue = pass.catch(() => undefined);
        return pass;
    };
    /** True when a scored entry carries a narrow margin or a proxy disagreement. */
    const isFlagged = (entry) => entry.margin !== undefined &&
        Boolean(entry.margin.flags.narrow || entry.margin.flags.proxyDisagreement);
    /** Public projection of a stored entry (internal `closed` flag omitted;
     *  `chosen` and `toolCallIds` are copied). */
    const toCall = (entry) => ({
        runtimeStageId: entry.runtimeStageId,
        iteration: entry.iteration,
        offered: entry.offered,
        chosen: [...entry.chosen],
        toolCallIds: [...entry.toolCallIds],
        contextText: entry.contextText,
        ...(entry.margin !== undefined ? { margin: entry.margin } : {}),
        ...(entry.skipped !== undefined ? { skipped: entry.skipped } : {}),
    });
    return {
        id: options.id ?? 'tool-choice',
        onEmit(event) {
            const payload = event.payload;
            if (payload === null || typeof payload !== 'object') {
                return; // redacted or foreign
            }
            const p = payload;
            switch (event.name) {
                case 'agentfootprint.agent.turn_start': {
                    // New turn on the same run: fresh context slots.
                    closeOpen();
                    userPrompt = typeof p.userPrompt === 'string' ? p.userPrompt : '';
                    lastAssistantText = undefined;
                    break;
                }
                case 'agentfootprint.stream.llm_start': {
                    closeOpen();
                    // Ignore malformed menu entries (null / primitives) instead of
                    // throwing from an event hook while reading `tool.name`.
                    const tools = Array.isArray(p.tools)
                        ? p.tools.filter((tool) => tool !== null && typeof tool === 'object')
                        : [];
                    if (tools.length === 0) {
                        break; // no menu — nothing to confuse
                    }
                    store.set(event.runtimeStageId, {
                        runtimeStageId: event.runtimeStageId,
                        iteration: Number(p.iteration ?? 0),
                        offered: tools.map((tool) => ({
                            name: tool.name,
                            ...(tool.description !== undefined ? { description: tool.description } : {}),
                        })),
                        contextText: buildChoiceContext({
                            userPrompt,
                            ...(lastAssistantText !== undefined
                                ? { latestAssistantText: lastAssistantText }
                                : {}),
                            maxSlotChars,
                        }),
                        chosen: [],
                        toolCallIds: [],
                        closed: false,
                    });
                    openKey = event.runtimeStageId;
                    break;
                }
                case 'agentfootprint.stream.llm_end': {
                    // Remember the latest non-empty assistant text for the NEXT call's context.
                    if (typeof p.content === 'string' && p.content.length > 0) {
                        lastAssistantText = p.content;
                    }
                    break;
                }
                case 'agentfootprint.stream.tool_start': {
                    if (openKey === undefined) {
                        break;
                    }
                    const entry = store.get(openKey);
                    if (entry === undefined) {
                        break;
                    }
                    const name = String(p.toolName ?? 'unknown');
                    // `chosen` is a set of names; `toolCallIds` keeps every invocation.
                    if (!entry.chosen.includes(name)) {
                        entry.chosen.push(name);
                    }
                    entry.toolCallIds.push(String(p.toolCallId ?? ''));
                    break;
                }
                case 'agentfootprint.agent.turn_end': {
                    closeOpen();
                    break;
                }
                default:
                    break;
            }
        },
        /** Convention 4 — a new runId means a new run: reset accumulation so
         *  runtimeStageId keys (which restart per run) cannot collide. The
         *  executor also calls `clear()` before each `run()` — this hook is
         *  the detection that works regardless of attach surface. Same-
         *  executor `resume()` fires `onResume` (not `onRunStart`) and skips
         *  `clear()`, so pre-pause entries SURVIVE a resume by design. */
        onRunStart(event) {
            const runId = event.traversalContext?.runId;
            if (runId !== undefined && runId !== lastRunId) {
                reset();
                lastRunId = runId;
            }
        },
        onRunEnd() {
            closeOpen();
        },
        onRunFailed() {
            closeOpen();
        },
        /** All recorded LLM calls that offered tools, scored where possible. */
        async getCalls() {
            await ensureScored();
            return [...store.getMap().values()].map(toCall);
        },
        /** Scored calls with a narrow margin or a proxy disagreement. */
        async getFlagged() {
            await ensureScored();
            return [...store.getMap().values()].filter(isFlagged).map(toCall);
        },
        /** Aggregate counts over every recorded call. */
        async getSummary() {
            await ensureScored();
            const entries = [...store.getMap().values()];
            const scored = entries.filter((entry) => entry.margin !== undefined);
            return {
                llmCallsWithTools: entries.length,
                choices: entries.filter((entry) => entry.chosen.length > 0).length,
                scored: scored.length,
                flagged: scored.filter(isFlagged).length,
                narrow: scored.filter((entry) => entry.margin.flags.narrow).length,
                proxyDisagreement: scored.filter((entry) => entry.margin.flags.proxyDisagreement).length,
                skipped: entries.filter((entry) => entry.skipped !== undefined).length,
            };
        },
        clear() {
            reset();
        },
    };
}
265
+ exports.toolChoiceRecorder = toolChoiceRecorder;
266
+ //# sourceMappingURL=ToolChoiceRecorder.js.map