@tracemarketplace/shared 0.0.10 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dist/extractor-claude-code.test.js +53 -0
  2. package/dist/extractor-claude-code.test.js.map +1 -1
  3. package/dist/extractor-codex.test.js +5 -0
  4. package/dist/extractor-codex.test.js.map +1 -1
  5. package/dist/extractors/claude-code.d.ts.map +1 -1
  6. package/dist/extractors/claude-code.js +4 -4
  7. package/dist/extractors/claude-code.js.map +1 -1
  8. package/dist/extractors/codex.d.ts.map +1 -1
  9. package/dist/extractors/codex.js +3 -1
  10. package/dist/extractors/codex.js.map +1 -1
  11. package/dist/extractors/common.d.ts +1 -2
  12. package/dist/extractors/common.d.ts.map +1 -1
  13. package/dist/extractors/common.js +2 -37
  14. package/dist/extractors/common.js.map +1 -1
  15. package/dist/extractors/common.test.d.ts +2 -0
  16. package/dist/extractors/common.test.d.ts.map +1 -0
  17. package/dist/extractors/common.test.js +17 -0
  18. package/dist/extractors/common.test.js.map +1 -0
  19. package/dist/extractors/cursor.d.ts.map +1 -1
  20. package/dist/extractors/cursor.js +8 -0
  21. package/dist/extractors/cursor.js.map +1 -1
  22. package/dist/index.d.ts +1 -0
  23. package/dist/index.d.ts.map +1 -1
  24. package/dist/index.js +1 -0
  25. package/dist/index.js.map +1 -1
  26. package/dist/redact.d.ts.map +1 -1
  27. package/dist/redact.js +3 -1
  28. package/dist/redact.js.map +1 -1
  29. package/dist/redact.test.js +9 -0
  30. package/dist/redact.test.js.map +1 -1
  31. package/dist/scoring.d.ts +5 -3
  32. package/dist/scoring.d.ts.map +1 -1
  33. package/dist/scoring.fixtures.test.d.ts +2 -0
  34. package/dist/scoring.fixtures.test.d.ts.map +1 -0
  35. package/dist/scoring.fixtures.test.js +47 -0
  36. package/dist/scoring.fixtures.test.js.map +1 -0
  37. package/dist/scoring.js +381 -62
  38. package/dist/scoring.js.map +1 -1
  39. package/dist/scoring.test.js +125 -26
  40. package/dist/scoring.test.js.map +1 -1
  41. package/dist/tool-normalization.d.ts +66 -0
  42. package/dist/tool-normalization.d.ts.map +1 -0
  43. package/dist/tool-normalization.generated.d.ts +181 -0
  44. package/dist/tool-normalization.generated.d.ts.map +1 -0
  45. package/dist/tool-normalization.generated.js +261 -0
  46. package/dist/tool-normalization.generated.js.map +1 -0
  47. package/dist/tool-normalization.js +463 -0
  48. package/dist/tool-normalization.js.map +1 -0
  49. package/dist/tool-normalization.test.d.ts +2 -0
  50. package/dist/tool-normalization.test.d.ts.map +1 -0
  51. package/dist/tool-normalization.test.js +188 -0
  52. package/dist/tool-normalization.test.js.map +1 -0
  53. package/dist/types.d.ts +38 -1
  54. package/dist/types.d.ts.map +1 -1
  55. package/dist/validators.d.ts +23 -6
  56. package/dist/validators.d.ts.map +1 -1
  57. package/dist/validators.js +4 -0
  58. package/dist/validators.js.map +1 -1
  59. package/dist/validators.test.js +7 -0
  60. package/dist/validators.test.js.map +1 -1
  61. package/package.json +5 -5
  62. package/scripts/generate-tool-normalization.mjs +16 -0
  63. package/src/extractor-claude-code.test.ts +59 -0
  64. package/src/extractor-codex.test.ts +5 -0
  65. package/src/extractors/claude-code.ts +8 -4
  66. package/src/extractors/codex.ts +4 -2
  67. package/src/extractors/common.test.ts +21 -0
  68. package/src/extractors/common.ts +15 -49
  69. package/src/extractors/cursor.ts +9 -0
  70. package/src/index.ts +1 -0
  71. package/src/redact.test.ts +9 -0
  72. package/src/redact.ts +3 -1
  73. package/src/scoring.fixtures.test.ts +71 -0
  74. package/src/scoring.test.ts +151 -26
  75. package/src/scoring.ts +582 -84
  76. package/src/tool-normalization.generated.ts +262 -0
  77. package/src/tool-normalization.spec.json +205 -0
  78. package/src/tool-normalization.test.ts +221 -0
  79. package/src/tool-normalization.ts +670 -0
  80. package/src/types.ts +50 -0
  81. package/src/validators.test.ts +8 -0
  82. package/src/validators.ts +8 -0
@@ -0,0 +1,670 @@
1
+ import type { ContentBlock, NormalizedTrace } from "./types.js";
2
+ import { toolNormalizationSpec } from "./tool-normalization.generated.js";
3
+
4
/**
 * Matches sed's in-place flag: bare `-i`, `-i` followed by a quote or `=`,
 * a backup-suffix form such as `-i.bak`, or the long `--in-place` option.
 */
const SED_IN_PLACE_FLAG_REGEX = /(?:^|\s)-i(?:$|\s|['"=.])|--in-place(?:$|\s|=)/;
/** Matches sort's output-file flag (`-o` / `--output`). */
const SORT_OUTPUT_FLAG_REGEX = /(?:^|\s)-o(?:$|\s|['"=])|--output(?:$|\s|=)/;
/** Matches ANSI SGR (colour/style) escape sequences. */
const ANSI_ESCAPE_REGEX = /\u001b\[[0-9;]*m/g;
/**
 * Matches a combined recursive+force flag cluster (`-rf`, `-fr`, `-Rf`, ...)
 * or both long options in either order. The short-flag alternatives are
 * anchored to the start of a token so that hyphenated file names such as
 * `some-prefix.txt` are not mistaken for a flag cluster.
 */
const RM_RF_REGEX =
  /(?:^|[\s'"])-[A-Za-z]*[rR][A-Za-z]*f|(?:^|[\s'"])-[A-Za-z]*f[A-Za-z]*[rR]|--recursive.*--force|--force.*--recursive/;
/** Shell executables that merely wrap another command. */
const SHELL_WRAPPER_VERBS = new Set(["bash", "sh", "zsh", "fish"]);
/** Flags skipped between a wrapper shell and the command it runs. */
const SHELL_COMMAND_FLAGS = new Set(["-c", "-lc", "-ic", "-cl", "-l", "-i"]);
/** Tokens that chain one shell command to the next. */
const SHELL_CHAIN_SEPARATORS = new Set(["&&", ";", "||", "|"]);
/** Verbs treated as setup; the command after the next separator is used instead. */
const SHELL_SETUP_VERBS = new Set(["cd", "pushd", "popd", "dirs", "export", "source", ".", "set"]);
12
+
13
/** Shape of the `normalized_tools` table in the generated normalization spec. */
type ToolDescriptorMap = typeof toolNormalizationSpec.normalized_tools;

/** Key of a tool entry declared in the generated spec. */
export type NormalizedToolId = keyof ToolDescriptorMap;

/** Tool families that the spec distinguishes. */
export type NormalizedToolFamily = "shell" | "file";

/** Descriptor attached to every normalized tool in the spec. */
export type NormalizedToolDescriptor = {
  family: NormalizedToolFamily;
  kind: string;
  /** Raw tool names (compared after normalization) that resolve to this tool. */
  aliases: readonly string[];
  /** Prefix used when building file-tool tokens. */
  token_prefix: string;
  /** Whether a call to this tool counts as a file mutation. */
  mutates_file: boolean;
};

/** A spec tool id, or `"tool.generic"` for tools with no spec entry. */
export type NormalizedToolIdentifier = NormalizedToolId | "tool.generic";

/** Outcome label assigned to one exchange. */
export type FailureExchangeOutcome = "success" | "failure" | "success_after_retry" | "canceled";

/** Version string stamped on every `TraceNormalization` result. */
export const TRACE_NORMALIZATION_VERSION = "evaluation-normalization-v1";

/** A spec descriptor together with the id it was registered under. */
export type ResolvedNormalizedTool = NormalizedToolDescriptor & {
  normalizedToolId: NormalizedToolId;
};

/** One tool call after normalization. */
export interface NormalizedToolUse {
  toolCallId: string;
  /** Tool name exactly as it appeared in the trace. */
  rawToolName: string;
  normalizedToolId: NormalizedToolIdentifier;
  family: NormalizedToolFamily | "generic";
  kind: string;
  /** Category token, e.g. `Bash:<category>:pass`. */
  token: string;
  /** Exit code of the matching tool result, or `null` when none was recorded. */
  exitCode: number | null;
}

/** Reduced per-exchange view returned by `extractFailureExchanges`. */
export interface FailureExchange {
  toolTokens: string[];
  hasError: boolean;
  outcome: FailureExchangeOutcome;
}

/** Full per-exchange record produced by `normalizeTraceForEvaluation`. */
export interface TraceNormalizationExchange {
  /** Zero-based position among the emitted exchanges. */
  exchangeIndex: number;
  actions: NormalizedToolUse[];
  /** Tokens of `actions`, in the same order. */
  toolTokens: string[];
  hasError: boolean;
  outcome: FailureExchangeOutcome;
  /** Outcome of the previously emitted exchange, or `null` for the first one. */
  prevOutcome: FailureExchangeOutcome | null;
  /** Sum of `usage.output_tokens` over the exchange's assistant turns. */
  tokenCount: number;
}

/** Result of normalizing a whole trace. */
export interface TraceNormalization {
  version: string;
  exchanges: TraceNormalizationExchange[];
}

/** The `tool_use` member of the `ContentBlock` union. */
type ToolUseBlock = Extract<ContentBlock, { type: "tool_use" }>;
67
+
68
// Spec sections used throughout this module.
const shellSpec = toolNormalizationSpec.shell;
const fileSpec = toolNormalizationSpec.file;

// [id, descriptor] pairs of every tool declared in the spec, in spec order.
const normalizedToolEntries = Object.entries(
  toolNormalizationSpec.normalized_tools,
) as Array<[NormalizedToolId, NormalizedToolDescriptor]>;

// Set views of the spec's verb lists, for membership tests on arbitrary verbs.
const textFilterWriteVerbs = new Set<string>(shellSpec.text_filter_write_verbs);
const dangerousVerbs = new Set<string>(shellSpec.dangerous_verbs);
75
+
76
/**
 * Canonicalizes a raw tool name for comparison.
 *
 * @param toolName - Tool name as it appears in a trace.
 * @returns The name with surrounding whitespace removed, in lower case.
 */
export function normalizeToolName(toolName: string): string {
  const trimmed = toolName.trim();
  return trimmed.toLowerCase();
}
79
+
80
/**
 * Tests whether a raw tool name refers to `candidate`.
 *
 * The tool name is normalized first; `candidate` is compared as given. A match
 * is either exact or a dotted suffix (`<anything>.<candidate>`).
 *
 * @param toolName - Tool name as it appears in a trace.
 * @param candidate - Alias to compare against.
 * @returns `true` when the normalized name equals or ends with `.candidate`.
 */
export function toolNameMatches(toolName: string, candidate: string): boolean {
  const name = normalizeToolName(toolName);
  if (name === candidate) {
    return true;
  }
  return name.endsWith(`.${candidate}`);
}
84
+
85
/**
 * Resolves a raw tool name to its spec entry.
 *
 * @param toolName - Tool name as it appears in a trace.
 * @returns The first spec tool (in spec order) with an alias matching
 *   `toolName`, merged with its id, or `null` when no alias matches.
 */
export function getNormalizedToolHierarchy(
  toolName: string,
): ResolvedNormalizedTool | null {
  const hit = normalizedToolEntries.find(([, candidateDescriptor]) =>
    candidateDescriptor.aliases.some((alias) => toolNameMatches(toolName, alias)),
  );
  if (hit === undefined) {
    return null;
  }

  const [normalizedToolId, descriptor] = hit;
  return { normalizedToolId, ...descriptor };
}
96
+
97
/**
 * Lists every spec tool belonging to one family.
 *
 * @param family - Family to select.
 * @returns Resolved descriptors in spec order; empty when none match.
 */
export function listNormalizedToolsByFamily(
  family: NormalizedToolFamily,
): ResolvedNormalizedTool[] {
  const selected: ResolvedNormalizedTool[] = [];
  for (const [normalizedToolId, descriptor] of normalizedToolEntries) {
    if (descriptor.family === family) {
      selected.push({ normalizedToolId, ...descriptor });
    }
  }
  return selected;
}
104
+
105
/**
 * Reports whether a raw tool name resolves to a tool in the shell family.
 *
 * @param toolName - Tool name as it appears in a trace.
 */
export function isShellToolName(toolName: string): boolean {
  const resolved = getNormalizedToolHierarchy(toolName);
  return resolved !== null && resolved.family === "shell";
}
108
+
109
/**
 * Reports whether a raw tool name resolves to the `shell.stdin` spec tool.
 *
 * @param toolName - Tool name as it appears in a trace.
 */
export function isWriteStdinToolName(toolName: string): boolean {
  const resolved = getNormalizedToolHierarchy(toolName);
  return resolved !== null && resolved.normalizedToolId === "shell.stdin";
}
112
+
113
/**
 * Pulls the command text out of a shell tool's input.
 *
 * The first string found under `command`, `cmd`, then `input` wins. When none
 * is a string but `input` is an array, its elements are stringified, joined
 * with single spaces and trimmed.
 *
 * @param toolInput - Raw input object of the tool call.
 * @returns The command text, or `null` when no supported field is present.
 */
export function extractShellCommand(
  toolInput: Record<string, unknown>,
): string | null {
  for (const key of ["command", "cmd", "input"]) {
    const value = toolInput[key];
    if (typeof value === "string") {
      return value;
    }
  }

  const argv = toolInput["input"];
  if (!Array.isArray(argv)) {
    return null;
  }
  return argv.map((part) => String(part)).join(" ").trim();
}
134
+
135
/**
 * Removes one pair of matching outer quotes (single or double) from a value.
 *
 * @param value - Text that may be wrapped in quotes.
 * @returns The trimmed value, with the outer quote pair removed (and the inner
 *   text trimmed again) when the first and last characters are the same quote.
 */
function stripOuterQuotes(value: string): string {
  const normalized = value.trim();
  if (
    normalized.length >= 2 &&
    normalized[0] === normalized[normalized.length - 1] &&
    (normalized[0] === "\"" || normalized[0] === "'")
  ) {
    return normalized.slice(1, -1).trim();
  }
  return normalized;
}

/**
 * Tokenizer for `splitShellParts`.
 *
 * A token is either a chain separator (`&&`, `||`, `;`, `|`) or a word. Words
 * end at whitespace or at an unquoted, unescaped separator, so `dir;` and
 * `a&&b` are split. Quoted segments and backslash escapes stay inside the
 * word, and a lone `&` (as in `2>&1`) is not treated as a separator.
 */
const SHELL_TOKEN_REGEX = /&&|\|\||;|\||(?:\\.|"[^"]*"|'[^']*'|[^\s;|&]|&(?!&))+/g;

/**
 * Splits a shell command line into words and chain separators.
 *
 * Outer quotes are stripped first, carriage returns become spaces and newlines
 * become `;` separators. Separators are emitted as their own tokens even when
 * they are written without surrounding whitespace (e.g. `cd /tmp; ls`).
 *
 * @param command - Raw command text.
 * @returns The tokens in order; empty when the command has no tokens.
 */
function splitShellParts(command: string): string[] {
  const normalized = stripOuterQuotes(command.trim())
    .replace(/\r/g, " ")
    .replace(/\n/g, " ; ");
  const matches = normalized.match(SHELL_TOKEN_REGEX);
  return matches ?? [];
}
154
+
155
/**
 * Skips leading `NAME=value` environment assignments.
 *
 * Empty assignments (`NAME=`) are skipped as well; they are valid shell and
 * would otherwise be reported as the command verb.
 *
 * @param parts - Tokens of a command line.
 * @param start - Index to start scanning from.
 * @returns Index of the first token at or after `start` that is not an
 *   assignment (equal to `parts.length` when every token is one).
 */
function skipAssignmentTokens(parts: string[], start = 0): number {
  let index = start;
  while (index < parts.length && /^\w+=/.test(parts[index] ?? "")) {
    index += 1;
  }
  return index;
}
162
+
163
/**
 * Returns the lower-cased text after the last `/` of a token.
 *
 * @param token - A command word, possibly a path such as `/usr/bin/Bash`.
 * @returns The final path segment in lower case (the whole token when it
 *   contains no `/`, an empty string when it ends with `/`).
 */
function basenameLower(token: string): string {
  const lastSlash = token.lastIndexOf("/");
  return token.slice(lastSlash + 1).toLowerCase();
}
167
+
168
/**
 * Returns the command text that follows the next chain separator.
 *
 * Scans from `start`; at each separator token the following tokens are joined
 * with spaces and trimmed. The first non-empty result is returned.
 *
 * @param parts - Tokens of a command line.
 * @param start - Index to start scanning from.
 * @returns The text after the separator, or `""` when no separator is followed
 *   by any text.
 */
function remainderAfterSeparator(parts: string[], start: number): string {
  let cursor = start;
  while (cursor < parts.length) {
    const token = parts[cursor] ?? "";
    cursor += 1;
    if (SHELL_CHAIN_SEPARATORS.has(token)) {
      const tail = parts.slice(cursor).join(" ").trim();
      if (tail) {
        return tail;
      }
    }
  }
  return "";
}
182
+
183
/**
 * Determines the effective verb (program name) of a shell command.
 *
 * Leading `(` characters, outer quotes and `NAME=value` assignments are
 * dropped, and the verb is the lower-cased basename of the next token. Four
 * kinds of leading verb are looked through by recursing on the text that
 * follows them:
 * - wrapper shells (`bash`, `sh`, ...), after skipping their command flags;
 * - `timeout`, after skipping options and duration arguments;
 * - `env`, using everything after it;
 * - setup verbs (`cd`, `export`, ...), using the text after the next separator.
 * When nothing follows, the leading verb itself is returned.
 *
 * @param command - Raw command text.
 * @returns The verb, or `""` when the command has no verb token.
 */
function extractShellVerb(command: string): string {
  const unwrapped = stripOuterQuotes(command.trim().replace(/^\(+/, "").trim());
  const tokens = splitShellParts(unwrapped);
  const verbAt = tokens.length === 0 ? 0 : skipAssignmentTokens(tokens);
  if (verbAt >= tokens.length) {
    return "";
  }

  const verb = basenameLower(tokens[verbAt] ?? "");
  const joinFrom = (from: number): string => tokens.slice(from).join(" ").trim();

  // Text of the wrapped command, if this verb only introduces another command.
  let nested = "";
  if (SHELL_WRAPPER_VERBS.has(verb)) {
    let next = verbAt + 1;
    while (next < tokens.length && SHELL_COMMAND_FLAGS.has(tokens[next] ?? "")) {
      next += 1;
    }
    nested = joinFrom(next);
  } else if (verb === "timeout") {
    let next = verbAt + 1;
    for (; next < tokens.length; next += 1) {
      const token = tokens[next] ?? "";
      const isOption = /^-/.test(token);
      const isDuration = /^\d+(?:\.\d+)?[smhd]?$/.test(token);
      if (!isOption && !isDuration) {
        break;
      }
    }
    nested = joinFrom(next);
  } else if (verb === "env") {
    nested = joinFrom(verbAt + 1);
  } else if (SHELL_SETUP_VERBS.has(verb)) {
    nested = remainderAfterSeparator(tokens, verbAt + 1);
  }

  return nested ? extractShellVerb(nested) : verb;
}
240
+
241
/**
 * Captures the target of a redirect, given the text that follows its first
 * `>`: an optional second `>`, an optional `|` (noclobber override), optional
 * whitespace, then a quoted string or a run of non-separator characters.
 */
const REDIRECT_TARGET_REGEX = /^>?\|?\s*("[^"]*"|'[^']*'|[^\s;|&<>()]*)/;

/**
 * Reports whether a command redirects output into a file.
 *
 * Scans for an unquoted, unescaped `>` or `>>`. Descriptor duplication
 * (`>&`, `>>&`) and process substitution (`>(`) are ignored, and so are
 * redirects whose target is `/dev/null`, because discarding output does not
 * write a file.
 *
 * @param command - Raw command text.
 * @returns `true` when at least one file-writing redirect is present.
 */
export function hasShellWriteRedirect(command: string): boolean {
  let inSingleQuotes = false;
  let inDoubleQuotes = false;
  let escaped = false;

  for (let index = 0; index < command.length; index += 1) {
    const char = command[index];

    if (escaped) {
      escaped = false;
      continue;
    }

    // Backslash is literal inside single quotes, an escape everywhere else.
    if (!inSingleQuotes && char === "\\") {
      escaped = true;
      continue;
    }

    if (!inDoubleQuotes && char === "'") {
      inSingleQuotes = !inSingleQuotes;
      continue;
    }

    if (!inSingleQuotes && char === "\"") {
      inDoubleQuotes = !inDoubleQuotes;
      continue;
    }

    if (inSingleQuotes || inDoubleQuotes || char !== ">") {
      continue;
    }

    const nextChar = command[index + 1] ?? "";
    if (nextChar === "&" || nextChar === "(") {
      continue;
    }

    if (nextChar === ">") {
      const afterAppend = command[index + 2] ?? "";
      if (afterAppend === "&" || afterAppend === "(") {
        continue;
      }
    }

    // Output sent to /dev/null is discarded, not written to a file.
    const rawTarget = REDIRECT_TARGET_REGEX.exec(command.slice(index + 1))?.[1] ?? "";
    const target = rawTarget.replace(/^(["'])(.*)\1$/, "$2");
    if (target === "/dev/null") {
      continue;
    }

    return true;
  }

  return false;
}
290
+
291
/**
 * Heuristically decides whether a shell command writes to a file.
 *
 * A command is write-shaped when it mentions `apply_patch` or `perl -pi`, when
 * its verb is `sed` with an in-place flag or an output redirect, when its verb
 * is `sort` with an output flag, or when its verb is one of the spec's
 * text-filter write verbs combined with an output redirect.
 *
 * @param command - Raw command text.
 */
export function isWriteShapedShellCommand(command: string): boolean {
  const mentionsPatchTool = /\bapply_patch\b|\bperl\s+-pi\b/.test(command);
  if (mentionsPatchTool) {
    return true;
  }

  const verb = extractShellVerb(command);
  switch (verb) {
    case "sed":
      return SED_IN_PLACE_FLAG_REGEX.test(command) || hasShellWriteRedirect(command);
    case "sort":
      if (SORT_OUTPUT_FLAG_REGEX.test(command)) {
        return true;
      }
      break;
    default:
      break;
  }

  if (!textFilterWriteVerbs.has(verb)) {
    return false;
  }
  return hasShellWriteRedirect(command);
}
309
+
310
/**
 * Decides whether a tool call mutates a file.
 *
 * True when the resolved spec tool is flagged `mutates_file`, when the name
 * matches one of the spec's extra mutation aliases, or when it is a shell tool
 * whose command is write-shaped.
 *
 * @param toolName - Tool name as it appears in a trace.
 * @param toolInput - Raw input object of the tool call.
 */
export function isFileMutationTool(
  toolName: string,
  toolInput: Record<string, unknown>,
): boolean {
  if (getNormalizedToolHierarchy(toolName)?.mutates_file) {
    return true;
  }

  const isExtraMutationAlias = fileSpec.extra_mutation_aliases.some((alias) =>
    toolNameMatches(toolName, alias),
  );
  if (isExtraMutationAlias) {
    return true;
  }

  if (!isShellToolName(toolName)) {
    return false;
  }

  const command = extractShellCommand(toolInput);
  return command === null ? false : isWriteShapedShellCommand(command);
}
332
+
333
/**
 * Maps an exit code to a token suffix.
 *
 * @param exitCode - Exit code of the tool result, if any.
 * @returns `":pass"` for `0`, `null` or `undefined`; `":fail"` otherwise.
 */
function suffixFromExitCode(exitCode: number | null | undefined): ":pass" | ":fail" {
  if (exitCode === null || exitCode === undefined || exitCode === 0) {
    return ":pass";
  }
  return ":fail";
}
336
+
337
/**
 * Reports whether a stdin write carries no characters.
 *
 * @param toolInput - Raw input object of the tool call.
 * @returns `true` when `chars` is missing, `null` or the empty string.
 */
function isEmptyWriteStdinPoll(toolInput: Record<string, unknown>): boolean {
  const payload = toolInput["chars"];
  return payload === undefined || payload === null || payload === "";
}
341
+
342
/**
 * Reports whether every character is a control character (code below 0x20,
 * or DEL 0x7f).
 *
 * @param chars - Characters written to stdin.
 * @returns `true` for control-only input, including the empty string.
 */
function isControlOnlyWriteStdin(chars: string): boolean {
  for (const symbol of Array.from(chars)) {
    const code = symbol.charCodeAt(0);
    if (code >= 0x20 && code !== 0x7f) {
      return false;
    }
  }
  return true;
}
348
+
349
/**
 * Reports whether the input contains the ETX character (U+0003, Ctrl-C).
 *
 * @param chars - Characters written to stdin.
 */
function isInterruptWriteStdin(chars: string): boolean {
  return chars.indexOf("\u0003") !== -1;
}
352
+
353
/**
 * Reduces tool output to its meaningful lines.
 *
 * Each line has ANSI colour escapes removed and is trimmed; blank lines and
 * lines starting with one of the spec's metadata prefixes are dropped.
 *
 * @param output - Raw result text; `null`/`undefined` is treated as empty.
 * @returns The remaining lines joined with `\n`, trimmed.
 */
function normalizeToolResultOutput(output: string | null | undefined): string {
  const kept: string[] = [];

  for (const rawLine of (output ?? "").split(/\r?\n/)) {
    const line = rawLine.replace(ANSI_ESCAPE_REGEX, "").trim();
    if (!line) {
      continue;
    }

    const isMetadata = toolNormalizationSpec.tool_result_metadata_prefixes.some((prefix) =>
      line.startsWith(prefix),
    );
    if (!isMetadata) {
      kept.push(line);
    }
  }

  return kept.join("\n").trim();
}
369
+
370
/**
 * Reports whether a tool result is a plain interrupt: a non-zero exit code
 * whose normalized output is exactly `^C`.
 *
 * @param resultContent - Raw result text of the tool call.
 * @param exitCode - Exit code of the tool result, if any.
 */
function isCleanInterruptResult(
  resultContent: string | null | undefined,
  exitCode: number | null | undefined,
): boolean {
  if (exitCode == null || exitCode === 0) {
    return false;
  }
  return normalizeToolResultOutput(resultContent) === "^C";
}
376
+
377
/**
 * Builds the token for a stdin write.
 *
 * Input containing an interrupt character wins over control-only input, which
 * wins over ordinary input.
 *
 * @param chars - Characters written to stdin.
 * @param exitCode - Exit code of the matching tool result, if any.
 * @returns `Bash:pty_control:interrupt`, `Bash:pty_control` or
 *   `Bash:pty_input`, followed by `:pass` or `:fail`.
 */
export function tokenizeWriteStdinChars(
  chars: string,
  exitCode: number | null | undefined,
): string {
  const suffix = suffixFromExitCode(exitCode);

  return isInterruptWriteStdin(chars)
    ? `Bash:pty_control:interrupt${suffix}`
    : isControlOnlyWriteStdin(chars)
      ? `Bash:pty_control${suffix}`
      : `Bash:pty_input${suffix}`;
}
393
+
394
/**
 * Builds the category token for a shell command.
 *
 * Rules are applied in order:
 * 1. a `$name` placeholder verb gives `Bash:placeholder`;
 * 2. `rm` with recursive+force flags gives `Bash:rm_rf` (no suffix);
 * 3. `sed` gives `Bash:fs_write` or `Bash:fs_read`;
 * 4. a verb in the spec's text-filter table gives its token, or
 *    `text_filter_write:<verb>` when the command is write-shaped;
 * 5. write-shaped `perl` / `apply_patch` gives `Bash:fs_write`;
 * 6. a verb in the spec's dangerous list gives `Bash:dangerous:<verb>`
 *    (no suffix);
 * 7. otherwise the spec's verb category, defaulting to `other`.
 *
 * Spec tables are consulted by own property only. The verb comes from trace
 * content, so a verb such as `constructor` or `__proto__` must not resolve to
 * an inherited `Object.prototype` member.
 *
 * @param command - Raw command text.
 * @param exitCode - Exit code of the matching tool result, if any.
 * @returns The token, suffixed with `:pass`/`:fail` except where noted.
 */
export function tokenizeBashCommand(
  command: string,
  exitCode: number | null | undefined,
): string {
  // Own-property lookup: ignores keys inherited from Object.prototype.
  const ownValue = <T extends object>(table: T, key: string): T[keyof T] | undefined =>
    Object.prototype.hasOwnProperty.call(table, key) ? table[key as keyof T] : undefined;

  const verb = extractShellVerb(command);
  const suffix = suffixFromExitCode(exitCode);

  if (/^\$[0-9a-z]+$/.test(verb)) {
    return `Bash:placeholder${suffix}`;
  }

  if (verb === "rm" && RM_RF_REGEX.test(command)) {
    return "Bash:rm_rf";
  }

  if (verb === "sed") {
    return `Bash:${isWriteShapedShellCommand(command) ? "fs_write" : "fs_read"}${suffix}`;
  }

  const textFilterToken = ownValue(shellSpec.text_filter_tokens, verb);
  if (textFilterToken) {
    const category = isWriteShapedShellCommand(command)
      ? `text_filter_write:${verb}`
      : textFilterToken;
    return `Bash:${category}${suffix}`;
  }

  if ((verb === "perl" || verb === "apply_patch") && isWriteShapedShellCommand(command)) {
    return `Bash:fs_write${suffix}`;
  }

  if (dangerousVerbs.has(verb)) {
    return `Bash:dangerous:${verb}`;
  }

  const category = ownValue(shellSpec.verb_categories, verb) ?? "other";
  return `Bash:${category}${suffix}`;
}
434
+
435
/**
 * Builds the token for a file tool call from the path it touches.
 *
 * The path is lower-cased and backslashes become `/`. The first matching
 * class wins: `sensitive` (contains a sensitive signal), `system` (starts
 * with a system prefix), `config` (ends with a config extension or contains a
 * config name), else `project`.
 *
 * @param toolName - Tool name as it appears in a trace.
 * @param rawPath - Path argument of the tool call.
 * @returns `<prefix>:<class>`, where the prefix is the spec's `token_prefix`
 *   for the tool, or the raw tool name when the tool is not in the spec.
 */
export function tokenizeFileTool(toolName: string, rawPath: string): string {
  const tokenPrefix = getNormalizedToolHierarchy(toolName)?.token_prefix ?? toolName;
  const path = rawPath.toLowerCase().replace(/\\/g, "/");

  const isSensitive = (): boolean =>
    fileSpec.sensitive_path_signals.some((signal) => path.includes(signal));
  const isSystem = (): boolean =>
    fileSpec.system_prefixes.some((prefix) => path.startsWith(prefix));
  const isConfig = (): boolean =>
    fileSpec.config_exts.some((ext) => path.endsWith(ext)) ||
    fileSpec.config_names.some((name) => path.includes(name));

  if (isSensitive()) {
    return `${tokenPrefix}:sensitive`;
  }
  if (isSystem()) {
    return `${tokenPrefix}:system`;
  }
  if (isConfig()) {
    return `${tokenPrefix}:config`;
  }
  return `${tokenPrefix}:project`;
}
454
+
455
/**
 * Reads the path argument of a file tool call.
 *
 * @param toolInput - Raw input object of the tool call.
 * @returns The string under `file_path`, else the string under `path`, else
 *   `""`.
 */
function extractToolPath(toolInput: Record<string, unknown>): string {
  for (const key of ["file_path", "path"]) {
    const value = toolInput[key];
    if (typeof value === "string") {
      return value;
    }
  }
  return "";
}
466
+
467
/**
 * Normalizes one `tool_use` block into a `NormalizedToolUse`.
 *
 * - `shell.stdin`: an empty write returns `null`; a string `chars` is
 *   tokenized as a stdin write.
 * - `shell.command`: tokenized from its command text when one can be
 *   extracted.
 * - file-family tools: tokenized from their path argument.
 * - anything else (including shell calls with unusable input) becomes a
 *   generic `Tool:<name>` token.
 *
 * @param block - The tool call to normalize.
 * @param exitCode - Exit code of the matching tool result, if any.
 * @returns The normalized call, or `null` for an empty stdin write.
 */
export function normalizeToolUse(
  block: ToolUseBlock,
  exitCode: number | null | undefined,
): NormalizedToolUse | null {
  const resolved = getNormalizedToolHierarchy(block.tool_name);

  // Fills in the fields shared by every result shape.
  const finish = (
    normalizedToolId: NormalizedToolIdentifier,
    family: NormalizedToolFamily | "generic",
    kind: string,
    token: string,
  ): NormalizedToolUse => ({
    toolCallId: block.tool_call_id,
    rawToolName: block.tool_name,
    normalizedToolId,
    family,
    kind,
    token,
    exitCode: exitCode ?? null,
  });

  if (resolved !== null) {
    const { normalizedToolId, family, kind } = resolved;

    if (normalizedToolId === "shell.stdin") {
      if (isEmptyWriteStdinPoll(block.tool_input)) {
        return null;
      }

      const chars = block.tool_input["chars"];
      if (typeof chars === "string") {
        return finish(normalizedToolId, family, kind, tokenizeWriteStdinChars(chars, exitCode));
      }
    }

    if (normalizedToolId === "shell.command") {
      const command = extractShellCommand(block.tool_input);
      if (command !== null) {
        return finish(normalizedToolId, family, kind, tokenizeBashCommand(command, exitCode));
      }
    }

    if (family === "file") {
      return finish(
        normalizedToolId,
        family,
        kind,
        tokenizeFileTool(block.tool_name, extractToolPath(block.tool_input)),
      );
    }
  }

  return finish(
    "tool.generic",
    "generic",
    "tool",
    `Tool:${block.tool_name}${suffixFromExitCode(exitCode)}`,
  );
}
529
+
530
/**
 * Classifies an exchange from its tool tokens and error flag.
 *
 * @param toolTokens - Tokens of the exchange's tool calls, in order.
 * @param hasError - Whether any tool result in the exchange was an error.
 * @returns `"success"` without an error; `"success_after_retry"` when some
 *   token containing `:fail` is followed by a token that starts with the same
 *   base (the token minus a trailing `:fail`) and contains `:pass`;
 *   `"failure"` otherwise.
 */
function outcomeFromExchange(
  toolTokens: string[],
  hasError: boolean,
): FailureExchangeOutcome {
  if (!hasError) {
    return "success";
  }

  for (const [position, token] of toolTokens.entries()) {
    if (!token?.includes(":fail")) {
      continue;
    }

    const base = token.replace(/:fail$/, "");
    for (let later = position + 1; later < toolTokens.length; later += 1) {
      const candidate = toolTokens[later] as string;
      if (candidate.startsWith(base) && candidate.includes(":pass")) {
        return "success_after_retry";
      }
    }
  }

  return "failure";
}
554
+
555
/**
 * Returns the reduced per-exchange view of a trace: tool tokens, error flag
 * and outcome of every exchange produced by `normalizeTraceForEvaluation`.
 *
 * @param trace - The trace to analyse.
 */
export function extractFailureExchanges(trace: NormalizedTrace): FailureExchange[] {
  const { exchanges } = normalizeTraceForEvaluation(trace);
  return exchanges.map(({ toolTokens, hasError, outcome }) => ({
    toolTokens,
    hasError,
    outcome,
  }));
}
562
+
563
/**
 * Splits a trace into exchanges and normalizes the tool activity of each.
 *
 * An exchange is a user turn plus the run of assistant turns that directly
 * follows it; turns outside such a run are skipped. For every exchange the
 * assistant turns are scanned three times:
 * 1. collect the ids of stdin writes that carry an interrupt character;
 * 2. record each tool result's exit code and flag errors, treating an errored
 *    clean interrupt of a collected call as a cancellation, not an error;
 * 3. normalize every tool call using the recorded exit codes.
 *
 * Exchanges with no tool tokens, no error and no cancellation are omitted.
 *
 * @param trace - The trace to normalize.
 * @returns The normalization version together with the emitted exchanges.
 */
export function normalizeTraceForEvaluation(
  trace: NormalizedTrace,
): TraceNormalization {
  const { turns } = trace;
  const exchanges: TraceNormalizationExchange[] = [];
  let prevOutcome: FailureExchangeOutcome | null = null;
  let cursor = 0;

  while (cursor < turns.length) {
    const userIndex = cursor;
    if (turns[userIndex]?.role !== "user") {
      cursor += 1;
      continue;
    }

    // Extend the exchange over the consecutive assistant turns.
    let end = userIndex + 1;
    while (end < turns.length && turns[end]?.role === "assistant") {
      end += 1;
    }
    cursor = end;

    const assistantTurns = turns.slice(userIndex + 1, end);

    let tokenCount = 0;
    for (const turn of assistantTurns) {
      tokenCount += turn.usage?.output_tokens ?? 0;
    }

    // Pass 1: stdin writes that send an interrupt.
    const interruptToolCallIds = new Set<string>();
    for (const turn of assistantTurns) {
      for (const block of turn.content) {
        if (block.type !== "tool_use" || !isWriteStdinToolName(block.tool_name)) {
          continue;
        }
        const chars = block.tool_input["chars"];
        if (typeof chars === "string" && isInterruptWriteStdin(chars)) {
          interruptToolCallIds.add(block.tool_call_id);
        }
      }
    }

    // Pass 2: exit codes, errors and cancellations from tool results.
    const exitCodes = new Map<string, number | null>();
    let hasError = false;
    let hasCancellation = false;
    for (const turn of assistantTurns) {
      for (const block of turn.content) {
        if (block.type !== "tool_result") {
          continue;
        }

        exitCodes.set(block.tool_call_id, block.exit_code);
        if (!block.is_error) {
          continue;
        }

        const isCancellation =
          interruptToolCallIds.has(block.tool_call_id) &&
          isCleanInterruptResult(block.result_content, block.exit_code);
        if (isCancellation) {
          hasCancellation = true;
        } else {
          hasError = true;
        }
      }
    }

    // Pass 3: normalize tool calls now that every exit code is known.
    const actions: NormalizedToolUse[] = [];
    for (const turn of assistantTurns) {
      for (const block of turn.content) {
        if (block.type !== "tool_use") {
          continue;
        }

        const action = normalizeToolUse(block, exitCodes.get(block.tool_call_id));
        if (action !== null) {
          actions.push(action);
        }
      }
    }

    const toolTokens = actions.map((action) => action.token);
    if (toolTokens.length === 0 && !hasError && !hasCancellation) {
      continue;
    }

    const outcome: FailureExchangeOutcome =
      hasCancellation && !hasError
        ? "canceled"
        : outcomeFromExchange(toolTokens, hasError);
    exchanges.push({
      exchangeIndex: exchanges.length,
      actions,
      toolTokens,
      hasError,
      outcome,
      prevOutcome,
      tokenCount,
    });
    prevOutcome = outcome;
  }

  return {
    version: TRACE_NORMALIZATION_VERSION,
    exchanges,
  };
}