@browserbasehq/orca 3.5.0-preview.0 → 3.5.0-preview.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.d.ts +35 -0
  2. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.js +38 -0
  3. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.js.map +1 -0
  4. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.d.ts +19 -0
  5. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.js +54 -0
  6. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.js.map +1 -0
  7. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.d.ts +2 -0
  8. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.js +62 -0
  9. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.js.map +1 -0
  10. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.d.ts +3 -0
  11. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.js +25 -0
  12. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.js.map +1 -0
  13. package/dist/cjs/lib/v3/api.d.ts +7 -1
  14. package/dist/cjs/lib/v3/api.js +100 -29
  15. package/dist/cjs/lib/v3/api.js.map +1 -1
  16. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.d.ts +24 -0
  17. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.js +31 -0
  18. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.js.map +1 -0
  19. package/dist/cjs/lib/v3/handlers/v3AgentHandler.d.ts +1 -0
  20. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js +83 -7
  21. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  22. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.d.ts +11 -0
  23. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js +119 -5
  24. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  25. package/dist/cjs/lib/v3/index.d.ts +11 -0
  26. package/dist/cjs/lib/v3/index.js +19 -1
  27. package/dist/cjs/lib/v3/index.js.map +1 -1
  28. package/dist/cjs/lib/v3/llm/LLMProvider.d.ts +3 -0
  29. package/dist/cjs/lib/v3/llm/LLMProvider.js +28 -8
  30. package/dist/cjs/lib/v3/llm/LLMProvider.js.map +1 -1
  31. package/dist/cjs/lib/v3/types/public/agent.d.ts +6 -0
  32. package/dist/cjs/lib/v3/types/public/agent.js.map +1 -1
  33. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.d.ts +85 -0
  34. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.js +15 -0
  35. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.js.map +1 -0
  36. package/dist/cjs/lib/v3/types/public/api.d.ts +414 -182
  37. package/dist/cjs/lib/v3/types/public/api.js +62 -20
  38. package/dist/cjs/lib/v3/types/public/api.js.map +1 -1
  39. package/dist/cjs/lib/v3/types/public/index.d.ts +1 -0
  40. package/dist/cjs/lib/v3/types/public/index.js +1 -0
  41. package/dist/cjs/lib/v3/types/public/index.js.map +1 -1
  42. package/dist/cjs/lib/v3/types/public/model.d.ts +16 -7
  43. package/dist/cjs/lib/v3/types/public/model.js.map +1 -1
  44. package/dist/cjs/lib/v3/v3.d.ts +1 -0
  45. package/dist/cjs/lib/v3/v3.js +14 -0
  46. package/dist/cjs/lib/v3/v3.js.map +1 -1
  47. package/dist/cjs/lib/v3/verifier/evidenceNormalization.d.ts +7 -0
  48. package/dist/cjs/lib/v3/verifier/evidenceNormalization.js +100 -0
  49. package/dist/cjs/lib/v3/verifier/evidenceNormalization.js.map +1 -0
  50. package/dist/cjs/lib/v3/verifier/index.d.ts +6 -0
  51. package/dist/cjs/lib/v3/verifier/index.js +16 -0
  52. package/dist/cjs/lib/v3/verifier/index.js.map +1 -0
  53. package/dist/cjs/lib/v3/verifier/trajectory.d.ts +50 -0
  54. package/dist/cjs/lib/v3/verifier/trajectory.js +316 -0
  55. package/dist/cjs/lib/v3/verifier/trajectory.js.map +1 -0
  56. package/dist/cjs/lib/v3/verifier/types.d.ts +281 -0
  57. package/dist/cjs/lib/v3/verifier/types.js +10 -0
  58. package/dist/cjs/lib/v3/verifier/types.js.map +1 -0
  59. package/dist/cjs/lib/v3Evaluator.d.ts +9 -4
  60. package/dist/cjs/lib/v3Evaluator.js +148 -0
  61. package/dist/cjs/lib/v3Evaluator.js.map +1 -1
  62. package/dist/cjs/lib/v3LegacyEvaluator.js +5 -1
  63. package/dist/cjs/lib/v3LegacyEvaluator.js.map +1 -1
  64. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.d.ts +35 -0
  65. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.js +35 -0
  66. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.js.map +1 -0
  67. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.d.ts +19 -0
  68. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.js +50 -0
  69. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.js.map +1 -0
  70. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.d.ts +2 -0
  71. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.js +59 -0
  72. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.js.map +1 -0
  73. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.d.ts +3 -0
  74. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.js +22 -0
  75. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.js.map +1 -0
  76. package/dist/esm/lib/v3/api.d.ts +7 -1
  77. package/dist/esm/lib/v3/api.js +100 -29
  78. package/dist/esm/lib/v3/api.js.map +1 -1
  79. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.d.ts +24 -0
  80. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.js +28 -0
  81. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.js.map +1 -0
  82. package/dist/esm/lib/v3/handlers/v3AgentHandler.d.ts +1 -0
  83. package/dist/esm/lib/v3/handlers/v3AgentHandler.js +83 -7
  84. package/dist/esm/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  85. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.d.ts +11 -0
  86. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js +119 -5
  87. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  88. package/dist/esm/lib/v3/index.d.ts +11 -0
  89. package/dist/esm/lib/v3/index.js +10 -0
  90. package/dist/esm/lib/v3/index.js.map +1 -1
  91. package/dist/esm/lib/v3/llm/LLMProvider.d.ts +3 -0
  92. package/dist/esm/lib/v3/llm/LLMProvider.js +28 -9
  93. package/dist/esm/lib/v3/llm/LLMProvider.js.map +1 -1
  94. package/dist/esm/lib/v3/types/public/agent.d.ts +6 -0
  95. package/dist/esm/lib/v3/types/public/agent.js.map +1 -1
  96. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.d.ts +85 -0
  97. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.js +14 -0
  98. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.js.map +1 -0
  99. package/dist/esm/lib/v3/types/public/api.d.ts +414 -182
  100. package/dist/esm/lib/v3/types/public/api.js +60 -18
  101. package/dist/esm/lib/v3/types/public/api.js.map +1 -1
  102. package/dist/esm/lib/v3/types/public/index.d.ts +1 -0
  103. package/dist/esm/lib/v3/types/public/index.js +1 -0
  104. package/dist/esm/lib/v3/types/public/index.js.map +1 -1
  105. package/dist/esm/lib/v3/types/public/model.d.ts +16 -7
  106. package/dist/esm/lib/v3/types/public/model.js.map +1 -1
  107. package/dist/esm/lib/v3/v3.d.ts +1 -0
  108. package/dist/esm/lib/v3/v3.js +14 -0
  109. package/dist/esm/lib/v3/v3.js.map +1 -1
  110. package/dist/esm/lib/v3/verifier/evidenceNormalization.d.ts +7 -0
  111. package/dist/esm/lib/v3/verifier/evidenceNormalization.js +93 -0
  112. package/dist/esm/lib/v3/verifier/evidenceNormalization.js.map +1 -0
  113. package/dist/esm/lib/v3/verifier/index.d.ts +6 -0
  114. package/dist/esm/lib/v3/verifier/index.js +3 -0
  115. package/dist/esm/lib/v3/verifier/index.js.map +1 -0
  116. package/dist/esm/lib/v3/verifier/trajectory.d.ts +50 -0
  117. package/dist/esm/lib/v3/verifier/trajectory.js +273 -0
  118. package/dist/esm/lib/v3/verifier/trajectory.js.map +1 -0
  119. package/dist/esm/lib/v3/verifier/types.d.ts +281 -0
  120. package/dist/esm/lib/v3/verifier/types.js +9 -0
  121. package/dist/esm/lib/v3/verifier/types.js.map +1 -0
  122. package/dist/esm/lib/v3Evaluator.d.ts +9 -4
  123. package/dist/esm/lib/v3Evaluator.js +148 -0
  124. package/dist/esm/lib/v3Evaluator.js.map +1 -1
  125. package/dist/esm/lib/v3LegacyEvaluator.js +5 -1
  126. package/dist/esm/lib/v3LegacyEvaluator.js.map +1 -1
  127. package/package.json +4 -4
@@ -0,0 +1,7 @@
1
+ import type { AgentStepFinishedEvent } from "../types/public/agentEvidenceEvents.js";
2
+ import type { AgentEvidence } from "./types.js";
3
+ export declare const REDACTED_INLINE_IMAGE = "[redacted inline image payload]";
4
+ export declare function collectInlineImagePayloads(value: unknown, actionName?: string, out?: string[]): string[];
5
+ export declare function redactInlineImagePayloads(value: unknown, actionName?: string): unknown;
6
+ export declare function mergeAgentEvidence(...parts: Array<AgentEvidence | undefined>): AgentEvidence;
7
+ export declare function buildAgentEvidenceFromStepFinished(event: AgentStepFinishedEvent): AgentEvidence;
@@ -0,0 +1,93 @@
1
/** Placeholder substituted for every inline base64 image string that gets redacted. */
export const REDACTED_INLINE_IMAGE = "[redacted inline image payload]";
// Property names whose string values are treated as inline base64 images for any action.
const INLINE_IMAGE_KEYS = new Set(["screenshotBase64"]);
// Stand-in written by redactInlineImagePayloads where a value contains itself.
const CIRCULAR_REFERENCE = "[circular reference]";
/**
 * Decide whether `key` names an inline base64 image field.
 *
 * `screenshotBase64` always qualifies; a bare `base64` key qualifies only when
 * the action is named "screenshot".
 */
function shouldRedactBase64Key(key, actionName) {
    return (INLINE_IMAGE_KEYS.has(key) ||
        (actionName === "screenshot" && key === "base64"));
}
/** True for values the walkers descend into: non-null objects that are not Buffers. */
function isTraversable(value) {
    return Boolean(value) && typeof value === "object" && !Buffer.isBuffer(value);
}
/**
 * Gather every inline base64 image string found anywhere inside `value`.
 *
 * Arrays and plain objects are walked depth-first in property order. Buffers
 * and primitives are ignored. A value that (directly or indirectly) contains
 * itself is visited once per path instead of recursing forever; values that
 * are merely shared between siblings are still visited on every path.
 *
 * @param value Arbitrary tool output.
 * @param actionName Action that produced `value`; enables the `base64` key
 *   for the "screenshot" action.
 * @param out Accumulator the payloads are appended to.
 * @returns `out`, with the matching strings appended in traversal order.
 */
export function collectInlineImagePayloads(value, actionName, out = []) {
    collectInto(value, actionName, out, new WeakSet());
    return out;
}
// Recursive worker for collectInlineImagePayloads; `ancestors` holds the
// objects on the current path so cycles can be detected.
function collectInto(value, actionName, out, ancestors) {
    if (!isTraversable(value) || ancestors.has(value))
        return;
    ancestors.add(value);
    if (Array.isArray(value)) {
        for (const item of value) {
            collectInto(item, actionName, out, ancestors);
        }
    }
    else {
        for (const [key, nested] of Object.entries(value)) {
            if (shouldRedactBase64Key(key, actionName) && typeof nested === "string") {
                out.push(nested);
            }
            else {
                collectInto(nested, actionName, out, ancestors);
            }
        }
    }
    // Leaving this subtree: siblings that share the object are not a cycle.
    ancestors.delete(value);
}
/**
 * Return a copy of `value` in which every inline base64 image string is
 * replaced by REDACTED_INLINE_IMAGE.
 *
 * Primitives and Buffers are returned as-is; arrays and objects are copied, so
 * the input is never mutated. A back-reference to an object that is still
 * being copied is replaced by the string "[circular reference]", which keeps
 * the result finite and JSON-serializable.
 *
 * @param value Arbitrary tool output.
 * @param actionName Action that produced `value`; enables the `base64` key
 *   for the "screenshot" action.
 * @returns The redacted copy (or `value` itself when it is not traversable).
 */
export function redactInlineImagePayloads(value, actionName) {
    return redactValue(value, actionName, new WeakSet());
}
// Recursive worker for redactInlineImagePayloads.
function redactValue(value, actionName, ancestors) {
    if (!isTraversable(value))
        return value;
    if (ancestors.has(value))
        return CIRCULAR_REFERENCE;
    ancestors.add(value);
    let copy;
    if (Array.isArray(value)) {
        copy = value.map((item) => redactValue(item, actionName, ancestors));
    }
    else {
        // Object.fromEntries defines own data properties, so a "__proto__" key
        // (as produced by JSON.parse) is copied like any other key instead of
        // replacing the copy's prototype.
        copy = Object.fromEntries(Object.entries(value).map(([key, nested]) => [
            key,
            shouldRedactBase64Key(key, actionName) && typeof nested === "string"
                ? REDACTED_INLINE_IMAGE
                : redactValue(nested, actionName, ancestors),
        ]));
    }
    ancestors.delete(value);
    return copy;
}
44
/**
 * Combine several evidence objects into one.
 *
 * @param parts Evidence objects in priority order; `undefined` entries and
 *   entries without `modalities` contribute nothing.
 * @returns A new evidence object whose `modalities` is the concatenation of
 *   every part's modalities, in argument order.
 */
export function mergeAgentEvidence(...parts) {
    let modalities = [];
    for (const part of parts) {
        const contribution = part?.modalities ?? [];
        modalities = modalities.concat(contribution);
    }
    return { modalities };
}
49
/**
 * Decode one inline base64 image string.
 *
 * `Buffer.from(text, "base64")` never throws: it silently decodes whatever it
 * can from malformed input. The payload is therefore validated explicitly
 * (whitespace ignored, standard or URL-safe alphabet, at most two padding
 * characters) before decoding.
 *
 * @returns The decoded bytes, or `undefined` when the payload is empty,
 *   malformed, or decodes to zero bytes.
 */
function decodeInlineImage(payload) {
    const compact = payload.replace(/\s+/g, "");
    if (!/^[A-Za-z0-9+\/_-]+={0,2}$/.test(compact))
        return undefined;
    const bytes = Buffer.from(compact, "base64");
    return bytes.length > 0 ? bytes : undefined;
}
/**
 * Translate a step-finished event into agent evidence.
 *
 * The resulting modalities are, in order:
 *  - a text modality with `event.reasoning`, when it is non-empty;
 *  - for a string / number / boolean / bigint result: a text modality;
 *  - for a Buffer result: an image modality labelled "image/png";
 *  - for any other object result: one "image/png" modality per decodable
 *    inline base64 payload found in it, followed by a json modality holding
 *    the result with those payloads redacted.
 *
 * Malformed or empty inline payloads are skipped; the json modality is still
 * emitted. A missing `toolOutput`, or a `null` / `undefined` result, yields
 * only the reasoning modality (if any).
 *
 * @param event Step-finished event; reads `reasoning`, `toolOutput.result`
 *   and `actionName`.
 * @returns `{ modalities }` as described above.
 */
export function buildAgentEvidenceFromStepFinished(event) {
    const modalities = [];
    if (event.reasoning) {
        modalities.push({ type: "text", content: event.reasoning });
    }
    const result = event.toolOutput?.result;
    if (result === undefined || result === null) {
        return { modalities };
    }
    if (typeof result === "string") {
        modalities.push({ type: "text", content: result });
    }
    else if (typeof result === "number" ||
        typeof result === "boolean" ||
        typeof result === "bigint") {
        modalities.push({ type: "text", content: String(result) });
    }
    else if (Buffer.isBuffer(result)) {
        modalities.push({
            type: "image",
            bytes: result,
            mediaType: "image/png",
        });
    }
    else if (typeof result === "object") {
        for (const imageBase64 of collectInlineImagePayloads(result, event.actionName)) {
            const bytes = decodeInlineImage(imageBase64);
            if (bytes === undefined) {
                // Malformed base64; skip the image and keep the JSON modality.
                continue;
            }
            modalities.push({
                type: "image",
                bytes,
                mediaType: "image/png",
            });
        }
        modalities.push({
            type: "json",
            content: redactInlineImagePayloads(result, event.actionName),
        });
    }
    return { modalities };
}
93
+ //# sourceMappingURL=evidenceNormalization.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evidenceNormalization.js","sourceRoot":"","sources":["../../../../../lib/v3/verifier/evidenceNormalization.ts"],"names":[],"mappings":"AAGA,MAAM,CAAC,MAAM,qBAAqB,GAAG,iCAAiC,CAAC;AAEvE,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC;AAExD,SAAS,qBAAqB,CAAC,GAAW,EAAE,UAAmB;IAC7D,OAAO,CACL,iBAAiB,CAAC,GAAG,CAAC,GAAG,CAAC;QAC1B,CAAC,UAAU,KAAK,YAAY,IAAI,GAAG,KAAK,QAAQ,CAAC,CAClD,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,0BAA0B,CACxC,KAAc,EACd,UAAmB,EACnB,MAAgB,EAAE;IAElB,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,GAAG,CAAC;IACpD,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAEvC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,0BAA0B,CAAC,IAAI,EAAE,UAAU,EAAE,GAAG,CAAC,CAAC;QACpD,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAED,KAAK,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QAClD,IAAI,qBAAqB,CAAC,GAAG,EAAE,UAAU,CAAC,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;YACzE,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACjB,SAAS;QACX,CAAC;QACD,0BAA0B,CAAC,MAAM,EAAE,UAAU,EAAE,GAAG,CAAC,CAAC;IACtD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,yBAAyB,CACvC,KAAc,EACd,UAAmB;IAEnB,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IACtD,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAEzC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,yBAAyB,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC;IAC1E,CAAC;IAED,MAAM,GAAG,GAA4B,EAAE,CAAC;IACxC,KAAK,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QAClD,GAAG,CAAC,GAAG,CAAC;YACN,qBAAqB,CAAC,GAAG,EAAE,UAAU,CAAC,IAAI,OAAO,MAAM,KAAK,QAAQ;gBAClE,CAAC,CAAC,qBAAqB;gBACvB,CAAC,CAAC,yBAAyB,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IACtD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,GAAG,KAAuC;IAE1C,OAAO;QACL,UAAU,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,UAAU,IAAI,EAAE,CAAC;KACtD,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,kCAAkC,CAChD,KAA6B;IAE7B,MAAM,UAAU,GAAgC,EAAE,CAAC;
IACnD,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;QACpB,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,SAAS,EAAE,CAAC,CAAC;IAC9D,CAAC;IAED,MAAM,MAAM,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC;IACvC,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QAC5C,OAAO,EAAE,UAAU,EAAE,CAAC;IACxB,CAAC;IAED,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IACrD,CAAC;SAAM,IACL,OAAO,MAAM,KAAK,QAAQ;QAC1B,OAAO,MAAM,KAAK,SAAS;QAC3B,OAAO,MAAM,KAAK,QAAQ,EAC1B,CAAC;QACD,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAC7D,CAAC;SAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACnC,UAAU,CAAC,IAAI,CAAC;YACd,IAAI,EAAE,OAAO;YACb,KAAK,EAAE,MAAM;YACb,SAAS,EAAE,WAAW;SACvB,CAAC,CAAC;IACL,CAAC;SAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QACtC,KAAK,MAAM,WAAW,IAAI,0BAA0B,CAClD,MAAM,EACN,KAAK,CAAC,UAAU,CACjB,EAAE,CAAC;YACF,IAAI,CAAC;gBACH,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,OAAO;oBACb,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC;oBACzC,SAAS,EAAE,WAAW;iBACvB,CAAC,CAAC;YACL,CAAC;YAAC,MAAM,CAAC;gBACP,+DAA+D;YACjE,CAAC;QACH,CAAC;QACD,UAAU,CAAC,IAAI,CAAC;YACd,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,yBAAyB,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,CAAC;SAC7D,CAAC,CAAC;IACL,CAAC;IAED,OAAO,EAAE,UAAU,EAAE,CAAC;AACxB,CAAC","sourcesContent":["import type { AgentStepFinishedEvent } from \"../types/public/agentEvidenceEvents.js\";\nimport type { AgentEvidence } from \"./types.js\";\n\nexport const REDACTED_INLINE_IMAGE = \"[redacted inline image payload]\";\n\nconst INLINE_IMAGE_KEYS = new Set([\"screenshotBase64\"]);\n\nfunction shouldRedactBase64Key(key: string, actionName?: string): boolean {\n return (\n INLINE_IMAGE_KEYS.has(key) ||\n (actionName === \"screenshot\" && key === \"base64\")\n );\n}\n\nexport function collectInlineImagePayloads(\n value: unknown,\n actionName?: string,\n out: string[] = [],\n): string[] {\n if (!value || typeof value !== \"object\") return out;\n if (Buffer.isBuffer(value)) return out;\n\n if 
(Array.isArray(value)) {\n for (const item of value) {\n collectInlineImagePayloads(item, actionName, out);\n }\n return out;\n }\n\n for (const [key, nested] of Object.entries(value)) {\n if (shouldRedactBase64Key(key, actionName) && typeof nested === \"string\") {\n out.push(nested);\n continue;\n }\n collectInlineImagePayloads(nested, actionName, out);\n }\n return out;\n}\n\nexport function redactInlineImagePayloads(\n value: unknown,\n actionName?: string,\n): unknown {\n if (!value || typeof value !== \"object\") return value;\n if (Buffer.isBuffer(value)) return value;\n\n if (Array.isArray(value)) {\n return value.map((item) => redactInlineImagePayloads(item, actionName));\n }\n\n const out: Record<string, unknown> = {};\n for (const [key, nested] of Object.entries(value)) {\n out[key] =\n shouldRedactBase64Key(key, actionName) && typeof nested === \"string\"\n ? REDACTED_INLINE_IMAGE\n : redactInlineImagePayloads(nested, actionName);\n }\n return out;\n}\n\nexport function mergeAgentEvidence(\n ...parts: Array<AgentEvidence | undefined>\n): AgentEvidence {\n return {\n modalities: parts.flatMap((p) => p?.modalities ?? 
[]),\n };\n}\n\nexport function buildAgentEvidenceFromStepFinished(\n event: AgentStepFinishedEvent,\n): AgentEvidence {\n const modalities: AgentEvidence[\"modalities\"] = [];\n if (event.reasoning) {\n modalities.push({ type: \"text\", content: event.reasoning });\n }\n\n const result = event.toolOutput.result;\n if (result === undefined || result === null) {\n return { modalities };\n }\n\n if (typeof result === \"string\") {\n modalities.push({ type: \"text\", content: result });\n } else if (\n typeof result === \"number\" ||\n typeof result === \"boolean\" ||\n typeof result === \"bigint\"\n ) {\n modalities.push({ type: \"text\", content: String(result) });\n } else if (Buffer.isBuffer(result)) {\n modalities.push({\n type: \"image\",\n bytes: result,\n mediaType: \"image/png\",\n });\n } else if (typeof result === \"object\") {\n for (const imageBase64 of collectInlineImagePayloads(\n result,\n event.actionName,\n )) {\n try {\n modalities.push({\n type: \"image\",\n bytes: Buffer.from(imageBase64, \"base64\"),\n mediaType: \"image/png\",\n });\n } catch {\n // Malformed base64; skip the image and keep the JSON modality.\n }\n }\n modalities.push({\n type: \"json\",\n content: redactInlineImagePayloads(result, event.actionName),\n });\n }\n\n return { modalities };\n}\n"]}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Public re-exports for the verifier subsystem.
3
+ */
4
+ export type { AgentEvidence, AgentEvidenceModality, CriterionScore, EvaluationResult, FirstPointOfFailure, ProbeEvidence, Rubric, RubricCriterion, TaskSpec, TaskValidity, ToolOutput, Trajectory, TrajectoryStatus, TrajectoryStep, TrajectoryUsage, Verifier, VerifierFinding, VerifierRawSteps, } from "./types.js";
5
+ export { buildAgentEvidenceFromStepFinished, collectInlineImagePayloads, mergeAgentEvidence, redactInlineImagePayloads, REDACTED_INLINE_IMAGE, } from "./evidenceNormalization.js";
6
+ export { loadTrajectoryFromDisk, nextResultFilename, normalizeRubric, shouldPersistTrajectory, writeTrajectoryDir, } from "./trajectory.js";
@@ -0,0 +1,3 @@
1
+ export { buildAgentEvidenceFromStepFinished, collectInlineImagePayloads, mergeAgentEvidence, redactInlineImagePayloads, REDACTED_INLINE_IMAGE, } from "./evidenceNormalization.js";
2
+ export { loadTrajectoryFromDisk, nextResultFilename, normalizeRubric, shouldPersistTrajectory, writeTrajectoryDir, } from "./trajectory.js";
3
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../../lib/v3/verifier/index.ts"],"names":[],"mappings":"AAuBA,OAAO,EACL,kCAAkC,EAClC,0BAA0B,EAC1B,kBAAkB,EAClB,yBAAyB,EACzB,qBAAqB,GACtB,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,sBAAsB,EACtB,kBAAkB,EAClB,eAAe,EACf,uBAAuB,EACvB,kBAAkB,GACnB,MAAM,iBAAiB,CAAC","sourcesContent":["/**\n * Public re-exports for the verifier subsystem.\n */\nexport type {\n AgentEvidence,\n AgentEvidenceModality,\n CriterionScore,\n EvaluationResult,\n FirstPointOfFailure,\n ProbeEvidence,\n Rubric,\n RubricCriterion,\n TaskSpec,\n TaskValidity,\n ToolOutput,\n Trajectory,\n TrajectoryStatus,\n TrajectoryStep,\n TrajectoryUsage,\n Verifier,\n VerifierFinding,\n VerifierRawSteps,\n} from \"./types.js\";\nexport {\n buildAgentEvidenceFromStepFinished,\n collectInlineImagePayloads,\n mergeAgentEvidence,\n redactInlineImagePayloads,\n REDACTED_INLINE_IMAGE,\n} from \"./evidenceNormalization.js\";\nexport {\n loadTrajectoryFromDisk,\n nextResultFilename,\n normalizeRubric,\n shouldPersistTrajectory,\n writeTrajectoryDir,\n} from \"./trajectory.js\";\n"]}
@@ -0,0 +1,50 @@
1
+ import type { Rubric, Trajectory } from "./types.js";
2
+ /**
3
+ * Convert dataset or generated rubric JSON into the public Stagehand shape.
4
+ * Snake-case dataset fields are accepted here so serialized quirks do not leak
5
+ * into the canonical rubric type.
6
+ */
7
+ export declare function normalizeRubric(rubric: unknown): Rubric | undefined;
8
+ /**
9
+ * Hydrate a Trajectory from the on-disk directory layout written by
10
+ * TrajectoryRecorder.persist(). Used by the offline re-scoring CLI (`bench
11
+ * verify`) and by any consumer that wants to feed a saved trajectory back
12
+ * into V3Evaluator.verify() without running an agent.
13
+ *
14
+ * Reverses the recorder's serialization tweaks:
15
+ * - `probeEvidence.screenshotPath` → read file into `probeEvidence.screenshot`.
16
+ * - Image modalities in `agentEvidence.modalities` carry `imagePath` on
17
+ * disk instead of raw Buffer; legacy `bytesBase64` fixtures are also
18
+ * accepted.
19
+ *
20
+ * @param dir absolute or cwd-relative path to a `<run-id>/<task-id>/` directory.
21
+ */
22
+ export declare function loadTrajectoryFromDisk(dir: string): Promise<Trajectory>;
23
+ /**
24
+ * Build a `result*.json` filename for persisted evaluator output.
25
+ *
26
+ * Convention: the live run writes `result.json`; offline re-score attempts use
27
+ * a label-based name (e.g., `result_rescore-2026-05-11.json`) so they coexist
28
+ * without collisions and remain easy to diff.
29
+ */
30
+ export declare function nextResultFilename(label?: string): string;
31
+ /**
32
+ * Default persistence policy: explicit override, then env, then "on unless CI".
33
+ */
34
+ export declare function shouldPersistTrajectory(override: boolean | undefined): boolean;
35
+ /**
36
+ * Write the on-disk trajectory layout under `dir`:
37
+ *
38
+ * <dir>/
39
+ * ├── task_data.json
40
+ * ├── trajectory.json (screenshots referenced by path)
41
+ * ├── screenshots/
42
+ * │ ├── probe/<N>.png
43
+ * │ └── agent/<N>[_M].png
44
+ * ├── scores/ (empty; populated separately)
45
+ * └── core.log
46
+ *
47
+ * Image bytes are externalized to PNG files; the in-memory Trajectory is left
48
+ * untouched so callers can keep using it after persistence.
49
+ */
50
+ export declare function writeTrajectoryDir(dir: string, trajectory: Trajectory): Promise<void>;
@@ -0,0 +1,273 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { redactInlineImagePayloads } from "./evidenceNormalization.js";
4
/**
 * Convert dataset or generated rubric JSON into the canonical rubric shape.
 *
 * Each item may carry its point budget as `maxPoints` or as the snake-case
 * `max_points`, so serialized dataset quirks do not leak into the result.
 *
 * @param rubric Parsed rubric JSON; `null` / `undefined` means "no rubric".
 * @returns `{ items }` where every item has `criterion`, `description`,
 *   `maxPoints` and, only when it is a string, `condition`; `undefined` when
 *   `rubric` is nullish.
 * @throws {TypeError} When `rubric` is not an object, has no `items` array, an
 *   item is not an object, an item lacks a non-empty `criterion` or
 *   `description` string, or its max-points value is not a finite number.
 */
export function normalizeRubric(rubric) {
    if (rubric == null)
        return undefined;
    if (typeof rubric !== "object") {
        throw new TypeError("Rubric must be an object");
    }
    const rawRubric = rubric;
    if (!Array.isArray(rawRubric.items)) {
        throw new TypeError("Rubric is missing an items array");
    }
    return {
        items: rawRubric.items.map((item, index) => {
            // Reject null / primitive entries up front so the caller gets a
            // message that points at the offending item rather than an opaque
            // property-access failure.
            if (item === null || typeof item !== "object") {
                throw new TypeError(`Rubric item at index ${index} must be an object`);
            }
            const criterion = normalizeRequiredString(item.criterion, "criterion");
            const description = normalizeRequiredString(item.description, "description");
            const maxPoints = normalizeMaxPoints(item);
            if (typeof maxPoints !== "number" || !Number.isFinite(maxPoints)) {
                throw new TypeError(`Rubric criterion "${criterion}" is missing a numeric maxPoints value`);
            }
            return {
                criterion,
                description,
                maxPoints,
                // `condition` is optional: copy it only when it is a string.
                ...(typeof item.condition === "string" && {
                    condition: item.condition,
                }),
            };
        }),
    };
}
/** Return `value` when it is a non-empty string; otherwise throw a TypeError naming `fieldName`. */
function normalizeRequiredString(value, fieldName) {
    if (typeof value === "string" && value.length) {
        return value;
    }
    throw new TypeError(`Rubric criterion is missing a ${fieldName} value`);
}
/** Read an item's point budget, preferring `maxPoints` over the snake-case `max_points`. */
function normalizeMaxPoints(item) {
    return item.maxPoints ?? item.max_points;
}
47
/**
 * Turn an optional label into a filename-safe token.
 *
 * A missing, empty, or whitespace-only label falls back to
 * `rescore-<ISO timestamp>`; without that fallback an empty label would
 * produce the same file name on every run. Every character outside
 * `A-Z a-z 0-9 . _ -` is replaced by `_`.
 */
function normalizeResultLabel(label) {
    const missing = label == null || label.trim() === "";
    const base = missing ? `rescore-${new Date().toISOString()}` : label;
    return base.replace(/[^A-Za-z0-9._-]/g, "_");
}
50
+ // ─────────────────────────────────────────────────────────────────────────────
51
+ // On-disk loader
52
+ // ─────────────────────────────────────────────────────────────────────────────
53
/**
 * Hydrate a Trajectory from the on-disk directory layout written by
 * TrajectoryRecorder.persist(). Used by the offline re-scoring CLI (`bench
 * verify`) and by any consumer that wants to feed a saved trajectory back
 * into V3Evaluator.verify() without running an agent.
 *
 * Reverses the recorder's serialization tweaks:
 *  - `probeEvidence.screenshotPath` → read file into `probeEvidence.screenshot`.
 *  - Image modalities in `agentEvidence.modalities` carry `imagePath` on
 *    disk instead of raw Buffer; legacy `bytesBase64` fixtures are also
 *    accepted.
 *
 * Path references are resolved relative to the trajectory directory and must
 * stay inside it (checked lexically). A referenced file that cannot be read is
 * tolerated: the screenshot stays unset / the image modality is omitted.
 *
 * @param dir absolute or cwd-relative path to a `<run-id>/<task-id>/` directory.
 * @returns The parsed `trajectory.json` content with image bytes restored.
 * @throws {TypeError} When `trajectory.json` does not contain an object with a
 *   `steps` array, a step is not an object, or a path reference is not a string.
 * @throws {Error} When a path reference points outside the trajectory
 *   directory, or `trajectory.json` cannot be read or parsed.
 */
export async function loadTrajectoryFromDisk(dir) {
    // Uses the module-level `fs` / `path` imports; no per-call dynamic import.
    const trajectoryDir = path.resolve(dir);
    const trajectoryPath = path.join(trajectoryDir, "trajectory.json");
    const raw = await fs.readFile(trajectoryPath, "utf8");
    const parsed = JSON.parse(raw);
    if (parsed === null ||
        typeof parsed !== "object" ||
        !Array.isArray(parsed.steps)) {
        throw new TypeError(`Trajectory file is missing a steps array: ${trajectoryPath}`);
    }
    // Resolve a stored relative path and refuse anything that lands outside
    // the trajectory directory (e.g. "../secret" or an absolute path).
    const resolveWithinTrajectoryDir = (candidate, fieldName = "screenshotPath") => {
        if (typeof candidate !== "string") {
            throw new TypeError(`Trajectory ${fieldName} must be a string`);
        }
        const resolved = path.resolve(trajectoryDir, candidate);
        const relative = path.relative(trajectoryDir, resolved);
        const outside = relative === ".." ||
            relative.startsWith(`..${path.sep}`) ||
            path.isAbsolute(relative);
        if (outside) {
            throw new Error(`Trajectory ${fieldName} escapes trajectory directory: ${candidate}`);
        }
        return resolved;
    };
    const hydrateProbeScreenshot = async (probe) => {
        if (probe?.screenshotPath && !probe.screenshot) {
            const resolved = resolveWithinTrajectoryDir(probe.screenshotPath);
            try {
                probe.screenshot = await fs.readFile(resolved);
            }
            catch {
                // Missing screenshot file: leave probe.screenshot unset. The verifier's
                // evidence_insufficient path will handle it.
            }
        }
    };
    for (const [index, step] of parsed.steps.entries()) {
        if (step === null || typeof step !== "object") {
            throw new TypeError(`Trajectory step ${index} must be an object`);
        }
        // Rehydrate tier-2 probe screenshot from its on-disk file reference.
        await hydrateProbeScreenshot(step.probeEvidence);
        // Decode image modalities from disk references back to Buffer.
        if (step.agentEvidence?.modalities) {
            const modalities = [];
            for (const m of step.agentEvidence.modalities) {
                // The on-disk shape carries imagePath/bytesBase64 instead of bytes,
                // so we look through `unknown` rather than rely on the typed union.
                const onDisk = m;
                if (m.type === "image" && typeof onDisk.bytesBase64 === "string") {
                    modalities.push({
                        type: "image",
                        bytes: Buffer.from(onDisk.bytesBase64, "base64"),
                        mediaType: m.mediaType,
                    });
                    continue;
                }
                if (m.type === "image" && typeof onDisk.imagePath === "string") {
                    const resolved = resolveWithinTrajectoryDir(onDisk.imagePath, "imagePath");
                    try {
                        modalities.push({
                            type: "image",
                            bytes: await fs.readFile(resolved),
                            mediaType: m.mediaType,
                        });
                    }
                    catch {
                        // Missing agent image file: omit that image modality. The
                        // verifier's evidence_insufficient path will handle missing bytes.
                    }
                    continue;
                }
                modalities.push(m);
            }
            step.agentEvidence.modalities = modalities;
        }
    }
    await hydrateProbeScreenshot(parsed.finalObservation);
    return parsed;
}
138
/**
 * Produce the file name under which an evaluator result is stored.
 *
 * The live run writes `result.json`; this helper names the additional,
 * offline re-score outputs `result_<label>.json` (for example
 * `result_rescore-2026-05-11.json`) so that several attempts can sit next to
 * each other and be diffed.
 *
 * @param label Optional tag for this attempt; it is passed through
 *   normalizeResultLabel before being embedded in the name.
 * @returns The file name, without any directory component.
 */
export function nextResultFilename(label) {
    const safeLabel = normalizeResultLabel(label);
    return "result_" + safeLabel + ".json";
}
148
/**
 * Decide whether trajectories should be written to disk.
 *
 * Precedence:
 *  1. `override`, whenever it is not `undefined`;
 *  2. the `VERIFIER_PERSIST_TRAJECTORIES` environment variable
 *     (case-insensitive): "1" / "true" enable, "0" / "false" disable;
 *  3. otherwise persistence is on unless the `CI` environment variable is set
 *     to a non-empty value.
 *
 * @param override Explicit caller decision, or `undefined` to defer.
 * @returns Whether trajectories should be persisted.
 */
export function shouldPersistTrajectory(override) {
    if (override !== undefined) {
        return override;
    }
    switch (process.env.VERIFIER_PERSIST_TRAJECTORIES?.toLowerCase()) {
        case "1":
        case "true":
            return true;
        case "0":
        case "false":
            return false;
        default:
            // Unset or unrecognized value: fall back to "on unless CI".
            return !process.env.CI;
    }
}
161
/**
 * Write the on-disk trajectory layout under `dir`:
 *
 *   <dir>/
 *   ├── task_data.json
 *   ├── trajectory.json        (screenshots referenced by path)
 *   ├── screenshots/
 *   │   ├── probe/<N>.png
 *   │   └── agent/<N>[_M].png
 *   ├── scores/                (empty; populated separately)
 *   └── core.log
 *
 * Image bytes are externalized to PNG files; the in-memory Trajectory is left
 * untouched so callers can keep using it after persistence. Inline base64
 * image strings inside json modalities and tool results are replaced through
 * redactInlineImagePayloads before serialization, and bigint values are
 * written as decimal strings because JSON has no bigint representation.
 *
 * @param dir Target directory; created (recursively) when missing.
 * @param trajectory Trajectory to persist. Steps without `agentEvidence` are
 *   written with an empty modalities list.
 * @returns Resolves once every file has been written.
 */
export async function writeTrajectoryDir(dir, trajectory) {
    await fs.mkdir(dir, { recursive: true });
    await fs.mkdir(path.join(dir, "screenshots", "probe"), { recursive: true });
    await fs.mkdir(path.join(dir, "screenshots", "agent"), { recursive: true });
    // JSON.stringify throws a TypeError on bigint values; tool results may be
    // bigint, so serialize those as decimal strings instead of failing the
    // whole persistence step.
    const toJson = (value) => JSON.stringify(value, (_key, nested) => (typeof nested === "bigint" ? nested.toString() : nested), 2);
    const serializableSteps = [];
    // A single post-turn probe is fanned across every step of a multi-tool turn,
    // and a single agent screenshot is shared across every action a CUA provider
    // chose from it, so the same Buffer is shared by reference. Dedupe by
    // identity: write the PNG once and point every sharing step at the same file.
    const probePathByBuffer = new Map();
    const agentPathByBuffer = new Map();
    for (const [i, step] of trajectory.steps.entries()) {
        // Shallow copy so the caller's probe object keeps its screenshot Buffer.
        const probe = { ...step.probeEvidence };
        if (probe.screenshot) {
            let relPath = probePathByBuffer.get(probe.screenshot);
            if (!relPath) {
                relPath = `screenshots/probe/${i + 1}.png`;
                await fs.writeFile(path.join(dir, relPath), probe.screenshot);
                probePathByBuffer.set(probe.screenshot, relPath);
            }
            probe.screenshotPath = relPath;
            delete probe.screenshot;
        }
        // The loader treats agentEvidence as optional; do the same here.
        const sourceModalities = step.agentEvidence?.modalities ?? [];
        const imageModalities = sourceModalities.filter((m) => m.type === "image");
        // Only steps with several images get the `_M` suffix.
        const multipleImages = imageModalities.length > 1;
        let imageSeq = 0;
        const modalities = [];
        for (const m of sourceModalities) {
            if (m.type !== "image") {
                modalities.push(m.type === "json"
                    ? {
                        ...m,
                        content: redactInlineImagePayloads(m.content, step.actionName),
                    }
                    : m);
                continue;
            }
            let relPath = agentPathByBuffer.get(m.bytes);
            if (!relPath) {
                const suffix = multipleImages ? `_${imageSeq}` : "";
                relPath = `screenshots/agent/${i + 1}${suffix}.png`;
                await fs.writeFile(path.join(dir, relPath), m.bytes);
                agentPathByBuffer.set(m.bytes, relPath);
            }
            modalities.push({
                type: "image",
                imagePath: relPath,
                mediaType: m.mediaType,
            });
            imageSeq += 1;
        }
        serializableSteps.push({
            ...step,
            probeEvidence: probe,
            agentEvidence: { modalities },
            toolOutput: {
                ...step.toolOutput,
                result: redactInlineImagePayloads(step.toolOutput.result, step.actionName),
            },
        });
    }
    const finalObservation = trajectory.finalObservation === undefined
        ? undefined
        : { ...trajectory.finalObservation };
    if (finalObservation?.screenshot) {
        const relPath = "screenshots/probe/final.png";
        await fs.writeFile(path.join(dir, relPath), finalObservation.screenshot);
        finalObservation.screenshotPath = relPath;
        delete finalObservation.screenshot;
    }
    // Image modalities carry imagePath instead of raw bytes on disk; cast
    // through unknown rather than widen Trajectory's type contract.
    const serialized = {
        ...trajectory,
        steps: serializableSteps,
        ...(finalObservation ? { finalObservation } : {}),
    };
    await fs.writeFile(path.join(dir, "trajectory.json"), toJson(serialized));
    await fs.writeFile(path.join(dir, "task_data.json"), toJson({
        task: trajectory.task,
        status: trajectory.status,
        finalAnswer: trajectory.finalAnswer ?? null,
    }));
    await fs.mkdir(path.join(dir, "scores"), { recursive: true });
    await fs.writeFile(path.join(dir, "core.log"), coreLog(trajectory));
}
262
/**
 * Render the trajectory as newline-delimited JSON, one line per step.
 *
 * Each line holds the zero-based step index, the action name, the probe URL
 * (`null` when absent), the tool's `ok` flag and, when non-empty, the
 * reasoning text. The output always ends with a newline.
 */
function coreLog(trajectory) {
    const lines = trajectory.steps.map((step, index) => {
        const entry = {
            step: index,
            action: step.actionName,
            url: step.probeEvidence.url ?? null,
            ok: step.toolOutput.ok,
            // Empty reasoning becomes undefined so JSON.stringify drops the key.
            reasoning: step.reasoning || undefined,
        };
        return JSON.stringify(entry);
    });
    return `${lines.join("\n")}\n`;
}
273
+ //# sourceMappingURL=trajectory.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"trajectory.js","sourceRoot":"","sources":["../../../../../lib/v3/verifier/trajectory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAQ7B,OAAO,EAAE,yBAAyB,EAAE,MAAM,4BAA4B,CAAC;AAkBvE;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,MAAe;IAC7C,IAAI,MAAM,IAAI,IAAI;QAAE,OAAO,SAAS,CAAC;IACrC,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,MAAM,IAAI,SAAS,CAAC,0BAA0B,CAAC,CAAC;IAClD,CAAC;IAED,MAAM,SAAS,GAAG,MAAmB,CAAC;IACtC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;QACpC,MAAM,IAAI,SAAS,CAAC,kCAAkC,CAAC,CAAC;IAC1D,CAAC;IAED,OAAO;QACL,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YAClC,MAAM,SAAS,GAAG,uBAAuB,CAAC,IAAI,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;YACvE,MAAM,WAAW,GAAG,uBAAuB,CACzC,IAAI,CAAC,WAAW,EAChB,aAAa,CACd,CAAC;YACF,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;YAE3C,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACjE,MAAM,IAAI,SAAS,CACjB,qBAAqB,SAAS,wCAAwC,CACvE,CAAC;YACJ,CAAC;YAED,OAAO;gBACL,SAAS;gBACT,WAAW;gBACX,SAAS;gBACT,GAAG,CAAC,OAAO,IAAI,CAAC,SAAS,KAAK,QAAQ,IAAI;oBACxC,SAAS,EAAE,IAAI,CAAC,SAAS;iBAC1B,CAAC;aACH,CAAC;QACJ,CAAC,CAAC;KACH,CAAC;AACJ,CAAC;AAED,SAAS,uBAAuB,CAAC,KAAc,EAAE,SAAiB;IAChE,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QAC9C,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,IAAI,SAAS,CAAC,iCAAiC,SAAS,QAAQ,CAAC,CAAC;AAC1E,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAwB;IAClD,OAAO,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,UAAU,CAAC;AAC3C,CAAC;AAED,SAAS,oBAAoB,CAAC,KAAc;IAC1C,OAAO,CAAC,KAAK,IAAI,WAAW,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,OAAO,CAC7D,kBAAkB,EAClB,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,gFAAgF;AAChF,iBAAiB;AACjB,gFAAgF;AAEhF;;;;;;;;;;;;;GAaG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAAC,GAAW;IACtD,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;IAC5C,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;IACvC,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAExC,MAAM,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,iBAAiB,CAAC,CAAC;IACnE,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,EAAE,MAAM,
CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAsB5B,CAAC;IAEF,MAAM,0BAA0B,GAAG,CACjC,SAAiB,EACjB,SAAS,GAAG,gBAAgB,EACpB,EAAE;QACV,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;QACxD,MAAM,OAAO,GACX,QAAQ,KAAK,IAAI;YACjB,QAAQ,CAAC,UAAU,CAAC,KAAK,IAAI,CAAC,GAAG,EAAE,CAAC;YACpC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAE5B,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CACb,cAAc,SAAS,kCAAkC,SAAS,EAAE,CACrE,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC,CAAC;IAEF,MAAM,sBAAsB,GAAG,KAAK,EAClC,KAAyC,EAC1B,EAAE;QACjB,IAAI,KAAK,EAAE,cAAc,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC/C,MAAM,QAAQ,GAAG,0BAA0B,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;YAClE,IAAI,CAAC;gBACH,KAAK,CAAC,UAAU,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YACjD,CAAC;YAAC,MAAM,CAAC;gBACP,wEAAwE;gBACxE,6CAA6C;YAC/C,CAAC;QACH,CAAC;IACH,CAAC,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QAChC,qEAAqE;QACrE,MAAM,sBAAsB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAEjD,+DAA+D;QAC/D,IAAI,IAAI,CAAC,aAAa,EAAE,UAAU,EAAE,CAAC;YACnC,MAAM,UAAU,GAA4B,EAAE,CAAC;YAC/C,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,CAAC;gBAC9C,oEAAoE;gBACpE,oEAAoE;gBACpE,MAAM,GAAG,GAAG,CAGX,CAAC;gBACF,IAAI,CAAC,CAAC,IAAI,KAAK,OAAO,IAAI,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;oBAC9D,UAAU,CAAC,IAAI,CAAC;wBACd,IAAI,EAAE,OAAgB;wBACtB,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,QAAQ,CAAC;wBAC7C,SAAS,EAAE,CAAC,CAAC,SAAS;qBACvB,CAAC,CAAC;oBACH,SAAS;gBACX,CAAC;gBACD,IAAI,CAAC,CAAC,IAAI,KAAK,OAAO,IAAI,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;oBAC5D,MAAM,QAAQ,GAAG,0BAA0B,CACzC,GAAG,CAAC,SAAS,EACb,WAAW,CACZ,CAAC;oBACF,IAAI,CAAC;wBACH,UAAU,CAAC,IAAI,CAAC;4BACd,IAAI,EAAE,OAAgB;4BACtB,KAAK,EAAE,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC;4BAClC,SAAS,EAAE,CAAC,CAAC,SAAS;yBACvB,CAAC,CAAC;oBACL,CAAC;oBAAC,MAAM,CAAC;wBACP,0DAA0D;wBAC1D,mEAAmE;oBACrE,CAAC;oBACD,SAAS;gBACX,CAAC;gBACD,UAAU,CAAC,IAAI,CAAC,CAA0B,CAAC,CAAC;YAC9C,CAAC;YACD,IAAI,CAAC,aAAa,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,MAAM,s
BAAsB,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;IAEtD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAAC,KAAc;IAC/C,OAAO,UAAU,oBAAoB,CAAC,KAAK,CAAC,OAAO,CAAC;AACtD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CACrC,QAA6B;IAE7B,IAAI,QAAQ,KAAK,SAAS;QAAE,OAAO,QAAQ,CAAC;IAC5C,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,6BAA6B,EAAE,WAAW,EAAE,CAAC;IACrE,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,MAAM;QAAE,OAAO,IAAI,CAAC;IAC/C,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,OAAO;QAAE,OAAO,KAAK,CAAC;IACjD,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;AACzB,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,GAAW,EACX,UAAsB;IAEtB,MAAM,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzC,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5E,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE5E,MAAM,iBAAiB,GAAc,EAAE,CAAC;IACxC,6EAA6E;IAC7E,6EAA6E;IAC7E,sEAAsE;IACtE,8EAA8E;IAC9E,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAC;IACpD,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAC;IACpD,KAAK,MAAM,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;QACnD,MAAM,KAAK,GAAkB,EAAE,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QACvD,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;YACrB,IAAI,OAAO,GAAG,iBAAiB,CAAC,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YACtD,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,OAAO,GAAG,qBAAqB,CAAC,GAAG,CAAC,MAAM,CAAC;gBAC3C,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;gBAC9D,iBAAiB,CAAC,GAAG,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;YACnD,CAAC;YACD,KAAK,CAAC,cAAc,GAAG,OAAO,CAAC;YAC/B,OAAO,KAAK,CAAC,UAAU,CAAC;QAC1B,CAAC;QAED,MAAM,eAAe,GAAG,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,MAAM,CAC1D,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAC1B,CAAC;QACF,MAAM,cAAc,GAAG,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC;QAClD,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,MAAM,UAAU,GAAc,EAAE,CAAC;QACjC,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,CAAC;YAC9C,IAAI,CAAC,CAAC,IAAI,KAAK,OA
AO,EAAE,CAAC;gBACvB,UAAU,CAAC,IAAI,CACb,CAAC,CAAC,IAAI,KAAK,MAAM;oBACf,CAAC,CAAC;wBACE,GAAG,CAAC;wBACJ,OAAO,EAAE,yBAAyB,CAAC,CAAC,CAAC,OAAO,EAAE,IAAI,CAAC,UAAU,CAAC;qBAC/D;oBACH,CAAC,CAAC,CAAC,CACN,CAAC;gBACF,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YAC7C,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,MAAM,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBACpD,OAAO,GAAG,qBAAqB,CAAC,GAAG,CAAC,GAAG,MAAM,MAAM,CAAC;gBACpD,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC;gBACrD,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;YAC1C,CAAC;YACD,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,OAAO;gBACb,SAAS,EAAE,OAAO;gBAClB,SAAS,EAAE,CAAC,CAAC,SAAS;aACvB,CAAC,CAAC;YACH,QAAQ,IAAI,CAAC,CAAC;QAChB,CAAC;QACD,iBAAiB,CAAC,IAAI,CAAC;YACrB,GAAG,IAAI;YACP,aAAa,EAAE,KAAK;YACpB,aAAa,EAAE,EAAE,UAAU,EAAE;YAC7B,UAAU,EAAE;gBACV,GAAG,IAAI,CAAC,UAAU;gBAClB,MAAM,EAAE,yBAAyB,CAC/B,IAAI,CAAC,UAAU,CAAC,MAAM,EACtB,IAAI,CAAC,UAAU,CAChB;aACF;SACF,CAAC,CAAC;IACL,CAAC;IAED,MAAM,gBAAgB,GACpB,UAAU,CAAC,gBAAgB,KAAK,SAAS;QACvC,CAAC,CAAC,SAAS;QACX,CAAC,CAAC,EAAE,GAAG,UAAU,CAAC,gBAAgB,EAAE,CAAC;IACzC,IAAI,gBAAgB,EAAE,UAAU,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,6BAA6B,CAAC;QAC9C,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACzE,gBAAgB,CAAC,cAAc,GAAG,OAAO,CAAC;QAC1C,OAAO,gBAAgB,CAAC,UAAU,CAAC;IACrC,CAAC;IAED,sEAAsE;IACtE,gEAAgE;IAChE,MAAM,UAAU,GAAG;QACjB,GAAG,UAAU;QACb,KAAK,EAAE,iBAAiB;QACxB,GAAG,CAAC,gBAAgB,CAAC,CAAC,CAAC,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACvC,CAAC;IAEb,MAAM,EAAE,CAAC,SAAS,CAChB,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,iBAAiB,CAAC,EACjC,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CACpC,CAAC;IAEF,MAAM,EAAE,CAAC,SAAS,CAChB,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,gBAAgB,CAAC,EAChC,IAAI,CAAC,SAAS,CACZ;QACE,IAAI,EAAE,UAAU,CAAC,IAAI;QACrB,MAAM,EAAE,UAAU,CAAC,MAAM;QACzB,WAAW,EAAE,UAAU,CAAC,WAAW,IAAI,IAAI;KAC5C,EACD,IAAI,EACJ,CAAC,CACF,CACF,CAAC;IAEF,MAAM,EAAE,CAAC,KAAK,C
AAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC9D,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,UAAU,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;AACtE,CAAC;AAED,SAAS,OAAO,CAAC,UAAsB;IACrC,OAAO,CACL,UAAU,CAAC,KAAK;SACb,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CACf,IAAI,CAAC,SAAS,CAAC;QACb,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,IAAI,CAAC,UAAU;QACvB,GAAG,EAAE,IAAI,CAAC,aAAa,CAAC,GAAG,IAAI,IAAI;QACnC,EAAE,EAAE,IAAI,CAAC,UAAU,CAAC,EAAE;QACtB,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,SAAS;KACvC,CAAC,CACH;SACA,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CACrB,CAAC;AACJ,CAAC","sourcesContent":["import fs from \"node:fs/promises\";\nimport path from \"node:path\";\nimport type {\n AgentEvidenceModality,\n ProbeEvidence,\n Rubric,\n Trajectory,\n TrajectoryStep,\n} from \"./types.js\";\nimport { redactInlineImagePayloads } from \"./evidenceNormalization.js\";\n\ntype RawRubricCriterion = {\n criterion: unknown;\n description: unknown;\n max_points?: unknown;\n maxPoints?: unknown;\n condition?: unknown;\n};\n\ntype RawRubric = {\n items?: unknown;\n};\n\ntype PersistedProbeEvidence = ProbeEvidence & {\n screenshotPath?: string;\n};\n\n/**\n * Convert dataset or generated rubric JSON into the public Stagehand shape.\n * Snake-case dataset fields are accepted here so serialized quirks do not leak\n * into the canonical rubric type.\n */\nexport function normalizeRubric(rubric: unknown): Rubric | undefined {\n if (rubric == null) return undefined;\n if (typeof rubric !== \"object\") {\n throw new TypeError(\"Rubric must be an object\");\n }\n\n const rawRubric = rubric as RawRubric;\n if (!Array.isArray(rawRubric.items)) {\n throw new TypeError(\"Rubric is missing an items array\");\n }\n\n return {\n items: rawRubric.items.map((item) => {\n const criterion = normalizeRequiredString(item.criterion, \"criterion\");\n const description = normalizeRequiredString(\n item.description,\n \"description\",\n );\n const maxPoints = normalizeMaxPoints(item);\n\n if (typeof 
maxPoints !== \"number\" || !Number.isFinite(maxPoints)) {\n throw new TypeError(\n `Rubric criterion \"${criterion}\" is missing a numeric maxPoints value`,\n );\n }\n\n return {\n criterion,\n description,\n maxPoints,\n ...(typeof item.condition === \"string\" && {\n condition: item.condition,\n }),\n };\n }),\n };\n}\n\nfunction normalizeRequiredString(value: unknown, fieldName: string): string {\n if (typeof value === \"string\" && value.length) {\n return value;\n }\n\n throw new TypeError(`Rubric criterion is missing a ${fieldName} value`);\n}\n\nfunction normalizeMaxPoints(item: RawRubricCriterion): unknown {\n return item.maxPoints ?? item.max_points;\n}\n\nfunction normalizeResultLabel(label?: string): string {\n return (label ?? `rescore-${new Date().toISOString()}`).replace(\n /[^A-Za-z0-9._-]/g,\n \"_\",\n );\n}\n\n// ─────────────────────────────────────────────────────────────────────────────\n// On-disk loader\n// ─────────────────────────────────────────────────────────────────────────────\n\n/**\n * Hydrate a Trajectory from the on-disk directory layout written by\n * TrajectoryRecorder.persist(). 
Used by the offline re-scoring CLI (`bench\n * verify`) and by any consumer that wants to feed a saved trajectory back\n * into V3Evaluator.verify() without running an agent.\n *\n * Reverses the recorder's serialization tweaks:\n * - `probeEvidence.screenshotPath` → read file into `probeEvidence.screenshot`.\n * - Image modalities in `agentEvidence.modalities` carry `imagePath` on\n * disk instead of raw Buffer; legacy `bytesBase64` fixtures are also\n * accepted.\n *\n * @param dir absolute or cwd-relative path to a `<run-id>/<task-id>/` directory.\n */\nexport async function loadTrajectoryFromDisk(dir: string): Promise<Trajectory> {\n const fs = await import(\"node:fs/promises\");\n const path = await import(\"node:path\");\n const trajectoryDir = path.resolve(dir);\n\n const trajectoryPath = path.join(trajectoryDir, \"trajectory.json\");\n const raw = await fs.readFile(trajectoryPath, \"utf8\");\n const parsed = JSON.parse(raw) as Trajectory & {\n finalObservation?: PersistedProbeEvidence;\n steps: Array<\n TrajectoryStep & {\n agentEvidence: {\n modalities: Array<\n | { type: \"text\"; content: string }\n | {\n type: \"image\";\n mediaType: string;\n // On-disk forms. 
Current writer externalizes bytes to\n // imagePath; bytesBase64 is accepted for older fixtures.\n bytes?: unknown;\n bytesBase64?: string;\n imagePath?: string;\n }\n | { type: \"json\"; content: unknown }\n >;\n };\n probeEvidence: PersistedProbeEvidence;\n }\n >;\n };\n\n const resolveWithinTrajectoryDir = (\n candidate: string,\n fieldName = \"screenshotPath\",\n ): string => {\n const resolved = path.resolve(trajectoryDir, candidate);\n const relative = path.relative(trajectoryDir, resolved);\n const outside =\n relative === \"..\" ||\n relative.startsWith(`..${path.sep}`) ||\n path.isAbsolute(relative);\n\n if (outside) {\n throw new Error(\n `Trajectory ${fieldName} escapes trajectory directory: ${candidate}`,\n );\n }\n\n return resolved;\n };\n\n const hydrateProbeScreenshot = async (\n probe: PersistedProbeEvidence | undefined,\n ): Promise<void> => {\n if (probe?.screenshotPath && !probe.screenshot) {\n const resolved = resolveWithinTrajectoryDir(probe.screenshotPath);\n try {\n probe.screenshot = await fs.readFile(resolved);\n } catch {\n // Missing screenshot file: leave probe.screenshot unset. 
The verifier's\n // evidence_insufficient path will handle it.\n }\n }\n };\n\n for (const step of parsed.steps) {\n // Rehydrate tier-2 probe screenshot from its on-disk file reference.\n await hydrateProbeScreenshot(step.probeEvidence);\n\n // Decode image modalities from disk references back to Buffer.\n if (step.agentEvidence?.modalities) {\n const modalities: AgentEvidenceModality[] = [];\n for (const m of step.agentEvidence.modalities) {\n // The on-disk shape carries imagePath/bytesBase64 instead of bytes,\n // so we look through `unknown` rather than rely on the typed union.\n const raw = m as unknown as {\n bytesBase64?: string;\n imagePath?: string;\n };\n if (m.type === \"image\" && typeof raw.bytesBase64 === \"string\") {\n modalities.push({\n type: \"image\" as const,\n bytes: Buffer.from(raw.bytesBase64, \"base64\"),\n mediaType: m.mediaType,\n });\n continue;\n }\n if (m.type === \"image\" && typeof raw.imagePath === \"string\") {\n const resolved = resolveWithinTrajectoryDir(\n raw.imagePath,\n \"imagePath\",\n );\n try {\n modalities.push({\n type: \"image\" as const,\n bytes: await fs.readFile(resolved),\n mediaType: m.mediaType,\n });\n } catch {\n // Missing agent image file: omit that image modality. 
The\n // verifier's evidence_insufficient path will handle missing bytes.\n }\n continue;\n }\n modalities.push(m as AgentEvidenceModality);\n }\n step.agentEvidence.modalities = modalities;\n }\n }\n\n await hydrateProbeScreenshot(parsed.finalObservation);\n\n return parsed;\n}\n\n/**\n * Build a `result*.json` filename for persisted evaluator output.\n *\n * Convention: the live run writes `result.json`; offline re-score attempts use\n * a label-based name (e.g., `result_rescore-2026-05-11.json`) so they coexist\n * without collisions and remain easy to diff.\n */\nexport function nextResultFilename(label?: string): string {\n return `result_${normalizeResultLabel(label)}.json`;\n}\n\n/**\n * Default persistence policy: explicit override, then env, then \"on unless CI\".\n */\nexport function shouldPersistTrajectory(\n override: boolean | undefined,\n): boolean {\n if (override !== undefined) return override;\n const env = process.env.VERIFIER_PERSIST_TRAJECTORIES?.toLowerCase();\n if (env === \"1\" || env === \"true\") return true;\n if (env === \"0\" || env === \"false\") return false;\n return !process.env.CI;\n}\n\n/**\n * Write the on-disk trajectory layout under `dir`:\n *\n * <dir>/\n * ├── task_data.json\n * ├── trajectory.json (screenshots referenced by path)\n * ├── screenshots/\n * │ ├── probe/<N>.png\n * │ └── agent/<N>[_M].png\n * ├── scores/ (empty; populated separately)\n * └── core.log\n *\n * Image bytes are externalized to PNG files; the in-memory Trajectory is left\n * untouched so callers can keep using it after persistence.\n */\nexport async function writeTrajectoryDir(\n dir: string,\n trajectory: Trajectory,\n): Promise<void> {\n await fs.mkdir(dir, { recursive: true });\n await fs.mkdir(path.join(dir, \"screenshots\", \"probe\"), { recursive: true });\n await fs.mkdir(path.join(dir, \"screenshots\", \"agent\"), { recursive: true });\n\n const serializableSteps: unknown[] = [];\n // A single post-turn probe is fanned across every step of a 
multi-tool turn,\n // and a single agent screenshot is shared across every action a CUA provider\n // chose from it, so the same Buffer is shared by reference. Dedupe by\n // identity: write the PNG once and point every sharing step at the same file.\n const probePathByBuffer = new Map<Buffer, string>();\n const agentPathByBuffer = new Map<Buffer, string>();\n for (const [i, step] of trajectory.steps.entries()) {\n const probe: ProbeEvidence = { ...step.probeEvidence };\n if (probe.screenshot) {\n let relPath = probePathByBuffer.get(probe.screenshot);\n if (!relPath) {\n relPath = `screenshots/probe/${i + 1}.png`;\n await fs.writeFile(path.join(dir, relPath), probe.screenshot);\n probePathByBuffer.set(probe.screenshot, relPath);\n }\n probe.screenshotPath = relPath;\n delete probe.screenshot;\n }\n\n const imageModalities = step.agentEvidence.modalities.filter(\n (m) => m.type === \"image\",\n );\n const multipleImages = imageModalities.length > 1;\n let imageSeq = 0;\n const modalities: unknown[] = [];\n for (const m of step.agentEvidence.modalities) {\n if (m.type !== \"image\") {\n modalities.push(\n m.type === \"json\"\n ? {\n ...m,\n content: redactInlineImagePayloads(m.content, step.actionName),\n }\n : m,\n );\n continue;\n }\n let relPath = agentPathByBuffer.get(m.bytes);\n if (!relPath) {\n const suffix = multipleImages ? `_${imageSeq}` : \"\";\n relPath = `screenshots/agent/${i + 1}${suffix}.png`;\n await fs.writeFile(path.join(dir, relPath), m.bytes);\n agentPathByBuffer.set(m.bytes, relPath);\n }\n modalities.push({\n type: \"image\",\n imagePath: relPath,\n mediaType: m.mediaType,\n });\n imageSeq += 1;\n }\n serializableSteps.push({\n ...step,\n probeEvidence: probe,\n agentEvidence: { modalities },\n toolOutput: {\n ...step.toolOutput,\n result: redactInlineImagePayloads(\n step.toolOutput.result,\n step.actionName,\n ),\n },\n });\n }\n\n const finalObservation: ProbeEvidence | undefined =\n trajectory.finalObservation === undefined\n ? 
undefined\n : { ...trajectory.finalObservation };\n if (finalObservation?.screenshot) {\n const relPath = \"screenshots/probe/final.png\";\n await fs.writeFile(path.join(dir, relPath), finalObservation.screenshot);\n finalObservation.screenshotPath = relPath;\n delete finalObservation.screenshot;\n }\n\n // Image modalities carry imagePath instead of raw bytes on disk; cast\n // through unknown rather than widen Trajectory's type contract.\n const serialized = {\n ...trajectory,\n steps: serializableSteps,\n ...(finalObservation ? { finalObservation } : {}),\n } as unknown;\n\n await fs.writeFile(\n path.join(dir, \"trajectory.json\"),\n JSON.stringify(serialized, null, 2),\n );\n\n await fs.writeFile(\n path.join(dir, \"task_data.json\"),\n JSON.stringify(\n {\n task: trajectory.task,\n status: trajectory.status,\n finalAnswer: trajectory.finalAnswer ?? null,\n },\n null,\n 2,\n ),\n );\n\n await fs.mkdir(path.join(dir, \"scores\"), { recursive: true });\n await fs.writeFile(path.join(dir, \"core.log\"), coreLog(trajectory));\n}\n\nfunction coreLog(trajectory: Trajectory): string {\n return (\n trajectory.steps\n .map((step, i) =>\n JSON.stringify({\n step: i,\n action: step.actionName,\n url: step.probeEvidence.url ?? null,\n ok: step.toolOutput.ok,\n reasoning: step.reasoning || undefined,\n }),\n )\n .join(\"\\n\") + \"\\n\"\n );\n}\n"]}