@browserbasehq/orca 3.5.0-vertex-test → 3.5.1-preview.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/README.md +7 -3
  2. package/dist/cjs/lib/utils.d.ts +2 -0
  3. package/dist/cjs/lib/utils.js +20 -0
  4. package/dist/cjs/lib/utils.js.map +1 -1
  5. package/dist/cjs/lib/v3/agent/AgentProvider.js +1 -0
  6. package/dist/cjs/lib/v3/agent/AgentProvider.js.map +1 -1
  7. package/dist/cjs/lib/v3/agent/AnthropicCUAClient.js +1 -0
  8. package/dist/cjs/lib/v3/agent/AnthropicCUAClient.js.map +1 -1
  9. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.d.ts +35 -0
  10. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.js +38 -0
  11. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.js.map +1 -0
  12. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.d.ts +19 -0
  13. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.js +54 -0
  14. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.js.map +1 -0
  15. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.d.ts +2 -0
  16. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.js +62 -0
  17. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.js.map +1 -0
  18. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.d.ts +3 -0
  19. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.js +25 -0
  20. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.js.map +1 -0
  21. package/dist/cjs/lib/v3/api.d.ts +1 -0
  22. package/dist/cjs/lib/v3/api.js +37 -16
  23. package/dist/cjs/lib/v3/api.js.map +1 -1
  24. package/dist/cjs/lib/v3/dom/build/locatorScripts.generated.d.ts +24 -24
  25. package/dist/cjs/lib/v3/dom/build/locatorScripts.generated.js +24 -24
  26. package/dist/cjs/lib/v3/dom/build/locatorScripts.generated.js.map +1 -1
  27. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.d.ts +24 -0
  28. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.js +31 -0
  29. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.js.map +1 -0
  30. package/dist/cjs/lib/v3/dom/locatorScripts/xpathResolver.js +79 -10
  31. package/dist/cjs/lib/v3/dom/locatorScripts/xpathResolver.js.map +1 -1
  32. package/dist/cjs/lib/v3/handlers/v3AgentHandler.d.ts +1 -0
  33. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js +83 -7
  34. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  35. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.d.ts +11 -0
  36. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js +119 -5
  37. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  38. package/dist/cjs/lib/v3/index.d.ts +13 -1
  39. package/dist/cjs/lib/v3/index.js +19 -1
  40. package/dist/cjs/lib/v3/index.js.map +1 -1
  41. package/dist/cjs/lib/v3/llm/LLMProvider.d.ts +3 -0
  42. package/dist/cjs/lib/v3/llm/LLMProvider.js +44 -3
  43. package/dist/cjs/lib/v3/llm/LLMProvider.js.map +1 -1
  44. package/dist/cjs/lib/v3/types/public/agent.d.ts +8 -2
  45. package/dist/cjs/lib/v3/types/public/agent.js +1 -0
  46. package/dist/cjs/lib/v3/types/public/agent.js.map +1 -1
  47. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.d.ts +85 -0
  48. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.js +15 -0
  49. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.js.map +1 -0
  50. package/dist/cjs/lib/v3/types/public/api.d.ts +925 -182
  51. package/dist/cjs/lib/v3/types/public/api.js +138 -20
  52. package/dist/cjs/lib/v3/types/public/api.js.map +1 -1
  53. package/dist/cjs/lib/v3/types/public/clipboard.d.ts +15 -0
  54. package/dist/cjs/lib/v3/types/public/clipboard.js +3 -0
  55. package/dist/cjs/lib/v3/types/public/clipboard.js.map +1 -0
  56. package/dist/cjs/lib/v3/types/public/index.d.ts +2 -0
  57. package/dist/cjs/lib/v3/types/public/index.js +2 -0
  58. package/dist/cjs/lib/v3/types/public/index.js.map +1 -1
  59. package/dist/cjs/lib/v3/types/public/model.d.ts +30 -6
  60. package/dist/cjs/lib/v3/types/public/model.js.map +1 -1
  61. package/dist/cjs/lib/v3/types/public/page.d.ts +29 -0
  62. package/dist/cjs/lib/v3/types/public/page.js.map +1 -1
  63. package/dist/cjs/lib/v3/types/public/sdkErrors.d.ts +3 -0
  64. package/dist/cjs/lib/v3/types/public/sdkErrors.js +8 -2
  65. package/dist/cjs/lib/v3/types/public/sdkErrors.js.map +1 -1
  66. package/dist/cjs/lib/v3/understudy/clipboard.d.ts +24 -0
  67. package/dist/cjs/lib/v3/understudy/clipboard.js +166 -0
  68. package/dist/cjs/lib/v3/understudy/clipboard.js.map +1 -0
  69. package/dist/cjs/lib/v3/understudy/context.d.ts +3 -0
  70. package/dist/cjs/lib/v3/understudy/context.js +15 -0
  71. package/dist/cjs/lib/v3/understudy/context.js.map +1 -1
  72. package/dist/cjs/lib/v3/understudy/page.d.ts +23 -1
  73. package/dist/cjs/lib/v3/understudy/page.js +283 -0
  74. package/dist/cjs/lib/v3/understudy/page.js.map +1 -1
  75. package/dist/cjs/lib/v3/v3.js +15 -6
  76. package/dist/cjs/lib/v3/v3.js.map +1 -1
  77. package/dist/cjs/lib/v3/verifier/evidenceNormalization.d.ts +7 -0
  78. package/dist/cjs/lib/v3/verifier/evidenceNormalization.js +100 -0
  79. package/dist/cjs/lib/v3/verifier/evidenceNormalization.js.map +1 -0
  80. package/dist/cjs/lib/v3/verifier/index.d.ts +6 -0
  81. package/dist/cjs/lib/v3/verifier/index.js +16 -0
  82. package/dist/cjs/lib/v3/verifier/index.js.map +1 -0
  83. package/dist/cjs/lib/v3/verifier/trajectory.d.ts +50 -0
  84. package/dist/cjs/lib/v3/verifier/trajectory.js +316 -0
  85. package/dist/cjs/lib/v3/verifier/trajectory.js.map +1 -0
  86. package/dist/cjs/lib/v3/verifier/types.d.ts +281 -0
  87. package/dist/cjs/lib/v3/verifier/types.js +10 -0
  88. package/dist/cjs/lib/v3/verifier/types.js.map +1 -0
  89. package/dist/cjs/lib/v3Evaluator.d.ts +9 -4
  90. package/dist/cjs/lib/v3Evaluator.js +148 -0
  91. package/dist/cjs/lib/v3Evaluator.js.map +1 -1
  92. package/dist/cjs/lib/v3LegacyEvaluator.js +5 -1
  93. package/dist/cjs/lib/v3LegacyEvaluator.js.map +1 -1
  94. package/dist/cjs/lib/version.d.ts +1 -1
  95. package/dist/cjs/lib/version.js +1 -1
  96. package/dist/cjs/lib/version.js.map +1 -1
  97. package/dist/esm/lib/utils.d.ts +2 -0
  98. package/dist/esm/lib/utils.js +18 -0
  99. package/dist/esm/lib/utils.js.map +1 -1
  100. package/dist/esm/lib/v3/agent/AgentProvider.js +1 -0
  101. package/dist/esm/lib/v3/agent/AgentProvider.js.map +1 -1
  102. package/dist/esm/lib/v3/agent/AnthropicCUAClient.js +1 -0
  103. package/dist/esm/lib/v3/agent/AnthropicCUAClient.js.map +1 -1
  104. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.d.ts +35 -0
  105. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.js +35 -0
  106. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.js.map +1 -0
  107. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.d.ts +19 -0
  108. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.js +50 -0
  109. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.js.map +1 -0
  110. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.d.ts +2 -0
  111. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.js +59 -0
  112. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.js.map +1 -0
  113. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.d.ts +3 -0
  114. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.js +22 -0
  115. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.js.map +1 -0
  116. package/dist/esm/lib/v3/api.d.ts +1 -0
  117. package/dist/esm/lib/v3/api.js +38 -17
  118. package/dist/esm/lib/v3/api.js.map +1 -1
  119. package/dist/esm/lib/v3/dom/build/locatorScripts.generated.d.ts +24 -24
  120. package/dist/esm/lib/v3/dom/build/locatorScripts.generated.js +24 -24
  121. package/dist/esm/lib/v3/dom/build/locatorScripts.generated.js.map +1 -1
  122. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.d.ts +24 -0
  123. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.js +28 -0
  124. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.js.map +1 -0
  125. package/dist/esm/lib/v3/dom/locatorScripts/xpathResolver.js +79 -10
  126. package/dist/esm/lib/v3/dom/locatorScripts/xpathResolver.js.map +1 -1
  127. package/dist/esm/lib/v3/handlers/v3AgentHandler.d.ts +1 -0
  128. package/dist/esm/lib/v3/handlers/v3AgentHandler.js +83 -7
  129. package/dist/esm/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  130. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.d.ts +11 -0
  131. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js +119 -5
  132. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  133. package/dist/esm/lib/v3/index.d.ts +13 -1
  134. package/dist/esm/lib/v3/index.js +10 -0
  135. package/dist/esm/lib/v3/index.js.map +1 -1
  136. package/dist/esm/lib/v3/llm/LLMProvider.d.ts +3 -0
  137. package/dist/esm/lib/v3/llm/LLMProvider.js +43 -3
  138. package/dist/esm/lib/v3/llm/LLMProvider.js.map +1 -1
  139. package/dist/esm/lib/v3/types/public/agent.d.ts +8 -2
  140. package/dist/esm/lib/v3/types/public/agent.js +1 -0
  141. package/dist/esm/lib/v3/types/public/agent.js.map +1 -1
  142. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.d.ts +85 -0
  143. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.js +14 -0
  144. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.js.map +1 -0
  145. package/dist/esm/lib/v3/types/public/api.d.ts +925 -182
  146. package/dist/esm/lib/v3/types/public/api.js +136 -18
  147. package/dist/esm/lib/v3/types/public/api.js.map +1 -1
  148. package/dist/esm/lib/v3/types/public/clipboard.d.ts +15 -0
  149. package/dist/esm/lib/v3/types/public/clipboard.js +2 -0
  150. package/dist/esm/lib/v3/types/public/clipboard.js.map +1 -0
  151. package/dist/esm/lib/v3/types/public/index.d.ts +2 -0
  152. package/dist/esm/lib/v3/types/public/index.js +2 -0
  153. package/dist/esm/lib/v3/types/public/index.js.map +1 -1
  154. package/dist/esm/lib/v3/types/public/model.d.ts +30 -6
  155. package/dist/esm/lib/v3/types/public/model.js.map +1 -1
  156. package/dist/esm/lib/v3/types/public/page.d.ts +29 -0
  157. package/dist/esm/lib/v3/types/public/page.js.map +1 -1
  158. package/dist/esm/lib/v3/types/public/sdkErrors.d.ts +3 -0
  159. package/dist/esm/lib/v3/types/public/sdkErrors.js +5 -0
  160. package/dist/esm/lib/v3/types/public/sdkErrors.js.map +1 -1
  161. package/dist/esm/lib/v3/understudy/clipboard.d.ts +24 -0
  162. package/dist/esm/lib/v3/understudy/clipboard.js +163 -0
  163. package/dist/esm/lib/v3/understudy/clipboard.js.map +1 -0
  164. package/dist/esm/lib/v3/understudy/context.d.ts +3 -0
  165. package/dist/esm/lib/v3/understudy/context.js +15 -0
  166. package/dist/esm/lib/v3/understudy/context.js.map +1 -1
  167. package/dist/esm/lib/v3/understudy/page.d.ts +23 -1
  168. package/dist/esm/lib/v3/understudy/page.js +284 -1
  169. package/dist/esm/lib/v3/understudy/page.js.map +1 -1
  170. package/dist/esm/lib/v3/v3.js +16 -7
  171. package/dist/esm/lib/v3/v3.js.map +1 -1
  172. package/dist/esm/lib/v3/verifier/evidenceNormalization.d.ts +7 -0
  173. package/dist/esm/lib/v3/verifier/evidenceNormalization.js +93 -0
  174. package/dist/esm/lib/v3/verifier/evidenceNormalization.js.map +1 -0
  175. package/dist/esm/lib/v3/verifier/index.d.ts +6 -0
  176. package/dist/esm/lib/v3/verifier/index.js +3 -0
  177. package/dist/esm/lib/v3/verifier/index.js.map +1 -0
  178. package/dist/esm/lib/v3/verifier/trajectory.d.ts +50 -0
  179. package/dist/esm/lib/v3/verifier/trajectory.js +273 -0
  180. package/dist/esm/lib/v3/verifier/trajectory.js.map +1 -0
  181. package/dist/esm/lib/v3/verifier/types.d.ts +281 -0
  182. package/dist/esm/lib/v3/verifier/types.js +9 -0
  183. package/dist/esm/lib/v3/verifier/types.js.map +1 -0
  184. package/dist/esm/lib/v3Evaluator.d.ts +9 -4
  185. package/dist/esm/lib/v3Evaluator.js +148 -0
  186. package/dist/esm/lib/v3Evaluator.js.map +1 -1
  187. package/dist/esm/lib/v3LegacyEvaluator.js +5 -1
  188. package/dist/esm/lib/v3LegacyEvaluator.js.map +1 -1
  189. package/dist/esm/lib/version.d.ts +1 -1
  190. package/dist/esm/lib/version.js +1 -1
  191. package/dist/esm/lib/version.js.map +1 -1
  192. package/package.json +16 -10
@@ -0,0 +1,7 @@
1
+ import type { AgentStepFinishedEvent } from "../types/public/agentEvidenceEvents.js";
2
+ import type { AgentEvidence } from "./types.js";
3
+ export declare const REDACTED_INLINE_IMAGE = "[redacted inline image payload]";
4
+ export declare function collectInlineImagePayloads(value: unknown, actionName?: string, out?: string[]): string[];
5
+ export declare function redactInlineImagePayloads(value: unknown, actionName?: string): unknown;
6
+ export declare function mergeAgentEvidence(...parts: Array<AgentEvidence | undefined>): AgentEvidence;
7
+ export declare function buildAgentEvidenceFromStepFinished(event: AgentStepFinishedEvent): AgentEvidence;
@@ -0,0 +1,100 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.REDACTED_INLINE_IMAGE = void 0;
4
+ exports.collectInlineImagePayloads = collectInlineImagePayloads;
5
+ exports.redactInlineImagePayloads = redactInlineImagePayloads;
6
+ exports.mergeAgentEvidence = mergeAgentEvidence;
7
+ exports.buildAgentEvidenceFromStepFinished = buildAgentEvidenceFromStepFinished;
8
+ exports.REDACTED_INLINE_IMAGE = "[redacted inline image payload]";
9
/** Property names whose string values are always treated as inline base64 image data. */
const INLINE_IMAGE_KEYS = new Set(["screenshotBase64"]);
/**
 * Decide whether the string stored under `key` should be treated as an inline
 * image payload.
 *
 * @param key - Property name being inspected.
 * @param actionName - Name of the action that produced the value, if known.
 * @returns `true` for every key listed in `INLINE_IMAGE_KEYS`, and for a
 *   `base64` key when the action is exactly `"screenshot"`; `false` otherwise.
 */
function shouldRedactBase64Key(key, actionName) {
    // Keys in the allow-list are image payloads regardless of the action.
    if (INLINE_IMAGE_KEYS.has(key)) {
        return true;
    }
    // A bare `base64` key is only trusted to be an image for screenshot actions.
    const isScreenshotAction = actionName === "screenshot";
    return isScreenshotAction && key === "base64";
}
14
/**
 * Walk an arbitrary value and gather every inline base64 image string it
 * contains, in depth-first traversal order.
 *
 * A string counts as an image payload when `shouldRedactBase64Key` accepts the
 * property name it is stored under. Primitives and Buffers are never
 * descended into.
 *
 * @param value - Any value; only objects and arrays are traversed.
 * @param actionName - Name of the action that produced `value`, forwarded to
 *   `shouldRedactBase64Key`.
 * @param out - Accumulator the payloads are appended to (mutated in place).
 * @returns The same `out` array that was passed in (or a fresh one).
 */
function collectInlineImagePayloads(value, actionName, out = []) {
    // Track only the objects on the current path from the root. This stops
    // self-referential input from recursing until the stack overflows, while
    // an object that is merely referenced twice is still visited each time.
    const ancestors = new WeakSet();
    const visit = (node) => {
        if (!node || typeof node !== "object" || Buffer.isBuffer(node)) {
            return;
        }
        if (ancestors.has(node)) {
            return; // cycle: this object is already being walked further up
        }
        ancestors.add(node);
        if (Array.isArray(node)) {
            for (const item of node) {
                visit(item);
            }
        }
        else {
            for (const [key, nested] of Object.entries(node)) {
                if (typeof nested === "string" && shouldRedactBase64Key(key, actionName)) {
                    out.push(nested);
                }
                else {
                    visit(nested);
                }
            }
        }
        ancestors.delete(node);
    };
    visit(value);
    return out;
}
34
/**
 * Produce a copy of `value` in which every inline base64 image string is
 * replaced by the `REDACTED_INLINE_IMAGE` placeholder.
 *
 * Primitives and Buffers are returned as-is. Arrays are copied element by
 * element. Any other object is rebuilt from its own enumerable string-keyed
 * properties (`Object.entries`), so the result is always a plain object.
 *
 * @param value - Any value; only objects and arrays are copied.
 * @param actionName - Name of the action that produced `value`, forwarded to
 *   `shouldRedactBase64Key`.
 * @returns The redacted copy; the input is not mutated.
 */
function redactInlineImagePayloads(value, actionName) {
    // Placeholder written where an object refers back to one of its own
    // ancestors, so the copy stays finite and JSON-serializable.
    const CIRCULAR_REFERENCE = "[Circular]";
    // Only objects on the current root-to-node path are tracked; an object
    // that is merely referenced twice is copied each time, as before.
    const ancestors = new WeakSet();
    const redact = (node) => {
        if (!node || typeof node !== "object" || Buffer.isBuffer(node)) {
            return node;
        }
        if (ancestors.has(node)) {
            return CIRCULAR_REFERENCE;
        }
        ancestors.add(node);
        let copy;
        if (Array.isArray(node)) {
            copy = node.map((item) => redact(item));
        }
        else {
            copy = {};
            for (const [key, nested] of Object.entries(node)) {
                copy[key] =
                    typeof nested === "string" && shouldRedactBase64Key(key, actionName)
                        ? exports.REDACTED_INLINE_IMAGE
                        : redact(nested);
            }
        }
        ancestors.delete(node);
        return copy;
    };
    return redact(value);
}
51
/**
 * Combine several evidence objects into one by concatenating their
 * `modalities` lists in argument order.
 *
 * @param parts - Evidence objects to merge; `undefined` entries and entries
 *   without a `modalities` list contribute nothing.
 * @returns A new evidence object holding a freshly allocated `modalities`
 *   array; the modality entries themselves are shared, not cloned.
 */
function mergeAgentEvidence(...parts) {
    const modalitiesPerPart = parts.map((part) => {
        const own = part?.modalities;
        return own ?? [];
    });
    // Flatten exactly one level, so the modality entries stay intact.
    return { modalities: modalitiesPerPart.flat() };
}
56
/**
 * Convert a step-finished event into agent evidence modalities.
 *
 * Modalities are emitted in this order:
 *   1. the step's reasoning as text, when it is non-empty;
 *   2. the tool result:
 *      - string, number, boolean or bigint: one text modality;
 *      - Buffer: one image modality, labelled `image/png`;
 *      - any other object: one image modality per inline base64 payload that
 *        decodes successfully, followed by one JSON modality holding the
 *        result with those payloads redacted.
 * A `null` or `undefined` result contributes nothing.
 *
 * @param event - Step-finished event; `reasoning`, `actionName` and
 *   `toolOutput.result` are read.
 * @returns An evidence object with the collected `modalities`.
 */
function buildAgentEvidenceFromStepFinished(event) {
    // `Buffer.from(text, "base64")` does not throw on malformed input, so the
    // payload is validated explicitly. Returns undefined for anything that is
    // not plausible base64 or that decodes to zero bytes.
    const decodeInlineImage = (payload) => {
        const compact = payload.replace(/\s+/g, "");
        // Standard and URL-safe alphabets are accepted, as is missing padding;
        // a length of 4k + 1 can never be valid base64.
        const wellFormed = compact.length % 4 !== 1 &&
            /^[A-Za-z0-9+/_-]+={0,2}$/.test(compact);
        if (!wellFormed) {
            return undefined;
        }
        const bytes = Buffer.from(compact, "base64");
        return bytes.length > 0 ? bytes : undefined;
    };
    const modalities = [];
    if (event.reasoning) {
        modalities.push({ type: "text", content: event.reasoning });
    }
    const result = event.toolOutput.result;
    if (result === undefined || result === null) {
        return { modalities };
    }
    if (typeof result === "string") {
        modalities.push({ type: "text", content: result });
    }
    else if (typeof result === "number" ||
        typeof result === "boolean" ||
        typeof result === "bigint") {
        modalities.push({ type: "text", content: String(result) });
    }
    else if (Buffer.isBuffer(result)) {
        modalities.push({
            type: "image",
            bytes: result,
            mediaType: "image/png",
        });
    }
    else if (typeof result === "object") {
        for (const imageBase64 of collectInlineImagePayloads(result, event.actionName)) {
            const bytes = decodeInlineImage(imageBase64);
            if (bytes === undefined) {
                // Malformed base64; skip the image and keep the JSON modality.
                continue;
            }
            modalities.push({
                type: "image",
                bytes,
                mediaType: "image/png",
            });
        }
        modalities.push({
            type: "json",
            content: redactInlineImagePayloads(result, event.actionName),
        });
    }
    return { modalities };
}
100
+ //# sourceMappingURL=evidenceNormalization.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evidenceNormalization.js","sourceRoot":"","sources":["../../../../../lib/v3/verifier/evidenceNormalization.ts"],"names":[],"mappings":";;;AAcA,gEAuBC;AAED,8DAmBC;AAED,gDAMC;AAED,gFAiDC;AAlHY,QAAA,qBAAqB,GAAG,iCAAiC,CAAC;AAEvE,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC;AAExD,SAAS,qBAAqB,CAAC,GAAW,EAAE,UAAmB;IAC7D,OAAO,CACL,iBAAiB,CAAC,GAAG,CAAC,GAAG,CAAC;QAC1B,CAAC,UAAU,KAAK,YAAY,IAAI,GAAG,KAAK,QAAQ,CAAC,CAClD,CAAC;AACJ,CAAC;AAED,SAAgB,0BAA0B,CACxC,KAAc,EACd,UAAmB,EACnB,MAAgB,EAAE;IAElB,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,GAAG,CAAC;IACpD,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAEvC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,0BAA0B,CAAC,IAAI,EAAE,UAAU,EAAE,GAAG,CAAC,CAAC;QACpD,CAAC;QACD,OAAO,GAAG,CAAC;IACb,CAAC;IAED,KAAK,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QAClD,IAAI,qBAAqB,CAAC,GAAG,EAAE,UAAU,CAAC,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;YACzE,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACjB,SAAS;QACX,CAAC;QACD,0BAA0B,CAAC,MAAM,EAAE,UAAU,EAAE,GAAG,CAAC,CAAC;IACtD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAgB,yBAAyB,CACvC,KAAc,EACd,UAAmB;IAEnB,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IACtD,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAEzC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,yBAAyB,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC;IAC1E,CAAC;IAED,MAAM,GAAG,GAA4B,EAAE,CAAC;IACxC,KAAK,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QAClD,GAAG,CAAC,GAAG,CAAC;YACN,qBAAqB,CAAC,GAAG,EAAE,UAAU,CAAC,IAAI,OAAO,MAAM,KAAK,QAAQ;gBAClE,CAAC,CAAC,6BAAqB;gBACvB,CAAC,CAAC,yBAAyB,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IACtD,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAgB,kBAAkB,CAChC,GAAG,KAAuC;IAE1C,OAAO;QACL,UAAU,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,UAAU,IAAI,EAAE,CAAC;KACtD,CAAC;AACJ,CAAC;AAED,SAAgB,kCAAkC,CAChD,KAA6B;IAE7B,M
AAM,UAAU,GAAgC,EAAE,CAAC;IACnD,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;QACpB,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,SAAS,EAAE,CAAC,CAAC;IAC9D,CAAC;IAED,MAAM,MAAM,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC;IACvC,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QAC5C,OAAO,EAAE,UAAU,EAAE,CAAC;IACxB,CAAC;IAED,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;IACrD,CAAC;SAAM,IACL,OAAO,MAAM,KAAK,QAAQ;QAC1B,OAAO,MAAM,KAAK,SAAS;QAC3B,OAAO,MAAM,KAAK,QAAQ,EAC1B,CAAC;QACD,UAAU,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAC7D,CAAC;SAAM,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACnC,UAAU,CAAC,IAAI,CAAC;YACd,IAAI,EAAE,OAAO;YACb,KAAK,EAAE,MAAM;YACb,SAAS,EAAE,WAAW;SACvB,CAAC,CAAC;IACL,CAAC;SAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QACtC,KAAK,MAAM,WAAW,IAAI,0BAA0B,CAClD,MAAM,EACN,KAAK,CAAC,UAAU,CACjB,EAAE,CAAC;YACF,IAAI,CAAC;gBACH,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,OAAO;oBACb,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,WAAW,EAAE,QAAQ,CAAC;oBACzC,SAAS,EAAE,WAAW;iBACvB,CAAC,CAAC;YACL,CAAC;YAAC,MAAM,CAAC;gBACP,+DAA+D;YACjE,CAAC;QACH,CAAC;QACD,UAAU,CAAC,IAAI,CAAC;YACd,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,yBAAyB,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,CAAC;SAC7D,CAAC,CAAC;IACL,CAAC;IAED,OAAO,EAAE,UAAU,EAAE,CAAC;AACxB,CAAC","sourcesContent":["import type { AgentStepFinishedEvent } from \"../types/public/agentEvidenceEvents.js\";\nimport type { AgentEvidence } from \"./types.js\";\n\nexport const REDACTED_INLINE_IMAGE = \"[redacted inline image payload]\";\n\nconst INLINE_IMAGE_KEYS = new Set([\"screenshotBase64\"]);\n\nfunction shouldRedactBase64Key(key: string, actionName?: string): boolean {\n return (\n INLINE_IMAGE_KEYS.has(key) ||\n (actionName === \"screenshot\" && key === \"base64\")\n );\n}\n\nexport function collectInlineImagePayloads(\n value: unknown,\n actionName?: string,\n out: string[] = [],\n): string[] {\n if (!value || typeof value !== \"object\") return out;\n if 
(Buffer.isBuffer(value)) return out;\n\n if (Array.isArray(value)) {\n for (const item of value) {\n collectInlineImagePayloads(item, actionName, out);\n }\n return out;\n }\n\n for (const [key, nested] of Object.entries(value)) {\n if (shouldRedactBase64Key(key, actionName) && typeof nested === \"string\") {\n out.push(nested);\n continue;\n }\n collectInlineImagePayloads(nested, actionName, out);\n }\n return out;\n}\n\nexport function redactInlineImagePayloads(\n value: unknown,\n actionName?: string,\n): unknown {\n if (!value || typeof value !== \"object\") return value;\n if (Buffer.isBuffer(value)) return value;\n\n if (Array.isArray(value)) {\n return value.map((item) => redactInlineImagePayloads(item, actionName));\n }\n\n const out: Record<string, unknown> = {};\n for (const [key, nested] of Object.entries(value)) {\n out[key] =\n shouldRedactBase64Key(key, actionName) && typeof nested === \"string\"\n ? REDACTED_INLINE_IMAGE\n : redactInlineImagePayloads(nested, actionName);\n }\n return out;\n}\n\nexport function mergeAgentEvidence(\n ...parts: Array<AgentEvidence | undefined>\n): AgentEvidence {\n return {\n modalities: parts.flatMap((p) => p?.modalities ?? 
[]),\n };\n}\n\nexport function buildAgentEvidenceFromStepFinished(\n event: AgentStepFinishedEvent,\n): AgentEvidence {\n const modalities: AgentEvidence[\"modalities\"] = [];\n if (event.reasoning) {\n modalities.push({ type: \"text\", content: event.reasoning });\n }\n\n const result = event.toolOutput.result;\n if (result === undefined || result === null) {\n return { modalities };\n }\n\n if (typeof result === \"string\") {\n modalities.push({ type: \"text\", content: result });\n } else if (\n typeof result === \"number\" ||\n typeof result === \"boolean\" ||\n typeof result === \"bigint\"\n ) {\n modalities.push({ type: \"text\", content: String(result) });\n } else if (Buffer.isBuffer(result)) {\n modalities.push({\n type: \"image\",\n bytes: result,\n mediaType: \"image/png\",\n });\n } else if (typeof result === \"object\") {\n for (const imageBase64 of collectInlineImagePayloads(\n result,\n event.actionName,\n )) {\n try {\n modalities.push({\n type: \"image\",\n bytes: Buffer.from(imageBase64, \"base64\"),\n mediaType: \"image/png\",\n });\n } catch {\n // Malformed base64; skip the image and keep the JSON modality.\n }\n }\n modalities.push({\n type: \"json\",\n content: redactInlineImagePayloads(result, event.actionName),\n });\n }\n\n return { modalities };\n}\n"]}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Public re-exports for the verifier subsystem.
3
+ */
4
+ export type { AgentEvidence, AgentEvidenceModality, CriterionScore, EvaluationResult, FirstPointOfFailure, ProbeEvidence, Rubric, RubricCriterion, TaskSpec, TaskValidity, ToolOutput, Trajectory, TrajectoryStatus, TrajectoryStep, TrajectoryUsage, Verifier, VerifierFinding, VerifierRawSteps, } from "./types.js";
5
+ export { buildAgentEvidenceFromStepFinished, collectInlineImagePayloads, mergeAgentEvidence, redactInlineImagePayloads, REDACTED_INLINE_IMAGE, } from "./evidenceNormalization.js";
6
+ export { loadTrajectoryFromDisk, nextResultFilename, normalizeRubric, shouldPersistTrajectory, writeTrajectoryDir, } from "./trajectory.js";
@@ -0,0 +1,16 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.writeTrajectoryDir = exports.shouldPersistTrajectory = exports.normalizeRubric = exports.nextResultFilename = exports.loadTrajectoryFromDisk = exports.REDACTED_INLINE_IMAGE = exports.redactInlineImagePayloads = exports.mergeAgentEvidence = exports.collectInlineImagePayloads = exports.buildAgentEvidenceFromStepFinished = void 0;
4
+ var evidenceNormalization_js_1 = require("./evidenceNormalization.js");
5
+ Object.defineProperty(exports, "buildAgentEvidenceFromStepFinished", { enumerable: true, get: function () { return evidenceNormalization_js_1.buildAgentEvidenceFromStepFinished; } });
6
+ Object.defineProperty(exports, "collectInlineImagePayloads", { enumerable: true, get: function () { return evidenceNormalization_js_1.collectInlineImagePayloads; } });
7
+ Object.defineProperty(exports, "mergeAgentEvidence", { enumerable: true, get: function () { return evidenceNormalization_js_1.mergeAgentEvidence; } });
8
+ Object.defineProperty(exports, "redactInlineImagePayloads", { enumerable: true, get: function () { return evidenceNormalization_js_1.redactInlineImagePayloads; } });
9
+ Object.defineProperty(exports, "REDACTED_INLINE_IMAGE", { enumerable: true, get: function () { return evidenceNormalization_js_1.REDACTED_INLINE_IMAGE; } });
10
+ var trajectory_js_1 = require("./trajectory.js");
11
+ Object.defineProperty(exports, "loadTrajectoryFromDisk", { enumerable: true, get: function () { return trajectory_js_1.loadTrajectoryFromDisk; } });
12
+ Object.defineProperty(exports, "nextResultFilename", { enumerable: true, get: function () { return trajectory_js_1.nextResultFilename; } });
13
+ Object.defineProperty(exports, "normalizeRubric", { enumerable: true, get: function () { return trajectory_js_1.normalizeRubric; } });
14
+ Object.defineProperty(exports, "shouldPersistTrajectory", { enumerable: true, get: function () { return trajectory_js_1.shouldPersistTrajectory; } });
15
+ Object.defineProperty(exports, "writeTrajectoryDir", { enumerable: true, get: function () { return trajectory_js_1.writeTrajectoryDir; } });
16
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../../../lib/v3/verifier/index.ts"],"names":[],"mappings":";;;AAuBA,uEAMoC;AALlC,8IAAA,kCAAkC,OAAA;AAClC,sIAAA,0BAA0B,OAAA;AAC1B,8HAAA,kBAAkB,OAAA;AAClB,qIAAA,yBAAyB,OAAA;AACzB,iIAAA,qBAAqB,OAAA;AAEvB,iDAMyB;AALvB,uHAAA,sBAAsB,OAAA;AACtB,mHAAA,kBAAkB,OAAA;AAClB,gHAAA,eAAe,OAAA;AACf,wHAAA,uBAAuB,OAAA;AACvB,mHAAA,kBAAkB,OAAA","sourcesContent":["/**\n * Public re-exports for the verifier subsystem.\n */\nexport type {\n AgentEvidence,\n AgentEvidenceModality,\n CriterionScore,\n EvaluationResult,\n FirstPointOfFailure,\n ProbeEvidence,\n Rubric,\n RubricCriterion,\n TaskSpec,\n TaskValidity,\n ToolOutput,\n Trajectory,\n TrajectoryStatus,\n TrajectoryStep,\n TrajectoryUsage,\n Verifier,\n VerifierFinding,\n VerifierRawSteps,\n} from \"./types.js\";\nexport {\n buildAgentEvidenceFromStepFinished,\n collectInlineImagePayloads,\n mergeAgentEvidence,\n redactInlineImagePayloads,\n REDACTED_INLINE_IMAGE,\n} from \"./evidenceNormalization.js\";\nexport {\n loadTrajectoryFromDisk,\n nextResultFilename,\n normalizeRubric,\n shouldPersistTrajectory,\n writeTrajectoryDir,\n} from \"./trajectory.js\";\n"]}
@@ -0,0 +1,50 @@
1
+ import type { Rubric, Trajectory } from "./types.js";
2
+ /**
3
+ * Convert dataset or generated rubric JSON into the public Stagehand shape.
4
+ * Snake-case dataset fields are accepted here so serialized quirks do not leak
5
+ * into the canonical rubric type.
6
+ */
7
+ export declare function normalizeRubric(rubric: unknown): Rubric | undefined;
8
+ /**
9
+ * Hydrate a Trajectory from the on-disk directory layout written by
10
+ * TrajectoryRecorder.persist(). Used by the offline re-scoring CLI (`bench
11
+ * verify`) and by any consumer that wants to feed a saved trajectory back
12
+ * into V3Evaluator.verify() without running an agent.
13
+ *
14
+ * Reverses the recorder's serialization tweaks:
15
+ * - `probeEvidence.screenshotPath` → read file into `probeEvidence.screenshot`.
16
+ * - Image modalities in `agentEvidence.modalities` carry `imagePath` on
17
+ * disk instead of raw Buffer; legacy `bytesBase64` fixtures are also
18
+ * accepted.
19
+ *
20
+ * @param dir absolute or cwd-relative path to a `<run-id>/<task-id>/` directory.
21
+ */
22
+ export declare function loadTrajectoryFromDisk(dir: string): Promise<Trajectory>;
23
+ /**
24
+ * Build a `result*.json` filename for persisted evaluator output.
25
+ *
26
+ * Convention: the live run writes `result.json`; offline re-score attempts use
27
+ * a label-based name (e.g., `result_rescore-2026-05-11.json`) so they coexist
28
+ * without collisions and remain easy to diff.
29
+ */
30
+ export declare function nextResultFilename(label?: string): string;
31
+ /**
32
+ * Default persistence policy: explicit override, then env, then "on unless CI".
33
+ */
34
+ export declare function shouldPersistTrajectory(override: boolean | undefined): boolean;
35
+ /**
36
+ * Write the on-disk trajectory layout under `dir`:
37
+ *
38
+ * <dir>/
39
+ * ├── task_data.json
40
+ * ├── trajectory.json (screenshots referenced by path)
41
+ * ├── screenshots/
42
+ * │ ├── probe/<N>.png
43
+ * │ └── agent/<N>[_M].png
44
+ * ├── scores/ (empty; populated separately)
45
+ * └── core.log
46
+ *
47
+ * Image bytes are externalized to PNG files; the in-memory Trajectory is left
48
+ * untouched so callers can keep using it after persistence.
49
+ */
50
+ export declare function writeTrajectoryDir(dir: string, trajectory: Trajectory): Promise<void>;
@@ -0,0 +1,316 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ var __importDefault = (this && this.__importDefault) || function (mod) {
36
+ return (mod && mod.__esModule) ? mod : { "default": mod };
37
+ };
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.normalizeRubric = normalizeRubric;
40
+ exports.loadTrajectoryFromDisk = loadTrajectoryFromDisk;
41
+ exports.nextResultFilename = nextResultFilename;
42
+ exports.shouldPersistTrajectory = shouldPersistTrajectory;
43
+ exports.writeTrajectoryDir = writeTrajectoryDir;
44
+ const promises_1 = __importDefault(require("node:fs/promises"));
45
+ const node_path_1 = __importDefault(require("node:path"));
46
+ const evidenceNormalization_js_1 = require("./evidenceNormalization.js");
47
/**
 * Convert dataset or generated rubric JSON into the public Stagehand shape.
 * Snake-case dataset fields (`max_points`) are accepted here so serialized
 * quirks do not leak into the canonical rubric type.
 *
 * @param rubric raw rubric value; `null`/`undefined` means "no rubric".
 * @returns `{ items }` holding one `{ criterion, description, maxPoints }`
 * entry per raw item (plus `condition` when the raw item carries a string
 * `condition`), or `undefined` when `rubric` is nullish.
 * @throws {TypeError} when `rubric` is not an object, has no `items` array,
 * contains an item that is not an object, or an item lacks a non-empty
 * `criterion`/`description` string or a finite numeric max-points value.
 */
function normalizeRubric(rubric) {
    if (rubric == null)
        return undefined;
    if (typeof rubric !== "object") {
        throw new TypeError("Rubric must be an object");
    }
    const rawRubric = rubric;
    if (!Array.isArray(rawRubric.items)) {
        throw new TypeError("Rubric is missing an items array");
    }
    return {
        items: rawRubric.items.map((item, index) => {
            // Guard before touching any property: a null/undefined entry would
            // otherwise fail with an opaque "Cannot read properties of null".
            if (item == null || typeof item !== "object") {
                throw new TypeError(`Rubric item at index ${index} must be an object`);
            }
            const criterion = normalizeRequiredString(item.criterion, "criterion");
            const description = normalizeRequiredString(item.description, "description");
            const maxPoints = normalizeMaxPoints(item);
            if (typeof maxPoints !== "number" || !Number.isFinite(maxPoints)) {
                throw new TypeError(`Rubric criterion "${criterion}" is missing a numeric maxPoints value`);
            }
            return {
                criterion,
                description,
                maxPoints,
                // Spreading `false` adds nothing, so `condition` only appears when
                // the raw item supplied it as a string.
                ...(typeof item.condition === "string" && {
                    condition: item.condition,
                }),
            };
        }),
    };
}
/**
 * Return `value` when it is a non-empty string; otherwise throw.
 *
 * @param value candidate field value taken from a raw rubric item.
 * @param fieldName field name used in the error message.
 * @throws {TypeError} when `value` is not a string or is empty.
 */
function normalizeRequiredString(value, fieldName) {
    if (typeof value === "string" && value.length) {
        return value;
    }
    throw new TypeError(`Rubric criterion is missing a ${fieldName} value`);
}
/**
 * Read the max-points value of a raw rubric item, preferring the camel-case
 * `maxPoints` and falling back to the snake-case `max_points` when the former
 * is null or undefined. The value is returned unvalidated.
 */
function normalizeMaxPoints(item) {
    return item.maxPoints ?? item.max_points;
}
90
/**
 * Turn a result label into a filename-safe token.
 *
 * When `label` is null or undefined, a `rescore-<ISO timestamp>` label is
 * generated from the current time. Every character other than ASCII letters,
 * digits, `.`, `_` and `-` is then replaced with `_`.
 */
function normalizeResultLabel(label) {
    const unsafeChars = /[^A-Za-z0-9._-]/g;
    let base = label;
    if (base === null || base === undefined) {
        base = `rescore-${new Date().toISOString()}`;
    }
    return base.replace(unsafeChars, "_");
}
93
+ // ─────────────────────────────────────────────────────────────────────────────
94
+ // On-disk loader
95
+ // ─────────────────────────────────────────────────────────────────────────────
96
/**
 * Hydrate a Trajectory from the on-disk directory layout written by
 * TrajectoryRecorder.persist(). Used by the offline re-scoring CLI (`bench
 * verify`) and by any consumer that wants to feed a saved trajectory back
 * into V3Evaluator.verify() without running an agent.
 *
 * Reverses the recorder's serialization tweaks:
 *   - `probeEvidence.screenshotPath` → read file into `probeEvidence.screenshot`
 *     (also applied to `finalObservation`).
 *   - Image modalities in `agentEvidence.modalities` carry `imagePath` on
 *     disk instead of raw Buffer; legacy `bytesBase64` fixtures are also
 *     accepted.
 *
 * Referenced image files that cannot be read are tolerated: the probe
 * screenshot is left unset and the agent image modality is omitted.
 *
 * @param dir absolute or cwd-relative path to a `<run-id>/<task-id>/` directory.
 * @returns the parsed trajectory object, mutated in place with image bytes.
 * @throws {TypeError} when `trajectory.json` does not contain an object with a
 * `steps` array, or a probe `screenshotPath` is not a string.
 * @throws {Error} when a referenced path resolves outside `dir`; reading or
 * JSON-parsing `trajectory.json` may also reject.
 */
async function loadTrajectoryFromDisk(dir) {
    const fs = await Promise.resolve().then(() => __importStar(require("node:fs/promises")));
    const path = await Promise.resolve().then(() => __importStar(require("node:path")));
    const trajectoryDir = path.resolve(dir);
    const trajectoryPath = path.join(trajectoryDir, "trajectory.json");
    const raw = await fs.readFile(trajectoryPath, "utf8");
    const parsed = JSON.parse(raw);
    // JSON.parse can yield any JSON value; fail with a message that names the
    // file instead of the generic "parsed.steps is not iterable".
    if (parsed === null || typeof parsed !== "object" || !Array.isArray(parsed.steps)) {
        throw new TypeError(`Trajectory file is missing a steps array: ${trajectoryPath}`);
    }
    // Resolve a path stored in the trajectory relative to the trajectory
    // directory and refuse anything that lands outside of it.
    const resolveWithinTrajectoryDir = (candidate, fieldName = "screenshotPath") => {
        if (typeof candidate !== "string") {
            throw new TypeError(`Trajectory ${fieldName} must be a string`);
        }
        const resolved = path.resolve(trajectoryDir, candidate);
        const relative = path.relative(trajectoryDir, resolved);
        // An absolute `relative` means no relative route exists between the two
        // locations (for example, a different drive on Windows).
        const outside = relative === ".." ||
            relative.startsWith(`..${path.sep}`) ||
            path.isAbsolute(relative);
        if (outside) {
            throw new Error(`Trajectory ${fieldName} escapes trajectory directory: ${candidate}`);
        }
        return resolved;
    };
    const hydrateProbeScreenshot = async (probe) => {
        if (probe?.screenshotPath && !probe.screenshot) {
            const resolved = resolveWithinTrajectoryDir(probe.screenshotPath);
            try {
                probe.screenshot = await fs.readFile(resolved);
            }
            catch {
                // Missing screenshot file: leave probe.screenshot unset. The verifier's
                // evidence_insufficient path will handle it.
            }
        }
    };
    for (const step of parsed.steps) {
        // Rehydrate tier-2 probe screenshot from its on-disk file reference.
        await hydrateProbeScreenshot(step.probeEvidence);
        // Decode image modalities from disk references back to Buffer.
        if (step.agentEvidence?.modalities) {
            const modalities = [];
            for (const m of step.agentEvidence.modalities) {
                // The on-disk shape carries imagePath/bytesBase64 instead of bytes.
                const raw = m;
                if (m.type === "image" && typeof raw.bytesBase64 === "string") {
                    // Legacy fixtures inline the bytes; they take precedence over
                    // imagePath when both are present.
                    modalities.push({
                        type: "image",
                        bytes: Buffer.from(raw.bytesBase64, "base64"),
                        mediaType: m.mediaType,
                    });
                    continue;
                }
                if (m.type === "image" && typeof raw.imagePath === "string") {
                    const resolved = resolveWithinTrajectoryDir(raw.imagePath, "imagePath");
                    try {
                        modalities.push({
                            type: "image",
                            bytes: await fs.readFile(resolved),
                            mediaType: m.mediaType,
                        });
                    }
                    catch {
                        // Missing agent image file: omit that image modality. The
                        // verifier's evidence_insufficient path will handle missing bytes.
                    }
                    continue;
                }
                // Non-image modalities (and images with neither field) pass through.
                modalities.push(m);
            }
            step.agentEvidence.modalities = modalities;
        }
    }
    await hydrateProbeScreenshot(parsed.finalObservation);
    return parsed;
}
181
/**
 * Produce the `result_<label>.json` filename used for persisted evaluator
 * output.
 *
 * Convention: the live run writes `result.json`, while offline re-score
 * attempts get a label-based name (e.g., `result_rescore-2026-05-11.json`) so
 * several attempts can sit side by side and be diffed.
 *
 * @param label optional label; it is passed through `normalizeResultLabel`,
 * which sanitizes it and supplies a timestamp-based default when omitted.
 * @returns the filename, without any directory component.
 */
function nextResultFilename(label) {
    const safeLabel = normalizeResultLabel(label);
    return "result_" + safeLabel + ".json";
}
191
/**
 * Default persistence policy: explicit override, then env, then "on unless CI".
 *
 * Resolution order:
 *   1. `override`, when it is not `undefined`.
 *   2. `VERIFIER_PERSIST_TRAJECTORIES` (case-insensitive): `1`/`true` → on,
 *      `0`/`false` → off; any other value falls through.
 *   3. On, unless the `CI` environment variable signals a CI run. `CI` is read
 *      with the same conventions as step 2, so an unset or empty `CI`, or
 *      `CI=0` / `CI=false`, does not count as CI.
 *
 * @param override explicit caller decision, or `undefined` to use defaults.
 * @returns whether trajectories should be written to disk.
 */
function shouldPersistTrajectory(override) {
    if (override !== undefined)
        return override;
    const env = process.env.VERIFIER_PERSIST_TRAJECTORIES?.toLowerCase();
    if (env === "1" || env === "true")
        return true;
    if (env === "0" || env === "false")
        return false;
    // A bare truthiness check would treat the strings "false" and "0" as
    // "running on CI"; interpret them the way the flag above is interpreted.
    const ci = process.env.CI?.toLowerCase();
    return ci === undefined || ci === "" || ci === "0" || ci === "false";
}
204
/**
 * Persist a trajectory under `dir` using this layout:
 *
 *   <dir>/
 *   ├── task_data.json
 *   ├── trajectory.json     (screenshots referenced by path)
 *   ├── screenshots/
 *   │   ├── probe/<N>.png   (plus probe/final.png for the final observation)
 *   │   └── agent/<N>[_M].png
 *   ├── scores/             (empty; populated separately)
 *   └── core.log
 *
 * `<N>` is the 1-based step number and `_M` is the 0-based image position,
 * added only when a step carries more than one image. Image bytes are moved
 * out into files (always named `.png`; the `mediaType` is kept in
 * trajectory.json) and inline image payloads in JSON modalities and tool
 * results are passed through `redactInlineImagePayloads`. Only shallow copies
 * are modified, so the in-memory Trajectory stays usable afterwards.
 *
 * @param dir target directory; created (recursively) when missing.
 * @param trajectory trajectory to serialize.
 */
async function writeTrajectoryDir(dir, trajectory) {
    const fsp = promises_1.default;
    const inDir = (...segments) => node_path_1.default.join(dir, ...segments);
    await fsp.mkdir(dir, { recursive: true });
    for (const kind of ["probe", "agent"]) {
        await fsp.mkdir(inDir("screenshots", kind), { recursive: true });
    }
    // One post-turn probe can be attached to every step of a multi-tool turn,
    // and one agent screenshot can back several CUA actions, so the very same
    // Buffer object may show up repeatedly. Key the caches on Buffer identity:
    // the first sighting writes the file, later sightings reuse its path.
    const writtenProbeShots = new Map();
    const writtenAgentShots = new Map();
    const writeOnce = async (cache, bytes, relPath) => {
        const known = cache.get(bytes);
        if (known) {
            return known;
        }
        await fsp.writeFile(inDir(relPath), bytes);
        cache.set(bytes, relPath);
        return relPath;
    };
    const persistedSteps = [];
    let stepNumber = 0;
    for (const step of trajectory.steps) {
        stepNumber += 1;
        // Shallow copy so the caller's probe keeps its screenshot Buffer.
        const probeCopy = Object.assign({}, step.probeEvidence);
        if (probeCopy.screenshot) {
            probeCopy.screenshotPath = await writeOnce(writtenProbeShots, probeCopy.screenshot, `screenshots/probe/${stepNumber}.png`);
            delete probeCopy.screenshot;
        }
        const sourceModalities = step.agentEvidence.modalities;
        let imageCount = 0;
        for (const entry of sourceModalities) {
            if (entry.type === "image") {
                imageCount += 1;
            }
        }
        const needsSuffix = imageCount > 1;
        const persistedModalities = [];
        let imageIndex = 0;
        for (const entry of sourceModalities) {
            if (entry.type === "image") {
                const suffix = needsSuffix ? `_${imageIndex}` : "";
                const imagePath = await writeOnce(writtenAgentShots, entry.bytes, `screenshots/agent/${stepNumber}${suffix}.png`);
                persistedModalities.push({
                    type: "image",
                    imagePath,
                    mediaType: entry.mediaType,
                });
                // Counts every image, including ones that reused an existing file.
                imageIndex += 1;
            }
            else if (entry.type === "json") {
                persistedModalities.push({
                    ...entry,
                    content: (0, evidenceNormalization_js_1.redactInlineImagePayloads)(entry.content, step.actionName),
                });
            }
            else {
                persistedModalities.push(entry);
            }
        }
        persistedSteps.push({
            ...step,
            probeEvidence: probeCopy,
            agentEvidence: { modalities: persistedModalities },
            toolOutput: {
                ...step.toolOutput,
                result: (0, evidenceNormalization_js_1.redactInlineImagePayloads)(step.toolOutput.result, step.actionName),
            },
        });
    }
    let finalCopy;
    if (trajectory.finalObservation !== undefined) {
        finalCopy = { ...trajectory.finalObservation };
    }
    if (finalCopy?.screenshot) {
        const finalShotPath = "screenshots/probe/final.png";
        await fsp.writeFile(inDir(finalShotPath), finalCopy.screenshot);
        finalCopy.screenshotPath = finalShotPath;
        delete finalCopy.screenshot;
    }
    // On disk, image modalities reference imagePath rather than raw bytes, so
    // this object intentionally departs from the in-memory Trajectory shape.
    const onDiskTrajectory = { ...trajectory, steps: persistedSteps };
    if (finalCopy) {
        onDiskTrajectory.finalObservation = finalCopy;
    }
    await fsp.writeFile(inDir("trajectory.json"), JSON.stringify(onDiskTrajectory, null, 2));
    const taskData = {
        task: trajectory.task,
        status: trajectory.status,
        finalAnswer: trajectory.finalAnswer ?? null,
    };
    await fsp.writeFile(inDir("task_data.json"), JSON.stringify(taskData, null, 2));
    await fsp.mkdir(inDir("scores"), { recursive: true });
    await fsp.writeFile(inDir("core.log"), coreLog(trajectory));
}
305
/**
 * Render the `core.log` text for a trajectory: one JSON object per step, one
 * per line, with a trailing newline.
 *
 * Each line holds the 0-based `step` index, the `action` name, the probe `url`
 * (`null` when absent), the tool output's `ok` flag and, when non-empty, the
 * step's `reasoning`.
 *
 * @param trajectory trajectory whose `steps` are summarized.
 * @returns the log text; a single newline when there are no steps.
 */
function coreLog(trajectory) {
    const lines = [];
    trajectory.steps.forEach((step, index) => {
        const entry = {
            step: index,
            action: step.actionName,
            url: step.probeEvidence.url ?? null,
            ok: step.toolOutput.ok,
            // `undefined` is dropped by JSON.stringify, so empty reasoning
            // leaves no key behind.
            reasoning: step.reasoning || undefined,
        };
        lines.push(JSON.stringify(entry));
    });
    return `${lines.join("\n")}\n`;
}
316
+ //# sourceMappingURL=trajectory.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"trajectory.js","sourceRoot":"","sources":["../../../../../lib/v3/verifier/trajectory.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAgCA,0CAoCC;AAuCD,wDAiHC;AASD,gDAEC;AAKD,0DAQC;AAiBD,gDAiHC;AAtXD,gEAAkC;AAClC,0DAA6B;AAQ7B,yEAAuE;AAkBvE;;;;GAIG;AACH,SAAgB,eAAe,CAAC,MAAe;IAC7C,IAAI,MAAM,IAAI,IAAI;QAAE,OAAO,SAAS,CAAC;IACrC,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,MAAM,IAAI,SAAS,CAAC,0BAA0B,CAAC,CAAC;IAClD,CAAC;IAED,MAAM,SAAS,GAAG,MAAmB,CAAC;IACtC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC;QACpC,MAAM,IAAI,SAAS,CAAC,kCAAkC,CAAC,CAAC;IAC1D,CAAC;IAED,OAAO;QACL,KAAK,EAAE,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YAClC,MAAM,SAAS,GAAG,uBAAuB,CAAC,IAAI,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;YACvE,MAAM,WAAW,GAAG,uBAAuB,CACzC,IAAI,CAAC,WAAW,EAChB,aAAa,CACd,CAAC;YACF,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAC;YAE3C,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBACjE,MAAM,IAAI,SAAS,CACjB,qBAAqB,SAAS,wCAAwC,CACvE,CAAC;YACJ,CAAC;YAED,OAAO;gBACL,SAAS;gBACT,WAAW;gBACX,SAAS;gBACT,GAAG,CAAC,OAAO,IAAI,CAAC,SAAS,KAAK,QAAQ,IAAI;oBACxC,SAAS,EAAE,IAAI,CAAC,SAAS;iBAC1B,CAAC;aACH,CAAC;QACJ,CAAC,CAAC;KACH,CAAC;AACJ,CAAC;AAED,SAAS,uBAAuB,CAAC,KAAc,EAAE,SAAiB;IAChE,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QAC9C,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,IAAI,SAAS,CAAC,iCAAiC,SAAS,QAAQ,CAAC,CAAC;AAC1E,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAwB;IAClD,OAAO,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,UAAU,CAAC;AAC3C,CAAC;AAED,SAAS,oBAAoB,CAAC,KAAc;IAC1C,OAAO,CAAC,KAAK,IAAI,WAAW,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,OAAO,CAC7D,kBAAkB,EAClB,GAAG,CACJ,CAAC;AACJ,CAAC;AAED,gFAAgF;AAChF,iBAAiB;AACjB,gFAAgF;AAEhF;;;;;;;;;;;;;GAaG;AACI,KAAK,UAAU,sBAAsB,CAAC,GAAW;IACtD,MAAM,EAAE,GAAG,wDAAa,kBAAkB,GAAC,CAAC;IAC5C,MAAM,IAAI,GAAG,wDAAa,WAAW,GAAC,CAAC;IACvC,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAExC,MAAM,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,iBAAiB,CAAC,CAAC;IACnE,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,EAAE,MAAM,CA
AC,CAAC;IACtD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAsB5B,CAAC;IAEF,MAAM,0BAA0B,GAAG,CACjC,SAAiB,EACjB,SAAS,GAAG,gBAAgB,EACpB,EAAE;QACV,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;QACxD,MAAM,OAAO,GACX,QAAQ,KAAK,IAAI;YACjB,QAAQ,CAAC,UAAU,CAAC,KAAK,IAAI,CAAC,GAAG,EAAE,CAAC;YACpC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAE5B,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CACb,cAAc,SAAS,kCAAkC,SAAS,EAAE,CACrE,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC,CAAC;IAEF,MAAM,sBAAsB,GAAG,KAAK,EAClC,KAAyC,EAC1B,EAAE;QACjB,IAAI,KAAK,EAAE,cAAc,IAAI,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YAC/C,MAAM,QAAQ,GAAG,0BAA0B,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;YAClE,IAAI,CAAC;gBACH,KAAK,CAAC,UAAU,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YACjD,CAAC;YAAC,MAAM,CAAC;gBACP,wEAAwE;gBACxE,6CAA6C;YAC/C,CAAC;QACH,CAAC;IACH,CAAC,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QAChC,qEAAqE;QACrE,MAAM,sBAAsB,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAEjD,+DAA+D;QAC/D,IAAI,IAAI,CAAC,aAAa,EAAE,UAAU,EAAE,CAAC;YACnC,MAAM,UAAU,GAA4B,EAAE,CAAC;YAC/C,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,CAAC;gBAC9C,oEAAoE;gBACpE,oEAAoE;gBACpE,MAAM,GAAG,GAAG,CAGX,CAAC;gBACF,IAAI,CAAC,CAAC,IAAI,KAAK,OAAO,IAAI,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;oBAC9D,UAAU,CAAC,IAAI,CAAC;wBACd,IAAI,EAAE,OAAgB;wBACtB,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,QAAQ,CAAC;wBAC7C,SAAS,EAAE,CAAC,CAAC,SAAS;qBACvB,CAAC,CAAC;oBACH,SAAS;gBACX,CAAC;gBACD,IAAI,CAAC,CAAC,IAAI,KAAK,OAAO,IAAI,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;oBAC5D,MAAM,QAAQ,GAAG,0BAA0B,CACzC,GAAG,CAAC,SAAS,EACb,WAAW,CACZ,CAAC;oBACF,IAAI,CAAC;wBACH,UAAU,CAAC,IAAI,CAAC;4BACd,IAAI,EAAE,OAAgB;4BACtB,KAAK,EAAE,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC;4BAClC,SAAS,EAAE,CAAC,CAAC,SAAS;yBACvB,CAAC,CAAC;oBACL,CAAC;oBAAC,MAAM,CAAC;wBACP,0DAA0D;wBAC1D,mEAAmE;oBACrE,CAAC;oBACD,SAAS;gBACX,CAAC;gBACD,UAAU,CAAC,IAAI,CAAC,CAA0B,CAAC,CAAC;YAC9C,CAAC;YACD,IAAI,CAAC,aAAa,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,MAAM,sBA
AsB,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC;IAEtD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,kBAAkB,CAAC,KAAc;IAC/C,OAAO,UAAU,oBAAoB,CAAC,KAAK,CAAC,OAAO,CAAC;AACtD,CAAC;AAED;;GAEG;AACH,SAAgB,uBAAuB,CACrC,QAA6B;IAE7B,IAAI,QAAQ,KAAK,SAAS;QAAE,OAAO,QAAQ,CAAC;IAC5C,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,6BAA6B,EAAE,WAAW,EAAE,CAAC;IACrE,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,MAAM;QAAE,OAAO,IAAI,CAAC;IAC/C,IAAI,GAAG,KAAK,GAAG,IAAI,GAAG,KAAK,OAAO;QAAE,OAAO,KAAK,CAAC;IACjD,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;AACzB,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACI,KAAK,UAAU,kBAAkB,CACtC,GAAW,EACX,UAAsB;IAEtB,MAAM,kBAAE,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzC,MAAM,kBAAE,CAAC,KAAK,CAAC,mBAAI,CAAC,IAAI,CAAC,GAAG,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5E,MAAM,kBAAE,CAAC,KAAK,CAAC,mBAAI,CAAC,IAAI,CAAC,GAAG,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE5E,MAAM,iBAAiB,GAAc,EAAE,CAAC;IACxC,6EAA6E;IAC7E,6EAA6E;IAC7E,sEAAsE;IACtE,8EAA8E;IAC9E,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAC;IACpD,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAC;IACpD,KAAK,MAAM,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;QACnD,MAAM,KAAK,GAAkB,EAAE,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;QACvD,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;YACrB,IAAI,OAAO,GAAG,iBAAiB,CAAC,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YACtD,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,OAAO,GAAG,qBAAqB,CAAC,GAAG,CAAC,MAAM,CAAC;gBAC3C,MAAM,kBAAE,CAAC,SAAS,CAAC,mBAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;gBAC9D,iBAAiB,CAAC,GAAG,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;YACnD,CAAC;YACD,KAAK,CAAC,cAAc,GAAG,OAAO,CAAC;YAC/B,OAAO,KAAK,CAAC,UAAU,CAAC;QAC1B,CAAC;QAED,MAAM,eAAe,GAAG,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,MAAM,CAC1D,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAC1B,CAAC;QACF,MAAM,cAAc,GAAG,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC;QAClD,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,MAAM,UAAU,GAAc,EAAE,CAAC;QACjC,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,CAAC;YAC9C,IAAI,CAAC,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;
gBACvB,UAAU,CAAC,IAAI,CACb,CAAC,CAAC,IAAI,KAAK,MAAM;oBACf,CAAC,CAAC;wBACE,GAAG,CAAC;wBACJ,OAAO,EAAE,IAAA,oDAAyB,EAAC,CAAC,CAAC,OAAO,EAAE,IAAI,CAAC,UAAU,CAAC;qBAC/D;oBACH,CAAC,CAAC,CAAC,CACN,CAAC;gBACF,SAAS;YACX,CAAC;YACD,IAAI,OAAO,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YAC7C,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,MAAM,MAAM,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBACpD,OAAO,GAAG,qBAAqB,CAAC,GAAG,CAAC,GAAG,MAAM,MAAM,CAAC;gBACpD,MAAM,kBAAE,CAAC,SAAS,CAAC,mBAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC;gBACrD,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;YAC1C,CAAC;YACD,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,OAAO;gBACb,SAAS,EAAE,OAAO;gBAClB,SAAS,EAAE,CAAC,CAAC,SAAS;aACvB,CAAC,CAAC;YACH,QAAQ,IAAI,CAAC,CAAC;QAChB,CAAC;QACD,iBAAiB,CAAC,IAAI,CAAC;YACrB,GAAG,IAAI;YACP,aAAa,EAAE,KAAK;YACpB,aAAa,EAAE,EAAE,UAAU,EAAE;YAC7B,UAAU,EAAE;gBACV,GAAG,IAAI,CAAC,UAAU;gBAClB,MAAM,EAAE,IAAA,oDAAyB,EAC/B,IAAI,CAAC,UAAU,CAAC,MAAM,EACtB,IAAI,CAAC,UAAU,CAChB;aACF;SACF,CAAC,CAAC;IACL,CAAC;IAED,MAAM,gBAAgB,GACpB,UAAU,CAAC,gBAAgB,KAAK,SAAS;QACvC,CAAC,CAAC,SAAS;QACX,CAAC,CAAC,EAAE,GAAG,UAAU,CAAC,gBAAgB,EAAE,CAAC;IACzC,IAAI,gBAAgB,EAAE,UAAU,EAAE,CAAC;QACjC,MAAM,OAAO,GAAG,6BAA6B,CAAC;QAC9C,MAAM,kBAAE,CAAC,SAAS,CAAC,mBAAI,CAAC,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACzE,gBAAgB,CAAC,cAAc,GAAG,OAAO,CAAC;QAC1C,OAAO,gBAAgB,CAAC,UAAU,CAAC;IACrC,CAAC;IAED,sEAAsE;IACtE,gEAAgE;IAChE,MAAM,UAAU,GAAG;QACjB,GAAG,UAAU;QACb,KAAK,EAAE,iBAAiB;QACxB,GAAG,CAAC,gBAAgB,CAAC,CAAC,CAAC,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACvC,CAAC;IAEb,MAAM,kBAAE,CAAC,SAAS,CAChB,mBAAI,CAAC,IAAI,CAAC,GAAG,EAAE,iBAAiB,CAAC,EACjC,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CACpC,CAAC;IAEF,MAAM,kBAAE,CAAC,SAAS,CAChB,mBAAI,CAAC,IAAI,CAAC,GAAG,EAAE,gBAAgB,CAAC,EAChC,IAAI,CAAC,SAAS,CACZ;QACE,IAAI,EAAE,UAAU,CAAC,IAAI;QACrB,MAAM,EAAE,UAAU,CAAC,MAAM;QACzB,WAAW,EAAE,UAAU,CAAC,WAAW,IAAI,IAAI;KAC5C,EACD,IAAI,EACJ,CAAC,CACF,CACF,CAAC;IAEF,MAAM,kBAAE,CAAC,
KAAK,CAAC,mBAAI,CAAC,IAAI,CAAC,GAAG,EAAE,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC9D,MAAM,kBAAE,CAAC,SAAS,CAAC,mBAAI,CAAC,IAAI,CAAC,GAAG,EAAE,UAAU,CAAC,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC;AACtE,CAAC;AAED,SAAS,OAAO,CAAC,UAAsB;IACrC,OAAO,CACL,UAAU,CAAC,KAAK;SACb,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CACf,IAAI,CAAC,SAAS,CAAC;QACb,IAAI,EAAE,CAAC;QACP,MAAM,EAAE,IAAI,CAAC,UAAU;QACvB,GAAG,EAAE,IAAI,CAAC,aAAa,CAAC,GAAG,IAAI,IAAI;QACnC,EAAE,EAAE,IAAI,CAAC,UAAU,CAAC,EAAE;QACtB,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,SAAS;KACvC,CAAC,CACH;SACA,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CACrB,CAAC;AACJ,CAAC","sourcesContent":["import fs from \"node:fs/promises\";\nimport path from \"node:path\";\nimport type {\n AgentEvidenceModality,\n ProbeEvidence,\n Rubric,\n Trajectory,\n TrajectoryStep,\n} from \"./types.js\";\nimport { redactInlineImagePayloads } from \"./evidenceNormalization.js\";\n\ntype RawRubricCriterion = {\n criterion: unknown;\n description: unknown;\n max_points?: unknown;\n maxPoints?: unknown;\n condition?: unknown;\n};\n\ntype RawRubric = {\n items?: unknown;\n};\n\ntype PersistedProbeEvidence = ProbeEvidence & {\n screenshotPath?: string;\n};\n\n/**\n * Convert dataset or generated rubric JSON into the public Stagehand shape.\n * Snake-case dataset fields are accepted here so serialized quirks do not leak\n * into the canonical rubric type.\n */\nexport function normalizeRubric(rubric: unknown): Rubric | undefined {\n if (rubric == null) return undefined;\n if (typeof rubric !== \"object\") {\n throw new TypeError(\"Rubric must be an object\");\n }\n\n const rawRubric = rubric as RawRubric;\n if (!Array.isArray(rawRubric.items)) {\n throw new TypeError(\"Rubric is missing an items array\");\n }\n\n return {\n items: rawRubric.items.map((item) => {\n const criterion = normalizeRequiredString(item.criterion, \"criterion\");\n const description = normalizeRequiredString(\n item.description,\n \"description\",\n );\n const maxPoints = normalizeMaxPoints(item);\n\n if 
(typeof maxPoints !== \"number\" || !Number.isFinite(maxPoints)) {\n throw new TypeError(\n `Rubric criterion \"${criterion}\" is missing a numeric maxPoints value`,\n );\n }\n\n return {\n criterion,\n description,\n maxPoints,\n ...(typeof item.condition === \"string\" && {\n condition: item.condition,\n }),\n };\n }),\n };\n}\n\nfunction normalizeRequiredString(value: unknown, fieldName: string): string {\n if (typeof value === \"string\" && value.length) {\n return value;\n }\n\n throw new TypeError(`Rubric criterion is missing a ${fieldName} value`);\n}\n\nfunction normalizeMaxPoints(item: RawRubricCriterion): unknown {\n return item.maxPoints ?? item.max_points;\n}\n\nfunction normalizeResultLabel(label?: string): string {\n return (label ?? `rescore-${new Date().toISOString()}`).replace(\n /[^A-Za-z0-9._-]/g,\n \"_\",\n );\n}\n\n// ─────────────────────────────────────────────────────────────────────────────\n// On-disk loader\n// ─────────────────────────────────────────────────────────────────────────────\n\n/**\n * Hydrate a Trajectory from the on-disk directory layout written by\n * TrajectoryRecorder.persist(). 
Used by the offline re-scoring CLI (`bench\n * verify`) and by any consumer that wants to feed a saved trajectory back\n * into V3Evaluator.verify() without running an agent.\n *\n * Reverses the recorder's serialization tweaks:\n * - `probeEvidence.screenshotPath` → read file into `probeEvidence.screenshot`.\n * - Image modalities in `agentEvidence.modalities` carry `imagePath` on\n * disk instead of raw Buffer; legacy `bytesBase64` fixtures are also\n * accepted.\n *\n * @param dir absolute or cwd-relative path to a `<run-id>/<task-id>/` directory.\n */\nexport async function loadTrajectoryFromDisk(dir: string): Promise<Trajectory> {\n const fs = await import(\"node:fs/promises\");\n const path = await import(\"node:path\");\n const trajectoryDir = path.resolve(dir);\n\n const trajectoryPath = path.join(trajectoryDir, \"trajectory.json\");\n const raw = await fs.readFile(trajectoryPath, \"utf8\");\n const parsed = JSON.parse(raw) as Trajectory & {\n finalObservation?: PersistedProbeEvidence;\n steps: Array<\n TrajectoryStep & {\n agentEvidence: {\n modalities: Array<\n | { type: \"text\"; content: string }\n | {\n type: \"image\";\n mediaType: string;\n // On-disk forms. 
Current writer externalizes bytes to\n // imagePath; bytesBase64 is accepted for older fixtures.\n bytes?: unknown;\n bytesBase64?: string;\n imagePath?: string;\n }\n | { type: \"json\"; content: unknown }\n >;\n };\n probeEvidence: PersistedProbeEvidence;\n }\n >;\n };\n\n const resolveWithinTrajectoryDir = (\n candidate: string,\n fieldName = \"screenshotPath\",\n ): string => {\n const resolved = path.resolve(trajectoryDir, candidate);\n const relative = path.relative(trajectoryDir, resolved);\n const outside =\n relative === \"..\" ||\n relative.startsWith(`..${path.sep}`) ||\n path.isAbsolute(relative);\n\n if (outside) {\n throw new Error(\n `Trajectory ${fieldName} escapes trajectory directory: ${candidate}`,\n );\n }\n\n return resolved;\n };\n\n const hydrateProbeScreenshot = async (\n probe: PersistedProbeEvidence | undefined,\n ): Promise<void> => {\n if (probe?.screenshotPath && !probe.screenshot) {\n const resolved = resolveWithinTrajectoryDir(probe.screenshotPath);\n try {\n probe.screenshot = await fs.readFile(resolved);\n } catch {\n // Missing screenshot file: leave probe.screenshot unset. 
The verifier's\n // evidence_insufficient path will handle it.\n }\n }\n };\n\n for (const step of parsed.steps) {\n // Rehydrate tier-2 probe screenshot from its on-disk file reference.\n await hydrateProbeScreenshot(step.probeEvidence);\n\n // Decode image modalities from disk references back to Buffer.\n if (step.agentEvidence?.modalities) {\n const modalities: AgentEvidenceModality[] = [];\n for (const m of step.agentEvidence.modalities) {\n // The on-disk shape carries imagePath/bytesBase64 instead of bytes,\n // so we look through `unknown` rather than rely on the typed union.\n const raw = m as unknown as {\n bytesBase64?: string;\n imagePath?: string;\n };\n if (m.type === \"image\" && typeof raw.bytesBase64 === \"string\") {\n modalities.push({\n type: \"image\" as const,\n bytes: Buffer.from(raw.bytesBase64, \"base64\"),\n mediaType: m.mediaType,\n });\n continue;\n }\n if (m.type === \"image\" && typeof raw.imagePath === \"string\") {\n const resolved = resolveWithinTrajectoryDir(\n raw.imagePath,\n \"imagePath\",\n );\n try {\n modalities.push({\n type: \"image\" as const,\n bytes: await fs.readFile(resolved),\n mediaType: m.mediaType,\n });\n } catch {\n // Missing agent image file: omit that image modality. 
The\n // verifier's evidence_insufficient path will handle missing bytes.\n }\n continue;\n }\n modalities.push(m as AgentEvidenceModality);\n }\n step.agentEvidence.modalities = modalities;\n }\n }\n\n await hydrateProbeScreenshot(parsed.finalObservation);\n\n return parsed;\n}\n\n/**\n * Build a `result*.json` filename for persisted evaluator output.\n *\n * Convention: the live run writes `result.json`; offline re-score attempts use\n * a label-based name (e.g., `result_rescore-2026-05-11.json`) so they coexist\n * without collisions and remain easy to diff.\n */\nexport function nextResultFilename(label?: string): string {\n return `result_${normalizeResultLabel(label)}.json`;\n}\n\n/**\n * Default persistence policy: explicit override, then env, then \"on unless CI\".\n */\nexport function shouldPersistTrajectory(\n override: boolean | undefined,\n): boolean {\n if (override !== undefined) return override;\n const env = process.env.VERIFIER_PERSIST_TRAJECTORIES?.toLowerCase();\n if (env === \"1\" || env === \"true\") return true;\n if (env === \"0\" || env === \"false\") return false;\n return !process.env.CI;\n}\n\n/**\n * Write the on-disk trajectory layout under `dir`:\n *\n * <dir>/\n * ├── task_data.json\n * ├── trajectory.json (screenshots referenced by path)\n * ├── screenshots/\n * │ ├── probe/<N>.png\n * │ └── agent/<N>[_M].png\n * ├── scores/ (empty; populated separately)\n * └── core.log\n *\n * Image bytes are externalized to PNG files; the in-memory Trajectory is left\n * untouched so callers can keep using it after persistence.\n */\nexport async function writeTrajectoryDir(\n dir: string,\n trajectory: Trajectory,\n): Promise<void> {\n await fs.mkdir(dir, { recursive: true });\n await fs.mkdir(path.join(dir, \"screenshots\", \"probe\"), { recursive: true });\n await fs.mkdir(path.join(dir, \"screenshots\", \"agent\"), { recursive: true });\n\n const serializableSteps: unknown[] = [];\n // A single post-turn probe is fanned across every step of a 
multi-tool turn,\n // and a single agent screenshot is shared across every action a CUA provider\n // chose from it, so the same Buffer is shared by reference. Dedupe by\n // identity: write the PNG once and point every sharing step at the same file.\n const probePathByBuffer = new Map<Buffer, string>();\n const agentPathByBuffer = new Map<Buffer, string>();\n for (const [i, step] of trajectory.steps.entries()) {\n const probe: ProbeEvidence = { ...step.probeEvidence };\n if (probe.screenshot) {\n let relPath = probePathByBuffer.get(probe.screenshot);\n if (!relPath) {\n relPath = `screenshots/probe/${i + 1}.png`;\n await fs.writeFile(path.join(dir, relPath), probe.screenshot);\n probePathByBuffer.set(probe.screenshot, relPath);\n }\n probe.screenshotPath = relPath;\n delete probe.screenshot;\n }\n\n const imageModalities = step.agentEvidence.modalities.filter(\n (m) => m.type === \"image\",\n );\n const multipleImages = imageModalities.length > 1;\n let imageSeq = 0;\n const modalities: unknown[] = [];\n for (const m of step.agentEvidence.modalities) {\n if (m.type !== \"image\") {\n modalities.push(\n m.type === \"json\"\n ? {\n ...m,\n content: redactInlineImagePayloads(m.content, step.actionName),\n }\n : m,\n );\n continue;\n }\n let relPath = agentPathByBuffer.get(m.bytes);\n if (!relPath) {\n const suffix = multipleImages ? `_${imageSeq}` : \"\";\n relPath = `screenshots/agent/${i + 1}${suffix}.png`;\n await fs.writeFile(path.join(dir, relPath), m.bytes);\n agentPathByBuffer.set(m.bytes, relPath);\n }\n modalities.push({\n type: \"image\",\n imagePath: relPath,\n mediaType: m.mediaType,\n });\n imageSeq += 1;\n }\n serializableSteps.push({\n ...step,\n probeEvidence: probe,\n agentEvidence: { modalities },\n toolOutput: {\n ...step.toolOutput,\n result: redactInlineImagePayloads(\n step.toolOutput.result,\n step.actionName,\n ),\n },\n });\n }\n\n const finalObservation: ProbeEvidence | undefined =\n trajectory.finalObservation === undefined\n ? 
undefined\n : { ...trajectory.finalObservation };\n if (finalObservation?.screenshot) {\n const relPath = \"screenshots/probe/final.png\";\n await fs.writeFile(path.join(dir, relPath), finalObservation.screenshot);\n finalObservation.screenshotPath = relPath;\n delete finalObservation.screenshot;\n }\n\n // Image modalities carry imagePath instead of raw bytes on disk; cast\n // through unknown rather than widen Trajectory's type contract.\n const serialized = {\n ...trajectory,\n steps: serializableSteps,\n ...(finalObservation ? { finalObservation } : {}),\n } as unknown;\n\n await fs.writeFile(\n path.join(dir, \"trajectory.json\"),\n JSON.stringify(serialized, null, 2),\n );\n\n await fs.writeFile(\n path.join(dir, \"task_data.json\"),\n JSON.stringify(\n {\n task: trajectory.task,\n status: trajectory.status,\n finalAnswer: trajectory.finalAnswer ?? null,\n },\n null,\n 2,\n ),\n );\n\n await fs.mkdir(path.join(dir, \"scores\"), { recursive: true });\n await fs.writeFile(path.join(dir, \"core.log\"), coreLog(trajectory));\n}\n\nfunction coreLog(trajectory: Trajectory): string {\n return (\n trajectory.steps\n .map((step, i) =>\n JSON.stringify({\n step: i,\n action: step.actionName,\n url: step.probeEvidence.url ?? null,\n ok: step.toolOutput.ok,\n reasoning: step.reasoning || undefined,\n }),\n )\n .join(\"\\n\") + \"\\n\"\n );\n}\n"]}