@browserbasehq/orca 3.4.0-preview.0 → 3.5.0-preview.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (919)
  1. package/dist/cjs/lib/inference.d.ts +2 -1
  2. package/dist/cjs/lib/inference.js +10 -3
  3. package/dist/cjs/lib/inference.js.map +1 -1
  4. package/dist/cjs/lib/prompt.d.ts +2 -2
  5. package/dist/cjs/lib/prompt.js +19 -4
  6. package/dist/cjs/lib/prompt.js.map +1 -1
  7. package/dist/cjs/lib/v3/agent/AgentProvider.d.ts +1 -1
  8. package/dist/cjs/lib/v3/agent/AgentProvider.js.map +1 -1
  9. package/dist/cjs/lib/v3/agent/AnthropicCUAClient.js +10 -0
  10. package/dist/cjs/lib/v3/agent/AnthropicCUAClient.js.map +1 -1
  11. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.d.ts +35 -0
  12. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.js +38 -0
  13. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.js.map +1 -0
  14. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.d.ts +19 -0
  15. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.js +54 -0
  16. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.js.map +1 -0
  17. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.d.ts +2 -0
  18. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.js +62 -0
  19. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.js.map +1 -0
  20. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.d.ts +3 -0
  21. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.js +25 -0
  22. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.js.map +1 -0
  23. package/dist/cjs/lib/v3/api.d.ts +7 -1
  24. package/dist/cjs/lib/v3/api.js +100 -29
  25. package/dist/cjs/lib/v3/api.js.map +1 -1
  26. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.d.ts +24 -0
  27. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.js +31 -0
  28. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.js.map +1 -0
  29. package/dist/cjs/lib/v3/handlers/extractHandler.js +19 -2
  30. package/dist/cjs/lib/v3/handlers/extractHandler.js.map +1 -1
  31. package/dist/cjs/lib/v3/handlers/observeHandler.js +2 -1
  32. package/dist/cjs/lib/v3/handlers/observeHandler.js.map +1 -1
  33. package/dist/cjs/lib/v3/handlers/v3AgentHandler.d.ts +1 -0
  34. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js +83 -7
  35. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  36. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.d.ts +11 -0
  37. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js +120 -5
  38. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  39. package/dist/cjs/lib/v3/index.d.ts +12 -0
  40. package/dist/cjs/lib/v3/index.js +19 -1
  41. package/dist/cjs/lib/v3/index.js.map +1 -1
  42. package/dist/cjs/lib/v3/launch/local.d.ts +2 -7
  43. package/dist/cjs/lib/v3/launch/local.js +47 -7
  44. package/dist/cjs/lib/v3/launch/local.js.map +1 -1
  45. package/dist/cjs/lib/v3/llm/LLMProvider.d.ts +3 -0
  46. package/dist/cjs/lib/v3/llm/LLMProvider.js +28 -8
  47. package/dist/cjs/lib/v3/llm/LLMProvider.js.map +1 -1
  48. package/dist/cjs/lib/v3/llm/aisdk.js +5 -0
  49. package/dist/cjs/lib/v3/llm/aisdk.js.map +1 -1
  50. package/dist/cjs/lib/v3/types/private/handlers.d.ts +2 -0
  51. package/dist/cjs/lib/v3/types/private/handlers.js.map +1 -1
  52. package/dist/cjs/lib/v3/types/private/snapshot.d.ts +2 -0
  53. package/dist/cjs/lib/v3/types/private/snapshot.js.map +1 -1
  54. package/dist/cjs/lib/v3/types/public/agent.d.ts +7 -1
  55. package/dist/cjs/lib/v3/types/public/agent.js.map +1 -1
  56. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.d.ts +85 -0
  57. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.js +15 -0
  58. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.js.map +1 -0
  59. package/dist/cjs/lib/v3/types/public/api.d.ts +584 -65
  60. package/dist/cjs/lib/v3/types/public/api.js +107 -7
  61. package/dist/cjs/lib/v3/types/public/api.js.map +1 -1
  62. package/dist/cjs/lib/v3/types/public/index.d.ts +1 -0
  63. package/dist/cjs/lib/v3/types/public/index.js +1 -0
  64. package/dist/cjs/lib/v3/types/public/index.js.map +1 -1
  65. package/dist/cjs/lib/v3/types/public/methods.d.ts +2 -0
  66. package/dist/cjs/lib/v3/types/public/methods.js.map +1 -1
  67. package/dist/cjs/lib/v3/types/public/model.d.ts +23 -10
  68. package/dist/cjs/lib/v3/types/public/model.js.map +1 -1
  69. package/dist/cjs/lib/v3/understudy/a11y/snapshot/a11yTree.js +25 -0
  70. package/dist/cjs/lib/v3/understudy/a11y/snapshot/a11yTree.js.map +1 -1
  71. package/dist/cjs/lib/v3/understudy/a11y/snapshot/treeFormatUtils.js +10 -1
  72. package/dist/cjs/lib/v3/understudy/a11y/snapshot/treeFormatUtils.js.map +1 -1
  73. package/dist/cjs/lib/v3/v3.d.ts +1 -0
  74. package/dist/cjs/lib/v3/v3.js +21 -47
  75. package/dist/cjs/lib/v3/v3.js.map +1 -1
  76. package/dist/cjs/lib/v3/verifier/evidenceNormalization.d.ts +7 -0
  77. package/dist/cjs/lib/v3/verifier/evidenceNormalization.js +100 -0
  78. package/dist/cjs/lib/v3/verifier/evidenceNormalization.js.map +1 -0
  79. package/dist/cjs/lib/v3/verifier/index.d.ts +6 -0
  80. package/dist/cjs/lib/v3/verifier/index.js +16 -0
  81. package/dist/cjs/lib/v3/verifier/index.js.map +1 -0
  82. package/dist/cjs/lib/v3/verifier/trajectory.d.ts +50 -0
  83. package/dist/cjs/lib/v3/verifier/trajectory.js +316 -0
  84. package/dist/cjs/lib/v3/verifier/trajectory.js.map +1 -0
  85. package/dist/cjs/lib/v3/verifier/types.d.ts +281 -0
  86. package/dist/cjs/lib/v3/verifier/types.js +10 -0
  87. package/dist/cjs/lib/v3/verifier/types.js.map +1 -0
  88. package/dist/cjs/lib/v3Evaluator.d.ts +29 -16
  89. package/dist/cjs/lib/v3Evaluator.js +184 -194
  90. package/dist/cjs/lib/v3Evaluator.js.map +1 -1
  91. package/dist/cjs/lib/v3LegacyEvaluator.d.ts +20 -0
  92. package/dist/cjs/lib/v3LegacyEvaluator.js +219 -0
  93. package/dist/cjs/lib/v3LegacyEvaluator.js.map +1 -0
  94. package/dist/cjs/lib/version.d.ts +1 -1
  95. package/dist/cjs/lib/version.js +1 -1
  96. package/dist/cjs/lib/version.js.map +1 -1
  97. package/dist/esm/lib/inference.d.ts +2 -1
  98. package/dist/esm/lib/inference.js +10 -3
  99. package/dist/esm/lib/inference.js.map +1 -1
  100. package/dist/esm/lib/prompt.d.ts +2 -2
  101. package/dist/esm/lib/prompt.js +19 -4
  102. package/dist/esm/lib/prompt.js.map +1 -1
  103. package/dist/esm/lib/v3/agent/AgentProvider.d.ts +1 -1
  104. package/dist/esm/lib/v3/agent/AgentProvider.js.map +1 -1
  105. package/dist/esm/lib/v3/agent/AnthropicCUAClient.js +10 -0
  106. package/dist/esm/lib/v3/agent/AnthropicCUAClient.js.map +1 -1
  107. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.d.ts +35 -0
  108. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.js +35 -0
  109. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.js.map +1 -0
  110. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.d.ts +19 -0
  111. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.js +50 -0
  112. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.js.map +1 -0
  113. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.d.ts +2 -0
  114. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.js +59 -0
  115. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.js.map +1 -0
  116. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.d.ts +3 -0
  117. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.js +22 -0
  118. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.js.map +1 -0
  119. package/dist/esm/lib/v3/api.d.ts +7 -1
  120. package/dist/esm/lib/v3/api.js +100 -29
  121. package/dist/esm/lib/v3/api.js.map +1 -1
  122. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.d.ts +24 -0
  123. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.js +28 -0
  124. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.js.map +1 -0
  125. package/dist/esm/lib/v3/handlers/extractHandler.js +19 -2
  126. package/dist/esm/lib/v3/handlers/extractHandler.js.map +1 -1
  127. package/dist/esm/lib/v3/handlers/observeHandler.js +2 -1
  128. package/dist/esm/lib/v3/handlers/observeHandler.js.map +1 -1
  129. package/dist/esm/lib/v3/handlers/v3AgentHandler.d.ts +1 -0
  130. package/dist/esm/lib/v3/handlers/v3AgentHandler.js +83 -7
  131. package/dist/esm/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  132. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.d.ts +11 -0
  133. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js +120 -5
  134. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  135. package/dist/esm/lib/v3/index.d.ts +12 -0
  136. package/dist/esm/lib/v3/index.js +10 -0
  137. package/dist/esm/lib/v3/index.js.map +1 -1
  138. package/dist/esm/lib/v3/launch/local.d.ts +2 -7
  139. package/dist/esm/lib/v3/launch/local.js +48 -8
  140. package/dist/esm/lib/v3/launch/local.js.map +1 -1
  141. package/dist/esm/lib/v3/llm/LLMProvider.d.ts +3 -0
  142. package/dist/esm/lib/v3/llm/LLMProvider.js +28 -9
  143. package/dist/esm/lib/v3/llm/LLMProvider.js.map +1 -1
  144. package/dist/esm/lib/v3/llm/aisdk.js +5 -0
  145. package/dist/esm/lib/v3/llm/aisdk.js.map +1 -1
  146. package/dist/esm/lib/v3/types/private/handlers.d.ts +2 -0
  147. package/dist/esm/lib/v3/types/private/handlers.js.map +1 -1
  148. package/dist/esm/lib/v3/types/private/snapshot.d.ts +2 -0
  149. package/dist/esm/lib/v3/types/private/snapshot.js.map +1 -1
  150. package/dist/esm/lib/v3/types/public/agent.d.ts +7 -1
  151. package/dist/esm/lib/v3/types/public/agent.js.map +1 -1
  152. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.d.ts +85 -0
  153. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.js +14 -0
  154. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.js.map +1 -0
  155. package/dist/esm/lib/v3/types/public/api.d.ts +584 -65
  156. package/dist/esm/lib/v3/types/public/api.js +105 -5
  157. package/dist/esm/lib/v3/types/public/api.js.map +1 -1
  158. package/dist/esm/lib/v3/types/public/index.d.ts +1 -0
  159. package/dist/esm/lib/v3/types/public/index.js +1 -0
  160. package/dist/esm/lib/v3/types/public/index.js.map +1 -1
  161. package/dist/esm/lib/v3/types/public/methods.d.ts +2 -0
  162. package/dist/esm/lib/v3/types/public/methods.js.map +1 -1
  163. package/dist/esm/lib/v3/types/public/model.d.ts +23 -10
  164. package/dist/esm/lib/v3/types/public/model.js.map +1 -1
  165. package/dist/esm/lib/v3/understudy/a11y/snapshot/a11yTree.js +25 -0
  166. package/dist/esm/lib/v3/understudy/a11y/snapshot/a11yTree.js.map +1 -1
  167. package/dist/esm/lib/v3/understudy/a11y/snapshot/treeFormatUtils.js +10 -1
  168. package/dist/esm/lib/v3/understudy/a11y/snapshot/treeFormatUtils.js.map +1 -1
  169. package/dist/esm/lib/v3/v3.d.ts +1 -0
  170. package/dist/esm/lib/v3/v3.js +21 -47
  171. package/dist/esm/lib/v3/v3.js.map +1 -1
  172. package/dist/esm/lib/v3/verifier/evidenceNormalization.d.ts +7 -0
  173. package/dist/esm/lib/v3/verifier/evidenceNormalization.js +93 -0
  174. package/dist/esm/lib/v3/verifier/evidenceNormalization.js.map +1 -0
  175. package/dist/esm/lib/v3/verifier/index.d.ts +6 -0
  176. package/dist/esm/lib/v3/verifier/index.js +3 -0
  177. package/dist/esm/lib/v3/verifier/index.js.map +1 -0
  178. package/dist/esm/lib/v3/verifier/trajectory.d.ts +50 -0
  179. package/dist/esm/lib/v3/verifier/trajectory.js +273 -0
  180. package/dist/esm/lib/v3/verifier/trajectory.js.map +1 -0
  181. package/dist/esm/lib/v3/verifier/types.d.ts +281 -0
  182. package/dist/esm/lib/v3/verifier/types.js +9 -0
  183. package/dist/esm/lib/v3/verifier/types.js.map +1 -0
  184. package/dist/esm/lib/v3Evaluator.d.ts +29 -16
  185. package/dist/esm/lib/v3Evaluator.js +183 -193
  186. package/dist/esm/lib/v3Evaluator.js.map +1 -1
  187. package/dist/esm/lib/v3LegacyEvaluator.d.ts +20 -0
  188. package/dist/esm/lib/v3LegacyEvaluator.js +215 -0
  189. package/dist/esm/lib/v3LegacyEvaluator.js.map +1 -0
  190. package/dist/esm/lib/version.d.ts +1 -1
  191. package/dist/esm/lib/version.js +1 -1
  192. package/dist/esm/lib/version.js.map +1 -1
  193. package/package.json +33 -21
  194. package/dist/cjs/tests/cache-variables.test.d.ts +0 -1
  195. package/dist/cjs/tests/cache-variables.test.js +0 -221
  196. package/dist/cjs/tests/cache-variables.test.js.map +0 -1
  197. package/dist/cjs/tests/integration/agent-abort-signal.spec.d.ts +0 -1
  198. package/dist/cjs/tests/integration/agent-abort-signal.spec.js +0 -123
  199. package/dist/cjs/tests/integration/agent-abort-signal.spec.js.map +0 -1
  200. package/dist/cjs/tests/integration/agent-cache-self-heal.spec.d.ts +0 -1
  201. package/dist/cjs/tests/integration/agent-cache-self-heal.spec.js +0 -83
  202. package/dist/cjs/tests/integration/agent-cache-self-heal.spec.js.map +0 -1
  203. package/dist/cjs/tests/integration/agent-callbacks.spec.d.ts +0 -1
  204. package/dist/cjs/tests/integration/agent-callbacks.spec.js +0 -385
  205. package/dist/cjs/tests/integration/agent-callbacks.spec.js.map +0 -1
  206. package/dist/cjs/tests/integration/agent-experimental-validation.spec.d.ts +0 -1
  207. package/dist/cjs/tests/integration/agent-experimental-validation.spec.js +0 -362
  208. package/dist/cjs/tests/integration/agent-experimental-validation.spec.js.map +0 -1
  209. package/dist/cjs/tests/integration/agent-hybrid-mode.spec.d.ts +0 -1
  210. package/dist/cjs/tests/integration/agent-hybrid-mode.spec.js +0 -262
  211. package/dist/cjs/tests/integration/agent-hybrid-mode.spec.js.map +0 -1
  212. package/dist/cjs/tests/integration/agent-message-continuation.spec.d.ts +0 -1
  213. package/dist/cjs/tests/integration/agent-message-continuation.spec.js +0 -112
  214. package/dist/cjs/tests/integration/agent-message-continuation.spec.js.map +0 -1
  215. package/dist/cjs/tests/integration/agent-streaming.spec.d.ts +0 -1
  216. package/dist/cjs/tests/integration/agent-streaming.spec.js +0 -140
  217. package/dist/cjs/tests/integration/agent-streaming.spec.js.map +0 -1
  218. package/dist/cjs/tests/integration/cdp-close-api-region.spec.d.ts +0 -1
  219. package/dist/cjs/tests/integration/cdp-close-api-region.spec.js +0 -41
  220. package/dist/cjs/tests/integration/cdp-close-api-region.spec.js.map +0 -1
  221. package/dist/cjs/tests/integration/cdp-connection-close.spec.d.ts +0 -1
  222. package/dist/cjs/tests/integration/cdp-connection-close.spec.js +0 -76
  223. package/dist/cjs/tests/integration/cdp-connection-close.spec.js.map +0 -1
  224. package/dist/cjs/tests/integration/cdp-session-detached.spec.d.ts +0 -1
  225. package/dist/cjs/tests/integration/cdp-session-detached.spec.js +0 -45
  226. package/dist/cjs/tests/integration/cdp-session-detached.spec.js.map +0 -1
  227. package/dist/cjs/tests/integration/chrome-newtab-page-tracking.spec.d.ts +0 -1
  228. package/dist/cjs/tests/integration/chrome-newtab-page-tracking.spec.js +0 -56
  229. package/dist/cjs/tests/integration/chrome-newtab-page-tracking.spec.js.map +0 -1
  230. package/dist/cjs/tests/integration/click-count.spec.d.ts +0 -1
  231. package/dist/cjs/tests/integration/click-count.spec.js +0 -163
  232. package/dist/cjs/tests/integration/click-count.spec.js.map +0 -1
  233. package/dist/cjs/tests/integration/connect-to-existing-browser.spec.d.ts +0 -1
  234. package/dist/cjs/tests/integration/connect-to-existing-browser.spec.js +0 -99
  235. package/dist/cjs/tests/integration/connect-to-existing-browser.spec.js.map +0 -1
  236. package/dist/cjs/tests/integration/context-addInitScript.spec.d.ts +0 -1
  237. package/dist/cjs/tests/integration/context-addInitScript.spec.js +0 -285
  238. package/dist/cjs/tests/integration/context-addInitScript.spec.js.map +0 -1
  239. package/dist/cjs/tests/integration/context-extra-http-headers.spec.d.ts +0 -1
  240. package/dist/cjs/tests/integration/context-extra-http-headers.spec.js +0 -49
  241. package/dist/cjs/tests/integration/context-extra-http-headers.spec.js.map +0 -1
  242. package/dist/cjs/tests/integration/cookies.spec.d.ts +0 -1
  243. package/dist/cjs/tests/integration/cookies.spec.js +0 -187
  244. package/dist/cjs/tests/integration/cookies.spec.js.map +0 -1
  245. package/dist/cjs/tests/integration/default-page-tracking.spec.d.ts +0 -1
  246. package/dist/cjs/tests/integration/default-page-tracking.spec.js +0 -52
  247. package/dist/cjs/tests/integration/default-page-tracking.spec.js.map +0 -1
  248. package/dist/cjs/tests/integration/downloads.spec.d.ts +0 -1
  249. package/dist/cjs/tests/integration/downloads.spec.js +0 -52
  250. package/dist/cjs/tests/integration/downloads.spec.js.map +0 -1
  251. package/dist/cjs/tests/integration/flowLogger.spec.d.ts +0 -1
  252. package/dist/cjs/tests/integration/flowLogger.spec.js +0 -718
  253. package/dist/cjs/tests/integration/flowLogger.spec.js.map +0 -1
  254. package/dist/cjs/tests/integration/frame-get-location-and-click.spec.d.ts +0 -1
  255. package/dist/cjs/tests/integration/frame-get-location-and-click.spec.js +0 -57
  256. package/dist/cjs/tests/integration/frame-get-location-and-click.spec.js.map +0 -1
  257. package/dist/cjs/tests/integration/iframe-ctx-addInitScript-race.spec.d.ts +0 -1
  258. package/dist/cjs/tests/integration/iframe-ctx-addInitScript-race.spec.js +0 -219
  259. package/dist/cjs/tests/integration/iframe-ctx-addInitScript-race.spec.js.map +0 -1
  260. package/dist/cjs/tests/integration/iframe-ctx-addInitScript.spec.d.ts +0 -1
  261. package/dist/cjs/tests/integration/iframe-ctx-addInitScript.spec.js +0 -474
  262. package/dist/cjs/tests/integration/iframe-ctx-addInitScript.spec.js.map +0 -1
  263. package/dist/cjs/tests/integration/keep-alive.child.d.ts +0 -1
  264. package/dist/cjs/tests/integration/keep-alive.child.js +0 -85
  265. package/dist/cjs/tests/integration/keep-alive.child.js.map +0 -1
  266. package/dist/cjs/tests/integration/keep-alive.spec.d.ts +0 -22
  267. package/dist/cjs/tests/integration/keep-alive.spec.js +0 -490
  268. package/dist/cjs/tests/integration/keep-alive.spec.js.map +0 -1
  269. package/dist/cjs/tests/integration/keyboard.spec.d.ts +0 -1
  270. package/dist/cjs/tests/integration/keyboard.spec.js +0 -259
  271. package/dist/cjs/tests/integration/keyboard.spec.js.map +0 -1
  272. package/dist/cjs/tests/integration/locator-backend-node-id.spec.d.ts +0 -1
  273. package/dist/cjs/tests/integration/locator-backend-node-id.spec.js +0 -150
  274. package/dist/cjs/tests/integration/locator-backend-node-id.spec.js.map +0 -1
  275. package/dist/cjs/tests/integration/locator-content-methods.spec.d.ts +0 -1
  276. package/dist/cjs/tests/integration/locator-content-methods.spec.js +0 -180
  277. package/dist/cjs/tests/integration/locator-content-methods.spec.js.map +0 -1
  278. package/dist/cjs/tests/integration/locator-count-iframe.spec.d.ts +0 -1
  279. package/dist/cjs/tests/integration/locator-count-iframe.spec.js +0 -129
  280. package/dist/cjs/tests/integration/locator-count-iframe.spec.js.map +0 -1
  281. package/dist/cjs/tests/integration/locator-count.spec.d.ts +0 -1
  282. package/dist/cjs/tests/integration/locator-count.spec.js +0 -68
  283. package/dist/cjs/tests/integration/locator-count.spec.js.map +0 -1
  284. package/dist/cjs/tests/integration/locator-fill.spec.d.ts +0 -1
  285. package/dist/cjs/tests/integration/locator-fill.spec.js +0 -117
  286. package/dist/cjs/tests/integration/locator-fill.spec.js.map +0 -1
  287. package/dist/cjs/tests/integration/locator-input-methods.spec.d.ts +0 -1
  288. package/dist/cjs/tests/integration/locator-input-methods.spec.js +0 -127
  289. package/dist/cjs/tests/integration/locator-input-methods.spec.js.map +0 -1
  290. package/dist/cjs/tests/integration/locator-nth.spec.d.ts +0 -1
  291. package/dist/cjs/tests/integration/locator-nth.spec.js +0 -175
  292. package/dist/cjs/tests/integration/locator-nth.spec.js.map +0 -1
  293. package/dist/cjs/tests/integration/locator-select-option.spec.d.ts +0 -1
  294. package/dist/cjs/tests/integration/locator-select-option.spec.js +0 -216
  295. package/dist/cjs/tests/integration/locator-select-option.spec.js.map +0 -1
  296. package/dist/cjs/tests/integration/logger-initialization.spec.d.ts +0 -1
  297. package/dist/cjs/tests/integration/logger-initialization.spec.js +0 -597
  298. package/dist/cjs/tests/integration/logger-initialization.spec.js.map +0 -1
  299. package/dist/cjs/tests/integration/multi-instance-logger.spec.d.ts +0 -1
  300. package/dist/cjs/tests/integration/multi-instance-logger.spec.js +0 -293
  301. package/dist/cjs/tests/integration/multi-instance-logger.spec.js.map +0 -1
  302. package/dist/cjs/tests/integration/nested-div.spec.d.ts +0 -1
  303. package/dist/cjs/tests/integration/nested-div.spec.js +0 -22
  304. package/dist/cjs/tests/integration/nested-div.spec.js.map +0 -1
  305. package/dist/cjs/tests/integration/observe-element-id-format.spec.d.ts +0 -1
  306. package/dist/cjs/tests/integration/observe-element-id-format.spec.js +0 -130
  307. package/dist/cjs/tests/integration/observe-element-id-format.spec.js.map +0 -1
  308. package/dist/cjs/tests/integration/page-addInitScript.spec.d.ts +0 -1
  309. package/dist/cjs/tests/integration/page-addInitScript.spec.js +0 -94
  310. package/dist/cjs/tests/integration/page-addInitScript.spec.js.map +0 -1
  311. package/dist/cjs/tests/integration/page-console.spec.d.ts +0 -1
  312. package/dist/cjs/tests/integration/page-console.spec.js +0 -47
  313. package/dist/cjs/tests/integration/page-console.spec.js.map +0 -1
  314. package/dist/cjs/tests/integration/page-drag-and-drop.spec.d.ts +0 -1
  315. package/dist/cjs/tests/integration/page-drag-and-drop.spec.js +0 -437
  316. package/dist/cjs/tests/integration/page-drag-and-drop.spec.js.map +0 -1
  317. package/dist/cjs/tests/integration/page-extra-http-headers.spec.d.ts +0 -1
  318. package/dist/cjs/tests/integration/page-extra-http-headers.spec.js +0 -85
  319. package/dist/cjs/tests/integration/page-extra-http-headers.spec.js.map +0 -1
  320. package/dist/cjs/tests/integration/page-goto-response.spec.d.ts +0 -1
  321. package/dist/cjs/tests/integration/page-goto-response.spec.js +0 -34
  322. package/dist/cjs/tests/integration/page-goto-response.spec.js.map +0 -1
  323. package/dist/cjs/tests/integration/page-hover.spec.d.ts +0 -1
  324. package/dist/cjs/tests/integration/page-hover.spec.js +0 -165
  325. package/dist/cjs/tests/integration/page-hover.spec.js.map +0 -1
  326. package/dist/cjs/tests/integration/page-screenshot.spec.d.ts +0 -1
  327. package/dist/cjs/tests/integration/page-screenshot.spec.js +0 -292
  328. package/dist/cjs/tests/integration/page-screenshot.spec.js.map +0 -1
  329. package/dist/cjs/tests/integration/page-scroll.spec.d.ts +0 -1
  330. package/dist/cjs/tests/integration/page-scroll.spec.js +0 -183
  331. package/dist/cjs/tests/integration/page-scroll.spec.js.map +0 -1
  332. package/dist/cjs/tests/integration/page-send-cdp.spec.d.ts +0 -1
  333. package/dist/cjs/tests/integration/page-send-cdp.spec.js +0 -47
  334. package/dist/cjs/tests/integration/page-send-cdp.spec.js.map +0 -1
  335. package/dist/cjs/tests/integration/perform-understudy-method.spec.d.ts +0 -1
  336. package/dist/cjs/tests/integration/perform-understudy-method.spec.js +0 -63
  337. package/dist/cjs/tests/integration/perform-understudy-method.spec.js.map +0 -1
  338. package/dist/cjs/tests/integration/setinputfiles.spec.d.ts +0 -1
  339. package/dist/cjs/tests/integration/setinputfiles.spec.js +0 -133
  340. package/dist/cjs/tests/integration/setinputfiles.spec.js.map +0 -1
  341. package/dist/cjs/tests/integration/shadow-iframe-oopif.spec.d.ts +0 -1
  342. package/dist/cjs/tests/integration/shadow-iframe-oopif.spec.js +0 -161
  343. package/dist/cjs/tests/integration/shadow-iframe-oopif.spec.js.map +0 -1
  344. package/dist/cjs/tests/integration/shadow-iframe-spif.spec.d.ts +0 -1
  345. package/dist/cjs/tests/integration/shadow-iframe-spif.spec.js +0 -160
  346. package/dist/cjs/tests/integration/shadow-iframe-spif.spec.js.map +0 -1
  347. package/dist/cjs/tests/integration/testUtils.d.ts +0 -40
  348. package/dist/cjs/tests/integration/testUtils.js +0 -188
  349. package/dist/cjs/tests/integration/testUtils.js.map +0 -1
  350. package/dist/cjs/tests/integration/text-selector-innermost.spec.d.ts +0 -1
  351. package/dist/cjs/tests/integration/text-selector-innermost.spec.js +0 -105
  352. package/dist/cjs/tests/integration/text-selector-innermost.spec.js.map +0 -1
  353. package/dist/cjs/tests/integration/timeouts.spec.d.ts +0 -1
  354. package/dist/cjs/tests/integration/timeouts.spec.js +0 -309
  355. package/dist/cjs/tests/integration/timeouts.spec.js.map +0 -1
  356. package/dist/cjs/tests/integration/user-data-dir.spec.d.ts +0 -1
  357. package/dist/cjs/tests/integration/user-data-dir.spec.js +0 -77
  358. package/dist/cjs/tests/integration/user-data-dir.spec.js.map +0 -1
  359. package/dist/cjs/tests/integration/v3.config.d.ts +0 -4
  360. package/dist/cjs/tests/integration/v3.config.js +0 -11
  361. package/dist/cjs/tests/integration/v3.config.js.map +0 -1
  362. package/dist/cjs/tests/integration/v3.dynamic.config.d.ts +0 -4
  363. package/dist/cjs/tests/integration/v3.dynamic.config.js +0 -48
  364. package/dist/cjs/tests/integration/v3.dynamic.config.js.map +0 -1
  365. package/dist/cjs/tests/integration/v3.playwright.config.d.ts +0 -2
  366. package/dist/cjs/tests/integration/v3.playwright.config.js +0 -48
  367. package/dist/cjs/tests/integration/v3.playwright.config.js.map +0 -1
  368. package/dist/cjs/tests/integration/wait-for-selector.spec.d.ts +0 -1
  369. package/dist/cjs/tests/integration/wait-for-selector.spec.js +0 -683
  370. package/dist/cjs/tests/integration/wait-for-selector.spec.js.map +0 -1
  371. package/dist/cjs/tests/integration/wait-for-timeout.spec.d.ts +0 -1
  372. package/dist/cjs/tests/integration/wait-for-timeout.spec.js +0 -118
  373. package/dist/cjs/tests/integration/wait-for-timeout.spec.js.map +0 -1
  374. package/dist/cjs/tests/integration/xpath-for-location-deep.spec.d.ts +0 -1
  375. package/dist/cjs/tests/integration/xpath-for-location-deep.spec.js +0 -87
  376. package/dist/cjs/tests/integration/xpath-for-location-deep.spec.js.map +0 -1
  377. package/dist/cjs/tests/unit/agent-captcha-hooks.test.d.ts +0 -1
  378. package/dist/cjs/tests/unit/agent-captcha-hooks.test.js +0 -341
  379. package/dist/cjs/tests/unit/agent-captcha-hooks.test.js.map +0 -1
  380. package/dist/cjs/tests/unit/agent-execution-model.test.d.ts +0 -1
  381. package/dist/cjs/tests/unit/agent-execution-model.test.js +0 -150
  382. package/dist/cjs/tests/unit/agent-execution-model.test.js.map +0 -1
  383. package/dist/cjs/tests/unit/agent-metrics.test.d.ts +0 -1
  384. package/dist/cjs/tests/unit/agent-metrics.test.js +0 -112
  385. package/dist/cjs/tests/unit/agent-metrics.test.js.map +0 -1
  386. package/dist/cjs/tests/unit/agent-mode-routing.test.d.ts +0 -1
  387. package/dist/cjs/tests/unit/agent-mode-routing.test.js +0 -88
  388. package/dist/cjs/tests/unit/agent-mode-routing.test.js.map +0 -1
  389. package/dist/cjs/tests/unit/agent-system-prompt-variables.test.d.ts +0 -1
  390. package/dist/cjs/tests/unit/agent-system-prompt-variables.test.js +0 -23
  391. package/dist/cjs/tests/unit/agent-system-prompt-variables.test.js.map +0 -1
  392. package/dist/cjs/tests/unit/agent-temperature.test.d.ts +0 -1
  393. package/dist/cjs/tests/unit/agent-temperature.test.js +0 -191
  394. package/dist/cjs/tests/unit/agent-temperature.test.js.map +0 -1
  395. package/dist/cjs/tests/unit/agent-variables-validation.test.d.ts +0 -1
  396. package/dist/cjs/tests/unit/agent-variables-validation.test.js +0 -43
  397. package/dist/cjs/tests/unit/agent-variables-validation.test.js.map +0 -1
  398. package/dist/cjs/tests/unit/aisdk-clients.test.d.ts +0 -1
  399. package/dist/cjs/tests/unit/aisdk-clients.test.js +0 -86
  400. package/dist/cjs/tests/unit/aisdk-clients.test.js.map +0 -1
  401. package/dist/cjs/tests/unit/anthropic-cua-adaptive-thinking.test.d.ts +0 -1
  402. package/dist/cjs/tests/unit/anthropic-cua-adaptive-thinking.test.js +0 -250
  403. package/dist/cjs/tests/unit/anthropic-cua-adaptive-thinking.test.js.map +0 -1
  404. package/dist/cjs/tests/unit/api-client-observe-variables.test.d.ts +0 -1
  405. package/dist/cjs/tests/unit/api-client-observe-variables.test.js +0 -133
  406. package/dist/cjs/tests/unit/api-client-observe-variables.test.js.map +0 -1
  407. package/dist/cjs/tests/unit/api-multiregion.test.d.ts +0 -1
  408. package/dist/cjs/tests/unit/api-multiregion.test.js +0 -58
  409. package/dist/cjs/tests/unit/api-multiregion.test.js.map +0 -1
  410. package/dist/cjs/tests/unit/api-optional-model-api-key.test.d.ts +0 -1
  411. package/dist/cjs/tests/unit/api-optional-model-api-key.test.js +0 -155
  412. package/dist/cjs/tests/unit/api-optional-model-api-key.test.js.map +0 -1
  413. package/dist/cjs/tests/unit/api-variables-schema.test.d.ts +0 -1
  414. package/dist/cjs/tests/unit/api-variables-schema.test.js +0 -62
  415. package/dist/cjs/tests/unit/api-variables-schema.test.js.map +0 -1
  416. package/dist/cjs/tests/unit/browserbase-session-accessors.test.d.ts +0 -1
  417. package/dist/cjs/tests/unit/browserbase-session-accessors.test.js +0 -142
  418. package/dist/cjs/tests/unit/browserbase-session-accessors.test.js.map +0 -1
  419. package/dist/cjs/tests/unit/cache-llm-resolution.test.d.ts +0 -1
  420. package/dist/cjs/tests/unit/cache-llm-resolution.test.js +0 -186
  421. package/dist/cjs/tests/unit/cache-llm-resolution.test.js.map +0 -1
  422. package/dist/cjs/tests/unit/captcha-solver.test.d.ts +0 -1
  423. package/dist/cjs/tests/unit/captcha-solver.test.js +0 -154
  424. package/dist/cjs/tests/unit/captcha-solver.test.js.map +0 -1
  425. package/dist/cjs/tests/unit/cdp-connection-close.test.d.ts +0 -1
  426. package/dist/cjs/tests/unit/cdp-connection-close.test.js +0 -74
  427. package/dist/cjs/tests/unit/cdp-connection-close.test.js.map +0 -1
  428. package/dist/cjs/tests/unit/context-extra-http-headers.test.d.ts +0 -1
  429. package/dist/cjs/tests/unit/context-extra-http-headers.test.js +0 -58
  430. package/dist/cjs/tests/unit/context-extra-http-headers.test.js.map +0 -1
  431. package/dist/cjs/tests/unit/cookies.test.d.ts +0 -1
  432. package/dist/cjs/tests/unit/cookies.test.js +0 -944
  433. package/dist/cjs/tests/unit/cookies.test.js.map +0 -1
  434. package/dist/cjs/tests/unit/flowlogger-capturing-cdp.test.d.ts +0 -1
  435. package/dist/cjs/tests/unit/flowlogger-capturing-cdp.test.js +0 -95
  436. package/dist/cjs/tests/unit/flowlogger-capturing-cdp.test.js.map +0 -1
  437. package/dist/cjs/tests/unit/flowlogger-capturing-llm.test.d.ts +0 -1
  438. package/dist/cjs/tests/unit/flowlogger-capturing-llm.test.js +0 -43
  439. package/dist/cjs/tests/unit/flowlogger-capturing-llm.test.js.map +0 -1
  440. package/dist/cjs/tests/unit/flowlogger-eventstore.test.d.ts +0 -1
  441. package/dist/cjs/tests/unit/flowlogger-eventstore.test.js +0 -250
  442. package/dist/cjs/tests/unit/flowlogger-eventstore.test.js.map +0 -1
  443. package/dist/cjs/tests/unit/frame-registry-oopif-adoption.test.d.ts +0 -1
  444. package/dist/cjs/tests/unit/frame-registry-oopif-adoption.test.js +0 -60
  445. package/dist/cjs/tests/unit/frame-registry-oopif-adoption.test.js.map +0 -1
  446. package/dist/cjs/tests/unit/helpers/mockCDPSession.d.ts +0 -19
  447. package/dist/cjs/tests/unit/helpers/mockCDPSession.js +0 -29
  448. package/dist/cjs/tests/unit/helpers/mockCDPSession.js.map +0 -1
  449. package/dist/cjs/tests/unit/inference-temperature.test.d.ts +0 -1
  450. package/dist/cjs/tests/unit/inference-temperature.test.js +0 -65
  451. package/dist/cjs/tests/unit/inference-temperature.test.js.map +0 -1
  452. package/dist/cjs/tests/unit/llm-middleware.test.d.ts +0 -1
  453. package/dist/cjs/tests/unit/llm-middleware.test.js +0 -495
  454. package/dist/cjs/tests/unit/llm-middleware.test.js.map +0 -1
  455. package/dist/cjs/tests/unit/llm-provider.test.d.ts +0 -1
  456. package/dist/cjs/tests/unit/llm-provider.test.js +0 -64
  457. package/dist/cjs/tests/unit/llm-provider.test.js.map +0 -1
  458. package/dist/cjs/tests/unit/microsoft-cua-client.test.d.ts +0 -1
  459. package/dist/cjs/tests/unit/microsoft-cua-client.test.js +0 -86
  460. package/dist/cjs/tests/unit/microsoft-cua-client.test.js.map +0 -1
  461. package/dist/cjs/tests/unit/model-deprecation.test.d.ts +0 -1
  462. package/dist/cjs/tests/unit/model-deprecation.test.js +0 -142
  463. package/dist/cjs/tests/unit/model-deprecation.test.js.map +0 -1
  464. package/dist/cjs/tests/unit/model-utils.test.d.ts +0 -1
  465. package/dist/cjs/tests/unit/model-utils.test.js +0 -42
  466. package/dist/cjs/tests/unit/model-utils.test.js.map +0 -1
  467. package/dist/cjs/tests/unit/openai-compatible-temperature.test.d.ts +0 -1
  468. package/dist/cjs/tests/unit/openai-compatible-temperature.test.js +0 -84
  469. package/dist/cjs/tests/unit/openai-compatible-temperature.test.js.map +0 -1
  470. package/dist/cjs/tests/unit/openai-cua-client.test.d.ts +0 -1
  471. package/dist/cjs/tests/unit/openai-cua-client.test.js +0 -71
  472. package/dist/cjs/tests/unit/openai-cua-client.test.js.map +0 -1
  473. package/dist/cjs/tests/unit/page-extra-http-headers.test.d.ts +0 -1
  474. package/dist/cjs/tests/unit/page-extra-http-headers.test.js +0 -92
  475. package/dist/cjs/tests/unit/page-extra-http-headers.test.js.map +0 -1
  476. package/dist/cjs/tests/unit/page-snapshot.test.d.ts +0 -1
  477. package/dist/cjs/tests/unit/page-snapshot.test.js +0 -75
  478. package/dist/cjs/tests/unit/page-snapshot.test.js.map +0 -1
  479. package/dist/cjs/tests/unit/prompt-observe-variables.test.d.ts +0 -1
  480. package/dist/cjs/tests/unit/prompt-observe-variables.test.js +0 -25
  481. package/dist/cjs/tests/unit/prompt-observe-variables.test.js.map +0 -1
  482. package/dist/cjs/tests/unit/public-api/export-surface.test.d.ts +0 -1
  483. package/dist/cjs/tests/unit/public-api/export-surface.test.js +0 -107
  484. package/dist/cjs/tests/unit/public-api/export-surface.test.js.map +0 -1
  485. package/dist/cjs/tests/unit/public-api/llm-and-agents.test.d.ts +0 -1
  486. package/dist/cjs/tests/unit/public-api/llm-and-agents.test.js +0 -179
  487. package/dist/cjs/tests/unit/public-api/llm-and-agents.test.js.map +0 -1
  488. package/dist/cjs/tests/unit/public-api/public-error-types.test.d.ts +0 -62
  489. package/dist/cjs/tests/unit/public-api/public-error-types.test.js +0 -110
  490. package/dist/cjs/tests/unit/public-api/public-error-types.test.js.map +0 -1
  491. package/dist/cjs/tests/unit/public-api/public-types.test.d.ts +0 -1
  492. package/dist/cjs/tests/unit/public-api/public-types.test.js +0 -99
  493. package/dist/cjs/tests/unit/public-api/public-types.test.js.map +0 -1
  494. package/dist/cjs/tests/unit/public-api/runtime-utils.test.d.ts +0 -1
  495. package/dist/cjs/tests/unit/public-api/runtime-utils.test.js +0 -60
  496. package/dist/cjs/tests/unit/public-api/runtime-utils.test.js.map +0 -1
  497. package/dist/cjs/tests/unit/public-api/schema-utils.test.d.ts +0 -1
  498. package/dist/cjs/tests/unit/public-api/schema-utils.test.js +0 -95
  499. package/dist/cjs/tests/unit/public-api/schema-utils.test.js.map +0 -1
  500. package/dist/cjs/tests/unit/public-api/timeout-error-types.test.d.ts +0 -1
  501. package/dist/cjs/tests/unit/public-api/timeout-error-types.test.js +0 -121
  502. package/dist/cjs/tests/unit/public-api/timeout-error-types.test.js.map +0 -1
  503. package/dist/cjs/tests/unit/public-api/tool-type-export.test.d.ts +0 -1
  504. package/dist/cjs/tests/unit/public-api/tool-type-export.test.js +0 -65
  505. package/dist/cjs/tests/unit/public-api/tool-type-export.test.js.map +0 -1
  506. package/dist/cjs/tests/unit/public-api/v3-core.test.d.ts +0 -1
  507. package/dist/cjs/tests/unit/public-api/v3-core.test.js +0 -108
  508. package/dist/cjs/tests/unit/public-api/v3-core.test.js.map +0 -1
  509. package/dist/cjs/tests/unit/safety-confirmation.test.d.ts +0 -1
  510. package/dist/cjs/tests/unit/safety-confirmation.test.js +0 -110
  511. package/dist/cjs/tests/unit/safety-confirmation.test.js.map +0 -1
  512. package/dist/cjs/tests/unit/snapshot-a11y-resolvers.test.d.ts +0 -1
  513. package/dist/cjs/tests/unit/snapshot-a11y-resolvers.test.js +0 -431
  514. package/dist/cjs/tests/unit/snapshot-a11y-resolvers.test.js.map +0 -1
  515. package/dist/cjs/tests/unit/snapshot-a11y-tree-utils.test.d.ts +0 -1
  516. package/dist/cjs/tests/unit/snapshot-a11y-tree-utils.test.js +0 -306
  517. package/dist/cjs/tests/unit/snapshot-a11y-tree-utils.test.js.map +0 -1
  518. package/dist/cjs/tests/unit/snapshot-capture-orchestration.test.d.ts +0 -1
  519. package/dist/cjs/tests/unit/snapshot-capture-orchestration.test.js +0 -504
  520. package/dist/cjs/tests/unit/snapshot-capture-orchestration.test.js.map +0 -1
  521. package/dist/cjs/tests/unit/snapshot-cbor.test.d.ts +0 -1
  522. package/dist/cjs/tests/unit/snapshot-cbor.test.js +0 -204
  523. package/dist/cjs/tests/unit/snapshot-cbor.test.js.map +0 -1
  524. package/dist/cjs/tests/unit/snapshot-dom-session-builders.test.d.ts +0 -1
  525. package/dist/cjs/tests/unit/snapshot-dom-session-builders.test.js +0 -220
  526. package/dist/cjs/tests/unit/snapshot-dom-session-builders.test.js.map +0 -1
  527. package/dist/cjs/tests/unit/snapshot-dom-tree-utils.test.d.ts +0 -1
  528. package/dist/cjs/tests/unit/snapshot-dom-tree-utils.test.js +0 -107
  529. package/dist/cjs/tests/unit/snapshot-dom-tree-utils.test.js.map +0 -1
  530. package/dist/cjs/tests/unit/snapshot-focus-selectors-utils.test.d.ts +0 -1
  531. package/dist/cjs/tests/unit/snapshot-focus-selectors-utils.test.js +0 -89
  532. package/dist/cjs/tests/unit/snapshot-focus-selectors-utils.test.js.map +0 -1
  533. package/dist/cjs/tests/unit/snapshot-frame-merge.test.d.ts +0 -1
  534. package/dist/cjs/tests/unit/snapshot-frame-merge.test.js +0 -335
  535. package/dist/cjs/tests/unit/snapshot-frame-merge.test.js.map +0 -1
  536. package/dist/cjs/tests/unit/snapshot-tree-format-utils.test.d.ts +0 -1
  537. package/dist/cjs/tests/unit/snapshot-tree-format-utils.test.js +0 -84
  538. package/dist/cjs/tests/unit/snapshot-tree-format-utils.test.js.map +0 -1
  539. package/dist/cjs/tests/unit/snapshot-xpath-utils.test.d.ts +0 -1
  540. package/dist/cjs/tests/unit/snapshot-xpath-utils.test.js +0 -74
  541. package/dist/cjs/tests/unit/snapshot-xpath-utils.test.js.map +0 -1
  542. package/dist/cjs/tests/unit/timeout-handlers.test.d.ts +0 -1
  543. package/dist/cjs/tests/unit/timeout-handlers.test.js +0 -897
  544. package/dist/cjs/tests/unit/timeout-handlers.test.js.map +0 -1
  545. package/dist/cjs/tests/unit/understudy-command-exception.test.d.ts +0 -1
  546. package/dist/cjs/tests/unit/understudy-command-exception.test.js +0 -57
  547. package/dist/cjs/tests/unit/understudy-command-exception.test.js.map +0 -1
  548. package/dist/cjs/tests/unit/xpath-parser.test.d.ts +0 -1
  549. package/dist/cjs/tests/unit/xpath-parser.test.js +0 -311
  550. package/dist/cjs/tests/unit/xpath-parser.test.js.map +0 -1
  551. package/dist/cjs/tests/unit/xpath-resolver.test.d.ts +0 -1
  552. package/dist/cjs/tests/unit/xpath-resolver.test.js +0 -80
  553. package/dist/cjs/tests/unit/xpath-resolver.test.js.map +0 -1
  554. package/dist/cjs/tests/unit/zod-enum-compatibility.test.d.ts +0 -1
  555. package/dist/cjs/tests/unit/zod-enum-compatibility.test.js +0 -149
  556. package/dist/cjs/tests/unit/zod-enum-compatibility.test.js.map +0 -1
  557. package/dist/esm/tests/cache-variables.test.d.ts +0 -1
  558. package/dist/esm/tests/cache-variables.test.js +0 -219
  559. package/dist/esm/tests/cache-variables.test.js.map +0 -1
  560. package/dist/esm/tests/integration/agent-abort-signal.spec.d.ts +0 -1
  561. package/dist/esm/tests/integration/agent-abort-signal.spec.js +0 -121
  562. package/dist/esm/tests/integration/agent-abort-signal.spec.js.map +0 -1
  563. package/dist/esm/tests/integration/agent-cache-self-heal.spec.d.ts +0 -1
  564. package/dist/esm/tests/integration/agent-cache-self-heal.spec.js +0 -78
  565. package/dist/esm/tests/integration/agent-cache-self-heal.spec.js.map +0 -1
  566. package/dist/esm/tests/integration/agent-callbacks.spec.d.ts +0 -1
  567. package/dist/esm/tests/integration/agent-callbacks.spec.js +0 -383
  568. package/dist/esm/tests/integration/agent-callbacks.spec.js.map +0 -1
  569. package/dist/esm/tests/integration/agent-experimental-validation.spec.d.ts +0 -1
  570. package/dist/esm/tests/integration/agent-experimental-validation.spec.js +0 -360
  571. package/dist/esm/tests/integration/agent-experimental-validation.spec.js.map +0 -1
  572. package/dist/esm/tests/integration/agent-hybrid-mode.spec.d.ts +0 -1
  573. package/dist/esm/tests/integration/agent-hybrid-mode.spec.js +0 -260
  574. package/dist/esm/tests/integration/agent-hybrid-mode.spec.js.map +0 -1
  575. package/dist/esm/tests/integration/agent-message-continuation.spec.d.ts +0 -1
  576. package/dist/esm/tests/integration/agent-message-continuation.spec.js +0 -110
  577. package/dist/esm/tests/integration/agent-message-continuation.spec.js.map +0 -1
  578. package/dist/esm/tests/integration/agent-streaming.spec.d.ts +0 -1
  579. package/dist/esm/tests/integration/agent-streaming.spec.js +0 -138
  580. package/dist/esm/tests/integration/agent-streaming.spec.js.map +0 -1
  581. package/dist/esm/tests/integration/cdp-close-api-region.spec.d.ts +0 -1
  582. package/dist/esm/tests/integration/cdp-close-api-region.spec.js +0 -39
  583. package/dist/esm/tests/integration/cdp-close-api-region.spec.js.map +0 -1
  584. package/dist/esm/tests/integration/cdp-connection-close.spec.d.ts +0 -1
  585. package/dist/esm/tests/integration/cdp-connection-close.spec.js +0 -74
  586. package/dist/esm/tests/integration/cdp-connection-close.spec.js.map +0 -1
  587. package/dist/esm/tests/integration/cdp-session-detached.spec.d.ts +0 -1
  588. package/dist/esm/tests/integration/cdp-session-detached.spec.js +0 -43
  589. package/dist/esm/tests/integration/cdp-session-detached.spec.js.map +0 -1
  590. package/dist/esm/tests/integration/chrome-newtab-page-tracking.spec.d.ts +0 -1
  591. package/dist/esm/tests/integration/chrome-newtab-page-tracking.spec.js +0 -54
  592. package/dist/esm/tests/integration/chrome-newtab-page-tracking.spec.js.map +0 -1
  593. package/dist/esm/tests/integration/click-count.spec.d.ts +0 -1
  594. package/dist/esm/tests/integration/click-count.spec.js +0 -161
  595. package/dist/esm/tests/integration/click-count.spec.js.map +0 -1
  596. package/dist/esm/tests/integration/connect-to-existing-browser.spec.d.ts +0 -1
  597. package/dist/esm/tests/integration/connect-to-existing-browser.spec.js +0 -97
  598. package/dist/esm/tests/integration/connect-to-existing-browser.spec.js.map +0 -1
  599. package/dist/esm/tests/integration/context-addInitScript.spec.d.ts +0 -1
  600. package/dist/esm/tests/integration/context-addInitScript.spec.js +0 -283
  601. package/dist/esm/tests/integration/context-addInitScript.spec.js.map +0 -1
  602. package/dist/esm/tests/integration/context-extra-http-headers.spec.d.ts +0 -1
  603. package/dist/esm/tests/integration/context-extra-http-headers.spec.js +0 -47
  604. package/dist/esm/tests/integration/context-extra-http-headers.spec.js.map +0 -1
  605. package/dist/esm/tests/integration/cookies.spec.d.ts +0 -1
  606. package/dist/esm/tests/integration/cookies.spec.js +0 -185
  607. package/dist/esm/tests/integration/cookies.spec.js.map +0 -1
  608. package/dist/esm/tests/integration/default-page-tracking.spec.d.ts +0 -1
  609. package/dist/esm/tests/integration/default-page-tracking.spec.js +0 -50
  610. package/dist/esm/tests/integration/default-page-tracking.spec.js.map +0 -1
  611. package/dist/esm/tests/integration/downloads.spec.d.ts +0 -1
  612. package/dist/esm/tests/integration/downloads.spec.js +0 -47
  613. package/dist/esm/tests/integration/downloads.spec.js.map +0 -1
  614. package/dist/esm/tests/integration/flowLogger.spec.d.ts +0 -1
  615. package/dist/esm/tests/integration/flowLogger.spec.js +0 -716
  616. package/dist/esm/tests/integration/flowLogger.spec.js.map +0 -1
  617. package/dist/esm/tests/integration/frame-get-location-and-click.spec.d.ts +0 -1
  618. package/dist/esm/tests/integration/frame-get-location-and-click.spec.js +0 -55
  619. package/dist/esm/tests/integration/frame-get-location-and-click.spec.js.map +0 -1
  620. package/dist/esm/tests/integration/iframe-ctx-addInitScript-race.spec.d.ts +0 -1
  621. package/dist/esm/tests/integration/iframe-ctx-addInitScript-race.spec.js +0 -217
  622. package/dist/esm/tests/integration/iframe-ctx-addInitScript-race.spec.js.map +0 -1
  623. package/dist/esm/tests/integration/iframe-ctx-addInitScript.spec.d.ts +0 -1
  624. package/dist/esm/tests/integration/iframe-ctx-addInitScript.spec.js +0 -472
  625. package/dist/esm/tests/integration/iframe-ctx-addInitScript.spec.js.map +0 -1
  626. package/dist/esm/tests/integration/keep-alive.child.d.ts +0 -1
  627. package/dist/esm/tests/integration/keep-alive.child.js +0 -83
  628. package/dist/esm/tests/integration/keep-alive.child.js.map +0 -1
  629. package/dist/esm/tests/integration/keep-alive.spec.d.ts +0 -22
  630. package/dist/esm/tests/integration/keep-alive.spec.js +0 -482
  631. package/dist/esm/tests/integration/keep-alive.spec.js.map +0 -1
  632. package/dist/esm/tests/integration/keyboard.spec.d.ts +0 -1
  633. package/dist/esm/tests/integration/keyboard.spec.js +0 -257
  634. package/dist/esm/tests/integration/keyboard.spec.js.map +0 -1
  635. package/dist/esm/tests/integration/locator-backend-node-id.spec.d.ts +0 -1
  636. package/dist/esm/tests/integration/locator-backend-node-id.spec.js +0 -148
  637. package/dist/esm/tests/integration/locator-backend-node-id.spec.js.map +0 -1
  638. package/dist/esm/tests/integration/locator-content-methods.spec.d.ts +0 -1
  639. package/dist/esm/tests/integration/locator-content-methods.spec.js +0 -178
  640. package/dist/esm/tests/integration/locator-content-methods.spec.js.map +0 -1
  641. package/dist/esm/tests/integration/locator-count-iframe.spec.d.ts +0 -1
  642. package/dist/esm/tests/integration/locator-count-iframe.spec.js +0 -127
  643. package/dist/esm/tests/integration/locator-count-iframe.spec.js.map +0 -1
  644. package/dist/esm/tests/integration/locator-count.spec.d.ts +0 -1
  645. package/dist/esm/tests/integration/locator-count.spec.js +0 -66
  646. package/dist/esm/tests/integration/locator-count.spec.js.map +0 -1
  647. package/dist/esm/tests/integration/locator-fill.spec.d.ts +0 -1
  648. package/dist/esm/tests/integration/locator-fill.spec.js +0 -115
  649. package/dist/esm/tests/integration/locator-fill.spec.js.map +0 -1
  650. package/dist/esm/tests/integration/locator-input-methods.spec.d.ts +0 -1
  651. package/dist/esm/tests/integration/locator-input-methods.spec.js +0 -125
  652. package/dist/esm/tests/integration/locator-input-methods.spec.js.map +0 -1
  653. package/dist/esm/tests/integration/locator-nth.spec.d.ts +0 -1
  654. package/dist/esm/tests/integration/locator-nth.spec.js +0 -173
  655. package/dist/esm/tests/integration/locator-nth.spec.js.map +0 -1
  656. package/dist/esm/tests/integration/locator-select-option.spec.d.ts +0 -1
  657. package/dist/esm/tests/integration/locator-select-option.spec.js +0 -214
  658. package/dist/esm/tests/integration/locator-select-option.spec.js.map +0 -1
  659. package/dist/esm/tests/integration/logger-initialization.spec.d.ts +0 -1
  660. package/dist/esm/tests/integration/logger-initialization.spec.js +0 -595
  661. package/dist/esm/tests/integration/logger-initialization.spec.js.map +0 -1
  662. package/dist/esm/tests/integration/multi-instance-logger.spec.d.ts +0 -1
  663. package/dist/esm/tests/integration/multi-instance-logger.spec.js +0 -291
  664. package/dist/esm/tests/integration/multi-instance-logger.spec.js.map +0 -1
  665. package/dist/esm/tests/integration/nested-div.spec.d.ts +0 -1
  666. package/dist/esm/tests/integration/nested-div.spec.js +0 -20
  667. package/dist/esm/tests/integration/nested-div.spec.js.map +0 -1
  668. package/dist/esm/tests/integration/observe-element-id-format.spec.d.ts +0 -1
  669. package/dist/esm/tests/integration/observe-element-id-format.spec.js +0 -128
  670. package/dist/esm/tests/integration/observe-element-id-format.spec.js.map +0 -1
  671. package/dist/esm/tests/integration/page-addInitScript.spec.d.ts +0 -1
  672. package/dist/esm/tests/integration/page-addInitScript.spec.js +0 -92
  673. package/dist/esm/tests/integration/page-addInitScript.spec.js.map +0 -1
  674. package/dist/esm/tests/integration/page-console.spec.d.ts +0 -1
  675. package/dist/esm/tests/integration/page-console.spec.js +0 -45
  676. package/dist/esm/tests/integration/page-console.spec.js.map +0 -1
  677. package/dist/esm/tests/integration/page-drag-and-drop.spec.d.ts +0 -1
  678. package/dist/esm/tests/integration/page-drag-and-drop.spec.js +0 -435
  679. package/dist/esm/tests/integration/page-drag-and-drop.spec.js.map +0 -1
  680. package/dist/esm/tests/integration/page-extra-http-headers.spec.d.ts +0 -1
  681. package/dist/esm/tests/integration/page-extra-http-headers.spec.js +0 -83
  682. package/dist/esm/tests/integration/page-extra-http-headers.spec.js.map +0 -1
  683. package/dist/esm/tests/integration/page-goto-response.spec.d.ts +0 -1
  684. package/dist/esm/tests/integration/page-goto-response.spec.js +0 -32
  685. package/dist/esm/tests/integration/page-goto-response.spec.js.map +0 -1
  686. package/dist/esm/tests/integration/page-hover.spec.d.ts +0 -1
  687. package/dist/esm/tests/integration/page-hover.spec.js +0 -163
  688. package/dist/esm/tests/integration/page-hover.spec.js.map +0 -1
  689. package/dist/esm/tests/integration/page-screenshot.spec.d.ts +0 -1
  690. package/dist/esm/tests/integration/page-screenshot.spec.js +0 -257
  691. package/dist/esm/tests/integration/page-screenshot.spec.js.map +0 -1
  692. package/dist/esm/tests/integration/page-scroll.spec.d.ts +0 -1
  693. package/dist/esm/tests/integration/page-scroll.spec.js +0 -181
  694. package/dist/esm/tests/integration/page-scroll.spec.js.map +0 -1
  695. package/dist/esm/tests/integration/page-send-cdp.spec.d.ts +0 -1
  696. package/dist/esm/tests/integration/page-send-cdp.spec.js +0 -45
  697. package/dist/esm/tests/integration/page-send-cdp.spec.js.map +0 -1
  698. package/dist/esm/tests/integration/perform-understudy-method.spec.d.ts +0 -1
  699. package/dist/esm/tests/integration/perform-understudy-method.spec.js +0 -61
  700. package/dist/esm/tests/integration/perform-understudy-method.spec.js.map +0 -1
  701. package/dist/esm/tests/integration/setinputfiles.spec.d.ts +0 -1
  702. package/dist/esm/tests/integration/setinputfiles.spec.js +0 -128
  703. package/dist/esm/tests/integration/setinputfiles.spec.js.map +0 -1
  704. package/dist/esm/tests/integration/shadow-iframe-oopif.spec.d.ts +0 -1
  705. package/dist/esm/tests/integration/shadow-iframe-oopif.spec.js +0 -156
  706. package/dist/esm/tests/integration/shadow-iframe-oopif.spec.js.map +0 -1
  707. package/dist/esm/tests/integration/shadow-iframe-spif.spec.d.ts +0 -1
  708. package/dist/esm/tests/integration/shadow-iframe-spif.spec.js +0 -155
  709. package/dist/esm/tests/integration/shadow-iframe-spif.spec.js.map +0 -1
  710. package/dist/esm/tests/integration/testUtils.d.ts +0 -40
  711. package/dist/esm/tests/integration/testUtils.js +0 -178
  712. package/dist/esm/tests/integration/testUtils.js.map +0 -1
  713. package/dist/esm/tests/integration/text-selector-innermost.spec.d.ts +0 -1
  714. package/dist/esm/tests/integration/text-selector-innermost.spec.js +0 -103
  715. package/dist/esm/tests/integration/text-selector-innermost.spec.js.map +0 -1
  716. package/dist/esm/tests/integration/timeouts.spec.d.ts +0 -1
  717. package/dist/esm/tests/integration/timeouts.spec.js +0 -307
  718. package/dist/esm/tests/integration/timeouts.spec.js.map +0 -1
  719. package/dist/esm/tests/integration/user-data-dir.spec.d.ts +0 -1
  720. package/dist/esm/tests/integration/user-data-dir.spec.js +0 -42
  721. package/dist/esm/tests/integration/user-data-dir.spec.js.map +0 -1
  722. package/dist/esm/tests/integration/v3.config.d.ts +0 -4
  723. package/dist/esm/tests/integration/v3.config.js +0 -7
  724. package/dist/esm/tests/integration/v3.config.js.map +0 -1
  725. package/dist/esm/tests/integration/v3.dynamic.config.d.ts +0 -4
  726. package/dist/esm/tests/integration/v3.dynamic.config.js +0 -44
  727. package/dist/esm/tests/integration/v3.dynamic.config.js.map +0 -1
  728. package/dist/esm/tests/integration/v3.playwright.config.d.ts +0 -2
  729. package/dist/esm/tests/integration/v3.playwright.config.js +0 -46
  730. package/dist/esm/tests/integration/v3.playwright.config.js.map +0 -1
  731. package/dist/esm/tests/integration/wait-for-selector.spec.d.ts +0 -1
  732. package/dist/esm/tests/integration/wait-for-selector.spec.js +0 -681
  733. package/dist/esm/tests/integration/wait-for-selector.spec.js.map +0 -1
  734. package/dist/esm/tests/integration/wait-for-timeout.spec.d.ts +0 -1
  735. package/dist/esm/tests/integration/wait-for-timeout.spec.js +0 -116
  736. package/dist/esm/tests/integration/wait-for-timeout.spec.js.map +0 -1
  737. package/dist/esm/tests/integration/xpath-for-location-deep.spec.d.ts +0 -1
  738. package/dist/esm/tests/integration/xpath-for-location-deep.spec.js +0 -85
  739. package/dist/esm/tests/integration/xpath-for-location-deep.spec.js.map +0 -1
  740. package/dist/esm/tests/unit/agent-captcha-hooks.test.d.ts +0 -1
  741. package/dist/esm/tests/unit/agent-captcha-hooks.test.js +0 -339
  742. package/dist/esm/tests/unit/agent-captcha-hooks.test.js.map +0 -1
  743. package/dist/esm/tests/unit/agent-execution-model.test.d.ts +0 -1
  744. package/dist/esm/tests/unit/agent-execution-model.test.js +0 -148
  745. package/dist/esm/tests/unit/agent-execution-model.test.js.map +0 -1
  746. package/dist/esm/tests/unit/agent-metrics.test.d.ts +0 -1
  747. package/dist/esm/tests/unit/agent-metrics.test.js +0 -110
  748. package/dist/esm/tests/unit/agent-metrics.test.js.map +0 -1
  749. package/dist/esm/tests/unit/agent-mode-routing.test.d.ts +0 -1
  750. package/dist/esm/tests/unit/agent-mode-routing.test.js +0 -86
  751. package/dist/esm/tests/unit/agent-mode-routing.test.js.map +0 -1
  752. package/dist/esm/tests/unit/agent-system-prompt-variables.test.d.ts +0 -1
  753. package/dist/esm/tests/unit/agent-system-prompt-variables.test.js +0 -21
  754. package/dist/esm/tests/unit/agent-system-prompt-variables.test.js.map +0 -1
  755. package/dist/esm/tests/unit/agent-temperature.test.d.ts +0 -1
  756. package/dist/esm/tests/unit/agent-temperature.test.js +0 -189
  757. package/dist/esm/tests/unit/agent-temperature.test.js.map +0 -1
  758. package/dist/esm/tests/unit/agent-variables-validation.test.d.ts +0 -1
  759. package/dist/esm/tests/unit/agent-variables-validation.test.js +0 -41
  760. package/dist/esm/tests/unit/agent-variables-validation.test.js.map +0 -1
  761. package/dist/esm/tests/unit/aisdk-clients.test.d.ts +0 -1
  762. package/dist/esm/tests/unit/aisdk-clients.test.js +0 -84
  763. package/dist/esm/tests/unit/aisdk-clients.test.js.map +0 -1
  764. package/dist/esm/tests/unit/anthropic-cua-adaptive-thinking.test.d.ts +0 -1
  765. package/dist/esm/tests/unit/anthropic-cua-adaptive-thinking.test.js +0 -245
  766. package/dist/esm/tests/unit/anthropic-cua-adaptive-thinking.test.js.map +0 -1
  767. package/dist/esm/tests/unit/api-client-observe-variables.test.d.ts +0 -1
  768. package/dist/esm/tests/unit/api-client-observe-variables.test.js +0 -131
  769. package/dist/esm/tests/unit/api-client-observe-variables.test.js.map +0 -1
  770. package/dist/esm/tests/unit/api-multiregion.test.d.ts +0 -1
  771. package/dist/esm/tests/unit/api-multiregion.test.js +0 -56
  772. package/dist/esm/tests/unit/api-multiregion.test.js.map +0 -1
  773. package/dist/esm/tests/unit/api-optional-model-api-key.test.d.ts +0 -1
  774. package/dist/esm/tests/unit/api-optional-model-api-key.test.js +0 -153
  775. package/dist/esm/tests/unit/api-optional-model-api-key.test.js.map +0 -1
  776. package/dist/esm/tests/unit/api-variables-schema.test.d.ts +0 -1
  777. package/dist/esm/tests/unit/api-variables-schema.test.js +0 -60
  778. package/dist/esm/tests/unit/api-variables-schema.test.js.map +0 -1
  779. package/dist/esm/tests/unit/browserbase-session-accessors.test.d.ts +0 -1
  780. package/dist/esm/tests/unit/browserbase-session-accessors.test.js +0 -140
  781. package/dist/esm/tests/unit/browserbase-session-accessors.test.js.map +0 -1
  782. package/dist/esm/tests/unit/cache-llm-resolution.test.d.ts +0 -1
  783. package/dist/esm/tests/unit/cache-llm-resolution.test.js +0 -184
  784. package/dist/esm/tests/unit/cache-llm-resolution.test.js.map +0 -1
  785. package/dist/esm/tests/unit/captcha-solver.test.d.ts +0 -1
  786. package/dist/esm/tests/unit/captcha-solver.test.js +0 -152
  787. package/dist/esm/tests/unit/captcha-solver.test.js.map +0 -1
  788. package/dist/esm/tests/unit/cdp-connection-close.test.d.ts +0 -1
  789. package/dist/esm/tests/unit/cdp-connection-close.test.js +0 -72
  790. package/dist/esm/tests/unit/cdp-connection-close.test.js.map +0 -1
  791. package/dist/esm/tests/unit/context-extra-http-headers.test.d.ts +0 -1
  792. package/dist/esm/tests/unit/context-extra-http-headers.test.js +0 -56
  793. package/dist/esm/tests/unit/context-extra-http-headers.test.js.map +0 -1
  794. package/dist/esm/tests/unit/cookies.test.d.ts +0 -1
  795. package/dist/esm/tests/unit/cookies.test.js +0 -909
  796. package/dist/esm/tests/unit/cookies.test.js.map +0 -1
  797. package/dist/esm/tests/unit/flowlogger-capturing-cdp.test.d.ts +0 -1
  798. package/dist/esm/tests/unit/flowlogger-capturing-cdp.test.js +0 -93
  799. package/dist/esm/tests/unit/flowlogger-capturing-cdp.test.js.map +0 -1
  800. package/dist/esm/tests/unit/flowlogger-capturing-llm.test.d.ts +0 -1
  801. package/dist/esm/tests/unit/flowlogger-capturing-llm.test.js +0 -41
  802. package/dist/esm/tests/unit/flowlogger-capturing-llm.test.js.map +0 -1
  803. package/dist/esm/tests/unit/flowlogger-eventstore.test.d.ts +0 -1
  804. package/dist/esm/tests/unit/flowlogger-eventstore.test.js +0 -248
  805. package/dist/esm/tests/unit/flowlogger-eventstore.test.js.map +0 -1
  806. package/dist/esm/tests/unit/frame-registry-oopif-adoption.test.d.ts +0 -1
  807. package/dist/esm/tests/unit/frame-registry-oopif-adoption.test.js +0 -58
  808. package/dist/esm/tests/unit/frame-registry-oopif-adoption.test.js.map +0 -1
  809. package/dist/esm/tests/unit/helpers/mockCDPSession.d.ts +0 -19
  810. package/dist/esm/tests/unit/helpers/mockCDPSession.js +0 -25
  811. package/dist/esm/tests/unit/helpers/mockCDPSession.js.map +0 -1
  812. package/dist/esm/tests/unit/inference-temperature.test.d.ts +0 -1
  813. package/dist/esm/tests/unit/inference-temperature.test.js +0 -63
  814. package/dist/esm/tests/unit/inference-temperature.test.js.map +0 -1
  815. package/dist/esm/tests/unit/llm-middleware.test.d.ts +0 -1
  816. package/dist/esm/tests/unit/llm-middleware.test.js +0 -460
  817. package/dist/esm/tests/unit/llm-middleware.test.js.map +0 -1
  818. package/dist/esm/tests/unit/llm-provider.test.d.ts +0 -1
  819. package/dist/esm/tests/unit/llm-provider.test.js +0 -62
  820. package/dist/esm/tests/unit/llm-provider.test.js.map +0 -1
  821. package/dist/esm/tests/unit/microsoft-cua-client.test.d.ts +0 -1
  822. package/dist/esm/tests/unit/microsoft-cua-client.test.js +0 -84
  823. package/dist/esm/tests/unit/microsoft-cua-client.test.js.map +0 -1
  824. package/dist/esm/tests/unit/model-deprecation.test.d.ts +0 -1
  825. package/dist/esm/tests/unit/model-deprecation.test.js +0 -140
  826. package/dist/esm/tests/unit/model-deprecation.test.js.map +0 -1
  827. package/dist/esm/tests/unit/model-utils.test.d.ts +0 -1
  828. package/dist/esm/tests/unit/model-utils.test.js +0 -40
  829. package/dist/esm/tests/unit/model-utils.test.js.map +0 -1
  830. package/dist/esm/tests/unit/openai-compatible-temperature.test.d.ts +0 -1
  831. package/dist/esm/tests/unit/openai-compatible-temperature.test.js +0 -82
  832. package/dist/esm/tests/unit/openai-compatible-temperature.test.js.map +0 -1
  833. package/dist/esm/tests/unit/openai-cua-client.test.d.ts +0 -1
  834. package/dist/esm/tests/unit/openai-cua-client.test.js +0 -69
  835. package/dist/esm/tests/unit/openai-cua-client.test.js.map +0 -1
  836. package/dist/esm/tests/unit/page-extra-http-headers.test.d.ts +0 -1
  837. package/dist/esm/tests/unit/page-extra-http-headers.test.js +0 -90
  838. package/dist/esm/tests/unit/page-extra-http-headers.test.js.map +0 -1
  839. package/dist/esm/tests/unit/page-snapshot.test.d.ts +0 -1
  840. package/dist/esm/tests/unit/page-snapshot.test.js +0 -40
  841. package/dist/esm/tests/unit/page-snapshot.test.js.map +0 -1
  842. package/dist/esm/tests/unit/prompt-observe-variables.test.d.ts +0 -1
  843. package/dist/esm/tests/unit/prompt-observe-variables.test.js +0 -23
  844. package/dist/esm/tests/unit/prompt-observe-variables.test.js.map +0 -1
  845. package/dist/esm/tests/unit/public-api/export-surface.test.d.ts +0 -1
  846. package/dist/esm/tests/unit/public-api/export-surface.test.js +0 -72
  847. package/dist/esm/tests/unit/public-api/export-surface.test.js.map +0 -1
  848. package/dist/esm/tests/unit/public-api/llm-and-agents.test.d.ts +0 -1
  849. package/dist/esm/tests/unit/public-api/llm-and-agents.test.js +0 -144
  850. package/dist/esm/tests/unit/public-api/llm-and-agents.test.js.map +0 -1
  851. package/dist/esm/tests/unit/public-api/public-error-types.test.d.ts +0 -62
  852. package/dist/esm/tests/unit/public-api/public-error-types.test.js +0 -74
  853. package/dist/esm/tests/unit/public-api/public-error-types.test.js.map +0 -1
  854. package/dist/esm/tests/unit/public-api/public-types.test.d.ts +0 -1
  855. package/dist/esm/tests/unit/public-api/public-types.test.js +0 -97
  856. package/dist/esm/tests/unit/public-api/public-types.test.js.map +0 -1
  857. package/dist/esm/tests/unit/public-api/runtime-utils.test.d.ts +0 -1
  858. package/dist/esm/tests/unit/public-api/runtime-utils.test.js +0 -25
  859. package/dist/esm/tests/unit/public-api/runtime-utils.test.js.map +0 -1
  860. package/dist/esm/tests/unit/public-api/schema-utils.test.d.ts +0 -1
  861. package/dist/esm/tests/unit/public-api/schema-utils.test.js +0 -60
  862. package/dist/esm/tests/unit/public-api/schema-utils.test.js.map +0 -1
  863. package/dist/esm/tests/unit/public-api/timeout-error-types.test.d.ts +0 -1
  864. package/dist/esm/tests/unit/public-api/timeout-error-types.test.js +0 -86
  865. package/dist/esm/tests/unit/public-api/timeout-error-types.test.js.map +0 -1
  866. package/dist/esm/tests/unit/public-api/tool-type-export.test.d.ts +0 -1
  867. package/dist/esm/tests/unit/public-api/tool-type-export.test.js +0 -30
  868. package/dist/esm/tests/unit/public-api/tool-type-export.test.js.map +0 -1
  869. package/dist/esm/tests/unit/public-api/v3-core.test.d.ts +0 -1
  870. package/dist/esm/tests/unit/public-api/v3-core.test.js +0 -73
  871. package/dist/esm/tests/unit/public-api/v3-core.test.js.map +0 -1
  872. package/dist/esm/tests/unit/safety-confirmation.test.d.ts +0 -1
  873. package/dist/esm/tests/unit/safety-confirmation.test.js +0 -108
  874. package/dist/esm/tests/unit/safety-confirmation.test.js.map +0 -1
  875. package/dist/esm/tests/unit/snapshot-a11y-resolvers.test.d.ts +0 -1
  876. package/dist/esm/tests/unit/snapshot-a11y-resolvers.test.js +0 -396
  877. package/dist/esm/tests/unit/snapshot-a11y-resolvers.test.js.map +0 -1
  878. package/dist/esm/tests/unit/snapshot-a11y-tree-utils.test.d.ts +0 -1
  879. package/dist/esm/tests/unit/snapshot-a11y-tree-utils.test.js +0 -304
  880. package/dist/esm/tests/unit/snapshot-a11y-tree-utils.test.js.map +0 -1
  881. package/dist/esm/tests/unit/snapshot-capture-orchestration.test.d.ts +0 -1
  882. package/dist/esm/tests/unit/snapshot-capture-orchestration.test.js +0 -469
  883. package/dist/esm/tests/unit/snapshot-capture-orchestration.test.js.map +0 -1
  884. package/dist/esm/tests/unit/snapshot-cbor.test.d.ts +0 -1
  885. package/dist/esm/tests/unit/snapshot-cbor.test.js +0 -202
  886. package/dist/esm/tests/unit/snapshot-cbor.test.js.map +0 -1
  887. package/dist/esm/tests/unit/snapshot-dom-session-builders.test.d.ts +0 -1
  888. package/dist/esm/tests/unit/snapshot-dom-session-builders.test.js +0 -218
  889. package/dist/esm/tests/unit/snapshot-dom-session-builders.test.js.map +0 -1
  890. package/dist/esm/tests/unit/snapshot-dom-tree-utils.test.d.ts +0 -1
  891. package/dist/esm/tests/unit/snapshot-dom-tree-utils.test.js +0 -105
  892. package/dist/esm/tests/unit/snapshot-dom-tree-utils.test.js.map +0 -1
  893. package/dist/esm/tests/unit/snapshot-focus-selectors-utils.test.d.ts +0 -1
  894. package/dist/esm/tests/unit/snapshot-focus-selectors-utils.test.js +0 -87
  895. package/dist/esm/tests/unit/snapshot-focus-selectors-utils.test.js.map +0 -1
  896. package/dist/esm/tests/unit/snapshot-frame-merge.test.d.ts +0 -1
  897. package/dist/esm/tests/unit/snapshot-frame-merge.test.js +0 -333
  898. package/dist/esm/tests/unit/snapshot-frame-merge.test.js.map +0 -1
  899. package/dist/esm/tests/unit/snapshot-tree-format-utils.test.d.ts +0 -1
  900. package/dist/esm/tests/unit/snapshot-tree-format-utils.test.js +0 -82
  901. package/dist/esm/tests/unit/snapshot-tree-format-utils.test.js.map +0 -1
  902. package/dist/esm/tests/unit/snapshot-xpath-utils.test.d.ts +0 -1
  903. package/dist/esm/tests/unit/snapshot-xpath-utils.test.js +0 -72
  904. package/dist/esm/tests/unit/snapshot-xpath-utils.test.js.map +0 -1
  905. package/dist/esm/tests/unit/timeout-handlers.test.d.ts +0 -1
  906. package/dist/esm/tests/unit/timeout-handlers.test.js +0 -862
  907. package/dist/esm/tests/unit/timeout-handlers.test.js.map +0 -1
  908. package/dist/esm/tests/unit/understudy-command-exception.test.d.ts +0 -1
  909. package/dist/esm/tests/unit/understudy-command-exception.test.js +0 -55
  910. package/dist/esm/tests/unit/understudy-command-exception.test.js.map +0 -1
  911. package/dist/esm/tests/unit/xpath-parser.test.d.ts +0 -1
  912. package/dist/esm/tests/unit/xpath-parser.test.js +0 -309
  913. package/dist/esm/tests/unit/xpath-parser.test.js.map +0 -1
  914. package/dist/esm/tests/unit/xpath-resolver.test.d.ts +0 -1
  915. package/dist/esm/tests/unit/xpath-resolver.test.js +0 -78
  916. package/dist/esm/tests/unit/xpath-resolver.test.js.map +0 -1
  917. package/dist/esm/tests/unit/zod-enum-compatibility.test.d.ts +0 -1
  918. package/dist/esm/tests/unit/zod-enum-compatibility.test.js +0 -114
  919. package/dist/esm/tests/unit/zod-enum-compatibility.test.js.map +0 -1
@@ -1,210 +1,200 @@
1
- /**
2
- * V3Evaluator mirrors Evaluator but operates on a V3 instance instead of Stagehand.
3
- * It uses the V3 page/screenshot APIs and constructs an LLM client to run
4
- * structured evaluations (YES/NO with reasoning) on screenshots and/or text.
5
- */
6
- import { z } from "zod";
7
- import { LLMProvider } from "./v3/llm/LLMProvider.js";
8
1
  import { StagehandInvalidArgumentError } from "./v3/types/public/sdkErrors.js";
9
- const EvaluationSchema = z.object({
10
- evaluation: z.enum(["YES", "NO"]),
11
- reasoning: z.string(),
12
- });
13
- const BatchEvaluationSchema = z.array(EvaluationSchema);
2
+ import { LegacyV3Evaluator } from "./v3LegacyEvaluator.js";
3
+ const EVALUATOR_BACKEND_ENV = "STAGEHAND_EVALUATOR_BACKEND";
4
+ const DEFAULT_EVALUATOR_BACKEND = "legacy";
14
5
  export class V3Evaluator {
15
- v3;
16
- modelName;
17
- modelClientOptions;
18
- silentLogger = () => { };
19
- constructor(v3, modelName, modelClientOptions) {
20
- this.v3 = v3;
21
- this.modelName = modelName || "google/gemini-2.5-flash";
22
- this.modelClientOptions = modelClientOptions || {
23
- apiKey: process.env.GEMINI_API_KEY ||
24
- process.env.GOOGLE_GENERATIVE_AI_API_KEY ||
25
- "",
26
- };
27
- }
28
- getClient() {
29
- // Prefer a dedicated provider so we can override model per-evaluation
30
- const provider = new LLMProvider(this.v3.logger);
31
- return provider.getClient(this.modelName, this.modelClientOptions);
6
+ backend;
7
+ legacyEvaluator;
8
+ constructor(v3, modelNameOrOptions, modelClientOptions, options) {
9
+ const normalizedOptions = normalizeConstructorOptions(modelNameOrOptions, modelClientOptions, options);
10
+ this.backend = resolveEvaluatorBackend(normalizedOptions.backend);
11
+ this.legacyEvaluator = new LegacyV3Evaluator(v3, normalizedOptions.modelName, normalizedOptions.modelClientOptions);
32
12
  }
33
13
  async ask(options) {
34
- const { question, answer, screenshot = true, systemPrompt, screenshotDelayMs = 250, agentReasoning, } = options;
35
- if (!question)
36
- throw new StagehandInvalidArgumentError("Question cannot be an empty string");
37
- if (!answer && !screenshot)
38
- throw new StagehandInvalidArgumentError("Either answer (text) or screenshot must be provided");
39
- if (Array.isArray(screenshot)) {
40
- return this._evaluateWithMultipleScreenshots({
41
- question,
42
- screenshots: screenshot,
43
- systemPrompt,
44
- agentReasoning,
45
- });
14
+ return this.getLegacyBackend("ask").ask(options);
15
+ }
16
+ async batchAsk(options) {
17
+ return this.getLegacyBackend("batchAsk").batchAsk(options);
18
+ }
19
+ async verify(trajectory) {
20
+ const taskSpec = assertVerifierInput(trajectory);
21
+ if (this.backend === "legacy") {
22
+ return this.verifyTrajectoryWithLegacyEvaluator(trajectory, taskSpec);
46
23
  }
47
- const defaultSystemPrompt = `You are an expert evaluator that confidently returns YES or NO based on if the original goal was achieved. You have access to ${screenshot ? "a screenshot" : "the agents reasoning and actions throughout the task"} that you can use to evaluate the tasks completion. Provide detailed reasoning for your answer.\n Today's date is ${new Date().toLocaleDateString()}`;
48
- await new Promise((r) => setTimeout(r, screenshotDelayMs));
49
- let imageBuffer;
50
- if (screenshot) {
51
- const page = await this.v3.context.awaitActivePage();
52
- imageBuffer = await page.screenshot({ fullPage: false });
24
+ return this.unavailableVerifierBackend("verify");
25
+ }
26
+ async generateRubric(taskSpec) {
27
+ if (!taskSpec?.id) {
28
+ throw new StagehandInvalidArgumentError("TaskSpec.id is required for rubric generation");
53
29
  }
54
- const llmClient = this.getClient();
55
- const response = await llmClient.createChatCompletion({
56
- logger: this.silentLogger,
57
- options: {
58
- messages: [
59
- { role: "system", content: systemPrompt || defaultSystemPrompt },
60
- {
61
- role: "user",
62
- content: [
63
- {
64
- type: "text",
65
- text: agentReasoning
66
- ? `Question: ${question}\n\nAgent's reasoning and actions taken:\n${agentReasoning}`
67
- : question,
68
- },
69
- ...(screenshot && imageBuffer
70
- ? [
71
- {
72
- type: "image_url",
73
- image_url: {
74
- url: `data:image/jpeg;base64,${imageBuffer.toString("base64")}`,
75
- },
76
- },
77
- ]
78
- : []),
79
- ...(answer
80
- ? [{ type: "text", text: `the answer is ${answer}` }]
81
- : []),
82
- ],
83
- },
84
- ],
85
- response_model: { name: "EvaluationResult", schema: EvaluationSchema },
86
- },
87
- });
88
- try {
89
- const result = response.data;
90
- return { evaluation: result.evaluation, reasoning: result.reasoning };
30
+ if (this.backend === "verifier") {
31
+ return this.unavailableVerifierBackend("generateRubric");
91
32
  }
92
- catch (error) {
93
- const errorMessage = error instanceof Error ? error.message : String(error);
94
- return {
95
- evaluation: "INVALID",
96
- reasoning: `Failed to get structured response: ${errorMessage}`,
97
- };
33
+ return {
34
+ items: [legacyTaskCompletionCriterion(taskSpec)],
35
+ };
36
+ }
37
+ getLegacyBackend(methodName) {
38
+ if (this.backend === "legacy") {
39
+ return this.legacyEvaluator;
98
40
  }
41
+ return this.unavailableVerifierBackend(methodName);
99
42
  }
100
- async batchAsk(options) {
101
- const { questions, screenshot = true, systemPrompt = "You are an expert evaluator that returns YES or NO with a concise reasoning.", screenshotDelayMs = 250, } = options;
102
- if (!questions?.length)
103
- throw new StagehandInvalidArgumentError("Questions array cannot be empty");
104
- await new Promise((r) => setTimeout(r, screenshotDelayMs));
105
- let imageBuffer;
106
- if (screenshot) {
107
- const page = await this.v3.context.awaitActivePage();
108
- imageBuffer = await page.screenshot({ fullPage: false });
43
+ unavailableVerifierBackend(methodName) {
44
+ throw new StagehandInvalidArgumentError(`V3Evaluator.${methodName}() was configured with ${EVALUATOR_BACKEND_ENV}=verifier, but the verifier backend is not available in this build. Use "legacy" or install the verifier backend PR.`);
45
+ }
46
+ async verifyTrajectoryWithLegacyEvaluator(trajectory, taskSpec) {
47
+ const screenshots = collectLegacyScreenshots(trajectory);
48
+ const agentReasoning = renderLegacyAgentReasoning(trajectory);
49
+ const answer = trajectory.finalAnswer;
50
+ if (!screenshots.length && !answer) {
51
+ return legacyInsufficientEvidenceResult("Legacy evaluator compatibility mode had no screenshots or final answer to evaluate.");
109
52
  }
110
- const llmClient = this.getClient();
111
- const formatted = questions
112
- .map((item, i) => `${i + 1}. ${item.question}${item.answer ? `\n Answer: ${item.answer}` : ""}`)
113
- .join("\n\n");
114
- const response = await llmClient.createChatCompletion({
115
- logger: this.silentLogger,
116
- options: {
117
- messages: [
118
- {
119
- role: "system",
120
- content: `${systemPrompt}\n\nYou will be given multiple questions${screenshot ? " with a screenshot" : ""}. ${questions.some((q) => q.answer) ? "Some questions include answers to evaluate." : ""} Answer each question by returning an object in the specified JSON format. Return a single JSON array containing one object for each question in the order they were asked.`,
121
- },
122
- {
123
- role: "user",
124
- content: [
125
- { type: "text", text: formatted },
126
- ...(screenshot && imageBuffer
127
- ? [
128
- {
129
- type: "image_url",
130
- image_url: {
131
- url: `data:image/jpeg;base64,${imageBuffer.toString("base64")}`,
132
- },
133
- },
134
- ]
135
- : []),
136
- ],
137
- },
138
- ],
139
- response_model: {
140
- name: "BatchEvaluationResult",
141
- schema: BatchEvaluationSchema,
142
- },
143
- },
53
+ const result = await this.legacyEvaluator.ask({
54
+ question: taskSpec.instruction,
55
+ screenshot: screenshots.length ? screenshots : false,
56
+ answer,
57
+ agentReasoning,
144
58
  });
145
- try {
146
- const results = response.data;
147
- return results.map((r) => ({
148
- evaluation: r.evaluation,
149
- reasoning: r.reasoning,
150
- }));
151
- }
152
- catch (error) {
153
- const errorMessage = error instanceof Error ? error.message : String(error);
154
- return questions.map(() => ({
155
- evaluation: "INVALID",
156
- reasoning: `Failed to get structured response: ${errorMessage}`,
157
- }));
158
- }
59
+ return legacyEvaluationToResult(result, screenshots.length);
159
60
  }
160
- async _evaluateWithMultipleScreenshots(options) {
161
- const { question, screenshots, agentReasoning, systemPrompt = `You are an expert evaluator that confidently returns YES or NO given a question and multiple screenshots showing the progression of a task.
162
- ${agentReasoning ? "You also have access to the agent's detailed reasoning and thought process throughout the task." : ""}
163
- Analyze ALL screenshots to understand the complete journey. Look for evidence of task completion across all screenshots, not just the last one.
164
- Success criteria may appear at different points in the sequence (confirmation messages, intermediate states, etc).
165
- ${agentReasoning ? "The agent's reasoning provides crucial context about what actions were attempted, what was observed, and the decision-making process. Use this alongside the visual evidence to make a comprehensive evaluation." : ""}
166
- Today's date is ${new Date().toLocaleDateString()}`, } = options;
167
- if (!question)
168
- throw new StagehandInvalidArgumentError("Question cannot be an empty string");
169
- if (!screenshots || screenshots.length === 0)
170
- throw new StagehandInvalidArgumentError("At least one screenshot must be provided");
171
- const llmClient = this.getClient();
172
- const imageContents = screenshots.map((s) => ({
173
- type: "image_url",
174
- image_url: { url: `data:image/jpeg;base64,${s.toString("base64")}` },
175
- }));
176
- const response = await llmClient.createChatCompletion({
177
- logger: this.silentLogger,
178
- options: {
179
- messages: [
180
- { role: "system", content: systemPrompt },
181
- {
182
- role: "user",
183
- content: [
184
- {
185
- type: "text",
186
- text: agentReasoning
187
- ? `Question: ${question}\n\nAgent's reasoning and actions throughout the task:\n${agentReasoning}\n\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze both the agent's reasoning and all screenshots to determine if the task was completed successfully.`
188
- : `${question}\n\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze all of them to determine if the task was completed successfully.`,
189
- },
190
- ...imageContents,
191
- ],
192
- },
193
- ],
194
- response_model: { name: "EvaluationResult", schema: EvaluationSchema },
195
- },
196
- });
197
- try {
198
- const result = response.data;
199
- return { evaluation: result.evaluation, reasoning: result.reasoning };
61
+ }
62
+ function normalizeConstructorOptions(modelNameOrOptions, modelClientOptions, options) {
63
+ if (modelNameOrOptions &&
64
+ typeof modelNameOrOptions === "object" &&
65
+ !Array.isArray(modelNameOrOptions)) {
66
+ return {
67
+ modelName: modelNameOrOptions.modelName,
68
+ modelClientOptions: modelNameOrOptions.modelClientOptions,
69
+ backend: modelNameOrOptions.backend ?? options?.backend,
70
+ };
71
+ }
72
+ return {
73
+ modelName: modelNameOrOptions,
74
+ modelClientOptions,
75
+ backend: options?.backend,
76
+ };
77
+ }
78
+ function resolveEvaluatorBackend(explicitBackend) {
79
+ const configuredBackend = explicitBackend ??
80
+ process.env[EVALUATOR_BACKEND_ENV] ??
81
+ DEFAULT_EVALUATOR_BACKEND;
82
+ const normalizedBackend = configuredBackend.trim().toLowerCase();
83
+ if (normalizedBackend === "legacy" || normalizedBackend === "verifier") {
84
+ return normalizedBackend;
85
+ }
86
+ throw new StagehandInvalidArgumentError(`Invalid ${EVALUATOR_BACKEND_ENV}="${configuredBackend}". Expected "legacy" or "verifier".`);
87
+ }
88
+ function assertVerifierInput(trajectory) {
89
+ if (!trajectory) {
90
+ throw new StagehandInvalidArgumentError("Trajectory is required for verification");
91
+ }
92
+ if (!trajectory.task?.id) {
93
+ throw new StagehandInvalidArgumentError("Trajectory.task.id is required for verification");
94
+ }
95
+ return trajectory.task;
96
+ }
97
+ function legacyTaskCompletionCriterion(taskSpec) {
98
+ return {
99
+ criterion: "legacy-task-completion",
100
+ description: `Evaluate whether the task was completed successfully: ${taskSpec.instruction}`,
101
+ maxPoints: 1,
102
+ };
103
+ }
104
+ function collectLegacyScreenshots(trajectory) {
105
+ const screenshots = [];
106
+ for (const step of trajectory.steps ?? []) {
107
+ if (Buffer.isBuffer(step.probeEvidence?.screenshot)) {
108
+ screenshots.push(step.probeEvidence.screenshot);
109
+ continue;
200
110
  }
201
- catch (error) {
202
- const errorMessage = error instanceof Error ? error.message : String(error);
203
- return {
204
- evaluation: "INVALID",
205
- reasoning: `Failed to get structured response: ${errorMessage}`,
206
- };
111
+ const agentImage = step.agentEvidence?.modalities?.find((modality) => modality.type === "image" && Buffer.isBuffer(modality.bytes));
112
+ if (agentImage) {
113
+ screenshots.push(agentImage.bytes);
207
114
  }
208
115
  }
116
+ if (Buffer.isBuffer(trajectory.finalObservation?.screenshot)) {
117
+ screenshots.push(trajectory.finalObservation.screenshot);
118
+ }
119
+ return screenshots;
120
+ }
121
+ function renderLegacyAgentReasoning(trajectory) {
122
+ const stepLines = (trajectory.steps ?? []).map((step, i) => {
123
+ const status = step.toolOutput?.ok === false ? "Tool status: failed" : "";
124
+ const output = step.toolOutput?.error
125
+ ? `Tool error: ${step.toolOutput.error}`
126
+ : `Tool output: ${stringifyForPrompt(step.toolOutput?.result)}`;
127
+ return [
128
+ `Step ${i}: ${step.actionName}`,
129
+ step.reasoning ? `Reasoning: ${step.reasoning}` : undefined,
130
+ status || undefined,
131
+ output,
132
+ ]
133
+ .filter(Boolean)
134
+ .join("\n");
135
+ });
136
+ if (!stepLines.length) {
137
+ return undefined;
138
+ }
139
+ return truncateForPrompt(`Agent trajectory:\n${stepLines.join("\n\n")}`, 16000);
140
+ }
141
+ function stringifyForPrompt(value) {
142
+ if (typeof value === "string") {
143
+ return value;
144
+ }
145
+ try {
146
+ const serialized = JSON.stringify(value);
147
+ return serialized ?? String(value);
148
+ }
149
+ catch {
150
+ return String(value);
151
+ }
152
+ }
153
+ function truncateForPrompt(value, maxLength) {
154
+ if (value.length <= maxLength) {
155
+ return value;
156
+ }
157
+ return `${value.slice(0, maxLength)}... [truncated]`;
158
+ }
159
+ function legacyEvaluationToResult(result, screenshotCount) {
160
+ const outcomeSuccess = result.evaluation === "YES";
161
+ const invalid = result.evaluation === "INVALID";
162
+ const findings = invalid
163
+ ? [
164
+ {
165
+ category: "verifier_uncertainty",
166
+ severity: "warning",
167
+ description: result.reasoning,
168
+ },
169
+ ]
170
+ : [];
171
+ return {
172
+ outcomeSuccess,
173
+ explanation: result.reasoning,
174
+ ...(findings.length ? { findings } : {}),
175
+ rawSteps: {
176
+ backend: "legacy",
177
+ legacyEvaluation: result.evaluation,
178
+ screenshotCount,
179
+ },
180
+ };
181
+ }
182
+ function legacyInsufficientEvidenceResult(reason) {
183
+ return {
184
+ outcomeSuccess: false,
185
+ explanation: reason,
186
+ findings: [
187
+ {
188
+ category: "trajectory_capture",
189
+ severity: "blocking",
190
+ description: reason,
191
+ },
192
+ ],
193
+ rawSteps: {
194
+ backend: "legacy",
195
+ legacyEvaluation: "INVALID",
196
+ screenshotCount: 0,
197
+ },
198
+ };
209
199
  }
210
200
  //# sourceMappingURL=v3Evaluator.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"v3Evaluator.js","sourceRoot":"","sources":["../../../lib/v3Evaluator.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAWxB,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,6BAA6B,EAAE,MAAM,gCAAgC,CAAC;AAE/E,MAAM,gBAAgB,GAAG,CAAC,CAAC,MAAM,CAAC;IAChC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACjC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE;CACtB,CAAC,CAAC;AAEH,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;AAExD,MAAM,OAAO,WAAW;IACd,EAAE,CAAK;IACP,SAAS,CAAiB;IAC1B,kBAAkB,CAAqC;IACvD,YAAY,GAA+B,GAAG,EAAE,GAAE,CAAC,CAAC;IAE5D,YACE,EAAM,EACN,SAA0B,EAC1B,kBAAkC;QAElC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,SAAS,GAAG,SAAS,IAAK,yBAA4C,CAAC;QAC5E,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,IAAI;YAC9C,MAAM,EACJ,OAAO,CAAC,GAAG,CAAC,cAAc;gBAC1B,OAAO,CAAC,GAAG,CAAC,4BAA4B;gBACxC,EAAE;SACL,CAAC;IACJ,CAAC;IAEO,SAAS;QACf,sEAAsE;QACtE,MAAM,QAAQ,GAAG,IAAI,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;QACjD,OAAO,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACrE,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,OAAwB;QAChC,MAAM,EACJ,QAAQ,EACR,MAAM,EACN,UAAU,GAAG,IAAI,EACjB,YAAY,EACZ,iBAAiB,GAAG,GAAG,EACvB,cAAc,GACf,GAAG,OAAO,CAAC;QACZ,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,6BAA6B,CACrC,oCAAoC,CACrC,CAAC;QACJ,IAAI,CAAC,MAAM,IAAI,CAAC,UAAU;YACxB,MAAM,IAAI,6BAA6B,CACrC,qDAAqD,CACtD,CAAC;QAEJ,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,gCAAgC,CAAC;gBAC3C,QAAQ;gBACR,WAAW,EAAE,UAAU;gBACvB,YAAY;gBACZ,cAAc;aACf,CAAC,CAAC;QACL,CAAC;QAED,MAAM,mBAAmB,GAAG,kIAAkI,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,sDAAsD,8HAA8H,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,EAAE,CAAC;QAElZ,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3D,IAAI,WAA+B,CAAC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YACrD,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,QAAQ,GAAG,MAAM,SA
AS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,IAAI,mBAAmB,EAAE;oBAChE;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,cAAc;oCAClB,CAAC,CAAC,aAAa,QAAQ,6CAA6C,cAAc,EAAE;oCACpF,CAAC,CAAC,QAAQ;6BACb;4BACD,GAAG,CAAC,UAAU,IAAI,WAAW;gCAC3B,CAAC,CAAC;oCACE;wCACE,IAAI,EAAE,WAAoB;wCAC1B,SAAS,EAAE;4CACT,GAAG,EAAE,0BAA0B,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;yCAChE;qCACF;iCACF;gCACH,CAAC,CAAC,EAAE,CAAC;4BACP,GAAG,CAAC,MAAM;gCACR,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,iBAAiB,MAAM,EAAE,EAAE,CAAC;gCAC9D,CAAC,CAAC,EAAE,CAAC;yBACR;qBACF;iBACF;gBACD,cAAc,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE;aACvE;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAEvB,CAAC;YACF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,SAAS,EAAE,sCAAsC,YAAY,EAAE;aACvD,CAAC;QACb,CAAC;IACH,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAAwB;QACrC,MAAM,EACJ,SAAS,EACT,UAAU,GAAG,IAAI,EACjB,YAAY,GAAG,8EAA8E,EAC7F,iBAAiB,GAAG,GAAG,GACxB,GAAG,OAAO,CAAC;QACZ,IAAI,CAAC,SAAS,EAAE,MAAM;YACpB,MAAM,IAAI,6BAA6B,CACrC,iCAAiC,CAClC,CAAC;QAEJ,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3D,IAAI,WAA+B,CAAC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YACrD,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,SAAS,GAAG,SAAS;aACxB,GAAG,CACF,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CACV,GAAG,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,gBAAgB,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAClF;aACA,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,O
AAO,EAAE;gBACP,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,QAAQ;wBACd,OAAO,EAAE,GAAG,YAAY,2CAA2C,UAAU,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,6CAA6C,CAAC,CAAC,CAAC,EAAE,6KAA6K;qBAChX;oBACD;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE;4BACjC,GAAG,CAAC,UAAU,IAAI,WAAW;gCAC3B,CAAC,CAAC;oCACE;wCACE,IAAI,EAAE,WAAoB;wCAC1B,SAAS,EAAE;4CACT,GAAG,EAAE,0BAA0B,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;yCAChE;qCACF;iCACF;gCACH,CAAC,CAAC,EAAE,CAAC;yBACR;qBACF;iBACF;gBACD,cAAc,EAAE;oBACd,IAAI,EAAE,uBAAuB;oBAC7B,MAAM,EAAE,qBAAqB;iBAC9B;aACF;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,QAAQ,CAAC,IAExB,CAAC;YACF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACzB,UAAU,EAAE,CAAC,CAAC,UAAU;gBACxB,SAAS,EAAE,CAAC,CAAC,SAAS;aACvB,CAAC,CAAC,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC;gBAC1B,UAAU,EAAE,SAAkB;gBAC9B,SAAS,EAAE,sCAAsC,YAAY,EAAE;aAChE,CAAC,CAAC,CAAC;QACN,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,gCAAgC,CAAC,OAK9C;QACC,MAAM,EACJ,QAAQ,EACR,WAAW,EACX,cAAc,EACd,YAAY,GAAG;UACX,cAAc,CAAC,CAAC,CAAC,iGAAiG,CAAC,CAAC,CAAC,EAAE;;;UAGvH,cAAc,CAAC,CAAC,CAAC,kNAAkN,CAAC,CAAC,CAAC,EAAE;0BACxN,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,EAAE,GACtD,GAAG,OAAO,CAAC;QAEZ,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,6BAA6B,CACrC,oCAAoC,CACrC,CAAC;QACJ,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAC1C,MAAM,IAAI,6BAA6B,CACrC,0CAA0C,CAC3C,CAAC;QAEJ,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC5C,IAAI,EAAE,WAAoB;YAC1B,SAAS,EAAE,EAAE,GAAG,EAAE,0BAA0B,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,EAAE;SACrE,CAAC,CAAC,CAAC;QAEJ,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE;oBACzC;wBACE,IAAI,EAAE,MAAM;wBA
CZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,cAAc;oCAClB,CAAC,CAAC,aAAa,QAAQ,2DAA2D,cAAc,qBAAqB,WAAW,CAAC,MAAM,sKAAsK;oCAC7S,CAAC,CAAC,GAAG,QAAQ,qBAAqB,WAAW,CAAC,MAAM,mIAAmI;6BAC1L;4BACD,GAAG,aAAa;yBACjB;qBACF;iBACF;gBACD,cAAc,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE;aACvE;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAEvB,CAAC;YACF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,SAAS,EAAE,sCAAsC,YAAY,EAAE;aACvD,CAAC;QACb,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * V3Evaluator mirrors Evaluator but operates on a V3 instance instead of Stagehand.\n * It uses the V3 page/screenshot APIs and constructs an LLM client to run\n * structured evaluations (YES/NO with reasoning) on screenshots and/or text.\n */\n\nimport { z } from \"zod\";\nimport type { AvailableModel, ClientOptions } from \"./v3/types/public/model.js\";\nimport type {\n EvaluateOptions,\n BatchAskOptions,\n EvaluationResult,\n} from \"./v3/types/private/evaluator.js\";\nimport { LLMParsedResponse } from \"./inference.js\";\nimport { LLMResponse, LLMClient } from \"./v3/llm/LLMClient.js\";\nimport { LogLine } from \"./v3/types/public/logs.js\";\nimport { V3 } from \"./v3/v3.js\";\nimport { LLMProvider } from \"./v3/llm/LLMProvider.js\";\nimport { StagehandInvalidArgumentError } from \"./v3/types/public/sdkErrors.js\";\n\nconst EvaluationSchema = z.object({\n evaluation: z.enum([\"YES\", \"NO\"]),\n reasoning: z.string(),\n});\n\nconst BatchEvaluationSchema = z.array(EvaluationSchema);\n\nexport class V3Evaluator {\n private v3: V3;\n private modelName: AvailableModel;\n private modelClientOptions: ClientOptions | { apiKey: string };\n private silentLogger: (message: LogLine) => void = () => {};\n\n constructor(\n v3: V3,\n modelName?: AvailableModel,\n 
modelClientOptions?: ClientOptions,\n ) {\n this.v3 = v3;\n this.modelName = modelName || (\"google/gemini-2.5-flash\" as AvailableModel);\n this.modelClientOptions = modelClientOptions || {\n apiKey:\n process.env.GEMINI_API_KEY ||\n process.env.GOOGLE_GENERATIVE_AI_API_KEY ||\n \"\",\n };\n }\n\n private getClient(): LLMClient {\n // Prefer a dedicated provider so we can override model per-evaluation\n const provider = new LLMProvider(this.v3.logger);\n return provider.getClient(this.modelName, this.modelClientOptions);\n }\n\n async ask(options: EvaluateOptions): Promise<EvaluationResult> {\n const {\n question,\n answer,\n screenshot = true,\n systemPrompt,\n screenshotDelayMs = 250,\n agentReasoning,\n } = options;\n if (!question)\n throw new StagehandInvalidArgumentError(\n \"Question cannot be an empty string\",\n );\n if (!answer && !screenshot)\n throw new StagehandInvalidArgumentError(\n \"Either answer (text) or screenshot must be provided\",\n );\n\n if (Array.isArray(screenshot)) {\n return this._evaluateWithMultipleScreenshots({\n question,\n screenshots: screenshot,\n systemPrompt,\n agentReasoning,\n });\n }\n\n const defaultSystemPrompt = `You are an expert evaluator that confidently returns YES or NO based on if the original goal was achieved. You have access to ${screenshot ? \"a screenshot\" : \"the agents reasoning and actions throughout the task\"} that you can use to evaluate the tasks completion. 
Provide detailed reasoning for your answer.\\n Today's date is ${new Date().toLocaleDateString()}`;\n\n await new Promise((r) => setTimeout(r, screenshotDelayMs));\n let imageBuffer: Buffer | undefined;\n if (screenshot) {\n const page = await this.v3.context.awaitActivePage();\n imageBuffer = await page.screenshot({ fullPage: false });\n }\n\n const llmClient = this.getClient();\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n { role: \"system\", content: systemPrompt || defaultSystemPrompt },\n {\n role: \"user\",\n content: [\n {\n type: \"text\",\n text: agentReasoning\n ? `Question: ${question}\\n\\nAgent's reasoning and actions taken:\\n${agentReasoning}`\n : question,\n },\n ...(screenshot && imageBuffer\n ? [\n {\n type: \"image_url\" as const,\n image_url: {\n url: `data:image/jpeg;base64,${imageBuffer.toString(\"base64\")}`,\n },\n },\n ]\n : []),\n ...(answer\n ? [{ type: \"text\" as const, text: `the answer is ${answer}` }]\n : []),\n ],\n },\n ],\n response_model: { name: \"EvaluationResult\", schema: EvaluationSchema },\n },\n });\n\n try {\n const result = response.data as unknown as z.infer<\n typeof EvaluationSchema\n >;\n return { evaluation: result.evaluation, reasoning: result.reasoning };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? 
error.message : String(error);\n return {\n evaluation: \"INVALID\",\n reasoning: `Failed to get structured response: ${errorMessage}`,\n } as const;\n }\n }\n\n async batchAsk(options: BatchAskOptions): Promise<EvaluationResult[]> {\n const {\n questions,\n screenshot = true,\n systemPrompt = \"You are an expert evaluator that returns YES or NO with a concise reasoning.\",\n screenshotDelayMs = 250,\n } = options;\n if (!questions?.length)\n throw new StagehandInvalidArgumentError(\n \"Questions array cannot be empty\",\n );\n\n await new Promise((r) => setTimeout(r, screenshotDelayMs));\n let imageBuffer: Buffer | undefined;\n if (screenshot) {\n const page = await this.v3.context.awaitActivePage();\n imageBuffer = await page.screenshot({ fullPage: false });\n }\n\n const llmClient = this.getClient();\n\n const formatted = questions\n .map(\n (item, i) =>\n `${i + 1}. ${item.question}${item.answer ? `\\n Answer: ${item.answer}` : \"\"}`,\n )\n .join(\"\\n\\n\");\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n {\n role: \"system\",\n content: `${systemPrompt}\\n\\nYou will be given multiple questions${screenshot ? \" with a screenshot\" : \"\"}. ${questions.some((q) => q.answer) ? \"Some questions include answers to evaluate.\" : \"\"} Answer each question by returning an object in the specified JSON format. Return a single JSON array containing one object for each question in the order they were asked.`,\n },\n {\n role: \"user\",\n content: [\n { type: \"text\", text: formatted },\n ...(screenshot && imageBuffer\n ? 
[\n {\n type: \"image_url\" as const,\n image_url: {\n url: `data:image/jpeg;base64,${imageBuffer.toString(\"base64\")}`,\n },\n },\n ]\n : []),\n ],\n },\n ],\n response_model: {\n name: \"BatchEvaluationResult\",\n schema: BatchEvaluationSchema,\n },\n },\n });\n\n try {\n const results = response.data as unknown as z.infer<\n typeof BatchEvaluationSchema\n >;\n return results.map((r) => ({\n evaluation: r.evaluation,\n reasoning: r.reasoning,\n }));\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return questions.map(() => ({\n evaluation: \"INVALID\" as const,\n reasoning: `Failed to get structured response: ${errorMessage}`,\n }));\n }\n }\n\n private async _evaluateWithMultipleScreenshots(options: {\n question: string;\n screenshots: Buffer[];\n systemPrompt?: string;\n agentReasoning?: string;\n }): Promise<EvaluationResult> {\n const {\n question,\n screenshots,\n agentReasoning,\n systemPrompt = `You are an expert evaluator that confidently returns YES or NO given a question and multiple screenshots showing the progression of a task.\n ${agentReasoning ? \"You also have access to the agent's detailed reasoning and thought process throughout the task.\" : \"\"}\n Analyze ALL screenshots to understand the complete journey. Look for evidence of task completion across all screenshots, not just the last one.\n Success criteria may appear at different points in the sequence (confirmation messages, intermediate states, etc).\n ${agentReasoning ? \"The agent's reasoning provides crucial context about what actions were attempted, what was observed, and the decision-making process. 
Use this alongside the visual evidence to make a comprehensive evaluation.\" : \"\"}\n Today's date is ${new Date().toLocaleDateString()}`,\n } = options;\n\n if (!question)\n throw new StagehandInvalidArgumentError(\n \"Question cannot be an empty string\",\n );\n if (!screenshots || screenshots.length === 0)\n throw new StagehandInvalidArgumentError(\n \"At least one screenshot must be provided\",\n );\n\n const llmClient = this.getClient();\n\n const imageContents = screenshots.map((s) => ({\n type: \"image_url\" as const,\n image_url: { url: `data:image/jpeg;base64,${s.toString(\"base64\")}` },\n }));\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n { role: \"system\", content: systemPrompt },\n {\n role: \"user\",\n content: [\n {\n type: \"text\",\n text: agentReasoning\n ? `Question: ${question}\\n\\nAgent's reasoning and actions throughout the task:\\n${agentReasoning}\\n\\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze both the agent's reasoning and all screenshots to determine if the task was completed successfully.`\n : `${question}\\n\\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze all of them to determine if the task was completed successfully.`,\n },\n ...imageContents,\n ],\n },\n ],\n response_model: { name: \"EvaluationResult\", schema: EvaluationSchema },\n },\n });\n\n try {\n const result = response.data as unknown as z.infer<\n typeof EvaluationSchema\n >;\n return { evaluation: result.evaluation, reasoning: result.reasoning };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return {\n evaluation: \"INVALID\",\n reasoning: `Failed to get structured response: ${errorMessage}`,\n } as const;\n }\n }\n}\n"]}
1
+ {"version":3,"file":"v3Evaluator.js","sourceRoot":"","sources":["../../../lib/v3Evaluator.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,6BAA6B,EAAE,MAAM,gCAAgC,CAAC;AAC/E,OAAO,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC;AAW3D,MAAM,qBAAqB,GAAG,6BAA6B,CAAC;AAC5D,MAAM,yBAAyB,GAAuB,QAAQ,CAAC;AA2B/D,MAAM,OAAO,WAAW;IACL,OAAO,CAAqB;IAC5B,eAAe,CAAoB;IAEpD,YACE,EAAM,EACN,kBAAmE,EACnE,kBAAkC,EAClC,OAA4B;QAE5B,MAAM,iBAAiB,GAAG,2BAA2B,CACnD,kBAAkB,EAClB,kBAAkB,EAClB,OAAO,CACR,CAAC;QAEF,IAAI,CAAC,OAAO,GAAG,uBAAuB,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAClE,IAAI,CAAC,eAAe,GAAG,IAAI,iBAAiB,CAC1C,EAAE,EACF,iBAAiB,CAAC,SAAS,EAC3B,iBAAiB,CAAC,kBAAkB,CACrC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,OAAwB;QAChC,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACnD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAAwB;QACrC,OAAO,IAAI,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAC7D,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,UAAsB;QACjC,MAAM,QAAQ,GAAG,mBAAmB,CAAC,UAAU,CAAC,CAAC;QAEjD,IAAI,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,mCAAmC,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QACxE,CAAC;QAED,OAAO,IAAI,CAAC,0BAA0B,CAAC,QAAQ,CAAC,CAAC;IACnD,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,QAAkB;QACrC,IAAI,CAAC,QAAQ,EAAE,EAAE,EAAE,CAAC;YAClB,MAAM,IAAI,6BAA6B,CACrC,+CAA+C,CAChD,CAAC;QACJ,CAAC;QAED,IAAI,IAAI,CAAC,OAAO,KAAK,UAAU,EAAE,CAAC;YAChC,OAAO,IAAI,CAAC,0BAA0B,CAAC,gBAAgB,CAAC,CAAC;QAC3D,CAAC;QAED,OAAO;YACL,KAAK,EAAE,CAAC,6BAA6B,CAAC,QAAQ,CAAC,CAAC;SACjD,CAAC;IACJ,CAAC;IAEO,gBAAgB,CAAC,UAAkB;QACzC,IAAI,IAAI,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,eAAe,CAAC;QAC9B,CAAC;QAED,OAAO,IAAI,CAAC,0BAA0B,CAAC,UAAU,CAAC,CAAC;IACrD,CAAC;IAEO,0BAA0B,CAAC,UAAkB;QACnD,MAAM,IAAI,6BAA6B,CACrC,eAAe,UAAU,0BAA0B,qBAAqB,sHAAsH,CAC/L,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,mCAAmC,CAC/C,UAAsB,EACtB,QAAkB;QAElB,MAAM,WAAW,GAAG,wBAAwB,CAAC,UAAU,CAAC,CAAC;QACzD,MAAM,cAAc,GAAG,0BAA0B,CAAC,UAAU,CAAC,CAAC;QAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,WAAW,CAAC;QAEtC,IAAI,CAAC,WAAW,CAAC,MAAM,IAAI,CAAC,MAAM,EAAE,CAAC;YACnC,OAAO,gCAAgC,CACrC,qFAAqF,CACtF,CAAC;QACJ,CA
AC;QAED,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC;YAC5C,QAAQ,EAAE,QAAQ,CAAC,WAAW;YAC9B,UAAU,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,KAAK;YACpD,MAAM;YACN,cAAc;SACf,CAAC,CAAC;QAEH,OAAO,wBAAwB,CAAC,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC;IAC9D,CAAC;CACF;AAED,SAAS,2BAA2B,CAClC,kBAAmE,EACnE,kBAAkC,EAClC,OAA4B;IAE5B,IACE,kBAAkB;QAClB,OAAO,kBAAkB,KAAK,QAAQ;QACtC,CAAC,KAAK,CAAC,OAAO,CAAC,kBAAkB,CAAC,EAClC,CAAC;QACD,OAAO;YACL,SAAS,EAAE,kBAAkB,CAAC,SAAS;YACvC,kBAAkB,EAAE,kBAAkB,CAAC,kBAAkB;YACzD,OAAO,EAAE,kBAAkB,CAAC,OAAO,IAAI,OAAO,EAAE,OAAO;SACxD,CAAC;IACJ,CAAC;IAED,OAAO;QACL,SAAS,EAAE,kBAAgD;QAC3D,kBAAkB;QAClB,OAAO,EAAE,OAAO,EAAE,OAAO;KAC1B,CAAC;AACJ,CAAC;AAED,SAAS,uBAAuB,CAC9B,eAAoC;IAEpC,MAAM,iBAAiB,GACrB,eAAe;QACf,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC;QAClC,yBAAyB,CAAC;IAC5B,MAAM,iBAAiB,GAAG,iBAAiB,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAEjE,IAAI,iBAAiB,KAAK,QAAQ,IAAI,iBAAiB,KAAK,UAAU,EAAE,CAAC;QACvE,OAAO,iBAAiB,CAAC;IAC3B,CAAC;IAED,MAAM,IAAI,6BAA6B,CACrC,WAAW,qBAAqB,KAAK,iBAAiB,qCAAqC,CAC5F,CAAC;AACJ,CAAC;AAED,SAAS,mBAAmB,CAAC,UAAsB;IACjD,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,IAAI,6BAA6B,CACrC,yCAAyC,CAC1C,CAAC;IACJ,CAAC;IACD,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC;QACzB,MAAM,IAAI,6BAA6B,CACrC,iDAAiD,CAClD,CAAC;IACJ,CAAC;IACD,OAAO,UAAU,CAAC,IAAI,CAAC;AACzB,CAAC;AAED,SAAS,6BAA6B,CAAC,QAAkB;IACvD,OAAO;QACL,SAAS,EAAE,wBAAwB;QACnC,WAAW,EAAE,yDAAyD,QAAQ,CAAC,WAAW,EAAE;QAC5F,SAAS,EAAE,CAAC;KACb,CAAC;AACJ,CAAC;AAED,SAAS,wBAAwB,CAAC,UAAsB;IACtD,MAAM,WAAW,GAAa,EAAE,CAAC;IAEjC,KAAK,MAAM,IAAI,IAAI,UAAU,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;QAC1C,IAAI,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,UAAU,CAAC,EAAE,CAAC;YACpD,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;YAChD,SAAS;QACX,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,EAAE,UAAU,EAAE,IAAI,CACrD,CACE,QAAQ,EACuD,EAAE,CACjE,QAAQ,CAAC,IAAI,KAAK,OAAO,IAAI,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,CAC/D,CAAC;QAEF,IAAI,UAAU,EAAE,CAAC;YACf,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACrC,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,CA
AC,UAAU,CAAC,gBAAgB,EAAE,UAAU,CAAC,EAAE,CAAC;QAC7D,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;IAC3D,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,SAAS,0BAA0B,CACjC,UAAsB;IAEtB,MAAM,SAAS,GAAG,CAAC,UAAU,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QACzD,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,EAAE,EAAE,KAAK,KAAK,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,EAAE,CAAC;QAC1E,MAAM,MAAM,GAAG,IAAI,CAAC,UAAU,EAAE,KAAK;YACnC,CAAC,CAAC,eAAe,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE;YACxC,CAAC,CAAC,gBAAgB,kBAAkB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC,EAAE,CAAC;QAClE,OAAO;YACL,QAAQ,CAAC,KAAK,IAAI,CAAC,UAAU,EAAE;YAC/B,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,cAAc,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS;YAC3D,MAAM,IAAI,SAAS;YACnB,MAAM;SACP;aACE,MAAM,CAAC,OAAO,CAAC;aACf,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;QACtB,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,OAAO,iBAAiB,CACtB,sBAAsB,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,EAC9C,KAAK,CACN,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAc;IACxC,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACzC,OAAO,UAAU,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC;IACrC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IACvB,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,KAAa,EAAE,SAAiB;IACzD,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,iBAAiB,CAAC;AACvD,CAAC;AAED,SAAS,wBAAwB,CAC/B,MAA8B,EAC9B,eAAuB;IAEvB,MAAM,cAAc,GAAG,MAAM,CAAC,UAAU,KAAK,KAAK,CAAC;IACnD,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,KAAK,SAAS,CAAC;IAChD,MAAM,QAAQ,GAAsB,OAAO;QACzC,CAAC,CAAC;YACE;gBACE,QAAQ,EAAE,sBAAsB;gBAChC,QAAQ,EAAE,SAAS;gBACnB,WAAW,EAAE,MAAM,CAAC,SAAS;aAC9B;SACF;QACH,CAAC,CAAC,EAAE,CAAC;IAEP,OAAO;QACL,cAAc;QACd,WAAW,EAAE,MAAM,CAAC,SAAS;QAC7B,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACxC,QAAQ,EAAE;YACR,OAAO,EAAE,QAAQ;YACjB,gBAAgB,EAAE,MAAM,CAAC,UAAU;YACnC,eAAe;SAChB;KACF,CAAC;AACJ,C
AAC;AAED,SAAS,gCAAgC,CAAC,MAAc;IACtD,OAAO;QACL,cAAc,EAAE,KAAK;QACrB,WAAW,EAAE,MAAM;QACnB,QAAQ,EAAE;YACR;gBACE,QAAQ,EAAE,oBAAoB;gBAC9B,QAAQ,EAAE,UAAU;gBACpB,WAAW,EAAE,MAAM;aACpB;SACF;QACD,QAAQ,EAAE;YACR,OAAO,EAAE,QAAQ;YACjB,gBAAgB,EAAE,SAAS;YAC3B,eAAe,EAAE,CAAC;SACnB;KACF,CAAC;AACJ,CAAC","sourcesContent":["import type { AvailableModel, ClientOptions } from \"./v3/types/public/model.js\";\nimport type {\n EvaluateOptions,\n BatchAskOptions,\n EvaluationResult as LegacyEvaluationResult,\n} from \"./v3/types/private/evaluator.js\";\nimport { V3 } from \"./v3/v3.js\";\nimport { StagehandInvalidArgumentError } from \"./v3/types/public/sdkErrors.js\";\nimport { LegacyV3Evaluator } from \"./v3LegacyEvaluator.js\";\nimport type {\n Trajectory,\n TaskSpec,\n EvaluationResult,\n Rubric,\n Verifier,\n AgentEvidenceModality,\n VerifierFinding,\n} from \"./v3/verifier/index.js\";\n\nconst EVALUATOR_BACKEND_ENV = \"STAGEHAND_EVALUATOR_BACKEND\";\nconst DEFAULT_EVALUATOR_BACKEND: V3EvaluatorBackend = \"legacy\";\n\nexport type V3EvaluatorBackend = \"legacy\" | \"verifier\";\n\nexport type V3EvaluatorOptions = {\n /**\n * Selects the evaluator implementation.\n *\n * \"legacy\" preserves the existing screenshot/text YES/NO evaluator.\n * \"verifier\" is reserved for the rubric verifier backend.\n *\n * @default process.env.STAGEHAND_EVALUATOR_BACKEND || \"legacy\"\n */\n backend?: V3EvaluatorBackend;\n};\n\nexport type V3EvaluatorConstructorOptions = V3EvaluatorOptions & {\n modelName?: AvailableModel;\n modelClientOptions?: ClientOptions;\n};\n\ntype NormalizedConstructorOptions = {\n modelName?: AvailableModel;\n modelClientOptions?: ClientOptions;\n backend?: V3EvaluatorBackend;\n};\n\nexport class V3Evaluator implements Verifier {\n private readonly backend: V3EvaluatorBackend;\n private readonly legacyEvaluator: LegacyV3Evaluator;\n\n constructor(\n v3: V3,\n modelNameOrOptions?: AvailableModel | V3EvaluatorConstructorOptions,\n modelClientOptions?: ClientOptions,\n options?: 
V3EvaluatorOptions,\n ) {\n const normalizedOptions = normalizeConstructorOptions(\n modelNameOrOptions,\n modelClientOptions,\n options,\n );\n\n this.backend = resolveEvaluatorBackend(normalizedOptions.backend);\n this.legacyEvaluator = new LegacyV3Evaluator(\n v3,\n normalizedOptions.modelName,\n normalizedOptions.modelClientOptions,\n );\n }\n\n async ask(options: EvaluateOptions): Promise<LegacyEvaluationResult> {\n return this.getLegacyBackend(\"ask\").ask(options);\n }\n\n async batchAsk(options: BatchAskOptions): Promise<LegacyEvaluationResult[]> {\n return this.getLegacyBackend(\"batchAsk\").batchAsk(options);\n }\n\n async verify(trajectory: Trajectory): Promise<EvaluationResult> {\n const taskSpec = assertVerifierInput(trajectory);\n\n if (this.backend === \"legacy\") {\n return this.verifyTrajectoryWithLegacyEvaluator(trajectory, taskSpec);\n }\n\n return this.unavailableVerifierBackend(\"verify\");\n }\n\n async generateRubric(taskSpec: TaskSpec): Promise<Rubric> {\n if (!taskSpec?.id) {\n throw new StagehandInvalidArgumentError(\n \"TaskSpec.id is required for rubric generation\",\n );\n }\n\n if (this.backend === \"verifier\") {\n return this.unavailableVerifierBackend(\"generateRubric\");\n }\n\n return {\n items: [legacyTaskCompletionCriterion(taskSpec)],\n };\n }\n\n private getLegacyBackend(methodName: string): LegacyV3Evaluator {\n if (this.backend === \"legacy\") {\n return this.legacyEvaluator;\n }\n\n return this.unavailableVerifierBackend(methodName);\n }\n\n private unavailableVerifierBackend(methodName: string): never {\n throw new StagehandInvalidArgumentError(\n `V3Evaluator.${methodName}() was configured with ${EVALUATOR_BACKEND_ENV}=verifier, but the verifier backend is not available in this build. 
Use \"legacy\" or install the verifier backend PR.`,\n );\n }\n\n private async verifyTrajectoryWithLegacyEvaluator(\n trajectory: Trajectory,\n taskSpec: TaskSpec,\n ): Promise<EvaluationResult> {\n const screenshots = collectLegacyScreenshots(trajectory);\n const agentReasoning = renderLegacyAgentReasoning(trajectory);\n const answer = trajectory.finalAnswer;\n\n if (!screenshots.length && !answer) {\n return legacyInsufficientEvidenceResult(\n \"Legacy evaluator compatibility mode had no screenshots or final answer to evaluate.\",\n );\n }\n\n const result = await this.legacyEvaluator.ask({\n question: taskSpec.instruction,\n screenshot: screenshots.length ? screenshots : false,\n answer,\n agentReasoning,\n });\n\n return legacyEvaluationToResult(result, screenshots.length);\n }\n}\n\nfunction normalizeConstructorOptions(\n modelNameOrOptions?: AvailableModel | V3EvaluatorConstructorOptions,\n modelClientOptions?: ClientOptions,\n options?: V3EvaluatorOptions,\n): NormalizedConstructorOptions {\n if (\n modelNameOrOptions &&\n typeof modelNameOrOptions === \"object\" &&\n !Array.isArray(modelNameOrOptions)\n ) {\n return {\n modelName: modelNameOrOptions.modelName,\n modelClientOptions: modelNameOrOptions.modelClientOptions,\n backend: modelNameOrOptions.backend ?? options?.backend,\n };\n }\n\n return {\n modelName: modelNameOrOptions as AvailableModel | undefined,\n modelClientOptions,\n backend: options?.backend,\n };\n}\n\nfunction resolveEvaluatorBackend(\n explicitBackend?: V3EvaluatorBackend,\n): V3EvaluatorBackend {\n const configuredBackend =\n explicitBackend ??\n process.env[EVALUATOR_BACKEND_ENV] ??\n DEFAULT_EVALUATOR_BACKEND;\n const normalizedBackend = configuredBackend.trim().toLowerCase();\n\n if (normalizedBackend === \"legacy\" || normalizedBackend === \"verifier\") {\n return normalizedBackend;\n }\n\n throw new StagehandInvalidArgumentError(\n `Invalid ${EVALUATOR_BACKEND_ENV}=\"${configuredBackend}\". 
Expected \"legacy\" or \"verifier\".`,\n );\n}\n\nfunction assertVerifierInput(trajectory: Trajectory): TaskSpec {\n if (!trajectory) {\n throw new StagehandInvalidArgumentError(\n \"Trajectory is required for verification\",\n );\n }\n if (!trajectory.task?.id) {\n throw new StagehandInvalidArgumentError(\n \"Trajectory.task.id is required for verification\",\n );\n }\n return trajectory.task;\n}\n\nfunction legacyTaskCompletionCriterion(taskSpec: TaskSpec) {\n return {\n criterion: \"legacy-task-completion\",\n description: `Evaluate whether the task was completed successfully: ${taskSpec.instruction}`,\n maxPoints: 1,\n };\n}\n\nfunction collectLegacyScreenshots(trajectory: Trajectory): Buffer[] {\n const screenshots: Buffer[] = [];\n\n for (const step of trajectory.steps ?? []) {\n if (Buffer.isBuffer(step.probeEvidence?.screenshot)) {\n screenshots.push(step.probeEvidence.screenshot);\n continue;\n }\n\n const agentImage = step.agentEvidence?.modalities?.find(\n (\n modality,\n ): modality is Extract<AgentEvidenceModality, { type: \"image\" }> =>\n modality.type === \"image\" && Buffer.isBuffer(modality.bytes),\n );\n\n if (agentImage) {\n screenshots.push(agentImage.bytes);\n }\n }\n\n if (Buffer.isBuffer(trajectory.finalObservation?.screenshot)) {\n screenshots.push(trajectory.finalObservation.screenshot);\n }\n\n return screenshots;\n}\n\nfunction renderLegacyAgentReasoning(\n trajectory: Trajectory,\n): string | undefined {\n const stepLines = (trajectory.steps ?? []).map((step, i) => {\n const status = step.toolOutput?.ok === false ? \"Tool status: failed\" : \"\";\n const output = step.toolOutput?.error\n ? `Tool error: ${step.toolOutput.error}`\n : `Tool output: ${stringifyForPrompt(step.toolOutput?.result)}`;\n return [\n `Step ${i}: ${step.actionName}`,\n step.reasoning ? 
`Reasoning: ${step.reasoning}` : undefined,\n status || undefined,\n output,\n ]\n .filter(Boolean)\n .join(\"\\n\");\n });\n\n if (!stepLines.length) {\n return undefined;\n }\n\n return truncateForPrompt(\n `Agent trajectory:\\n${stepLines.join(\"\\n\\n\")}`,\n 16000,\n );\n}\n\nfunction stringifyForPrompt(value: unknown): string {\n if (typeof value === \"string\") {\n return value;\n }\n\n try {\n const serialized = JSON.stringify(value);\n return serialized ?? String(value);\n } catch {\n return String(value);\n }\n}\n\nfunction truncateForPrompt(value: string, maxLength: number): string {\n if (value.length <= maxLength) {\n return value;\n }\n\n return `${value.slice(0, maxLength)}... [truncated]`;\n}\n\nfunction legacyEvaluationToResult(\n result: LegacyEvaluationResult,\n screenshotCount: number,\n): EvaluationResult {\n const outcomeSuccess = result.evaluation === \"YES\";\n const invalid = result.evaluation === \"INVALID\";\n const findings: VerifierFinding[] = invalid\n ? [\n {\n category: \"verifier_uncertainty\",\n severity: \"warning\",\n description: result.reasoning,\n },\n ]\n : [];\n\n return {\n outcomeSuccess,\n explanation: result.reasoning,\n ...(findings.length ? { findings } : {}),\n rawSteps: {\n backend: \"legacy\",\n legacyEvaluation: result.evaluation,\n screenshotCount,\n },\n };\n}\n\nfunction legacyInsufficientEvidenceResult(reason: string): EvaluationResult {\n return {\n outcomeSuccess: false,\n explanation: reason,\n findings: [\n {\n category: \"trajectory_capture\",\n severity: \"blocking\",\n description: reason,\n },\n ],\n rawSteps: {\n backend: \"legacy\",\n legacyEvaluation: \"INVALID\",\n screenshotCount: 0,\n },\n };\n}\n"]}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Legacy V3 evaluator implementation.
3
+ *
4
+ * This is the behavior-preserving implementation that backs V3Evaluator when
5
+ * the evaluator backend resolves to "legacy" (the default, or selected via the `backend` option or STAGEHAND_EVALUATOR_BACKEND=legacy).
6
+ */
7
+ import type { AvailableModel, ClientOptions } from "./v3/types/public/model.js";
8
+ import type { EvaluateOptions, BatchAskOptions, EvaluationResult } from "./v3/types/private/evaluator.js";
9
+ import { V3 } from "./v3/v3.js";
10
+ export declare class LegacyV3Evaluator { // Legacy evaluator; V3Evaluator constructs one as new LegacyV3Evaluator(v3, modelName, modelClientOptions).
11
+ private v3; // NOTE(review): presumably holds the constructor's `v3` argument; private member types are omitted in this declaration output — confirm in the .ts source.
12
+ private modelName; // NOTE(review): presumably the optional `modelName` constructor argument — confirm.
13
+ private modelClientOptions; // NOTE(review): presumably the optional `modelClientOptions` constructor argument — confirm.
14
+ private silentLogger; // NOTE(review): purpose not visible from this declaration; the name suggests a logger that suppresses output — confirm.
15
+ constructor(v3: V3, modelName?: AvailableModel, modelClientOptions?: ClientOptions); // Only `v3` is required; `modelName` and `modelClientOptions` may be omitted.
16
+ private getClient; // NOTE(review): internal helper; presumably returns the model client used for evaluation — confirm.
17
+ ask(options: EvaluateOptions): Promise<EvaluationResult>; // Evaluates one request and resolves to a single result. V3Evaluator.ask() forwards here when its backend is "legacy".
18
+ batchAsk(options: BatchAskOptions): Promise<EvaluationResult[]>; // Batch variant; resolves to an array of results. V3Evaluator.batchAsk() forwards here when its backend is "legacy".
19
+ private _evaluateWithMultipleScreenshots; // NOTE(review): internal helper; the name suggests the multi-screenshot evaluation path — confirm.
20
+ }