@browserbasehq/orca 3.5.0-preview.0 → 3.5.0-preview.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.d.ts +35 -0
  2. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.js +38 -0
  3. package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.js.map +1 -0
  4. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.d.ts +19 -0
  5. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.js +54 -0
  6. package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.js.map +1 -0
  7. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.d.ts +2 -0
  8. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.js +62 -0
  9. package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.js.map +1 -0
  10. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.d.ts +3 -0
  11. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.js +25 -0
  12. package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.js.map +1 -0
  13. package/dist/cjs/lib/v3/api.d.ts +7 -1
  14. package/dist/cjs/lib/v3/api.js +100 -29
  15. package/dist/cjs/lib/v3/api.js.map +1 -1
  16. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.d.ts +24 -0
  17. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.js +31 -0
  18. package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.js.map +1 -0
  19. package/dist/cjs/lib/v3/handlers/v3AgentHandler.d.ts +1 -0
  20. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js +83 -7
  21. package/dist/cjs/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  22. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.d.ts +11 -0
  23. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js +119 -5
  24. package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  25. package/dist/cjs/lib/v3/index.d.ts +11 -0
  26. package/dist/cjs/lib/v3/index.js +19 -1
  27. package/dist/cjs/lib/v3/index.js.map +1 -1
  28. package/dist/cjs/lib/v3/llm/LLMProvider.d.ts +3 -0
  29. package/dist/cjs/lib/v3/llm/LLMProvider.js +28 -8
  30. package/dist/cjs/lib/v3/llm/LLMProvider.js.map +1 -1
  31. package/dist/cjs/lib/v3/types/public/agent.d.ts +6 -0
  32. package/dist/cjs/lib/v3/types/public/agent.js.map +1 -1
  33. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.d.ts +85 -0
  34. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.js +15 -0
  35. package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.js.map +1 -0
  36. package/dist/cjs/lib/v3/types/public/api.d.ts +414 -182
  37. package/dist/cjs/lib/v3/types/public/api.js +62 -20
  38. package/dist/cjs/lib/v3/types/public/api.js.map +1 -1
  39. package/dist/cjs/lib/v3/types/public/index.d.ts +1 -0
  40. package/dist/cjs/lib/v3/types/public/index.js +1 -0
  41. package/dist/cjs/lib/v3/types/public/index.js.map +1 -1
  42. package/dist/cjs/lib/v3/types/public/model.d.ts +16 -7
  43. package/dist/cjs/lib/v3/types/public/model.js.map +1 -1
  44. package/dist/cjs/lib/v3/v3.d.ts +1 -0
  45. package/dist/cjs/lib/v3/v3.js +14 -0
  46. package/dist/cjs/lib/v3/v3.js.map +1 -1
  47. package/dist/cjs/lib/v3/verifier/evidenceNormalization.d.ts +7 -0
  48. package/dist/cjs/lib/v3/verifier/evidenceNormalization.js +100 -0
  49. package/dist/cjs/lib/v3/verifier/evidenceNormalization.js.map +1 -0
  50. package/dist/cjs/lib/v3/verifier/index.d.ts +6 -0
  51. package/dist/cjs/lib/v3/verifier/index.js +16 -0
  52. package/dist/cjs/lib/v3/verifier/index.js.map +1 -0
  53. package/dist/cjs/lib/v3/verifier/trajectory.d.ts +50 -0
  54. package/dist/cjs/lib/v3/verifier/trajectory.js +316 -0
  55. package/dist/cjs/lib/v3/verifier/trajectory.js.map +1 -0
  56. package/dist/cjs/lib/v3/verifier/types.d.ts +281 -0
  57. package/dist/cjs/lib/v3/verifier/types.js +10 -0
  58. package/dist/cjs/lib/v3/verifier/types.js.map +1 -0
  59. package/dist/cjs/lib/v3Evaluator.d.ts +9 -4
  60. package/dist/cjs/lib/v3Evaluator.js +148 -0
  61. package/dist/cjs/lib/v3Evaluator.js.map +1 -1
  62. package/dist/cjs/lib/v3LegacyEvaluator.js +5 -1
  63. package/dist/cjs/lib/v3LegacyEvaluator.js.map +1 -1
  64. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.d.ts +35 -0
  65. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.js +35 -0
  66. package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.js.map +1 -0
  67. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.d.ts +19 -0
  68. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.js +50 -0
  69. package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.js.map +1 -0
  70. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.d.ts +2 -0
  71. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.js +59 -0
  72. package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.js.map +1 -0
  73. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.d.ts +3 -0
  74. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.js +22 -0
  75. package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.js.map +1 -0
  76. package/dist/esm/lib/v3/api.d.ts +7 -1
  77. package/dist/esm/lib/v3/api.js +100 -29
  78. package/dist/esm/lib/v3/api.js.map +1 -1
  79. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.d.ts +24 -0
  80. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.js +28 -0
  81. package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.js.map +1 -0
  82. package/dist/esm/lib/v3/handlers/v3AgentHandler.d.ts +1 -0
  83. package/dist/esm/lib/v3/handlers/v3AgentHandler.js +83 -7
  84. package/dist/esm/lib/v3/handlers/v3AgentHandler.js.map +1 -1
  85. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.d.ts +11 -0
  86. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js +119 -5
  87. package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
  88. package/dist/esm/lib/v3/index.d.ts +11 -0
  89. package/dist/esm/lib/v3/index.js +10 -0
  90. package/dist/esm/lib/v3/index.js.map +1 -1
  91. package/dist/esm/lib/v3/llm/LLMProvider.d.ts +3 -0
  92. package/dist/esm/lib/v3/llm/LLMProvider.js +28 -9
  93. package/dist/esm/lib/v3/llm/LLMProvider.js.map +1 -1
  94. package/dist/esm/lib/v3/types/public/agent.d.ts +6 -0
  95. package/dist/esm/lib/v3/types/public/agent.js.map +1 -1
  96. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.d.ts +85 -0
  97. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.js +14 -0
  98. package/dist/esm/lib/v3/types/public/agentEvidenceEvents.js.map +1 -0
  99. package/dist/esm/lib/v3/types/public/api.d.ts +414 -182
  100. package/dist/esm/lib/v3/types/public/api.js +60 -18
  101. package/dist/esm/lib/v3/types/public/api.js.map +1 -1
  102. package/dist/esm/lib/v3/types/public/index.d.ts +1 -0
  103. package/dist/esm/lib/v3/types/public/index.js +1 -0
  104. package/dist/esm/lib/v3/types/public/index.js.map +1 -1
  105. package/dist/esm/lib/v3/types/public/model.d.ts +16 -7
  106. package/dist/esm/lib/v3/types/public/model.js.map +1 -1
  107. package/dist/esm/lib/v3/v3.d.ts +1 -0
  108. package/dist/esm/lib/v3/v3.js +14 -0
  109. package/dist/esm/lib/v3/v3.js.map +1 -1
  110. package/dist/esm/lib/v3/verifier/evidenceNormalization.d.ts +7 -0
  111. package/dist/esm/lib/v3/verifier/evidenceNormalization.js +93 -0
  112. package/dist/esm/lib/v3/verifier/evidenceNormalization.js.map +1 -0
  113. package/dist/esm/lib/v3/verifier/index.d.ts +6 -0
  114. package/dist/esm/lib/v3/verifier/index.js +3 -0
  115. package/dist/esm/lib/v3/verifier/index.js.map +1 -0
  116. package/dist/esm/lib/v3/verifier/trajectory.d.ts +50 -0
  117. package/dist/esm/lib/v3/verifier/trajectory.js +273 -0
  118. package/dist/esm/lib/v3/verifier/trajectory.js.map +1 -0
  119. package/dist/esm/lib/v3/verifier/types.d.ts +281 -0
  120. package/dist/esm/lib/v3/verifier/types.js +9 -0
  121. package/dist/esm/lib/v3/verifier/types.js.map +1 -0
  122. package/dist/esm/lib/v3Evaluator.d.ts +9 -4
  123. package/dist/esm/lib/v3Evaluator.js +148 -0
  124. package/dist/esm/lib/v3Evaluator.js.map +1 -1
  125. package/dist/esm/lib/v3LegacyEvaluator.js +5 -1
  126. package/dist/esm/lib/v3LegacyEvaluator.js.map +1 -1
  127. package/package.json +4 -4
@@ -1 +1 @@
1
- {"version":3,"file":"v3LegacyEvaluator.js","sourceRoot":"","sources":["../../../lib/v3LegacyEvaluator.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;AAEH,6BAAwB;AAWxB,4DAAsD;AACtD,iEAA+E;AAE/E,MAAM,gBAAgB,GAAG,OAAC,CAAC,MAAM,CAAC;IAChC,UAAU,EAAE,OAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACjC,SAAS,EAAE,OAAC,CAAC,MAAM,EAAE;CACtB,CAAC,CAAC;AAEH,MAAM,qBAAqB,GAAG,OAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;AAExD,MAAa,iBAAiB;IACpB,EAAE,CAAK;IACP,SAAS,CAAiB;IAC1B,kBAAkB,CAAqC;IACvD,YAAY,GAA+B,GAAG,EAAE,GAAE,CAAC,CAAC;IAE5D,YACE,EAAM,EACN,SAA0B,EAC1B,kBAAkC;QAElC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,SAAS,GAAG,SAAS,IAAK,yBAA4C,CAAC;QAC5E,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,IAAI;YAC9C,MAAM,EACJ,OAAO,CAAC,GAAG,CAAC,cAAc;gBAC1B,OAAO,CAAC,GAAG,CAAC,4BAA4B;gBACxC,EAAE;SACL,CAAC;IACJ,CAAC;IAEO,SAAS;QACf,sEAAsE;QACtE,MAAM,QAAQ,GAAG,IAAI,4BAAW,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;QACjD,OAAO,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACrE,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,OAAwB;QAChC,MAAM,EACJ,QAAQ,EACR,MAAM,EACN,UAAU,GAAG,IAAI,EACjB,YAAY,EACZ,iBAAiB,GAAG,GAAG,EACvB,cAAc,GACf,GAAG,OAAO,CAAC;QACZ,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,4CAA6B,CACrC,oCAAoC,CACrC,CAAC;QACJ,IAAI,CAAC,MAAM,IAAI,CAAC,UAAU;YACxB,MAAM,IAAI,4CAA6B,CACrC,qDAAqD,CACtD,CAAC;QAEJ,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,gCAAgC,CAAC;gBAC3C,QAAQ;gBACR,WAAW,EAAE,UAAU;gBACvB,YAAY;gBACZ,cAAc;aACf,CAAC,CAAC;QACL,CAAC;QAED,MAAM,mBAAmB,GAAG,kIAAkI,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,sDAAsD,8HAA8H,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,EAAE,CAAC;QAElZ,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3D,IAAI,WAA+B,CAAC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YACrD,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QA
AQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,IAAI,mBAAmB,EAAE;oBAChE;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,cAAc;oCAClB,CAAC,CAAC,aAAa,QAAQ,6CAA6C,cAAc,EAAE;oCACpF,CAAC,CAAC,QAAQ;6BACb;4BACD,GAAG,CAAC,UAAU,IAAI,WAAW;gCAC3B,CAAC,CAAC;oCACE;wCACE,IAAI,EAAE,WAAoB;wCAC1B,SAAS,EAAE;4CACT,GAAG,EAAE,0BAA0B,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;yCAChE;qCACF;iCACF;gCACH,CAAC,CAAC,EAAE,CAAC;4BACP,GAAG,CAAC,MAAM;gCACR,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,iBAAiB,MAAM,EAAE,EAAE,CAAC;gCAC9D,CAAC,CAAC,EAAE,CAAC;yBACR;qBACF;iBACF;gBACD,cAAc,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE;aACvE;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAEvB,CAAC;YACF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,SAAS,EAAE,sCAAsC,YAAY,EAAE;aACvD,CAAC;QACb,CAAC;IACH,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAAwB;QACrC,MAAM,EACJ,SAAS,EACT,UAAU,GAAG,IAAI,EACjB,YAAY,GAAG,8EAA8E,EAC7F,iBAAiB,GAAG,GAAG,GACxB,GAAG,OAAO,CAAC;QACZ,IAAI,CAAC,SAAS,EAAE,MAAM;YACpB,MAAM,IAAI,4CAA6B,CACrC,iCAAiC,CAClC,CAAC;QAEJ,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3D,IAAI,WAA+B,CAAC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YACrD,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,SAAS,GAAG,SAAS;aACxB,GAAG,CACF,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CACV,GAAG,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,gBAAgB,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAClF;aACA,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,QAAQ;wBACd,OAAO,EAAE,GAAG,YA
AY,2CAA2C,UAAU,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,6CAA6C,CAAC,CAAC,CAAC,EAAE,6KAA6K;qBAChX;oBACD;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE;4BACjC,GAAG,CAAC,UAAU,IAAI,WAAW;gCAC3B,CAAC,CAAC;oCACE;wCACE,IAAI,EAAE,WAAoB;wCAC1B,SAAS,EAAE;4CACT,GAAG,EAAE,0BAA0B,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;yCAChE;qCACF;iCACF;gCACH,CAAC,CAAC,EAAE,CAAC;yBACR;qBACF;iBACF;gBACD,cAAc,EAAE;oBACd,IAAI,EAAE,uBAAuB;oBAC7B,MAAM,EAAE,qBAAqB;iBAC9B;aACF;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,QAAQ,CAAC,IAExB,CAAC;YACF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACzB,UAAU,EAAE,CAAC,CAAC,UAAU;gBACxB,SAAS,EAAE,CAAC,CAAC,SAAS;aACvB,CAAC,CAAC,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC;gBAC1B,UAAU,EAAE,SAAkB;gBAC9B,SAAS,EAAE,sCAAsC,YAAY,EAAE;aAChE,CAAC,CAAC,CAAC;QACN,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,gCAAgC,CAAC,OAK9C;QACC,MAAM,EACJ,QAAQ,EACR,WAAW,EACX,cAAc,EACd,YAAY,GAAG;UACX,cAAc,CAAC,CAAC,CAAC,iGAAiG,CAAC,CAAC,CAAC,EAAE;;;UAGvH,cAAc,CAAC,CAAC,CAAC,kNAAkN,CAAC,CAAC,CAAC,EAAE;0BACxN,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,EAAE,GACtD,GAAG,OAAO,CAAC;QAEZ,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,4CAA6B,CACrC,oCAAoC,CACrC,CAAC;QACJ,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAC1C,MAAM,IAAI,4CAA6B,CACrC,0CAA0C,CAC3C,CAAC;QAEJ,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC5C,IAAI,EAAE,WAAoB;YAC1B,SAAS,EAAE,EAAE,GAAG,EAAE,0BAA0B,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,EAAE;SACrE,CAAC,CAAC,CAAC;QAEJ,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE;oBACzC;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,cAAc;oCAClB,CAAC,CA
AC,aAAa,QAAQ,2DAA2D,cAAc,qBAAqB,WAAW,CAAC,MAAM,sKAAsK;oCAC7S,CAAC,CAAC,GAAG,QAAQ,qBAAqB,WAAW,CAAC,MAAM,mIAAmI;6BAC1L;4BACD,GAAG,aAAa;yBACjB;qBACF;iBACF;gBACD,cAAc,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE;aACvE;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAEvB,CAAC;YACF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,SAAS,EAAE,sCAAsC,YAAY,EAAE;aACvD,CAAC;QACb,CAAC;IACH,CAAC;CACF;AA5QD,8CA4QC","sourcesContent":["/**\n * Legacy V3 evaluator implementation.\n *\n * This is the behavior-preserving implementation that backs V3Evaluator when\n * STAGEHAND_EVALUATOR_BACKEND=legacy.\n */\n\nimport { z } from \"zod\";\nimport type { AvailableModel, ClientOptions } from \"./v3/types/public/model.js\";\nimport type {\n EvaluateOptions,\n BatchAskOptions,\n EvaluationResult,\n} from \"./v3/types/private/evaluator.js\";\nimport { LLMParsedResponse } from \"./inference.js\";\nimport { LLMResponse, LLMClient } from \"./v3/llm/LLMClient.js\";\nimport { LogLine } from \"./v3/types/public/logs.js\";\nimport { V3 } from \"./v3/v3.js\";\nimport { LLMProvider } from \"./v3/llm/LLMProvider.js\";\nimport { StagehandInvalidArgumentError } from \"./v3/types/public/sdkErrors.js\";\n\nconst EvaluationSchema = z.object({\n evaluation: z.enum([\"YES\", \"NO\"]),\n reasoning: z.string(),\n});\n\nconst BatchEvaluationSchema = z.array(EvaluationSchema);\n\nexport class LegacyV3Evaluator {\n private v3: V3;\n private modelName: AvailableModel;\n private modelClientOptions: ClientOptions | { apiKey: string };\n private silentLogger: (message: LogLine) => void = () => {};\n\n constructor(\n v3: V3,\n modelName?: AvailableModel,\n modelClientOptions?: ClientOptions,\n ) {\n this.v3 = v3;\n this.modelName = modelName || (\"google/gemini-2.5-flash\" as AvailableModel);\n 
this.modelClientOptions = modelClientOptions || {\n apiKey:\n process.env.GEMINI_API_KEY ||\n process.env.GOOGLE_GENERATIVE_AI_API_KEY ||\n \"\",\n };\n }\n\n private getClient(): LLMClient {\n // Prefer a dedicated provider so we can override model per-evaluation\n const provider = new LLMProvider(this.v3.logger);\n return provider.getClient(this.modelName, this.modelClientOptions);\n }\n\n async ask(options: EvaluateOptions): Promise<EvaluationResult> {\n const {\n question,\n answer,\n screenshot = true,\n systemPrompt,\n screenshotDelayMs = 250,\n agentReasoning,\n } = options;\n if (!question)\n throw new StagehandInvalidArgumentError(\n \"Question cannot be an empty string\",\n );\n if (!answer && !screenshot)\n throw new StagehandInvalidArgumentError(\n \"Either answer (text) or screenshot must be provided\",\n );\n\n if (Array.isArray(screenshot)) {\n return this._evaluateWithMultipleScreenshots({\n question,\n screenshots: screenshot,\n systemPrompt,\n agentReasoning,\n });\n }\n\n const defaultSystemPrompt = `You are an expert evaluator that confidently returns YES or NO based on if the original goal was achieved. You have access to ${screenshot ? \"a screenshot\" : \"the agents reasoning and actions throughout the task\"} that you can use to evaluate the tasks completion. Provide detailed reasoning for your answer.\\n Today's date is ${new Date().toLocaleDateString()}`;\n\n await new Promise((r) => setTimeout(r, screenshotDelayMs));\n let imageBuffer: Buffer | undefined;\n if (screenshot) {\n const page = await this.v3.context.awaitActivePage();\n imageBuffer = await page.screenshot({ fullPage: false });\n }\n\n const llmClient = this.getClient();\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n { role: \"system\", content: systemPrompt || defaultSystemPrompt },\n {\n role: \"user\",\n content: [\n {\n type: \"text\",\n text: agentReasoning\n ? 
`Question: ${question}\\n\\nAgent's reasoning and actions taken:\\n${agentReasoning}`\n : question,\n },\n ...(screenshot && imageBuffer\n ? [\n {\n type: \"image_url\" as const,\n image_url: {\n url: `data:image/jpeg;base64,${imageBuffer.toString(\"base64\")}`,\n },\n },\n ]\n : []),\n ...(answer\n ? [{ type: \"text\" as const, text: `the answer is ${answer}` }]\n : []),\n ],\n },\n ],\n response_model: { name: \"EvaluationResult\", schema: EvaluationSchema },\n },\n });\n\n try {\n const result = response.data as unknown as z.infer<\n typeof EvaluationSchema\n >;\n return { evaluation: result.evaluation, reasoning: result.reasoning };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return {\n evaluation: \"INVALID\",\n reasoning: `Failed to get structured response: ${errorMessage}`,\n } as const;\n }\n }\n\n async batchAsk(options: BatchAskOptions): Promise<EvaluationResult[]> {\n const {\n questions,\n screenshot = true,\n systemPrompt = \"You are an expert evaluator that returns YES or NO with a concise reasoning.\",\n screenshotDelayMs = 250,\n } = options;\n if (!questions?.length)\n throw new StagehandInvalidArgumentError(\n \"Questions array cannot be empty\",\n );\n\n await new Promise((r) => setTimeout(r, screenshotDelayMs));\n let imageBuffer: Buffer | undefined;\n if (screenshot) {\n const page = await this.v3.context.awaitActivePage();\n imageBuffer = await page.screenshot({ fullPage: false });\n }\n\n const llmClient = this.getClient();\n\n const formatted = questions\n .map(\n (item, i) =>\n `${i + 1}. ${item.question}${item.answer ? `\\n Answer: ${item.answer}` : \"\"}`,\n )\n .join(\"\\n\\n\");\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n {\n role: \"system\",\n content: `${systemPrompt}\\n\\nYou will be given multiple questions${screenshot ? \" with a screenshot\" : \"\"}. 
${questions.some((q) => q.answer) ? \"Some questions include answers to evaluate.\" : \"\"} Answer each question by returning an object in the specified JSON format. Return a single JSON array containing one object for each question in the order they were asked.`,\n },\n {\n role: \"user\",\n content: [\n { type: \"text\", text: formatted },\n ...(screenshot && imageBuffer\n ? [\n {\n type: \"image_url\" as const,\n image_url: {\n url: `data:image/jpeg;base64,${imageBuffer.toString(\"base64\")}`,\n },\n },\n ]\n : []),\n ],\n },\n ],\n response_model: {\n name: \"BatchEvaluationResult\",\n schema: BatchEvaluationSchema,\n },\n },\n });\n\n try {\n const results = response.data as unknown as z.infer<\n typeof BatchEvaluationSchema\n >;\n return results.map((r) => ({\n evaluation: r.evaluation,\n reasoning: r.reasoning,\n }));\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return questions.map(() => ({\n evaluation: \"INVALID\" as const,\n reasoning: `Failed to get structured response: ${errorMessage}`,\n }));\n }\n }\n\n private async _evaluateWithMultipleScreenshots(options: {\n question: string;\n screenshots: Buffer[];\n systemPrompt?: string;\n agentReasoning?: string;\n }): Promise<EvaluationResult> {\n const {\n question,\n screenshots,\n agentReasoning,\n systemPrompt = `You are an expert evaluator that confidently returns YES or NO given a question and multiple screenshots showing the progression of a task.\n ${agentReasoning ? \"You also have access to the agent's detailed reasoning and thought process throughout the task.\" : \"\"}\n Analyze ALL screenshots to understand the complete journey. Look for evidence of task completion across all screenshots, not just the last one.\n Success criteria may appear at different points in the sequence (confirmation messages, intermediate states, etc).\n ${agentReasoning ? 
\"The agent's reasoning provides crucial context about what actions were attempted, what was observed, and the decision-making process. Use this alongside the visual evidence to make a comprehensive evaluation.\" : \"\"}\n Today's date is ${new Date().toLocaleDateString()}`,\n } = options;\n\n if (!question)\n throw new StagehandInvalidArgumentError(\n \"Question cannot be an empty string\",\n );\n if (!screenshots || screenshots.length === 0)\n throw new StagehandInvalidArgumentError(\n \"At least one screenshot must be provided\",\n );\n\n const llmClient = this.getClient();\n\n const imageContents = screenshots.map((s) => ({\n type: \"image_url\" as const,\n image_url: { url: `data:image/jpeg;base64,${s.toString(\"base64\")}` },\n }));\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n { role: \"system\", content: systemPrompt },\n {\n role: \"user\",\n content: [\n {\n type: \"text\",\n text: agentReasoning\n ? `Question: ${question}\\n\\nAgent's reasoning and actions throughout the task:\\n${agentReasoning}\\n\\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze both the agent's reasoning and all screenshots to determine if the task was completed successfully.`\n : `${question}\\n\\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze all of them to determine if the task was completed successfully.`,\n },\n ...imageContents,\n ],\n },\n ],\n response_model: { name: \"EvaluationResult\", schema: EvaluationSchema },\n },\n });\n\n try {\n const result = response.data as unknown as z.infer<\n typeof EvaluationSchema\n >;\n return { evaluation: result.evaluation, reasoning: result.reasoning };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? 
error.message : String(error);\n return {\n evaluation: \"INVALID\",\n reasoning: `Failed to get structured response: ${errorMessage}`,\n } as const;\n }\n }\n}\n"]}
1
+ {"version":3,"file":"v3LegacyEvaluator.js","sourceRoot":"","sources":["../../../lib/v3LegacyEvaluator.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;;AAEH,6BAAwB;AAWxB,4DAAsD;AACtD,iEAA+E;AAE/E,MAAM,gBAAgB,GAAG,OAAC,CAAC,MAAM,CAAC;IAChC,UAAU,EAAE,OAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACjC,SAAS,EAAE,OAAC,CAAC,MAAM,EAAE;CACtB,CAAC,CAAC;AAEH,MAAM,qBAAqB,GAAG,OAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;AAExD,MAAa,iBAAiB;IACpB,EAAE,CAAK;IACP,SAAS,CAAiB;IAC1B,kBAAkB,CAAqC;IACvD,YAAY,GAA+B,GAAG,EAAE,GAAE,CAAC,CAAC;IAE5D,YACE,EAAM,EACN,SAA0B,EAC1B,kBAAkC;QAElC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,SAAS,GAAG,SAAS,IAAK,yBAA4C,CAAC;QAC5E,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,IAAI;YAC9C,MAAM,EACJ,OAAO,CAAC,GAAG,CAAC,cAAc;gBAC1B,OAAO,CAAC,GAAG,CAAC,4BAA4B;gBACxC,EAAE;SACL,CAAC;IACJ,CAAC;IAEO,SAAS;QACf,sEAAsE;QACtE,MAAM,QAAQ,GAAG,IAAI,4BAAW,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;QACjD,OAAO,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACrE,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,OAAwB;QAChC,MAAM,EACJ,QAAQ,EACR,MAAM,EACN,UAAU,GAAG,IAAI,EACjB,YAAY,EACZ,iBAAiB,GAAG,GAAG,EACvB,cAAc,GACf,GAAG,OAAO,CAAC;QACZ,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,4CAA6B,CACrC,oCAAoC,CACrC,CAAC;QACJ,IAAI,CAAC,MAAM,IAAI,CAAC,UAAU;YACxB,MAAM,IAAI,4CAA6B,CACrC,qDAAqD,CACtD,CAAC;QAEJ,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,gCAAgC,CAAC;gBAC3C,QAAQ;gBACR,MAAM;gBACN,WAAW,EAAE,UAAU;gBACvB,YAAY;gBACZ,cAAc;aACf,CAAC,CAAC;QACL,CAAC;QAED,MAAM,mBAAmB,GAAG,kIAAkI,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,sDAAsD,8HAA8H,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,EAAE,CAAC;QAElZ,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3D,IAAI,WAA+B,CAAC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YACrD,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EA
AE;gBACP,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,IAAI,mBAAmB,EAAE;oBAChE;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,cAAc;oCAClB,CAAC,CAAC,aAAa,QAAQ,6CAA6C,cAAc,EAAE;oCACpF,CAAC,CAAC,QAAQ;6BACb;4BACD,GAAG,CAAC,UAAU,IAAI,WAAW;gCAC3B,CAAC,CAAC;oCACE;wCACE,IAAI,EAAE,WAAoB;wCAC1B,SAAS,EAAE;4CACT,GAAG,EAAE,0BAA0B,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;yCAChE;qCACF;iCACF;gCACH,CAAC,CAAC,EAAE,CAAC;4BACP,GAAG,CAAC,MAAM;gCACR,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,iBAAiB,MAAM,EAAE,EAAE,CAAC;gCAC9D,CAAC,CAAC,EAAE,CAAC;yBACR;qBACF;iBACF;gBACD,cAAc,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE;aACvE;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAEvB,CAAC;YACF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,SAAS,EAAE,sCAAsC,YAAY,EAAE;aACvD,CAAC;QACb,CAAC;IACH,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAAwB;QACrC,MAAM,EACJ,SAAS,EACT,UAAU,GAAG,IAAI,EACjB,YAAY,GAAG,8EAA8E,EAC7F,iBAAiB,GAAG,GAAG,GACxB,GAAG,OAAO,CAAC;QACZ,IAAI,CAAC,SAAS,EAAE,MAAM;YACpB,MAAM,IAAI,4CAA6B,CACrC,iCAAiC,CAClC,CAAC;QAEJ,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3D,IAAI,WAA+B,CAAC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YACrD,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,SAAS,GAAG,SAAS;aACxB,GAAG,CACF,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CACV,GAAG,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,gBAAgB,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAClF;aACA,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,QAAQ;wBACd,OAAO,E
AAE,GAAG,YAAY,2CAA2C,UAAU,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,6CAA6C,CAAC,CAAC,CAAC,EAAE,6KAA6K;qBAChX;oBACD;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE;4BACjC,GAAG,CAAC,UAAU,IAAI,WAAW;gCAC3B,CAAC,CAAC;oCACE;wCACE,IAAI,EAAE,WAAoB;wCAC1B,SAAS,EAAE;4CACT,GAAG,EAAE,0BAA0B,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;yCAChE;qCACF;iCACF;gCACH,CAAC,CAAC,EAAE,CAAC;yBACR;qBACF;iBACF;gBACD,cAAc,EAAE;oBACd,IAAI,EAAE,uBAAuB;oBAC7B,MAAM,EAAE,qBAAqB;iBAC9B;aACF;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,QAAQ,CAAC,IAExB,CAAC;YACF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACzB,UAAU,EAAE,CAAC,CAAC,UAAU;gBACxB,SAAS,EAAE,CAAC,CAAC,SAAS;aACvB,CAAC,CAAC,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC;gBAC1B,UAAU,EAAE,SAAkB;gBAC9B,SAAS,EAAE,sCAAsC,YAAY,EAAE;aAChE,CAAC,CAAC,CAAC;QACN,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,gCAAgC,CAAC,OAM9C;QACC,MAAM,EACJ,QAAQ,EACR,MAAM,EACN,WAAW,EACX,cAAc,EACd,YAAY,GAAG;UACX,cAAc,CAAC,CAAC,CAAC,iGAAiG,CAAC,CAAC,CAAC,EAAE;;;UAGvH,cAAc,CAAC,CAAC,CAAC,kNAAkN,CAAC,CAAC,CAAC,EAAE;0BACxN,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,EAAE,GACtD,GAAG,OAAO,CAAC;QAEZ,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,4CAA6B,CACrC,oCAAoC,CACrC,CAAC;QACJ,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAC1C,MAAM,IAAI,4CAA6B,CACrC,0CAA0C,CAC3C,CAAC;QAEJ,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC5C,IAAI,EAAE,WAAoB;YAC1B,SAAS,EAAE,EAAE,GAAG,EAAE,0BAA0B,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,EAAE;SACrE,CAAC,CAAC,CAAC;QAEJ,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE;oBACzC;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAA
E,cAAc;oCAClB,CAAC,CAAC,aAAa,QAAQ,2DAA2D,cAAc,qBAAqB,WAAW,CAAC,MAAM,sKAAsK;oCAC7S,CAAC,CAAC,GAAG,QAAQ,qBAAqB,WAAW,CAAC,MAAM,mIAAmI;6BAC1L;4BACD,GAAG,CAAC,MAAM;gCACR,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,iBAAiB,MAAM,EAAE,EAAE,CAAC;gCAC9D,CAAC,CAAC,EAAE,CAAC;4BACP,GAAG,aAAa;yBACjB;qBACF;iBACF;gBACD,cAAc,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE;aACvE;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAEvB,CAAC;YACF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,SAAS,EAAE,sCAAsC,YAAY,EAAE;aACvD,CAAC;QACb,CAAC;IACH,CAAC;CACF;AAlRD,8CAkRC","sourcesContent":["/**\n * Legacy V3 evaluator implementation.\n *\n * This is the behavior-preserving implementation that backs V3Evaluator when\n * STAGEHAND_EVALUATOR_BACKEND=legacy.\n */\n\nimport { z } from \"zod\";\nimport type { AvailableModel, ClientOptions } from \"./v3/types/public/model.js\";\nimport type {\n EvaluateOptions,\n BatchAskOptions,\n EvaluationResult,\n} from \"./v3/types/private/evaluator.js\";\nimport { LLMParsedResponse } from \"./inference.js\";\nimport { LLMResponse, LLMClient } from \"./v3/llm/LLMClient.js\";\nimport { LogLine } from \"./v3/types/public/logs.js\";\nimport { V3 } from \"./v3/v3.js\";\nimport { LLMProvider } from \"./v3/llm/LLMProvider.js\";\nimport { StagehandInvalidArgumentError } from \"./v3/types/public/sdkErrors.js\";\n\nconst EvaluationSchema = z.object({\n evaluation: z.enum([\"YES\", \"NO\"]),\n reasoning: z.string(),\n});\n\nconst BatchEvaluationSchema = z.array(EvaluationSchema);\n\nexport class LegacyV3Evaluator {\n private v3: V3;\n private modelName: AvailableModel;\n private modelClientOptions: ClientOptions | { apiKey: string };\n private silentLogger: (message: LogLine) => void = () => {};\n\n constructor(\n v3: V3,\n modelName?: 
AvailableModel,\n modelClientOptions?: ClientOptions,\n ) {\n this.v3 = v3;\n this.modelName = modelName || (\"google/gemini-2.5-flash\" as AvailableModel);\n this.modelClientOptions = modelClientOptions || {\n apiKey:\n process.env.GEMINI_API_KEY ||\n process.env.GOOGLE_GENERATIVE_AI_API_KEY ||\n \"\",\n };\n }\n\n private getClient(): LLMClient {\n // Prefer a dedicated provider so we can override model per-evaluation\n const provider = new LLMProvider(this.v3.logger);\n return provider.getClient(this.modelName, this.modelClientOptions);\n }\n\n async ask(options: EvaluateOptions): Promise<EvaluationResult> {\n const {\n question,\n answer,\n screenshot = true,\n systemPrompt,\n screenshotDelayMs = 250,\n agentReasoning,\n } = options;\n if (!question)\n throw new StagehandInvalidArgumentError(\n \"Question cannot be an empty string\",\n );\n if (!answer && !screenshot)\n throw new StagehandInvalidArgumentError(\n \"Either answer (text) or screenshot must be provided\",\n );\n\n if (Array.isArray(screenshot)) {\n return this._evaluateWithMultipleScreenshots({\n question,\n answer,\n screenshots: screenshot,\n systemPrompt,\n agentReasoning,\n });\n }\n\n const defaultSystemPrompt = `You are an expert evaluator that confidently returns YES or NO based on if the original goal was achieved. You have access to ${screenshot ? \"a screenshot\" : \"the agents reasoning and actions throughout the task\"} that you can use to evaluate the tasks completion. 
Provide detailed reasoning for your answer.\\n Today's date is ${new Date().toLocaleDateString()}`;\n\n await new Promise((r) => setTimeout(r, screenshotDelayMs));\n let imageBuffer: Buffer | undefined;\n if (screenshot) {\n const page = await this.v3.context.awaitActivePage();\n imageBuffer = await page.screenshot({ fullPage: false });\n }\n\n const llmClient = this.getClient();\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n { role: \"system\", content: systemPrompt || defaultSystemPrompt },\n {\n role: \"user\",\n content: [\n {\n type: \"text\",\n text: agentReasoning\n ? `Question: ${question}\\n\\nAgent's reasoning and actions taken:\\n${agentReasoning}`\n : question,\n },\n ...(screenshot && imageBuffer\n ? [\n {\n type: \"image_url\" as const,\n image_url: {\n url: `data:image/jpeg;base64,${imageBuffer.toString(\"base64\")}`,\n },\n },\n ]\n : []),\n ...(answer\n ? [{ type: \"text\" as const, text: `the answer is ${answer}` }]\n : []),\n ],\n },\n ],\n response_model: { name: \"EvaluationResult\", schema: EvaluationSchema },\n },\n });\n\n try {\n const result = response.data as unknown as z.infer<\n typeof EvaluationSchema\n >;\n return { evaluation: result.evaluation, reasoning: result.reasoning };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? 
error.message : String(error);\n return {\n evaluation: \"INVALID\",\n reasoning: `Failed to get structured response: ${errorMessage}`,\n } as const;\n }\n }\n\n async batchAsk(options: BatchAskOptions): Promise<EvaluationResult[]> {\n const {\n questions,\n screenshot = true,\n systemPrompt = \"You are an expert evaluator that returns YES or NO with a concise reasoning.\",\n screenshotDelayMs = 250,\n } = options;\n if (!questions?.length)\n throw new StagehandInvalidArgumentError(\n \"Questions array cannot be empty\",\n );\n\n await new Promise((r) => setTimeout(r, screenshotDelayMs));\n let imageBuffer: Buffer | undefined;\n if (screenshot) {\n const page = await this.v3.context.awaitActivePage();\n imageBuffer = await page.screenshot({ fullPage: false });\n }\n\n const llmClient = this.getClient();\n\n const formatted = questions\n .map(\n (item, i) =>\n `${i + 1}. ${item.question}${item.answer ? `\\n Answer: ${item.answer}` : \"\"}`,\n )\n .join(\"\\n\\n\");\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n {\n role: \"system\",\n content: `${systemPrompt}\\n\\nYou will be given multiple questions${screenshot ? \" with a screenshot\" : \"\"}. ${questions.some((q) => q.answer) ? \"Some questions include answers to evaluate.\" : \"\"} Answer each question by returning an object in the specified JSON format. Return a single JSON array containing one object for each question in the order they were asked.`,\n },\n {\n role: \"user\",\n content: [\n { type: \"text\", text: formatted },\n ...(screenshot && imageBuffer\n ? 
[\n {\n type: \"image_url\" as const,\n image_url: {\n url: `data:image/jpeg;base64,${imageBuffer.toString(\"base64\")}`,\n },\n },\n ]\n : []),\n ],\n },\n ],\n response_model: {\n name: \"BatchEvaluationResult\",\n schema: BatchEvaluationSchema,\n },\n },\n });\n\n try {\n const results = response.data as unknown as z.infer<\n typeof BatchEvaluationSchema\n >;\n return results.map((r) => ({\n evaluation: r.evaluation,\n reasoning: r.reasoning,\n }));\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return questions.map(() => ({\n evaluation: \"INVALID\" as const,\n reasoning: `Failed to get structured response: ${errorMessage}`,\n }));\n }\n }\n\n private async _evaluateWithMultipleScreenshots(options: {\n question: string;\n answer?: string;\n screenshots: Buffer[];\n systemPrompt?: string;\n agentReasoning?: string;\n }): Promise<EvaluationResult> {\n const {\n question,\n answer,\n screenshots,\n agentReasoning,\n systemPrompt = `You are an expert evaluator that confidently returns YES or NO given a question and multiple screenshots showing the progression of a task.\n ${agentReasoning ? \"You also have access to the agent's detailed reasoning and thought process throughout the task.\" : \"\"}\n Analyze ALL screenshots to understand the complete journey. Look for evidence of task completion across all screenshots, not just the last one.\n Success criteria may appear at different points in the sequence (confirmation messages, intermediate states, etc).\n ${agentReasoning ? \"The agent's reasoning provides crucial context about what actions were attempted, what was observed, and the decision-making process. 
Use this alongside the visual evidence to make a comprehensive evaluation.\" : \"\"}\n Today's date is ${new Date().toLocaleDateString()}`,\n } = options;\n\n if (!question)\n throw new StagehandInvalidArgumentError(\n \"Question cannot be an empty string\",\n );\n if (!screenshots || screenshots.length === 0)\n throw new StagehandInvalidArgumentError(\n \"At least one screenshot must be provided\",\n );\n\n const llmClient = this.getClient();\n\n const imageContents = screenshots.map((s) => ({\n type: \"image_url\" as const,\n image_url: { url: `data:image/jpeg;base64,${s.toString(\"base64\")}` },\n }));\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n { role: \"system\", content: systemPrompt },\n {\n role: \"user\",\n content: [\n {\n type: \"text\",\n text: agentReasoning\n ? `Question: ${question}\\n\\nAgent's reasoning and actions throughout the task:\\n${agentReasoning}\\n\\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze both the agent's reasoning and all screenshots to determine if the task was completed successfully.`\n : `${question}\\n\\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze all of them to determine if the task was completed successfully.`,\n },\n ...(answer\n ? [{ type: \"text\" as const, text: `the answer is ${answer}` }]\n : []),\n ...imageContents,\n ],\n },\n ],\n response_model: { name: \"EvaluationResult\", schema: EvaluationSchema },\n },\n });\n\n try {\n const result = response.data as unknown as z.infer<\n typeof EvaluationSchema\n >;\n return { evaluation: result.evaluation, reasoning: result.reasoning };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? 
error.message : String(error);\n return {\n evaluation: \"INVALID\",\n reasoning: `Failed to get structured response: ${errorMessage}`,\n } as const;\n }\n }\n}\n"]}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * captureAriaTreeProbe — capture a truncated accessibility tree of the active
3
+ * page for use as tier-2 evidence in the trajectory recorder.
4
+ *
5
+ * Shared by v3AgentHandler and v3CuaAgentHandler. Listener-gated by the
6
+ * callers so ordinary agent runs (no TrajectoryRecorder attached) don't pay
7
+ * the cost.
8
+ *
9
+ * The a11y tree is the same payload the agent's `ariaTree` tool sees, but
10
+ * captured by the harness (not the agent) so the verifier has independent
11
+ * textual ground truth for grounding non-visual claims — prices, names,
12
+ * dates, list contents — without OCR'ing screenshots.
13
+ *
14
+ * Budget: defaults to ~8000 tokens (32k chars). Per-step a11y captures
15
+ * across a ~30-step trajectory at that cap sum to ~240k tokens total,
16
+ * which the verifier handles via per-criterion top-K selection. The cap
17
+ * is configurable via VERIFIER_ARIATREE_TOKEN_BUDGET so consumers can
18
+ * trade RAM/disk for fidelity. Truncated content is marked explicitly so
19
+ * the verifier knows it was clipped.
20
+ */
21
+ import type { V3 } from "../../v3.js";
22
+ interface CaptureAriaTreeOptions {
23
+ /** Soft cap on token count (chars/4 approximation). Default 8000. */
24
+ tokenBudget?: number;
25
+ /** Hard timeout on the capture. Default 5s. */
26
+ timeoutMs?: number;
27
+ }
28
+ /**
29
+ * Returns the truncated a11y tree as a plain string, or undefined when
30
+ * capture fails. Never throws — a11y capture is best-effort tier-2 evidence,
31
+ * not a hard requirement, so failures are silently absorbed (the verifier
32
+ * surfaces this via evidence_insufficient).
33
+ */
34
+ export declare function captureAriaTreeProbe(v3: V3, opts?: CaptureAriaTreeOptions): Promise<string | undefined>;
35
+ export {};
@@ -0,0 +1,35 @@
1
const APPROX_CHARS_PER_TOKEN = 4;
const DEFAULT_TOKEN_BUDGET = 8_000;
const DEFAULT_TIMEOUT_MS = 5_000;

/**
 * Reports whether `value` can be used as a budget: a finite number above zero.
 *
 * @param {unknown} value - Candidate budget.
 * @returns {boolean} True only for finite numbers greater than zero.
 */
function isPositiveFiniteNumber(value) {
    return typeof value === "number" && Number.isFinite(value) && value > 0;
}

/**
 * Picks the token budget: explicit option first, then the
 * VERIFIER_ARIATREE_TOKEN_BUDGET environment variable, then the default.
 * Unusable values (non-finite, zero, negative) are skipped at every level so
 * the character cap derived from the budget is always positive.
 *
 * @param {unknown} explicitBudget - Caller-supplied `tokenBudget`, if any.
 * @returns {number} A finite, positive token budget.
 */
function resolveTokenBudget(explicitBudget) {
    if (isPositiveFiniteNumber(explicitBudget)) {
        return explicitBudget;
    }
    const envBudget = Number.parseInt(process.env.VERIFIER_ARIATREE_TOKEN_BUDGET ?? "", 10);
    return isPositiveFiniteNumber(envBudget) ? envBudget : DEFAULT_TOKEN_BUDGET;
}

/**
 * Returns the truncated a11y tree as a plain string, or undefined when
 * capture fails. Never throws — a11y capture is best-effort tier-2 evidence,
 * not a hard requirement, so failures are silently absorbed (the verifier
 * surfaces this via evidence_insufficient).
 *
 * @param {object} v3 - Instance exposing an async `extract({ timeout })`.
 * @param {{ tokenBudget?: number, timeoutMs?: number }} [opts] - Soft cap on
 *   token count (chars/4 approximation) and timeout passed to `extract`.
 * @returns {Promise<string | undefined>} The page text, clipped to the budget
 *   with an explicit truncation marker appended, or undefined when nothing
 *   usable was captured.
 */
export async function captureAriaTreeProbe(v3, opts = {}) {
    // Tolerate an explicit `null` options bag; the default only covers `undefined`.
    const options = opts ?? {};
    const tokenBudget = resolveTokenBudget(options.tokenBudget);
    const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const maxChars = tokenBudget * APPROX_CHARS_PER_TOKEN;
    try {
        // v3.extract() without a schema returns { pageText } where pageText is the
        // rendered accessibility tree — same path the agent's ariaTree tool uses.
        const result = await v3.extract({ timeout: timeoutMs });
        const pageText = result?.pageText;
        if (typeof pageText !== "string" || pageText.length === 0) {
            return undefined;
        }
        if (pageText.length <= maxChars) {
            return pageText;
        }
        return (pageText.slice(0, maxChars) +
            `\n\n[CONTENT TRUNCATED at ~${tokenBudget} tokens — set VERIFIER_ARIATREE_TOKEN_BUDGET to raise]`);
    }
    catch {
        // Best-effort evidence: any capture failure is reported as "no tree".
        return undefined;
    }
}
35
+ //# sourceMappingURL=captureAriaTreeProbe.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"captureAriaTreeProbe.js","sourceRoot":"","sources":["../../../../../../lib/v3/agent/utils/captureAriaTreeProbe.ts"],"names":[],"mappings":"AAsBA,MAAM,sBAAsB,GAAG,CAAC,CAAC;AACjC,MAAM,oBAAoB,GAAG,KAAK,CAAC;AACnC,MAAM,kBAAkB,GAAG,KAAK,CAAC;AASjC;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,EAAM,EACN,OAA+B,EAAE;IAEjC,MAAM,SAAS,GAAG,QAAQ,CACxB,OAAO,CAAC,GAAG,CAAC,8BAA8B,IAAI,EAAE,EAChD,EAAE,CACH,CAAC;IACF,MAAM,WAAW,GACf,IAAI,CAAC,WAAW;QAChB,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,GAAG,CAAC;YAC1C,CAAC,CAAC,SAAS;YACX,CAAC,CAAC,oBAAoB,CAAC,CAAC;IAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,kBAAkB,CAAC;IACvD,MAAM,QAAQ,GAAG,WAAW,GAAG,sBAAsB,CAAC;IAEtD,IAAI,CAAC;QACH,2EAA2E;QAC3E,0EAA0E;QAC1E,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,MAAM,EAAE,QAAQ,CAAC;QAClC,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,SAAS,CAAC;QAE5E,IAAI,QAAQ,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;YAC/B,OAAO,CACL,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC;gBAC3B,8BAA8B,WAAW,wDAAwD,CAClG,CAAC;QACJ,CAAC;QACD,OAAO,QAAQ,CAAC;IAClB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC","sourcesContent":["/**\n * captureAriaTreeProbe — capture a truncated accessibility tree of the active\n * page for use as tier-2 evidence in the trajectory recorder.\n *\n * Shared by v3AgentHandler and v3CuaAgentHandler. Listener-gated by the\n * callers so ordinary agent runs (no TrajectoryRecorder attached) don't pay\n * the cost.\n *\n * The a11y tree is the same payload the agent's `ariaTree` tool sees, but\n * captured by the harness (not the agent) so the verifier has independent\n * textual ground truth for grounding non-visual claims — prices, names,\n * dates, list contents — without OCR'ing screenshots.\n *\n * Budget: defaults to ~8000 tokens (32k chars). Per-step a11y captures\n * across a ~30-step trajectory at that cap sum to ~240k tokens total,\n * which the verifier handles via per-criterion top-K selection. 
The cap\n * is configurable via VERIFIER_ARIATREE_TOKEN_BUDGET so consumers can\n * trade RAM/disk for fidelity. Truncated content is marked explicitly so\n * the verifier knows it was clipped.\n */\nimport type { V3 } from \"../../v3.js\";\n\nconst APPROX_CHARS_PER_TOKEN = 4;\nconst DEFAULT_TOKEN_BUDGET = 8_000;\nconst DEFAULT_TIMEOUT_MS = 5_000;\n\ninterface CaptureAriaTreeOptions {\n /** Soft cap on token count (chars/4 approximation). Default 8000. */\n tokenBudget?: number;\n /** Hard timeout on the capture. Default 5s. */\n timeoutMs?: number;\n}\n\n/**\n * Returns the truncated a11y tree as a plain string, or undefined when\n * capture fails. Never throws — a11y capture is best-effort tier-2 evidence,\n * not a hard requirement, so failures are silently absorbed (the verifier\n * surfaces this via evidence_insufficient).\n */\nexport async function captureAriaTreeProbe(\n v3: V3,\n opts: CaptureAriaTreeOptions = {},\n): Promise<string | undefined> {\n const envBudget = parseInt(\n process.env.VERIFIER_ARIATREE_TOKEN_BUDGET ?? \"\",\n 10,\n );\n const tokenBudget =\n opts.tokenBudget ??\n (Number.isFinite(envBudget) && envBudget > 0\n ? envBudget\n : DEFAULT_TOKEN_BUDGET);\n const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;\n const maxChars = tokenBudget * APPROX_CHARS_PER_TOKEN;\n\n try {\n // v3.extract() without a schema returns { pageText } where pageText is the\n // rendered accessibility tree — same path the agent's ariaTree tool uses.\n const result = await v3.extract({ timeout: timeoutMs });\n const pageText = result?.pageText;\n if (typeof pageText !== \"string\" || pageText.length === 0) return undefined;\n\n if (pageText.length > maxChars) {\n return (\n pageText.slice(0, maxChars) +\n `\\n\\n[CONTENT TRUNCATED at ~${tokenBudget} tokens — set VERIFIER_ARIATREE_TOKEN_BUDGET to raise]`\n );\n }\n return pageText;\n } catch {\n return undefined;\n }\n}\n"]}
@@ -0,0 +1,19 @@
1
+ import type { AgentEvidenceCallback } from "../../types/public/agentEvidenceEvents.js";
2
+ import type { LogLine } from "../../types/public/logs.js";
3
+ import type { V3 } from "../../v3.js";
4
+ interface CaptureProbeEvidenceOptions {
5
+ v3: V3;
6
+ url: string;
7
+ logger: (message: LogLine) => void;
8
+ warningMessage: string;
9
+ }
10
+ interface EmitPostStepProbeEvidenceOptions extends CaptureProbeEvidenceOptions {
11
+ evidenceCallback?: AgentEvidenceCallback;
12
+ }
13
+ export declare function captureProbeEvidence({ v3, url, logger, warningMessage, }: CaptureProbeEvidenceOptions): Promise<{
14
+ url: string;
15
+ screenshot?: Buffer;
16
+ ariaTree?: string;
17
+ }>;
18
+ export declare function emitPostStepProbeEvidence({ v3, url, evidenceCallback, logger, warningMessage, }: EmitPostStepProbeEvidenceOptions): Promise<void>;
19
+ export {};
@@ -0,0 +1,50 @@
1
+ import { captureAriaTreeProbe } from "./captureAriaTreeProbe.js";
2
/**
 * Turns any thrown value into a log-safe string.
 *
 * `String(value)` throws a TypeError for values that cannot be converted to a
 * primitive (for example `Object.create(null)`). This helper runs inside
 * `catch` handlers, so it falls back to the generic object tag rather than
 * letting a second exception escape.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} `error.message` for Error instances, otherwise a string
 *   rendering of the value.
 */
function errorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    try {
        return String(error);
    }
    catch {
        return Object.prototype.toString.call(error);
    }
}
5
/**
 * Captures a post-step probe of the active page: its URL, a viewport
 * screenshot, and the accessibility tree.
 *
 * A failure while resolving the page or taking the screenshot is logged at
 * level 1 (prefixed with `warningMessage`) and does not abort the probe. The
 * aria tree capture is attempted regardless.
 *
 * @param {object} params
 * @param {object} params.v3 - Instance providing `context.awaitActivePage()`.
 * @param {string} params.url - Fallback URL used when the active page cannot
 *   be resolved.
 * @param {(line: object) => void} params.logger - Receives the warning line.
 * @param {string} params.warningMessage - Prefix for the warning text.
 * @returns {Promise<{ url: string, screenshot?: Buffer, ariaTree?: string }>}
 *   `screenshot` and `ariaTree` are present only when they were captured.
 */
export async function captureProbeEvidence({ v3, url, logger, warningMessage, }) {
    const evidence = { url };
    let capturedShot;
    try {
        const activePage = await v3.context.awaitActivePage();
        // Record the live URL before the screenshot so it survives a screenshot failure.
        evidence.url = activePage.url();
        capturedShot = await activePage.screenshot({ fullPage: false });
    }
    catch (failure) {
        const detail = errorMessage(failure);
        logger({
            category: "agent",
            message: `${warningMessage}: ${detail}`,
            level: 1,
        });
    }
    if (capturedShot) {
        evidence.screenshot = capturedShot;
    }
    const tree = await captureAriaTreeProbe(v3);
    if (tree !== undefined) {
        evidence.ariaTree = tree;
    }
    return evidence;
}
27
/**
 * Captures probe evidence after a step and forwards it to the evidence
 * callback. Does nothing when no callback is supplied, so the capture cost is
 * only paid when something is listening.
 *
 * Emits a "screenshot" event (role "probe") when a screenshot was captured,
 * followed by a "step_observed" event carrying the URL and the aria tree
 * (which may be undefined).
 *
 * @param {object} params
 * @param {object} params.v3 - Instance handed to the probe capture.
 * @param {string} params.url - Fallback URL for the probe.
 * @param {(event: object) => unknown} [params.evidenceCallback] - Event sink.
 * @param {(line: object) => void} params.logger - Receives capture warnings.
 * @param {string} params.warningMessage - Prefix for capture warnings.
 * @returns {Promise<void>}
 */
export async function emitPostStepProbeEvidence({ v3, url, evidenceCallback, logger, warningMessage, }) {
    if (evidenceCallback) {
        const captured = await captureProbeEvidence({ v3, url, logger, warningMessage });
        const observedUrl = captured.url;
        const shot = captured.screenshot;
        if (shot) {
            const screenshotEvent = {
                type: "screenshot",
                screenshot: shot,
                url: observedUrl,
                evidenceRole: "probe",
            };
            await evidenceCallback(screenshotEvent);
        }
        const observedEvent = {
            type: "step_observed",
            url: observedUrl,
            ariaTree: captured.ariaTree,
        };
        await evidenceCallback(observedEvent);
    }
}
50
+ //# sourceMappingURL=postStepProbeEvidence.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"postStepProbeEvidence.js","sourceRoot":"","sources":["../../../../../../lib/v3/agent/utils/postStepProbeEvidence.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AAajE,SAAS,YAAY,CAAC,KAAc;IAClC,OAAO,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAChE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,EACzC,EAAE,EACF,GAAG,EACH,MAAM,EACN,cAAc,GACc;IAK5B,IAAI,QAAQ,GAAG,GAAG,CAAC;IACnB,IAAI,UAA8B,CAAC;IACnC,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;QAChD,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACtB,UAAU,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;IAC1D,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,CAAC;YACL,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,GAAG,cAAc,KAAK,YAAY,CAAC,CAAC,CAAC,EAAE;YAChD,KAAK,EAAE,CAAC;SACT,CAAC,CAAC;IACL,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,oBAAoB,CAAC,EAAE,CAAC,CAAC;IAChD,OAAO;QACL,GAAG,EAAE,QAAQ;QACb,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACrC,GAAG,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAChD,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,EAC9C,EAAE,EACF,GAAG,EACH,gBAAgB,EAChB,MAAM,EACN,cAAc,GACmB;IACjC,IAAI,CAAC,gBAAgB;QAAE,OAAO;IAE9B,MAAM,KAAK,GAAG,MAAM,oBAAoB,CAAC;QACvC,EAAE;QACF,GAAG;QACH,MAAM;QACN,cAAc;KACf,CAAC,CAAC;IACH,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;QACrB,MAAM,gBAAgB,CAAC;YACrB,IAAI,EAAE,YAAY;YAClB,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,YAAY,EAAE,OAAO;SACtB,CAAC,CAAC;IACL,CAAC;IACD,MAAM,gBAAgB,CAAC;QACrB,IAAI,EAAE,eAAe;QACrB,GAAG,EAAE,KAAK,CAAC,GAAG;QACd,QAAQ,EAAE,KAAK,CAAC,QAAQ;KACzB,CAAC,CAAC;AACL,CAAC","sourcesContent":["import type { AgentEvidenceCallback } from \"../../types/public/agentEvidenceEvents.js\";\nimport type { LogLine } from \"../../types/public/logs.js\";\nimport type { V3 } from \"../../v3.js\";\nimport { captureAriaTreeProbe } from \"./captureAriaTreeProbe.js\";\n\ninterface CaptureProbeEvidenceOptions {\n v3: V3;\n url: 
string;\n logger: (message: LogLine) => void;\n warningMessage: string;\n}\n\ninterface EmitPostStepProbeEvidenceOptions extends CaptureProbeEvidenceOptions {\n evidenceCallback?: AgentEvidenceCallback;\n}\n\nfunction errorMessage(error: unknown): string {\n return error instanceof Error ? error.message : String(error);\n}\n\nexport async function captureProbeEvidence({\n v3,\n url,\n logger,\n warningMessage,\n}: CaptureProbeEvidenceOptions): Promise<{\n url: string;\n screenshot?: Buffer;\n ariaTree?: string;\n}> {\n let probeUrl = url;\n let screenshot: Buffer | undefined;\n try {\n const page = await v3.context.awaitActivePage();\n probeUrl = page.url();\n screenshot = await page.screenshot({ fullPage: false });\n } catch (e) {\n logger({\n category: \"agent\",\n message: `${warningMessage}: ${errorMessage(e)}`,\n level: 1,\n });\n }\n\n const ariaTree = await captureAriaTreeProbe(v3);\n return {\n url: probeUrl,\n ...(screenshot ? { screenshot } : {}),\n ...(ariaTree !== undefined ? { ariaTree } : {}),\n };\n}\n\nexport async function emitPostStepProbeEvidence({\n v3,\n url,\n evidenceCallback,\n logger,\n warningMessage,\n}: EmitPostStepProbeEvidenceOptions): Promise<void> {\n if (!evidenceCallback) return;\n\n const probe = await captureProbeEvidence({\n v3,\n url,\n logger,\n warningMessage,\n });\n if (probe.screenshot) {\n await evidenceCallback({\n type: \"screenshot\",\n screenshot: probe.screenshot,\n url: probe.url,\n evidenceRole: \"probe\",\n });\n }\n await evidenceCallback({\n type: \"step_observed\",\n url: probe.url,\n ariaTree: probe.ariaTree,\n });\n}\n"]}
@@ -0,0 +1,2 @@
1
+ import type { AgentStepFinishedEvent } from "../../types/public/agentEvidenceEvents.js";
2
+ export declare function inferToolOutput(toolResult: unknown): AgentStepFinishedEvent["toolOutput"];
@@ -0,0 +1,59 @@
1
// Upper bound on the length of any error text before the truncation suffix.
const ERROR_STRING_LIMIT = 1000;

/**
 * Reports whether `value` is a non-null, non-array object.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
    return value !== null && typeof value === "object" && !Array.isArray(value);
}

/**
 * Own-property check that does not rely on the object's own prototype chain.
 *
 * @param {object} value
 * @param {string} key
 * @returns {boolean}
 */
function hasOwn(value, key) {
    return Object.prototype.hasOwnProperty.call(value, key);
}

/**
 * Clips error text to ERROR_STRING_LIMIT characters, marking the cut.
 *
 * @param {string} text
 * @returns {string}
 */
function truncateErrorText(text) {
    if (text.length <= ERROR_STRING_LIMIT) {
        return text;
    }
    return `${text.slice(0, ERROR_STRING_LIMIT)}... [truncated]`;
}

/**
 * `String(value)` that cannot throw: values with no usable `toString`
 * (e.g. null-prototype objects) fall back to the generic object tag.
 *
 * @param {unknown} value
 * @returns {string}
 */
function safeString(value) {
    try {
        return String(value);
    }
    catch {
        return Object.prototype.toString.call(value);
    }
}

/**
 * Converts an `error` field of arbitrary shape into a bounded string.
 *
 * `undefined`, `null` and `false` mean "no error" and yield undefined. Every
 * other value yields text no longer than ERROR_STRING_LIMIT plus the
 * truncation suffix. The limit applies to strings and Error messages as well
 * as serialized objects, so a large payload in any form cannot bloat the
 * evidence record.
 *
 * @param {unknown} value - Raw `error` field from a tool result.
 * @returns {string | undefined}
 */
function normalizeError(value) {
    if (value === undefined || value === null || value === false) {
        return undefined;
    }
    if (value instanceof Error) {
        return truncateErrorText(safeString(value.message));
    }
    if (typeof value === "string") {
        return truncateErrorText(value);
    }
    if (typeof value === "number" ||
        typeof value === "boolean" ||
        typeof value === "bigint") {
        return String(value);
    }
    let serialized;
    try {
        // JSON.stringify returns undefined for symbols and functions, and throws on
        // cycles or BigInt members; both cases fall back to plain stringification.
        serialized = JSON.stringify(value) ?? safeString(value);
    }
    catch {
        serialized = safeString(value);
    }
    return truncateErrorText(serialized);
}

/**
 * Lists the objects that may carry status fields: the tool result itself and,
 * when present, its nested `output` object. Non-object results yield none.
 *
 * @param {unknown} toolResult
 * @returns {object[]}
 */
function statusCandidates(toolResult) {
    if (!isRecord(toolResult)) {
        return [];
    }
    const candidates = [toolResult];
    const output = toolResult.output;
    if (isRecord(output)) {
        candidates.push(output);
    }
    return candidates;
}

/**
 * Derives an `{ ok, result, error }` summary from a raw tool result.
 *
 * The result is considered failed when any candidate has an own `error`
 * property that normalizes to a message, a truthy `isError`, or
 * `success === false`. `error` is the first normalized message found, checking
 * the top-level object before its `output`.
 *
 * @param {unknown} toolResult - Whatever the tool returned.
 * @returns {{ ok: boolean, result: unknown, error: string | undefined }}
 */
export function inferToolOutput(toolResult) {
    const candidates = statusCandidates(toolResult);
    const error = candidates
        .map((candidate) => hasOwn(candidate, "error") ? normalizeError(candidate.error) : undefined)
        .find((message) => message !== undefined);
    const successFalse = candidates.some((candidate) => candidate.success === false);
    const isError = candidates.some((candidate) => Boolean(candidate.isError));
    return {
        ok: error === undefined && !isError && !successFalse,
        result: toolResult,
        error,
    };
}
59
+ //# sourceMappingURL=toolOutputEvidence.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"toolOutputEvidence.js","sourceRoot":"","sources":["../../../../../../lib/v3/agent/utils/toolOutputEvidence.ts"],"names":[],"mappings":"AAEA,MAAM,kBAAkB,GAAG,IAAI,CAAC;AAEhC,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,KAAK,KAAK,IAAI,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;AAC9E,CAAC;AAED,SAAS,MAAM,CAAC,KAA8B,EAAE,GAAW;IACzD,OAAO,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;AAC1D,CAAC;AAED,SAAS,cAAc,CAAC,KAAc;IACpC,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,KAAK,EAAE,CAAC;QAC7D,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IAAI,KAAK,YAAY,KAAK,EAAE,CAAC;QAC3B,OAAO,KAAK,CAAC,OAAO,CAAC;IACvB,CAAC;IACD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAC;IACf,CAAC;IACD,IACE,OAAO,KAAK,KAAK,QAAQ;QACzB,OAAO,KAAK,KAAK,SAAS;QAC1B,OAAO,KAAK,KAAK,QAAQ,EACzB,CAAC;QACD,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IACvB,CAAC;IAED,IAAI,UAAkB,CAAC;IACvB,IAAI,CAAC;QACH,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC;IACtD,CAAC;IAAC,MAAM,CAAC;QACP,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;IACD,IAAI,UAAU,CAAC,MAAM,IAAI,kBAAkB,EAAE,CAAC;QAC5C,OAAO,UAAU,CAAC;IACpB,CAAC;IACD,OAAO,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,kBAAkB,CAAC,iBAAiB,CAAC;AACrE,CAAC;AAED,SAAS,gBAAgB,CAAC,UAAmB;IAC3C,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QAC1B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,UAAU,GAAG,CAAC,UAAU,CAAC,CAAC;IAChC,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,CAAC;IACjC,IAAI,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACrB,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC1B,CAAC;IACD,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,UAAmB;IAEnB,MAAM,UAAU,GAAG,gBAAgB,CAAC,UAAU,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,UAAU;SACrB,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CACjB,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CACzE;SACA,IAAI,CAAC,CAAC,OAAO,EAAqB,EAAE,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC;IAE/D,MAAM,YAAY,GAAG,UAAU,CAAC,IAAI,CAClC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,OAAO,KAAK,KAAK,CAC3C,CAAC;IACF,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAA
E,CAAC,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;IAE3E,OAAO;QACL,EAAE,EAAE,KAAK,KAAK,SAAS,IAAI,CAAC,OAAO,IAAI,CAAC,YAAY;QACpD,MAAM,EAAE,UAAU;QAClB,KAAK;KACN,CAAC;AACJ,CAAC","sourcesContent":["import type { AgentStepFinishedEvent } from \"../../types/public/agentEvidenceEvents.js\";\n\nconst ERROR_STRING_LIMIT = 1000;\n\nfunction isRecord(value: unknown): value is Record<string, unknown> {\n return value !== null && typeof value === \"object\" && !Array.isArray(value);\n}\n\nfunction hasOwn(value: Record<string, unknown>, key: string): boolean {\n return Object.prototype.hasOwnProperty.call(value, key);\n}\n\nfunction normalizeError(value: unknown): string | undefined {\n if (value === undefined || value === null || value === false) {\n return undefined;\n }\n if (value instanceof Error) {\n return value.message;\n }\n if (typeof value === \"string\") {\n return value;\n }\n if (\n typeof value === \"number\" ||\n typeof value === \"boolean\" ||\n typeof value === \"bigint\"\n ) {\n return String(value);\n }\n\n let serialized: string;\n try {\n serialized = JSON.stringify(value) ?? String(value);\n } catch {\n serialized = String(value);\n }\n if (serialized.length <= ERROR_STRING_LIMIT) {\n return serialized;\n }\n return `${serialized.slice(0, ERROR_STRING_LIMIT)}... [truncated]`;\n}\n\nfunction statusCandidates(toolResult: unknown): Record<string, unknown>[] {\n if (!isRecord(toolResult)) {\n return [];\n }\n\n const candidates = [toolResult];\n const output = toolResult.output;\n if (isRecord(output)) {\n candidates.push(output);\n }\n return candidates;\n}\n\nexport function inferToolOutput(\n toolResult: unknown,\n): AgentStepFinishedEvent[\"toolOutput\"] {\n const candidates = statusCandidates(toolResult);\n const error = candidates\n .map((candidate) =>\n hasOwn(candidate, \"error\") ? 
normalizeError(candidate.error) : undefined,\n )\n .find((message): message is string => message !== undefined);\n\n const successFalse = candidates.some(\n (candidate) => candidate.success === false,\n );\n const isError = candidates.some((candidate) => Boolean(candidate.isError));\n\n return {\n ok: error === undefined && !isError && !successFalse,\n result: toolResult,\n error,\n };\n}\n"]}
@@ -0,0 +1,3 @@
1
+ import type { AgentEvidenceCallback } from "../../types/public/agentEvidenceEvents.js";
2
+ import type { LogLine } from "../../types/public/logs.js";
3
+ export declare function wrapEvidenceCallback(callback: AgentEvidenceCallback | undefined, logger: (message: LogLine) => void): AgentEvidenceCallback | undefined;
@@ -0,0 +1,22 @@
1
/**
 * onEvidence is a user-supplied observability hook (trajectory recording,
 * verifier capture, etc.). Wrap it once at the boundary where the handler
 * receives it so a throwing user callback can never abort the agent loop —
 * internal emit sites can then call the wrapped callback directly without
 * per-site try/catch.
 *
 * The returned function never rejects: a failure in the callback is reported
 * through `logger` at level 1, and a failure while reporting (for example a
 * throwing logger) is dropped, since there is nowhere left to report it.
 *
 * @param {((event: object) => unknown) | undefined} callback - User hook.
 * @param {(line: object) => void} logger - Receives the warning line.
 * @returns {((event: object) => Promise<void>) | undefined} The guarded hook,
 *   or undefined when no callback was supplied.
 */
export function wrapEvidenceCallback(callback, logger) {
    if (!callback)
        return undefined;
    return async (event) => {
        try {
            await callback(event);
        }
        catch (e) {
            try {
                logger({
                    category: "agent",
                    // `event?.type` keeps the report itself from throwing on a nullish event.
                    message: `Warning: onEvidence callback failed for ${event?.type}: ${e instanceof Error ? e.message : String(e)}`,
                    level: 1,
                });
            }
            catch {
                // Reporting failed too; swallowing is deliberate so the agent loop
                // is never interrupted by an observability hook.
            }
        }
    };
}
22
+ //# sourceMappingURL=wrapEvidenceCallback.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"wrapEvidenceCallback.js","sourceRoot":"","sources":["../../../../../../lib/v3/agent/utils/wrapEvidenceCallback.ts"],"names":[],"mappings":"AAGA,0EAA0E;AAC1E,0EAA0E;AAC1E,2EAA2E;AAC3E,0EAA0E;AAC1E,sBAAsB;AACtB,MAAM,UAAU,oBAAoB,CAClC,QAA2C,EAC3C,MAAkC;IAElC,IAAI,CAAC,QAAQ;QAAE,OAAO,SAAS,CAAC;IAChC,OAAO,KAAK,EAAE,KAAK,EAAE,EAAE;QACrB,IAAI,CAAC;YACH,MAAM,QAAQ,CAAC,KAAK,CAAC,CAAC;QACxB,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,MAAM,CAAC;gBACL,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,2CAA2C,KAAK,CAAC,IAAI,KAC5D,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAC3C,EAAE;gBACF,KAAK,EAAE,CAAC;aACT,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC;AACJ,CAAC","sourcesContent":["import type { AgentEvidenceCallback } from \"../../types/public/agentEvidenceEvents.js\";\nimport type { LogLine } from \"../../types/public/logs.js\";\n\n// onEvidence is a user-supplied observability hook (trajectory recording,\n// verifier capture, etc.). Wrap it once at the boundary where the handler\n// receives it so a throwing user callback can never abort the agent loop —\n// internal emit sites can then call the wrapped callback directly without\n// per-site try/catch.\nexport function wrapEvidenceCallback(\n callback: AgentEvidenceCallback | undefined,\n logger: (message: LogLine) => void,\n): AgentEvidenceCallback | undefined {\n if (!callback) return undefined;\n return async (event) => {\n try {\n await callback(event);\n } catch (e) {\n logger({\n category: \"agent\",\n message: `Warning: onEvidence callback failed for ${event.type}: ${\n e instanceof Error ? e.message : String(e)\n }`,\n level: 1,\n });\n }\n };\n}\n"]}
@@ -1,5 +1,6 @@
1
1
  import type { ActResult, AgentConfig, AgentExecuteOptions, AgentResult, ExtractResult, ObserveResult, LogLine, StagehandMetrics, BrowserbaseRegion, ActOptions, ExtractOptions, ObserveOptions, Api } from "./types/public/index.js";
2
2
  import type { SerializableResponse, AgentCacheTransferPayload } from "./types/private/index.js";
3
+ import type { ModelConfiguration } from "./types/public/model.js";
3
4
  import type { StagehandZodSchema } from "./zodCompat.js";
4
5
  /**
5
6
  * Mapping of Browserbase regions to their corresponding Stagehand API base URLs.
@@ -41,6 +42,8 @@ interface ClientSessionStartParams extends Api.SessionStartRequest {
41
42
  * Optional: when omitted, requests are sent without the x-model-api-key header
42
43
  * and the server is expected to handle model authentication on its own. */
43
44
  modelApiKey?: string;
45
+ /** Default model config for later action requests. Not sent to /sessions/start. */
46
+ defaultModelConfig?: ModelConfiguration;
44
47
  }
45
48
  /**
46
49
  * Client parameters for act() method.
@@ -79,6 +82,7 @@ export declare class StagehandAPIClient {
79
82
  private sessionId?;
80
83
  private modelApiKey?;
81
84
  private modelProvider?;
85
+ private defaultModelConfig?;
82
86
  private region?;
83
87
  private logger;
84
88
  private fetchWithCookies;
@@ -87,7 +91,7 @@ export declare class StagehandAPIClient {
87
91
  private latestAgentCacheEntry;
88
92
  private warnedStagehandBaseUrl;
89
93
  constructor({ apiKey, projectId, logger, serverCache, }: StagehandAPIConstructorParams);
90
- init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, systemPrompt, selfHeal, browserbaseSessionCreateParams, browserbaseSessionID, }: ClientSessionStartParams): Promise<Api.SessionStartResult>;
94
+ init({ modelName, modelApiKey, defaultModelConfig, domSettleTimeoutMs, verbose, systemPrompt, selfHeal, browserbaseSessionCreateParams, browserbaseSessionID, }: ClientSessionStartParams): Promise<Api.SessionStartResult>;
91
95
  act({ input, options, frameId, }: ClientActParameters): Promise<ActResult>;
92
96
  extract<T extends StagehandZodSchema>({ instruction, schema: zodSchema, options, frameId, }: ClientExtractParameters): Promise<ExtractResult<T>>;
93
97
  observe({ instruction, options, frameId, }: ClientObserveParameters): Promise<ObserveResult>;
@@ -105,6 +109,8 @@ export declare class StagehandAPIClient {
105
109
  * model provider differs from the one used to init the session.
106
110
  */
107
111
  private prepareModelConfig;
112
+ private getDefaultModelConfig;
113
+ private getModelProvider;
108
114
  private consumeFinishedEventData;
109
115
  private execute;
110
116
  /**
@@ -35,6 +35,7 @@ export class StagehandAPIClient {
35
35
  sessionId;
36
36
  modelApiKey;
37
37
  modelProvider;
38
+ defaultModelConfig;
38
39
  region;
39
40
  logger;
40
41
  fetchWithCookies;
@@ -50,7 +51,7 @@ export class StagehandAPIClient {
50
51
  // Create a single cookie jar instance that will persist across all requests
51
52
  this.fetchWithCookies = makeFetchCookie(fetch);
52
53
  }
53
- async init({ modelName, modelApiKey, domSettleTimeoutMs, verbose, systemPrompt, selfHeal, browserbaseSessionCreateParams, browserbaseSessionID,
54
+ async init({ modelName, modelApiKey, defaultModelConfig, domSettleTimeoutMs, verbose, systemPrompt, selfHeal, browserbaseSessionCreateParams, browserbaseSessionID,
54
55
  // browser, TODO for local browsers
55
56
  }) {
56
57
  this.modelApiKey = modelApiKey;
@@ -58,6 +59,9 @@ export class StagehandAPIClient {
58
59
  this.modelProvider = modelName?.includes("/")
59
60
  ? modelName.split("/")[0]
60
61
  : undefined;
62
+ this.defaultModelConfig = defaultModelConfig
63
+ ? this.prepareModelConfig(defaultModelConfig)
64
+ : undefined;
61
65
  // Store the region for multi-region API URL resolution
62
66
  this.region = browserbaseSessionCreateParams?.region;
63
67
  this.logger({
@@ -111,13 +115,21 @@ export class StagehandAPIClient {
111
115
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
112
116
  const { page: _, serverCache: enableCache, ...restOptions } = options;
113
117
  serverCache = enableCache;
118
+ if (restOptions.model) {
119
+ restOptions.model = this.prepareModelConfig(restOptions.model);
120
+ }
121
+ else if (this.defaultModelConfig) {
122
+ restOptions.model = this.getDefaultModelConfig();
123
+ }
114
124
  if (Object.keys(restOptions).length > 0) {
115
- if (restOptions.model) {
116
- restOptions.model = this.prepareModelConfig(restOptions.model);
117
- }
118
125
  wireOptions = restOptions;
119
126
  }
120
127
  }
128
+ else if (this.defaultModelConfig) {
129
+ wireOptions = {
130
+ model: this.getDefaultModelConfig(),
131
+ };
132
+ }
121
133
  // Build wire-format request body
122
134
  const requestBody = {
123
135
  input,
@@ -140,13 +152,21 @@ export class StagehandAPIClient {
140
152
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
141
153
  const { page: _, serverCache: enableCache, ...restOptions } = options;
142
154
  serverCache = enableCache;
155
+ if (restOptions.model) {
156
+ restOptions.model = this.prepareModelConfig(restOptions.model);
157
+ }
158
+ else if (this.defaultModelConfig) {
159
+ restOptions.model = this.getDefaultModelConfig();
160
+ }
143
161
  if (Object.keys(restOptions).length > 0) {
144
- if (restOptions.model) {
145
- restOptions.model = this.prepareModelConfig(restOptions.model);
146
- }
147
162
  wireOptions = restOptions;
148
163
  }
149
164
  }
165
+ else if (this.defaultModelConfig) {
166
+ wireOptions = {
167
+ model: this.getDefaultModelConfig(),
168
+ };
169
+ }
150
170
  // Build wire-format request body
151
171
  const requestBody = {
152
172
  instruction,
@@ -168,13 +188,21 @@ export class StagehandAPIClient {
168
188
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
169
189
  const { page: _, serverCache: enableCache, ...restOptions } = options;
170
190
  serverCache = enableCache;
191
+ if (restOptions.model) {
192
+ restOptions.model = this.prepareModelConfig(restOptions.model);
193
+ }
194
+ else if (this.defaultModelConfig) {
195
+ restOptions.model = this.getDefaultModelConfig();
196
+ }
171
197
  if (Object.keys(restOptions).length > 0) {
172
- if (restOptions.model) {
173
- restOptions.model = this.prepareModelConfig(restOptions.model);
174
- }
175
198
  wireOptions = restOptions;
176
199
  }
177
200
  }
201
+ else if (this.defaultModelConfig) {
202
+ wireOptions = {
203
+ model: this.getDefaultModelConfig(),
204
+ };
205
+ }
178
206
  // Build wire-format request body
179
207
  const requestBody = {
180
208
  instruction,
@@ -188,7 +216,19 @@ export class StagehandAPIClient {
188
216
  });
189
217
  }
190
218
  async goto(url, options, frameId) {
191
- const requestBody = { url, options, frameId };
219
+ const publicOptions = { ...(options ?? {}) };
220
+ delete publicOptions.model;
221
+ const wireOptions = {
222
+ ...publicOptions,
223
+ ...(this.defaultModelConfig
224
+ ? { model: this.getDefaultModelConfig() }
225
+ : {}),
226
+ };
227
+ const requestBody = {
228
+ url,
229
+ options: Object.keys(wireOptions).length > 0 ? wireOptions : undefined,
230
+ frameId,
231
+ };
192
232
  return this.execute({
193
233
  method: "navigate",
194
234
  args: requestBody,
@@ -218,7 +258,7 @@ export class StagehandAPIClient {
218
258
  cua: agentConfig.mode === undefined ? agentConfig.cua : undefined,
219
259
  model: agentConfig.model
220
260
  ? this.prepareModelConfig(agentConfig.model)
221
- : undefined,
261
+ : this.getDefaultModelConfig(),
222
262
  executionModel: agentConfig.executionModel
223
263
  ? this.prepareModelConfig(agentConfig.executionModel)
224
264
  : undefined,
@@ -374,28 +414,41 @@ export class StagehandAPIClient {
374
414
  prepareModelConfig(model) {
375
415
  if (typeof model === "string") {
376
416
  // Extract provider from model string (e.g., "openai/gpt-5-nano" -> "openai")
377
- const provider = model.includes("/") ? model.split("/")[0] : undefined;
417
+ const provider = this.getModelProvider(model);
418
+ const inheritedDefault = provider && provider === this.modelProvider
419
+ ? this.getDefaultModelConfig()
420
+ : undefined;
378
421
  const apiKey = provider && provider !== this.modelProvider
379
422
  ? (loadApiKeyFromEnv(provider, this.logger) ?? this.modelApiKey)
380
423
  : this.modelApiKey;
381
424
  return {
425
+ ...inheritedDefault,
382
426
  modelName: model,
383
427
  ...(apiKey ? { apiKey } : {}),
384
428
  };
385
429
  }
386
- if (!model.apiKey) {
387
- const provider = model.modelName?.includes("/")
388
- ? model.modelName.split("/")[0]
430
+ const provider = this.getModelProvider(model.modelName);
431
+ const inheritedDefault = provider && provider === this.modelProvider
432
+ ? this.getDefaultModelConfig()
433
+ : undefined;
434
+ const apiKey = !model.apiKey && provider && provider !== this.modelProvider
435
+ ? (loadApiKeyFromEnv(provider, this.logger) ?? this.modelApiKey)
436
+ : !model.apiKey
437
+ ? this.modelApiKey
389
438
  : undefined;
390
- const apiKey = provider && provider !== this.modelProvider
391
- ? (loadApiKeyFromEnv(provider, this.logger) ?? this.modelApiKey)
392
- : this.modelApiKey;
393
- return {
394
- ...model,
395
- ...(apiKey ? { apiKey } : {}),
396
- };
397
- }
398
- return model;
439
+ return {
440
+ ...inheritedDefault,
441
+ ...model,
442
+ ...(apiKey ? { apiKey } : {}),
443
+ };
444
+ }
445
+ getDefaultModelConfig() {
446
+ return this.defaultModelConfig
447
+ ? { ...this.defaultModelConfig }
448
+ : undefined;
449
+ }
450
+ getModelProvider(modelName) {
451
+ return modelName?.includes("/") ? modelName.split("/")[0] : undefined;
399
452
  }
400
453
  consumeFinishedEventData() {
401
454
  const data = this.lastFinishedEventData;
@@ -577,12 +630,30 @@ export class StagehandAPIClient {
577
630
  else {
578
631
  baseUrl = getApiUrlForRegion(this.region);
579
632
  }
633
+ const headers = {
634
+ ...defaultHeaders,
635
+ ...options.headers,
636
+ };
637
+ if (path.endsWith("/navigate")) {
638
+ let body = options.body;
639
+ if (typeof options.body === "string") {
640
+ try {
641
+ body = JSON.parse(options.body);
642
+ }
643
+ catch {
644
+ body = options.body;
645
+ }
646
+ }
647
+ console.log("Stagehand goto request", JSON.stringify({
648
+ url: `${baseUrl}${path}`,
649
+ method: options.method,
650
+ headers,
651
+ body,
652
+ }, null, 2));
653
+ }
580
654
  const response = await this.fetchWithCookies(`${baseUrl}${path}`, {
581
655
  ...options,
582
- headers: {
583
- ...defaultHeaders,
584
- ...options.headers,
585
- },
656
+ headers,
586
657
  });
587
658
  return response;
588
659
  }