@browserbasehq/orca 3.5.0-vertex-test → 3.5.1-preview.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/cjs/lib/utils.d.ts +2 -0
- package/dist/cjs/lib/utils.js +20 -0
- package/dist/cjs/lib/utils.js.map +1 -1
- package/dist/cjs/lib/v3/agent/AgentProvider.js +1 -0
- package/dist/cjs/lib/v3/agent/AgentProvider.js.map +1 -1
- package/dist/cjs/lib/v3/agent/AnthropicCUAClient.js +1 -0
- package/dist/cjs/lib/v3/agent/AnthropicCUAClient.js.map +1 -1
- package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.d.ts +35 -0
- package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.js +38 -0
- package/dist/cjs/lib/v3/agent/utils/captureAriaTreeProbe.js.map +1 -0
- package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.d.ts +19 -0
- package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.js +54 -0
- package/dist/cjs/lib/v3/agent/utils/postStepProbeEvidence.js.map +1 -0
- package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.d.ts +2 -0
- package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.js +62 -0
- package/dist/cjs/lib/v3/agent/utils/toolOutputEvidence.js.map +1 -0
- package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.d.ts +3 -0
- package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.js +25 -0
- package/dist/cjs/lib/v3/agent/utils/wrapEvidenceCallback.js.map +1 -0
- package/dist/cjs/lib/v3/api.d.ts +1 -0
- package/dist/cjs/lib/v3/api.js +37 -16
- package/dist/cjs/lib/v3/api.js.map +1 -1
- package/dist/cjs/lib/v3/dom/build/locatorScripts.generated.d.ts +24 -24
- package/dist/cjs/lib/v3/dom/build/locatorScripts.generated.js +24 -24
- package/dist/cjs/lib/v3/dom/build/locatorScripts.generated.js.map +1 -1
- package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.d.ts +24 -0
- package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.js +31 -0
- package/dist/cjs/lib/v3/dom/build/selectorRuntime.generated.js.map +1 -0
- package/dist/cjs/lib/v3/dom/locatorScripts/xpathResolver.js +79 -10
- package/dist/cjs/lib/v3/dom/locatorScripts/xpathResolver.js.map +1 -1
- package/dist/cjs/lib/v3/handlers/v3AgentHandler.d.ts +1 -0
- package/dist/cjs/lib/v3/handlers/v3AgentHandler.js +83 -7
- package/dist/cjs/lib/v3/handlers/v3AgentHandler.js.map +1 -1
- package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.d.ts +11 -0
- package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js +119 -5
- package/dist/cjs/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
- package/dist/cjs/lib/v3/index.d.ts +13 -1
- package/dist/cjs/lib/v3/index.js +19 -1
- package/dist/cjs/lib/v3/index.js.map +1 -1
- package/dist/cjs/lib/v3/llm/LLMProvider.d.ts +3 -0
- package/dist/cjs/lib/v3/llm/LLMProvider.js +44 -3
- package/dist/cjs/lib/v3/llm/LLMProvider.js.map +1 -1
- package/dist/cjs/lib/v3/types/public/agent.d.ts +8 -2
- package/dist/cjs/lib/v3/types/public/agent.js +1 -0
- package/dist/cjs/lib/v3/types/public/agent.js.map +1 -1
- package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.d.ts +85 -0
- package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.js +15 -0
- package/dist/cjs/lib/v3/types/public/agentEvidenceEvents.js.map +1 -0
- package/dist/cjs/lib/v3/types/public/api.d.ts +925 -182
- package/dist/cjs/lib/v3/types/public/api.js +138 -20
- package/dist/cjs/lib/v3/types/public/api.js.map +1 -1
- package/dist/cjs/lib/v3/types/public/clipboard.d.ts +15 -0
- package/dist/cjs/lib/v3/types/public/clipboard.js +3 -0
- package/dist/cjs/lib/v3/types/public/clipboard.js.map +1 -0
- package/dist/cjs/lib/v3/types/public/index.d.ts +2 -0
- package/dist/cjs/lib/v3/types/public/index.js +2 -0
- package/dist/cjs/lib/v3/types/public/index.js.map +1 -1
- package/dist/cjs/lib/v3/types/public/model.d.ts +30 -6
- package/dist/cjs/lib/v3/types/public/model.js.map +1 -1
- package/dist/cjs/lib/v3/types/public/page.d.ts +29 -0
- package/dist/cjs/lib/v3/types/public/page.js.map +1 -1
- package/dist/cjs/lib/v3/types/public/sdkErrors.d.ts +3 -0
- package/dist/cjs/lib/v3/types/public/sdkErrors.js +8 -2
- package/dist/cjs/lib/v3/types/public/sdkErrors.js.map +1 -1
- package/dist/cjs/lib/v3/understudy/clipboard.d.ts +24 -0
- package/dist/cjs/lib/v3/understudy/clipboard.js +166 -0
- package/dist/cjs/lib/v3/understudy/clipboard.js.map +1 -0
- package/dist/cjs/lib/v3/understudy/context.d.ts +3 -0
- package/dist/cjs/lib/v3/understudy/context.js +15 -0
- package/dist/cjs/lib/v3/understudy/context.js.map +1 -1
- package/dist/cjs/lib/v3/understudy/page.d.ts +23 -1
- package/dist/cjs/lib/v3/understudy/page.js +283 -0
- package/dist/cjs/lib/v3/understudy/page.js.map +1 -1
- package/dist/cjs/lib/v3/v3.js +15 -6
- package/dist/cjs/lib/v3/v3.js.map +1 -1
- package/dist/cjs/lib/v3/verifier/evidenceNormalization.d.ts +7 -0
- package/dist/cjs/lib/v3/verifier/evidenceNormalization.js +100 -0
- package/dist/cjs/lib/v3/verifier/evidenceNormalization.js.map +1 -0
- package/dist/cjs/lib/v3/verifier/index.d.ts +6 -0
- package/dist/cjs/lib/v3/verifier/index.js +16 -0
- package/dist/cjs/lib/v3/verifier/index.js.map +1 -0
- package/dist/cjs/lib/v3/verifier/trajectory.d.ts +50 -0
- package/dist/cjs/lib/v3/verifier/trajectory.js +316 -0
- package/dist/cjs/lib/v3/verifier/trajectory.js.map +1 -0
- package/dist/cjs/lib/v3/verifier/types.d.ts +281 -0
- package/dist/cjs/lib/v3/verifier/types.js +10 -0
- package/dist/cjs/lib/v3/verifier/types.js.map +1 -0
- package/dist/cjs/lib/v3Evaluator.d.ts +9 -4
- package/dist/cjs/lib/v3Evaluator.js +148 -0
- package/dist/cjs/lib/v3Evaluator.js.map +1 -1
- package/dist/cjs/lib/v3LegacyEvaluator.js +5 -1
- package/dist/cjs/lib/v3LegacyEvaluator.js.map +1 -1
- package/dist/cjs/lib/version.d.ts +1 -1
- package/dist/cjs/lib/version.js +1 -1
- package/dist/cjs/lib/version.js.map +1 -1
- package/dist/esm/lib/utils.d.ts +2 -0
- package/dist/esm/lib/utils.js +18 -0
- package/dist/esm/lib/utils.js.map +1 -1
- package/dist/esm/lib/v3/agent/AgentProvider.js +1 -0
- package/dist/esm/lib/v3/agent/AgentProvider.js.map +1 -1
- package/dist/esm/lib/v3/agent/AnthropicCUAClient.js +1 -0
- package/dist/esm/lib/v3/agent/AnthropicCUAClient.js.map +1 -1
- package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.d.ts +35 -0
- package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.js +35 -0
- package/dist/esm/lib/v3/agent/utils/captureAriaTreeProbe.js.map +1 -0
- package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.d.ts +19 -0
- package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.js +50 -0
- package/dist/esm/lib/v3/agent/utils/postStepProbeEvidence.js.map +1 -0
- package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.d.ts +2 -0
- package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.js +59 -0
- package/dist/esm/lib/v3/agent/utils/toolOutputEvidence.js.map +1 -0
- package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.d.ts +3 -0
- package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.js +22 -0
- package/dist/esm/lib/v3/agent/utils/wrapEvidenceCallback.js.map +1 -0
- package/dist/esm/lib/v3/api.d.ts +1 -0
- package/dist/esm/lib/v3/api.js +38 -17
- package/dist/esm/lib/v3/api.js.map +1 -1
- package/dist/esm/lib/v3/dom/build/locatorScripts.generated.d.ts +24 -24
- package/dist/esm/lib/v3/dom/build/locatorScripts.generated.js +24 -24
- package/dist/esm/lib/v3/dom/build/locatorScripts.generated.js.map +1 -1
- package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.d.ts +24 -0
- package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.js +28 -0
- package/dist/esm/lib/v3/dom/build/selectorRuntime.generated.js.map +1 -0
- package/dist/esm/lib/v3/dom/locatorScripts/xpathResolver.js +79 -10
- package/dist/esm/lib/v3/dom/locatorScripts/xpathResolver.js.map +1 -1
- package/dist/esm/lib/v3/handlers/v3AgentHandler.d.ts +1 -0
- package/dist/esm/lib/v3/handlers/v3AgentHandler.js +83 -7
- package/dist/esm/lib/v3/handlers/v3AgentHandler.js.map +1 -1
- package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.d.ts +11 -0
- package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js +119 -5
- package/dist/esm/lib/v3/handlers/v3CuaAgentHandler.js.map +1 -1
- package/dist/esm/lib/v3/index.d.ts +13 -1
- package/dist/esm/lib/v3/index.js +10 -0
- package/dist/esm/lib/v3/index.js.map +1 -1
- package/dist/esm/lib/v3/llm/LLMProvider.d.ts +3 -0
- package/dist/esm/lib/v3/llm/LLMProvider.js +43 -3
- package/dist/esm/lib/v3/llm/LLMProvider.js.map +1 -1
- package/dist/esm/lib/v3/types/public/agent.d.ts +8 -2
- package/dist/esm/lib/v3/types/public/agent.js +1 -0
- package/dist/esm/lib/v3/types/public/agent.js.map +1 -1
- package/dist/esm/lib/v3/types/public/agentEvidenceEvents.d.ts +85 -0
- package/dist/esm/lib/v3/types/public/agentEvidenceEvents.js +14 -0
- package/dist/esm/lib/v3/types/public/agentEvidenceEvents.js.map +1 -0
- package/dist/esm/lib/v3/types/public/api.d.ts +925 -182
- package/dist/esm/lib/v3/types/public/api.js +136 -18
- package/dist/esm/lib/v3/types/public/api.js.map +1 -1
- package/dist/esm/lib/v3/types/public/clipboard.d.ts +15 -0
- package/dist/esm/lib/v3/types/public/clipboard.js +2 -0
- package/dist/esm/lib/v3/types/public/clipboard.js.map +1 -0
- package/dist/esm/lib/v3/types/public/index.d.ts +2 -0
- package/dist/esm/lib/v3/types/public/index.js +2 -0
- package/dist/esm/lib/v3/types/public/index.js.map +1 -1
- package/dist/esm/lib/v3/types/public/model.d.ts +30 -6
- package/dist/esm/lib/v3/types/public/model.js.map +1 -1
- package/dist/esm/lib/v3/types/public/page.d.ts +29 -0
- package/dist/esm/lib/v3/types/public/page.js.map +1 -1
- package/dist/esm/lib/v3/types/public/sdkErrors.d.ts +3 -0
- package/dist/esm/lib/v3/types/public/sdkErrors.js +5 -0
- package/dist/esm/lib/v3/types/public/sdkErrors.js.map +1 -1
- package/dist/esm/lib/v3/understudy/clipboard.d.ts +24 -0
- package/dist/esm/lib/v3/understudy/clipboard.js +163 -0
- package/dist/esm/lib/v3/understudy/clipboard.js.map +1 -0
- package/dist/esm/lib/v3/understudy/context.d.ts +3 -0
- package/dist/esm/lib/v3/understudy/context.js +15 -0
- package/dist/esm/lib/v3/understudy/context.js.map +1 -1
- package/dist/esm/lib/v3/understudy/page.d.ts +23 -1
- package/dist/esm/lib/v3/understudy/page.js +284 -1
- package/dist/esm/lib/v3/understudy/page.js.map +1 -1
- package/dist/esm/lib/v3/v3.js +16 -7
- package/dist/esm/lib/v3/v3.js.map +1 -1
- package/dist/esm/lib/v3/verifier/evidenceNormalization.d.ts +7 -0
- package/dist/esm/lib/v3/verifier/evidenceNormalization.js +93 -0
- package/dist/esm/lib/v3/verifier/evidenceNormalization.js.map +1 -0
- package/dist/esm/lib/v3/verifier/index.d.ts +6 -0
- package/dist/esm/lib/v3/verifier/index.js +3 -0
- package/dist/esm/lib/v3/verifier/index.js.map +1 -0
- package/dist/esm/lib/v3/verifier/trajectory.d.ts +50 -0
- package/dist/esm/lib/v3/verifier/trajectory.js +273 -0
- package/dist/esm/lib/v3/verifier/trajectory.js.map +1 -0
- package/dist/esm/lib/v3/verifier/types.d.ts +281 -0
- package/dist/esm/lib/v3/verifier/types.js +9 -0
- package/dist/esm/lib/v3/verifier/types.js.map +1 -0
- package/dist/esm/lib/v3Evaluator.d.ts +9 -4
- package/dist/esm/lib/v3Evaluator.js +148 -0
- package/dist/esm/lib/v3Evaluator.js.map +1 -1
- package/dist/esm/lib/v3LegacyEvaluator.js +5 -1
- package/dist/esm/lib/v3LegacyEvaluator.js.map +1 -1
- package/dist/esm/lib/version.d.ts +1 -1
- package/dist/esm/lib/version.js +1 -1
- package/dist/esm/lib/version.js.map +1 -1
- package/package.json +16 -10
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"v3LegacyEvaluator.js","sourceRoot":"","sources":["../../../lib/v3LegacyEvaluator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAWxB,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,6BAA6B,EAAE,MAAM,gCAAgC,CAAC;AAE/E,MAAM,gBAAgB,GAAG,CAAC,CAAC,MAAM,CAAC;IAChC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACjC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE;CACtB,CAAC,CAAC;AAEH,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;AAExD,MAAM,OAAO,iBAAiB;IACpB,EAAE,CAAK;IACP,SAAS,CAAiB;IAC1B,kBAAkB,CAAqC;IACvD,YAAY,GAA+B,GAAG,EAAE,GAAE,CAAC,CAAC;IAE5D,YACE,EAAM,EACN,SAA0B,EAC1B,kBAAkC;QAElC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,SAAS,GAAG,SAAS,IAAK,yBAA4C,CAAC;QAC5E,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,IAAI;YAC9C,MAAM,EACJ,OAAO,CAAC,GAAG,CAAC,cAAc;gBAC1B,OAAO,CAAC,GAAG,CAAC,4BAA4B;gBACxC,EAAE;SACL,CAAC;IACJ,CAAC;IAEO,SAAS;QACf,sEAAsE;QACtE,MAAM,QAAQ,GAAG,IAAI,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;QACjD,OAAO,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACrE,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,OAAwB;QAChC,MAAM,EACJ,QAAQ,EACR,MAAM,EACN,UAAU,GAAG,IAAI,EACjB,YAAY,EACZ,iBAAiB,GAAG,GAAG,EACvB,cAAc,GACf,GAAG,OAAO,CAAC;QACZ,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,6BAA6B,CACrC,oCAAoC,CACrC,CAAC;QACJ,IAAI,CAAC,MAAM,IAAI,CAAC,UAAU;YACxB,MAAM,IAAI,6BAA6B,CACrC,qDAAqD,CACtD,CAAC;QAEJ,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,gCAAgC,CAAC;gBAC3C,QAAQ;gBACR,WAAW,EAAE,UAAU;gBACvB,YAAY;gBACZ,cAAc;aACf,CAAC,CAAC;QACL,CAAC;QAED,MAAM,mBAAmB,GAAG,kIAAkI,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,sDAAsD,8HAA8H,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,EAAE,CAAC;QAElZ,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3D,IAAI,WAA+B,CAAC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YACrD,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,IAAI,mBAAmB,EAAE;oBAChE;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,cAAc;oCAClB,CAAC,CAAC,aAAa,QAAQ,6CAA6C,cAAc,EAAE;oCACpF,CAAC,CAAC,QAAQ;6BACb;4BACD,GAAG,CAAC,UAAU,IAAI,WAAW;gCAC3B,CAAC,CAAC;oCACE;wCACE,IAAI,EAAE,WAAoB;wCAC1B,SAAS,EAAE;4CACT,GAAG,EAAE,0BAA0B,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;yCAChE;qCACF;iCACF;gCACH,CAAC,CAAC,EAAE,CAAC;4BACP,GAAG,CAAC,MAAM;gCACR,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,iBAAiB,MAAM,EAAE,EAAE,CAAC;gCAC9D,CAAC,CAAC,EAAE,CAAC;yBACR;qBACF;iBACF;gBACD,cAAc,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE;aACvE;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAEvB,CAAC;YACF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,SAAS,EAAE,sCAAsC,YAAY,EAAE;aACvD,CAAC;QACb,CAAC;IACH,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAAwB;QACrC,MAAM,EACJ,SAAS,EACT,UAAU,GAAG,IAAI,EACjB,YAAY,GAAG,8EAA8E,EAC7F,iBAAiB,GAAG,GAAG,GACxB,GAAG,OAAO,CAAC;QACZ,IAAI,CAAC,SAAS,EAAE,MAAM;YACpB,MAAM,IAAI,6BAA6B,CACrC,iCAAiC,CAClC,CAAC;QAEJ,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3D,IAAI,WAA+B,CAAC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YACrD,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,SAAS,GAAG,SAAS;aACxB,GAAG,CACF,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CACV,GAAG,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,gBAAgB,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAClF;aACA,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,QAAQ;wBACd,OAAO,EAAE,GAAG,YAAY,2CAA2C,UAAU,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,6CAA6C,CAAC,CAAC,CAAC,EAAE,6KAA6K;qBAChX;oBACD;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE;4BACjC,GAAG,CAAC,UAAU,IAAI,WAAW;gCAC3B,CAAC,CAAC;oCACE;wCACE,IAAI,EAAE,WAAoB;wCAC1B,SAAS,EAAE;4CACT,GAAG,EAAE,0BAA0B,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;yCAChE;qCACF;iCACF;gCACH,CAAC,CAAC,EAAE,CAAC;yBACR;qBACF;iBACF;gBACD,cAAc,EAAE;oBACd,IAAI,EAAE,uBAAuB;oBAC7B,MAAM,EAAE,qBAAqB;iBAC9B;aACF;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,QAAQ,CAAC,IAExB,CAAC;YACF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACzB,UAAU,EAAE,CAAC,CAAC,UAAU;gBACxB,SAAS,EAAE,CAAC,CAAC,SAAS;aACvB,CAAC,CAAC,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC;gBAC1B,UAAU,EAAE,SAAkB;gBAC9B,SAAS,EAAE,sCAAsC,YAAY,EAAE;aAChE,CAAC,CAAC,CAAC;QACN,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,gCAAgC,CAAC,OAK9C;QACC,MAAM,EACJ,QAAQ,EACR,WAAW,EACX,cAAc,EACd,YAAY,GAAG;UACX,cAAc,CAAC,CAAC,CAAC,iGAAiG,CAAC,CAAC,CAAC,EAAE;;;UAGvH,cAAc,CAAC,CAAC,CAAC,kNAAkN,CAAC,CAAC,CAAC,EAAE;0BACxN,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,EAAE,GACtD,GAAG,OAAO,CAAC;QAEZ,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,6BAA6B,CACrC,oCAAoC,CACrC,CAAC;QACJ,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAC1C,MAAM,IAAI,6BAA6B,CACrC,0CAA0C,CAC3C,CAAC;QAEJ,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC5C,IAAI,EAAE,WAAoB;YAC1B,SAAS,EAAE,EAAE,GAAG,EAAE,0BAA0B,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,EAAE;SACrE,CAAC,CAAC,CAAC;QAEJ,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE;oBACzC;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,cAAc;oCAClB,CAAC,CAAC,aAAa,QAAQ,2DAA2D,cAAc,qBAAqB,WAAW,CAAC,MAAM,sKAAsK;oCAC7S,CAAC,CAAC,GAAG,QAAQ,qBAAqB,WAAW,CAAC,MAAM,mIAAmI;6BAC1L;4BACD,GAAG,aAAa;yBACjB;qBACF;iBACF;gBACD,cAAc,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE;aACvE;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAEvB,CAAC;YACF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,SAAS,EAAE,sCAAsC,YAAY,EAAE;aACvD,CAAC;QACb,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * Legacy V3 evaluator implementation.\n *\n * This is the behavior-preserving implementation that backs V3Evaluator when\n * STAGEHAND_EVALUATOR_BACKEND=legacy.\n */\n\nimport { z } from \"zod\";\nimport type { AvailableModel, ClientOptions } from \"./v3/types/public/model.js\";\nimport type {\n EvaluateOptions,\n BatchAskOptions,\n EvaluationResult,\n} from \"./v3/types/private/evaluator.js\";\nimport { LLMParsedResponse } from \"./inference.js\";\nimport { LLMResponse, LLMClient } from \"./v3/llm/LLMClient.js\";\nimport { LogLine } from \"./v3/types/public/logs.js\";\nimport { V3 } from \"./v3/v3.js\";\nimport { LLMProvider } from \"./v3/llm/LLMProvider.js\";\nimport { StagehandInvalidArgumentError } from \"./v3/types/public/sdkErrors.js\";\n\nconst EvaluationSchema = z.object({\n evaluation: z.enum([\"YES\", \"NO\"]),\n reasoning: z.string(),\n});\n\nconst BatchEvaluationSchema = z.array(EvaluationSchema);\n\nexport class LegacyV3Evaluator {\n private v3: V3;\n private modelName: AvailableModel;\n private modelClientOptions: ClientOptions | { apiKey: string };\n private silentLogger: (message: LogLine) => void = () => {};\n\n constructor(\n v3: V3,\n modelName?: AvailableModel,\n modelClientOptions?: ClientOptions,\n ) {\n this.v3 = v3;\n this.modelName = modelName || (\"google/gemini-2.5-flash\" as AvailableModel);\n this.modelClientOptions = modelClientOptions || {\n apiKey:\n process.env.GEMINI_API_KEY ||\n process.env.GOOGLE_GENERATIVE_AI_API_KEY ||\n \"\",\n };\n }\n\n private getClient(): LLMClient {\n // Prefer a dedicated provider so we can override model per-evaluation\n const provider = new LLMProvider(this.v3.logger);\n return provider.getClient(this.modelName, this.modelClientOptions);\n }\n\n async ask(options: EvaluateOptions): Promise<EvaluationResult> {\n const {\n question,\n answer,\n screenshot = true,\n systemPrompt,\n screenshotDelayMs = 250,\n agentReasoning,\n } = options;\n if (!question)\n throw new StagehandInvalidArgumentError(\n \"Question cannot be an empty string\",\n );\n if (!answer && !screenshot)\n throw new StagehandInvalidArgumentError(\n \"Either answer (text) or screenshot must be provided\",\n );\n\n if (Array.isArray(screenshot)) {\n return this._evaluateWithMultipleScreenshots({\n question,\n screenshots: screenshot,\n systemPrompt,\n agentReasoning,\n });\n }\n\n const defaultSystemPrompt = `You are an expert evaluator that confidently returns YES or NO based on if the original goal was achieved. You have access to ${screenshot ? \"a screenshot\" : \"the agents reasoning and actions throughout the task\"} that you can use to evaluate the tasks completion. Provide detailed reasoning for your answer.\\n Today's date is ${new Date().toLocaleDateString()}`;\n\n await new Promise((r) => setTimeout(r, screenshotDelayMs));\n let imageBuffer: Buffer | undefined;\n if (screenshot) {\n const page = await this.v3.context.awaitActivePage();\n imageBuffer = await page.screenshot({ fullPage: false });\n }\n\n const llmClient = this.getClient();\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n { role: \"system\", content: systemPrompt || defaultSystemPrompt },\n {\n role: \"user\",\n content: [\n {\n type: \"text\",\n text: agentReasoning\n ? `Question: ${question}\\n\\nAgent's reasoning and actions taken:\\n${agentReasoning}`\n : question,\n },\n ...(screenshot && imageBuffer\n ? [\n {\n type: \"image_url\" as const,\n image_url: {\n url: `data:image/jpeg;base64,${imageBuffer.toString(\"base64\")}`,\n },\n },\n ]\n : []),\n ...(answer\n ? [{ type: \"text\" as const, text: `the answer is ${answer}` }]\n : []),\n ],\n },\n ],\n response_model: { name: \"EvaluationResult\", schema: EvaluationSchema },\n },\n });\n\n try {\n const result = response.data as unknown as z.infer<\n typeof EvaluationSchema\n >;\n return { evaluation: result.evaluation, reasoning: result.reasoning };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return {\n evaluation: \"INVALID\",\n reasoning: `Failed to get structured response: ${errorMessage}`,\n } as const;\n }\n }\n\n async batchAsk(options: BatchAskOptions): Promise<EvaluationResult[]> {\n const {\n questions,\n screenshot = true,\n systemPrompt = \"You are an expert evaluator that returns YES or NO with a concise reasoning.\",\n screenshotDelayMs = 250,\n } = options;\n if (!questions?.length)\n throw new StagehandInvalidArgumentError(\n \"Questions array cannot be empty\",\n );\n\n await new Promise((r) => setTimeout(r, screenshotDelayMs));\n let imageBuffer: Buffer | undefined;\n if (screenshot) {\n const page = await this.v3.context.awaitActivePage();\n imageBuffer = await page.screenshot({ fullPage: false });\n }\n\n const llmClient = this.getClient();\n\n const formatted = questions\n .map(\n (item, i) =>\n `${i + 1}. ${item.question}${item.answer ? `\\n Answer: ${item.answer}` : \"\"}`,\n )\n .join(\"\\n\\n\");\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n {\n role: \"system\",\n content: `${systemPrompt}\\n\\nYou will be given multiple questions${screenshot ? \" with a screenshot\" : \"\"}. ${questions.some((q) => q.answer) ? \"Some questions include answers to evaluate.\" : \"\"} Answer each question by returning an object in the specified JSON format. Return a single JSON array containing one object for each question in the order they were asked.`,\n },\n {\n role: \"user\",\n content: [\n { type: \"text\", text: formatted },\n ...(screenshot && imageBuffer\n ? [\n {\n type: \"image_url\" as const,\n image_url: {\n url: `data:image/jpeg;base64,${imageBuffer.toString(\"base64\")}`,\n },\n },\n ]\n : []),\n ],\n },\n ],\n response_model: {\n name: \"BatchEvaluationResult\",\n schema: BatchEvaluationSchema,\n },\n },\n });\n\n try {\n const results = response.data as unknown as z.infer<\n typeof BatchEvaluationSchema\n >;\n return results.map((r) => ({\n evaluation: r.evaluation,\n reasoning: r.reasoning,\n }));\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return questions.map(() => ({\n evaluation: \"INVALID\" as const,\n reasoning: `Failed to get structured response: ${errorMessage}`,\n }));\n }\n }\n\n private async _evaluateWithMultipleScreenshots(options: {\n question: string;\n screenshots: Buffer[];\n systemPrompt?: string;\n agentReasoning?: string;\n }): Promise<EvaluationResult> {\n const {\n question,\n screenshots,\n agentReasoning,\n systemPrompt = `You are an expert evaluator that confidently returns YES or NO given a question and multiple screenshots showing the progression of a task.\n ${agentReasoning ? \"You also have access to the agent's detailed reasoning and thought process throughout the task.\" : \"\"}\n Analyze ALL screenshots to understand the complete journey. Look for evidence of task completion across all screenshots, not just the last one.\n Success criteria may appear at different points in the sequence (confirmation messages, intermediate states, etc).\n ${agentReasoning ? \"The agent's reasoning provides crucial context about what actions were attempted, what was observed, and the decision-making process. Use this alongside the visual evidence to make a comprehensive evaluation.\" : \"\"}\n Today's date is ${new Date().toLocaleDateString()}`,\n } = options;\n\n if (!question)\n throw new StagehandInvalidArgumentError(\n \"Question cannot be an empty string\",\n );\n if (!screenshots || screenshots.length === 0)\n throw new StagehandInvalidArgumentError(\n \"At least one screenshot must be provided\",\n );\n\n const llmClient = this.getClient();\n\n const imageContents = screenshots.map((s) => ({\n type: \"image_url\" as const,\n image_url: { url: `data:image/jpeg;base64,${s.toString(\"base64\")}` },\n }));\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n { role: \"system\", content: systemPrompt },\n {\n role: \"user\",\n content: [\n {\n type: \"text\",\n text: agentReasoning\n ? `Question: ${question}\\n\\nAgent's reasoning and actions throughout the task:\\n${agentReasoning}\\n\\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze both the agent's reasoning and all screenshots to determine if the task was completed successfully.`\n : `${question}\\n\\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze all of them to determine if the task was completed successfully.`,\n },\n ...imageContents,\n ],\n },\n ],\n response_model: { name: \"EvaluationResult\", schema: EvaluationSchema },\n },\n });\n\n try {\n const result = response.data as unknown as z.infer<\n typeof EvaluationSchema\n >;\n return { evaluation: result.evaluation, reasoning: result.reasoning };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return {\n evaluation: \"INVALID\",\n reasoning: `Failed to get structured response: ${errorMessage}`,\n } as const;\n }\n }\n}\n"]}
|
|
1
|
+
{"version":3,"file":"v3LegacyEvaluator.js","sourceRoot":"","sources":["../../../lib/v3LegacyEvaluator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAWxB,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,6BAA6B,EAAE,MAAM,gCAAgC,CAAC;AAE/E,MAAM,gBAAgB,GAAG,CAAC,CAAC,MAAM,CAAC;IAChC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IACjC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE;CACtB,CAAC,CAAC;AAEH,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;AAExD,MAAM,OAAO,iBAAiB;IACpB,EAAE,CAAK;IACP,SAAS,CAAiB;IAC1B,kBAAkB,CAAqC;IACvD,YAAY,GAA+B,GAAG,EAAE,GAAE,CAAC,CAAC;IAE5D,YACE,EAAM,EACN,SAA0B,EAC1B,kBAAkC;QAElC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;QACb,IAAI,CAAC,SAAS,GAAG,SAAS,IAAK,yBAA4C,CAAC;QAC5E,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,IAAI;YAC9C,MAAM,EACJ,OAAO,CAAC,GAAG,CAAC,cAAc;gBAC1B,OAAO,CAAC,GAAG,CAAC,4BAA4B;gBACxC,EAAE;SACL,CAAC;IACJ,CAAC;IAEO,SAAS;QACf,sEAAsE;QACtE,MAAM,QAAQ,GAAG,IAAI,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC;QACjD,OAAO,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACrE,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,OAAwB;QAChC,MAAM,EACJ,QAAQ,EACR,MAAM,EACN,UAAU,GAAG,IAAI,EACjB,YAAY,EACZ,iBAAiB,GAAG,GAAG,EACvB,cAAc,GACf,GAAG,OAAO,CAAC;QACZ,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,6BAA6B,CACrC,oCAAoC,CACrC,CAAC;QACJ,IAAI,CAAC,MAAM,IAAI,CAAC,UAAU;YACxB,MAAM,IAAI,6BAA6B,CACrC,qDAAqD,CACtD,CAAC;QAEJ,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YAC9B,OAAO,IAAI,CAAC,gCAAgC,CAAC;gBAC3C,QAAQ;gBACR,MAAM;gBACN,WAAW,EAAE,UAAU;gBACvB,YAAY;gBACZ,cAAc;aACf,CAAC,CAAC;QACL,CAAC;QAED,MAAM,mBAAmB,GAAG,kIAAkI,UAAU,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,sDAAsD,8HAA8H,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,EAAE,CAAC;QAElZ,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3D,IAAI,WAA+B,CAAC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YACrD,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,IAAI,mBAAmB,EAAE;oBAChE;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,cAAc;oCAClB,CAAC,CAAC,aAAa,QAAQ,6CAA6C,cAAc,EAAE;oCACpF,CAAC,CAAC,QAAQ;6BACb;4BACD,GAAG,CAAC,UAAU,IAAI,WAAW;gCAC3B,CAAC,CAAC;oCACE;wCACE,IAAI,EAAE,WAAoB;wCAC1B,SAAS,EAAE;4CACT,GAAG,EAAE,0BAA0B,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;yCAChE;qCACF;iCACF;gCACH,CAAC,CAAC,EAAE,CAAC;4BACP,GAAG,CAAC,MAAM;gCACR,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,iBAAiB,MAAM,EAAE,EAAE,CAAC;gCAC9D,CAAC,CAAC,EAAE,CAAC;yBACR;qBACF;iBACF;gBACD,cAAc,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE;aACvE;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAEvB,CAAC;YACF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,SAAS,EAAE,sCAAsC,YAAY,EAAE;aACvD,CAAC;QACb,CAAC;IACH,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAAwB;QACrC,MAAM,EACJ,SAAS,EACT,UAAU,GAAG,IAAI,EACjB,YAAY,GAAG,8EAA8E,EAC7F,iBAAiB,GAAG,GAAG,GACxB,GAAG,OAAO,CAAC;QACZ,IAAI,CAAC,SAAS,EAAE,MAAM;YACpB,MAAM,IAAI,6BAA6B,CACrC,iCAAiC,CAClC,CAAC;QAEJ,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,iBAAiB,CAAC,CAAC,CAAC;QAC3D,IAAI,WAA+B,CAAC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC;YACrD,WAAW,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,SAAS,GAAG,SAAS;aACxB,GAAG,CACF,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CACV,GAAG,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,gBAAgB,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAClF;aACA,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhB,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,QAAQ;wBACd,OAAO,EAAE,GAAG,YAAY,2CAA2C,UAAU,CAAC,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,6CAA6C,CAAC,CAAC,CAAC,EAAE,6KAA6K;qBAChX;oBACD;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE;4BACjC,GAAG,CAAC,UAAU,IAAI,WAAW;gCAC3B,CAAC,CAAC;oCACE;wCACE,IAAI,EAAE,WAAoB;wCAC1B,SAAS,EAAE;4CACT,GAAG,EAAE,0BAA0B,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;yCAChE;qCACF;iCACF;gCACH,CAAC,CAAC,EAAE,CAAC;yBACR;qBACF;iBACF;gBACD,cAAc,EAAE;oBACd,IAAI,EAAE,uBAAuB;oBAC7B,MAAM,EAAE,qBAAqB;iBAC9B;aACF;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,QAAQ,CAAC,IAExB,CAAC;YACF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACzB,UAAU,EAAE,CAAC,CAAC,UAAU;gBACxB,SAAS,EAAE,CAAC,CAAC,SAAS;aACvB,CAAC,CAAC,CAAC;QACN,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC;gBAC1B,UAAU,EAAE,SAAkB;gBAC9B,SAAS,EAAE,sCAAsC,YAAY,EAAE;aAChE,CAAC,CAAC,CAAC;QACN,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,gCAAgC,CAAC,OAM9C;QACC,MAAM,EACJ,QAAQ,EACR,MAAM,EACN,WAAW,EACX,cAAc,EACd,YAAY,GAAG;UACX,cAAc,CAAC,CAAC,CAAC,iGAAiG,CAAC,CAAC,CAAC,EAAE;;;UAGvH,cAAc,CAAC,CAAC,CAAC,kNAAkN,CAAC,CAAC,CAAC,EAAE;0BACxN,IAAI,IAAI,EAAE,CAAC,kBAAkB,EAAE,EAAE,GACtD,GAAG,OAAO,CAAC;QAEZ,IAAI,CAAC,QAAQ;YACX,MAAM,IAAI,6BAA6B,CACrC,oCAAoC,CACrC,CAAC;QACJ,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAC1C,MAAM,IAAI,6BAA6B,CACrC,0CAA0C,CAC3C,CAAC;QAEJ,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAEnC,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC5C,IAAI,EAAE,WAAoB;YAC1B,SAAS,EAAE,EAAE,GAAG,EAAE,0BAA0B,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,EAAE;SACrE,CAAC,CAAC,CAAC;QAEJ,MAAM,QAAQ,GAAG,MAAM,SAAS,CAAC,oBAAoB,CAEnD;YACA,MAAM,EAAE,IAAI,CAAC,YAAY;YACzB,OAAO,EAAE;gBACP,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,YAAY,EAAE;oBACzC;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE,cAAc;oCAClB,CAAC,CAAC,aAAa,QAAQ,2DAA2D,cAAc,qBAAqB,WAAW,CAAC,MAAM,sKAAsK;oCAC7S,CAAC,CAAC,GAAG,QAAQ,qBAAqB,WAAW,CAAC,MAAM,mIAAmI;6BAC1L;4BACD,GAAG,CAAC,MAAM;gCACR,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,iBAAiB,MAAM,EAAE,EAAE,CAAC;gCAC9D,CAAC,CAAC,EAAE,CAAC;4BACP,GAAG,aAAa;yBACjB;qBACF;iBACF;gBACD,cAAc,EAAE,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE;aACvE;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,QAAQ,CAAC,IAEvB,CAAC;YACF,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACzD,OAAO;gBACL,UAAU,EAAE,SAAS;gBACrB,SAAS,EAAE,sCAAsC,YAAY,EAAE;aACvD,CAAC;QACb,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * Legacy V3 evaluator implementation.\n *\n * This is the behavior-preserving implementation that backs V3Evaluator when\n * STAGEHAND_EVALUATOR_BACKEND=legacy.\n */\n\nimport { z } from \"zod\";\nimport type { AvailableModel, ClientOptions } from \"./v3/types/public/model.js\";\nimport type {\n EvaluateOptions,\n BatchAskOptions,\n EvaluationResult,\n} from \"./v3/types/private/evaluator.js\";\nimport { LLMParsedResponse } from \"./inference.js\";\nimport { LLMResponse, LLMClient } from \"./v3/llm/LLMClient.js\";\nimport { LogLine } from \"./v3/types/public/logs.js\";\nimport { V3 } from \"./v3/v3.js\";\nimport { LLMProvider } from \"./v3/llm/LLMProvider.js\";\nimport { StagehandInvalidArgumentError } from \"./v3/types/public/sdkErrors.js\";\n\nconst EvaluationSchema = z.object({\n evaluation: z.enum([\"YES\", \"NO\"]),\n reasoning: z.string(),\n});\n\nconst BatchEvaluationSchema = z.array(EvaluationSchema);\n\nexport class LegacyV3Evaluator {\n private v3: V3;\n private modelName: AvailableModel;\n private modelClientOptions: ClientOptions | { apiKey: string };\n private silentLogger: (message: LogLine) => void = () => {};\n\n constructor(\n v3: V3,\n modelName?: AvailableModel,\n modelClientOptions?: ClientOptions,\n ) {\n this.v3 = v3;\n this.modelName = modelName || (\"google/gemini-2.5-flash\" as AvailableModel);\n this.modelClientOptions = modelClientOptions || {\n apiKey:\n process.env.GEMINI_API_KEY ||\n process.env.GOOGLE_GENERATIVE_AI_API_KEY ||\n \"\",\n };\n }\n\n private getClient(): LLMClient {\n // Prefer a dedicated provider so we can override model per-evaluation\n const provider = new LLMProvider(this.v3.logger);\n return provider.getClient(this.modelName, this.modelClientOptions);\n }\n\n async ask(options: EvaluateOptions): Promise<EvaluationResult> {\n const {\n question,\n answer,\n screenshot = true,\n systemPrompt,\n screenshotDelayMs = 250,\n agentReasoning,\n } = options;\n if (!question)\n throw new StagehandInvalidArgumentError(\n \"Question cannot be an empty string\",\n );\n if (!answer && !screenshot)\n throw new StagehandInvalidArgumentError(\n \"Either answer (text) or screenshot must be provided\",\n );\n\n if (Array.isArray(screenshot)) {\n return this._evaluateWithMultipleScreenshots({\n question,\n answer,\n screenshots: screenshot,\n systemPrompt,\n agentReasoning,\n });\n }\n\n const defaultSystemPrompt = `You are an expert evaluator that confidently returns YES or NO based on if the original goal was achieved. You have access to ${screenshot ? \"a screenshot\" : \"the agents reasoning and actions throughout the task\"} that you can use to evaluate the tasks completion. Provide detailed reasoning for your answer.\\n Today's date is ${new Date().toLocaleDateString()}`;\n\n await new Promise((r) => setTimeout(r, screenshotDelayMs));\n let imageBuffer: Buffer | undefined;\n if (screenshot) {\n const page = await this.v3.context.awaitActivePage();\n imageBuffer = await page.screenshot({ fullPage: false });\n }\n\n const llmClient = this.getClient();\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n { role: \"system\", content: systemPrompt || defaultSystemPrompt },\n {\n role: \"user\",\n content: [\n {\n type: \"text\",\n text: agentReasoning\n ? `Question: ${question}\\n\\nAgent's reasoning and actions taken:\\n${agentReasoning}`\n : question,\n },\n ...(screenshot && imageBuffer\n ? [\n {\n type: \"image_url\" as const,\n image_url: {\n url: `data:image/jpeg;base64,${imageBuffer.toString(\"base64\")}`,\n },\n },\n ]\n : []),\n ...(answer\n ? [{ type: \"text\" as const, text: `the answer is ${answer}` }]\n : []),\n ],\n },\n ],\n response_model: { name: \"EvaluationResult\", schema: EvaluationSchema },\n },\n });\n\n try {\n const result = response.data as unknown as z.infer<\n typeof EvaluationSchema\n >;\n return { evaluation: result.evaluation, reasoning: result.reasoning };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return {\n evaluation: \"INVALID\",\n reasoning: `Failed to get structured response: ${errorMessage}`,\n } as const;\n }\n }\n\n async batchAsk(options: BatchAskOptions): Promise<EvaluationResult[]> {\n const {\n questions,\n screenshot = true,\n systemPrompt = \"You are an expert evaluator that returns YES or NO with a concise reasoning.\",\n screenshotDelayMs = 250,\n } = options;\n if (!questions?.length)\n throw new StagehandInvalidArgumentError(\n \"Questions array cannot be empty\",\n );\n\n await new Promise((r) => setTimeout(r, screenshotDelayMs));\n let imageBuffer: Buffer | undefined;\n if (screenshot) {\n const page = await this.v3.context.awaitActivePage();\n imageBuffer = await page.screenshot({ fullPage: false });\n }\n\n const llmClient = this.getClient();\n\n const formatted = questions\n .map(\n (item, i) =>\n `${i + 1}. ${item.question}${item.answer ? `\\n Answer: ${item.answer}` : \"\"}`,\n )\n .join(\"\\n\\n\");\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n {\n role: \"system\",\n content: `${systemPrompt}\\n\\nYou will be given multiple questions${screenshot ? \" with a screenshot\" : \"\"}. ${questions.some((q) => q.answer) ? \"Some questions include answers to evaluate.\" : \"\"} Answer each question by returning an object in the specified JSON format. Return a single JSON array containing one object for each question in the order they were asked.`,\n },\n {\n role: \"user\",\n content: [\n { type: \"text\", text: formatted },\n ...(screenshot && imageBuffer\n ? [\n {\n type: \"image_url\" as const,\n image_url: {\n url: `data:image/jpeg;base64,${imageBuffer.toString(\"base64\")}`,\n },\n },\n ]\n : []),\n ],\n },\n ],\n response_model: {\n name: \"BatchEvaluationResult\",\n schema: BatchEvaluationSchema,\n },\n },\n });\n\n try {\n const results = response.data as unknown as z.infer<\n typeof BatchEvaluationSchema\n >;\n return results.map((r) => ({\n evaluation: r.evaluation,\n reasoning: r.reasoning,\n }));\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return questions.map(() => ({\n evaluation: \"INVALID\" as const,\n reasoning: `Failed to get structured response: ${errorMessage}`,\n }));\n }\n }\n\n private async _evaluateWithMultipleScreenshots(options: {\n question: string;\n answer?: string;\n screenshots: Buffer[];\n systemPrompt?: string;\n agentReasoning?: string;\n }): Promise<EvaluationResult> {\n const {\n question,\n answer,\n screenshots,\n agentReasoning,\n systemPrompt = `You are an expert evaluator that confidently returns YES or NO given a question and multiple screenshots showing the progression of a task.\n ${agentReasoning ? \"You also have access to the agent's detailed reasoning and thought process throughout the task.\" : \"\"}\n Analyze ALL screenshots to understand the complete journey. Look for evidence of task completion across all screenshots, not just the last one.\n Success criteria may appear at different points in the sequence (confirmation messages, intermediate states, etc).\n ${agentReasoning ? \"The agent's reasoning provides crucial context about what actions were attempted, what was observed, and the decision-making process. Use this alongside the visual evidence to make a comprehensive evaluation.\" : \"\"}\n Today's date is ${new Date().toLocaleDateString()}`,\n } = options;\n\n if (!question)\n throw new StagehandInvalidArgumentError(\n \"Question cannot be an empty string\",\n );\n if (!screenshots || screenshots.length === 0)\n throw new StagehandInvalidArgumentError(\n \"At least one screenshot must be provided\",\n );\n\n const llmClient = this.getClient();\n\n const imageContents = screenshots.map((s) => ({\n type: \"image_url\" as const,\n image_url: { url: `data:image/jpeg;base64,${s.toString(\"base64\")}` },\n }));\n\n const response = await llmClient.createChatCompletion<\n LLMParsedResponse<LLMResponse>\n >({\n logger: this.silentLogger,\n options: {\n messages: [\n { role: \"system\", content: systemPrompt },\n {\n role: \"user\",\n content: [\n {\n type: \"text\",\n text: agentReasoning\n ? `Question: ${question}\\n\\nAgent's reasoning and actions throughout the task:\\n${agentReasoning}\\n\\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze both the agent's reasoning and all screenshots to determine if the task was completed successfully.`\n : `${question}\\n\\nI'm providing ${screenshots.length} screenshots showing the progression of the task. Please analyze all of them to determine if the task was completed successfully.`,\n },\n ...(answer\n ? [{ type: \"text\" as const, text: `the answer is ${answer}` }]\n : []),\n ...imageContents,\n ],\n },\n ],\n response_model: { name: \"EvaluationResult\", schema: EvaluationSchema },\n },\n });\n\n try {\n const result = response.data as unknown as z.infer<\n typeof EvaluationSchema\n >;\n return { evaluation: result.evaluation, reasoning: result.reasoning };\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return {\n evaluation: \"INVALID\",\n reasoning: `Failed to get structured response: ${errorMessage}`,\n } as const;\n }\n }\n}\n"]}
|
package/dist/esm/lib/version.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"version.js","sourceRoot":"","sources":["../../../lib/version.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,OAAgB,CAAC","sourcesContent":["/**\n * AUTO-GENERATED — DO NOT EDIT BY HAND\n * Run `pnpm run gen-version` to refresh.\n */\nexport const STAGEHAND_VERSION = \"3.
|
|
1
|
+
{"version":3,"file":"version.js","sourceRoot":"","sources":["../../../lib/version.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG,OAAgB,CAAC","sourcesContent":["/**\n * AUTO-GENERATED — DO NOT EDIT BY HAND\n * Run `pnpm run gen-version` to refresh.\n */\nexport const STAGEHAND_VERSION = \"3.5.0\" as const;\n"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@browserbasehq/orca",
|
|
3
|
-
"version": "3.5.0
|
|
3
|
+
"version": "3.5.1-preview.0",
|
|
4
4
|
"description": "An AI web browsing framework focused on simplicity and extensibility.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/cjs/index.js",
|
|
@@ -35,9 +35,10 @@
|
|
|
35
35
|
"build-dom-scripts": "tsx scripts/build-dom-scripts.ts",
|
|
36
36
|
"build:cjs": "tsx scripts/build-cjs.ts",
|
|
37
37
|
"build:esm": "tsx scripts/build-esm.ts",
|
|
38
|
-
"build": "pnpm --filter @browserbasehq/
|
|
38
|
+
"build": "pnpm --filter @browserbasehq/stagehand run --parallel \"/^build:(esm|cjs)$/\"",
|
|
39
|
+
"prepack": "pnpm -w --dir ../.. exec turbo run build --filter=@browserbasehq/stagehand",
|
|
39
40
|
"example": "node --import tsx -e \"const args=process.argv.slice(1).filter(a=>a!=='--'); const [p]=args; const n=(p||'example').replace(/^\\.\\//,'').replace(/\\.ts$/i,''); import('node:path').then(path=>import(new URL(path.resolve('examples', n + '.ts'), 'file:')));\" --",
|
|
40
|
-
"test": "pnpm -w --dir ../.. exec turbo run test:core test:e2e --filter=@browserbasehq/
|
|
41
|
+
"test": "pnpm -w --dir ../.. exec turbo run test:core test:e2e --filter=@browserbasehq/stagehand --",
|
|
41
42
|
"test:core": "tsx scripts/test-core.ts",
|
|
42
43
|
"test:e2e": "tsx scripts/test-e2e.ts",
|
|
43
44
|
"format": "prettier --write .",
|
|
@@ -46,10 +47,15 @@
|
|
|
46
47
|
"lint": "cd ../.. && prettier --check packages/core && cd packages/core && pnpm run eslint && pnpm run typecheck"
|
|
47
48
|
},
|
|
48
49
|
"files": [
|
|
49
|
-
"dist/esm",
|
|
50
|
-
"dist/
|
|
51
|
-
"
|
|
52
|
-
"
|
|
50
|
+
"dist/esm/index.d.ts",
|
|
51
|
+
"dist/esm/index.js",
|
|
52
|
+
"dist/esm/lib",
|
|
53
|
+
"dist/esm/package.json",
|
|
54
|
+
"dist/cjs/cli.js",
|
|
55
|
+
"dist/cjs/index.d.ts",
|
|
56
|
+
"dist/cjs/index.js",
|
|
57
|
+
"dist/cjs/lib",
|
|
58
|
+
"dist/cjs/package.json"
|
|
53
59
|
],
|
|
54
60
|
"keywords": [
|
|
55
61
|
"ai",
|
|
@@ -84,19 +90,19 @@
|
|
|
84
90
|
"@google/genai": "^1.22.0",
|
|
85
91
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
86
92
|
"ai": "^5.0.133",
|
|
87
|
-
"devtools-protocol": "^0.0.
|
|
93
|
+
"devtools-protocol": "^0.0.1642743",
|
|
88
94
|
"fetch-cookie": "^3.1.0",
|
|
89
95
|
"openai": "^4.104.0",
|
|
90
96
|
"pino": "^9.6.0",
|
|
91
97
|
"pino-pretty": "^13.0.0",
|
|
92
98
|
"uuid": "^11.1.1",
|
|
93
|
-
"ws": "^8.
|
|
99
|
+
"ws": "^8.21.0",
|
|
94
100
|
"zod-to-json-schema": "^3.25.0"
|
|
95
101
|
},
|
|
96
102
|
"optionalDependencies": {
|
|
97
103
|
"@ai-sdk/amazon-bedrock": "^3.0.73",
|
|
98
104
|
"@ai-sdk/anthropic": "^2.0.34",
|
|
99
|
-
"@ai-sdk/azure": "^2.0.
|
|
105
|
+
"@ai-sdk/azure": "^2.0.109",
|
|
100
106
|
"@ai-sdk/cerebras": "^1.0.25",
|
|
101
107
|
"@ai-sdk/deepseek": "^1.0.23",
|
|
102
108
|
"@ai-sdk/google": "^2.0.53",
|