site-agent-pro 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/README.md +689 -0
  2. package/dist/auth/credentialStore.js +62 -0
  3. package/dist/auth/inbox.js +193 -0
  4. package/dist/auth/profile.js +379 -0
  5. package/dist/auth/runner.js +1124 -0
  6. package/dist/backend/dashboardData.js +194 -0
  7. package/dist/backend/runArtifacts.js +48 -0
  8. package/dist/backend/runRepository.js +93 -0
  9. package/dist/bin.js +2 -0
  10. package/dist/cli/backfillSiteChecks.js +143 -0
  11. package/dist/cli/run.js +309 -0
  12. package/dist/cli/trade.js +69 -0
  13. package/dist/config.js +199 -0
  14. package/dist/core/agentProfiles.js +55 -0
  15. package/dist/core/aggregateReport.js +382 -0
  16. package/dist/core/audit.js +30 -0
  17. package/dist/core/customTaskSuite.js +148 -0
  18. package/dist/core/evaluator.js +217 -0
  19. package/dist/core/executor.js +788 -0
  20. package/dist/core/fallbackReport.js +335 -0
  21. package/dist/core/formHeuristics.js +411 -0
  22. package/dist/core/gameplaySummary.js +164 -0
  23. package/dist/core/interaction.js +202 -0
  24. package/dist/core/pageState.js +201 -0
  25. package/dist/core/planner.js +1669 -0
  26. package/dist/core/processSubmissionBatch.js +204 -0
  27. package/dist/core/runAuditJob.js +170 -0
  28. package/dist/core/runner.js +2352 -0
  29. package/dist/core/siteBrief.js +107 -0
  30. package/dist/core/siteChecks.js +1526 -0
  31. package/dist/core/taskDirectives.js +279 -0
  32. package/dist/core/taskHeuristics.js +263 -0
  33. package/dist/dashboard/client.js +1256 -0
  34. package/dist/dashboard/contracts.js +95 -0
  35. package/dist/dashboard/narrative.js +277 -0
  36. package/dist/dashboard/server.js +458 -0
  37. package/dist/dashboard/theme.js +888 -0
  38. package/dist/index.js +84 -0
  39. package/dist/llm/client.js +188 -0
  40. package/dist/paystack/account.js +123 -0
  41. package/dist/paystack/client.js +100 -0
  42. package/dist/paystack/index.js +13 -0
  43. package/dist/paystack/test-paystack.js +83 -0
  44. package/dist/paystack/transfer.js +138 -0
  45. package/dist/paystack/types.js +74 -0
  46. package/dist/paystack/webhook.js +121 -0
  47. package/dist/prompts/browserAgent.js +124 -0
  48. package/dist/prompts/reviewer.js +71 -0
  49. package/dist/reporting/clickReplay.js +290 -0
  50. package/dist/reporting/html.js +930 -0
  51. package/dist/reporting/markdown.js +238 -0
  52. package/dist/reporting/template.js +1141 -0
  53. package/dist/schemas/types.js +361 -0
  54. package/dist/submissions/customTasks.js +196 -0
  55. package/dist/submissions/html.js +770 -0
  56. package/dist/submissions/model.js +56 -0
  57. package/dist/submissions/publicUrl.js +76 -0
  58. package/dist/submissions/service.js +74 -0
  59. package/dist/submissions/store.js +37 -0
  60. package/dist/submissions/types.js +65 -0
  61. package/dist/trade/engine.js +241 -0
  62. package/dist/trade/evm/erc20.js +44 -0
  63. package/dist/trade/extractor.js +148 -0
  64. package/dist/trade/policy.js +35 -0
  65. package/dist/trade/session.js +31 -0
  66. package/dist/trade/types.js +107 -0
  67. package/dist/trade/validator.js +148 -0
  68. package/dist/utils/files.js +59 -0
  69. package/dist/utils/log.js +24 -0
  70. package/dist/utils/playwrightCompat.js +14 -0
  71. package/dist/utils/time.js +3 -0
  72. package/dist/wallet/provider.js +345 -0
  73. package/dist/wallet/relay.js +129 -0
  74. package/dist/wallet/wallet.js +178 -0
  75. package/docs/01-installation.md +134 -0
  76. package/docs/02-running-your-first-audit.md +136 -0
  77. package/docs/03-configuration.md +233 -0
  78. package/docs/04-how-the-agent-thinks.md +41 -0
  79. package/docs/05-extending-personas-and-tasks.md +42 -0
  80. package/docs/06-hardening-for-production.md +92 -0
  81. package/package.json +60 -0
@@ -0,0 +1,2352 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import { chromium, devices } from "playwright";
4
+ import { captureInboxCheckpoint, waitForVerificationEmail } from "../auth/inbox.js";
5
+ import { getMailboxConfig, getPreferredAccessIdentity, isAuthBootstrapConfigured } from "../auth/profile.js";
6
+ import { detectAuthWall, runAuthFlowInContext } from "../auth/runner.js";
7
+ import { clampRunDurationMs, config } from "../config.js";
8
+ import { isWalletConfigured, getWalletConfig, getWalletChainId, getMetaMaskExtensionPath, getMetaMaskUserDataDir } from "../wallet/wallet.js";
9
+ import { buildWeb3InjectionScript } from "../wallet/provider.js";
10
+ import { startSigningRelay } from "../wallet/relay.js";
11
+ import { runAccessibilityAudit } from "./audit.js";
12
+ import { buildLooseAccessiblePattern, prepareLocatorForInteraction } from "./interaction.js";
13
+ import { capturePageState } from "./pageState.js";
14
+ import { decideNextAction } from "./planner.js";
15
+ import { executeDecision, prepareClickDecision } from "./executor.js";
16
+ import { isGameplayTask, summarizeGameplayHistory } from "./gameplaySummary.js";
17
+ import { deriveSiteBrief } from "./siteBrief.js";
18
+ import { runSiteChecks } from "./siteChecks.js";
19
+ import { parseTaskDirectives } from "./taskDirectives.js";
20
+ import { classifyTaskText, hasTaskKeywordEvidence, isRegressiveTaskControlLabel, textHasInstructionCue, textHasOutcomeCue, textHasPlayActionCue } from "./taskHeuristics.js";
21
+ import { ensureDir, writeJson } from "../utils/files.js";
22
+ import { debug, warn } from "../utils/log.js";
23
+ import { installPlaywrightPageCompat } from "../utils/playwrightCompat.js";
24
+ import { sleep } from "../utils/time.js";
25
+ import { extractSellInstruction, taskLooksLikeTrade } from "../trade/extractor.js";
26
+ import { executeTradeInstruction } from "../trade/engine.js";
27
+ import { getTradePolicy, buildDefaultTradeRunOptions } from "../trade/policy.js";
28
+ import { sendMoney, resolveBankCode } from "../paystack/index.js";
// Case-insensitive phrases typical of bot-check / challenge interstitial pages
// (Cloudflare-style "just a moment", CAPTCHA, "access denied", ...).
// NOTE(review): the consumer of this table is outside this excerpt - confirm what text it is matched against.
29
+ const INTERSTITIAL_PATTERNS = [
30
+ /just a moment/i,
31
+ /verification successful/i,
32
+ /checking your browser/i,
33
+ /cloudflare/i,
34
+ /security check/i,
35
+ /access denied/i,
36
+ /captcha/i,
37
+ /human verification/i
38
+ ];
// Case-insensitive phrases that describe running out of session time or execution budget.
// NOTE(review): presumably matched against step/result notes to recognise time-limit stops - confirm against the caller.
39
+ const TIME_LIMIT_PATTERNS = [
40
+ /remaining session time/i,
41
+ /execution budget/i,
42
+ /time limit/i,
43
+ /ran out of time/i,
44
+ /too short for another meaningful interaction/i
45
+ ];
// Task-goal phrasings that disable automatic authentication: taskAllowsAutoAuth()
// returns false as soon as any of these matches the goal text (e.g. "before sign-up",
// "without logging in", "reach the login page").
46
+ const AUTO_AUTH_SKIP_TASK_PATTERNS = [
47
+ /before sign(?:-| )?up/i,
48
+ /without (?:creating an account|signing up|registering|logging in|signing in)/i,
49
+ /reach (?:the )?(?:sign ?up|signup|register|registration|login|sign in|sign-in) page/i
50
+ ];
// NOTE(review): presumably the number of recent steps inspected when detecting a stalled run - the consumer is outside this excerpt.
51
+ const STAGNATION_WINDOW = 5;
// Account-creation heuristics. Each pattern is case-insensitive and word-bounded:
//   TASK                 - goal text that asks to sign up / register / create an account / join.
//   SUBMIT               - labels of controls that submit a registration form.
//   SUCCESS              - page text that suggests registration completed.
//   LOCAL_ONLY           - page text saying the site fell back to browser-only storage or its API is unavailable.
//   FORM_STILL_VISIBLE   - a form-field label followed (across lines, via the `s` flag) by a create/sign-up/register label.
//   VERIFICATION_PENDING - page text asking for email/OTP verification.
// NOTE(review): the consumers of these six patterns are outside this excerpt; the descriptions above are read from the regex bodies only.
52
+ const ACCOUNT_CREATION_TASK_PATTERN = /\b(?:sign ?up|signup|register|create(?:\s+your)?\s+(?:account|profile)|create\s+my\s+account|join)\b/i;
53
+ const ACCOUNT_CREATION_SUBMIT_PATTERN = /\b(?:submit|register|sign ?up|create\b.*\baccount|join)\b/i;
54
+ const ACCOUNT_CREATION_SUCCESS_PATTERN = /\b(?:registered users?|add another registration|account created|account ready|welcome|dashboard|profile active|view live market screen)\b/i;
55
+ const ACCOUNT_CREATION_LOCAL_ONLY_PATTERN = /\b(?:browser fallback|browser storage only|using browser storage only|local server is unavailable|api is unavailable)\b/i;
56
+ const ACCOUNT_CREATION_FORM_STILL_VISIBLE_PATTERN = /\b(?:first\s*name|last\s*name|email\s*address|confirm\s*password|phone\s*number|date\s*of\s*birth)\b.*\b(?:create\s*(?:my\s*)?account|sign\s*up|register)\b/is;
57
+ const ACCOUNT_CREATION_VERIFICATION_PENDING_PATTERN = /\b(?:please\s+verify|verify\s+your\s+email|check\s+your\s+email|send\s+otp|enter\s+(?:the\s+)?(?:code|otp)|verification\s+code)\b/i;
// OTP heuristics: labels of controls that request a code, and labels of fields that accept one.
58
+ const OTP_TRIGGER_CLICK_PATTERN = /\b(?:send\s*(?:otp|code)|get\s*(?:otp|code)|verify\s*email|request\s*(?:otp|code))\b/i;
59
+ const OTP_FIELD_PATTERN = /\b(?:otp|one[- ]?time|verification|passcode|security\s*code|auth\s*code|enter\s*code)\b/i;
// Page text indicating the dapp is waiting on the wallet (connection, signature or approval pending).
60
+ const WALLET_PENDING_PATTERN = /\b(?:requesting\s+account|check\s+(?:your\s+)?wallet|confirm\s+(?:in|with)\s+metamask|confirm\s+in\s+your\s+wallet|open\s+metamask|awaiting\s+wallet|signature\s+request|approval\s+pending|waiting\s+for\s+wallet)\b/i;
// Wallet-connect detection, used by textLooksLikeWalletConnect():
//   TARGET   - "connect" and "wallet" both present, in either order.
//   SURFACE  - wallet-picker phrasing ("select a wallet", "walletconnect", ...).
//   PROVIDER - a wallet/provider keyword; only counts when the text also contains the word "connect".
61
+ const WALLET_CONNECT_TARGET_PATTERN = /\bconnect\b.*\bwallet\b|\bwallet\b.*\bconnect\b/i;
62
+ const WALLET_CONNECT_SURFACE_PATTERN = /\b(?:connect(?:\s+your)?\s+wallet|select(?:\s+a)?\s+wallet|choose(?:\s+a)?\s+wallet|walletconnect|connect with metamask|wallet connection)\b/i;
63
+ const WALLET_CONNECT_PROVIDER_PATTERN = /\b(?:wallet|metamask|walletconnect|coinbase|phantom|rabby|rainbow|web3|ethereum)\b/i;
// Lower-case button labels.
// NOTE(review): presumably tried, in this order, to submit an entered OTP - the consumer is outside this excerpt.
64
+ const OTP_VERIFY_SUBMIT_LABELS = [
65
+ "verify",
66
+ "confirm",
67
+ "continue",
68
+ "submit",
69
+ "finish",
70
+ "complete",
71
+ "activate",
72
+ "create my account",
73
+ "create account",
74
+ "register",
75
+ "sign up",
76
+ "signup"
77
+ ];
// Used by taskNeedsContinuousContext(): a goal "needs continuous context" when it mentions any
// of these flow keywords anywhere (CONTINUATION_FLOW_PATTERN) or starts with one of the
// follow-up verbs (DIRECT_CONTINUATION_VERB_PATTERN, anchored at the start of the goal).
78
+ const CONTINUATION_FLOW_PATTERN = /\b(?:wallet|metamask|popup|pop up|modal|dialog|signature|sign(?:ing)?|approve|approval|accept|confirm|execute|transaction|buy|sell|swap|deposit|withdraw|send|transfer|amount|qty|quantity|price|token|network|chain|checkout|cart|order|review|payment)\b/i;
79
+ const DIRECT_CONTINUATION_VERB_PATTERN = /^(?:accept|approve|confirm|sign|execute|submit|continue|next|finish|complete|review)\b/i;
/**
 * Reports whether the serverless Chromium build was requested.
 *
 * The decision is driven solely by the USE_SERVERLESS_CHROMIUM environment
 * variable, which must be the exact string "true". The decision and the raw
 * env values read (including RENDER) are written to the debug log.
 *
 * @returns {boolean} True only when USE_SERVERLESS_CHROMIUM === "true".
 */
function shouldUseServerlessChromium() {
    const { USE_SERVERLESS_CHROMIUM, RENDER } = process.env;
    const useServerless = USE_SERVERLESS_CHROMIUM === "true";
    debug("chromium mode", { useServerless, USE_SERVERLESS_CHROMIUM, RENDER });
    return useServerless;
}
/**
 * Builds the browser launch options for the current environment.
 *
 * Resolution order:
 *   1. A MetaMask extension path is configured -> headed launch with the
 *      extension loaded (plus PLAYWRIGHT_EXECUTABLE_PATH when set).
 *   2. PLAYWRIGHT_EXECUTABLE_PATH is set -> that executable.
 *   3. Serverless Chromium not requested -> default browser.
 *   4. Otherwise -> the "@sparticuz/chromium" build, always headless.
 *
 * In cases 2 and 3 `options.headed` forces a headed launch; otherwise
 * `config.headless` decides.
 *
 * @param {{ headed?: boolean }} options Run options; only `headed` is read here.
 * @returns {Promise<object>} Launch options (headless / args / executablePath).
 */
async function resolveLaunchOptions(options) {
    const executableOverride = process.env.PLAYWRIGHT_EXECUTABLE_PATH?.trim();
    const extensionDir = getMetaMaskExtensionPath();
    // When MetaMask extension mode is requested, force headed + extension args
    if (extensionDir) {
        debug("launch options: MetaMask extension mode (headed)", { metamaskPath: extensionDir });
        const extensionLaunch = {
            headless: false,
            args: [
                `--disable-extensions-except=${extensionDir}`,
                `--load-extension=${extensionDir}`
            ]
        };
        if (executableOverride) {
            extensionLaunch.executablePath = executableOverride;
        }
        return extensionLaunch;
    }
    // Evaluated lazily so `options` is only touched on the branches that need it.
    const pickHeadless = () => (options.headed ? false : config.headless);
    if (executableOverride) {
        debug("launch options: using explicit executable path");
        return {
            executablePath: executableOverride,
            headless: pickHeadless()
        };
    }
    if (!shouldUseServerlessChromium()) {
        debug("launch options: using default Playwright browser");
        return { headless: pickHeadless() };
    }
    // The specifier is kept in a variable so the optional package is only resolved at runtime.
    const moduleName = "@sparticuz/chromium";
    const loadedModule = await import(moduleName);
    const chromiumBuild = loadedModule.default ?? loadedModule;
    const location = process.env.SPARTICUZ_CHROMIUM_LOCATION?.trim() || undefined;
    if ("setGraphicsMode" in chromiumBuild) {
        chromiumBuild.setGraphicsMode = false;
    }
    debug("launch options: using serverless chromium", { location: location ?? null });
    const launchArgs = chromiumBuild.args;
    const resolvedExecutable = await chromiumBuild.executablePath(location);
    return {
        args: launchArgs,
        executablePath: resolvedExecutable,
        headless: true
    };
}
/**
 * Produces a single-line, colour-free description of a thrown value.
 *
 * @param {unknown} error An Error (its `message` is used) or any other value (stringified).
 * @returns {string} The message with ANSI colour sequences removed and whitespace
 *   collapsed, or "Unknown error" when nothing is left.
 */
function cleanErrorMessage(error) {
    let text = error instanceof Error ? error.message : String(error);
    text = text.replace(/\u001b\[[0-9;]*m/g, "");
    text = text.replace(/\s+/g, " ").trim();
    if (text) {
        return text;
    }
    return "Unknown error";
}
/**
 * Turns a possibly-relative file path into an absolute one.
 *
 * @param {string} filePath Path supplied by configuration or the caller.
 * @returns {string} `filePath` unchanged when it is already absolute, otherwise
 *   the path resolved against the current working directory.
 */
function resolveLocalPath(filePath) {
    if (path.isAbsolute(filePath)) {
        return filePath;
    }
    return path.resolve(process.cwd(), filePath);
}
/**
 * Produces a short, log-friendly form of a local file path.
 *
 * @param {string} filePath Path to summarise (absolute, or relative to the cwd).
 * @returns {string} The path relative to the current working directory when the
 *   file lives strictly inside it; otherwise just the file's base name.
 */
function summarizeLocalPath(filePath) {
    const relativePath = path.relative(process.cwd(), filePath);
    // "Outside the cwd" means the first path SEGMENT is "..". A plain
    // startsWith("..") would also catch in-cwd names such as "..cache/run.json".
    // An absolute result (e.g. another drive on Windows) is outside as well.
    const escapesCwd = relativePath === ".." ||
        relativePath.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativePath);
    if (relativePath !== "" && !escapesCwd) {
        return relativePath;
    }
    return path.basename(filePath);
}
/**
 * Decides whether automatic authentication may run for a task.
 *
 * @param {string} taskGoal Natural-language goal of the task.
 * @returns {boolean} False when the goal matches any entry of
 *   AUTO_AUTH_SKIP_TASK_PATTERNS, true otherwise.
 */
function taskAllowsAutoAuth(taskGoal) {
    for (const skipPattern of AUTO_AUTH_SKIP_TASK_PATTERNS) {
        if (skipPattern.test(taskGoal)) {
            return false;
        }
    }
    return true;
}
/**
 * Collapses every run of whitespace to a single space and trims the ends.
 *
 * Missing values (`null` / `undefined`) are treated as empty text and other
 * non-string values are stringified, so callers that forward optional fields
 * (labels, decision text, directive targets) cannot crash this helper.
 *
 * @param {unknown} value Text to normalise.
 * @returns {string} The normalised text; "" for missing or blank input.
 */
function normalizeVisibleText(value) {
    return String(value ?? "").replace(/\s+/g, " ").trim();
}
/**
 * Counts the whitespace-separated words in a piece of text.
 *
 * @param {string} value Text to measure.
 * @returns {number} Number of non-empty tokens after whitespace normalisation.
 */
function countWords(value) {
    const tokens = normalizeVisibleText(value).split(/\s+/);
    let total = 0;
    for (const token of tokens) {
        if (token) {
            total += 1;
        }
    }
    return total;
}
/**
 * Extracts the amount from a goal that consists solely of "buy|sell|swap <number>".
 *
 * @param {string} taskGoal Natural-language goal of the task.
 * @returns {string|null} The numeric text (e.g. "0.5"), or null when the goal
 *   is not of that compact form.
 */
function extractCompactAmountTaskValue(taskGoal) {
    const compactAmountPattern = /^(?:buy|sell|swap)\s+([0-9]+(?:\.[0-9]+)?)$/i;
    const found = normalizeVisibleText(taskGoal).match(compactAmountPattern);
    if (!found) {
        return null;
    }
    return found[1] ?? null;
}
/**
 * Loosely compares two target labels.
 *
 * Both labels are whitespace-normalised, lower-cased and have "/" separators
 * padded to " / ". They match when they are equal, when one contains the
 * other, or when the number of words of `left` that also occur in `right` is
 * at least 2 or at least the word count of the shorter label.
 *
 * @param {string} left First label.
 * @param {string} right Second label.
 * @returns {boolean} True when the labels are considered the same target;
 *   always false when either label is empty.
 */
function compactTargetsMatch(left, right) {
    const canonicalize = (label) => normalizeVisibleText(label).toLowerCase().replace(/\s*\/\s*/g, " / ");
    const first = canonicalize(left);
    const second = canonicalize(right);
    if (first === "" || second === "") {
        return false;
    }
    if (first === second || first.includes(second) || second.includes(first)) {
        return true;
    }
    const tokenize = (label) => label.split(/\s+/).filter(Boolean);
    const firstWords = tokenize(first);
    const secondWords = tokenize(second);
    let shared = 0;
    for (const word of firstWords) {
        if (secondWords.includes(word)) {
            shared += 1;
        }
    }
    return shared >= 2 || shared >= Math.min(firstWords.length, secondWords.length);
}
/**
 * Checks whether a compact, single-step task has already been satisfied by a
 * successful step in the history.
 *
 * - "buy|sell|swap <number>" goals are satisfied by a successful "type" step
 *   whose label matches "amount".
 * - Otherwise the goal must parse to exactly one directive (ignoring "stop"):
 *   "click" needs a successful click on a matching target, "type_field" needs
 *   a successful type into a matching target or with the directive's value,
 *   and "submit" needs a successful click on a submit-like label.
 * - Every other directive kind never ends the task here.
 *
 * @param {string} taskGoal Natural-language goal of the task.
 * @param {Array<object>} history Step records with `decision` and `result`.
 * @returns {boolean} True when the task can be considered done.
 */
function taskShouldEndAfterSuccessfulCompactStep(taskGoal, history) {
    const labelOf = (entry) => entry.decision.target || entry.decision.instructionQuote || "";
    const anySuccessful = (action, matches) => history.some((entry) => entry.result.success && entry.decision.action === action && matches(entry));
    if (extractCompactAmountTaskValue(taskGoal)) {
        return anySuccessful("type", (entry) => compactTargetsMatch(labelOf(entry), "amount"));
    }
    const actionable = parseTaskDirectives(taskGoal).filter((candidate) => candidate.action !== "stop");
    if (actionable.length !== 1) {
        return false;
    }
    const directive = actionable[0];
    if (!directive) {
        return false;
    }
    switch (directive.action) {
        case "click":
            return anySuccessful("click", (entry) => compactTargetsMatch(labelOf(entry), directive.target));
        case "type_field":
            return anySuccessful("type", (entry) => compactTargetsMatch(labelOf(entry), directive.target) ||
                (directive.value !== undefined && normalizeVisibleText(entry.decision.text) === normalizeVisibleText(directive.value)));
        case "submit":
            return anySuccessful("click", (entry) => /^(?:submit|continue|next|finish|complete|done|send|verify|sign ?up|register|join|execute real transaction)$/i.test(normalizeVisibleText(labelOf(entry))));
        default:
            // Includes "unstructured" and "fill_visible_form", which never short-circuit the task.
            return false;
    }
}
/**
 * Recognises short, structured goals that read like one step of a larger flow.
 *
 * A goal qualifies when it is non-empty, at most 8 words long, and either is
 * a compact "buy|sell|swap <number>" goal or parses into one or two
 * directives (ignoring "stop"), none of which is "unstructured".
 *
 * @param {string} taskGoal Natural-language goal of the task.
 * @returns {boolean} True when the goal looks like a compact action step.
 */
function taskLooksLikeCompactActionFlowStep(taskGoal) {
    const goalText = normalizeVisibleText(taskGoal);
    const isShortGoal = Boolean(goalText) && countWords(goalText) <= 8;
    if (!isShortGoal) {
        return false;
    }
    if (extractCompactAmountTaskValue(goalText)) {
        return true;
    }
    const actionable = parseTaskDirectives(goalText).filter((candidate) => candidate.action !== "stop");
    const hasOneOrTwo = actionable.length >= 1 && actionable.length <= 2;
    if (!hasOneOrTwo) {
        return false;
    }
    return !actionable.some((candidate) => candidate.action === "unstructured");
}
/**
 * Tells whether a goal only makes sense as a continuation of the current page state.
 *
 * @param {string} taskGoal Natural-language goal of the task.
 * @returns {boolean} True when the lower-cased goal starts with a follow-up
 *   verb (DIRECT_CONTINUATION_VERB_PATTERN) or mentions a flow keyword
 *   (CONTINUATION_FLOW_PATTERN).
 */
function taskNeedsContinuousContext(taskGoal) {
    const goalText = normalizeVisibleText(taskGoal).toLowerCase();
    if (DIRECT_CONTINUATION_VERB_PATTERN.test(goalText)) {
        return true;
    }
    return CONTINUATION_FLOW_PATTERN.test(goalText);
}
/**
 * Tests a directive's target text against a pattern.
 *
 * @param {object|undefined} directive Parsed task directive.
 * @param {RegExp} pattern Pattern applied to the normalised target (falling
 *   back to the directive's `raw` text when the target is empty).
 * @returns {boolean} False when the directive is missing or has no `target`
 *   property; otherwise the result of the pattern test.
 */
function directiveTargetMatches(directive, pattern) {
    if (!directive || !("target" in directive)) {
        return false;
    }
    const targetText = normalizeVisibleText(directive.target || directive.raw);
    return pattern.test(targetText);
}
/**
 * Checks whether any step in the history both succeeded and left evidence of
 * having done something.
 *
 * Evidence is: `stateChanged` not explicitly false, or a non-empty
 * destinationUrl, destinationTitle or visibleTextSnippet on the result.
 *
 * @param {Array<{ result: object }>} history Step records of a task.
 * @returns {boolean} True when at least one such step exists.
 */
function historyShowsMeaningfulProgress(history) {
    const leftEvidence = ({ result }) => {
        if (!result.success) {
            return false;
        }
        if (result.stateChanged !== false) {
            return true;
        }
        return [result.destinationUrl, result.destinationTitle, result.visibleTextSnippet].some(Boolean);
    };
    return history.some(leftEvidence);
}
/**
 * Decides whether the next task should start from the page the previous task
 * ended on.
 *
 * Requirements: a previous task exists, its history shows meaningful
 * progress, and both goals look like compact action-flow steps. Then:
 *   - if either goal needs continuous context, continue;
 *   - otherwise compare the previous task's last directive with the current
 *     task's first directive and continue only for the recognised hand-offs
 *     (type -> proceed-style click, proceed-style click -> payment/wallet
 *     field, wallet click -> trade click, proceed click -> proceed click).
 *
 * @param {{ previousTask?: { goal: string }, previousHistory: Array<object>, currentTask: { goal: string } }} args
 * @returns {boolean} True when the current page should be reused.
 */
function shouldContinueFromCurrentPage(args) {
    const { previousTask, previousHistory } = args;
    if (!previousTask || !historyShowsMeaningfulProgress(previousHistory)) {
        return false;
    }
    const bothCompact = taskLooksLikeCompactActionFlowStep(previousTask.goal) &&
        taskLooksLikeCompactActionFlowStep(args.currentTask.goal);
    if (!bothCompact) {
        return false;
    }
    const previousGoal = previousTask.goal;
    const currentGoal = args.currentTask.goal;
    if (taskNeedsContinuousContext(previousGoal) || taskNeedsContinuousContext(currentGoal)) {
        return true;
    }
    const priorDirectives = parseTaskDirectives(previousGoal);
    const upcomingDirectives = parseTaskDirectives(currentGoal);
    const lastPrior = priorDirectives[priorDirectives.length - 1];
    const firstUpcoming = upcomingDirectives[0];
    if (!lastPrior || !firstUpcoming) {
        return false;
    }
    const priorTarget = normalizeVisibleText(lastPrior.target || "");
    const upcomingTarget = normalizeVisibleText(firstUpcoming.target || "");
    const priorIsClick = lastPrior.action === "click";
    const upcomingIsClick = firstUpcoming.action === "click";
    // Typed a value, then a click that moves the flow forward.
    if (lastPrior.action === "type_field" && upcomingIsClick) {
        return directiveTargetMatches(firstUpcoming, /\b(?:continue|next|proceed|payment|review|confirm)\b/i);
    }
    // Moved the flow forward, then a payment/wallet detail is typed.
    if (priorIsClick && firstUpcoming.action === "type_field") {
        const priorAdvancedFlow = /\b(?:continue|next|proceed|payment|review)\b/i.test(priorTarget);
        return priorAdvancedFlow &&
            directiveTargetMatches(firstUpcoming, /\b(?:bank|account|receiving|recipient|wallet|address|amount)\b/i);
    }
    if (!(priorIsClick && upcomingIsClick)) {
        return false;
    }
    const walletHandOff = /\b(?:connect|wallet)\b/i.test(priorTarget) &&
        /\b(?:wallet|buy|sell|swap|review|confirm|approve|sign)\b/i.test(upcomingTarget);
    if (walletHandOff) {
        return true;
    }
    return /\b(?:continue|next|proceed|payment|review)\b/i.test(priorTarget) &&
        /\b(?:paid|payment|continue|next|proceed|confirm|done)\b/i.test(upcomingTarget);
}
/**
 * Looks up the visible label of an interactive element by its agent id.
 *
 * @param {{ interactive: Array<{ agentId: string, text?: string }> }} pageState Captured page state.
 * @param {string} targetId Agent id referenced by a decision.
 * @returns {string} The element's normalised text, or "" when the id is
 *   empty, unknown, or the element has no text.
 */
function findInteractiveLabelByTargetId(pageState, targetId) {
    const wantedId = normalizeVisibleText(targetId);
    if (wantedId === "") {
        return "";
    }
    const matched = pageState.interactive.find((candidate) => candidate.agentId === wantedId);
    return normalizeVisibleText(matched?.text || "");
}
/**
 * Heuristically decides whether a piece of text refers to connecting a wallet.
 *
 * @param {string} source Label or surrounding text of a control.
 * @returns {boolean} True when the text matches the wallet-connect target or
 *   surface patterns, or contains the word "connect" together with a
 *   wallet/provider keyword. Empty text never matches.
 */
function textLooksLikeWalletConnect(source) {
    const text = normalizeVisibleText(source);
    if (text === "") {
        return false;
    }
    const explicitPhrase = [WALLET_CONNECT_TARGET_PATTERN, WALLET_CONNECT_SURFACE_PATTERN]
        .some((pattern) => pattern.test(text));
    if (explicitPhrase) {
        return true;
    }
    if (!/\bconnect\b/i.test(text)) {
        return false;
    }
    return WALLET_CONNECT_PROVIDER_PATTERN.test(text);
}
/**
 * Collects the text that describes an element for wallet-connect detection.
 *
 * Inside the page it concatenates, in order: the element's own text, its
 * aria-label, title, name, id, data-testid and data-test attributes, its
 * class name, its href (anchors only) and the first 300 characters of the
 * text of the closest container element.
 *
 * @param {object} locator Locator exposing `evaluate(fn)`.
 * @returns {Promise<string>} The normalised context string, or "" when the
 *   evaluation rejects.
 */
async function readLocatorWalletConnectContext(locator) {
    // Runs in the browser context, so it must not reference anything from this module.
    const describeElement = (element) => {
        const attribute = (name) => element.getAttribute(name) || "";
        const container = element.closest("dialog, [role='dialog'], form, section, article, main, nav, aside, header, div") || element.parentElement;
        const surroundingText = container?.textContent || "";
        const parts = [
            element.innerText || element.textContent || "",
            attribute("aria-label"),
            attribute("title"),
            attribute("name"),
            attribute("id"),
            attribute("data-testid"),
            attribute("data-test"),
            typeof element.className === "string" ? element.className : "",
            element instanceof HTMLAnchorElement ? element.href : "",
            surroundingText.slice(0, 300)
        ];
        return parts.join(" ").trim();
    };
    const rawContext = await locator.evaluate(describeElement).catch(() => "");
    return normalizeVisibleText(rawContext);
}
/**
 * Decides whether a planner decision is a click on a "connect wallet" control.
 *
 * The decision's target, its instruction quote, the label of the referenced
 * interactive element and (when a locator is supplied) the element's DOM
 * context are each tested with textLooksLikeWalletConnect().
 *
 * @param {{ decision: object, pageState: object, locator?: object }} args
 * @returns {Promise<boolean>} True for a click whose labels look like wallet
 *   connect; false for any non-click decision.
 */
async function decisionLooksLikeWalletConnect(args) {
    const { decision } = args;
    if (decision.action !== "click") {
        return false;
    }
    const candidates = [
        normalizeVisibleText(decision.target || ""),
        normalizeVisibleText(decision.instructionQuote || ""),
        findInteractiveLabelByTargetId(args.pageState, decision.target_id || "")
    ];
    if (args.locator) {
        const domContext = await readLocatorWalletConnectContext(args.locator);
        candidates.push(domContext);
    }
    for (const candidate of candidates) {
        if (textLooksLikeWalletConnect(candidate)) {
            return true;
        }
    }
    return false;
}
// Substring (not a regex, despite the name) that handleMetaMaskPopup() looks for in a popup URL.
// It matches any chrome extension page, not MetaMask specifically.
330
+ const METAMASK_EXTENSION_URL_PATTERN = "chrome-extension://";
// Upper bound on iterations of the popup-handling loop in handleMetaMaskPopup().
331
+ const METAMASK_MAX_ATTEMPTS = 60;
// Pause in milliseconds used after the popup loads and after each automated click.
332
+ const METAMASK_WAIT_MS = 1000;
// NOTE(review): presumably the minimum delay before re-clicking the same popup screen - the consumer is outside this excerpt.
333
+ const METAMASK_REPEAT_CLICK_COOLDOWN_MS = 2500;
// Catalogue of popup buttons the runner knows how to press, keyed by action name.
// For each action resolveMetaMaskActionLocator() tries `selectors` first (CSS, in order),
// then `buttonNames` as accessible button names, then `buttonNames` as text filters on
// button-like elements. `label` is the fallback label recorded when the element has no readable one.
334
+ const METAMASK_ACTIONS = {
335
+ next: {
336
+ key: "next",
337
+ label: "Next",
338
+ selectors: ['[data-testid="page-container-footer-next"]', 'button[data-testid*="next"]'],
339
+ buttonNames: [/^next$/i, /^continue$/i, /^review$/i]
340
+ },
341
+ connect: {
342
+ key: "connect",
343
+ label: "Connect",
344
+ selectors: ['button[data-testid*="connect"]', '[role="button"][data-testid*="connect"]'],
345
+ buttonNames: [/^connect$/i, /^connect wallet$/i]
346
+ },
347
+ approve: {
348
+ key: "approve",
349
+ label: "Approve",
350
+ selectors: [
351
+ 'button[data-testid*="approve"]',
352
+ '[role="button"][data-testid*="approve"]',
353
+ 'button[data-testid*="allow"]'
354
+ ],
355
+ buttonNames: [/^approve$/i, /^allow$/i, /^approve and continue$/i]
356
+ },
357
+ confirm: {
358
+ key: "confirm",
359
+ label: "Confirm",
360
+ selectors: [
361
+ '[data-testid="confirm-footer-button"]',
362
+ '[data-testid="confirm-btn"]',
363
+ '[data-testid="confirmation-submit-button"]',
364
+ 'button[data-testid*="confirm"]',
365
+ 'button[data-testid*="submit"]'
366
+ ],
367
+ buttonNames: [/^confirm$/i, /^submit$/i]
368
+ },
369
+ sign: {
370
+ key: "sign",
371
+ label: "Sign",
372
+ selectors: [
373
+ '[data-testid="signature-request-footer__sign-button"]',
374
+ 'button[data-testid*="signature"][data-testid*="sign"]',
375
+ 'button[data-testid*="sign"]'
376
+ ],
377
+ buttonNames: [/^sign$/i, /^sign message$/i, /^sign typed data$/i]
378
+ },
379
+ switchNetwork: {
380
+ key: "switchNetwork",
381
+ label: "Switch network",
382
+ selectors: ['button[data-testid*="switch-network"]', '[role="button"][data-testid*="switch-network"]'],
383
+ buttonNames: [/^switch network$/i, /^switch to .+$/i]
384
+ },
385
+ addNetwork: {
386
+ key: "addNetwork",
387
+ label: "Add network",
388
+ selectors: ['button[data-testid*="add-network"]', '[role="button"][data-testid*="add-network"]'],
389
+ buttonNames: [/^add network$/i, /^add suggested network$/i]
390
+ },
391
+ gotIt: {
392
+ key: "gotIt",
393
+ label: "Got it",
394
+ selectors: [
395
+ 'button[data-testid*="got-it"]',
396
+ '[role="button"][data-testid*="got-it"]',
397
+ 'button[data-testid*="done"]'
398
+ ],
399
+ buttonNames: [/^got it$/i, /^done$/i, /^okay$/i, /^ok$/i]
400
+ }
401
+ };
// Ordered list of METAMASK_ACTIONS keys to try for each popup kind returned by
// classifyMetaMaskPopupKind(). handleMetaMaskPopup() falls back to `generic`
// when the kind has no plan of its own.
402
+ const METAMASK_ACTION_PLANS = {
403
+ connect: ["next", "connect", "approve", "confirm", "gotIt"],
404
+ signature: ["sign", "confirm", "approve", "next", "gotIt"],
405
+ transaction: ["next", "confirm", "approve", "gotIt"],
406
+ network: ["approve", "switchNetwork", "addNetwork", "confirm", "next", "gotIt"],
407
+ approval: ["approve", "confirm", "next", "gotIt"],
408
+ generic: ["next", "connect", "approve", "confirm", "sign", "switchNetwork", "addNetwork", "gotIt"]
409
+ };
/**
 * Classifies a MetaMask popup from its combined URL, title and body text.
 *
 * Rules are checked in priority order and the first match wins:
 * signature, transaction, network, connect, approval.
 *
 * @param {string} source Text describing the popup.
 * @returns {"signature"|"transaction"|"network"|"connect"|"approval"|"generic"}
 *   The detected kind, or "generic" when no rule matches.
 */
function classifyMetaMaskPopupKind(source) {
    const rules = [
        ["signature", /\b(signature request|sign message|sign typed data|typed data|review signature|signature)\b/i],
        ["transaction", /\b(confirm transaction|transaction request|gas fee|max fee|network fee|nonce|spending cap)\b/i],
        ["network", /\b(add network|switch network|allow this site to switch|allow this site to add)\b/i],
        ["connect", /\b(connect with metamask|connect this account|select an account|connect wallet|connect)\b/i],
        ["approval", /\b(permission|permissions request|approve|allow)\b/i]
    ];
    for (const [kind, pattern] of rules) {
        if (pattern.test(source)) {
            return kind;
        }
    }
    return "generic";
}
/**
 * Checks that a locator's element is both visible and enabled.
 *
 * Each probe is given a 300 ms timeout; the enabled check is skipped when the
 * element is not visible. Any error from either probe counts as "not
 * interactable".
 *
 * @param {object} locator Locator exposing `isVisible` and `isEnabled`.
 * @returns {Promise<boolean>} True only when both probes succeed.
 */
async function locatorIsInteractable(locator) {
    const PROBE_TIMEOUT_MS = 300;
    try {
        const visible = await locator.isVisible({ timeout: PROBE_TIMEOUT_MS });
        return visible ? await locator.isEnabled({ timeout: PROBE_TIMEOUT_MS }) : visible;
    }
    catch {
        return false;
    }
}
/**
 * Reads a human-readable label for an element.
 *
 * Preference order inside the page: aria-label, then the value of an
 * <input>, then the element's own text.
 *
 * @param {object} locator Locator exposing `evaluate(fn)`.
 * @returns {Promise<string>} The normalised label, or "" when the evaluation
 *   rejects or nothing readable is found.
 */
async function readLocatorLabel(locator) {
    // Runs in the browser context, so it must stay self-contained.
    const pickLabel = (element) => {
        const ownText = element.innerText || element.textContent || "";
        const ariaLabel = element.getAttribute("aria-label") || "";
        if (ariaLabel) {
            return ariaLabel.trim();
        }
        const typedValue = element instanceof HTMLInputElement ? element.value : "";
        if (typedValue) {
            return typedValue.trim();
        }
        return ownText.trim();
    };
    const rawLabel = await locator.evaluate(pickLabel).catch(() => "");
    return normalizeVisibleText(rawLabel);
}
/**
 * Takes a snapshot of what a MetaMask popup currently shows.
 *
 * @param {object} popupPage Page object of the popup.
 * @returns {Promise<{ url: string, title: string, bodyText: string, kind: string, fingerprint: string }>}
 *   `title` falls back to "MetaMask" and `bodyText` to "" when they cannot be
 *   read; `kind` comes from classifyMetaMaskPopupKind(); `fingerprint` is the
 *   first 600 characters of the lower-cased, normalised "url title body" text.
 */
async function readMetaMaskPopupState(popupPage) {
    const url = popupPage.url();
    const rawTitle = await popupPage.title().catch(() => "MetaMask");
    const title = normalizeVisibleText(rawTitle);
    const rawBody = await popupPage.locator("body").innerText().catch(() => "");
    const bodyText = normalizeVisibleText(rawBody);
    const source = `${url} ${title} ${bodyText}`;
    const kind = classifyMetaMaskPopupKind(source);
    const fingerprint = normalizeVisibleText(source).toLowerCase().slice(0, 600);
    return { url, title, bodyText, kind, fingerprint };
}
/**
 * Detects the MetaMask "unlock" (password) screen.
 *
 * @param {object} popupPage Page object of the popup.
 * @param {{ title: string, bodyText: string }} state Snapshot from readMetaMaskPopupState().
 * @returns {Promise<boolean>} True when the popup text mentions "unlock" and
 *   both a password field and an "Unlock" button are interactable.
 */
async function isMetaMaskUnlockScreen(popupPage, state) {
    const visibleText = `${state.title} ${state.bodyText}`;
    if (!/\bunlock\b/i.test(visibleText)) {
        return false;
    }
    const passwordInput = popupPage.locator('input[type="password"]').first();
    const unlockControl = popupPage.getByRole("button", { name: /^unlock$/i }).first();
    const passwordReady = await locatorIsInteractable(passwordInput);
    return passwordReady && (await locatorIsInteractable(unlockControl));
}
/**
 * Scrolls a MetaMask review screen to its end.
 *
 * First clicks every visible scroll-down affordance, then scrolls the element
 * with the largest vertical overflow (or the document's scrolling root) to
 * the bottom from inside the page.
 *
 * @param {object} popupPage Page object of the popup.
 * @returns {Promise<boolean>} True when an affordance was clicked or the
 *   programmatic scroll actually moved the scroll position.
 */
async function advanceMetaMaskReview(popupPage) {
    const scrollAffordances = [
        '[data-testid="page-scroll-down"]',
        '[data-testid*="scroll-down"]',
        '[data-testid*="scroll-button"]'
    ];
    let clickedAffordance = false;
    for (const selector of scrollAffordances) {
        const control = popupPage.locator(selector).first();
        try {
            if (!(await control.isVisible({ timeout: 250 }))) {
                continue;
            }
            await control.click({ timeout: 1000 });
            clickedAffordance = true;
            await popupPage.waitForTimeout(300);
        }
        catch {
            // The scroll affordance is optional across MetaMask screens.
        }
    }
    // Runs in the browser context, so it must stay self-contained.
    const scrollLargestOverflowToBottom = () => {
        const overflowOf = (element) => element.scrollHeight - element.clientHeight;
        const scrollable = [];
        for (const element of document.querySelectorAll("main, section, article, div")) {
            const { overflowY } = window.getComputedStyle(element);
            if (/(auto|scroll)/i.test(overflowY) && element.scrollHeight > element.clientHeight + 24) {
                scrollable.push(element);
            }
        }
        // Largest overflow first.
        scrollable.sort((first, second) => overflowOf(second) - overflowOf(first));
        const scrollingRoot = document.scrollingElement instanceof HTMLElement ? document.scrollingElement : document.documentElement;
        const target = scrollable[0] ?? scrollingRoot;
        if (!target) {
            return false;
        }
        const startTop = target.scrollTop;
        target.scrollTop = target.scrollHeight;
        return target.scrollTop > startTop;
    };
    const scrolledProgrammatically = await popupPage.evaluate(scrollLargestOverflowToBottom).catch(() => false);
    if (scrolledProgrammatically) {
        await popupPage.waitForTimeout(300);
        return true;
    }
    return clickedAffordance;
}
/**
 * Finds the first interactable control for a MetaMask action.
 *
 * Candidates are tried lazily in this order: each CSS selector of the
 * definition, each button name as an accessible button name, then each
 * button name as a text filter on button-like elements.
 *
 * @param {object} popupPage Page object of the popup.
 * @param {{ selectors: string[], buttonNames: RegExp[] }} definition Entry of METAMASK_ACTIONS.
 * @returns {Promise<object|null>} The first interactable locator, or null.
 */
async function resolveMetaMaskActionLocator(popupPage, definition) {
    function* candidateLocators() {
        for (const selector of definition.selectors) {
            yield popupPage.locator(selector);
        }
        for (const name of definition.buttonNames) {
            yield popupPage.getByRole("button", { name });
        }
        for (const hasText of definition.buttonNames) {
            yield popupPage.locator('button, [role="button"], input[type="button"], input[type="submit"]', { hasText });
        }
    }
    for (const candidate of candidateLocators()) {
        const firstMatch = candidate.first();
        if (await locatorIsInteractable(firstMatch)) {
            return firstMatch;
        }
    }
    return null;
}
/**
 * Saves a screenshot of the MetaMask popup into the run directory.
 *
 * @param {object} popupPage Page object of the popup.
 * @param {string} runDir Directory that receives the image.
 * @param {string} suffix Tag appended to the file name (e.g. "before", "after").
 * @returns {Promise<string|null>} The file name (not the full path) of the
 *   image, or null when the screenshot could not be taken.
 */
async function captureMetaMaskPopupScreenshot(popupPage, runDir, suffix) {
    const filename = `metamask-${Date.now()}-${suffix}.png`;
    const destination = path.join(runDir, filename);
    let captured = true;
    try {
        await popupPage.screenshot({ path: destination, animations: "disabled" });
    }
    catch {
        captured = false;
    }
    return captured ? filename : null;
}
/**
 * Clicks one MetaMask popup control and records the interaction.
 *
 * Steps: read the control's label, screenshot the popup, click, pause for
 * METAMASK_WAIT_MS, screenshot again if the popup is still open, then append a
 * "metamask_popup_action" raw event and, when a task is active, a synthetic
 * "click" step to that task's history.
 *
 * @param {object} args
 * @param {object} args.locator Locator of the control to click.
 * @param {{ key: string, label: string }} args.definition METAMASK_ACTIONS entry being executed.
 * @param {object} args.popupPage Page object of the popup.
 * @param {string} args.runDir Directory that receives screenshots.
 * @param {{ lastActionAt?: number }} args.runtimeState Shared popup state; `lastActionAt` is updated.
 * @param {Array<object>} args.rawEvents Event log the action is appended to.
 * @param {{ kind: string, url: string, title: string }} args.state Popup snapshot taken before the click.
 * @param {() => ({ name: string, step: number, history: Array<object> } | null | undefined)} args.getCurrentTaskRef
 *   Returns the task currently running, if any.
 * @returns {Promise<void>}
 * @throws When the click itself fails (e.g. it times out after 3 s).
 */
async function clickMetaMaskAction(args) {
    const actualLabel = (await readLocatorLabel(args.locator)) || args.definition.label;
    const interactionTime = new Date().toISOString();
    const beforeScreenshot = await captureMetaMaskPopupScreenshot(args.popupPage, args.runDir, "before");
    await args.locator.scrollIntoViewIfNeeded().catch(() => undefined);
    await args.locator.click({ timeout: 3000 });
    // A successful click often closes the popup, and waiting on a closed page
    // rejects. The pause is best-effort: the click already happened and must
    // still be recorded below.
    await args.popupPage.waitForTimeout(METAMASK_WAIT_MS).catch(() => undefined);
    const afterScreenshot = !args.popupPage.isClosed()
        ? await captureMetaMaskPopupScreenshot(args.popupPage, args.runDir, "after")
        : null;
    args.runtimeState.lastActionAt = Date.now();
    args.rawEvents.push({
        type: "metamask_popup_action",
        time: interactionTime,
        action: args.definition.key,
        label: actualLabel,
        popupKind: args.state.kind,
        url: args.state.url,
        note: `Auto-clicked '${actualLabel}' in MetaMask ${args.state.kind} flow.`,
        beforeScreenshot,
        afterScreenshot
    });
    const taskRef = args.getCurrentTaskRef();
    if (!taskRef) {
        // No task is running, so there is no history to extend.
        return;
    }
    taskRef.step += 1;
    const activeStep = taskRef.step;
    taskRef.history.push({
        time: interactionTime,
        task: taskRef.name,
        step: activeStep,
        url: args.state.url,
        title: args.state.title,
        decision: {
            thought: `Automatically approving MetaMask ${args.state.kind} request: ${actualLabel}`,
            stepNumber: activeStep,
            instructionQuote: "MetaMask Approval",
            action: "click",
            target: actualLabel,
            target_id: "",
            text: "",
            expectation: "The MetaMask request should be approved and the popup should close or advance.",
            friction: "none"
        },
        result: {
            success: true,
            note: `Auto-clicked '${actualLabel}' in MetaMask popup.`,
            stateChanged: true,
            beforeScreenshotPath: beforeScreenshot ?? undefined,
            afterScreenshotPath: afterScreenshot ?? undefined
        }
    });
}
601
/**
 * Handles a newly opened popup page. When the popup resolves to a MetaMask extension URL,
 * it repeatedly reads the popup state and clicks the first available action from the plan
 * for that popup kind, until the popup closes or METAMASK_MAX_ATTEMPTS checks have run.
 *
 * Outcomes are reported as events appended to `args.rawEvents`
 * (`metamask_popup_detected`, `_blocked`, `_waiting`, `_timeout`, `_error`), and popup
 * counters/timestamps are maintained on `args.runtimeState`. Exceptions are not rethrown;
 * they are recorded as a `metamask_popup_error` event.
 *
 * @param {object} args
 * @param {object} args.popupPage - Popup page handle (presumably a Playwright Page — confirm against caller).
 * @param {string} args.runDir - Forwarded unchanged to clickMetaMaskAction.
 * @param {Array<object>} args.rawEvents - Event log that is appended to.
 * @param {object} args.runtimeState - Receives activePopupCount / lastDetectedAt / lastClosedAt updates.
 * @param {Function} args.getCurrentTaskRef - Forwarded unchanged to clickMetaMaskAction.
 * @returns {Promise<void>}
 */
async function handleMetaMaskPopup(args) {
    const popup = args.popupPage;
    // Every popup event starts with `type` and `time`; the remaining keys keep the order given.
    const pushEvent = (type, details) => {
        args.rawEvents.push({ type, time: new Date().toISOString(), ...details });
    };
    const stateFields = (state) => ({ url: state.url, title: state.title, popupKind: state.kind });
    try {
        debug("Popup detected, waiting for URL...");
        await popup
            .waitForURL((candidate) => candidate.toString().includes(METAMASK_EXTENSION_URL_PATTERN), { timeout: 3000 })
            .catch(() => undefined);
        const currentUrl = popup.url();
        const isExtensionPopup = currentUrl.includes(METAMASK_EXTENSION_URL_PATTERN);
        if (!isExtensionPopup) {
            debug("Bailing from popup handler: not an extension URL", { url: currentUrl });
            return;
        }
        await popup.waitForLoadState("domcontentloaded").catch(() => undefined);
        await popup.waitForTimeout(METAMASK_WAIT_MS);
        args.runtimeState.activePopupCount += 1;
        args.runtimeState.lastDetectedAt = Date.now();
        popup.once("close", () => {
            const remaining = args.runtimeState.activePopupCount - 1;
            args.runtimeState.activePopupCount = Math.max(0, remaining);
            args.runtimeState.lastClosedAt = Date.now();
        });

        // Remembers the most recent click so the same button on the same screen is not
        // re-clicked inside the cooldown window.
        const previousClick = { fingerprint: "", at: 0 };
        const clickFirstPlannedAction = async (state) => {
            const plannedKeys = METAMASK_ACTION_PLANS[state.kind] ?? METAMASK_ACTION_PLANS.generic;
            for (const actionKey of plannedKeys) {
                const definition = METAMASK_ACTIONS[actionKey];
                const locator = await resolveMetaMaskActionLocator(popup, definition);
                if (!locator) {
                    continue;
                }
                const fingerprint = `${state.fingerprint}|${definition.key}`;
                const startedAt = Date.now();
                const repeatedTooSoon = fingerprint === previousClick.fingerprint &&
                    startedAt - previousClick.at < METAMASK_REPEAT_CLICK_COOLDOWN_MS;
                if (repeatedTooSoon) {
                    continue;
                }
                await clickMetaMaskAction({
                    popupPage: popup,
                    state,
                    definition,
                    locator,
                    runDir: args.runDir,
                    rawEvents: args.rawEvents,
                    runtimeState: args.runtimeState,
                    getCurrentTaskRef: args.getCurrentTaskRef
                });
                previousClick.fingerprint = fingerprint;
                previousClick.at = startedAt;
                return true;
            }
            return false;
        };

        let idleChecks = 0;
        let popupState = await readMetaMaskPopupState(popup);
        pushEvent("metamask_popup_detected", {
            ...stateFields(popupState),
            note: `MetaMask extension popup detected for a ${popupState.kind} flow.`
        });
        debug("MetaMask popup detected", { url: popupState.url, kind: popupState.kind, title: popupState.title });

        for (let attempts = 1; !popup.isClosed() && attempts <= METAMASK_MAX_ATTEMPTS; attempts++) {
            popupState = await readMetaMaskPopupState(popup);
            const locked = await isMetaMaskUnlockScreen(popup, popupState);
            if (locked) {
                pushEvent("metamask_popup_blocked", {
                    ...stateFields(popupState),
                    note: "MetaMask is locked. Unlock the extension profile before running signing or confirmation flows."
                });
                return;
            }
            if (popupState.kind === "signature") {
                await advanceMetaMaskReview(popup).catch(() => undefined);
            }
            if (await clickFirstPlannedAction(popupState)) {
                idleChecks = 0;
                continue;
            }
            idleChecks += 1;
            // Report the first idle check and then every tenth one to keep the log compact.
            const shouldReportWaiting = idleChecks === 1 || idleChecks % 10 === 0;
            if (shouldReportWaiting) {
                pushEvent("metamask_popup_waiting", {
                    ...stateFields(popupState),
                    attempts,
                    note: `MetaMask popup is still open without a clickable approval target. Visible content: '${popupState.bodyText.slice(0, 200) || "n/a"}'.`
                });
            }
            await advanceMetaMaskReview(popup).catch(() => undefined);
            await popup.waitForTimeout(800);
        }

        if (!popup.isClosed()) {
            pushEvent("metamask_popup_timeout", {
                ...stateFields(popupState),
                note: `MetaMask popup remained open after ${METAMASK_MAX_ATTEMPTS} checks without a completed approval flow.`
            });
        }
    }
    catch (error) {
        pushEvent("metamask_popup_error", {
            note: `MetaMask popup handler error: ${cleanErrorMessage(error)}`
        });
    }
}
717
/**
 * Tests the page title and visible text (joined with a single space) against
 * WALLET_PENDING_PATTERN.
 *
 * @param {{ title: string, visibleText: string }} pageState
 * @returns {boolean} true when the combined text matches the pattern.
 */
function pageLooksLikeWalletFlowIsPending(pageState) {
    const { title, visibleText } = pageState;
    const combinedText = `${title} ${visibleText}`;
    return WALLET_PENDING_PATTERN.test(combinedText);
}
720
/**
 * Reports whether the page still appears to be waiting for an OTP / verification step.
 *
 * Returns true when the normalized body text matches
 * ACCOUNT_CREATION_VERIFICATION_PENDING_PATTERN, or when an in-page scan finds a visible,
 * enabled input/textarea whose descriptive attributes match OTP_FIELD_PATTERN or whose
 * `autocomplete` is "one-time-code". Failures reading the page are treated as "no".
 *
 * @param {object} page - Page handle exposing `locator` and `evaluate`.
 * @returns {Promise<boolean>}
 */
async function pageStillNeedsOtpVerification(page) {
    const rawBodyText = await page.locator("body").innerText().catch(() => "");
    const bodyText = normalizeVisibleText(rawBodyText);
    if (ACCOUNT_CREATION_VERIFICATION_PENDING_PATTERN.test(bodyText)) {
        return true;
    }
    // The callback runs inside the page, so it must be self-contained: only the pattern
    // source string crosses the boundary.
    return page
        .evaluate((otpPatternSource) => {
            const otpMatcher = new RegExp(otpPatternSource, "i");
            const isUsable = (field) => {
                const box = field.getBoundingClientRect();
                const computed = window.getComputedStyle(field);
                const unusable = box.width <= 0 ||
                    box.height <= 0 ||
                    computed.visibility === "hidden" ||
                    computed.display === "none" ||
                    field.disabled;
                return !unusable;
            };
            const describe = (field) => [
                field.getAttribute("placeholder") || "",
                field.getAttribute("name") || "",
                field.id || "",
                field.getAttribute("aria-label") || "",
                field.getAttribute("autocomplete") || "",
                field.type || ""
            ]
                .join(" ")
                .toLowerCase();
            for (const field of Array.from(document.querySelectorAll("input, textarea"))) {
                if (!isUsable(field)) {
                    continue;
                }
                if (otpMatcher.test(describe(field)) || field.getAttribute("autocomplete") === "one-time-code") {
                    return true;
                }
            }
            return false;
        }, OTP_FIELD_PATTERN.source)
        .catch(() => false);
}
754
/**
 * Walks `labels` in order and clicks the first visible control matching one of them.
 *
 * For each label a loose accessible-name pattern is built; candidates are tried as a
 * button, then a link, then any matching text. A normal click is attempted first and a
 * forced click is used if that rejects. After a click the function waits for
 * `domcontentloaded` (best effort) and for `config.actionDelayMs`.
 *
 * @param {object} page - Page handle exposing getByRole / getByText / waitFor* helpers.
 * @param {Iterable<string>} labels - Candidate labels, in priority order.
 * @returns {Promise<string|null>} The label that was clicked, or null when nothing visible matched.
 */
async function clickFirstVisibleAction(page, labels) {
    // Visibility probe that treats any lookup failure as "not visible".
    const isShown = async (target) => {
        try {
            return Boolean(await target.isVisible({ timeout: 500 }));
        }
        catch {
            return false;
        }
    };
    for (const label of labels) {
        const namePattern = buildLooseAccessiblePattern(label);
        if (!namePattern) {
            continue;
        }
        const lookups = [
            page.getByRole("button", { name: namePattern }),
            page.getByRole("link", { name: namePattern }),
            page.getByText(namePattern)
        ];
        for (const lookup of lookups) {
            const firstMatch = lookup.first();
            if (!(await isShown(firstMatch))) {
                continue;
            }
            const clickable = await prepareLocatorForInteraction(firstMatch).catch(() => firstMatch);
            // Fall back to a forced click when the regular click rejects.
            const retryForced = async () => {
                await clickable.click({ force: true, timeout: 5000 });
            };
            await clickable.click({ timeout: 5000 }).catch(retryForced);
            await page.waitForLoadState("domcontentloaded").catch(() => undefined);
            await page.waitForTimeout(config.actionDelayMs);
            return label;
        }
    }
    return null;
}
786
/**
 * Waits for a verification email, extracts its OTP code, and types it into the page.
 *
 * Flow: capture an inbox checkpoint, wait (up to 60s, polling every 3s) for a verification
 * email, locate either a group of single-character numeric inputs ("split-digit") or a
 * single OTP field, fill it, and finally click a verify/submit control when the page
 * still looks like it is waiting for verification.
 *
 * Every failure — including a malformed `args.baseUrl` or an identity lookup error — is
 * reported as an `otp_retrieval_error` event and returned as `{ filled: false, error }`
 * rather than thrown. The only exception is a missing mailbox configuration, which
 * returns early without recording an event.
 *
 * @param {object} args
 * @param {object} args.page - Page handle exposing evaluate / locator / keyboard / waitForTimeout.
 * @param {string} args.baseUrl - Site URL; its hostname is used to match the verification email.
 * @param {Array<object>} args.rawEvents - Event log that is appended to.
 * @param {string} args.taskName - Recorded on every event.
 * @param {number} args.step - Recorded on every event.
 * @returns {Promise<{ filled: boolean, otpCode?: string, error?: string }>}
 */
async function attemptOtpRetrieval(args) {
    const mailbox = getMailboxConfig();
    if (!mailbox) {
        return { filled: false, error: "IMAP mailbox is not configured (AUTH_IMAP_* settings missing)." };
    }
    // All OTP events share the same leading keys: type, time, task, step.
    const recordEvent = (type, fields) => {
        args.rawEvents.push({
            type,
            time: new Date().toISOString(),
            task: args.taskName,
            step: args.step,
            ...fields
        });
    };
    try {
        // Resolved inside the try block so an invalid base URL yields a structured error result.
        const identity = getPreferredAccessIdentity(args.baseUrl);
        const siteHost = new URL(args.baseUrl).hostname;
        const checkpoint = await captureInboxCheckpoint(mailbox);
        recordEvent("otp_inbox_checkpoint", {
            note: `Captured mailbox checkpoint at UID ${checkpoint.uidNext} to watch for verification email.`
        });
        const message = await waitForVerificationEmail({
            mailbox,
            checkpoint,
            siteHost,
            recipientEmail: identity.email,
            timeoutMs: 60000,
            pollIntervalMs: 3000
        });
        recordEvent("otp_email_received", {
            note: `Received verification email '${message.subject}' with ${message.otpCode ? "OTP code" : "no OTP code"}.`,
            hasOtpCode: Boolean(message.otpCode),
            hasVerificationLink: Boolean(message.verificationLink)
        });
        if (!message.otpCode) {
            return { filled: false, error: "Verification email arrived but no OTP code could be extracted." };
        }
        // Wait for OTP input fields to appear on the page (they may render after a short delay)
        await args.page.waitForTimeout(1500);
        // Detect OTP input fields — handles both split-digit (several one-character inputs)
        // and single-field patterns. The callback runs in the page and must be self-contained.
        const otpFieldInfo = await args.page.evaluate((otpPatternSource) => {
            const otpMatcher = new RegExp(otpPatternSource, "i");
            // Tags an element with a stable attribute so it can be located from outside the page.
            const ensureAgentId = (input, fallbackId) => {
                let id = input.getAttribute("data-site-agent-id");
                if (!id) {
                    id = fallbackId;
                    input.setAttribute("data-site-agent-id", id);
                }
                return id;
            };
            const visibleInputs = Array.from(document.querySelectorAll("input, textarea")).filter((input) => {
                const rect = input.getBoundingClientRect();
                const style = window.getComputedStyle(input);
                return (rect.width > 0 &&
                    rect.height > 0 &&
                    style.visibility !== "hidden" &&
                    style.display !== "none" &&
                    !input.disabled);
            });
            // Strategy 1: Detect split-digit OTP inputs (multiple single-char inputs with numeric inputMode)
            const singleDigitInputs = visibleInputs.filter((input) => input.maxLength === 1 &&
                (input.inputMode === "numeric" || input.type === "tel" || input.type === "number") &&
                input.type !== "hidden");
            if (singleDigitInputs.length >= 4 && singleDigitInputs.length <= 8) {
                const agentIds = singleDigitInputs.map((input, index) => ensureAgentId(input, `temp-otp-${index}`));
                return { type: "split-digit", count: singleDigitInputs.length, agentIds };
            }
            // Strategy 2: Detect single OTP input field by semantic attributes
            for (const input of visibleInputs) {
                const fieldKey = [
                    input.getAttribute("placeholder") || "",
                    input.getAttribute("name") || "",
                    input.id || "",
                    input.getAttribute("aria-label") || "",
                    input.getAttribute("autocomplete") || "",
                    input.type || ""
                ].join(" ").toLowerCase();
                const looksLikeOtpField = otpMatcher.test(fieldKey) ||
                    input.getAttribute("autocomplete") === "one-time-code" ||
                    (input.maxLength >= 4 && input.maxLength <= 8 && input.inputMode === "numeric" && !input.value);
                if (looksLikeOtpField) {
                    const agentId = ensureAgentId(input, "temp-otp-single");
                    return { type: "single", agentId, label: fieldKey.trim().slice(0, 60) };
                }
            }
            return null;
        }, OTP_FIELD_PATTERN.source);
        if (!otpFieldInfo) {
            return { filled: false, otpCode: message.otpCode, error: "OTP code was extracted from email but no OTP input field was found on the page." };
        }
        // Fill the OTP field(s)
        if (otpFieldInfo.type === "split-digit") {
            // Fill each digit input individually using keyboard input to trigger React onChange
            const digits = message.otpCode.split("").slice(0, otpFieldInfo.count);
            for (const [index, digit] of digits.entries()) {
                const agentId = otpFieldInfo.agentIds[index];
                if (!agentId) {
                    continue;
                }
                const digitLocator = args.page.locator(`[data-site-agent-id="${agentId}"]`).first();
                await digitLocator.click();
                await args.page.keyboard.press(digit);
                await args.page.waitForTimeout(100);
            }
            // Wait for auto-verification to complete (triggered when all digits are filled)
            await args.page.waitForTimeout(2000);
            recordEvent("otp_field_filled", {
                note: `Filled ${digits.length} split-digit OTP inputs with code from verification email.`,
                fieldType: "split-digit",
                digitCount: digits.length
            });
        }
        else {
            // Single OTP input field
            const locator = otpFieldInfo.agentId
                ? args.page.locator(`[data-site-agent-id="${otpFieldInfo.agentId}"]`).first()
                : args.page.locator("input[autocomplete='one-time-code']").first();
            await locator.fill(message.otpCode);
            await args.page.waitForTimeout(500);
            recordEvent("otp_field_filled", {
                note: `Filled OTP field with code from verification email.`,
                fieldType: "single",
                fieldLabel: otpFieldInfo.label
            });
        }
        // Some forms auto-submit once the code is entered; only click a submit control when
        // the page still looks like it is waiting for verification.
        if (await pageStillNeedsOtpVerification(args.page)) {
            const clickedLabel = await clickFirstVisibleAction(args.page, OTP_VERIFY_SUBMIT_LABELS);
            if (clickedLabel) {
                recordEvent("otp_verify_submit", {
                    note: `Clicked '${clickedLabel}' after filling the OTP to finalize verification.`
                });
            }
        }
        return { filled: true, otpCode: message.otpCode };
    }
    catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        recordEvent("otp_retrieval_error", {
            note: `OTP retrieval failed: ${errorMessage}`
        });
        return { filled: false, error: errorMessage };
    }
}
946
/**
 * Builds the placeholder site brief used when the submitted URL could not be loaded.
 *
 * @param {{ baseUrl: string, note: string }} args - The URL that failed and the failure note.
 * @returns {{ sitePurpose: string, intendedUserActions: Array, summary: string, evidence: Array<string> }}
 */
function buildNavigationBlockedSiteBrief(args) {
    const { baseUrl, note } = args;
    const brief = {
        sitePurpose: "The submitted URL could not be loaded, so the site purpose could not be observed.",
        intendedUserActions: [],
        summary: `Navigation to '${baseUrl}' failed before any visible landing-page content could be captured.`,
        evidence: [baseUrl, note]
    };
    return brief;
}
954
/**
 * Tells whether a URL is the blank page or one of the browser's internal
 * `chrome-error://` pages.
 *
 * @param {string} url
 * @returns {boolean}
 */
function isBrowserErrorPage(url) {
    if (url === "about:blank") {
        return true;
    }
    return url.startsWith("chrome-error://");
}
957
/**
 * Formats the screenshot file name for a task step, e.g. `task-01-step-03-before.png`.
 * The task number is `taskIndex + 1`; both numbers are left-padded with zeros to two digits.
 *
 * @param {{ taskIndex: number, step: number, phase: string }} args
 * @returns {string}
 */
function buildInteractionScreenshotName(args) {
    const twoDigits = (value) => String(value).padStart(2, "0");
    const taskPart = twoDigits(args.taskIndex + 1);
    const stepPart = twoDigits(args.step);
    return `task-${taskPart}-step-${stepPart}-${args.phase}.png`;
}
960
/**
 * Saves a screenshot for one task step into the run directory.
 *
 * On failure nothing is thrown from the screenshot call: a `screenshot_error` event is
 * appended to `args.rawEvents` and `undefined` is returned.
 *
 * @param {object} args
 * @param {object} args.page - Page handle exposing `screenshot`.
 * @param {string} args.runDir - Directory the file is written into.
 * @param {number} args.taskIndex
 * @param {number} args.step
 * @param {string} args.phase
 * @param {string} args.taskName - Recorded on the error event.
 * @param {Array<object>} args.rawEvents
 * @returns {Promise<string|undefined>} The file name (relative to `runDir`) on success.
 */
async function captureInteractionScreenshot(args) {
    const { taskIndex, step, phase } = args;
    const fileName = buildInteractionScreenshotName({ taskIndex, step, phase });
    const destination = path.join(args.runDir, fileName);
    try {
        await args.page.screenshot({ path: destination, animations: "disabled" });
    }
    catch (error) {
        const failureEvent = {
            type: "screenshot_error",
            time: new Date().toISOString(),
            task: args.taskName,
            step: args.step,
            phase: args.phase,
            path: fileName,
            note: `Failed to capture ${args.phase} screenshot for task step ${args.step}: ${cleanErrorMessage(error)}`
        };
        args.rawEvents.push(failureEvent);
        return undefined;
    }
    return fileName;
}
987
/**
 * Gathers every non-empty text fragment recorded for a task into one flat list:
 * the final URL and title, the task notes, and then — per history entry, in order —
 * its URL, title, decision target/expectation, result note, destination URL/title and
 * visible text snippet. Falsy values are dropped.
 *
 * @param {Array<object>} history - Step entries with `url`, `title`, `decision` and `result`.
 * @param {string} finalUrl
 * @param {string} finalTitle
 * @param {Array<string>} taskNotes
 * @returns {Array<string>}
 */
function collectTaskStrings(history, finalUrl, finalTitle, taskNotes) {
    const fragments = [finalUrl, finalTitle, ...taskNotes];
    for (const step of history) {
        fragments.push(
            step.url,
            step.title,
            step.decision.target,
            step.decision.expectation,
            step.result.note,
            step.result.destinationUrl ?? "",
            step.result.destinationTitle ?? "",
            step.result.visibleTextSnippet ?? ""
        );
    }
    return fragments.filter(Boolean);
}
1004
/**
 * Returns true when at least one value matches at least one pattern.
 *
 * Patterns carrying the `g` or `y` flag keep a `lastIndex` between `.test()` calls, which
 * would make results depend on earlier calls; `lastIndex` is reset before each test so
 * every value is always scanned from its start.
 *
 * @param {Array<string>} values
 * @param {Array<RegExp>} patterns
 * @returns {boolean}
 */
function matchesAnyPattern(values, patterns) {
    const matchesFromStart = (pattern, value) => {
        if (pattern.global || pattern.sticky) {
            pattern.lastIndex = 0;
        }
        return pattern.test(value);
    };
    return values.some((value) => patterns.some((pattern) => matchesFromStart(pattern, value)));
}
1007
/**
 * Tests a task goal against ACCOUNT_CREATION_TASK_PATTERN.
 *
 * @param {string} goal
 * @returns {boolean} true when the goal text matches the pattern.
 */
function taskLooksLikeAccountCreation(goal) {
    const mentionsAccountCreation = ACCOUNT_CREATION_TASK_PATTERN.test(goal);
    return mentionsAccountCreation;
}
1010
/**
 * Navigates the page to `args.baseUrl`, waiting for `domcontentloaded`.
 *
 * A navigation failure is not rethrown. It is recorded as a `navigation_error` event,
 * appended to `args.taskNotes` when that list is provided, and passed to `warn`.
 *
 * @param {object} args
 * @param {object} args.page - Page handle exposing `goto` and `url`.
 * @param {string} args.baseUrl
 * @param {Array<object>} args.rawEvents
 * @param {string} args.phase - The value "initial" selects the shorter warning text.
 * @param {string} [args.taskName]
 * @param {Array<string>} [args.taskNotes]
 * @returns {Promise<{ success: true } | { success: false, note: string }>}
 */
async function navigateToBaseUrl(args) {
    try {
        await args.page.goto(args.baseUrl, { waitUntil: "domcontentloaded" });
    }
    catch (error) {
        const note = `Navigation to '${args.baseUrl}' failed: ${cleanErrorMessage(error)}`;
        const failureEvent = {
            type: "navigation_error",
            time: new Date().toISOString(),
            phase: args.phase,
            task: args.taskName,
            url: args.baseUrl,
            currentUrl: args.page.url(),
            note
        };
        args.rawEvents.push(failureEvent);
        if (args.taskNotes) {
            args.taskNotes.push(note);
        }
        let warning = note;
        if (args.phase !== "initial") {
            // Outside the initial load, mention which task was being prepared (when known).
            const taskSuffix = args.taskName ? ` while preparing '${args.taskName}'` : "";
            warning = `${note}${taskSuffix}`;
        }
        warn(warning);
        return { success: false, note };
    }
    return { success: true };
}
1035
/**
 * Derives a final `{ status, reason }` verdict for a task from its recorded step history.
 *
 * Checks run in this order and the first match wins:
 *   1. trade steps (any successful trade, then any failed trade),
 *   2. a clicked "execute real transaction" control on a trade-like task,
 *   3. gameplay tasks (requested rounds, how-to-play confirmation, generic gameplay),
 *   4. security/verification interstitials,
 *   5. instruction-focused tasks,
 *   6. button-coverage tasks,
 *   7. account creation and general navigation heuristics.
 *
 * Entries whose `decision.target` is missing are treated as having an empty target.
 *
 * @param {Array<object>} history - Step entries with `url`, `title`, `decision` and `result`.
 * @param {string} finalUrl
 * @param {string} finalTitle
 * @param {object} task - Task definition; `goal` and the optional `gameplay` settings are read.
 * @param {Array<string>} [taskNotes=[]] - Notes collected during the run; the first one is
 *   used as the failure reason in several branches.
 * @returns {{ status: "success" | "partial_success" | "failed", reason: string }}
 */
function inferTaskStatus(history, finalUrl, finalTitle, task, taskNotes = []) {
    const verdict = (status, reason) => ({ status, reason });
    // decision.target is optional on recorded entries; normalize it once.
    const targetOf = (entry) => entry.decision.target ?? "";

    // 1. Trade steps decide the outcome on their own.
    const tradeEntries = history.filter((entry) => entry.decision.action === "trade");
    const successfulTrades = tradeEntries.filter((entry) => entry.result.success);
    if (successfulTrades.length > 0) {
        const note = successfulTrades[successfulTrades.length - 1].result.note;
        return verdict(/\bdry run\b/i.test(note) ? "partial_success" : "success", note);
    }
    const firstFailedTrade = tradeEntries.find((entry) => !entry.result.success);
    if (firstFailedTrade) {
        return verdict("failed", firstFailedTrade.result.note);
    }

    // 2. A real transaction submitted through the page UI.
    const successfulRealTransactionClick = history.find((entry) => entry.decision.action === "click" &&
        entry.result.success &&
        /\bexecute\s+real\s+transaction\b/i.test(entry.decision.target || entry.decision.instructionQuote || ""));
    if (taskLooksLikeTrade(task.goal) && successfulRealTransactionClick) {
        const transactionEvidence = collectTaskStrings(history, finalUrl, finalTitle, taskNotes).join(" ");
        if (/\b(?:failed|reverted|rejected|denied|cancelled|canceled|error)\b/i.test(transactionEvidence)) {
            return verdict("failed", "The real transaction was submitted, but the visible transaction history reported a failed or rejected outcome.");
        }
        if (/\b(?:succeeded|success|successful|confirmed|completed|broadcast)\b/i.test(transactionEvidence)) {
            return verdict("success", "The real transaction was submitted and the visible transaction history reached a final successful or broadcast state.");
        }
        if (/\b(?:sending|pending|confirming|processing|submitted)\b/i.test(transactionEvidence)) {
            return verdict("partial_success", "The real transaction was submitted and accepted, but the page still showed it as pending before the run ended.");
        }
    }

    // Shared evidence used by the remaining branches.
    const taskProfile = classifyTaskText(task.goal);
    const taskStrings = collectTaskStrings(history, finalUrl, finalTitle, taskNotes);
    const taskEvidenceBlob = taskStrings.join(" ");
    const successfulActions = history.filter((entry) => entry.result.success);
    const clickEntries = history.filter((entry) => entry.decision.action === "click");
    const successfulClicks = clickEntries.filter((entry) => entry.result.success);
    const failedClicks = clickEntries.filter((entry) => !entry.result.success);
    const isSetupOrGateControl = (target) => /(?:create|register|sign ?up|log ?in|continue|enter|unlock|access|submit|profile)/i.test(target);
    const meaningfulEngagementClicks = successfulClicks.filter((entry) => {
        const target = targetOf(entry).trim();
        const engagementText = [
            target,
            entry.result.note,
            entry.result.visibleTextSnippet ?? "",
            entry.result.destinationTitle ?? ""
        ].join(" ");
        if (textHasPlayActionCue(engagementText)) {
            return true;
        }
        // From here on the text carries no play cue, so back/exit-style and setup/gate
        // controls never count as engagement.
        if (isRegressiveTaskControlLabel(target) || isSetupOrGateControl(target)) {
            return false;
        }
        return Boolean(entry.result.stateChanged) && target.length > 0;
    });
    const distinctTargets = new Set(successfulClicks.map((entry) => targetOf(entry).toLowerCase()).filter(Boolean));
    const distinctDestinations = new Set(successfulClicks
        .map((entry) => entry.result.destinationUrl ?? entry.url ?? "")
        .map((value) => value.replace(/[?#].*$/, ""))
        .filter(Boolean));
    const visibleChanges = successfulClicks.filter((entry) => entry.result.stateChanged);
    const blockedByInterstitial = matchesAnyPattern(taskStrings, INTERSTITIAL_PATTERNS);
    const timeLimited = matchesAnyPattern(taskStrings, TIME_LIMIT_PATTERNS);
    const hasGoalAlignedEvidence = taskProfile.broadNavigation || hasTaskKeywordEvidence(task.goal, taskStrings);
    const extractSucceeded = history.some((entry) => entry.decision.action === "extract" && entry.result.success);
    const sawEngagementOpportunity = history.some((entry) => {
        const visibleText = [
            entry.result.visibleTextSnippet ?? "",
            entry.result.note,
            entry.result.destinationTitle ?? "",
            entry.result.destinationUrl ?? ""
        ].join(" ");
        return textHasPlayActionCue(visibleText) ||
            /(?:bet amount|target multiplier|cash out|play again|recent crashes|how to play|round|multiplier)/i.test(visibleText);
    });

    // Account-creation signals are taken from the most recent successful submit click.
    const successfulSubmitEntry = [...history]
        .reverse()
        .find((entry) => entry.decision.action === "click" &&
        entry.result.success &&
        ACCOUNT_CREATION_SUBMIT_PATTERN.test(entry.decision.target || ""));
    const accountCreationEvidenceBlob = [
        successfulSubmitEntry?.result.note ?? "",
        successfulSubmitEntry?.result.visibleTextSnippet ?? "",
        successfulSubmitEntry?.result.destinationTitle ?? "",
        finalTitle,
        ...taskNotes
    ].join(" ");
    const submittedAccountCreation = taskLooksLikeAccountCreation(task.goal) && Boolean(successfulSubmitEntry);
    const postSubmitSnippet = successfulSubmitEntry?.result.visibleTextSnippet ?? "";
    const accountCreationFormStillVisible = ACCOUNT_CREATION_FORM_STILL_VISIBLE_PATTERN.test(postSubmitSnippet);
    const accountCreationLocalOnly = submittedAccountCreation &&
        ACCOUNT_CREATION_LOCAL_ONLY_PATTERN.test(accountCreationEvidenceBlob);
    const accountCreationVerificationPending = submittedAccountCreation &&
        ACCOUNT_CREATION_VERIFICATION_PENDING_PATTERN.test(postSubmitSnippet) &&
        accountCreationFormStillVisible;
    const accountCreationSucceeded = submittedAccountCreation &&
        ACCOUNT_CREATION_SUCCESS_PATTERN.test(accountCreationEvidenceBlob) &&
        !accountCreationFormStillVisible;
    // Verdict for signup flows, or null when no signup outcome was observed.
    const accountCreationVerdict = () => {
        if (accountCreationSucceeded) {
            return accountCreationLocalOnly
                ? verdict("partial_success", "The signup form submitted and the site showed a post-registration state, but it explicitly reported browser-only fallback storage, so the account was not confirmed on a shared backend/dashboard.")
                : verdict("success", "The signup flow submitted successfully and the visible page switched into a post-registration state.");
        }
        if (accountCreationVerificationPending) {
            return verdict("partial_success", "The signup form was filled and submitted, but the page is still requesting email or OTP verification before the account can be created.");
        }
        return null;
    };
    const failedClickVerdict = () => (failedClicks.length > 0 ? verdict("failed", failedClicks[0].result.note) : null);
    const taskNoteVerdict = () => (taskNotes.length > 0 ? verdict("failed", taskNotes[0]) : null);

    // 3. Gameplay tasks.
    if (isGameplayTask(task)) {
        const gameplay = summarizeGameplayHistory(history);
        const gameplayText = taskEvidenceBlob;
        const reachedPlayableState = history.some((entry) => entry.result.success &&
            (Boolean(entry.result.stateChanged) ||
                /(?:\bplay\b|\bstart\b|\bnew game\b|\bplay again\b|\bretry\b|\brestart\b)/i.test(`${entry.decision.target} ${entry.result.note} ${entry.result.visibleTextSnippet ?? ""}`)));
        if (task.gameplay?.rounds) {
            const requestedRounds = task.gameplay.rounds;
            const recordedSummary = `${gameplay.wins} win(s), ${gameplay.losses} loss(es), ${gameplay.draws} draw(s)`;
            if (gameplay.roundsRecorded >= requestedRounds) {
                return verdict("success", `Recorded ${gameplay.roundsRecorded}/${requestedRounds} requested round outcome(s): ${recordedSummary}.`);
            }
            if (gameplay.roundsRecorded > 0) {
                return verdict("partial_success", `Recorded only ${gameplay.roundsRecorded}/${requestedRounds} requested round outcome(s): ${recordedSummary}. ${taskNotes[0] ?? "Further rounds stayed blocked or inconclusive."}`.trim());
            }
            return taskNoteVerdict() ?? verdict("failed", /(?:play|game|round|retry|restart)/i.test(gameplayText)
                ? "The game path was reached, but no clear round outcome could be recorded."
                : "The agent could not reach a clearly playable round state.");
        }
        if (task.gameplay?.requireHowToPlay) {
            if (gameplay.howToPlayConfirmed && reachedPlayableState) {
                return verdict("success", "Visible how-to-play guidance was confirmed and the agent reached a playable game state.");
            }
            if (gameplay.howToPlayConfirmed || reachedPlayableState) {
                return verdict("partial_success", gameplay.howToPlayConfirmed
                    ? "The visible rules or how-to-play guidance appeared, but the path into a clearly playable state stayed under-validated."
                    : "The agent reached a playable-looking state, but the visible rules or how-to-play guidance were not clearly confirmed.");
            }
            return taskNoteVerdict() ?? verdict("failed", "The agent could not clearly confirm the visible how-to-play guidance or reach a stable playable state.");
        }
        if (gameplay.roundsRecorded > 0 && reachedPlayableState) {
            return verdict("success", `Recorded ${gameplay.roundsRecorded} visible round outcome(s): ${gameplay.wins} win(s), ${gameplay.losses} loss(es), ${gameplay.draws} draw(s).`);
        }
        if (reachedPlayableState || gameplay.howToPlayConfirmed || textHasOutcomeCue(gameplayText)) {
            return verdict("partial_success", gameplay.howToPlayConfirmed
                ? "Visible gameplay guidance appeared and the agent got partway through the playable flow, but the requested outcome evidence stayed incomplete."
                : "The agent reached part of the gameplay flow, but the visible outcome evidence stayed incomplete.");
        }
        return taskNoteVerdict() ?? verdict("failed", "The agent could not clearly reach a playable state or capture a visible outcome for this gameplay task.");
    }

    // 4. Interstitials block a fair evaluation of everything below.
    if (blockedByInterstitial) {
        return verdict("failed", "A security or verification interstitial blocked the destination page before the agent could fairly validate this navigation path.");
    }

    // 5. Instruction-focused tasks.
    if (taskProfile.instructionFocus) {
        const sawInstructions = textHasInstructionCue(taskEvidenceBlob);
        if (taskProfile.engagement) {
            if (sawInstructions && meaningfulEngagementClicks.length > 0) {
                return verdict("success", "The run confirmed the visible instructions and also used live on-page controls instead of stopping at the instruction copy alone.");
            }
            if (sawInstructions && (extractSucceeded || successfulActions.length > 0 || sawEngagementOpportunity)) {
                return verdict("partial_success", sawEngagementOpportunity
                    ? "The run confirmed the visible instructions and reached an interactive state, but it did not capture enough direct engagement evidence from the live controls."
                    : "The run confirmed the visible instructions, but it did not gather direct evidence of meaningful engagement beyond that.");
            }
        }
        if (sawInstructions && (extractSucceeded || successfulActions.length > 0)) {
            return verdict("success", "The run captured visible rules, instructions, or how-to-play guidance that matched this task.");
        }
        return failedClickVerdict() ??
            taskNoteVerdict() ??
            verdict("failed", "The run did not capture clear visible instructions or rules that matched this task.");
    }

    // 6. Button-coverage tasks are judged by how many distinct controls responded.
    if (taskProfile.buttonCoverage) {
        if (distinctTargets.size >= 5) {
            return verdict("success", `The agent interacted with ${distinctTargets.size} distinct visible controls and recorded their visible responses step by step.`);
        }
        if (distinctTargets.size >= 2) {
            return verdict("partial_success", `The agent interacted with ${distinctTargets.size} distinct visible controls, but did not capture broad enough button coverage to call the task complete.`);
        }
        return failedClickVerdict() ??
            verdict("failed", "The run did not gather enough button-by-button interaction evidence to support this task.");
    }

    // 7a. No goal-aligned evidence: only an observed signup outcome can rescue the task.
    // (hasGoalAlignedEvidence already includes taskProfile.broadNavigation.)
    if (!hasGoalAlignedEvidence) {
        return accountCreationVerdict() ??
            failedClickVerdict() ??
            taskNoteVerdict() ??
            verdict("failed", "The run did not gather enough task-specific evidence to confirm this requested path.");
    }

    // 7b. General navigation heuristics.
    if (distinctTargets.size >= 3 && visibleChanges.length >= 2 && distinctDestinations.size >= 2 && failedClicks.length === 0) {
        return verdict("success", "Multiple visible links, tabs, or buttons opened clear destination pages or visible state changes as expected.");
    }
    const signupVerdict = accountCreationVerdict();
    if (signupVerdict) {
        return signupVerdict;
    }
    if (timeLimited && successfulClicks.length > 0) {
        return verdict("partial_success", "The agent validated some visible destinations, but the run ended before it could cover more of this navigation path.");
    }
    if (distinctTargets.size >= 2 && visibleChanges.length >= 1) {
        return verdict("partial_success", "Several visible destinations responded correctly, but not enough unique paths were validated to mark the whole task complete.");
    }
    return failedClickVerdict() ??
        taskNoteVerdict() ??
        verdict("failed", "The agent did not gather enough visible evidence to confirm whether this navigation path worked as expected.");
}
1361
/**
 * Extracts a sell instruction from the visible page and hands it to the trade executor.
 *
 * Records `trade_instruction_missing`, or `trade_instruction_extracted` followed by
 * `trade_execution_result`, on `args.rawEvents`.
 *
 * @param {object} args
 * @param {object} args.pageState - Current page state; its `url` is recorded on events.
 * @param {string} args.taskGoal
 * @param {string} args.taskName
 * @param {string} args.runDir
 * @param {object} [args.tradeOptions] - Overrides merged on top of the default trade run options.
 * @param {Array<object>} args.rawEvents
 * @returns {Promise<{ success: boolean, note: string, stop?: true }>} `stop` is set only
 *   when an instruction was extracted and executed.
 */
async function executeTradeHandoff(args) {
    // Shared leading fields for every trade event.
    const logTradeEvent = (type, fields) => {
        args.rawEvents.push({
            type,
            time: new Date().toISOString(),
            task: args.taskName,
            url: args.pageState.url,
            ...fields
        });
    };
    const instruction = extractSellInstruction({
        pageState: args.pageState,
        taskGoal: args.taskGoal,
        defaultChainId: getWalletChainId()
    });
    if (!instruction) {
        logTradeEvent("trade_instruction_missing", {
            note: "Trade handoff was requested, but a deterministic sell instruction could not be extracted from the visible page."
        });
        return {
            success: false,
            note: "Trade handoff could not extract a deterministic recipient address, amount, token, and chain from the visible page."
        };
    }
    logTradeEvent("trade_instruction_extracted", { instruction });
    const runOptions = { ...buildDefaultTradeRunOptions(), ...args.tradeOptions };
    const record = await executeTradeInstruction({
        runDir: args.runDir,
        instruction,
        runOptions,
        policy: getTradePolicy(),
        source: "browser"
    });
    logTradeEvent("trade_execution_result", {
        status: record.status,
        selectedMode: record.selectedMode,
        txHash: record.txHash,
        validation: record.validation,
        note: record.note
    });
    // These statuses count as a successful handoff; anything else is a failure.
    const acceptedStatuses = ["dry_run", "broadcast", "confirmed"];
    return {
        success: acceptedStatuses.includes(record.status),
        note: record.note,
        stop: true
    };
}
1414
/**
 * Executes a Paystack bank transfer described by a planner "pay" decision.
 *
 * `args.decision.text` must have the shape `amount:bank:account`
 * (e.g. `100:058:0000000000`). The bank part may be a 3-digit bank code or a
 * bank name, which is resolved through `resolveBankCode`.
 *
 * Because this moves real money, the amount is validated strictly: it must be
 * a plain positive decimal number. Lenient parsing is deliberately avoided
 * because `parseFloat("1,000")` yields `1` and `parseFloat("100abc")` yields
 * `100`, which would silently transfer the wrong amount.
 *
 * @param {object} args
 * @param {{text?: string, thought?: string}} args.decision - Planner decision carrying the payment text.
 * @param {Array<object>} args.rawEvents - Mutable event log; a success or failure event is
 *   appended once a transfer has actually been attempted.
 * @returns {Promise<{success: boolean, note: string, stop?: boolean}>}
 *   `stop: true` is set only when the transfer was initiated successfully.
 */
async function executePaystackTransfer(args) {
    const invalidFormat = {
        success: false,
        note: "Invalid payment format. Expected amount:bank:account (e.g. 100:058:0000000000)"
    };
    const parts = (args.decision.text || "").split(":");
    if (parts.length < 3) {
        return invalidFormat;
    }
    const amountText = parts[0].trim();
    const bankRef = parts[1].trim();
    const accountNumber = parts[2].trim();
    // Only accept an unsigned plain decimal; anything else (separators, units,
    // signs, "Infinity", exponents) is rejected instead of being truncated.
    const amountNaira = /^\d+(?:\.\d+)?$/.test(amountText) ? Number(amountText) : Number.NaN;
    if (!Number.isFinite(amountNaira) || amountNaira <= 0) {
        return { success: false, note: `Invalid amount: ${parts[0]}` };
    }
    // "100:058:" splits into three parts but carries no destination account.
    if (bankRef === "" || accountNumber === "") {
        return invalidFormat;
    }
    // Resolve bank code if it's a name (e.g. "GTBank" -> "058")
    let bankCode = bankRef;
    if (!/^\d{3}$/.test(bankRef)) {
        const resolved = await resolveBankCode(bankRef);
        if (!resolved) {
            return { success: false, note: `Could not resolve bank name: ${bankRef}` };
        }
        bankCode = resolved;
    }
    try {
        const result = await sendMoney({
            amountNaira,
            bankCode,
            accountNumber,
            recipientName: "Site Agent Transfer",
            reason: args.decision.thought || "Autonomous Agent Payment"
        });
        args.rawEvents.push({
            type: "paystack_transfer_success",
            time: new Date().toISOString(),
            amount: amountNaira,
            bank: bankRef,
            bankCode,
            account: accountNumber,
            transferCode: result.transfer.transfer_code,
            status: result.transfer.status
        });
        return {
            success: true,
            note: `Successfully initiated transfer of ₦${amountNaira} to ${bankRef} (${accountNumber}). Status: ${result.transfer.status}`,
            stop: true
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        args.rawEvents.push({
            type: "paystack_transfer_failed",
            time: new Date().toISOString(),
            amount: amountNaira,
            bank: bankRef,
            account: accountNumber,
            error: message
        });
        return {
            success: false,
            note: `Paystack transfer failed: ${message}`
        };
    }
}
1477
/**
 * Builds a compact string fingerprint of a captured page state.
 *
 * The signature joins, with `::`, the URL, the title, the first 900
 * characters of visible text, a summary of the first 8 interactive elements,
 * and a summary of the first 12 form fields. Field values themselves are not
 * included, only whether a field is filled or a toggle is checked.
 *
 * @param {object} pageState - Captured page state with `url`, `title`,
 *   `visibleText`, `interactive` and `formFields`.
 * @returns {string} The page signature.
 */
function buildPageSignature(pageState) {
    const describeControl = (control) => {
        const availability = control.disabled ? "disabled" : "enabled";
        return `${control.role}:${control.text}:${availability}`;
    };
    const describeField = (field) => {
        // First truthy identifier wins; fall back to a generic label.
        const identifier = field.label || field.placeholder || field.name || field.id || field.inputType || "field";
        const isToggle = field.inputType === "checkbox" || field.inputType === "radio";
        let fieldState;
        if (isToggle) {
            fieldState = field.checked ? "checked" : "unchecked";
        }
        else {
            fieldState = field.value ? "filled" : "empty";
        }
        return `${identifier}:${fieldState}`;
    };
    const controlsPart = pageState.interactive.slice(0, 8).map((control) => describeControl(control)).join("|");
    const fieldsPart = pageState.formFields.slice(0, 12).map((field) => describeField(field)).join("|");
    const segments = [
        pageState.url,
        pageState.title,
        pageState.visibleText.slice(0, 900),
        controlsPart,
        fieldsPart
    ];
    return segments.join("::");
}
1504
/**
 * Decides whether the current task should stop because it is making no progress.
 *
 * Stagnation requires at least STAGNATION_WINDOW history entries and page
 * signatures. Within the most recent window, every recorded signature must
 * equal the current one, and every step must be a wait, a high-friction
 * decision, a failed action, or a `type` action that did not change state.
 *
 * @param {object} args
 * @param {Array<object>} args.history - Step entries with `decision` and `result`.
 * @param {Array<string>} args.pageSignatures - Signatures recorded for earlier steps.
 * @param {string} args.pageSignature - Signature of the page as it is now.
 * @returns {boolean} True when the task looks stalled.
 */
function shouldStopForStagnation(args) {
    const { history, pageSignatures, pageSignature } = args;
    const hasFullWindow = history.length >= STAGNATION_WINDOW && pageSignatures.length >= STAGNATION_WINDOW;
    if (!hasFullWindow) {
        return false;
    }
    const isStalledStep = ({ decision, result }) => {
        if (decision.action === "wait" || decision.friction === "high") {
            return true;
        }
        if (result.success === false) {
            return true;
        }
        return decision.action === "type" && result.stateChanged === false;
    };
    const windowEntries = history.slice(-STAGNATION_WINDOW);
    const windowSignatures = pageSignatures.slice(-STAGNATION_WINDOW);
    const pageUnchanged = windowSignatures.every((recorded) => recorded === pageSignature);
    const everyStepStalled = windowEntries.every((step) => isStalledStep(step));
    return pageUnchanged && everyStepStalled;
}
1517
/**
 * Tells the runner whether to pause after a step.
 *
 * A pause is requested only when the step did not stop the task, was not
 * itself a `wait` action, and reported success.
 *
 * @param {object} args
 * @param {boolean} args.stopped - Whether the step ended the task.
 * @param {string} args.decisionAction - The action name of the step's decision.
 * @param {boolean} args.resultSuccess - Whether the step succeeded.
 * @returns {boolean} True when a pause should follow the step.
 */
function shouldPauseAfterStep(args) {
    const taskStillRunning = !args.stopped;
    const wasRealAction = args.decisionAction !== "wait";
    const stepSucceeded = Boolean(args.resultSuccess);
    return taskStillRunning && wasRealAction && stepSucceeded;
}
1529
/**
 * Derives how many steps each task may take in a suite of `totalTasks` tasks.
 *
 * A single task gets the full configured `config.maxStepsPerTask`. With
 * several tasks, each one gets roughly twice its even share of that budget,
 * with a floor of 8 steps so a task always has a workable minimum.
 *
 * The result never exceeds `config.maxStepsPerTask`. The floor is applied
 * before the configured maximum, so a maximum below 8 is honoured for
 * multi-task suites just as it is for a single task.
 *
 * @param {number} totalTasks - Number of tasks in the suite.
 * @returns {number} The per-task step cap.
 */
function derivePerTaskStepCap(totalTasks) {
    const configuredMax = config.maxStepsPerTask;
    if (totalTasks <= 1) {
        return configuredMax;
    }
    const minimumUsefulSteps = 8;
    const sharedBudget = Math.ceil((configuredMax * 2) / totalTasks);
    return Math.min(configuredMax, Math.max(minimumUsefulSteps, sharedBudget));
}
1535
/**
 * Decides whether the current task should end early so later tasks still get time.
 *
 * Each remaining task reserves the minimum useful step window, clamped to the
 * range 8000-20000 ms. The current task must yield when the remaining session
 * time is less than the post-run audit reserve, plus one useful step window,
 * plus the combined reserve for the remaining tasks.
 *
 * @param {object} args
 * @param {number} args.remainingSessionMs - Milliseconds left in the session.
 * @param {number} args.remainingTasksAfterCurrent - Tasks still queued after this one.
 * @param {number} args.minimumUsefulStepWindowMs - Time one meaningful step needs.
 * @returns {boolean} True when the current task should stop to preserve time.
 */
function shouldPreserveCoverageForRemainingTasks(args) {
    const { remainingSessionMs, remainingTasksAfterCurrent, minimumUsefulStepWindowMs } = args;
    if (remainingTasksAfterCurrent <= 0) {
        // Nothing is queued behind this task, so there is nothing to protect.
        return false;
    }
    const cappedWindowMs = Math.min(20000, minimumUsefulStepWindowMs);
    const reservePerTaskMs = Math.max(8000, cappedWindowMs);
    const futureTaskReserveMs = remainingTasksAfterCurrent * reservePerTaskMs;
    const requiredMs = config.postRunAuditReserveMs + minimumUsefulStepWindowMs + futureTaskReserveMs;
    return remainingSessionMs < requiredMs;
}
1542
+ export async function runTaskSuite(options) {
1543
+ const llm = {
1544
+ ...(options.provider ? { provider: options.provider } : {}),
1545
+ ...(options.model ? { model: options.model } : {}),
1546
+ ...(options.ollamaBaseUrl ? { ollamaBaseUrl: options.ollamaBaseUrl } : {})
1547
+ };
1548
+ const tradeOptions = {
1549
+ ...buildDefaultTradeRunOptions(),
1550
+ ...(options.tradeOptions ?? {})
1551
+ };
1552
+ const executionBudgetMs = clampRunDurationMs(options.maxSessionDurationMs ?? config.maxSessionDurationMs);
1553
+ const executionBudgetSeconds = Math.round(executionBudgetMs / 1000);
1554
+ const sessionDeadline = Date.now() + executionBudgetMs;
1555
+ const minimumUsefulStepWindowMs = Math.min(30000, Math.max(12000, config.navigationTimeoutMs + 2000));
1556
+ const storageStatePath = options.storageStatePath ? resolveLocalPath(options.storageStatePath) : undefined;
1557
+ const saveStorageStatePath = options.saveStorageStatePath ? resolveLocalPath(options.saveStorageStatePath) : undefined;
1558
+ const contextOptions = options.mobile
1559
+ ? {
1560
+ ...devices["iPhone 13"],
1561
+ viewport: config.mobileViewport,
1562
+ ignoreHTTPSErrors: Boolean(options.ignoreHttpsErrors),
1563
+ timezoneId: config.deviceTimezone
1564
+ }
1565
+ : {
1566
+ viewport: config.desktopViewport,
1567
+ ignoreHTTPSErrors: Boolean(options.ignoreHttpsErrors),
1568
+ timezoneId: config.deviceTimezone,
1569
+ userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"
1570
+ };
1571
+ if (storageStatePath) {
1572
+ contextOptions.storageState = storageStatePath;
1573
+ }
1574
+ if (config.recordVideo) {
1575
+ contextOptions.recordVideo = { dir: options.runDir };
1576
+ }
1577
+ const rawEvents = [];
1578
+ const taskResults = [];
1579
+ const perTaskStepCap = derivePerTaskStepCap(options.suite.tasks.length);
1580
+ let browser = null;
1581
+ let context = null;
1582
+ let page = null;
1583
+ let signingRelay = null;
1584
+ let browserTimezone = config.deviceTimezone;
1585
+ const metaMaskRuntimeState = {
1586
+ activePopupCount: 0,
1587
+ lastDetectedAt: 0,
1588
+ lastActionAt: 0,
1589
+ lastClosedAt: 0
1590
+ };
1591
+ let accessibility = {
1592
+ violations: [],
1593
+ error: "Accessibility audit did not run because the session ended before it reached the audit phase."
1594
+ };
1595
+ let currentTaskRef = null;
1596
+ let walletConnectAttempt = null;
1597
+ let siteBrief = null;
1598
+ let siteChecks = {
1599
+ generatedAt: new Date().toISOString(),
1600
+ baseUrl: options.baseUrl,
1601
+ finalResolvedUrl: null,
1602
+ coverage: {
1603
+ performance: { status: "blocked", summary: "Performance checks did not run.", evidence: [], blockers: [] },
1604
+ seo: { status: "blocked", summary: "SEO checks did not run.", evidence: [], blockers: [] },
1605
+ uiux: { status: "inferred", summary: "UI and UX coverage relies on the interaction audit only.", evidence: [], blockers: [] },
1606
+ security: { status: "blocked", summary: "Security checks did not run.", evidence: [], blockers: [] },
1607
+ technicalHealth: { status: "inferred", summary: "Technical health relies on runtime signals only.", evidence: [], blockers: [] },
1608
+ mobileOptimization: { status: "blocked", summary: "Mobile checks did not run.", evidence: [], blockers: [] },
1609
+ contentQuality: { status: "blocked", summary: "Content checks did not run.", evidence: [], blockers: [] },
1610
+ cro: { status: "inferred", summary: "CRO coverage relies on the interaction audit only.", evidence: [], blockers: [] }
1611
+ },
1612
+ performance: {
1613
+ desktop: null,
1614
+ mobile: null,
1615
+ failedRequestCount: 0,
1616
+ imageFailureCount: 0,
1617
+ apiFailureCount: 0,
1618
+ navigationErrorCount: 0,
1619
+ stalledInteractionCount: 0,
1620
+ evidence: []
1621
+ },
1622
+ seo: {
1623
+ robotsTxt: { url: new URL("/robots.txt", options.baseUrl).toString(), ok: false, statusCode: null, note: "Checks did not run." },
1624
+ sitemap: { url: new URL("/sitemap.xml", options.baseUrl).toString(), ok: false, statusCode: null, note: "Checks did not run." },
1625
+ brokenLinkCount: 0,
1626
+ checkedLinkCount: 0,
1627
+ brokenLinks: [],
1628
+ evidence: []
1629
+ },
1630
+ security: {
1631
+ https: options.baseUrl.startsWith("https://"),
1632
+ secureTransportVerified: false,
1633
+ initialStatusCode: null,
1634
+ securityHeaders: [],
1635
+ missingHeaders: [],
1636
+ evidence: []
1637
+ },
1638
+ technicalHealth: {
1639
+ framework: null,
1640
+ consoleErrorCount: 0,
1641
+ consoleWarningCount: 0,
1642
+ pageErrorCount: 0,
1643
+ apiFailureCount: 0,
1644
+ evidence: []
1645
+ },
1646
+ mobileOptimization: {
1647
+ desktop: null,
1648
+ mobile: null,
1649
+ responsiveVerdict: "blocked",
1650
+ evidence: []
1651
+ },
1652
+ contentQuality: {
1653
+ readabilityScore: null,
1654
+ readabilityLabel: "Blocked",
1655
+ wordCount: 0,
1656
+ longParagraphCount: 0,
1657
+ mediaCount: 0,
1658
+ evidence: []
1659
+ },
1660
+ cro: {
1661
+ ctaCount: 0,
1662
+ primaryCtas: [],
1663
+ formCount: 0,
1664
+ submitControlCount: 0,
1665
+ trustSignalCount: 0,
1666
+ evidence: []
1667
+ }
1668
+ };
1669
+ try {
1670
+ if (storageStatePath) {
1671
+ const storageStateLabel = summarizeLocalPath(storageStatePath);
1672
+ if (!fs.existsSync(storageStatePath)) {
1673
+ throw new Error(`Configured storage state file '${storageStateLabel}' was not found.`);
1674
+ }
1675
+ rawEvents.push({
1676
+ type: "storage_state_load",
1677
+ time: new Date().toISOString(),
1678
+ path: storageStateLabel,
1679
+ note: `Loading Playwright storage state from '${storageStateLabel}'.`
1680
+ });
1681
+ }
1682
+ const metamaskExtensionPath = getMetaMaskExtensionPath();
1683
+ const metaMaskBrowserMode = Boolean(metamaskExtensionPath);
1684
+ if (metaMaskBrowserMode) {
1685
+ const configuredUserDataDir = getMetaMaskUserDataDir();
1686
+ const defaultUserDataDir = path.join(process.env.HOME || process.cwd(), ".site-agent-metamask-profile");
1687
+ const userDataDir = resolveLocalPath(configuredUserDataDir || defaultUserDataDir);
1688
+ ensureDir(userDataDir);
1689
+ rawEvents.push({
1690
+ type: "metamask_profile",
1691
+ time: new Date().toISOString(),
1692
+ path: summarizeLocalPath(userDataDir),
1693
+ persistent: Boolean(configuredUserDataDir),
1694
+ note: configuredUserDataDir
1695
+ ? `Launching MetaMask with persistent Chromium user data from '${summarizeLocalPath(userDataDir)}'.`
1696
+ : `Launching MetaMask with the default persistent Chromium user data at '${summarizeLocalPath(userDataDir)}'. Set WALLET_METAMASK_USER_DATA_DIR to reuse a specific unlocked MetaMask profile.`
1697
+ });
1698
+ context = await chromium.launchPersistentContext(userDataDir, {
1699
+ ...(await resolveLaunchOptions({ headed: options.headed })),
1700
+ ...contextOptions
1701
+ });
1702
+ browser = context.browser();
1703
+ }
1704
+ else {
1705
+ browser = await chromium.launch(await resolveLaunchOptions({ headed: options.headed }));
1706
+ context = await browser.newContext(contextOptions);
1707
+ }
1708
+ await installPlaywrightPageCompat(context);
1709
+ // --- Web3 wallet injection ---
1710
+ if (isWalletConfigured()) {
1711
+ try {
1712
+ const walletConfig = await getWalletConfig();
1713
+ if (walletConfig) {
1714
+ signingRelay = await startSigningRelay();
1715
+ const injectionScript = buildWeb3InjectionScript({
1716
+ walletConfig,
1717
+ relayPort: signingRelay.port
1718
+ });
1719
+ await context.addInitScript(injectionScript);
1720
+ rawEvents.push({
1721
+ type: "wallet_injected",
1722
+ time: new Date().toISOString(),
1723
+ address: walletConfig.address,
1724
+ chainId: walletConfig.chainId,
1725
+ relayPort: signingRelay.port,
1726
+ mode: walletConfig.metamaskExtensionPath ? "metamask_extension" : "programmatic",
1727
+ note: `Web3 wallet injected — address ${walletConfig.address} on chain ${walletConfig.chainId} (relay on port ${signingRelay.port}).`
1728
+ });
1729
+ // MetaMask popup auto-approve handler
1730
+ if (metaMaskBrowserMode) {
1731
+ context.on("page", async (popupPage) => {
1732
+ await handleMetaMaskPopup({
1733
+ popupPage,
1734
+ runDir: options.runDir,
1735
+ rawEvents,
1736
+ runtimeState: metaMaskRuntimeState,
1737
+ getCurrentTaskRef: () => currentTaskRef
1738
+ });
1739
+ });
1740
+ }
1741
+ }
1742
+ }
1743
+ catch (walletError) {
1744
+ rawEvents.push({
1745
+ type: "wallet_injection_error",
1746
+ time: new Date().toISOString(),
1747
+ note: `Failed to inject Web3 wallet: ${cleanErrorMessage(walletError)}`
1748
+ });
1749
+ warn(`Web3 wallet injection failed: ${cleanErrorMessage(walletError)}`);
1750
+ }
1751
+ }
1752
+ page = await context.newPage();
1753
+ page.setDefaultNavigationTimeout(config.navigationTimeoutMs);
1754
+ page.setDefaultTimeout(config.navigationTimeoutMs);
1755
+ browserTimezone =
1756
+ (await page.evaluate(() => Intl.DateTimeFormat().resolvedOptions().timeZone).catch(() => config.deviceTimezone)) ||
1757
+ config.deviceTimezone;
1758
+ rawEvents.push({
1759
+ type: "execution_budget",
1760
+ time: new Date().toISOString(),
1761
+ budgetSeconds: executionBudgetSeconds,
1762
+ note: `Browser execution budget is capped at ${executionBudgetSeconds} seconds for this run.`
1763
+ });
1764
+ rawEvents.push({
1765
+ type: "trade_configuration",
1766
+ time: new Date().toISOString(),
1767
+ tradeOptions,
1768
+ note: tradeOptions.enabled
1769
+ ? `Deterministic trade execution is enabled for this run${tradeOptions.dryRun ? " in dry-run mode" : ""}.`
1770
+ : "Deterministic trade execution is disabled for this run."
1771
+ });
1772
+ rawEvents.push({
1773
+ type: "timezone_sync",
1774
+ time: new Date().toISOString(),
1775
+ deviceTimezone: config.deviceTimezone,
1776
+ browserTimezone,
1777
+ note: browserTimezone === config.deviceTimezone
1778
+ ? `Browser timezone was synchronized to ${browserTimezone}.`
1779
+ : `Browser reported ${browserTimezone} while the device timezone is ${config.deviceTimezone}.`
1780
+ });
1781
+ page.on("console", (msg) => {
1782
+ rawEvents.push({ type: "console", level: msg.type(), text: msg.text(), time: new Date().toISOString() });
1783
+ });
1784
+ page.on("pageerror", (error) => {
1785
+ rawEvents.push({ type: "pageerror", text: error.message, time: new Date().toISOString() });
1786
+ });
1787
+ page.on("requestfailed", (request) => {
1788
+ rawEvents.push({
1789
+ type: "requestfailed",
1790
+ url: request.url(),
1791
+ method: request.method(),
1792
+ failure: request.failure()?.errorText ?? "unknown",
1793
+ time: new Date().toISOString()
1794
+ });
1795
+ });
1796
+ const initialNavigation = await navigateToBaseUrl({
1797
+ page,
1798
+ baseUrl: options.baseUrl,
1799
+ rawEvents,
1800
+ phase: "initial"
1801
+ });
1802
+ const initialPageState = await capturePageState(page);
1803
+ if (!initialNavigation.success || isBrowserErrorPage(initialPageState.url)) {
1804
+ const blockedNote = !initialNavigation.success
1805
+ ? initialNavigation.note
1806
+ : `The browser remained on '${initialPageState.url}' after attempting the submitted URL.`;
1807
+ siteBrief = buildNavigationBlockedSiteBrief({
1808
+ baseUrl: options.baseUrl,
1809
+ note: blockedNote
1810
+ });
1811
+ rawEvents.push({
1812
+ type: "site_brief",
1813
+ time: new Date().toISOString(),
1814
+ summary: siteBrief.summary,
1815
+ sitePurpose: siteBrief.sitePurpose,
1816
+ intendedUserActions: siteBrief.intendedUserActions,
1817
+ evidence: siteBrief.evidence,
1818
+ note: `Skipped model-based site brief because the submitted URL did not load cleanly: ${blockedNote}`
1819
+ });
1820
+ }
1821
+ else {
1822
+ const siteBriefResolution = await deriveSiteBrief({
1823
+ pageState: initialPageState,
1824
+ llm
1825
+ });
1826
+ siteBrief = siteBriefResolution.siteBrief;
1827
+ rawEvents.push({
1828
+ type: "site_brief",
1829
+ time: new Date().toISOString(),
1830
+ summary: siteBrief.summary,
1831
+ sitePurpose: siteBrief.sitePurpose,
1832
+ intendedUserActions: siteBrief.intendedUserActions,
1833
+ evidence: siteBrief.evidence,
1834
+ note: siteBriefResolution.fallbackReason
1835
+ ? `The site brief fell back to a deterministic summary after the model-based comprehension step failed: ${siteBriefResolution.fallbackReason}`
1836
+ : "The run generated an upfront site brief before the accepted tasks started."
1837
+ });
1838
+ if (siteBriefResolution.fallbackReason) {
1839
+ warn(`Site brief fallback for '${options.baseUrl}': ${siteBriefResolution.fallbackReason}`);
1840
+ }
1841
+ }
1842
+ const authBootstrapConfigured = isAuthBootstrapConfigured(options.baseUrl);
1843
+ let autoAuthAttempted = false;
1844
+ for (const [index, task] of options.suite.tasks.entries()) {
1845
+ if (Date.now() >= sessionDeadline) {
1846
+ rawEvents.push({
1847
+ type: "session_timeout",
1848
+ time: new Date().toISOString(),
1849
+ note: `Session reached its ${executionBudgetSeconds}-second execution budget before the next task started.`
1850
+ });
1851
+ break;
1852
+ }
1853
+ const history = [];
1854
+ const taskNotes = [];
1855
+ const pageSignatures = [];
1856
+ currentTaskRef = {
1857
+ name: task.name,
1858
+ index,
1859
+ history,
1860
+ step: 0
1861
+ };
1862
+ const previousTask = index > 0 ? options.suite.tasks[index - 1] : undefined;
1863
+ const previousHistory = taskResults[taskResults.length - 1]?.history ?? [];
1864
+ const continueFromCurrentPage = !isBrowserErrorPage(page.url()) &&
1865
+ shouldContinueFromCurrentPage({
1866
+ previousTask,
1867
+ currentTask: task,
1868
+ previousHistory
1869
+ });
1870
+ if (continueFromCurrentPage) {
1871
+ rawEvents.push({
1872
+ type: "task_continuation",
1873
+ time: new Date().toISOString(),
1874
+ task: task.name,
1875
+ url: page.url(),
1876
+ note: `Continuing '${task.name}' from the current page because it appears to be the next compact step in the same flow rather than an independent resettable task.`
1877
+ });
1878
+ taskNotes.push("This task continued from the previous task's final page state because the submitted steps appeared to form one compact flow.");
1879
+ await sleep(config.actionDelayMs);
1880
+ }
1881
+ else {
1882
+ const resetNavigation = await navigateToBaseUrl({
1883
+ page,
1884
+ baseUrl: options.baseUrl,
1885
+ rawEvents,
1886
+ phase: "task_reset",
1887
+ taskName: task.name,
1888
+ taskNotes
1889
+ });
1890
+ if (resetNavigation.success) {
1891
+ await sleep(config.actionDelayMs);
1892
+ }
1893
+ }
1894
+ for (let step = 1; step <= perTaskStepCap; step += 1) {
1895
+ if (currentTaskRef) {
1896
+ currentTaskRef.step = Math.max(currentTaskRef.step, step - 1) + 1;
1897
+ }
1898
+ const activeStep = currentTaskRef ? currentTaskRef.step : step;
1899
+ const remainingSessionMs = sessionDeadline - Date.now();
1900
+ if (remainingSessionMs <= 0) {
1901
+ rawEvents.push({
1902
+ type: "session_timeout",
1903
+ time: new Date().toISOString(),
1904
+ task: task.name,
1905
+ note: `Session reached its ${executionBudgetSeconds}-second execution budget before the next action.`
1906
+ });
1907
+ break;
1908
+ }
1909
+ if (remainingSessionMs < minimumUsefulStepWindowMs + config.postRunAuditReserveMs) {
1910
+ rawEvents.push({
1911
+ type: "session_timeout",
1912
+ time: new Date().toISOString(),
1913
+ task: task.name,
1914
+ note: `Session stopped with ${Math.ceil(remainingSessionMs / 1000)} seconds left to preserve time for supplemental site checks and final analysis.`
1915
+ });
1916
+ taskNotes.push("The agent stopped exploring because the remaining session time was too short for another meaningful interaction while still preserving post-run verification time.");
1917
+ break;
1918
+ }
1919
+ if (shouldPreserveCoverageForRemainingTasks({
1920
+ remainingSessionMs,
1921
+ remainingTasksAfterCurrent: options.suite.tasks.length - index - 1,
1922
+ minimumUsefulStepWindowMs
1923
+ })) {
1924
+ rawEvents.push({
1925
+ type: "session_timeout",
1926
+ time: new Date().toISOString(),
1927
+ task: task.name,
1928
+ note: `This task stopped early because the remaining session time needed to be preserved for the remaining coverage lanes and supplemental site checks.`
1929
+ });
1930
+ taskNotes.push("The agent stopped this task because the remaining session time needed to be preserved for the remaining coverage lanes and supplemental site checks.");
1931
+ break;
1932
+ }
1933
+ const pageState = await capturePageState(page);
1934
+ if (authBootstrapConfigured && !autoAuthAttempted && context && taskAllowsAutoAuth(task.goal)) {
1935
+ const authWall = await detectAuthWall(page);
1936
+ const autoAuthBudgetMs = Math.max(0, remainingSessionMs - config.postRunAuditReserveMs);
1937
+ if (authWall.required && autoAuthBudgetMs >= Math.max(20000, minimumUsefulStepWindowMs)) {
1938
+ autoAuthAttempted = true;
1939
+ rawEvents.push({
1940
+ type: "auto_auth_start",
1941
+ time: new Date().toISOString(),
1942
+ task: task.name,
1943
+ step: activeStep,
1944
+ url: pageState.url,
1945
+ authKind: authWall.kind,
1946
+ note: `Detected an auth wall during task execution and will attempt automatic signup/login: ${authWall.reason}`
1947
+ });
1948
+ const authExecution = await runAuthFlowInContext({
1949
+ page,
1950
+ context,
1951
+ baseUrl: options.baseUrl,
1952
+ runDir: options.runDir,
1953
+ accessUrl: pageState.url,
1954
+ timeoutMs: autoAuthBudgetMs,
1955
+ headed: Boolean(options.headed),
1956
+ mobile: Boolean(options.mobile)
1957
+ });
1958
+ rawEvents.push({
1959
+ type: "auto_auth_result",
1960
+ time: new Date().toISOString(),
1961
+ task: task.name,
1962
+ step: activeStep,
1963
+ status: authExecution.status,
1964
+ accessConfirmed: authExecution.accessConfirmed,
1965
+ accountEmail: authExecution.accountEmail || null,
1966
+ verificationMethod: authExecution.verificationMethod,
1967
+ note: authExecution.status === "failed"
1968
+ ? `Automatic signup/login failed: ${authExecution.error ?? "Unknown auth error"}`
1969
+ : `Automatic signup/login completed with status '${authExecution.status}'.`
1970
+ });
1971
+ if (authExecution.status !== "failed") {
1972
+ if (authExecution.accessConfirmed) {
1973
+ const refreshedPageState = await capturePageState(page);
1974
+ const refreshedSiteBriefResolution = await deriveSiteBrief({
1975
+ pageState: refreshedPageState,
1976
+ llm
1977
+ });
1978
+ siteBrief = refreshedSiteBriefResolution.siteBrief;
1979
+ rawEvents.push({
1980
+ type: "site_brief_refresh",
1981
+ time: new Date().toISOString(),
1982
+ summary: siteBrief.summary,
1983
+ sitePurpose: siteBrief.sitePurpose,
1984
+ intendedUserActions: siteBrief.intendedUserActions,
1985
+ evidence: siteBrief.evidence,
1986
+ note: refreshedSiteBriefResolution.fallbackReason
1987
+ ? `The site brief was refreshed after automatic auth using a deterministic fallback: ${refreshedSiteBriefResolution.fallbackReason}`
1988
+ : "The site brief was refreshed after the automatic auth recovery succeeded."
1989
+ });
1990
+ if (refreshedSiteBriefResolution.fallbackReason) {
1991
+ warn(`Site brief refresh fallback for '${options.baseUrl}': ${refreshedSiteBriefResolution.fallbackReason}`);
1992
+ }
1993
+ }
1994
+ await sleep(config.actionDelayMs);
1995
+ continue;
1996
+ }
1997
+ taskNotes.push(`Automatic signup/login failed: ${authExecution.error ?? authWall.reason}`);
1998
+ }
1999
+ }
2000
+ const pageSignature = buildPageSignature(pageState);
2001
+ const shouldStop = shouldStopForStagnation({
2002
+ history,
2003
+ pageSignature,
2004
+ pageSignatures
2005
+ });
2006
+ const planning = shouldStop
2007
+ ? {
2008
+ decision: {
2009
+ thought: "The page has remained effectively unchanged across repeated high-friction or no-progress steps even after extended follow-up attempts.",
2010
+ stepNumber: null,
2011
+ instructionQuote: "",
2012
+ action: "stop",
2013
+ target_id: "",
2014
+ target: "",
2015
+ text: "",
2016
+ expectation: "Stop this task and record that the page appears stalled or blocked.",
2017
+ friction: "high"
2018
+ }
2019
+ }
2020
+ : await decideNextAction({
2021
+ suite: options.suite,
2022
+ taskIndex: index,
2023
+ siteBrief: siteBrief ?? {
2024
+ sitePurpose: "The site purpose could not be confidently summarized before task execution began.",
2025
+ intendedUserActions: [],
2026
+ summary: "The site purpose could not be confidently summarized before task execution began.",
2027
+ evidence: []
2028
+ },
2029
+ pageState,
2030
+ history,
2031
+ remainingSeconds: Math.max(1, Math.floor((sessionDeadline - Date.now()) / 1000)),
2032
+ tradeOptions,
2033
+ llm
2034
+ });
2035
+ let decision = planning.decision;
2036
+ if (planning.fallbackReason) {
2037
+ rawEvents.push({
2038
+ type: "planner_fallback",
2039
+ time: new Date().toISOString(),
2040
+ task: task.name,
2041
+ step: activeStep,
2042
+ url: pageState.url,
2043
+ note: `Planner request did not finish cleanly, so a deterministic fallback action was used (${decision.action}${decision.target ? ` '${decision.target}'` : ""}): ${planning.fallbackReason}`
2044
+ });
2045
+ taskNotes.push(`The agent used a heuristic fallback action at step ${activeStep} because the planner did not respond in time: ${planning.fallbackReason}`);
2046
+ warn(`Planner fallback for '${task.name}' step ${activeStep} at '${pageState.url}': ${planning.fallbackReason}`);
2047
+ }
2048
+ const metaMaskFlowLooksPending = Boolean(metamaskExtensionPath) &&
2049
+ (pageLooksLikeWalletFlowIsPending(pageState) ||
2050
+ metaMaskRuntimeState.activePopupCount > 0 ||
2051
+ Date.now() - metaMaskRuntimeState.lastDetectedAt < 15000 ||
2052
+ Date.now() - metaMaskRuntimeState.lastActionAt < 10000);
2053
+ if (!shouldStop && decision.action === "stop" && metaMaskFlowLooksPending) {
2054
+ rawEvents.push({
2055
+ type: "metamask_pending_wait",
2056
+ time: new Date().toISOString(),
2057
+ task: task.name,
2058
+ step: activeStep,
2059
+ url: pageState.url,
2060
+ note: "A MetaMask interaction still appears to be in flight, so the runner will wait instead of stopping early."
2061
+ });
2062
+ decision = {
2063
+ thought: "A MetaMask interaction appears to still be pending, so pause briefly instead of stopping while the wallet popup opens or finishes auto-approval.",
2064
+ stepNumber: null,
2065
+ instructionQuote: decision.instructionQuote,
2066
+ action: "wait",
2067
+ target_id: "",
2068
+ target: "",
2069
+ text: "",
2070
+ expectation: "The MetaMask popup should appear, update, or finish auto-approval.",
2071
+ friction: "low"
2072
+ };
2073
+ }
2074
+ let preparedClickResolution = !shouldStop && decision.action === "click"
2075
+ ? await prepareClickDecision(page, decision)
2076
+ : null;
2077
+ const walletConnectClickDetected = !shouldStop && decision.action === "click"
2078
+ ? await decisionLooksLikeWalletConnect({
2079
+ pageState,
2080
+ decision,
2081
+ ...(preparedClickResolution?.preparedClick?.locator
2082
+ ? { locator: preparedClickResolution.preparedClick.locator }
2083
+ : {})
2084
+ })
2085
+ : false;
2086
+ if (!shouldStop && walletConnectClickDetected && walletConnectAttempt) {
2087
+ rawEvents.push({
2088
+ type: "wallet_connect_repeat_prevented",
2089
+ time: new Date().toISOString(),
2090
+ task: task.name,
2091
+ step: activeStep,
2092
+ url: pageState.url,
2093
+ note: `Skipped a repeated wallet-connect click because '${walletConnectAttempt.target}' was already clicked during '${walletConnectAttempt.taskName}' step ${walletConnectAttempt.step}.`
2094
+ });
2095
+ decision = {
2096
+ thought: metaMaskFlowLooksPending
2097
+ ? "A Connect Wallet click already happened earlier in this run, so wait for that wallet flow to resolve instead of clicking the trigger again."
2098
+ : "Connect Wallet was already clicked once, and there is no wallet popup or page transition currently in flight, so stop instead of waiting on the same unchanged step.",
2099
+ stepNumber: decision.stepNumber,
2100
+ instructionQuote: decision.instructionQuote,
2101
+ action: metaMaskFlowLooksPending ? "wait" : "stop",
2102
+ target_id: "",
2103
+ target: "",
2104
+ text: "",
2105
+ expectation: metaMaskFlowLooksPending
2106
+ ? "The existing wallet connection flow should complete or expose a clearer next step without opening duplicate requests."
2107
+ : "Stop this task and report that the single allowed wallet-connect click did not lead to a visible wallet flow or page change.",
2108
+ friction: metaMaskFlowLooksPending ? "low" : "high"
2109
+ };
2110
+ preparedClickResolution = null;
2111
+ }
2112
+ else if (walletConnectClickDetected && preparedClickResolution?.preparedClick && !walletConnectAttempt) {
2113
+ walletConnectAttempt = {
2114
+ taskName: task.name,
2115
+ step: activeStep,
2116
+ target: (await readLocatorLabel(preparedClickResolution.preparedClick.locator)) || decision.target || "Connect Wallet"
2117
+ };
2118
+ }
2119
+ const beforeScreenshotPath = decision.action === "click"
2120
+ ? await captureInteractionScreenshot({
2121
+ page,
2122
+ runDir: options.runDir,
2123
+ taskName: task.name,
2124
+ taskIndex: index,
2125
+ step: activeStep,
2126
+ phase: "before",
2127
+ rawEvents
2128
+ })
2129
+ : undefined;
2130
+ const result = shouldStop
2131
+ ? {
2132
+ success: true,
2133
+ stop: true,
2134
+ note: "Stopped after repeated unchanged page states with no meaningful progress even after extended follow-up attempts."
2135
+ }
2136
+ : decision.action === "trade"
2137
+ ? await executeTradeHandoff({
2138
+ pageState,
2139
+ taskName: task.name,
2140
+ taskGoal: task.goal,
2141
+ runDir: options.runDir,
2142
+ tradeOptions,
2143
+ rawEvents
2144
+ })
2145
+ : decision.action === "pay"
2146
+ ? await executePaystackTransfer({
2147
+ decision,
2148
+ rawEvents
2149
+ })
2150
+ : decision.action === "click" && preparedClickResolution && !preparedClickResolution.preparedClick
2151
+ ? {
2152
+ success: false,
2153
+ note: preparedClickResolution.note ?? `Could not find clickable element for '${decision.target.trim()}'`
2154
+ }
2155
+ : await executeDecision(page, decision, preparedClickResolution?.preparedClick, {
2156
+ singleClickAttempt: walletConnectClickDetected
2157
+ });
2158
+ const afterScreenshotPath = decision.action === "click"
2159
+ ? await captureInteractionScreenshot({
2160
+ page,
2161
+ runDir: options.runDir,
2162
+ taskName: task.name,
2163
+ taskIndex: index,
2164
+ step: activeStep,
2165
+ phase: "after",
2166
+ rawEvents
2167
+ })
2168
+ : undefined;
2169
+ const resultWithArtifacts = {
2170
+ ...result,
2171
+ ...(beforeScreenshotPath ? { beforeScreenshotPath } : {}),
2172
+ ...(afterScreenshotPath ? { afterScreenshotPath } : {})
2173
+ };
2174
+ const entry = {
2175
+ time: new Date().toISOString(),
2176
+ task: task.name,
2177
+ step: activeStep,
2178
+ url: page.url(),
2179
+ title: await page.title().catch(() => ""),
2180
+ decision,
2181
+ result: resultWithArtifacts
2182
+ };
2183
+ history.push(entry);
2184
+ pageSignatures.push(pageSignature);
2185
+ rawEvents.push({ type: "interaction", ...entry });
2186
+ if (result.stop || decision.action === "stop") {
2187
+ break;
2188
+ }
2189
+ // After a successful OTP-trigger click, attempt to retrieve and fill the OTP code
2190
+ if (decision.action === "click" &&
2191
+ result.success &&
2192
+ OTP_TRIGGER_CLICK_PATTERN.test(decision.target || "")) {
2193
+ rawEvents.push({
2194
+ type: "otp_trigger_detected",
2195
+ time: new Date().toISOString(),
2196
+ task: task.name,
2197
+ step,
2198
+ note: `Detected OTP trigger click on '${decision.target}'. Will attempt to retrieve OTP from email.`
2199
+ });
2200
+ const otpResult = await attemptOtpRetrieval({
2201
+ page,
2202
+ baseUrl: options.baseUrl,
2203
+ rawEvents,
2204
+ taskName: task.name,
2205
+ step
2206
+ });
2207
+ if (otpResult.filled) {
2208
+ taskNotes.push(`OTP code was retrieved from email and filled into the verification field.`);
2209
+ }
2210
+ else if (otpResult.error) {
2211
+ taskNotes.push(`OTP retrieval: ${otpResult.error}`);
2212
+ // Halt execution completely if OTP retrieval fails so the agent does not wander
2213
+ result.success = false;
2214
+ result.stop = true;
2215
+ result.note += ` (Stopped due to OTP retrieval failure: ${otpResult.error})`;
2216
+ history[history.length - 1].result = result;
2217
+ break;
2218
+ }
2219
+ }
2220
+ if (result.success && taskShouldEndAfterSuccessfulCompactStep(task.goal, history)) {
2221
+ rawEvents.push({
2222
+ type: "compact_task_completed",
2223
+ time: new Date().toISOString(),
2224
+ task: task.name,
2225
+ step: activeStep,
2226
+ url: page.url(),
2227
+ note: `Completed the explicit compact task '${task.goal}' and will continue to the next accepted task instead of exploring additional controls in the same view.`
2228
+ });
2229
+ break;
2230
+ }
2231
+ if (shouldPauseAfterStep({
2232
+ decisionAction: decision.action,
2233
+ resultSuccess: result.success,
2234
+ stopped: Boolean(result.stop)
2235
+ })) {
2236
+ await sleep(config.actionDelayMs);
2237
+ }
2238
+ }
2239
+ const finalUrl = page.url();
2240
+ const finalTitle = await page.title().catch(() => "");
2241
+ const inferred = inferTaskStatus(history, finalUrl, finalTitle, task, taskNotes);
2242
+ taskResults.push({
2243
+ name: task.name,
2244
+ status: inferred.status,
2245
+ finalUrl,
2246
+ finalTitle,
2247
+ history,
2248
+ reason: inferred.reason
2249
+ });
2250
+ if (Date.now() >= sessionDeadline) {
2251
+ break;
2252
+ }
2253
+ }
2254
+ const currentStorageState = context ? await context.storageState().catch(() => undefined) : undefined;
2255
+ const siteChecksBrowser = browser ?? context?.browser() ?? null;
2256
+ const remainingSiteChecksBudgetMs = Math.max(0, sessionDeadline - Date.now());
2257
+ if (siteChecksBrowser) {
2258
+ siteChecks = await runSiteChecks({
2259
+ browser: siteChecksBrowser,
2260
+ baseUrl: options.baseUrl,
2261
+ ignoreHttpsErrors: Boolean(options.ignoreHttpsErrors),
2262
+ browserTimezone,
2263
+ storageState: currentStorageState,
2264
+ rawEvents,
2265
+ taskResults,
2266
+ budgetMs: remainingSiteChecksBudgetMs
2267
+ });
2268
+ }
2269
+ else {
2270
+ rawEvents.push({
2271
+ type: "site_checks_skipped",
2272
+ time: new Date().toISOString(),
2273
+ note: "Skipped site checks because no browser instance was available after the MetaMask session."
2274
+ });
2275
+ }
2276
+ const remainingAccessibilityBudgetMs = sessionDeadline - Date.now();
2277
+ accessibility =
2278
+ remainingAccessibilityBudgetMs < 5000
2279
+ ? {
2280
+ violations: [],
2281
+ error: `Accessibility audit skipped because the ${executionBudgetSeconds}-second browser execution budget was exhausted.`
2282
+ }
2283
+ : await runAccessibilityAudit(page).catch((error) => ({
2284
+ violations: [],
2285
+ error: `Accessibility audit failed: ${cleanErrorMessage(error)}`
2286
+ }));
2287
+ }
2288
+ catch (error) {
2289
+ const note = `Runner recovered from an unexpected error and will finalize the report with partial evidence: ${cleanErrorMessage(error)}`;
2290
+ rawEvents.push({
2291
+ type: "runner_error",
2292
+ time: new Date().toISOString(),
2293
+ note
2294
+ });
2295
+ accessibility = {
2296
+ violations: accessibility.violations,
2297
+ error: `Accessibility audit could not be completed because the session ended early: ${cleanErrorMessage(error)}`
2298
+ };
2299
+ }
2300
+ finally {
2301
+ if (saveStorageStatePath) {
2302
+ const storageStateLabel = summarizeLocalPath(saveStorageStatePath);
2303
+ if (!context) {
2304
+ rawEvents.push({
2305
+ type: "storage_state_save_error",
2306
+ time: new Date().toISOString(),
2307
+ path: storageStateLabel,
2308
+ note: `Requested storage state save to '${storageStateLabel}', but no browser context was available.`
2309
+ });
2310
+ }
2311
+ else {
2312
+ try {
2313
+ ensureDir(path.dirname(saveStorageStatePath));
2314
+ await context.storageState({ path: saveStorageStatePath });
2315
+ rawEvents.push({
2316
+ type: "storage_state_save",
2317
+ time: new Date().toISOString(),
2318
+ path: storageStateLabel,
2319
+ note: `Saved Playwright storage state to '${storageStateLabel}'.`
2320
+ });
2321
+ }
2322
+ catch (error) {
2323
+ rawEvents.push({
2324
+ type: "storage_state_save_error",
2325
+ time: new Date().toISOString(),
2326
+ path: storageStateLabel,
2327
+ note: `Failed to save Playwright storage state to '${storageStateLabel}': ${cleanErrorMessage(error)}`
2328
+ });
2329
+ }
2330
+ }
2331
+ }
2332
+ writeJson(path.join(options.runDir, "raw-events.json"), rawEvents);
2333
+ writeJson(path.join(options.runDir, "task-results.json"), taskResults);
2334
+ writeJson(path.join(options.runDir, "accessibility.json"), accessibility);
2335
+ writeJson(path.join(options.runDir, "site-checks.json"), siteChecks);
2336
+ await context?.close().catch(() => undefined);
2337
+ await browser?.close().catch(() => undefined);
2338
+ // Clean up the signing relay if it was started
2339
+ if (signingRelay) {
2340
+ await signingRelay.close().catch(() => undefined);
2341
+ }
2342
+ }
2343
+ return {
2344
+ rawEvents,
2345
+ taskResults,
2346
+ accessibility,
2347
+ siteChecks,
2348
+ siteBrief,
2349
+ browserTimezone,
2350
+ deviceTimezone: config.deviceTimezone
2351
+ };
2352
+ }