@canaryai/cli 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1083 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/runner/config.ts
4
+ import path2 from "path";
5
+
6
+ // src/runner/env.ts
7
+ import fs from "fs";
8
+ import path from "path";
9
// Latch flipped on the first loadCanaryEnv() call so the env file is read at most once.
var loaded = false;

/**
 * Load variables from a dotenv-style file into process.env, once per process.
 *
 * The file path is CANARY_ENV_FILE when that variable is set to a non-empty
 * value, otherwise `.env` in the current working directory. A path that does
 * not exist is ignored. Every call after the first is a no-op, even when the
 * first call found no file.
 */
function loadCanaryEnv() {
  if (loaded) {
    return;
  }
  loaded = true;

  const fallbackPath = path.join(process.cwd(), ".env");
  const chosenPath = process.env.CANARY_ENV_FILE || fallbackPath;
  if (!fs.existsSync(chosenPath)) {
    return;
  }
  loadFile(chosenPath);
}
20
/**
 * Read a dotenv-style file and copy its entries into process.env.
 *
 * Parsing rules:
 * - blank lines and lines starting with `#` are skipped;
 * - a line is split at its first `=`; lines without `=` or with an empty key
 *   are skipped;
 * - a leading `export ` on the key is dropped, so shell-style
 *   `export KEY=value` lines define `KEY` rather than `"export KEY"`;
 * - variables already present in process.env are never overwritten;
 * - values are passed through stripQuotes().
 *
 * Loading is best-effort: a failure never throws. When CANARY_DEBUG is "1"
 * the failure is reported on the console instead of being dropped silently.
 *
 * @param {string} filePath - Path of the env file to read.
 * @returns {void}
 */
function loadFile(filePath) {
  try {
    const raw = fs.readFileSync(filePath, "utf-8");
    for (const line of raw.split("\n")) {
      // trim() also removes the trailing "\r" of CRLF files.
      const trimmed = line.trim();
      if (!trimmed || trimmed.startsWith("#")) continue;
      const idx = trimmed.indexOf("=");
      if (idx === -1) continue;
      const key = trimmed.slice(0, idx).trim().replace(/^export\s+/, "");
      const value = trimmed.slice(idx + 1).trim();
      if (!key) continue;
      // The real environment wins over the file.
      if (process.env[key] === void 0) {
        process.env[key] = stripQuotes(value);
      }
    }
  } catch (error) {
    // Deliberately non-fatal; only surface the reason when debugging.
    if (process.env.CANARY_DEBUG === "1") {
      const message = error instanceof Error ? error.message : String(error);
      console.log(`[canary][debug] failed to load env file ${filePath}: ${message}`);
    }
  }
}
38
/**
 * Remove one pair of matching surrounding quotes from a value.
 *
 * Only a value of at least two characters that starts and ends with the same
 * quote character (`"` or `'`) is unwrapped. A lone quote character is left
 * untouched: it is a one-character value, not an empty quoted string.
 *
 * @param {string} value - Raw value text.
 * @returns {string} The value without its surrounding quote pair, or the
 *   value unchanged when it is not wrapped in a matching pair.
 */
function stripQuotes(value) {
  if (value.length < 2) {
    return value;
  }
  const first = value[0];
  const last = value[value.length - 1];
  const isQuote = first === '"' || first === "'";
  return isQuote && first === last ? value.slice(1, -1) : value;
}
44
+
45
+ // src/runner/config.ts
46
/**
 * Build the runner configuration from environment variables.
 *
 * loadCanaryEnv() is called first so values from the env file are visible.
 * The configuration is rebuilt on every call; nothing is cached here.
 *
 * Numeric settings (AI_TIMEOUT_MS, CANARY_MAX_ACTIONS,
 * CANARY_MAX_PAYLOAD_BYTES) fall back to their defaults when the variable is
 * unset, empty, or not a finite number, so a typo can no longer put NaN into
 * the timeout or step limit.
 *
 * CANARY_EVENT_LOG doubles as a path override and, when "0", as the switch
 * that turns event logging off. The value "0" is therefore not used as a path.
 *
 * @returns {object} Configuration object; `enabled` is false when
 *   CANARY_ENABLED is "0", CANARY_DISABLED is "1", or AI_HEALING is "0".
 */
function loadCanaryConfig() {
  loadCanaryEnv();
  const env = process.env;

  // Parse a numeric variable, ignoring unset, blank, and non-numeric values.
  const numberFromEnv = (name, fallback) => {
    const raw = env[name];
    if (raw === void 0 || raw.trim() === "") return fallback;
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : fallback;
  };

  const eventLogPath = (() => {
    const userPath = env.CANARY_EVENT_LOG;
    if (userPath && userPath !== "0") return userPath;
    const worker = env.TEST_WORKER_INDEX ?? "runner";
    return path2.join(process.cwd(), "test-results", "ai-healer", `events-worker-${worker}.jsonl`);
  })();

  const disabled = env.CANARY_ENABLED === "0" || env.CANARY_DISABLED === "1" || env.AI_HEALING === "0";

  return {
    enabled: !disabled,
    allowedPlaywrightVersion: env.CANARY_PLAYWRIGHT_VERSION,
    aiProvider: env.AI_PROVIDER,
    aiModel: env.AI_MODEL,
    apiKey: env.AI_API_KEY,
    // 2 minutes for agentic healing
    healTimeoutMs: numberFromEnv("AI_TIMEOUT_MS", 12e4),
    // Generous step limit for agentic healing
    maxActions: numberFromEnv("CANARY_MAX_ACTIONS", 50),
    dryRun: env.CANARY_DRY_RUN === "1",
    warnOnly: env.CANARY_WARN_ONLY === "1",
    visionEnabled: env.CANARY_VISION === "1" || env.AI_VISION === "1",
    debug: env.CANARY_DEBUG === "1",
    readOnly: env.CANARY_READ_ONLY === "1",
    allowRunCode: env.CANARY_ALLOW_RUN_CODE === "1",
    // Evaluate is on unless explicitly set to "0".
    allowEvaluate: env.CANARY_ALLOW_EVALUATE !== "0",
    // cap snapshots/screenshots/text
    maxPayloadBytes: numberFromEnv("CANARY_MAX_PAYLOAD_BYTES", 6e4),
    eventLogPath,
    eventLoggingEnabled: env.CANARY_EVENT_LOG !== "0"
  };
}
82
+
83
+ // src/runner/state.ts
84
+ import fs2 from "fs";
85
+ import path3 from "path";
86
/**
 * Return the process-wide array of healing events stored on
 * `globalThis.__CANARY_EVENTS`, creating it on first use.
 *
 * @returns {Array} The shared, mutable event array (same instance on every call).
 */
function getEventLog() {
  const existing = globalThis.__CANARY_EVENTS;
  if (existing) {
    return existing;
  }
  const created = [];
  globalThis.__CANARY_EVENTS = created;
  return created;
}
92
/**
 * Store the event log file path on `globalThis.__CANARY_EVENT_LOG_PATH`.
 *
 * @param {string} path4 - Path to remember.
 * @returns {void}
 */
function setEventLogPath(path4) {
  Object.assign(globalThis, { __CANARY_EVENT_LOG_PATH: path4 });
}
95
/**
 * Read the event log file path from `globalThis.__CANARY_EVENT_LOG_PATH`.
 *
 * @returns {*} The stored value, or undefined when none has been set.
 */
function getEventLogPath() {
  const { __CANARY_EVENT_LOG_PATH: currentPath } = globalThis;
  return currentPath;
}
98
/**
 * Record a healing event in the in-memory log and hand it to appendEvent().
 *
 * The stored entry is a shallow copy of `event` with two fields added or
 * overwritten: `workerId` (from TEST_WORKER_INDEX) and `timestamp` (current
 * time as an ISO-8601 string).
 *
 * @param {object} event - Event fields to record.
 * @returns {void}
 */
function recordHealingEvent(event) {
  loadCanaryEnv();
  const events = getEventLog();
  const workerId = process.env.TEST_WORKER_INDEX;
  const timestamp = new Date().toISOString();
  const stamped = { ...event, workerId, timestamp };
  events.push(stamped);
  appendEvent(stamped);
}
109
/**
 * Set the `globalThis.__CANARY_PATCHED` flag to true.
 *
 * @returns {void}
 */
function markPatched() {
  Object.assign(globalThis, { __CANARY_PATCHED: true });
}
112
/**
 * Report whether `globalThis.__CANARY_PATCHED` is exactly `true`.
 *
 * @returns {boolean} True only for the boolean `true`; any other value,
 *   including truthy non-boolean values, yields false.
 */
function alreadyPatched() {
  const flag = globalThis.__CANARY_PATCHED;
  return flag === true;
}
115
/**
 * Append one event as a JSON line to the event log file.
 *
 * Nothing is written when no log path has been set or when the configuration
 * reports event logging as disabled. The parent directory is created if
 * needed. Writing is best-effort: a failure never throws, but it is reported
 * on the console when debug mode is on instead of being dropped silently.
 *
 * @param {object} event - Event to serialize with JSON.stringify.
 * @returns {void}
 */
function appendEvent(event) {
  const logPath = getEventLogPath();
  if (!logPath) return;
  const config = loadCanaryConfig();
  if (!config.eventLoggingEnabled) return;
  try {
    fs2.mkdirSync(path3.dirname(logPath), { recursive: true });
    fs2.appendFileSync(logPath, JSON.stringify(event) + "\n");
  } catch (error) {
    // Event logging must never break the test run; only explain when debugging.
    if (config.debug) {
      const message = error instanceof Error ? error.message : String(error);
      console.log(`[canary][debug] failed to append event to ${logPath}: ${message}`);
    }
  }
}
126
+
127
+ // src/runner/healer.ts
128
+ import { stepCountIs, streamText, tool } from "ai";
129
+ import { z } from "zod";
130
+
131
+ // src/runner/ai-client.ts
132
+ import { createOpenAI } from "@ai-sdk/openai";
133
+ import { createAnthropic } from "@ai-sdk/anthropic";
134
+ import { createAzure } from "@ai-sdk/azure";
135
/**
 * Pick and construct the AI model used for healing.
 *
 * The provider name is trimmed and lower-cased, and an empty or missing value
 * falls back to "openai". This matches how the model id already falls back on
 * an empty value. Supported providers are "openai", "anthropic" and "azure".
 *
 * @param {object} [config=loadCanaryConfig()] - Runner configuration; reads
 *   `enabled`, `apiKey`, `aiProvider` and `aiModel`.
 * @returns {{model: *, modelId?: string, reason?: string}} On success
 *   `{ model, modelId }`. Otherwise `{ model: null, reason }` where reason is
 *   one of "healing_disabled", "missing_api_key", "missing_azure_resource",
 *   "unsupported_provider", or "model_error:<message>" when the provider
 *   client throws.
 */
function resolveHealerModel(config = loadCanaryConfig()) {
  if (!config.enabled) {
    return { model: null, reason: "healing_disabled" };
  }
  if (!config.apiKey) {
    return { model: null, reason: "missing_api_key" };
  }
  // `||` rather than `??`: AI_PROVIDER="" or "   " should mean "use the default".
  const provider = String(config.aiProvider ?? "").trim().toLowerCase() || "openai";
  const modelId = config.aiModel || (provider === "anthropic" ? "claude-3-5-haiku-20241022" : "gpt-4o-mini");
  try {
    switch (provider) {
      case "openai": {
        const client = createOpenAI({ apiKey: config.apiKey, baseURL: process.env.AI_BASE_URL });
        return { model: client(modelId), modelId };
      }
      case "anthropic": {
        const client = createAnthropic({ apiKey: config.apiKey, baseURL: process.env.AI_BASE_URL });
        return { model: client(modelId), modelId };
      }
      case "azure": {
        const resourceName = process.env.AZURE_OPENAI_RESOURCE_NAME;
        const apiVersion = process.env.AZURE_OPENAI_API_VERSION;
        if (!resourceName) {
          return { model: null, reason: "missing_azure_resource" };
        }
        const client = createAzure({
          apiKey: config.apiKey,
          resourceName,
          apiVersion
        });
        return { model: client(modelId), modelId };
      }
      default:
        return { model: null, reason: "unsupported_provider" };
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { model: null, reason: `model_error:${message}` };
  }
}
175
+
176
+ // src/runner/healer.ts
177
// Replacement rules for sensitive-looking text. Each entry pairs a global
// regex with the placeholder that stands in for whatever it matches.
var REDACTION_PATTERNS = [
  // "Bearer <token>"
  {
    regex: /bearer\s+[a-z0-9._-]+/gi,
    replacement: "[REDACTED_BEARER]"
  },
  // "api_key: <value>", "apikey <value>", "api-key":"<value>" ...
  {
    regex: /api[_-]?key[:\s"']+[a-z0-9._-]+/gi,
    replacement: "[REDACTED_API_KEY]"
  },
  // "secret: <value>" and similar
  {
    regex: /secret[:\s"']+[a-z0-9._-]+/gi,
    replacement: "[REDACTED_SECRET]"
  },
  // E-mail addresses
  {
    regex: /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/g,
    replacement: "[REDACTED_EMAIL]"
  },
  // Runs of 12 or more digits
  {
    regex: /[0-9]{12,}/g,
    replacement: "[REDACTED_NUMBER]"
  }
];

// Source-text patterns that cause model-supplied code to be rejected.
// NOTE(review): this is a plain textual denylist; forms such as
// `process["env"]` are not matched by these patterns.
var CODE_DENY_PATTERNS = [
  /process\.env/i,
  /child_process/i,
  /\brequire\s*\(/i,
  /\bimport\s*\(/i
];
185
/**
 * Decide whether a failed step is worth handing to the healing agent.
 *
 * A failure is not healable when the error says the page, context or browser
 * is gone, or when navigation failed with a network (`net::`) error.
 * Everything else is healable.
 *
 * Closed-target detection matches specific phrases rather than the bare word
 * "closed". Error messages often quote the locator, so a step that targets
 * text such as 'Closed tickets' must not be mistaken for a closed browser.
 *
 * @param {{errorMessage?: string}} context - Failure details.
 * @returns {{healable: boolean, reason: string}} Reason is "context_closed",
 *   "navigation_failed" or "agent_healing".
 */
function classifyFailure(context) {
  const message = (context.errorMessage ?? "").toLowerCase();
  // Lower-case phrases that indicate the automation target no longer exists.
  const closedMarkers = [
    "has been closed",
    "was closed",
    "target closed",
    "page closed",
    "context closed",
    "browser closed"
  ];
  if (closedMarkers.some((marker) => message.includes(marker))) {
    return { healable: false, reason: "context_closed" };
  }
  if (message.includes("navigation failed") && message.includes("net::")) {
    return { healable: false, reason: "navigation_failed" };
  }
  return { healable: true, reason: "agent_healing" };
}
195
/**
 * Run the healing agent for one failed test step.
 *
 * Flow:
 * 1. Return immediately (via baseOutcome) when the failure was classified as
 *    not healable or no model can be resolved.
 * 2. Build the test context and the tool set, then stream the model with a
 *    timeout and a step limit.
 * 3. Stop consuming the stream as soon as the agent calls `mark_complete`.
 *
 * The initial page context is gathered only after the early-exit checks and
 * inside the error-handling region. Unhealable or unconfigured runs therefore
 * do no page work, and a failure while inspecting the page is reported in
 * `errors` instead of rejecting the returned promise.
 *
 * @param {{healable: boolean, reason: string}} decision - Result of classifying the failure.
 * @param {{action?: string, target?: string, errorMessage?: string}} failure - The failed step.
 * @param {{page: *, testContext?: object}} execCtx - Page handle and optional test metadata.
 * @returns {Promise<object>} Outcome with `healed`, `shouldRetryOriginal`,
 *   `actionsRun`, `timedOut`, `durationMs`, `errors`, `mode`, `reason`, and,
 *   when the agent ran, `modelId` and `summary`.
 */
async function executeHealActions(decision, failure, execCtx) {
  const config = loadCanaryConfig();
  const started = Date.now();
  const mode = resolveMode(config);
  const actionsRun = [];
  const errors = [];
  const logTools = config.debug || process.env.CANARY_TOOL_LOG === "1";

  if (!decision.healable) {
    return baseOutcome({
      mode,
      started,
      healed: false,
      shouldRetryOriginal: false,
      reason: decision.reason,
      actionsRun,
      errors
    });
  }

  const { model, modelId, reason: modelReason } = resolveHealerModel(config);
  if (!model) {
    return baseOutcome({
      mode,
      started,
      healed: false,
      shouldRetryOriginal: false,
      reason: modelReason ?? "no_model",
      actionsRun,
      errors
    });
  }

  const testContext = {
    testFile: execCtx.testContext?.testFile,
    testTitle: execCtx.testContext?.testTitle,
    // Test source goes to the model, so it is sanitized first.
    testSource: execCtx.testContext?.testSource ? sanitizeString(execCtx.testContext.testSource) : void 0,
    currentStep: `${failure.action ?? "unknown"}(${failure.target ?? "unknown"})`,
    expectedAfter: execCtx.testContext?.expectedAfter,
    action: failure.action ?? "unknown",
    target: failure.target,
    errorMessage: failure.errorMessage
  };
  const toolset = createAgenticTools({
    page: execCtx.page,
    config,
    actionsRun,
    debug: config.debug
  });

  let completionReason;
  try {
    const initialContext = await buildInitialPageContext(execCtx.page, config);
    const abort = AbortSignal.timeout(config.healTimeoutMs);
    const result = await streamText({
      model,
      system: buildSystemPrompt(testContext, config),
      messages: buildInitialMessages(testContext, failure, initialContext),
      tools: toolset.tools,
      stopWhen: stepCountIs(Math.max(1, config.maxActions)),
      abortSignal: abort,
      maxRetries: 0,
      onStepFinish: (step) => {
        if (config.debug) {
          console.log(
            `[canary][debug] step finish: finishReason=${step.finishReason}; toolCalls=${step.toolCalls?.length ?? 0}; toolResults=${step.toolResults?.length ?? 0}`
          );
        }
        if (logTools) {
          logToolStep(step);
        }
      }
    });
    // Drain the stream so tool calls execute; stop once the agent signals completion.
    for await (const _ of result.fullStream) {
      if (toolset.isComplete()) {
        completionReason = toolset.getCompletionReason();
        if (config.debug) {
          console.log(`[canary][debug] agent marked complete: ${completionReason}`);
        }
        break;
      }
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    errors.push(message);
    if (config.debug) {
      console.log(`[canary][debug] agent error: ${message}`);
    }
  }

  const healed = toolset.isComplete();
  const durationMs = Date.now() - started;
  return {
    healed,
    // Agent-controlled completion, no retry needed
    shouldRetryOriginal: false,
    actionsRun,
    timedOut: durationMs > config.healTimeoutMs,
    durationMs,
    errors,
    mode,
    reason: completionReason ?? (healed ? "agent_healed" : "agent_incomplete"),
    modelId,
    summary: toolset.getSummary()
  };
}
295
/**
 * Build the tool set the healing agent may call, bound to one page.
 *
 * Every tool records its invocation in `actionsRun` and writes debug logs when
 * `debug` is set. Tools that change page state are refused in read-only mode,
 * and in dry-run mode tools return a placeholder result tagged
 * `mode: "dry-run"` without touching the page. Page errors are returned to
 * the model as `{ error }` results rather than thrown. The exception is
 * `wait`, which has no error handling of its own.
 *
 * Differences from a naive implementation worth knowing about:
 * - `fill_by_name` escapes backslashes and double quotes in the accessible
 *   name before embedding it in the role selector.
 * - `evaluate` and `run_playwright_code` reject code larger than the payload
 *   limit instead of executing a truncated, possibly half-finished, script.
 *
 * @param {{page: *, config: object, actionsRun: string[], debug: boolean}} params
 * @returns {{tools: object, isComplete: function(): boolean,
 *   getCompletionReason: function(): (string|undefined),
 *   getSummary: function(): (string|undefined)}}
 */
function createAgenticTools(params) {
  const { page, config, actionsRun, debug } = params;
  let complete = false;
  let completionReason;
  let completionSummary;
  // Never below 1000; a missing, zero or NaN setting falls back to 60000.
  const maxPayloadBytes = Math.max(1e3, config.maxPayloadBytes || 6e4);

  const log = (msg) => {
    if (debug) {
      console.log(`[canary][debug] ${msg}`);
    }
  };
  const track = (action) => {
    actionsRun.push(action);
  };
  const errorText = (err) => (err instanceof Error ? err.message : String(err));

  // Failure-result builders: `{ <field>: "", error }` or plain `{ error }`.
  const failWith = (field) => (error) => ({ [field]: "", error });
  const failPlain = (error) => ({ error });

  // Shared guard sequence for page tools: read-only check (mutating tools only),
  // dry-run short-circuit, capability check, then the action with errors
  // converted into a failure result.
  const guarded = async ({ name, mutates = false, fail, dryRun, available, run }) => {
    if (mutates && config.readOnly) return fail("read_only_mode");
    if (config.dryRun) return { ...dryRun, mode: "dry-run" };
    if (!available) return fail("Page unavailable");
    try {
      return await run();
    } catch (err) {
      const msg = errorText(err);
      log(`${name} error: ${msg}`);
      return fail(msg);
    }
  };

  // Scroll a locator into view (when supported) and click it.
  const scrollAndClick = async (el) => {
    if (el?.scrollIntoViewIfNeeded) {
      await el.scrollIntoViewIfNeeded({ timeout: 2e3 });
    }
    if (el?.click) {
      await el.click({ timeout: 5e3 });
    }
  };

  // Model-supplied code must run whole or not at all.
  const exceedsPayloadLimit = (source) => Buffer.byteLength(source, "utf8") > maxPayloadBytes;

  const tools = {
    // === Core Action Tools ===
    click: tool({
      description: "Click an element by its visible text",
      inputSchema: z.object({ text: z.string().describe("The visible text to click") }),
      execute: async ({ text }) => {
        track(`click:${text}`);
        log(`click called with text="${text}"`);
        return guarded({
          name: "click",
          mutates: true,
          fail: failWith("clicked"),
          dryRun: { clicked: text },
          available: page?.getByText,
          run: async () => {
            await scrollAndClick(page.getByText(text));
            log(`click succeeded for "${text}"`);
            return { clicked: text };
          }
        });
      }
    }),
    click_selector: tool({
      description: "Click an element by CSS or Playwright selector",
      inputSchema: z.object({ selector: z.string().describe("CSS or Playwright selector") }),
      execute: async ({ selector }) => {
        track(`click_selector:${selector}`);
        log(`click_selector called with selector="${selector}"`);
        return guarded({
          name: "click_selector",
          mutates: true,
          fail: failWith("clicked"),
          dryRun: { clicked: selector },
          available: page?.locator,
          run: async () => {
            await scrollAndClick(page.locator(selector));
            log(`click_selector succeeded for "${selector}"`);
            return { clicked: selector };
          }
        });
      }
    }),
    fill: tool({
      description: "Fill an input field with a value using CSS selector (clears existing content)",
      inputSchema: z.object({
        selector: z.string().describe('CSS selector for the input (e.g. input[placeholder="..."])'),
        value: z.string().describe("Value to fill")
      }),
      execute: async ({ selector, value }) => {
        track(`fill:${selector}`);
        log(`fill called with selector="${selector}" value="${value}"`);
        return guarded({
          name: "fill",
          mutates: true,
          fail: failWith("filled"),
          dryRun: { filled: selector },
          available: page?.locator,
          run: async () => {
            const el = page.locator(selector);
            if (el?.fill) {
              await el.fill(value, { timeout: 5e3 });
            }
            log(`fill succeeded`);
            return { filled: selector, value };
          }
        });
      }
    }),
    fill_by_name: tool({
      description: 'Fill a textbox by its accessible name (shown in snapshot as textbox "NAME"). This is the PREFERRED way to fill inputs - use the name from the snapshot directly.',
      inputSchema: z.object({
        name: z.string().describe('The accessible name of the textbox (from snapshot, e.g. "Enter 6-digit code")'),
        value: z.string().describe("Value to fill")
      }),
      execute: async ({ name, value }) => {
        track(`fill_by_name:${name}`);
        log(`fill_by_name called with name="${name}" value="${value}"`);
        return guarded({
          name: "fill_by_name",
          mutates: true,
          fail: failWith("filled"),
          dryRun: { filled: name },
          available: page?.locator,
          run: async () => {
            // Escape characters that would otherwise terminate the quoted attribute value.
            const escapedName = name.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
            const el = page.locator(`role=textbox[name="${escapedName}"]`);
            if (el?.fill) {
              await el.fill(value, { timeout: 5e3 });
            }
            log(`fill_by_name succeeded`);
            return { filled: name, value };
          }
        });
      }
    }),
    type: tool({
      description: "Type text character by character (triggers key events)",
      inputSchema: z.object({
        selector: z.string().describe("CSS or Playwright selector"),
        text: z.string().describe("Text to type")
      }),
      execute: async ({ selector, text }) => {
        track(`type:${selector}`);
        log(`type called with selector="${selector}" text="${text}"`);
        return guarded({
          name: "type",
          mutates: true,
          fail: failWith("typed"),
          dryRun: { typed: text },
          available: page?.locator,
          run: async () => {
            const el = page.locator(selector);
            if (el?.pressSequentially) {
              await el.pressSequentially(text);
            }
            log(`type succeeded`);
            return { typed: text, into: selector };
          }
        });
      }
    }),
    press_key: tool({
      description: "Press a keyboard key (Enter, Tab, Escape, ArrowDown, etc.)",
      inputSchema: z.object({ key: z.string().describe("Key to press") }),
      execute: async ({ key }) => {
        track(`press_key:${key}`);
        log(`press_key called with key="${key}"`);
        return guarded({
          name: "press_key",
          mutates: true,
          fail: failWith("pressed"),
          dryRun: { pressed: key },
          available: page?.keyboard?.press,
          run: async () => {
            await page.keyboard.press(key);
            log(`press_key succeeded`);
            return { pressed: key };
          }
        });
      }
    }),
    hover: tool({
      description: "Hover over an element",
      inputSchema: z.object({ selector: z.string().describe("CSS or Playwright selector") }),
      execute: async ({ selector }) => {
        track(`hover:${selector}`);
        log(`hover called with selector="${selector}"`);
        return guarded({
          name: "hover",
          mutates: true,
          fail: failWith("hovered"),
          dryRun: { hovered: selector },
          available: page?.locator,
          run: async () => {
            const el = page.locator(selector);
            if (el?.hover) {
              await el.hover();
            }
            log(`hover succeeded`);
            return { hovered: selector };
          }
        });
      }
    }),
    select_option: tool({
      description: "Select an option from a dropdown",
      inputSchema: z.object({
        selector: z.string().describe("CSS or Playwright selector for the select element"),
        value: z.string().describe("Value or label to select")
      }),
      execute: async ({ selector, value }) => {
        track(`select_option:${selector}`);
        log(`select_option called with selector="${selector}" value="${value}"`);
        return guarded({
          name: "select_option",
          mutates: true,
          fail: failWith("selected"),
          dryRun: { selected: value },
          available: page?.locator,
          run: async () => {
            const el = page.locator(selector);
            if (el?.selectOption) {
              await el.selectOption(value);
            }
            log(`select_option succeeded`);
            return { selected: value, from: selector };
          }
        });
      }
    }),
    // === Navigation & Waiting ===
    wait: tool({
      description: "Wait for a duration in milliseconds",
      inputSchema: z.object({ ms: z.number().min(100).max(3e4).describe("Milliseconds to wait") }),
      execute: async ({ ms }) => {
        track(`wait:${ms}`);
        log(`wait called with ms=${ms}`);
        if (config.dryRun) return { waited: ms, mode: "dry-run" };
        if (page?.waitForTimeout) {
          await page.waitForTimeout(ms);
        } else {
          // No page timer available: fall back to a plain timer.
          await new Promise((resolve) => setTimeout(resolve, ms));
        }
        log(`wait completed`);
        return { waited: ms };
      }
    }),
    wait_for_selector: tool({
      description: "Wait for an element to reach a state (visible, hidden, attached, detached)",
      inputSchema: z.object({
        selector: z.string().describe("CSS or Playwright selector"),
        state: z.enum(["visible", "hidden", "attached", "detached"]).optional().describe("State to wait for")
      }),
      execute: async ({ selector, state }) => {
        const targetState = state ?? "visible";
        track(`wait_for_selector:${selector}`);
        log(`wait_for_selector called with selector="${selector}" state="${targetState}"`);
        return guarded({
          name: "wait_for_selector",
          fail: failWith("found"),
          dryRun: { found: selector },
          available: page?.locator,
          run: async () => {
            const el = page.locator(selector);
            if (el?.waitFor) {
              await el.waitFor({ state: targetState });
            }
            log(`wait_for_selector succeeded`);
            return { found: selector, state: targetState };
          }
        });
      }
    }),
    navigate: tool({
      description: "Navigate to a URL",
      inputSchema: z.object({ url: z.string().describe("URL to navigate to") }),
      execute: async ({ url }) => {
        track(`navigate:${url}`);
        log(`navigate called with url="${url}"`);
        return guarded({
          name: "navigate",
          mutates: true,
          fail: failWith("navigated"),
          dryRun: { navigated: url },
          available: page?.goto,
          run: async () => {
            await page.goto(url);
            log(`navigate succeeded`);
            return { navigated: url };
          }
        });
      }
    }),
    go_back: tool({
      description: "Go back in browser history",
      inputSchema: z.object({}),
      execute: async () => {
        track("go_back");
        log(`go_back called`);
        return guarded({
          name: "go_back",
          mutates: true,
          fail: failWith("action"),
          dryRun: { action: "back" },
          available: page?.goBack,
          run: async () => {
            await page.goBack();
            log(`go_back succeeded`);
            return { action: "back" };
          }
        });
      }
    }),
    reload: tool({
      description: "Reload the current page",
      inputSchema: z.object({}),
      execute: async () => {
        track("reload");
        log(`reload called`);
        return guarded({
          name: "reload",
          mutates: true,
          fail: failWith("action"),
          dryRun: { action: "reload" },
          available: page?.reload,
          run: async () => {
            await page.reload();
            log(`reload succeeded`);
            return { action: "reload" };
          }
        });
      }
    }),
    // === Inspection Tools ===
    snapshot: tool({
      description: "Get the accessibility tree of the current page to understand its structure. ALWAYS call this first!",
      inputSchema: z.object({}),
      execute: async () => {
        track("snapshot");
        log(`snapshot called`);
        if (config.dryRun) return { tree: null, mode: "dry-run" };
        if (!page) return { error: "Page unavailable" };
        let tree;
        let html;
        try {
          if (page.accessibility && typeof page.accessibility.snapshot === "function") {
            tree = await page.accessibility.snapshot();
          }
        } catch (err) {
          log(`accessibility snapshot error: ${errorText(err)}`);
        }
        // Fall back to raw body HTML when no accessibility tree could be obtained.
        if (!tree && page.evaluate) {
          try {
            html = await page.evaluate("document.body.innerHTML");
            if (html) {
              log(`snapshot using HTML fallback`);
            }
          } catch (err) {
            log(`HTML fallback error: ${errorText(err)}`);
          }
        }
        if (!tree && !html) {
          return { error: "Could not get page content" };
        }
        log(`snapshot succeeded`);
        const sanitizedTree = sanitizeUnknown(tree, maxPayloadBytes);
        const sanitizedHtml = html ? truncate(sanitizeString(html), maxPayloadBytes) : void 0;
        return { tree: sanitizedTree, html: sanitizedHtml };
      }
    }),
    screenshot: tool({
      description: "Capture a screenshot of the current page",
      inputSchema: z.object({
        fullPage: z.boolean().optional().describe("Whether to capture the full page")
      }),
      execute: async ({ fullPage }) => {
        track("screenshot");
        log(`screenshot called fullPage=${fullPage}`);
        return guarded({
          name: "screenshot",
          fail: failPlain,
          dryRun: { screenshot: void 0 },
          available: page?.screenshot,
          run: async () => {
            const buffer = await page.screenshot({ fullPage: fullPage ?? false });
            const b = Buffer.from(buffer);
            const base64 = b.toString("base64");
            if (b.byteLength > maxPayloadBytes || base64.length > maxPayloadBytes) {
              return { error: "screenshot_too_large" };
            }
            const dataUrl = `data:image/png;base64,${truncate(base64, maxPayloadBytes)}`;
            log(`screenshot succeeded`);
            return { screenshot: dataUrl };
          }
        });
      }
    }),
    get_text: tool({
      description: "Get the text content of an element",
      inputSchema: z.object({ selector: z.string().describe("CSS or Playwright selector") }),
      execute: async ({ selector }) => {
        track(`get_text:${selector}`);
        log(`get_text called with selector="${selector}"`);
        return guarded({
          name: "get_text",
          fail: failPlain,
          dryRun: { text: null },
          available: page?.locator,
          run: async () => {
            const el = page.locator(selector);
            const text = el?.textContent ? await el.textContent() : null;
            const sanitized = text ? truncate(sanitizeString(text), maxPayloadBytes) : text;
            log(`get_text succeeded: "${sanitized}"`);
            return { text: sanitized };
          }
        });
      }
    }),
    is_visible: tool({
      description: "Check if an element is visible",
      inputSchema: z.object({ selector: z.string().describe("CSS or Playwright selector") }),
      execute: async ({ selector }) => {
        track(`is_visible:${selector}`);
        log(`is_visible called with selector="${selector}"`);
        return guarded({
          name: "is_visible",
          fail: failPlain,
          dryRun: { visible: false },
          available: page?.locator,
          run: async () => {
            const el = page.locator(selector);
            const visible = el?.isVisible ? await el.isVisible() : false;
            log(`is_visible succeeded: ${visible}`);
            return { visible };
          }
        });
      }
    }),
    evaluate: tool({
      description: "Run JavaScript in the page context",
      inputSchema: z.object({ script: z.string().describe("JavaScript code to execute") }),
      execute: async ({ script }) => {
        track("evaluate");
        log(`evaluate called`);
        if (!config.allowEvaluate) return { error: "evaluate_disabled" };
        if (violatesCodeDenylist(script)) return { error: "evaluate_blocked" };
        return guarded({
          name: "evaluate",
          fail: failPlain,
          dryRun: { result: null },
          available: page?.evaluate,
          run: async () => {
            // Refuse rather than run a script cut off mid-statement.
            if (exceedsPayloadLimit(script)) return { error: "evaluate_too_large" };
            const result = await page.evaluate(script);
            log(`evaluate succeeded`);
            return { result: sanitizeUnknown(result, maxPayloadBytes) };
          }
        });
      }
    }),
    // === Power Tool: Arbitrary Playwright Code ===
    run_playwright_code: tool({
      description: 'Execute arbitrary Playwright code. Has access to `page` object. Use for complex operations not covered by other tools. Example: `await page.getByRole("button", { name: "Submit" }).click();`',
      inputSchema: z.object({
        code: z.string().describe("Playwright code to execute (has access to `page` object)")
      }),
      execute: async ({ code }) => {
        track("run_playwright_code");
        log(`run_playwright_code called with code:\n${code}`);
        const fail = (error) => ({ executed: false, error });
        if (!config.allowRunCode) return fail("run_code_disabled");
        if (violatesCodeDenylist(code)) return fail("run_code_blocked");
        return guarded({
          name: "run_playwright_code",
          fail,
          dryRun: { executed: false },
          available: page,
          run: async () => {
            // Refuse rather than run code cut off mid-statement.
            if (exceedsPayloadLimit(code)) return fail("run_code_too_large");
            // SECURITY: this executes model-generated code in the Node process via
            // `new Function`. It is reachable only when config.allowRunCode is set,
            // and the textual denylist above is not a sandbox.
            const fn = new Function("page", `return (async () => { ${code} })();`);
            const result = await fn(page);
            log(`run_playwright_code succeeded`);
            return { executed: true, result: sanitizeUnknown(result, maxPayloadBytes) };
          }
        });
      }
    }),
    // === Completion Tool ===
    mark_complete: tool({
      description: "Call this when the browser is ready for the test to continue. You MUST call this when done healing!",
      inputSchema: z.object({
        reason: z.string().describe("Brief technical description of what was done"),
        summary: z.string().describe(`One-line human-readable summary for the test report, e.g. "Clicked 'Send OTP' instead of 'Submit'" or "Filled email field by accessible name"`)
      }),
      execute: async ({ reason, summary }) => {
        track("mark_complete");
        log(`mark_complete called with reason="${reason}" summary="${summary}"`);
        // Completion is never recorded in dry-run mode.
        if (!config.dryRun) {
          complete = true;
          completionReason = reason;
          completionSummary = summary;
        }
        return { complete: !config.dryRun, reason, summary, mode: config.dryRun ? "dry-run" : void 0 };
      }
    })
  };

  return {
    tools,
    isComplete: () => complete,
    getCompletionReason: () => completionReason,
    getSummary: () => completionSummary
  };
}
793
/**
 * Builds the system prompt sent to the healer model for one failed test step.
 *
 * The prompt is a single template string containing: the healer's goal,
 * optional test-context sections, the failing step and error, the list of
 * tools, the mandatory workflow rules with two worked examples, and the
 * active mode label produced by `resolveMode(config)`.
 *
 * @param {object} testContext - Reads `testFile`, `testTitle`, `testSource`,
 *   `expectedAfter`, `currentStep`, `action`, `target` and `errorMessage`.
 *   `testTitle`, `target` and `errorMessage` fall back to "unknown" when
 *   nullish (the Rule 1 line falls back to an empty target instead).
 * @param {object} config - Passed only to `resolveMode` for the trailing
 *   "Mode: ..." line.
 * @returns {string} The complete system prompt.
 */
function buildSystemPrompt(testContext, config) {
  // Each optional section renders as "" when its source field is falsy, so the
  // final template can interpolate them unconditionally.
  // The "Test Context" section is keyed on testFile alone; testTitle is only a
  // detail inside it.
  const testInfo = testContext.testFile ? `
## Test Context
File: ${testContext.testFile}
Test: "${testContext.testTitle ?? "unknown"}"
` : "";
  // The test source is embedded as-is here (no truncation is applied in this
  // function).
  const testSource = testContext.testSource ? `
### Full Test Code:
\`\`\`typescript
${testContext.testSource}
\`\`\`
` : "";
  const expectedAfter = testContext.expectedAfter ? `
### What the Test Expects After This Step:
${testContext.expectedAfter}
` : "";
  // Everything below is runtime prompt text: whitespace and wording inside the
  // template literal are part of what the model sees.
  return `You are an AI test healer. A Playwright test step failed. Your job is to make the browser ready for the next step in the test **without violating the intent of the test**. Heal only the failing step; do not advance beyond it.

## Your Goal
Get the browser into the state the test expects, then call mark_complete.
${testInfo}${testSource}
### Current Step That Failed:
${testContext.currentStep}
${expectedAfter}
## The Failure
Action attempted: ${testContext.action}
Target: ${testContext.target ?? "unknown"}
Error: ${testContext.errorMessage ?? "unknown"}

## Test Intent and Discipline
- Adhere to the spirit of the test. If advancing would bypass a required validation or skip a missing field, you must not proceed\u2014report failure instead.
- Heal only this step. Avoid extra navigation or state changes unrelated to the intended action. If you take a detour, undo it before completion.
- If you cannot confidently achieve the expected postcondition for this step, do not mark complete; let the test fail.

## Tools Available

### Inspection (use first!)
- snapshot() - Get accessibility tree to see page structure
- screenshot() - Capture visual screenshot
- get_text(selector) - Get text content of an element
- is_visible(selector) - Check if element is visible

### Actions
- click(text) - Click element by visible text
- click_selector(selector) - Click by CSS/Playwright selector
- fill_by_name(name, value) - PREFERRED: Fill textbox by accessible name from snapshot (e.g. textbox "Enter 6-digit code")
- fill(selector, value) - Fill input by CSS selector (use placeholder attribute, NOT aria-label)
- type(selector, text) - Type text character by character
- press_key(key) - Press keyboard key (Enter, Tab, etc.)
- hover(selector) - Hover over element
- select_option(selector, value) - Select from dropdown

### Navigation & Waiting
- wait(ms) - Wait for a duration
- wait_for_selector(selector, state?) - Wait for element state
- navigate(url) - Navigate to URL
- go_back() - Go back in history
- reload() - Reload page

### Power Tool
- run_playwright_code(code) - Execute any Playwright code with \`page\` object
Example: \`await page.getByRole('button', { name: 'Submit' }).click();\`

### Completion (REQUIRED)
- mark_complete(reason, summary) - Call when browser is ready for test to continue
- reason: technical description of what was done
- summary: one-line human-readable summary for test report (e.g. "Clicked 'Send OTP' instead of 'Submit'")

## CRITICAL RULES - READ CAREFULLY

### Rule 1: YOU MUST PERFORM THE ACTION YOURSELF
The test tried to ${testContext.action}("${testContext.target ?? ""}") but it failed.
YOU must perform the equivalent action on the correct element.
DO NOT leave it for "the next step" - there is no next step until YOU complete the action.

### Rule 2: The workflow is ALWAYS:
1. snapshot() - understand the page
2. click() / fill() / type() - PERFORM THE ACTION on the correct element
3. mark_complete() - report what you did

### Rule 3: NEVER call mark_complete without performing an action
If mark_complete is called without a click/fill/type, the test WILL FAIL.

## Example 1: Test tried click("Submit") but button says "Send OTP"
CORRECT:
1. snapshot() \u2192 see "Send OTP" button exists
2. click("Send OTP") \u2192 CLICK THE BUTTON
3. mark_complete({reason: "clicked Send OTP button", summary: "Clicked 'Send OTP' instead of 'Submit'"})

WRONG (this will cause test failure):
1. snapshot() \u2192 see "Send OTP" button exists
2. mark_complete(...) \u2190 WRONG! No click performed!

## Example 2: Test tried fill with wrong placeholder
If snapshot shows: textbox "Enter 6-digit code" [ref=e37]
CORRECT:
1. snapshot() \u2192 see textbox "Enter 6-digit code"
2. fill_by_name("Enter 6-digit code", "111222") \u2192 USE THE NAME FROM SNAPSHOT
3. mark_complete({reason: "filled by accessible name", summary: "Filled 'Enter 6-digit code' field"})

## If healing would break test intent
- If the page is missing required UI (e.g., a field not present) or advancing would skip validation, do NOT hack around it. Report failure by not calling mark_complete.
- If you temporarily navigate or change state, undo it before completion so the test can continue from the intended point.

## Start now
1. Call snapshot()
2. Identify the correct element for the intended action
3. PERFORM THE ACTION (click, fill, type, etc.)
4. Call mark_complete with what you did

Mode: ${resolveMode(config)}.`;
}
905
/**
 * Translates the healer config flags into a mode label.
 *
 * `dryRun` wins over `warnOnly`; when neither flag is truthy the mode is
 * "full".
 *
 * @param {{ dryRun?: boolean, warnOnly?: boolean }} config
 * @returns {"dry-run" | "warn" | "full"}
 */
function resolveMode(config) {
  let mode = "full";
  if (config.dryRun) {
    mode = "dry-run";
  } else if (config.warnOnly) {
    mode = "warn";
  }
  return mode;
}
910
/**
 * Assembles a heal-outcome record for a run that did not time out.
 *
 * @param {object} options
 * @param {number} options.started - Epoch milliseconds when the attempt began;
 *   used to compute `durationMs` against the current time.
 * @param {boolean} options.healed
 * @param {boolean} options.shouldRetryOriginal
 * @param {string} options.mode
 * @param {string} options.reason
 * @param {Array} [options.actionsRun=[]]
 * @param {Array} [options.errors=[]]
 * @returns {object} Outcome with `timedOut` fixed to `false`.
 */
function baseOutcome(options) {
  const {
    started,
    healed,
    shouldRetryOriginal,
    mode,
    reason,
    actionsRun = [],
    errors = []
  } = options;
  const elapsedMs = Date.now() - started;
  const outcome = { healed, shouldRetryOriginal, actionsRun };
  outcome.timedOut = false;
  outcome.durationMs = elapsedMs;
  outcome.errors = errors;
  outcome.mode = mode;
  outcome.reason = reason;
  return outcome;
}
930
/**
 * Prints a colorized trace of one model step to the console: one line per
 * tool call and one line per tool result, or a single "no tool calls/results"
 * line when the step carried neither.
 *
 * Payload field names: the tool definitions in this file are declared with
 * `inputSchema`, which is the AI SDK 5 naming. Under that naming tool calls
 * carry `input` and tool results carry `output`; the older `args` / `result`
 * names are still read as a fallback so both SDK generations are logged.
 * NOTE(review): confirm against the installed `ai` package version.
 *
 * @param {object} step - Step object; reads `toolCalls`, `toolResults` and
 *   `finishReason`.
 * @returns {void}
 */
function logToolStep(step) {
  const color = {
    magenta: "\x1B[35m",
    cyan: "\x1B[36m",
    yellow: "\x1B[33m",
    green: "\x1B[32m",
    reset: "\x1B[0m"
  };
  const calls = step.toolCalls ?? [];
  const results = step.toolResults ?? [];
  if (calls.length === 0 && results.length === 0) {
    console.log(
      `${color.magenta}[canary][tool] step${color.reset} (no tool calls/results; finish=${step.finishReason ?? "unknown"})`
    );
  }
  for (const call of calls) {
    const name = call.toolName ?? "unknown_tool";
    // Prefer the v5 field, fall back to the v4 one.
    const rawArgs = call.input ?? call.args;
    const args = rawArgs ? truncate(safeJson(rawArgs), 300) : "";
    console.log(
      `${color.magenta}[canary][tool] call${color.reset} ${color.cyan}${name}${color.reset}${args ? ` args=${args}` : ""}`
    );
  }
  for (const result of results) {
    const name = result.toolName ?? "unknown_tool";
    const rawResult = result.output ?? result.result;
    // Nullish check (not truthiness) so legitimate results such as `false`,
    // `0` or "" are still shown.
    const res = rawResult != null ? truncate(safeJson(rawResult), 300) : void 0;
    const err = result.error ? truncate(safeJson(result.error), 300) : void 0;
    const statusColor = err ? color.yellow : color.green;
    console.log(
      `${color.magenta}[canary][tool] result${color.reset} ${color.cyan}${name}${color.reset}` + (res ? ` ${statusColor}->${color.reset} ${res}` : "") + (err ? ` ${color.yellow}error=${err}${color.reset}` : "")
    );
  }
}
962
/**
 * Builds the opening user message for a healing conversation.
 *
 * The message is a list of sections joined by blank lines, in this order:
 * the heal request, the (truncated) test source when present, test file,
 * test title, then whichever of snapshot / HTML / screenshot the initial page
 * context contains, and finally a closing reminder.
 *
 * @param {object} testContext - Reads `testSource`, `testFile`, `testTitle`.
 * @param {object} failure - Reads `action`, `target`, `errorMessage`; each
 *   falls back to "unknown" when nullish.
 * @param {object} initial - Reads `snapshot`, `html`, `screenshot`.
 * @returns {Array<{ role: string, content: string }>} A one-element message
 *   list with role "user".
 */
function buildInitialMessages(testContext, failure, initial) {
  // Renders "<label>\n```<lang>\n<body>\n```".
  const fenced = (label, lang, body) => `${label}\n\`\`\`${lang}\n${body}\n\`\`\``;

  const action = failure.action ?? "unknown";
  const target = failure.target ?? "unknown";
  const errorMessage = failure.errorMessage ?? "unknown";

  const sections = [
    `Please heal this test step. Start by reviewing the snapshot below. Failing step: ${action}(${target}). Error: ${errorMessage}.`,
    testContext.testSource
      ? fenced("Full test source (truncated/redacted):", "typescript", truncate(testContext.testSource, 15e3))
      : null,
    `Test file: ${testContext.testFile ?? "unknown"}`,
    `Test title: ${testContext.testTitle ?? "unknown"}`,
    initial.snapshot
      ? fenced("Initial accessibility snapshot (sanitized):", "json", truncate(safeJson(initial.snapshot), 8e3))
      : null,
    initial.html
      ? fenced("HTML fallback (sanitized):", "html", truncate(initial.html, 4e3))
      : null,
    initial.screenshot
      ? `Screenshot (base64 data URL, truncated): ${truncate(initial.screenshot, 12e3)}`
      : null,
    `Remember: perform only the intended step and mark_complete only when the postcondition for this step is truly met.`
  ];

  const content = sections.filter((section) => section !== null).join("\n\n");
  return [{ role: "user", content }];
}
995
/**
 * Collects a best-effort description of the current page to seed the healing
 * conversation.
 *
 * Each probe is optional and isolated: a probe that is unavailable on `page`
 * or that throws simply leaves its field off the result.
 *
 * - `snapshot`: sanitized accessibility tree, when `page.accessibility.snapshot`
 *   exists and returns a truthy value.
 * - `html`: sanitized, truncated `document.body.innerHTML`, only when no
 *   snapshot was obtained.
 * - `screenshot`: PNG data URL, only when `config.visionEnabled` is set and the
 *   base64 text fits within the payload limit (oversized screenshots are
 *   dropped, not cut, because a truncated image is useless).
 *
 * @param {object | null | undefined} page - Page-like object; a falsy value
 *   yields an empty result.
 * @param {object} config - Reads `maxPayloadBytes` (default 60000, minimum
 *   1000) and `visionEnabled`.
 * @returns {Promise<{ snapshot?: unknown, html?: string, screenshot?: string }>}
 */
async function buildInitialPageContext(page, config) {
  const maxPayloadBytes = Math.max(1e3, config.maxPayloadBytes || 6e4);
  const result = {};
  if (!page) return result;

  try {
    if (page.accessibility?.snapshot) {
      const tree = await page.accessibility.snapshot();
      if (tree) {
        result.snapshot = sanitizeUnknown(tree, maxPayloadBytes);
      }
    }
  } catch {
    // Best-effort: fall through to the HTML fallback below.
  }

  if (!result.snapshot && page.evaluate) {
    try {
      const html = await page.evaluate("document.body.innerHTML");
      if (html) {
        result.html = truncate(sanitizeString(html), maxPayloadBytes);
      }
    } catch {
      // Best-effort: the page may be closed or mid-navigation.
    }
  }

  if (config.visionEnabled && page.screenshot) {
    try {
      const shot = await page.screenshot({ fullPage: false });
      // Buffer.from accepts both a Buffer/Uint8Array and a string, so no type
      // branch is needed.
      // NOTE(review): a string is read as UTF-8 text here, as before; if a
      // string screenshot is meant to be base64 already, decode accordingly.
      const base64 = Buffer.from(shot).toString("base64");
      // base64 text is never shorter than the raw bytes, so this single check
      // also bounds the byte size.
      if (base64.length <= maxPayloadBytes) {
        result.screenshot = `data:image/png;base64,${base64}`;
      }
    } catch {
      // Best-effort: a failed screenshot must not block healing.
    }
  }

  return result;
}
1030
/**
 * Caps `value` at `max` characters, appending "..." when anything was cut.
 * Values whose length is within the limit are returned unchanged.
 *
 * @param {string} value
 * @param {number} max - Maximum number of characters kept before the ellipsis.
 * @returns {string}
 */
function truncate(value, max) {
  const fits = value.length <= max;
  return fits ? value : `${value.slice(0, max)}...`;
}
1034
/**
 * Serializes any value to a string without throwing.
 *
 * Uses JSON when possible. Falls back to `String(value)` both when
 * `JSON.stringify` throws (circular structures, BigInt) and when it returns
 * `undefined` without throwing (for `undefined`, functions and symbols), so
 * callers can rely on always receiving a string.
 *
 * @param {unknown} value
 * @returns {string}
 */
function safeJson(value) {
  try {
    const json = JSON.stringify(value);
    return json === undefined ? String(value) : json;
  } catch {
    return String(value);
  }
}
1041
/**
 * Applies every entry of REDACTION_PATTERNS to `value`, in order, replacing
 * each match of `regex` with that entry's `replacement`.
 *
 * @param {string} value
 * @returns {string} The text after all replacements have been applied.
 */
function sanitizeString(value) {
  let redacted = value;
  for (const pattern of REDACTION_PATTERNS) {
    const { regex, replacement } = pattern;
    redacted = redacted.replace(regex, replacement);
  }
  return redacted;
}
1048
/**
 * Produces a sanitized, size-bounded deep copy of an arbitrary value.
 *
 * - `null` / `undefined`, numbers and booleans pass through unchanged.
 * - Strings are run through `sanitizeString` and truncated to
 *   `maxPayloadBytes` characters.
 * - Arrays and plain objects are copied recursively, keeping at most the
 *   first 50 elements / entries at each level. Object keys are copied as-is.
 * - A reference back to an object that is currently being copied (a true
 *   cycle) becomes "[REDACTED_CYCLE]".
 * - Anything else (functions, symbols, bigints) becomes "[REDACTED_UNKNOWN]".
 *
 * `seen` holds only the ancestors of the value being processed, so an object
 * that is merely referenced from two places (no cycle) is copied both times
 * instead of being mislabelled as a cycle.
 *
 * @param {unknown} value
 * @param {number} maxPayloadBytes - Per-string character limit.
 * @param {WeakSet<object>} [seen] - Ancestor set used for cycle detection;
 *   callers normally omit it.
 * @returns {unknown}
 */
function sanitizeUnknown(value, maxPayloadBytes, seen = /* @__PURE__ */ new WeakSet()) {
  if (value === null || value === void 0) return value;
  if (typeof value === "string") {
    return truncate(sanitizeString(value), maxPayloadBytes);
  }
  if (typeof value === "number" || typeof value === "boolean") return value;
  if (typeof value !== "object") return "[REDACTED_UNKNOWN]";

  if (seen.has(value)) return "[REDACTED_CYCLE]";
  seen.add(value);
  try {
    if (Array.isArray(value)) {
      return value.slice(0, 50).map((v) => sanitizeUnknown(v, maxPayloadBytes, seen));
    }
    const out = {};
    for (const [k, v] of Object.entries(value).slice(0, 50)) {
      out[k] = sanitizeUnknown(v, maxPayloadBytes, seen);
    }
    return out;
  } finally {
    // Leaving this subtree: the value is no longer an ancestor, so a later
    // sibling reference to it is not a cycle.
    seen.delete(value);
  }
}
1069
/**
 * Reports whether `code` matches at least one entry of CODE_DENY_PATTERNS.
 *
 * @param {string} code - Source text to check.
 * @returns {boolean} `true` as soon as any pattern's `test` succeeds.
 */
function violatesCodeDenylist(code) {
  const matchesCode = (pattern) => pattern.test(code);
  return CODE_DENY_PATTERNS.some(matchesCode);
}
1072
+
1073
+ export {
1074
+ loadCanaryConfig,
1075
+ getEventLog,
1076
+ setEventLogPath,
1077
+ recordHealingEvent,
1078
+ markPatched,
1079
+ alreadyPatched,
1080
+ classifyFailure,
1081
+ executeHealActions
1082
+ };
1083
+ //# sourceMappingURL=chunk-7AP5KRVU.js.map