explorbot 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/README.md +27 -1
  2. package/bin/explorbot-cli.ts +27 -18
  3. package/dist/bin/explorbot-cli.js +26 -18
  4. package/dist/package.json +2 -2
  5. package/dist/rules/navigator/output.md +9 -0
  6. package/dist/rules/navigator/verification-actions.md +2 -0
  7. package/dist/src/action-result.js +23 -1
  8. package/dist/src/action.js +46 -38
  9. package/dist/src/ai/bosun.js +11 -1
  10. package/dist/src/ai/conversation.js +39 -0
  11. package/dist/src/ai/historian/codeceptjs.js +109 -0
  12. package/dist/src/ai/historian/experience.js +320 -0
  13. package/dist/src/ai/historian/mixin.js +2 -0
  14. package/dist/src/ai/historian/playwright.js +145 -0
  15. package/dist/src/ai/historian/utils.js +18 -0
  16. package/dist/src/ai/historian.js +19 -405
  17. package/dist/src/ai/navigator.js +82 -29
  18. package/dist/src/ai/pilot.js +232 -13
  19. package/dist/src/ai/planner.js +29 -9
  20. package/dist/src/ai/provider.js +54 -17
  21. package/dist/src/ai/researcher.js +41 -32
  22. package/dist/src/ai/rules.js +26 -14
  23. package/dist/src/ai/tester.js +90 -26
  24. package/dist/src/ai/tools.js +13 -7
  25. package/dist/src/browser-server.js +16 -3
  26. package/dist/src/commands/add-rule-command.js +11 -8
  27. package/dist/src/commands/clean-command.js +2 -1
  28. package/dist/src/commands/explore-command.js +27 -15
  29. package/dist/src/commands/init-command.js +9 -8
  30. package/dist/src/commands/plan-command.js +32 -0
  31. package/dist/src/commands/plan-save-command.js +19 -7
  32. package/dist/src/commands/rerun-command.js +4 -0
  33. package/dist/src/components/App.js +15 -5
  34. package/dist/src/execution-controller.js +13 -2
  35. package/dist/src/experience-tracker.js +20 -64
  36. package/dist/src/explorbot.js +5 -8
  37. package/dist/src/explorer.js +9 -2
  38. package/dist/src/observability.js +50 -99
  39. package/dist/src/playwright-recorder.js +309 -0
  40. package/dist/src/test-plan.js +12 -0
  41. package/dist/src/utils/aria.js +37 -1
  42. package/dist/src/utils/error-page.js +20 -7
  43. package/dist/src/utils/next-steps.js +37 -0
  44. package/package.json +2 -2
  45. package/rules/navigator/output.md +9 -0
  46. package/rules/navigator/verification-actions.md +2 -0
  47. package/src/action-result.ts +26 -1
  48. package/src/action.ts +44 -37
  49. package/src/ai/bosun.ts +11 -1
  50. package/src/ai/conversation.ts +37 -0
  51. package/src/ai/historian/codeceptjs.ts +130 -0
  52. package/src/ai/historian/experience.ts +383 -0
  53. package/src/ai/historian/mixin.ts +4 -0
  54. package/src/ai/historian/playwright.ts +169 -0
  55. package/src/ai/historian/utils.ts +23 -0
  56. package/src/ai/historian.ts +35 -473
  57. package/src/ai/navigator.ts +82 -29
  58. package/src/ai/pilot.ts +237 -14
  59. package/src/ai/planner.ts +29 -9
  60. package/src/ai/provider.ts +51 -17
  61. package/src/ai/researcher.ts +45 -33
  62. package/src/ai/rules.ts +27 -14
  63. package/src/ai/tester.ts +94 -26
  64. package/src/ai/tools.ts +47 -25
  65. package/src/browser-server.ts +17 -3
  66. package/src/commands/add-rule-command.ts +11 -7
  67. package/src/commands/clean-command.ts +2 -1
  68. package/src/commands/explore-command.ts +29 -15
  69. package/src/commands/init-command.ts +9 -8
  70. package/src/commands/plan-command.ts +35 -0
  71. package/src/commands/plan-save-command.ts +18 -7
  72. package/src/commands/rerun-command.ts +5 -0
  73. package/src/components/App.tsx +16 -5
  74. package/src/config.ts +6 -1
  75. package/src/execution-controller.ts +14 -3
  76. package/src/experience-tracker.ts +21 -72
  77. package/src/explorbot.ts +5 -8
  78. package/src/explorer.ts +11 -2
  79. package/src/observability.ts +50 -109
  80. package/src/playwright-recorder.ts +305 -0
  81. package/src/test-plan.ts +12 -0
  82. package/src/utils/aria.ts +38 -1
  83. package/src/utils/error-page.ts +22 -7
  84. package/src/utils/next-steps.ts +51 -0
@@ -0,0 +1,383 @@
1
+ import dedent from 'dedent';
2
+ import { z } from 'zod';
3
+ import { ActionResult } from '../../action-result.ts';
4
+ import { ExperienceTracker, type SessionStep } from '../../experience-tracker.ts';
5
+ import type { PlaywrightRecorder } from '../../playwright-recorder.ts';
6
+ import type { Reporter, ReporterStep } from '../../reporter.ts';
7
+ import type { StateManager } from '../../state-manager.ts';
8
+ import { type Task, Test } from '../../test-plan.ts';
9
+ import { tag } from '../../utils/logger.ts';
10
+ import { extractStatePath } from '../../utils/url-matcher.ts';
11
+ import type { Conversation, ToolExecution } from '../conversation.ts';
12
+ import type { Provider } from '../provider.ts';
13
+ import { CODECEPT_TOOLS } from '../tools.ts';
14
+ import { type Constructor, debugLog } from './mixin.ts';
15
+ import { getExecutionLabel, isNonReusableCode, stripComments } from './utils.ts';
16
+
17
+ export interface ExperienceMethods {
18
+ saveSession(task: Task, initialState: ActionResult, conversation: Conversation): Promise<void>;
19
+ }
20
+
21
+ export function WithExperience<T extends Constructor>(Base: T) {
22
+ return class extends Base {
23
+ declare provider: Provider;
24
+ declare experienceTracker: ExperienceTracker;
25
+ declare reporter: Reporter | undefined;
26
+ declare stateManager: StateManager | undefined;
27
+ declare recorder: PlaywrightRecorder | undefined;
28
+ declare isPlaywrightFramework: () => boolean;
29
+ declare toCode: (conversation: Conversation, scenario: string) => string;
30
+ declare toPlaywrightCode: (conversation: Conversation, scenario: string) => Promise<string>;
31
+
32
+ async saveSession(task: Task, initialState: ActionResult, conversation: Conversation): Promise<void> {
33
+ debugLog('Saving session experience');
34
+
35
+ const result = task.getRunResult();
36
+ const toolExecutions = conversation.getToolExecutions();
37
+
38
+ if (task instanceof Test) {
39
+ task.generatedCode = this.isPlaywrightFramework() ? await this.toPlaywrightCode(conversation, task.description) : this.toCode(conversation, task.description);
40
+ }
41
+
42
+ const steps = await this.extractSteps(toolExecutions);
43
+
44
+ const skipExperience = result === 'failed' || (task instanceof Test && (task.hasFailed || task.isSkipped));
45
+ if (!skipExperience) {
46
+ await this.detectRetryPatterns(toolExecutions, initialState);
47
+ const body = await this.curateFlow(steps, task, initialState);
48
+ if (body.trim()) {
49
+ const relatedUrls = this.extractVisitedUrls(toolExecutions, initialState.url || '');
50
+ this.experienceTracker.writeFlow(initialState, body, relatedUrls);
51
+ }
52
+ }
53
+
54
+ if (task instanceof Test && result !== 'failed') {
55
+ await this.reportSession(task, steps);
56
+ }
57
+
58
+ tag('substep').log(`Historian saved session for: ${task.description}`);
59
+ }
60
+
61
+ private async reportSession(test: Test, steps: SessionStep[]): Promise<void> {
62
+ if (!this.reporter) return;
63
+
64
+ const reporterSteps: ReporterStep[] = steps.map((step) => ({
65
+ title: step.message,
66
+ status: step.status === 'passed' ? 'passed' : 'failed',
67
+ code: step.code ? step.code.split('\n').filter((l) => l.trim()) : [],
68
+ discovery: step.discovery,
69
+ }));
70
+
71
+ await this.reporter.reportSteps(test, reporterSteps);
72
+ }
73
+
74
+ private async extractSteps(toolExecutions: ToolExecution[]): Promise<SessionStep[]> {
75
+ const stepsWithDiffs: Array<{ step: SessionStep; ariaDiff: string | null }> = [];
76
+
77
+ for (const exec of toolExecutions) {
78
+ if (!CODECEPT_TOOLS.includes(exec.toolName as any)) continue;
79
+ if (!exec.output?.code) continue;
80
+ if (!exec.wasSuccessful) continue;
81
+ if (isNonReusableCode(exec.output.code)) continue;
82
+
83
+ const step: SessionStep = {
84
+ message: getExecutionLabel(exec, `Executed ${exec.toolName}`),
85
+ status: 'passed',
86
+ tool: exec.toolName,
87
+ code: stripComments(exec.output.code),
88
+ };
89
+
90
+ stepsWithDiffs.push({ step, ariaDiff: exec.output?.pageDiff?.ariaChanges || null });
91
+ }
92
+
93
+ await this.analyzeDiscoveries(stepsWithDiffs);
94
+
95
+ return stepsWithDiffs.map((s) => s.step);
96
+ }
97
+
98
+ private async curateFlow(steps: SessionStep[], task: Task, initialState: ActionResult): Promise<string> {
99
+ if (steps.length === 0) return '';
100
+
101
+ const existingExperience = this.experienceTracker
102
+ .getRelevantExperience(initialState)
103
+ .map((e) => e.content)
104
+ .filter(Boolean)
105
+ .join('\n');
106
+ const existingSummary = existingExperience.length > 2000 ? existingExperience.substring(0, 2000) : existingExperience;
107
+
108
+ const stepsBlock = steps
109
+ .map((s, i) => {
110
+ const lines = [`Step ${i + 1}: ${s.message}`];
111
+ if (s.code) {
112
+ lines.push('```js');
113
+ lines.push(s.code);
114
+ lines.push('```');
115
+ }
116
+ if (s.discovery) {
117
+ for (const d of s.discovery.split('\n').filter((line) => line.trim())) {
118
+ lines.push(`> ${d.trim()}`);
119
+ }
120
+ }
121
+ return lines.join('\n');
122
+ })
123
+ .join('\n\n');
124
+
125
+ const expected = task instanceof Test && task.expected.length > 0 ? task.expected.map((e) => `- ${e}`).join('\n') : '';
126
+ const notes = task.notesToString();
127
+
128
+ const prompt = dedent`
129
+ You are curating a how-to recipe from a recorded test run. Decide whether the run produced
130
+ anything reusable, and if so, output a single \`## FLOW: ...\` markdown block. Otherwise output
131
+ an empty response (no text at all).
132
+
133
+ <original_scenario>
134
+ ${task.description}
135
+ </original_scenario>
136
+
137
+ ${expected ? `<expected_outcomes>\n${expected}\n</expected_outcomes>` : ''}
138
+
139
+ ${notes ? `<run_notes>\n${notes}\n</run_notes>` : ''}
140
+
141
+ <recorded_steps>
142
+ ${stepsBlock}
143
+ </recorded_steps>
144
+
145
+ ${existingSummary ? `<existing_experience_for_this_page>\n${existingSummary}\n</existing_experience_for_this_page>` : ''}
146
+
147
+ Output a FLOW block in EXACTLY this format:
148
+
149
+ ## FLOW: <imperative how-to that matches what the steps actually accomplished>
150
+
151
+ * <action description>
152
+
153
+ \`\`\`js
154
+ <code from input>
155
+ \`\`\`
156
+
157
+ > <relevant element or observation worth remembering>
158
+
159
+ * <next action>
160
+
161
+ \`\`\`js
162
+ <code from input>
163
+ \`\`\`
164
+
165
+ ---
166
+
167
+ Rules:
168
+ - Title is an imperative phrase answering "how do I X". It must describe what the steps
169
+ ACTUALLY accomplished, not the original scenario if the run drifted off course.
170
+ - Drop steps that wandered onto unrelated pages or did not contribute to a reusable recipe.
171
+ - Drop discoveries that are noise (loading states, timestamps, repeated buttons).
172
+ - Code blocks may only contain code that appears verbatim in <recorded_steps>. Do not invent
173
+ CodeceptJS calls.
174
+ - Lowercase the first letter of the title. No trailing punctuation.
175
+
176
+ Return an EMPTY response (no markdown, no explanation) if any of:
177
+ - The original scenario is a negative test (verifying an error, validation rejection, blocked
178
+ or forbidden action, "should fail" expectation).
179
+ - The surviving steps do not accomplish anything reusable.
180
+ - The recipe duplicates a recipe already present in <existing_experience_for_this_page>.
181
+ `;
182
+
183
+ try {
184
+ const response = await this.provider.chat(
185
+ [
186
+ { role: 'system', content: 'Curate reusable how-to recipes from recorded test runs. Be selective — only emit a FLOW when the steps demonstrate a coherent, reusable, positive recipe. Otherwise return nothing.' },
187
+ { role: 'user', content: prompt },
188
+ ],
189
+ this.provider.getModelForAgent('historian'),
190
+ { agentName: 'historian', telemetryFunctionId: 'historian.curateFlow' }
191
+ );
192
+
193
+ const body = (response?.text || '').trim();
194
+ if (!body) {
195
+ debugLog('curateFlow returned empty — skipping flow write');
196
+ return '';
197
+ }
198
+ if (!body.includes('## FLOW:')) {
199
+ debugLog('curateFlow output missing ## FLOW: heading — skipping');
200
+ return '';
201
+ }
202
+ return `${body}\n`;
203
+ } catch (error: any) {
204
+ debugLog('curateFlow failed, skipping flow write: %s', error.message);
205
+ return '';
206
+ }
207
+ }
208
+
209
+ private async detectRetryPatterns(toolExecutions: ToolExecution[], initialState: ActionResult): Promise<void> {
210
+ if (!this.experienceTracker || !this.stateManager) return;
211
+
212
+ const failedByTool = new Map<string, ToolExecution[]>();
213
+ const candidates: Array<{ failed: ToolExecution[]; success: ToolExecution }> = [];
214
+
215
+ for (const exec of toolExecutions) {
216
+ if (!CODECEPT_TOOLS.includes(exec.toolName as any)) continue;
217
+ if (!exec.output?.code) continue;
218
+
219
+ if (!exec.wasSuccessful) {
220
+ const bucket = failedByTool.get(exec.toolName) || [];
221
+ bucket.push(exec);
222
+ failedByTool.set(exec.toolName, bucket);
223
+ continue;
224
+ }
225
+
226
+ const failed = failedByTool.get(exec.toolName);
227
+ if (failed?.length) {
228
+ candidates.push({ failed: [...failed], success: exec });
229
+ failedByTool.set(exec.toolName, []);
230
+ }
231
+ }
232
+
233
+ if (candidates.length === 0) return;
234
+
235
+ const prompt = dedent`
236
+ Analyze these retry patterns where a tool failed multiple times before succeeding.
237
+ For each candidate, determine which failed attempts were trying to do the same thing as the success.
238
+
239
+ ${candidates
240
+ .map(
241
+ (c, i) => dedent`
242
+ Candidate ${i}:
243
+ Failed attempts:
244
+ ${c.failed.map((f, j) => ` ${j}: ${getExecutionLabel(f, f.toolName)} → code: ${f.output?.code}`).join('\n')}
245
+ Succeeded:
246
+ ${getExecutionLabel(c.success, c.success.toolName)} → code: ${c.success.output.code}
247
+ `
248
+ )
249
+ .join('\n\n')}
250
+
251
+ For each candidate where failures share the same intent as the success:
252
+ - candidateIndex: index of the candidate
253
+ - failedIndices: which failed attempts share the same intent
254
+ - intent: business-focused description of what was being done
255
+ - explanation: actionable tip explaining which element works and what to avoid
256
+ `;
257
+
258
+ const schema = z.object({
259
+ retryPatterns: z.array(
260
+ z.object({
261
+ candidateIndex: z.number(),
262
+ failedIndices: z.array(z.number()),
263
+ intent: z.string(),
264
+ explanation: z.string(),
265
+ })
266
+ ),
267
+ });
268
+
269
+ try {
270
+ const response = await this.provider.generateObject(
271
+ [
272
+ { role: 'system', content: 'Analyze retry patterns in web testing tool executions. Identify when failed attempts share the same intent as a successful one.' },
273
+ { role: 'user', content: prompt },
274
+ ],
275
+ schema
276
+ );
277
+
278
+ for (const pattern of response?.object?.retryPatterns || []) {
279
+ const candidate = candidates[pattern.candidateIndex];
280
+ if (!candidate) continue;
281
+
282
+ const url = candidate.success.output?.pageDiff?.currentUrl;
283
+ let state: ActionResult = initialState;
284
+
285
+ if (url && url !== initialState.url) {
286
+ const transition = this.stateManager.getLastVisitToPath(url);
287
+ if (transition) {
288
+ state = ActionResult.fromState(transition.toState);
289
+ }
290
+ }
291
+
292
+ if (isNonReusableCode(candidate.success.output.code)) continue;
293
+ this.experienceTracker.writeAction(state, { title: pattern.intent, code: candidate.success.output.code, explanation: pattern.explanation });
294
+ }
295
+
296
+ debugLog('Detected %d retry patterns', response?.object?.retryPatterns?.length || 0);
297
+ } catch (error: any) {
298
+ debugLog('Failed to detect retry patterns: %s', error.message);
299
+ }
300
+ }
301
+
302
+ private async analyzeDiscoveries(stepsWithDiffs: Array<{ step: SessionStep; ariaDiff: string | null }>): Promise<void> {
303
+ if (!stepsWithDiffs.some((s) => s.ariaDiff)) return;
304
+
305
+ const prompt = this.buildDiscoveryPrompt(stepsWithDiffs);
306
+
307
+ const schema = z.object({
308
+ discoveries: z.array(
309
+ z.object({
310
+ stepNumber: z.number(),
311
+ discoveries: z.array(z.string()),
312
+ })
313
+ ),
314
+ });
315
+
316
+ try {
317
+ const response = await this.provider.generateObject(
318
+ [
319
+ { role: 'system', content: 'Analyze test execution steps and identify valuable UI discoveries. Return multiple discoveries per step when multiple new elements appear. Return no discoveries for steps with no meaningful changes.' },
320
+ { role: 'user', content: prompt },
321
+ ],
322
+ schema
323
+ );
324
+
325
+ for (const { stepNumber, discoveries } of response?.object?.discoveries || []) {
326
+ const stepIndex = stepNumber - 1;
327
+ if (!stepsWithDiffs[stepIndex]) continue;
328
+ if (discoveries.length === 0) continue;
329
+ stepsWithDiffs[stepIndex].step.discovery = discoveries.join('\n');
330
+ }
331
+ } catch (error: any) {
332
+ debugLog('Failed to analyze discoveries: %s', error.message);
333
+ }
334
+ }
335
+
336
+ private buildDiscoveryPrompt(stepsWithDiffs: Array<{ step: SessionStep; ariaDiff: string | null }>): string {
337
+ const stepsBlock = stepsWithDiffs
338
+ .map(({ step, ariaDiff }, i) => {
339
+ const lines = [`Step ${i + 1}: ${step.message}`];
340
+ if (ariaDiff) lines.push(ariaDiff);
341
+ return lines.join('\n');
342
+ })
343
+ .join('\n\n');
344
+
345
+ return dedent`
346
+ Review these test steps and their ARIA diffs. Identify new UI elements that appeared
347
+ which could be valuable for deeper testing of this feature or related features that can
348
+ be triggered from this flow.
349
+
350
+ Return MULTIPLE discoveries per step when multiple new elements appear (buttons, inputs,
351
+ links, errors, warnings — list them all). Return an empty array for a step with no new
352
+ elements or only generic changes (loading spinners, timestamps).
353
+
354
+ <steps>
355
+ ${stepsBlock}
356
+ </steps>
357
+
358
+ Format:
359
+ - stepNumber: which step revealed these elements
360
+ - discoveries: array of brief descriptions, e.g. ["A new button appeared: Publish To Twitter", "A new input field appeared: Description"]
361
+
362
+ Only return actionable elements that could lead to new test scenarios.
363
+ `;
364
+ }
365
+
366
+ private extractVisitedUrls(toolExecutions: ToolExecution[], initialUrl: string): string[] {
367
+ const urls = new Set<string>();
368
+ const initialPath = extractStatePath(initialUrl);
369
+
370
+ for (const exec of toolExecutions) {
371
+ const currentUrl = exec.output?.pageDiff?.currentUrl;
372
+ if (!currentUrl) continue;
373
+
374
+ const relativePath = extractStatePath(currentUrl);
375
+ if (relativePath && relativePath !== initialPath) {
376
+ urls.add(relativePath);
377
+ }
378
+ }
379
+
380
+ return [...urls];
381
+ }
382
+ };
383
+ }
@@ -0,0 +1,4 @@
1
+ import { createDebug } from '../../utils/logger.js';
2
+
3
+ export type Constructor<T = object> = new (...args: any[]) => T;
4
+ export const debugLog = createDebug('explorbot:historian');
@@ -0,0 +1,169 @@
1
+ import { mkdirSync, writeFileSync } from 'node:fs';
2
+ import { join } from 'node:path';
3
+ import { ActionResult } from '../../action-result.ts';
4
+ import { ConfigParser } from '../../config.ts';
5
+ import { KnowledgeTracker } from '../../knowledge-tracker.ts';
6
+ import { type PlaywrightRecorder, type TraceCall, renderAssertion, renderCall } from '../../playwright-recorder.ts';
7
+ import type { Plan } from '../../test-plan.ts';
8
+ import { tag } from '../../utils/logger.ts';
9
+ import { relativeToCwd } from '../../utils/next-steps.ts';
10
+ import type { Conversation } from '../conversation.ts';
11
+ import { ASSERTION_TOOLS, CODECEPT_TOOLS } from '../tools.ts';
12
+ import type { Constructor } from './mixin.ts';
13
+ import { escapeString, getExecutionLabel } from './utils.ts';
14
+
15
+ const PLAYWRIGHT_EMITTED_TOOLS = [...CODECEPT_TOOLS, ...ASSERTION_TOOLS] as const;
16
+
17
+ export interface PlaywrightMethods {
18
+ toPlaywrightCode(conversation: Conversation, scenario: string): Promise<string>;
19
+ savePlaywrightPlanToFile(plan: Plan): string;
20
+ }
21
+
22
+ export function WithPlaywright<T extends Constructor>(Base: T) {
23
+ return class extends Base {
24
+ declare recorder: PlaywrightRecorder | undefined;
25
+ declare savedFiles: Set<string>;
26
+
27
+ async toPlaywrightCode(conversation: Conversation, scenario: string): Promise<string> {
28
+ const toolExecutions = conversation.getToolExecutions();
29
+ const successfulSteps = toolExecutions.filter((exec) => exec.wasSuccessful && PLAYWRIGHT_EMITTED_TOOLS.includes(exec.toolName as any));
30
+
31
+ const callsByGroup = this.recorder ? await this.recorder.exportChunk() : new Map<string, TraceCall[]>();
32
+
33
+ const stepLines: string[] = [];
34
+ for (const exec of successfulSteps) {
35
+ const explanation = getExecutionLabel(exec);
36
+ const execLines: string[] = [];
37
+ const groupId: string | undefined = exec.output?.playwrightGroupId;
38
+ const calls = groupId ? callsByGroup.get(groupId) || [] : [];
39
+ for (const call of calls) {
40
+ execLines.push(renderCall(call));
41
+ }
42
+ const assertions: Array<{ name: string; args: any[] }> = exec.output?.assertionSteps || [];
43
+ for (const assertion of assertions) {
44
+ const line = renderAssertion(assertion);
45
+ if (line) execLines.push(line);
46
+ }
47
+ if (execLines.length === 0) continue;
48
+ stepLines.push('');
49
+ if (explanation) {
50
+ stepLines.push(` await test.step('${escapeString(explanation)}', async () => {`);
51
+ for (const line of execLines) {
52
+ stepLines.push(` ${line}`);
53
+ }
54
+ stepLines.push(' });');
55
+ } else {
56
+ for (const line of execLines) {
57
+ stepLines.push(` ${line}`);
58
+ }
59
+ }
60
+ }
61
+
62
+ const pilotVerifications = this.recorder ? this.recorder.drainVerifications() : [];
63
+ if (pilotVerifications.length > 0) {
64
+ const assertionLines: string[] = [];
65
+ for (const step of pilotVerifications) {
66
+ const line = renderAssertion(step);
67
+ if (line) assertionLines.push(line);
68
+ }
69
+ if (assertionLines.length > 0) {
70
+ stepLines.push('');
71
+ stepLines.push(` await test.step('Verification', async () => {`);
72
+ for (const line of assertionLines) {
73
+ stepLines.push(` ${line}`);
74
+ }
75
+ stepLines.push(' });');
76
+ }
77
+ }
78
+
79
+ if (stepLines.length === 0) {
80
+ return '';
81
+ }
82
+
83
+ const lines: string[] = [];
84
+ lines.push(`test('${escapeString(scenario)}', async ({ page }) => {`);
85
+ lines.push(...stepLines);
86
+ lines.push('});');
87
+ return lines.join('\n');
88
+ }
89
+
90
+ savePlaywrightPlanToFile(plan: Plan): string {
91
+ const lines: string[] = [];
92
+
93
+ lines.push(`import { test, expect } from '@playwright/test';`);
94
+ lines.push('');
95
+ lines.push(`test.describe('${escapeString(plan.title)}', () => {`);
96
+
97
+ const startUrl = plan.url || plan.tests[0]?.startUrl;
98
+ if (startUrl) {
99
+ lines.push(' test.beforeEach(async ({ page }) => {');
100
+ lines.push(` await page.goto('${escapeString(startUrl)}');`);
101
+ for (const line of this.getPlaywrightKnowledgeLines(startUrl, ' ')) {
102
+ lines.push(line);
103
+ }
104
+ lines.push(' });');
105
+ lines.push('');
106
+ }
107
+
108
+ for (const test of plan.tests) {
109
+ if (test.generatedCode) {
110
+ const indented = indentBlock(test.generatedCode, ' ');
111
+ if (test.isSuccessful) {
112
+ lines.push(indented);
113
+ } else {
114
+ lines.push(` // FAILED: ${escapeString(test.scenario)}`);
115
+ lines.push(indented.replace(/test\(/, 'test.skip('));
116
+ }
117
+ lines.push('');
118
+ continue;
119
+ }
120
+
121
+ lines.push(` test.fixme('${escapeString(test.scenario)}', async ({ page }) => {`);
122
+ if (test.plannedSteps.length > 0) {
123
+ for (const step of test.plannedSteps) {
124
+ lines.push(` // ${step}`);
125
+ }
126
+ } else {
127
+ lines.push(` // ${test.scenario}`);
128
+ }
129
+ lines.push(' });');
130
+ lines.push('');
131
+ }
132
+
133
+ lines.push('});');
134
+
135
+ const testsDir = ConfigParser.getInstance().getTestsDir();
136
+ mkdirSync(testsDir, { recursive: true });
137
+
138
+ const filename = plan.title.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
139
+ const filePath = join(testsDir, `${filename}.spec.ts`);
140
+ writeFileSync(filePath, lines.join('\n'));
141
+ this.savedFiles.add(filePath);
142
+
143
+ tag('substep').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);
144
+ return filePath;
145
+ }
146
+
147
+ private getPlaywrightKnowledgeLines(url: string, indent = ' '): string[] {
148
+ const knowledgeTracker = new KnowledgeTracker();
149
+ const state = new ActionResult({ url });
150
+ const { wait, waitForElement } = knowledgeTracker.getStateParameters(state, ['wait', 'waitForElement']);
151
+
152
+ const lines: string[] = [];
153
+ if (wait !== undefined) {
154
+ lines.push(`${indent}await page.waitForTimeout(${Number(wait) * 1000});`);
155
+ }
156
+ if (waitForElement) {
157
+ lines.push(`${indent}await page.locator(${JSON.stringify(waitForElement)}).waitFor();`);
158
+ }
159
+ return lines;
160
+ }
161
+ };
162
+ }
163
+
164
/**
 * Prefixes every non-empty line of `block` with `indent`; empty lines are left empty.
 */
function indentBlock(block: string, indent: string): string {
  const indented: string[] = [];
  for (const line of block.split('\n')) {
    indented.push(line === '' ? line : indent + line);
  }
  return indented.join('\n');
}
@@ -0,0 +1,23 @@
1
+ import type { ToolExecution } from '../conversation.ts';
2
+
3
/**
 * Tells whether a code snippet contains an `I.clickXY(...)` call.
 * Callers in this package use it to keep such snippets out of recorded steps and experience.
 */
export function isNonReusableCode(code: string): boolean {
  const clickXYCall = /\bI\.clickXY\s*\(/;
  return clickXYCall.test(code);
}
6
+
7
/**
 * Escapes text so it can be embedded in a single-quoted JavaScript string literal on one line.
 *
 * Backslashes are escaped first so an existing backslash can never combine with the escape added
 * for a quote (e.g. `\'` in the input must not end up closing the literal). Line breaks
 * (`\r\n`, `\r`, `\n`) are each flattened to one space, since a raw line terminator is not
 * allowed inside a quoted literal.
 *
 * @param str - raw text
 * @returns text that is safe between single quotes
 */
export function escapeString(str: string): string {
  return str
    .replace(/\\/g, '\\\\')
    .replace(/'/g, "\\'")
    .replace(/\r\n|\r|\n/g, ' ');
}
10
+
11
/**
 * Removes blank lines and lines whose trimmed text starts with `//`, `/*` or `*`.
 * Remaining lines are returned unchanged (original indentation and trailing comments kept).
 */
export function stripComments(code: string): string {
  const commentPrefixes = ['//', '/*', '*'];
  const kept: string[] = [];

  for (const line of code.split('\n')) {
    const text = line.trim();
    if (text === '') continue;
    if (commentPrefixes.some((prefix) => text.startsWith(prefix))) continue;
    kept.push(line);
  }

  return kept.join('\n');
}
20
+
21
/**
 * Picks a human-readable label for a tool execution: the first non-empty value among the input's
 * `explanation`, `assertion` and `note`, then `fallback`, and finally an empty string.
 */
export function getExecutionLabel(exec: ToolExecution, fallback?: string): string {
  const input = exec.input;
  const candidates = [input?.explanation, input?.assertion, input?.note, fallback];
  for (const candidate of candidates) {
    if (candidate) return candidate;
  }
  return '';
}