explorbot 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/README.md +37 -1
  2. package/bin/explorbot-cli.ts +27 -18
  3. package/dist/bin/explorbot-cli.js +26 -18
  4. package/dist/package.json +3 -3
  5. package/dist/rules/navigator/output.md +9 -0
  6. package/dist/rules/navigator/verification-actions.md +2 -0
  7. package/dist/src/action-result.js +23 -1
  8. package/dist/src/action.js +51 -42
  9. package/dist/src/ai/bosun.js +11 -1
  10. package/dist/src/ai/conversation.js +39 -0
  11. package/dist/src/ai/historian/codeceptjs.js +109 -0
  12. package/dist/src/ai/historian/experience.js +321 -0
  13. package/dist/src/ai/historian/mixin.js +2 -0
  14. package/dist/src/ai/historian/playwright.js +145 -0
  15. package/dist/src/ai/historian/screencast.js +121 -0
  16. package/dist/src/ai/historian/utils.js +18 -0
  17. package/dist/src/ai/historian.js +21 -405
  18. package/dist/src/ai/navigator.js +82 -29
  19. package/dist/src/ai/pilot.js +232 -13
  20. package/dist/src/ai/planner.js +29 -9
  21. package/dist/src/ai/provider.js +54 -17
  22. package/dist/src/ai/researcher.js +41 -32
  23. package/dist/src/ai/rules.js +26 -14
  24. package/dist/src/ai/tester.js +90 -26
  25. package/dist/src/ai/tools.js +13 -7
  26. package/dist/src/browser-server.js +16 -3
  27. package/dist/src/commands/add-rule-command.js +11 -8
  28. package/dist/src/commands/clean-command.js +2 -1
  29. package/dist/src/commands/explore-command.js +43 -15
  30. package/dist/src/commands/init-command.js +9 -8
  31. package/dist/src/commands/plan-command.js +32 -0
  32. package/dist/src/commands/plan-save-command.js +19 -7
  33. package/dist/src/commands/rerun-command.js +4 -0
  34. package/dist/src/components/App.js +15 -5
  35. package/dist/src/execution-controller.js +13 -2
  36. package/dist/src/experience-tracker.js +20 -64
  37. package/dist/src/explorbot.js +8 -8
  38. package/dist/src/explorer.js +11 -3
  39. package/dist/src/observability.js +50 -99
  40. package/dist/src/playwright-recorder.js +309 -0
  41. package/dist/src/reporter.js +4 -1
  42. package/dist/src/test-plan.js +12 -0
  43. package/dist/src/utils/aria.js +37 -1
  44. package/dist/src/utils/error-page.js +20 -7
  45. package/dist/src/utils/next-steps.js +37 -0
  46. package/dist/src/utils/strings.js +15 -0
  47. package/package.json +3 -3
  48. package/rules/navigator/output.md +9 -0
  49. package/rules/navigator/verification-actions.md +2 -0
  50. package/src/action-result.ts +26 -1
  51. package/src/action.ts +49 -41
  52. package/src/ai/bosun.ts +11 -1
  53. package/src/ai/conversation.ts +37 -0
  54. package/src/ai/historian/codeceptjs.ts +130 -0
  55. package/src/ai/historian/experience.ts +384 -0
  56. package/src/ai/historian/mixin.ts +4 -0
  57. package/src/ai/historian/playwright.ts +169 -0
  58. package/src/ai/historian/screencast.ts +133 -0
  59. package/src/ai/historian/utils.ts +23 -0
  60. package/src/ai/historian.ts +37 -473
  61. package/src/ai/navigator.ts +82 -29
  62. package/src/ai/pilot.ts +237 -14
  63. package/src/ai/planner.ts +29 -9
  64. package/src/ai/provider.ts +51 -17
  65. package/src/ai/researcher.ts +45 -33
  66. package/src/ai/rules.ts +27 -14
  67. package/src/ai/tester.ts +94 -26
  68. package/src/ai/tools.ts +47 -25
  69. package/src/browser-server.ts +17 -3
  70. package/src/commands/add-rule-command.ts +11 -7
  71. package/src/commands/clean-command.ts +2 -1
  72. package/src/commands/explore-command.ts +46 -14
  73. package/src/commands/init-command.ts +9 -8
  74. package/src/commands/plan-command.ts +35 -0
  75. package/src/commands/plan-save-command.ts +18 -7
  76. package/src/commands/rerun-command.ts +5 -0
  77. package/src/components/App.tsx +16 -5
  78. package/src/config.ts +12 -1
  79. package/src/execution-controller.ts +14 -3
  80. package/src/experience-tracker.ts +21 -72
  81. package/src/explorbot.ts +8 -8
  82. package/src/explorer.ts +13 -3
  83. package/src/observability.ts +50 -109
  84. package/src/playwright-recorder.ts +305 -0
  85. package/src/reporter.ts +4 -1
  86. package/src/test-plan.ts +12 -0
  87. package/src/utils/aria.ts +38 -1
  88. package/src/utils/error-page.ts +22 -7
  89. package/src/utils/next-steps.ts +51 -0
  90. package/src/utils/strings.ts +17 -0
@@ -0,0 +1,384 @@
1
+ import dedent from 'dedent';
2
+ import { z } from 'zod';
3
+ import { ActionResult } from '../../action-result.ts';
4
+ import { ExperienceTracker, type SessionStep } from '../../experience-tracker.ts';
5
+ import type { Reporter, ReporterStep } from '../../reporter.ts';
6
+ import type { StateManager } from '../../state-manager.ts';
7
+ import { type Task, Test } from '../../test-plan.ts';
8
+ import { tag } from '../../utils/logger.ts';
9
+ import { extractStatePath } from '../../utils/url-matcher.ts';
10
+ import type { Conversation, ToolExecution } from '../conversation.ts';
11
+ import type { Provider } from '../provider.ts';
12
+ import { CODECEPT_TOOLS } from '../tools.ts';
13
+ import { type Constructor, debugLog } from './mixin.ts';
14
+ import { getExecutionLabel, isNonReusableCode, stripComments } from './utils.ts';
15
+
16
/** Public method surface that the `WithExperience` mixin adds to a class. */
+ export interface ExperienceMethods {
17
/** Saves the session for `task`, given the state it started from and the recorded conversation. */
+ saveSession(task: Task, initialState: ActionResult, conversation: Conversation): Promise<void>;
18
+ }
19
+
20
/**
 * Mixin that adds session-experience persistence (`saveSession`) to a base class.
 *
 * The returned class relies on members supplied by the host / sibling mixins and only
 * declared here: `provider`, `experienceTracker`, `reporter`, `stateManager`,
 * `isPlaywrightFramework`, `toCode`, `toPlaywrightCode` and `stopScreencast`.
 *
 * @param Base - Class to extend.
 * @returns An anonymous subclass of `Base` exposing `saveSession`.
 */
export function WithExperience<T extends Constructor>(Base: T) {
  /** Extracts a loggable message from whatever was thrown (Error, error-like object, or anything else). */
  const describeError = (error: unknown): string => {
    if (error instanceof Error) return error.message;
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  };

  return class extends Base {
    declare provider: Provider;
    declare experienceTracker: ExperienceTracker;
    declare reporter: Reporter | undefined;
    declare stateManager: StateManager | undefined;
    declare isPlaywrightFramework: () => boolean;
    declare toCode: (conversation: Conversation, scenario: string) => string;
    declare toPlaywrightCode: (conversation: Conversation, scenario: string) => Promise<string>;
    declare stopScreencast: () => Promise<void>;

    /**
     * Persists what was learned from a finished task run.
     *
     * In order: generates test code for `Test` tasks, extracts reusable steps from the
     * conversation's tool executions, records retry patterns and a curated flow (skipped
     * when the run failed or the test failed / was skipped), reports the steps for
     * non-failed tests, and finally stops the screencast.
     *
     * @param task - Task that was executed.
     * @param initialState - State the task started from.
     * @param conversation - Conversation holding the recorded tool executions.
     */
    async saveSession(task: Task, initialState: ActionResult, conversation: Conversation): Promise<void> {
      debugLog('Saving session experience');

      try {
        const result = task.getRunResult();
        const toolExecutions = conversation.getToolExecutions();

        if (task instanceof Test) {
          task.generatedCode = this.isPlaywrightFramework() ? await this.toPlaywrightCode(conversation, task.description) : this.toCode(conversation, task.description);
        }

        const steps = await this.extractSteps(toolExecutions);

        const skipExperience = result === 'failed' || (task instanceof Test && (task.hasFailed || task.isSkipped));
        if (!skipExperience) {
          await this.detectRetryPatterns(toolExecutions, initialState);
          const body = await this.curateFlow(steps, task, initialState);
          if (body.trim()) {
            const relatedUrls = this.extractVisitedUrls(toolExecutions, initialState.url || '');
            this.experienceTracker.writeFlow(initialState, body, relatedUrls);
          }
        }

        if (task instanceof Test && result !== 'failed') {
          await this.reportSession(task, steps);
        }
      } finally {
        // Always stop the recording, even when code generation, flow writing or reporting
        // throws; otherwise the screencast would be left running after a failed save.
        await this.stopScreencast();
      }

      tag('substep').log(`Historian saved session for: ${task.description}`);
    }

    /**
     * Converts session steps to reporter steps and forwards them to the reporter.
     * Does nothing when no reporter is configured.
     */
    private async reportSession(test: Test, steps: SessionStep[]): Promise<void> {
      if (!this.reporter) return;

      const reporterSteps: ReporterStep[] = steps.map((step) => ({
        title: step.message,
        // Anything that is not explicitly 'passed' is reported as 'failed'.
        status: step.status === 'passed' ? 'passed' : 'failed',
        // One entry per non-blank line of code.
        code: step.code ? step.code.split('\n').filter((l) => l.trim()) : [],
        discovery: step.discovery,
      }));

      await this.reporter.reportSteps(test, reporterSteps);
    }

    /**
     * Builds session steps from tool executions.
     *
     * Only executions of `CODECEPT_TOOLS` that succeeded, produced code, and whose code is
     * not flagged by `isNonReusableCode` are kept. Discoveries are then attached in place
     * by `analyzeDiscoveries`.
     */
    private async extractSteps(toolExecutions: ToolExecution[]): Promise<SessionStep[]> {
      const stepsWithDiffs: Array<{ step: SessionStep; ariaDiff: string | null }> = [];

      for (const exec of toolExecutions) {
        if (!CODECEPT_TOOLS.includes(exec.toolName as any)) continue;
        if (!exec.output?.code) continue;
        if (!exec.wasSuccessful) continue;
        if (isNonReusableCode(exec.output.code)) continue;

        const step: SessionStep = {
          message: getExecutionLabel(exec, `Executed ${exec.toolName}`),
          status: 'passed',
          tool: exec.toolName,
          code: stripComments(exec.output.code),
        };

        stepsWithDiffs.push({ step, ariaDiff: exec.output?.pageDiff?.ariaChanges || null });
      }

      await this.analyzeDiscoveries(stepsWithDiffs);

      return stepsWithDiffs.map((s) => s.step);
    }

    /**
     * Asks the historian model to turn the recorded steps into a single `## FLOW:` block.
     *
     * @returns The model's text followed by a newline, or `''` when there are no steps,
     *          the model returned nothing, the output lacks a `## FLOW:` heading, or the
     *          provider call failed.
     */
    private async curateFlow(steps: SessionStep[], task: Task, initialState: ActionResult): Promise<string> {
      if (steps.length === 0) return '';

      const existingExperience = this.experienceTracker
        .getRelevantExperience(initialState)
        .map((e) => e.content)
        .filter(Boolean)
        .join('\n');
      // Cap the existing experience passed to the model at 2000 characters.
      const existingSummary = existingExperience.length > 2000 ? existingExperience.substring(0, 2000) : existingExperience;

      const stepsBlock = steps
        .map((s, i) => {
          const lines = [`Step ${i + 1}: ${s.message}`];
          if (s.code) {
            lines.push('```js');
            lines.push(s.code);
            lines.push('```');
          }
          if (s.discovery) {
            for (const d of s.discovery.split('\n').filter((line) => line.trim())) {
              lines.push(`> ${d.trim()}`);
            }
          }
          return lines.join('\n');
        })
        .join('\n\n');

      const expected = task instanceof Test && task.expected.length > 0 ? task.expected.map((e) => `- ${e}`).join('\n') : '';
      const notes = task.notesToString();

      const prompt = dedent`
        You are curating a how-to recipe from a recorded test run. Decide whether the run produced
        anything reusable, and if so, output a single \`## FLOW: ...\` markdown block. Otherwise output
        an empty response (no text at all).

        <original_scenario>
        ${task.description}
        </original_scenario>

        ${expected ? `<expected_outcomes>\n${expected}\n</expected_outcomes>` : ''}

        ${notes ? `<run_notes>\n${notes}\n</run_notes>` : ''}

        <recorded_steps>
        ${stepsBlock}
        </recorded_steps>

        ${existingSummary ? `<existing_experience_for_this_page>\n${existingSummary}\n</existing_experience_for_this_page>` : ''}

        Output a FLOW block in EXACTLY this format:

        ## FLOW: <imperative how-to that matches what the steps actually accomplished>

        * <action description>

        \`\`\`js
        <code from input>
        \`\`\`

        > <relevant element or observation worth remembering>

        * <next action>

        \`\`\`js
        <code from input>
        \`\`\`

        ---

        Rules:
        - Title is an imperative phrase answering "how do I X". It must describe what the steps
        ACTUALLY accomplished, not the original scenario if the run drifted off course.
        - Drop steps that wandered onto unrelated pages or did not contribute to a reusable recipe.
        - Drop discoveries that are noise (loading states, timestamps, repeated buttons).
        - Code blocks may only contain code that appears verbatim in <recorded_steps>. Do not invent
        CodeceptJS calls.
        - Lowercase the first letter of the title. No trailing punctuation.

        Return an EMPTY response (no markdown, no explanation) if any of:
        - The original scenario is a negative test (verifying an error, validation rejection, blocked
        or forbidden action, "should fail" expectation).
        - The surviving steps do not accomplish anything reusable.
        - The recipe duplicates a recipe already present in <existing_experience_for_this_page>.
      `;

      try {
        const response = await this.provider.chat(
          [
            { role: 'system', content: 'Curate reusable how-to recipes from recorded test runs. Be selective — only emit a FLOW when the steps demonstrate a coherent, reusable, positive recipe. Otherwise return nothing.' },
            { role: 'user', content: prompt },
          ],
          this.provider.getModelForAgent('historian'),
          { agentName: 'historian', telemetryFunctionId: 'historian.curateFlow' }
        );

        const body = (response?.text || '').trim();
        if (!body) {
          debugLog('curateFlow returned empty — skipping flow write');
          return '';
        }
        if (!body.includes('## FLOW:')) {
          debugLog('curateFlow output missing ## FLOW: heading — skipping');
          return '';
        }
        return `${body}\n`;
      } catch (error: unknown) {
        // Best-effort: a failed curation must not abort saving the session.
        debugLog('curateFlow failed, skipping flow write: %s', describeError(error));
        return '';
      }
    }

    /**
     * Finds "failed, then succeeded" sequences per tool and records the successful code
     * as an action experience, using the model to supply the intent and explanation.
     *
     * Does nothing when the experience tracker or state manager is missing, or when no
     * tool succeeded after earlier failures of the same tool. Provider errors are logged
     * and swallowed.
     */
    private async detectRetryPatterns(toolExecutions: ToolExecution[], initialState: ActionResult): Promise<void> {
      if (!this.experienceTracker || !this.stateManager) return;

      const failedByTool = new Map<string, ToolExecution[]>();
      const candidates: Array<{ failed: ToolExecution[]; success: ToolExecution }> = [];

      for (const exec of toolExecutions) {
        if (!CODECEPT_TOOLS.includes(exec.toolName as any)) continue;
        if (!exec.output?.code) continue;

        if (!exec.wasSuccessful) {
          const bucket = failedByTool.get(exec.toolName) || [];
          bucket.push(exec);
          failedByTool.set(exec.toolName, bucket);
          continue;
        }

        // A success closes out the failures accumulated so far for the same tool.
        const failed = failedByTool.get(exec.toolName);
        if (failed?.length) {
          candidates.push({ failed: [...failed], success: exec });
          failedByTool.set(exec.toolName, []);
        }
      }

      if (candidates.length === 0) return;

      const prompt = dedent`
        Analyze these retry patterns where a tool failed multiple times before succeeding.
        For each candidate, determine which failed attempts were trying to do the same thing as the success.

        ${candidates
          .map(
            (c, i) => dedent`
              Candidate ${i}:
              Failed attempts:
              ${c.failed.map((f, j) => ` ${j}: ${getExecutionLabel(f, f.toolName)} → code: ${f.output?.code}`).join('\n')}
              Succeeded:
              ${getExecutionLabel(c.success, c.success.toolName)} → code: ${c.success.output.code}
            `
          )
          .join('\n\n')}

        For each candidate where failures share the same intent as the success:
        - candidateIndex: index of the candidate
        - failedIndices: which failed attempts share the same intent
        - intent: business-focused description of what was being done
        - explanation: actionable tip explaining which element works and what to avoid
      `;

      const schema = z.object({
        retryPatterns: z.array(
          z.object({
            candidateIndex: z.number(),
            failedIndices: z.array(z.number()),
            intent: z.string(),
            explanation: z.string(),
          })
        ),
      });

      try {
        const response = await this.provider.generateObject(
          [
            { role: 'system', content: 'Analyze retry patterns in web testing tool executions. Identify when failed attempts share the same intent as a successful one.' },
            { role: 'user', content: prompt },
          ],
          schema
        );

        for (const pattern of response?.object?.retryPatterns || []) {
          // The index comes from the model; ignore anything that does not map to a candidate.
          const candidate = candidates[pattern.candidateIndex];
          if (!candidate) continue;

          // Skip before resolving the state: nothing is written for non-reusable code.
          if (isNonReusableCode(candidate.success.output.code)) continue;

          const url = candidate.success.output?.pageDiff?.currentUrl;
          let state: ActionResult = initialState;

          // When the success happened on another URL, attach the experience to that state if known.
          if (url && url !== initialState.url) {
            const transition = this.stateManager.getLastVisitToPath(url);
            if (transition) {
              state = ActionResult.fromState(transition.toState);
            }
          }

          this.experienceTracker.writeAction(state, { title: pattern.intent, code: candidate.success.output.code, explanation: pattern.explanation });
        }

        debugLog('Detected %d retry patterns', response?.object?.retryPatterns?.length || 0);
      } catch (error: unknown) {
        debugLog('Failed to detect retry patterns: %s', describeError(error));
      }
    }

    /**
     * Asks the model which UI discoveries each step revealed and stores them on the
     * matching step's `discovery` field (newline-joined). Mutates `stepsWithDiffs` in place.
     * Skipped entirely when no step has an ARIA diff; provider errors are logged and swallowed.
     */
    private async analyzeDiscoveries(stepsWithDiffs: Array<{ step: SessionStep; ariaDiff: string | null }>): Promise<void> {
      if (!stepsWithDiffs.some((s) => s.ariaDiff)) return;

      const prompt = this.buildDiscoveryPrompt(stepsWithDiffs);

      const schema = z.object({
        discoveries: z.array(
          z.object({
            stepNumber: z.number(),
            discoveries: z.array(z.string()),
          })
        ),
      });

      try {
        const response = await this.provider.generateObject(
          [
            { role: 'system', content: 'Analyze test execution steps and identify valuable UI discoveries. Return multiple discoveries per step when multiple new elements appear. Return no discoveries for steps with no meaningful changes.' },
            { role: 'user', content: prompt },
          ],
          schema
        );

        for (const { stepNumber, discoveries } of response?.object?.discoveries || []) {
          // Steps are numbered from 1 in the prompt.
          const stepIndex = stepNumber - 1;
          if (!stepsWithDiffs[stepIndex]) continue;
          if (discoveries.length === 0) continue;
          stepsWithDiffs[stepIndex].step.discovery = discoveries.join('\n');
        }
      } catch (error: unknown) {
        debugLog('Failed to analyze discoveries: %s', describeError(error));
      }
    }

    /** Renders the user prompt for `analyzeDiscoveries`: numbered steps, each followed by its ARIA diff when present. */
    private buildDiscoveryPrompt(stepsWithDiffs: Array<{ step: SessionStep; ariaDiff: string | null }>): string {
      const stepsBlock = stepsWithDiffs
        .map(({ step, ariaDiff }, i) => {
          const lines = [`Step ${i + 1}: ${step.message}`];
          if (ariaDiff) lines.push(ariaDiff);
          return lines.join('\n');
        })
        .join('\n\n');

      return dedent`
        Review these test steps and their ARIA diffs. Identify new UI elements that appeared
        which could be valuable for deeper testing of this feature or related features that can
        be triggered from this flow.

        Return MULTIPLE discoveries per step when multiple new elements appear (buttons, inputs,
        links, errors, warnings — list them all). Return an empty array for a step with no new
        elements or only generic changes (loading spinners, timestamps).

        <steps>
        ${stepsBlock}
        </steps>

        Format:
        - stepNumber: which step revealed these elements
        - discoveries: array of brief descriptions, e.g. ["A new button appeared: Publish To Twitter", "A new input field appeared: Description"]

        Only return actionable elements that could lead to new test scenarios.
      `;
    }

    /**
     * Collects the distinct state paths reported by tool executions, excluding the path of
     * `initialUrl`. Order follows first appearance.
     */
    private extractVisitedUrls(toolExecutions: ToolExecution[], initialUrl: string): string[] {
      const urls = new Set<string>();
      const initialPath = extractStatePath(initialUrl);

      for (const exec of toolExecutions) {
        const currentUrl = exec.output?.pageDiff?.currentUrl;
        if (!currentUrl) continue;

        const relativePath = extractStatePath(currentUrl);
        if (relativePath && relativePath !== initialPath) {
          urls.add(relativePath);
        }
      }

      return [...urls];
    }
  };
}
@@ -0,0 +1,4 @@
1
+ import { createDebug } from '../../utils/logger.js';
2
+
3
/** Constructor signature accepting any arguments and producing `T` (defaults to `object`); used to constrain mixin base classes. */
+ export type Constructor<T = object> = new (...args: any[]) => T;
4
/** Debug logger created with the 'explorbot:historian' label. */
+ export const debugLog = createDebug('explorbot:historian');
@@ -0,0 +1,169 @@
1
+ import { mkdirSync, writeFileSync } from 'node:fs';
2
+ import { join } from 'node:path';
3
+ import { ActionResult } from '../../action-result.ts';
4
+ import { ConfigParser } from '../../config.ts';
5
+ import { KnowledgeTracker } from '../../knowledge-tracker.ts';
6
+ import { type PlaywrightRecorder, type TraceCall, renderAssertion, renderCall } from '../../playwright-recorder.ts';
7
+ import type { Plan } from '../../test-plan.ts';
8
+ import { tag } from '../../utils/logger.ts';
9
+ import { relativeToCwd } from '../../utils/next-steps.ts';
10
+ import { safeFilename } from '../../utils/strings.ts';
11
+ import type { Conversation } from '../conversation.ts';
12
+ import { ASSERTION_TOOLS, CODECEPT_TOOLS } from '../tools.ts';
13
+ import type { Constructor } from './mixin.ts';
14
+ import { escapeString, getExecutionLabel } from './utils.ts';
15
+
16
/** Tool names considered when emitting Playwright code: every entry of CODECEPT_TOOLS followed by every entry of ASSERTION_TOOLS. */
+ const PLAYWRIGHT_EMITTED_TOOLS = [...CODECEPT_TOOLS, ...ASSERTION_TOOLS] as const;
17
+
18
/** Public method surface that the `WithPlaywright` mixin adds to a class. */
+ export interface PlaywrightMethods {
19
/** Produces Playwright test source for `scenario` from the given conversation. */
+ toPlaywrightCode(conversation: Conversation, scenario: string): Promise<string>;
20
/** Writes the plan's tests to a file and returns a string (the implementation in this file returns the written file path). */
+ savePlaywrightPlanToFile(plan: Plan): string;
21
+ }
22
+
23
/**
 * Mixin that adds Playwright code generation and plan-file export to a base class.
 *
 * The returned class relies on `playwright` (recorder + helper, optional) and
 * `savedFiles`, which are only declared here and must be supplied by the host.
 *
 * @param Base - Class to extend.
 * @returns An anonymous subclass of `Base` exposing `toPlaywrightCode` and `savePlaywrightPlanToFile`.
 */
export function WithPlaywright<T extends Constructor>(Base: T) {
  /**
   * Turns free text into `//` comment lines, one per input line, so that text containing
   * line breaks cannot spill uncommented content into the generated spec file.
   */
  const toCommentLines = (text: string, indent: string): string[] => String(text).split(/\r?\n/).map((line) => `${indent}// ${line}`);

  return class extends Base {
    declare playwright: { recorder: PlaywrightRecorder; helper: any } | undefined;
    declare savedFiles: Set<string>;

    /**
     * Renders a Playwright `test(...)` block for a scenario.
     *
     * For each successful execution of a tool in `PLAYWRIGHT_EMITTED_TOOLS`, emits the
     * recorded trace calls of its `playwrightGroupId` plus its `assertionSteps`, wrapped in
     * `test.step` when the execution has a label. Verifications drained from the recorder
     * are appended as a final "Verification" step.
     *
     * @param conversation - Conversation holding the recorded tool executions.
     * @param scenario - Test title.
     * @returns The test source, or `''` when nothing could be rendered.
     */
    async toPlaywrightCode(conversation: Conversation, scenario: string): Promise<string> {
      const toolExecutions = conversation.getToolExecutions();
      const successfulSteps = toolExecutions.filter((exec) => exec.wasSuccessful && PLAYWRIGHT_EMITTED_TOOLS.includes(exec.toolName as any));

      // Without a recorder there are no trace calls; only assertion steps can be rendered.
      const callsByGroup = this.playwright?.recorder ? await this.playwright.recorder.exportChunk() : new Map<string, TraceCall[]>();

      const stepLines: string[] = [];
      for (const exec of successfulSteps) {
        const explanation = getExecutionLabel(exec);
        const execLines: string[] = [];
        const groupId: string | undefined = exec.output?.playwrightGroupId;
        const calls = groupId ? callsByGroup.get(groupId) || [] : [];
        for (const call of calls) {
          execLines.push(renderCall(call));
        }
        const assertions: Array<{ name: string; args: any[] }> = exec.output?.assertionSteps || [];
        for (const assertion of assertions) {
          const line = renderAssertion(assertion);
          if (line) execLines.push(line);
        }
        // Executions that produced neither calls nor assertions are omitted entirely.
        if (execLines.length === 0) continue;
        stepLines.push('');
        if (explanation) {
          stepLines.push(` await test.step('${escapeString(explanation)}', async () => {`);
          for (const line of execLines) {
            stepLines.push(` ${line}`);
          }
          stepLines.push(' });');
        } else {
          for (const line of execLines) {
            stepLines.push(` ${line}`);
          }
        }
      }

      const pilotVerifications = this.playwright?.recorder ? this.playwright.recorder.drainVerifications() : [];
      if (pilotVerifications.length > 0) {
        const assertionLines: string[] = [];
        for (const step of pilotVerifications) {
          const line = renderAssertion(step);
          if (line) assertionLines.push(line);
        }
        if (assertionLines.length > 0) {
          stepLines.push('');
          stepLines.push(` await test.step('Verification', async () => {`);
          for (const line of assertionLines) {
            stepLines.push(` ${line}`);
          }
          stepLines.push(' });');
        }
      }

      if (stepLines.length === 0) {
        return '';
      }

      const lines: string[] = [];
      lines.push(`test('${escapeString(scenario)}', async ({ page }) => {`);
      lines.push(...stepLines);
      lines.push('});');
      return lines.join('\n');
    }

    /**
     * Writes a Playwright spec file for a plan and records it in `savedFiles`.
     *
     * Tests with generated code are emitted as-is when successful; otherwise they are
     * preceded by a `// FAILED:` comment and their first `test(` becomes `test.skip(`.
     * Tests without generated code become `test.fixme` stubs whose body lists the planned
     * steps (or the scenario) as comments. The file is written to the configured tests
     * directory, named from the plan title with a `.spec.ts` suffix.
     *
     * @param plan - Plan whose tests are exported.
     * @returns Path of the written file.
     */
    savePlaywrightPlanToFile(plan: Plan): string {
      const lines: string[] = [];

      lines.push(`import { test, expect } from '@playwright/test';`);
      lines.push('');
      lines.push(`test.describe('${escapeString(plan.title)}', () => {`);

      const startUrl = plan.url || plan.tests[0]?.startUrl;
      if (startUrl) {
        lines.push(' test.beforeEach(async ({ page }) => {');
        lines.push(` await page.goto('${escapeString(startUrl)}');`);
        for (const line of this.getPlaywrightKnowledgeLines(startUrl, ' ')) {
          lines.push(line);
        }
        lines.push(' });');
        lines.push('');
      }

      for (const test of plan.tests) {
        if (test.generatedCode) {
          const indented = indentBlock(test.generatedCode, ' ');
          if (test.isSuccessful) {
            lines.push(indented);
          } else {
            lines.push(` // FAILED: ${escapeString(test.scenario)}`);
            // Non-global regex: only the first `test(` (the test declaration) is rewritten.
            lines.push(indented.replace(/test\(/, 'test.skip('));
          }
          lines.push('');
          continue;
        }

        lines.push(` test.fixme('${escapeString(test.scenario)}', async ({ page }) => {`);
        if (test.plannedSteps.length > 0) {
          for (const step of test.plannedSteps) {
            // A step may span several lines; comment each one so the stub stays valid code.
            lines.push(...toCommentLines(step, ' '));
          }
        } else {
          lines.push(...toCommentLines(test.scenario, ' '));
        }
        lines.push(' });');
        lines.push('');
      }

      lines.push('});');

      const testsDir = ConfigParser.getInstance().getTestsDir();
      mkdirSync(testsDir, { recursive: true });

      const filePath = join(testsDir, safeFilename(plan.title, '.spec.ts'));
      writeFileSync(filePath, lines.join('\n'));
      this.savedFiles.add(filePath);

      tag('substep').log(`Saved plan tests to: ${relativeToCwd(filePath)}`);
      return filePath;
    }

    /**
     * Builds the wait statements for a URL from the `wait` and `waitForElement` state
     * parameters returned by a fresh `KnowledgeTracker`.
     *
     * @param url - URL used to build the state that is looked up.
     * @param indent - Prefix put in front of every emitted line.
     * @returns Zero, one or two lines of Playwright code.
     */
    private getPlaywrightKnowledgeLines(url: string, indent = ' '): string[] {
      const knowledgeTracker = new KnowledgeTracker();
      const state = new ActionResult({ url });
      const { wait, waitForElement } = knowledgeTracker.getStateParameters(state, ['wait', 'waitForElement']);

      const lines: string[] = [];
      if (wait !== undefined) {
        // `wait` is multiplied by 1000 before being passed to waitForTimeout.
        const waitMs = Number(wait) * 1000;
        // A non-numeric value would otherwise emit `waitForTimeout(NaN)` into the spec.
        if (Number.isFinite(waitMs)) {
          lines.push(`${indent}await page.waitForTimeout(${waitMs});`);
        }
      }
      if (waitForElement) {
        lines.push(`${indent}await page.locator(${JSON.stringify(waitForElement)}).waitFor();`);
      }
      return lines;
    }
  };
}
163
+
164
/**
 * Prefixes every non-empty line of `block` with `indent`.
 *
 * Lines are separated on `'\n'`; empty lines are left untouched so no trailing
 * whitespace is introduced.
 *
 * @param block - Text to indent.
 * @param indent - Prefix added to each non-empty line.
 * @returns The indented text, joined with `'\n'`.
 */
function indentBlock(block: string, indent: string): string {
  const indented: string[] = [];
  for (const row of block.split('\n')) {
    indented.push(row === '' ? row : `${indent}${row}`);
  }
  return indented.join('\n');
}
@@ -0,0 +1,133 @@
1
+ import { mkdirSync } from 'node:fs';
2
+ import { join } from 'node:path';
3
+ // @ts-ignore
4
+ import * as codeceptjs from 'codeceptjs';
5
+ import { outputPath } from '../../config.ts';
6
+ import type { ExplorbotConfig } from '../../config.ts';
7
+ import type { PlaywrightRecorder } from '../../playwright-recorder.ts';
8
+ import { tag } from '../../utils/logger.ts';
9
+ import { relativeToCwd } from '../../utils/next-steps.ts';
10
+ import { safeFilename } from '../../utils/strings.ts';
11
+ import { type Constructor, debugLog } from './mixin.ts';
12
+
13
/** Public method surface that the `WithScreencast` mixin adds to a class. */
+ export interface ScreencastMethods {
14
/** Installs the screencast event listeners. */
+ attachScreencast(): void;
15
/** Reports whether a screencast is currently being recorded. */
+ isScreencastActive(): boolean;
16
/** Stops the current screencast, if any. */
+ stopScreencast(): Promise<void>;
17
+ }
18
+
19
/**
 * Mixin that records a screencast per test through the Playwright helper's page.
 *
 * The returned class relies on `config`, `savedFiles` and `playwright`, which are only
 * declared here and must be supplied by the host.
 *
 * @param Base - Class to extend.
 * @returns An anonymous subclass of `Base` exposing `attachScreencast`,
 *          `isScreencastActive` and `stopScreencast`.
 */
export function WithScreencast<T extends Constructor>(Base: T) {
  return class extends Base {
    declare config: ExplorbotConfig | undefined;
    declare savedFiles: Set<string>;
    declare playwright: { recorder: PlaywrightRecorder; helper: any } | undefined;

    private screencastPage: any = null;
    private screencastActive = false;
    private screencastPath: string | null = null;
    private screencastListenersInstalled = false;
    private screencastTask: any = null;
    private screencastLastChapter: string | null = null;
    /** In-flight stop operation, shared by overlapping `stopScreencast` calls. */
    private screencastStopping: Promise<void> | null = null;
    private onTestBefore?: (test: any) => void;
    private onStepPassed?: (step: any) => void;
    private onTestAfter?: () => void;

    /** Returns `true` while a screencast is being recorded. */
    isScreencastActive(): boolean {
      return this.screencastActive;
    }

    /**
     * Subscribes to the CodeceptJS `test.before`, `step.passed` and `test.after` events to
     * start the recording, emit chapters and stop the recording respectively.
     *
     * No-op when listeners are already installed, when
     * `config.ai.agents.historian.screencast` is not set, or when no Playwright helper
     * is available.
     */
    attachScreencast(): void {
      if (this.screencastListenersInstalled) return;
      if (!this.config?.ai?.agents?.historian?.screencast) return;
      if (!this.playwright?.helper) return;

      // Event handlers are synchronous; the async work is deliberately fire-and-forget.
      this.onTestBefore = (test: any) => {
        void this.startScreencast(test);
      };
      this.onStepPassed = (step: any) => {
        void this.emitChapter(step);
      };
      this.onTestAfter = () => {
        void this.stopScreencast();
      };

      codeceptjs.event.dispatcher.on('test.before', this.onTestBefore);
      codeceptjs.event.dispatcher.on('step.passed', this.onStepPassed);
      codeceptjs.event.dispatcher.on('test.after', this.onTestAfter);

      this.screencastListenersInstalled = true;
    }

    /**
     * Starts recording to `<output>/screencasts/<plan>-<index>-<scenario>.webm`, where the
     * plan title and 1-based index are included only when available.
     * Failures to start are logged and otherwise ignored.
     */
    private async startScreencast(test: any): Promise<void> {
      if (this.screencastActive) return;
      const page = this.playwright?.helper?.page;
      // The page may not offer a screencast API; skip recording in that case.
      if (!page?.screencast?.start) return;

      const task = test?._explorbotTest;
      const scenarioName = task?.scenario || test?.title || 'scenario';
      const planTitle: string | undefined = task?.plan?.title;
      const planTests: any[] | undefined = task?.plan?.tests;
      // indexOf yields -1 when the task is not in the plan, which leaves index at 0 (omitted).
      const index = planTests && task ? planTests.indexOf(task) + 1 : 0;

      const parts: string[] = [];
      if (planTitle) parts.push(safeFilename(planTitle));
      if (index > 0) parts.push(String(index));
      parts.push(safeFilename(scenarioName));

      const dir = outputPath('screencasts');
      mkdirSync(dir, { recursive: true });
      const filePath = join(dir, `${parts.join('-')}.webm`);

      // The setting may be a plain flag or an options object; only the latter carries size/quality.
      const screencastConfig = this.config?.ai?.agents?.historian?.screencast;
      const screencastOpts = typeof screencastConfig === 'object' ? screencastConfig : {};
      const size = screencastOpts.size ?? page.viewportSize?.() ?? undefined;
      const quality = screencastOpts.quality ?? 95;

      try {
        await page.screencast.start({ path: filePath, quality, size });
        await page.screencast.showActions({ position: 'top-left' });
        this.screencastPage = page;
        this.screencastPath = filePath;
        this.screencastActive = true;
        this.screencastTask = task || null;
        this.screencastLastChapter = null;
      } catch (err) {
        tag('substep').log(`Screencast start failed: ${(err as Error).message}`);
      }
    }

    /**
     * Shows the active note of the recorded task as a chapter, skipping empty messages
     * and consecutive repeats. Errors are only written to the debug log.
     */
    private async emitChapter(_step: any): Promise<void> {
      if (!this.screencastActive) return;
      const explanation = this.screencastTask?.activeNote?.getMessage?.();
      if (!explanation) return;
      if (explanation === this.screencastLastChapter) return;
      this.screencastLastChapter = explanation;
      try {
        await this.screencastPage.screencast.showChapter(explanation);
      } catch (err) {
        debugLog('screencast.showChapter failed:', err);
      }
    }

    /**
     * Stops the active recording, resets the recording state, and registers the file in
     * `savedFiles` and on the task's artifacts.
     *
     * Safe to call concurrently: this method is invoked both directly and from the
     * `test.after` listener, so overlapping callers share one stop operation instead of
     * stopping the recording and registering the artifact twice.
     */
    async stopScreencast(): Promise<void> {
      if (this.screencastStopping) return this.screencastStopping;
      if (!this.screencastActive) return;

      const stopping = this.finishScreencast();
      this.screencastStopping = stopping;
      try {
        await stopping;
      } finally {
        this.screencastStopping = null;
      }
    }

    /** Performs the actual stop; callers must go through `stopScreencast`. */
    private async finishScreencast(): Promise<void> {
      const path = this.screencastPath;
      const task = this.screencastTask;
      try {
        await this.screencastPage.screencast.stop();
      } catch (err) {
        tag('substep').log(`Screencast stop failed: ${(err as Error).message}`);
      }
      // State is reset even when stopping failed, so the next test can start a new recording.
      this.screencastActive = false;
      this.screencastPage = null;
      this.screencastPath = null;
      this.screencastTask = null;
      this.screencastLastChapter = null;
      if (path) {
        this.savedFiles.add(path);
        task?.addArtifact?.(path);
        tag('substep').log(`Saved screencast: ${relativeToCwd(path)}`);
      }
    }
  };
}