explorbot 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/README.md +27 -1
  2. package/bin/explorbot-cli.ts +27 -18
  3. package/dist/bin/explorbot-cli.js +26 -18
  4. package/dist/package.json +2 -2
  5. package/dist/rules/navigator/output.md +9 -0
  6. package/dist/rules/navigator/verification-actions.md +2 -0
  7. package/dist/src/action-result.js +23 -1
  8. package/dist/src/action.js +46 -38
  9. package/dist/src/ai/bosun.js +11 -1
  10. package/dist/src/ai/conversation.js +39 -0
  11. package/dist/src/ai/historian/codeceptjs.js +109 -0
  12. package/dist/src/ai/historian/experience.js +320 -0
  13. package/dist/src/ai/historian/mixin.js +2 -0
  14. package/dist/src/ai/historian/playwright.js +145 -0
  15. package/dist/src/ai/historian/utils.js +18 -0
  16. package/dist/src/ai/historian.js +19 -405
  17. package/dist/src/ai/navigator.js +82 -29
  18. package/dist/src/ai/pilot.js +232 -13
  19. package/dist/src/ai/planner.js +29 -9
  20. package/dist/src/ai/provider.js +54 -17
  21. package/dist/src/ai/researcher.js +41 -32
  22. package/dist/src/ai/rules.js +26 -14
  23. package/dist/src/ai/tester.js +90 -26
  24. package/dist/src/ai/tools.js +13 -7
  25. package/dist/src/browser-server.js +16 -3
  26. package/dist/src/commands/add-rule-command.js +11 -8
  27. package/dist/src/commands/clean-command.js +2 -1
  28. package/dist/src/commands/explore-command.js +27 -15
  29. package/dist/src/commands/init-command.js +9 -8
  30. package/dist/src/commands/plan-command.js +32 -0
  31. package/dist/src/commands/plan-save-command.js +19 -7
  32. package/dist/src/commands/rerun-command.js +4 -0
  33. package/dist/src/components/App.js +15 -5
  34. package/dist/src/execution-controller.js +13 -2
  35. package/dist/src/experience-tracker.js +20 -64
  36. package/dist/src/explorbot.js +5 -8
  37. package/dist/src/explorer.js +9 -2
  38. package/dist/src/observability.js +50 -99
  39. package/dist/src/playwright-recorder.js +309 -0
  40. package/dist/src/test-plan.js +12 -0
  41. package/dist/src/utils/aria.js +37 -1
  42. package/dist/src/utils/error-page.js +20 -7
  43. package/dist/src/utils/next-steps.js +37 -0
  44. package/package.json +2 -2
  45. package/rules/navigator/output.md +9 -0
  46. package/rules/navigator/verification-actions.md +2 -0
  47. package/src/action-result.ts +26 -1
  48. package/src/action.ts +44 -37
  49. package/src/ai/bosun.ts +11 -1
  50. package/src/ai/conversation.ts +37 -0
  51. package/src/ai/historian/codeceptjs.ts +130 -0
  52. package/src/ai/historian/experience.ts +383 -0
  53. package/src/ai/historian/mixin.ts +4 -0
  54. package/src/ai/historian/playwright.ts +169 -0
  55. package/src/ai/historian/utils.ts +23 -0
  56. package/src/ai/historian.ts +35 -473
  57. package/src/ai/navigator.ts +82 -29
  58. package/src/ai/pilot.ts +237 -14
  59. package/src/ai/planner.ts +29 -9
  60. package/src/ai/provider.ts +51 -17
  61. package/src/ai/researcher.ts +45 -33
  62. package/src/ai/rules.ts +27 -14
  63. package/src/ai/tester.ts +94 -26
  64. package/src/ai/tools.ts +47 -25
  65. package/src/browser-server.ts +17 -3
  66. package/src/commands/add-rule-command.ts +11 -7
  67. package/src/commands/clean-command.ts +2 -1
  68. package/src/commands/explore-command.ts +29 -15
  69. package/src/commands/init-command.ts +9 -8
  70. package/src/commands/plan-command.ts +35 -0
  71. package/src/commands/plan-save-command.ts +18 -7
  72. package/src/commands/rerun-command.ts +5 -0
  73. package/src/components/App.tsx +16 -5
  74. package/src/config.ts +6 -1
  75. package/src/execution-controller.ts +14 -3
  76. package/src/experience-tracker.ts +21 -72
  77. package/src/explorbot.ts +5 -8
  78. package/src/explorer.ts +11 -2
  79. package/src/observability.ts +50 -109
  80. package/src/playwright-recorder.ts +305 -0
  81. package/src/test-plan.ts +12 -0
  82. package/src/utils/aria.ts +38 -1
  83. package/src/utils/error-page.ts +22 -7
  84. package/src/utils/next-steps.ts +51 -0
@@ -1,379 +1,31 @@
1
- import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
2
- import { join } from 'node:path';
3
- import dedent from 'dedent';
4
- import { z } from 'zod';
5
- import { ActionResult } from "../action-result.js";
6
- import { ConfigParser } from "../config.js";
1
+ import { readFileSync, writeFileSync } from 'node:fs';
7
2
  import { ExperienceTracker } from "../experience-tracker.js";
8
- import { KnowledgeTracker } from "../knowledge-tracker.js";
9
- import { Test } from "../test-plan.js";
10
- import { createDebug, tag } from "../utils/logger.js";
11
- import { extractStatePath } from "../utils/url-matcher.js";
12
- import { ASSERTION_TOOLS, CODECEPT_TOOLS } from "./tools.js";
13
- const debugLog = createDebug('explorbot:historian');
14
- export class Historian {
15
- provider;
16
- experienceTracker;
17
- reporter;
18
- stateManager;
19
- savedFiles = new Set();
20
- constructor(provider, experienceTracker, reporter, stateManager) {
3
+ import { tag } from "../utils/logger.js";
4
+ import { relativeToCwd } from "../utils/next-steps.js";
5
+ import { WithCodeceptJS } from "./historian/codeceptjs.js";
6
+ import { WithExperience } from "./historian/experience.js";
7
+ import { WithPlaywright } from "./historian/playwright.js";
8
+ export { isNonReusableCode } from "./historian/utils.js";
9
+ const HistorianBase = WithPlaywright(WithCodeceptJS(WithExperience(Object)));
10
+ export class Historian extends HistorianBase {
11
+ constructor(provider, experienceTracker, reporter, stateManager, config, recorder) {
12
+ super();
21
13
  this.provider = provider;
22
14
  this.experienceTracker = experienceTracker || new ExperienceTracker();
23
15
  this.reporter = reporter;
24
16
  this.stateManager = stateManager;
17
+ this.config = config;
18
+ this.recorder = recorder;
19
+ this.savedFiles = new Set();
20
+ }
21
+ isPlaywrightFramework() {
22
+ return this.config?.ai?.agents?.historian?.framework === 'playwright';
25
23
  }
26
24
  getSavedFiles() {
27
25
  return [...this.savedFiles];
28
26
  }
29
- async saveSession(task, initialState, conversation) {
30
- debugLog('Saving session experience');
31
- const result = this.determineResult(task);
32
- const toolExecutions = conversation.getToolExecutions();
33
- if (task instanceof Test) {
34
- task.generatedCode = this.toCode(conversation, task.description);
35
- }
36
- const steps = await this.extractSteps(toolExecutions);
37
- await this.detectRetryPatterns(toolExecutions, initialState);
38
- const verifiedSteps = await this.verifySteps(steps, initialState);
39
- if (verifiedSteps.length > 0) {
40
- const relatedUrls = this.extractVisitedUrls(toolExecutions, initialState.url || '');
41
- this.experienceTracker.writeFlow(initialState, {
42
- scenario: task.description,
43
- steps: verifiedSteps,
44
- relatedUrls,
45
- });
46
- }
47
- if (task instanceof Test && result !== 'failed') {
48
- await this.reportSession(task, steps);
49
- }
50
- tag('substep').log(`Historian saved session for: ${task.description}`);
51
- }
52
- async reportSession(test, steps) {
53
- if (!this.reporter)
54
- return;
55
- const reporterSteps = steps.map((step) => ({
56
- title: step.message,
57
- status: step.status === 'passed' ? 'passed' : 'failed',
58
- code: step.code ? step.code.split('\n').filter((l) => l.trim()) : [],
59
- discovery: step.discovery,
60
- }));
61
- await this.reporter.reportSteps(test, reporterSteps);
62
- }
63
- async extractSteps(toolExecutions) {
64
- const stepsWithDiffs = [];
65
- for (const exec of toolExecutions) {
66
- if (!CODECEPT_TOOLS.includes(exec.toolName))
67
- continue;
68
- if (!exec.output?.code)
69
- continue;
70
- if (!exec.wasSuccessful)
71
- continue;
72
- if (isNonReusableCode(exec.output.code))
73
- continue;
74
- const message = this.getExecutionLabel(exec, `Executed ${exec.toolName}`);
75
- const ariaDiff = exec.output?.pageDiff?.ariaChanges || null;
76
- const urlChanged = exec.output?.pageDiff?.urlChanged || false;
77
- const step = {
78
- message,
79
- status: 'passed',
80
- tool: exec.toolName,
81
- code: this.stripComments(exec.output.code),
82
- };
83
- stepsWithDiffs.push({ step, ariaDiff, urlChanged });
84
- }
85
- await this.analyzeDiscoveries(stepsWithDiffs);
86
- return stepsWithDiffs.map((s) => s.step);
87
- }
88
- async verifySteps(steps, initialState) {
89
- if (steps.length === 0)
90
- return [];
91
- const existingExperience = this.experienceTracker
92
- .getRelevantExperience(initialState)
93
- .map((e) => e.content)
94
- .filter(Boolean)
95
- .join('\n');
96
- const existingSummary = existingExperience.length > 2000 ? existingExperience.substring(0, 2000) : existingExperience;
97
- const stepsList = steps.map((s, i) => `${i}. ${s.message}\n Code: ${s.code || 'none'}`).join('\n');
98
- const prompt = dedent `
99
- Review these test steps and determine which are valuable to save as experience
100
- for future test executions on this page.
101
-
102
- <steps>
103
- ${stepsList}
104
- </steps>
105
-
106
- ${existingSummary ? `<existing_experience>\n${existingSummary}\n</existing_experience>` : ''}
107
-
108
- For each step, determine if it is useful:
109
- - NOT useful if it uses auto-generated or unstable locators (ember IDs, numeric data-testid, random IDs)
110
- - NOT useful if it is already documented in existing experience
111
- - NOT useful if it requires an unclear precondition that would not be reproducible
112
- - NOT useful if it is trivial navigation (I.amOnPage) without meaningful context
113
- - USEFUL if it demonstrates how to interact with a specific UI component (expand dropdown, fill form, etc)
114
- - USEFUL if it shows a working approach for a common task on this page
115
- `;
116
- const schema = z.object({
117
- steps: z.array(z.object({
118
- stepIndex: z.number(),
119
- useful: z.boolean(),
120
- })),
121
- });
122
- try {
123
- const response = await this.provider.generateObject([
124
- { role: 'system', content: 'Evaluate test steps for experience value. Be selective — only keep steps that teach something reusable.' },
125
- { role: 'user', content: prompt },
126
- ], schema, undefined, { telemetryFunctionId: 'historian.verifySteps' });
127
- const usefulIndices = new Set((response?.object?.steps || []).filter((s) => s.useful).map((s) => s.stepIndex));
128
- const verified = steps.filter((_, i) => usefulIndices.has(i));
129
- debugLog('Verified %d/%d steps as useful', verified.length, steps.length);
130
- return verified;
131
- }
132
- catch (error) {
133
- debugLog('Step verification failed, keeping all steps: %s', error.message);
134
- return steps;
135
- }
136
- }
137
- async detectRetryPatterns(toolExecutions, initialState) {
138
- if (!this.experienceTracker || !this.stateManager)
139
- return;
140
- const failedByTool = new Map();
141
- const candidates = [];
142
- for (const exec of toolExecutions) {
143
- if (!CODECEPT_TOOLS.includes(exec.toolName))
144
- continue;
145
- if (!exec.output?.code)
146
- continue;
147
- if (!exec.wasSuccessful) {
148
- const bucket = failedByTool.get(exec.toolName) || [];
149
- bucket.push(exec);
150
- failedByTool.set(exec.toolName, bucket);
151
- continue;
152
- }
153
- const failed = failedByTool.get(exec.toolName);
154
- if (failed?.length) {
155
- candidates.push({ failed: [...failed], success: exec });
156
- failedByTool.set(exec.toolName, []);
157
- }
158
- }
159
- if (candidates.length === 0)
160
- return;
161
- const prompt = dedent `
162
- Analyze these retry patterns where a tool failed multiple times before succeeding.
163
- For each candidate, determine which failed attempts were trying to do the same thing as the success.
164
-
165
- ${candidates
166
- .map((c, i) => dedent `
167
- Candidate ${i}:
168
- Failed attempts:
169
- ${c.failed.map((f, j) => ` ${j}: ${this.getExecutionLabel(f, f.toolName)} → code: ${f.output?.code}`).join('\n')}
170
- Succeeded:
171
- ${this.getExecutionLabel(c.success, c.success.toolName)} → code: ${c.success.output.code}
172
- `)
173
- .join('\n\n')}
174
-
175
- For each candidate where failures share the same intent as the success:
176
- - candidateIndex: index of the candidate
177
- - failedIndices: which failed attempts share the same intent
178
- - intent: business-focused description of what was being done
179
- - explanation: actionable tip explaining which element works and what to avoid
180
- `;
181
- const schema = z.object({
182
- retryPatterns: z.array(z.object({
183
- candidateIndex: z.number(),
184
- failedIndices: z.array(z.number()),
185
- intent: z.string(),
186
- explanation: z.string(),
187
- })),
188
- });
189
- try {
190
- const response = await this.provider.generateObject([
191
- { role: 'system', content: 'Analyze retry patterns in web testing tool executions. Identify when failed attempts share the same intent as a successful one.' },
192
- { role: 'user', content: prompt },
193
- ], schema);
194
- for (const pattern of response?.object?.retryPatterns || []) {
195
- const candidate = candidates[pattern.candidateIndex];
196
- if (!candidate)
197
- continue;
198
- const url = candidate.success.output?.pageDiff?.currentUrl;
199
- let state = initialState;
200
- if (url && url !== initialState.url) {
201
- const transition = this.stateManager.getLastVisitToPath(url);
202
- if (transition) {
203
- state = ActionResult.fromState(transition.toState);
204
- }
205
- }
206
- if (isNonReusableCode(candidate.success.output.code))
207
- continue;
208
- this.experienceTracker.writeAction(state, { title: pattern.intent, code: candidate.success.output.code, explanation: pattern.explanation });
209
- }
210
- debugLog('Detected %d retry patterns', response?.object?.retryPatterns?.length || 0);
211
- }
212
- catch (error) {
213
- debugLog('Failed to detect retry patterns: %s', error.message);
214
- }
215
- }
216
- async analyzeDiscoveries(stepsWithDiffs) {
217
- if (!stepsWithDiffs.some((s) => s.ariaDiff))
218
- return;
219
- const prompt = this.buildDiscoveryPrompt(stepsWithDiffs);
220
- const schema = z.object({
221
- discoveries: z.array(z.object({
222
- stepNumber: z.number(),
223
- discoveries: z.array(z.string()),
224
- })),
225
- });
226
- try {
227
- const response = await this.provider.generateObject([
228
- { role: 'system', content: 'Analyze test execution steps and identify valuable UI discoveries. Return multiple discoveries per step when multiple new elements appear. Return no discoveries for steps with no meaningful changes.' },
229
- { role: 'user', content: prompt },
230
- ], schema);
231
- for (const { stepNumber, discoveries } of response?.object?.discoveries || []) {
232
- const stepIndex = stepNumber - 1;
233
- if (!stepsWithDiffs[stepIndex])
234
- continue;
235
- if (discoveries.length === 0)
236
- continue;
237
- stepsWithDiffs[stepIndex].step.discovery = discoveries.join('\n');
238
- }
239
- }
240
- catch (error) {
241
- debugLog('Failed to analyze discoveries: %s', error.message);
242
- }
243
- }
244
- buildDiscoveryPrompt(stepsWithDiffs) {
245
- let prompt = dedent `
246
- Review these test steps and their ARIA diffs. Identify new UI elements that appeared
247
- which could be valuable for:
248
- - Deeper testing of this feature
249
- - Related features that can be triggered from this flow
250
-
251
- IMPORTANT:
252
- - Return MULTIPLE discoveries per step when multiple new elements appear (e.g., if 3 buttons appeared, return an array with 3 discoveries for that step)
253
- - Return NO discoveries (empty array) for a step if nothing new appeared or if elements were already discovered in previous steps
254
- - Only include steps that have discoveries
255
-
256
- Steps:
257
- `;
258
- for (let i = 0; i < stepsWithDiffs.length; i++) {
259
- const { step, ariaDiff, urlChanged } = stepsWithDiffs[i];
260
- prompt += `\n\nStep ${i + 1}: ${step.message}`;
261
- if (ariaDiff) {
262
- prompt += `\n${ariaDiff}`;
263
- }
264
- }
265
- prompt += dedent `
266
-
267
- Return discoveries in format:
268
- - stepNumber: which step revealed these elements
269
- - discoveries: array of brief descriptions like ["A new button appeared: Publish To Twitter", "A new input field appeared: Description"]
270
-
271
- Only return elements that are actionable and could lead to new test scenarios.
272
- Ignore generic UI changes (loading spinners, timestamps, etc).
273
- If errors or warnings appeared in the step, include them in the discoveries array.
274
- If multiple buttons, inputs, links, or other actionable elements appeared in the same step, include all of them in the discoveries array.
275
- `;
276
- return prompt;
277
- }
278
- determineResult(task) {
279
- if ('isSuccessful' in task && task.isSuccessful)
280
- return 'success';
281
- if ('hasAchievedAny' in task && task.hasAchievedAny())
282
- return 'partial';
283
- const hasPassedNotes = Object.values(task.notes).some((n) => n.status === 'passed');
284
- if (hasPassedNotes)
285
- return 'partial';
286
- return 'failed';
287
- }
288
- extractVisitedUrls(toolExecutions, initialUrl) {
289
- const urls = new Set();
290
- const initialPath = extractStatePath(initialUrl);
291
- for (const exec of toolExecutions) {
292
- const currentUrl = exec.output?.pageDiff?.currentUrl;
293
- if (!currentUrl)
294
- continue;
295
- const relativePath = extractStatePath(currentUrl);
296
- if (relativePath && relativePath !== initialPath) {
297
- urls.add(relativePath);
298
- }
299
- }
300
- return [...urls];
301
- }
302
- toCode(conversation, scenario) {
303
- const toolExecutions = conversation.getToolExecutions();
304
- const TRACKABLE_TOOLS = [...CODECEPT_TOOLS, ...ASSERTION_TOOLS];
305
- const successfulSteps = toolExecutions.filter((exec) => exec.wasSuccessful && TRACKABLE_TOOLS.includes(exec.toolName) && exec.output?.code);
306
- if (successfulSteps.length === 0) {
307
- return '';
308
- }
309
- const lines = [];
310
- lines.push(`Scenario('${this.escapeString(scenario)}', ({ I }) => {`);
311
- for (const exec of successfulSteps) {
312
- if (isNonReusableCode(exec.output.code))
313
- continue;
314
- const explanation = this.getExecutionLabel(exec);
315
- if (explanation) {
316
- lines.push('');
317
- lines.push(` Section('${this.escapeString(explanation)}');`);
318
- }
319
- const code = this.stripComments(exec.output.code);
320
- const codeLines = code.includes('\n') ? code.split('\n') : code.split('; ');
321
- for (const codeLine of codeLines) {
322
- const trimmed = codeLine.trim();
323
- if (trimmed) {
324
- lines.push(` ${trimmed}`);
325
- }
326
- }
327
- }
328
- lines.push('});');
329
- return lines.join('\n');
330
- }
331
27
  savePlanToFile(plan) {
332
- const lines = [];
333
- lines.push(`import step, { Section } from 'codeceptjs/steps';`);
334
- lines.push('');
335
- lines.push(`Feature('${this.escapeString(plan.title)}')`);
336
- lines.push('');
337
- const startUrl = plan.url || plan.tests[0]?.startUrl;
338
- if (startUrl) {
339
- lines.push('Before(({ I }) => {');
340
- lines.push(` I.amOnPage('${this.escapeString(startUrl)}');`);
341
- lines.push(...this.getKnowledgeLines(startUrl));
342
- lines.push('});');
343
- lines.push('');
344
- }
345
- for (const test of plan.tests) {
346
- if (test.generatedCode) {
347
- if (test.isSuccessful) {
348
- lines.push(test.generatedCode);
349
- }
350
- else {
351
- lines.push(`// FAILED: ${test.scenario}`);
352
- lines.push(test.generatedCode.replace(/Scenario\(/, 'Scenario.skip('));
353
- }
354
- lines.push('');
355
- continue;
356
- }
357
- lines.push(`Scenario.todo('${this.escapeString(test.scenario)}', ({ I }) => {`);
358
- if (test.plannedSteps.length > 0) {
359
- for (const step of test.plannedSteps) {
360
- lines.push(` // ${step}`);
361
- }
362
- }
363
- else {
364
- lines.push(` // ${test.scenario}`);
365
- }
366
- lines.push('});');
367
- lines.push('');
368
- }
369
- const testsDir = ConfigParser.getInstance().getTestsDir();
370
- mkdirSync(testsDir, { recursive: true });
371
- const filename = plan.title.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
372
- const filePath = join(testsDir, `${filename}.js`);
373
- writeFileSync(filePath, lines.join('\n'));
374
- this.savedFiles.add(filePath);
375
- tag('substep').log(`Saved plan tests to: ${filePath}`);
376
- return filePath;
28
+ return this.isPlaywrightFramework() ? this.savePlaywrightPlanToFile(plan) : this.saveCodeceptPlanToFile(plan);
377
29
  }
378
30
  rewriteScenarioInFile(filePath, healedSteps) {
379
31
  let content = readFileSync(filePath, 'utf-8');
@@ -384,44 +36,6 @@ export class Historian {
384
36
  }
385
37
  writeFileSync(filePath, content);
386
38
  this.savedFiles.add(filePath);
387
- tag('substep').log(`Updated test file with healed steps: ${filePath}`);
39
+ tag('substep').log(`Updated test file with healed steps: ${relativeToCwd(filePath)}`);
388
40
  }
389
- getExecutionLabel(exec, fallback) {
390
- return exec.input?.explanation || exec.input?.assertion || exec.input?.note || fallback || '';
391
- }
392
- escapeString(str) {
393
- return str.replace(/'/g, "\\'").replace(/\n/g, ' ');
394
- }
395
- getKnowledgeLines(url, indent = ' ') {
396
- const knowledgeTracker = new KnowledgeTracker();
397
- const state = new ActionResult({ url });
398
- const { wait, waitForElement, code } = knowledgeTracker.getStateParameters(state, ['wait', 'waitForElement', 'code']);
399
- const lines = [];
400
- if (wait !== undefined) {
401
- lines.push(`${indent}I.wait(${wait});`);
402
- }
403
- if (waitForElement) {
404
- lines.push(`${indent}I.waitForElement(${JSON.stringify(waitForElement)});`);
405
- }
406
- if (code) {
407
- for (const codeLine of code.split('\n')) {
408
- const trimmed = codeLine.trim();
409
- if (trimmed)
410
- lines.push(`${indent}${trimmed}`);
411
- }
412
- }
413
- return lines;
414
- }
415
- stripComments(code) {
416
- return code
417
- .split('\n')
418
- .filter((line) => {
419
- const trimmed = line.trim();
420
- return trimmed && !trimmed.startsWith('//') && !trimmed.startsWith('/*') && !trimmed.startsWith('*');
421
- })
422
- .join('\n');
423
- }
424
- }
425
- export function isNonReusableCode(code) {
426
- return /\bI\.clickXY\s*\(/.test(code);
427
41
  }
@@ -8,8 +8,9 @@ import { HooksRunner } from "../utils/hooks-runner.js";
8
8
  import { createDebug, pluralize, tag } from '../utils/logger.js';
9
9
  import { loop, pause } from '../utils/loop.js';
10
10
  import { RulesLoader } from "../utils/rules-loader.js";
11
+ import { extractStatePath } from '../utils/url-matcher.js';
11
12
  import { Researcher } from "./researcher.js";
12
- import { actionRule, locatorRule } from './rules.js';
13
+ import { actionRule, locatorRule, unexpectedPopupRule } from './rules.js';
13
14
  import { isInteractive } from './task-agent.js';
14
15
  import { createAgentTools } from "./tools.js";
15
16
  const debugLog = createDebug('explorbot:navigator');
@@ -131,11 +132,10 @@ class Navigator {
131
132
  </hint>`;
132
133
  }
133
134
  if (!actionResult.isInsideIframe) {
134
- const toc = this.experienceTracker.getExperienceTableOfContents(actionResult);
135
- if (toc.length > 0) {
136
- const totalSections = toc.reduce((sum, entry) => sum + entry.sections.length, 0);
137
- tag('substep').log(`Found ${toc.length} experience ${pluralize(toc.length, 'file')} (${totalSections} sections) for: ${actionResult.url}`);
138
- experience = renderExperienceToc(toc);
135
+ const successful = this.experienceTracker.getSuccessfulExperience(actionResult);
136
+ if (successful.length > 0) {
137
+ tag('substep').log(`Found ${successful.length} experience ${pluralize(successful.length, 'file')} for: ${actionResult.url}`);
138
+ experience = `<experience>\nPast successful recipes recorded from prior runs for this page. Prefer these solutions first if they match the goal.\n\n${successful.join('\n\n')}\n</experience>`;
139
139
  }
140
140
  }
141
141
  const prompt = dedent `
@@ -163,6 +163,8 @@ class Navigator {
163
163
 
164
164
  ${actionRule}
165
165
 
166
+ ${unexpectedPopupRule}
167
+
166
168
  ${RulesLoader.loadRules('navigator', ['multiple-locator', 'output'], actionResult.url || '').replace('{{maxAttempts}}', String(this.MAX_ATTEMPTS))}
167
169
 
168
170
  ${experience}
@@ -171,11 +173,13 @@ class Navigator {
171
173
  `;
172
174
  const conversation = this.provider.startConversation(this.systemPrompt, 'navigator');
173
175
  conversation.addUserText(prompt);
174
- const tools = this.buildExperienceTools();
176
+ const tools = undefined;
175
177
  let codeBlocks = [];
176
178
  let htmlContextAdded = false;
177
179
  let codeBlockIndex = 0;
178
180
  let totalAttempts = 0;
181
+ const progressBlocks = [];
182
+ const batchFailures = [];
179
183
  let resolved = false;
180
184
  await loop(async ({ stop }) => {
181
185
  if (codeBlocks.length === 0) {
@@ -194,43 +198,90 @@ class Navigator {
194
198
  }
195
199
  const codeBlock = codeBlocks[codeBlockIndex];
196
200
  if (!codeBlock) {
201
+ if (batchFailures.length === 0 && htmlContextAdded) {
202
+ stop();
203
+ return;
204
+ }
205
+ tag('substep').log('Feeding failures back to AI for a new batch...');
206
+ let contextMsg = 'Previous solutions did not work. Analyze the failures and try DIFFERENT strategies (not syntactic variants of the same locator).\n\n';
207
+ if (batchFailures.length > 0) {
208
+ const lines = batchFailures.map((f) => `- \`${f.code.split('\n')[0]}\` → ${f.error}`).join('\n');
209
+ contextMsg += `<previous_failures>\n${lines}\n</previous_failures>\n\n`;
210
+ }
197
211
  if (!htmlContextAdded) {
198
212
  htmlContextAdded = true;
199
- tag('substep').log('Adding HTML context for better resolution...');
200
- conversation.addUserText(dedent `
201
- Previous solutions did not work. Here is the full HTML context:
202
-
203
- <page_html>
204
- ${await actionResult.combinedHtml()}
205
- </page_html>
206
-
207
- Please suggest new solutions based on this additional context.
208
- `);
209
- codeBlocks = [];
210
- return;
213
+ contextMsg += `Full HTML context:\n\n<page_html>\n${await actionResult.combinedHtml()}\n</page_html>\n\n`;
211
214
  }
212
- stop();
215
+ contextMsg += 'Propose new solutions. If errors mention "intercepts pointer events" or timeouts on visible elements, an overlay is blocking — dismiss it first (Escape, click outside, Close button) before retrying the original action.';
216
+ conversation.addUserText(contextMsg);
217
+ codeBlocks = [];
218
+ batchFailures.length = 0;
213
219
  return;
214
220
  }
215
221
  codeBlockIndex++;
216
222
  totalAttempts++;
217
223
  await this.explorer.switchToMainFrame();
224
+ const prevHash = action.actionResult?.getStateHash() ?? actionResult.getStateHash();
218
225
  debugLog(`Attempting resolution: ${codeBlock}`);
219
- resolved = await action.attempt(codeBlock, message);
226
+ const attemptOk = await action.attempt(codeBlock, message);
227
+ const page = action.playwrightHelper?.page;
228
+ if (page) {
229
+ try {
230
+ await page.waitForLoadState('load', { timeout: 5000 });
231
+ }
232
+ catch {
233
+ // Navigation did not reach 'load' state within timeout; continue and verify URL
234
+ }
235
+ }
236
+ if (!attemptOk) {
237
+ const raw = action.lastError?.message || 'attempt failed';
238
+ const firstMeaningful = raw.split('\n').find((l) => l.trim() && !l.trim().startsWith('at ')) || raw;
239
+ const shortErr = firstMeaningful.replace(/\s+/g, ' ').trim().slice(0, 220);
240
+ batchFailures.push({ code: codeBlock, error: shortErr });
241
+ }
220
242
  if (expectedUrl) {
221
- await action.getActor().wait(2);
222
- const freshState = await action.capturePageState();
223
- if (normalizeUrl(freshState.url || '') === normalizeUrl(expectedUrl)) {
224
- resolved = true;
243
+ if (page) {
244
+ try {
245
+ await page.waitForURL((url) => normalizeUrl(url.pathname) === normalizeUrl(expectedUrl), { timeout: 5000 });
246
+ }
247
+ catch {
248
+ // URL did not transition to expectedUrl within timeout
249
+ }
225
250
  }
226
- else if (resolved) {
251
+ const freshState = await action.capturePageState();
252
+ const urlMatches = normalizeUrl(freshState.url || '') === normalizeUrl(expectedUrl);
253
+ const stateChanged = freshState.getStateHash() !== actionResult.getStateHash();
254
+ resolved = urlMatches && stateChanged;
255
+ if (!resolved && attemptOk) {
227
256
  tag('warning').log(`URL verification failed: expected ${expectedUrl}, got ${freshState.url}`);
228
- resolved = false;
257
+ }
258
+ if (freshState.getStateHash() !== prevHash && (attemptOk || urlMatches)) {
259
+ progressBlocks.push(codeBlock);
229
260
  }
230
261
  }
262
+ else {
263
+ resolved = attemptOk;
264
+ if (attemptOk)
265
+ progressBlocks.push(codeBlock);
266
+ }
231
267
  if (resolved) {
232
268
  tag('success').log('Navigation resolved successfully');
233
- this.experienceTracker.writeAction(actionResult, { title: message, code: codeBlock });
269
+ let scenario = message.split('\n')[0];
270
+ if (expectedUrl) {
271
+ const fromPath = extractStatePath(actionResult.url || '');
272
+ const toPath = extractStatePath(expectedUrl);
273
+ scenario = `reach ${toPath} from ${fromPath}`;
274
+ }
275
+ const recipe = progressBlocks
276
+ .join('\n')
277
+ .split('\n')
278
+ .filter((line) => !/^\s*I\.amOnPage\s*\(/.test(line))
279
+ .join('\n')
280
+ .trim();
281
+ if (recipe) {
282
+ const body = `## FLOW: ${scenario}\n\n* ${scenario}\n\n\`\`\`js\n${recipe}\n\`\`\`\n\n---\n`;
283
+ this.experienceTracker.writeFlow(actionResult, body);
284
+ }
234
285
  stop();
235
286
  return;
236
287
  }
@@ -455,6 +506,7 @@ class Navigator {
455
506
  const tools = this.buildExperienceTools();
456
507
  let codeBlocks = [];
457
508
  const successfulCodes = [];
509
+ const assertionSteps = [];
458
510
  const action = this.explorer.createAction();
459
511
  await loop(async ({ stop, iteration }) => {
460
512
  if (codeBlocks.length === 0) {
@@ -479,6 +531,7 @@ class Navigator {
479
531
  if (verified) {
480
532
  tag('success').log('Verification passed');
481
533
  successfulCodes.push(codeBlock);
534
+ assertionSteps.push(...action.assertionSteps);
482
535
  }
483
536
  }, {
484
537
  maxAttempts: this.MAX_ATTEMPTS,
@@ -493,7 +546,7 @@ class Navigator {
493
546
  const verified = totalAttempted <= 1 ? successfulCodes.length > 0 : successfulCodes.length > totalAttempted / 2;
494
547
  actionResult.addVerification(message, verified);
495
548
  this.explorer.getStateManager().updateState(actionResult);
496
- return { verified, successfulCodes, totalAttempted };
549
+ return { verified, successfulCodes, assertionSteps, totalAttempted };
497
550
  }
498
551
  }
499
552
  export { Navigator };