explorbot 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -1
- package/bin/explorbot-cli.ts +27 -18
- package/dist/bin/explorbot-cli.js +26 -18
- package/dist/package.json +2 -2
- package/dist/rules/navigator/output.md +9 -0
- package/dist/rules/navigator/verification-actions.md +2 -0
- package/dist/src/action-result.js +23 -1
- package/dist/src/action.js +46 -38
- package/dist/src/ai/bosun.js +11 -1
- package/dist/src/ai/conversation.js +39 -0
- package/dist/src/ai/historian/codeceptjs.js +109 -0
- package/dist/src/ai/historian/experience.js +320 -0
- package/dist/src/ai/historian/mixin.js +2 -0
- package/dist/src/ai/historian/playwright.js +145 -0
- package/dist/src/ai/historian/utils.js +18 -0
- package/dist/src/ai/historian.js +19 -405
- package/dist/src/ai/navigator.js +82 -29
- package/dist/src/ai/pilot.js +232 -13
- package/dist/src/ai/planner.js +29 -9
- package/dist/src/ai/provider.js +54 -17
- package/dist/src/ai/researcher.js +41 -32
- package/dist/src/ai/rules.js +26 -14
- package/dist/src/ai/tester.js +90 -26
- package/dist/src/ai/tools.js +13 -7
- package/dist/src/browser-server.js +16 -3
- package/dist/src/commands/add-rule-command.js +11 -8
- package/dist/src/commands/clean-command.js +2 -1
- package/dist/src/commands/explore-command.js +27 -15
- package/dist/src/commands/init-command.js +9 -8
- package/dist/src/commands/plan-command.js +32 -0
- package/dist/src/commands/plan-save-command.js +19 -7
- package/dist/src/commands/rerun-command.js +4 -0
- package/dist/src/components/App.js +15 -5
- package/dist/src/execution-controller.js +13 -2
- package/dist/src/experience-tracker.js +20 -64
- package/dist/src/explorbot.js +5 -8
- package/dist/src/explorer.js +9 -2
- package/dist/src/observability.js +50 -99
- package/dist/src/playwright-recorder.js +309 -0
- package/dist/src/test-plan.js +12 -0
- package/dist/src/utils/aria.js +37 -1
- package/dist/src/utils/error-page.js +20 -7
- package/dist/src/utils/next-steps.js +37 -0
- package/package.json +2 -2
- package/rules/navigator/output.md +9 -0
- package/rules/navigator/verification-actions.md +2 -0
- package/src/action-result.ts +26 -1
- package/src/action.ts +44 -37
- package/src/ai/bosun.ts +11 -1
- package/src/ai/conversation.ts +37 -0
- package/src/ai/historian/codeceptjs.ts +130 -0
- package/src/ai/historian/experience.ts +383 -0
- package/src/ai/historian/mixin.ts +4 -0
- package/src/ai/historian/playwright.ts +169 -0
- package/src/ai/historian/utils.ts +23 -0
- package/src/ai/historian.ts +35 -473
- package/src/ai/navigator.ts +82 -29
- package/src/ai/pilot.ts +237 -14
- package/src/ai/planner.ts +29 -9
- package/src/ai/provider.ts +51 -17
- package/src/ai/researcher.ts +45 -33
- package/src/ai/rules.ts +27 -14
- package/src/ai/tester.ts +94 -26
- package/src/ai/tools.ts +47 -25
- package/src/browser-server.ts +17 -3
- package/src/commands/add-rule-command.ts +11 -7
- package/src/commands/clean-command.ts +2 -1
- package/src/commands/explore-command.ts +29 -15
- package/src/commands/init-command.ts +9 -8
- package/src/commands/plan-command.ts +35 -0
- package/src/commands/plan-save-command.ts +18 -7
- package/src/commands/rerun-command.ts +5 -0
- package/src/components/App.tsx +16 -5
- package/src/config.ts +6 -1
- package/src/execution-controller.ts +14 -3
- package/src/experience-tracker.ts +21 -72
- package/src/explorbot.ts +5 -8
- package/src/explorer.ts +11 -2
- package/src/observability.ts +50 -109
- package/src/playwright-recorder.ts +305 -0
- package/src/test-plan.ts +12 -0
- package/src/utils/aria.ts +38 -1
- package/src/utils/error-page.ts +22 -7
- package/src/utils/next-steps.ts +51 -0
package/dist/src/ai/historian.js
CHANGED
|
@@ -1,379 +1,31 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { join } from 'node:path';
|
|
3
|
-
import dedent from 'dedent';
|
|
4
|
-
import { z } from 'zod';
|
|
5
|
-
import { ActionResult } from "../action-result.js";
|
|
6
|
-
import { ConfigParser } from "../config.js";
|
|
1
|
+
import { readFileSync, writeFileSync } from 'node:fs';
|
|
7
2
|
import { ExperienceTracker } from "../experience-tracker.js";
|
|
8
|
-
import {
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
experienceTracker
|
|
17
|
-
|
|
18
|
-
stateManager;
|
|
19
|
-
savedFiles = new Set();
|
|
20
|
-
constructor(provider, experienceTracker, reporter, stateManager) {
|
|
3
|
+
import { tag } from "../utils/logger.js";
|
|
4
|
+
import { relativeToCwd } from "../utils/next-steps.js";
|
|
5
|
+
import { WithCodeceptJS } from "./historian/codeceptjs.js";
|
|
6
|
+
import { WithExperience } from "./historian/experience.js";
|
|
7
|
+
import { WithPlaywright } from "./historian/playwright.js";
|
|
8
|
+
export { isNonReusableCode } from "./historian/utils.js";
|
|
9
|
+
const HistorianBase = WithPlaywright(WithCodeceptJS(WithExperience(Object)));
|
|
10
|
+
export class Historian extends HistorianBase {
|
|
11
|
+
constructor(provider, experienceTracker, reporter, stateManager, config, recorder) {
|
|
12
|
+
super();
|
|
21
13
|
this.provider = provider;
|
|
22
14
|
this.experienceTracker = experienceTracker || new ExperienceTracker();
|
|
23
15
|
this.reporter = reporter;
|
|
24
16
|
this.stateManager = stateManager;
|
|
17
|
+
this.config = config;
|
|
18
|
+
this.recorder = recorder;
|
|
19
|
+
this.savedFiles = new Set();
|
|
20
|
+
}
|
|
21
|
+
isPlaywrightFramework() {
|
|
22
|
+
return this.config?.ai?.agents?.historian?.framework === 'playwright';
|
|
25
23
|
}
|
|
26
24
|
getSavedFiles() {
|
|
27
25
|
return [...this.savedFiles];
|
|
28
26
|
}
|
|
29
|
-
async saveSession(task, initialState, conversation) {
|
|
30
|
-
debugLog('Saving session experience');
|
|
31
|
-
const result = this.determineResult(task);
|
|
32
|
-
const toolExecutions = conversation.getToolExecutions();
|
|
33
|
-
if (task instanceof Test) {
|
|
34
|
-
task.generatedCode = this.toCode(conversation, task.description);
|
|
35
|
-
}
|
|
36
|
-
const steps = await this.extractSteps(toolExecutions);
|
|
37
|
-
await this.detectRetryPatterns(toolExecutions, initialState);
|
|
38
|
-
const verifiedSteps = await this.verifySteps(steps, initialState);
|
|
39
|
-
if (verifiedSteps.length > 0) {
|
|
40
|
-
const relatedUrls = this.extractVisitedUrls(toolExecutions, initialState.url || '');
|
|
41
|
-
this.experienceTracker.writeFlow(initialState, {
|
|
42
|
-
scenario: task.description,
|
|
43
|
-
steps: verifiedSteps,
|
|
44
|
-
relatedUrls,
|
|
45
|
-
});
|
|
46
|
-
}
|
|
47
|
-
if (task instanceof Test && result !== 'failed') {
|
|
48
|
-
await this.reportSession(task, steps);
|
|
49
|
-
}
|
|
50
|
-
tag('substep').log(`Historian saved session for: ${task.description}`);
|
|
51
|
-
}
|
|
52
|
-
async reportSession(test, steps) {
|
|
53
|
-
if (!this.reporter)
|
|
54
|
-
return;
|
|
55
|
-
const reporterSteps = steps.map((step) => ({
|
|
56
|
-
title: step.message,
|
|
57
|
-
status: step.status === 'passed' ? 'passed' : 'failed',
|
|
58
|
-
code: step.code ? step.code.split('\n').filter((l) => l.trim()) : [],
|
|
59
|
-
discovery: step.discovery,
|
|
60
|
-
}));
|
|
61
|
-
await this.reporter.reportSteps(test, reporterSteps);
|
|
62
|
-
}
|
|
63
|
-
async extractSteps(toolExecutions) {
|
|
64
|
-
const stepsWithDiffs = [];
|
|
65
|
-
for (const exec of toolExecutions) {
|
|
66
|
-
if (!CODECEPT_TOOLS.includes(exec.toolName))
|
|
67
|
-
continue;
|
|
68
|
-
if (!exec.output?.code)
|
|
69
|
-
continue;
|
|
70
|
-
if (!exec.wasSuccessful)
|
|
71
|
-
continue;
|
|
72
|
-
if (isNonReusableCode(exec.output.code))
|
|
73
|
-
continue;
|
|
74
|
-
const message = this.getExecutionLabel(exec, `Executed ${exec.toolName}`);
|
|
75
|
-
const ariaDiff = exec.output?.pageDiff?.ariaChanges || null;
|
|
76
|
-
const urlChanged = exec.output?.pageDiff?.urlChanged || false;
|
|
77
|
-
const step = {
|
|
78
|
-
message,
|
|
79
|
-
status: 'passed',
|
|
80
|
-
tool: exec.toolName,
|
|
81
|
-
code: this.stripComments(exec.output.code),
|
|
82
|
-
};
|
|
83
|
-
stepsWithDiffs.push({ step, ariaDiff, urlChanged });
|
|
84
|
-
}
|
|
85
|
-
await this.analyzeDiscoveries(stepsWithDiffs);
|
|
86
|
-
return stepsWithDiffs.map((s) => s.step);
|
|
87
|
-
}
|
|
88
|
-
async verifySteps(steps, initialState) {
|
|
89
|
-
if (steps.length === 0)
|
|
90
|
-
return [];
|
|
91
|
-
const existingExperience = this.experienceTracker
|
|
92
|
-
.getRelevantExperience(initialState)
|
|
93
|
-
.map((e) => e.content)
|
|
94
|
-
.filter(Boolean)
|
|
95
|
-
.join('\n');
|
|
96
|
-
const existingSummary = existingExperience.length > 2000 ? existingExperience.substring(0, 2000) : existingExperience;
|
|
97
|
-
const stepsList = steps.map((s, i) => `${i}. ${s.message}\n Code: ${s.code || 'none'}`).join('\n');
|
|
98
|
-
const prompt = dedent `
|
|
99
|
-
Review these test steps and determine which are valuable to save as experience
|
|
100
|
-
for future test executions on this page.
|
|
101
|
-
|
|
102
|
-
<steps>
|
|
103
|
-
${stepsList}
|
|
104
|
-
</steps>
|
|
105
|
-
|
|
106
|
-
${existingSummary ? `<existing_experience>\n${existingSummary}\n</existing_experience>` : ''}
|
|
107
|
-
|
|
108
|
-
For each step, determine if it is useful:
|
|
109
|
-
- NOT useful if it uses auto-generated or unstable locators (ember IDs, numeric data-testid, random IDs)
|
|
110
|
-
- NOT useful if it is already documented in existing experience
|
|
111
|
-
- NOT useful if it requires an unclear precondition that would not be reproducible
|
|
112
|
-
- NOT useful if it is trivial navigation (I.amOnPage) without meaningful context
|
|
113
|
-
- USEFUL if it demonstrates how to interact with a specific UI component (expand dropdown, fill form, etc)
|
|
114
|
-
- USEFUL if it shows a working approach for a common task on this page
|
|
115
|
-
`;
|
|
116
|
-
const schema = z.object({
|
|
117
|
-
steps: z.array(z.object({
|
|
118
|
-
stepIndex: z.number(),
|
|
119
|
-
useful: z.boolean(),
|
|
120
|
-
})),
|
|
121
|
-
});
|
|
122
|
-
try {
|
|
123
|
-
const response = await this.provider.generateObject([
|
|
124
|
-
{ role: 'system', content: 'Evaluate test steps for experience value. Be selective — only keep steps that teach something reusable.' },
|
|
125
|
-
{ role: 'user', content: prompt },
|
|
126
|
-
], schema, undefined, { telemetryFunctionId: 'historian.verifySteps' });
|
|
127
|
-
const usefulIndices = new Set((response?.object?.steps || []).filter((s) => s.useful).map((s) => s.stepIndex));
|
|
128
|
-
const verified = steps.filter((_, i) => usefulIndices.has(i));
|
|
129
|
-
debugLog('Verified %d/%d steps as useful', verified.length, steps.length);
|
|
130
|
-
return verified;
|
|
131
|
-
}
|
|
132
|
-
catch (error) {
|
|
133
|
-
debugLog('Step verification failed, keeping all steps: %s', error.message);
|
|
134
|
-
return steps;
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
async detectRetryPatterns(toolExecutions, initialState) {
|
|
138
|
-
if (!this.experienceTracker || !this.stateManager)
|
|
139
|
-
return;
|
|
140
|
-
const failedByTool = new Map();
|
|
141
|
-
const candidates = [];
|
|
142
|
-
for (const exec of toolExecutions) {
|
|
143
|
-
if (!CODECEPT_TOOLS.includes(exec.toolName))
|
|
144
|
-
continue;
|
|
145
|
-
if (!exec.output?.code)
|
|
146
|
-
continue;
|
|
147
|
-
if (!exec.wasSuccessful) {
|
|
148
|
-
const bucket = failedByTool.get(exec.toolName) || [];
|
|
149
|
-
bucket.push(exec);
|
|
150
|
-
failedByTool.set(exec.toolName, bucket);
|
|
151
|
-
continue;
|
|
152
|
-
}
|
|
153
|
-
const failed = failedByTool.get(exec.toolName);
|
|
154
|
-
if (failed?.length) {
|
|
155
|
-
candidates.push({ failed: [...failed], success: exec });
|
|
156
|
-
failedByTool.set(exec.toolName, []);
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
if (candidates.length === 0)
|
|
160
|
-
return;
|
|
161
|
-
const prompt = dedent `
|
|
162
|
-
Analyze these retry patterns where a tool failed multiple times before succeeding.
|
|
163
|
-
For each candidate, determine which failed attempts were trying to do the same thing as the success.
|
|
164
|
-
|
|
165
|
-
${candidates
|
|
166
|
-
.map((c, i) => dedent `
|
|
167
|
-
Candidate ${i}:
|
|
168
|
-
Failed attempts:
|
|
169
|
-
${c.failed.map((f, j) => ` ${j}: ${this.getExecutionLabel(f, f.toolName)} → code: ${f.output?.code}`).join('\n')}
|
|
170
|
-
Succeeded:
|
|
171
|
-
${this.getExecutionLabel(c.success, c.success.toolName)} → code: ${c.success.output.code}
|
|
172
|
-
`)
|
|
173
|
-
.join('\n\n')}
|
|
174
|
-
|
|
175
|
-
For each candidate where failures share the same intent as the success:
|
|
176
|
-
- candidateIndex: index of the candidate
|
|
177
|
-
- failedIndices: which failed attempts share the same intent
|
|
178
|
-
- intent: business-focused description of what was being done
|
|
179
|
-
- explanation: actionable tip explaining which element works and what to avoid
|
|
180
|
-
`;
|
|
181
|
-
const schema = z.object({
|
|
182
|
-
retryPatterns: z.array(z.object({
|
|
183
|
-
candidateIndex: z.number(),
|
|
184
|
-
failedIndices: z.array(z.number()),
|
|
185
|
-
intent: z.string(),
|
|
186
|
-
explanation: z.string(),
|
|
187
|
-
})),
|
|
188
|
-
});
|
|
189
|
-
try {
|
|
190
|
-
const response = await this.provider.generateObject([
|
|
191
|
-
{ role: 'system', content: 'Analyze retry patterns in web testing tool executions. Identify when failed attempts share the same intent as a successful one.' },
|
|
192
|
-
{ role: 'user', content: prompt },
|
|
193
|
-
], schema);
|
|
194
|
-
for (const pattern of response?.object?.retryPatterns || []) {
|
|
195
|
-
const candidate = candidates[pattern.candidateIndex];
|
|
196
|
-
if (!candidate)
|
|
197
|
-
continue;
|
|
198
|
-
const url = candidate.success.output?.pageDiff?.currentUrl;
|
|
199
|
-
let state = initialState;
|
|
200
|
-
if (url && url !== initialState.url) {
|
|
201
|
-
const transition = this.stateManager.getLastVisitToPath(url);
|
|
202
|
-
if (transition) {
|
|
203
|
-
state = ActionResult.fromState(transition.toState);
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
if (isNonReusableCode(candidate.success.output.code))
|
|
207
|
-
continue;
|
|
208
|
-
this.experienceTracker.writeAction(state, { title: pattern.intent, code: candidate.success.output.code, explanation: pattern.explanation });
|
|
209
|
-
}
|
|
210
|
-
debugLog('Detected %d retry patterns', response?.object?.retryPatterns?.length || 0);
|
|
211
|
-
}
|
|
212
|
-
catch (error) {
|
|
213
|
-
debugLog('Failed to detect retry patterns: %s', error.message);
|
|
214
|
-
}
|
|
215
|
-
}
|
|
216
|
-
async analyzeDiscoveries(stepsWithDiffs) {
|
|
217
|
-
if (!stepsWithDiffs.some((s) => s.ariaDiff))
|
|
218
|
-
return;
|
|
219
|
-
const prompt = this.buildDiscoveryPrompt(stepsWithDiffs);
|
|
220
|
-
const schema = z.object({
|
|
221
|
-
discoveries: z.array(z.object({
|
|
222
|
-
stepNumber: z.number(),
|
|
223
|
-
discoveries: z.array(z.string()),
|
|
224
|
-
})),
|
|
225
|
-
});
|
|
226
|
-
try {
|
|
227
|
-
const response = await this.provider.generateObject([
|
|
228
|
-
{ role: 'system', content: 'Analyze test execution steps and identify valuable UI discoveries. Return multiple discoveries per step when multiple new elements appear. Return no discoveries for steps with no meaningful changes.' },
|
|
229
|
-
{ role: 'user', content: prompt },
|
|
230
|
-
], schema);
|
|
231
|
-
for (const { stepNumber, discoveries } of response?.object?.discoveries || []) {
|
|
232
|
-
const stepIndex = stepNumber - 1;
|
|
233
|
-
if (!stepsWithDiffs[stepIndex])
|
|
234
|
-
continue;
|
|
235
|
-
if (discoveries.length === 0)
|
|
236
|
-
continue;
|
|
237
|
-
stepsWithDiffs[stepIndex].step.discovery = discoveries.join('\n');
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
|
-
catch (error) {
|
|
241
|
-
debugLog('Failed to analyze discoveries: %s', error.message);
|
|
242
|
-
}
|
|
243
|
-
}
|
|
244
|
-
buildDiscoveryPrompt(stepsWithDiffs) {
|
|
245
|
-
let prompt = dedent `
|
|
246
|
-
Review these test steps and their ARIA diffs. Identify new UI elements that appeared
|
|
247
|
-
which could be valuable for:
|
|
248
|
-
- Deeper testing of this feature
|
|
249
|
-
- Related features that can be triggered from this flow
|
|
250
|
-
|
|
251
|
-
IMPORTANT:
|
|
252
|
-
- Return MULTIPLE discoveries per step when multiple new elements appear (e.g., if 3 buttons appeared, return an array with 3 discoveries for that step)
|
|
253
|
-
- Return NO discoveries (empty array) for a step if nothing new appeared or if elements were already discovered in previous steps
|
|
254
|
-
- Only include steps that have discoveries
|
|
255
|
-
|
|
256
|
-
Steps:
|
|
257
|
-
`;
|
|
258
|
-
for (let i = 0; i < stepsWithDiffs.length; i++) {
|
|
259
|
-
const { step, ariaDiff, urlChanged } = stepsWithDiffs[i];
|
|
260
|
-
prompt += `\n\nStep ${i + 1}: ${step.message}`;
|
|
261
|
-
if (ariaDiff) {
|
|
262
|
-
prompt += `\n${ariaDiff}`;
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
prompt += dedent `
|
|
266
|
-
|
|
267
|
-
Return discoveries in format:
|
|
268
|
-
- stepNumber: which step revealed these elements
|
|
269
|
-
- discoveries: array of brief descriptions like ["A new button appeared: Publish To Twitter", "A new input field appeared: Description"]
|
|
270
|
-
|
|
271
|
-
Only return elements that are actionable and could lead to new test scenarios.
|
|
272
|
-
Ignore generic UI changes (loading spinners, timestamps, etc).
|
|
273
|
-
If errors or warnings appeared in the step, include them in the discoveries array.
|
|
274
|
-
If multiple buttons, inputs, links, or other actionable elements appeared in the same step, include all of them in the discoveries array.
|
|
275
|
-
`;
|
|
276
|
-
return prompt;
|
|
277
|
-
}
|
|
278
|
-
determineResult(task) {
|
|
279
|
-
if ('isSuccessful' in task && task.isSuccessful)
|
|
280
|
-
return 'success';
|
|
281
|
-
if ('hasAchievedAny' in task && task.hasAchievedAny())
|
|
282
|
-
return 'partial';
|
|
283
|
-
const hasPassedNotes = Object.values(task.notes).some((n) => n.status === 'passed');
|
|
284
|
-
if (hasPassedNotes)
|
|
285
|
-
return 'partial';
|
|
286
|
-
return 'failed';
|
|
287
|
-
}
|
|
288
|
-
extractVisitedUrls(toolExecutions, initialUrl) {
|
|
289
|
-
const urls = new Set();
|
|
290
|
-
const initialPath = extractStatePath(initialUrl);
|
|
291
|
-
for (const exec of toolExecutions) {
|
|
292
|
-
const currentUrl = exec.output?.pageDiff?.currentUrl;
|
|
293
|
-
if (!currentUrl)
|
|
294
|
-
continue;
|
|
295
|
-
const relativePath = extractStatePath(currentUrl);
|
|
296
|
-
if (relativePath && relativePath !== initialPath) {
|
|
297
|
-
urls.add(relativePath);
|
|
298
|
-
}
|
|
299
|
-
}
|
|
300
|
-
return [...urls];
|
|
301
|
-
}
|
|
302
|
-
toCode(conversation, scenario) {
|
|
303
|
-
const toolExecutions = conversation.getToolExecutions();
|
|
304
|
-
const TRACKABLE_TOOLS = [...CODECEPT_TOOLS, ...ASSERTION_TOOLS];
|
|
305
|
-
const successfulSteps = toolExecutions.filter((exec) => exec.wasSuccessful && TRACKABLE_TOOLS.includes(exec.toolName) && exec.output?.code);
|
|
306
|
-
if (successfulSteps.length === 0) {
|
|
307
|
-
return '';
|
|
308
|
-
}
|
|
309
|
-
const lines = [];
|
|
310
|
-
lines.push(`Scenario('${this.escapeString(scenario)}', ({ I }) => {`);
|
|
311
|
-
for (const exec of successfulSteps) {
|
|
312
|
-
if (isNonReusableCode(exec.output.code))
|
|
313
|
-
continue;
|
|
314
|
-
const explanation = this.getExecutionLabel(exec);
|
|
315
|
-
if (explanation) {
|
|
316
|
-
lines.push('');
|
|
317
|
-
lines.push(` Section('${this.escapeString(explanation)}');`);
|
|
318
|
-
}
|
|
319
|
-
const code = this.stripComments(exec.output.code);
|
|
320
|
-
const codeLines = code.includes('\n') ? code.split('\n') : code.split('; ');
|
|
321
|
-
for (const codeLine of codeLines) {
|
|
322
|
-
const trimmed = codeLine.trim();
|
|
323
|
-
if (trimmed) {
|
|
324
|
-
lines.push(` ${trimmed}`);
|
|
325
|
-
}
|
|
326
|
-
}
|
|
327
|
-
}
|
|
328
|
-
lines.push('});');
|
|
329
|
-
return lines.join('\n');
|
|
330
|
-
}
|
|
331
27
|
savePlanToFile(plan) {
|
|
332
|
-
|
|
333
|
-
lines.push(`import step, { Section } from 'codeceptjs/steps';`);
|
|
334
|
-
lines.push('');
|
|
335
|
-
lines.push(`Feature('${this.escapeString(plan.title)}')`);
|
|
336
|
-
lines.push('');
|
|
337
|
-
const startUrl = plan.url || plan.tests[0]?.startUrl;
|
|
338
|
-
if (startUrl) {
|
|
339
|
-
lines.push('Before(({ I }) => {');
|
|
340
|
-
lines.push(` I.amOnPage('${this.escapeString(startUrl)}');`);
|
|
341
|
-
lines.push(...this.getKnowledgeLines(startUrl));
|
|
342
|
-
lines.push('});');
|
|
343
|
-
lines.push('');
|
|
344
|
-
}
|
|
345
|
-
for (const test of plan.tests) {
|
|
346
|
-
if (test.generatedCode) {
|
|
347
|
-
if (test.isSuccessful) {
|
|
348
|
-
lines.push(test.generatedCode);
|
|
349
|
-
}
|
|
350
|
-
else {
|
|
351
|
-
lines.push(`// FAILED: ${test.scenario}`);
|
|
352
|
-
lines.push(test.generatedCode.replace(/Scenario\(/, 'Scenario.skip('));
|
|
353
|
-
}
|
|
354
|
-
lines.push('');
|
|
355
|
-
continue;
|
|
356
|
-
}
|
|
357
|
-
lines.push(`Scenario.todo('${this.escapeString(test.scenario)}', ({ I }) => {`);
|
|
358
|
-
if (test.plannedSteps.length > 0) {
|
|
359
|
-
for (const step of test.plannedSteps) {
|
|
360
|
-
lines.push(` // ${step}`);
|
|
361
|
-
}
|
|
362
|
-
}
|
|
363
|
-
else {
|
|
364
|
-
lines.push(` // ${test.scenario}`);
|
|
365
|
-
}
|
|
366
|
-
lines.push('});');
|
|
367
|
-
lines.push('');
|
|
368
|
-
}
|
|
369
|
-
const testsDir = ConfigParser.getInstance().getTestsDir();
|
|
370
|
-
mkdirSync(testsDir, { recursive: true });
|
|
371
|
-
const filename = plan.title.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
|
|
372
|
-
const filePath = join(testsDir, `${filename}.js`);
|
|
373
|
-
writeFileSync(filePath, lines.join('\n'));
|
|
374
|
-
this.savedFiles.add(filePath);
|
|
375
|
-
tag('substep').log(`Saved plan tests to: ${filePath}`);
|
|
376
|
-
return filePath;
|
|
28
|
+
return this.isPlaywrightFramework() ? this.savePlaywrightPlanToFile(plan) : this.saveCodeceptPlanToFile(plan);
|
|
377
29
|
}
|
|
378
30
|
rewriteScenarioInFile(filePath, healedSteps) {
|
|
379
31
|
let content = readFileSync(filePath, 'utf-8');
|
|
@@ -384,44 +36,6 @@ export class Historian {
|
|
|
384
36
|
}
|
|
385
37
|
writeFileSync(filePath, content);
|
|
386
38
|
this.savedFiles.add(filePath);
|
|
387
|
-
tag('substep').log(`Updated test file with healed steps: ${filePath}`);
|
|
39
|
+
tag('substep').log(`Updated test file with healed steps: ${relativeToCwd(filePath)}`);
|
|
388
40
|
}
|
|
389
|
-
getExecutionLabel(exec, fallback) {
|
|
390
|
-
return exec.input?.explanation || exec.input?.assertion || exec.input?.note || fallback || '';
|
|
391
|
-
}
|
|
392
|
-
escapeString(str) {
|
|
393
|
-
return str.replace(/'/g, "\\'").replace(/\n/g, ' ');
|
|
394
|
-
}
|
|
395
|
-
getKnowledgeLines(url, indent = ' ') {
|
|
396
|
-
const knowledgeTracker = new KnowledgeTracker();
|
|
397
|
-
const state = new ActionResult({ url });
|
|
398
|
-
const { wait, waitForElement, code } = knowledgeTracker.getStateParameters(state, ['wait', 'waitForElement', 'code']);
|
|
399
|
-
const lines = [];
|
|
400
|
-
if (wait !== undefined) {
|
|
401
|
-
lines.push(`${indent}I.wait(${wait});`);
|
|
402
|
-
}
|
|
403
|
-
if (waitForElement) {
|
|
404
|
-
lines.push(`${indent}I.waitForElement(${JSON.stringify(waitForElement)});`);
|
|
405
|
-
}
|
|
406
|
-
if (code) {
|
|
407
|
-
for (const codeLine of code.split('\n')) {
|
|
408
|
-
const trimmed = codeLine.trim();
|
|
409
|
-
if (trimmed)
|
|
410
|
-
lines.push(`${indent}${trimmed}`);
|
|
411
|
-
}
|
|
412
|
-
}
|
|
413
|
-
return lines;
|
|
414
|
-
}
|
|
415
|
-
stripComments(code) {
|
|
416
|
-
return code
|
|
417
|
-
.split('\n')
|
|
418
|
-
.filter((line) => {
|
|
419
|
-
const trimmed = line.trim();
|
|
420
|
-
return trimmed && !trimmed.startsWith('//') && !trimmed.startsWith('/*') && !trimmed.startsWith('*');
|
|
421
|
-
})
|
|
422
|
-
.join('\n');
|
|
423
|
-
}
|
|
424
|
-
}
|
|
425
|
-
export function isNonReusableCode(code) {
|
|
426
|
-
return /\bI\.clickXY\s*\(/.test(code);
|
|
427
41
|
}
|
package/dist/src/ai/navigator.js
CHANGED
|
@@ -8,8 +8,9 @@ import { HooksRunner } from "../utils/hooks-runner.js";
|
|
|
8
8
|
import { createDebug, pluralize, tag } from '../utils/logger.js';
|
|
9
9
|
import { loop, pause } from '../utils/loop.js';
|
|
10
10
|
import { RulesLoader } from "../utils/rules-loader.js";
|
|
11
|
+
import { extractStatePath } from '../utils/url-matcher.js';
|
|
11
12
|
import { Researcher } from "./researcher.js";
|
|
12
|
-
import { actionRule, locatorRule } from './rules.js';
|
|
13
|
+
import { actionRule, locatorRule, unexpectedPopupRule } from './rules.js';
|
|
13
14
|
import { isInteractive } from './task-agent.js';
|
|
14
15
|
import { createAgentTools } from "./tools.js";
|
|
15
16
|
const debugLog = createDebug('explorbot:navigator');
|
|
@@ -131,11 +132,10 @@ class Navigator {
|
|
|
131
132
|
</hint>`;
|
|
132
133
|
}
|
|
133
134
|
if (!actionResult.isInsideIframe) {
|
|
134
|
-
const
|
|
135
|
-
if (
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
experience = renderExperienceToc(toc);
|
|
135
|
+
const successful = this.experienceTracker.getSuccessfulExperience(actionResult);
|
|
136
|
+
if (successful.length > 0) {
|
|
137
|
+
tag('substep').log(`Found ${successful.length} experience ${pluralize(successful.length, 'file')} for: ${actionResult.url}`);
|
|
138
|
+
experience = `<experience>\nPast successful recipes recorded from prior runs for this page. Prefer these solutions first if they match the goal.\n\n${successful.join('\n\n')}\n</experience>`;
|
|
139
139
|
}
|
|
140
140
|
}
|
|
141
141
|
const prompt = dedent `
|
|
@@ -163,6 +163,8 @@ class Navigator {
|
|
|
163
163
|
|
|
164
164
|
${actionRule}
|
|
165
165
|
|
|
166
|
+
${unexpectedPopupRule}
|
|
167
|
+
|
|
166
168
|
${RulesLoader.loadRules('navigator', ['multiple-locator', 'output'], actionResult.url || '').replace('{{maxAttempts}}', String(this.MAX_ATTEMPTS))}
|
|
167
169
|
|
|
168
170
|
${experience}
|
|
@@ -171,11 +173,13 @@ class Navigator {
|
|
|
171
173
|
`;
|
|
172
174
|
const conversation = this.provider.startConversation(this.systemPrompt, 'navigator');
|
|
173
175
|
conversation.addUserText(prompt);
|
|
174
|
-
const tools =
|
|
176
|
+
const tools = undefined;
|
|
175
177
|
let codeBlocks = [];
|
|
176
178
|
let htmlContextAdded = false;
|
|
177
179
|
let codeBlockIndex = 0;
|
|
178
180
|
let totalAttempts = 0;
|
|
181
|
+
const progressBlocks = [];
|
|
182
|
+
const batchFailures = [];
|
|
179
183
|
let resolved = false;
|
|
180
184
|
await loop(async ({ stop }) => {
|
|
181
185
|
if (codeBlocks.length === 0) {
|
|
@@ -194,43 +198,90 @@ class Navigator {
|
|
|
194
198
|
}
|
|
195
199
|
const codeBlock = codeBlocks[codeBlockIndex];
|
|
196
200
|
if (!codeBlock) {
|
|
201
|
+
if (batchFailures.length === 0 && htmlContextAdded) {
|
|
202
|
+
stop();
|
|
203
|
+
return;
|
|
204
|
+
}
|
|
205
|
+
tag('substep').log('Feeding failures back to AI for a new batch...');
|
|
206
|
+
let contextMsg = 'Previous solutions did not work. Analyze the failures and try DIFFERENT strategies (not syntactic variants of the same locator).\n\n';
|
|
207
|
+
if (batchFailures.length > 0) {
|
|
208
|
+
const lines = batchFailures.map((f) => `- \`${f.code.split('\n')[0]}\` → ${f.error}`).join('\n');
|
|
209
|
+
contextMsg += `<previous_failures>\n${lines}\n</previous_failures>\n\n`;
|
|
210
|
+
}
|
|
197
211
|
if (!htmlContextAdded) {
|
|
198
212
|
htmlContextAdded = true;
|
|
199
|
-
|
|
200
|
-
conversation.addUserText(dedent `
|
|
201
|
-
Previous solutions did not work. Here is the full HTML context:
|
|
202
|
-
|
|
203
|
-
<page_html>
|
|
204
|
-
${await actionResult.combinedHtml()}
|
|
205
|
-
</page_html>
|
|
206
|
-
|
|
207
|
-
Please suggest new solutions based on this additional context.
|
|
208
|
-
`);
|
|
209
|
-
codeBlocks = [];
|
|
210
|
-
return;
|
|
213
|
+
contextMsg += `Full HTML context:\n\n<page_html>\n${await actionResult.combinedHtml()}\n</page_html>\n\n`;
|
|
211
214
|
}
|
|
212
|
-
|
|
215
|
+
contextMsg += 'Propose new solutions. If errors mention "intercepts pointer events" or timeouts on visible elements, an overlay is blocking — dismiss it first (Escape, click outside, Close button) before retrying the original action.';
|
|
216
|
+
conversation.addUserText(contextMsg);
|
|
217
|
+
codeBlocks = [];
|
|
218
|
+
batchFailures.length = 0;
|
|
213
219
|
return;
|
|
214
220
|
}
|
|
215
221
|
codeBlockIndex++;
|
|
216
222
|
totalAttempts++;
|
|
217
223
|
await this.explorer.switchToMainFrame();
|
|
224
|
+
const prevHash = action.actionResult?.getStateHash() ?? actionResult.getStateHash();
|
|
218
225
|
debugLog(`Attempting resolution: ${codeBlock}`);
|
|
219
|
-
|
|
226
|
+
const attemptOk = await action.attempt(codeBlock, message);
|
|
227
|
+
const page = action.playwrightHelper?.page;
|
|
228
|
+
if (page) {
|
|
229
|
+
try {
|
|
230
|
+
await page.waitForLoadState('load', { timeout: 5000 });
|
|
231
|
+
}
|
|
232
|
+
catch {
|
|
233
|
+
// Navigation did not reach 'load' state within timeout; continue and verify URL
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
if (!attemptOk) {
|
|
237
|
+
const raw = action.lastError?.message || 'attempt failed';
|
|
238
|
+
const firstMeaningful = raw.split('\n').find((l) => l.trim() && !l.trim().startsWith('at ')) || raw;
|
|
239
|
+
const shortErr = firstMeaningful.replace(/\s+/g, ' ').trim().slice(0, 220);
|
|
240
|
+
batchFailures.push({ code: codeBlock, error: shortErr });
|
|
241
|
+
}
|
|
220
242
|
if (expectedUrl) {
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
243
|
+
if (page) {
|
|
244
|
+
try {
|
|
245
|
+
await page.waitForURL((url) => normalizeUrl(url.pathname) === normalizeUrl(expectedUrl), { timeout: 5000 });
|
|
246
|
+
}
|
|
247
|
+
catch {
|
|
248
|
+
// URL did not transition to expectedUrl within timeout
|
|
249
|
+
}
|
|
225
250
|
}
|
|
226
|
-
|
|
251
|
+
const freshState = await action.capturePageState();
|
|
252
|
+
const urlMatches = normalizeUrl(freshState.url || '') === normalizeUrl(expectedUrl);
|
|
253
|
+
const stateChanged = freshState.getStateHash() !== actionResult.getStateHash();
|
|
254
|
+
resolved = urlMatches && stateChanged;
|
|
255
|
+
if (!resolved && attemptOk) {
|
|
227
256
|
tag('warning').log(`URL verification failed: expected ${expectedUrl}, got ${freshState.url}`);
|
|
228
|
-
|
|
257
|
+
}
|
|
258
|
+
if (freshState.getStateHash() !== prevHash && (attemptOk || urlMatches)) {
|
|
259
|
+
progressBlocks.push(codeBlock);
|
|
229
260
|
}
|
|
230
261
|
}
|
|
262
|
+
else {
|
|
263
|
+
resolved = attemptOk;
|
|
264
|
+
if (attemptOk)
|
|
265
|
+
progressBlocks.push(codeBlock);
|
|
266
|
+
}
|
|
231
267
|
if (resolved) {
|
|
232
268
|
tag('success').log('Navigation resolved successfully');
|
|
233
|
-
|
|
269
|
+
let scenario = message.split('\n')[0];
|
|
270
|
+
if (expectedUrl) {
|
|
271
|
+
const fromPath = extractStatePath(actionResult.url || '');
|
|
272
|
+
const toPath = extractStatePath(expectedUrl);
|
|
273
|
+
scenario = `reach ${toPath} from ${fromPath}`;
|
|
274
|
+
}
|
|
275
|
+
const recipe = progressBlocks
|
|
276
|
+
.join('\n')
|
|
277
|
+
.split('\n')
|
|
278
|
+
.filter((line) => !/^\s*I\.amOnPage\s*\(/.test(line))
|
|
279
|
+
.join('\n')
|
|
280
|
+
.trim();
|
|
281
|
+
if (recipe) {
|
|
282
|
+
const body = `## FLOW: ${scenario}\n\n* ${scenario}\n\n\`\`\`js\n${recipe}\n\`\`\`\n\n---\n`;
|
|
283
|
+
this.experienceTracker.writeFlow(actionResult, body);
|
|
284
|
+
}
|
|
234
285
|
stop();
|
|
235
286
|
return;
|
|
236
287
|
}
|
|
@@ -455,6 +506,7 @@ class Navigator {
|
|
|
455
506
|
const tools = this.buildExperienceTools();
|
|
456
507
|
let codeBlocks = [];
|
|
457
508
|
const successfulCodes = [];
|
|
509
|
+
const assertionSteps = [];
|
|
458
510
|
const action = this.explorer.createAction();
|
|
459
511
|
await loop(async ({ stop, iteration }) => {
|
|
460
512
|
if (codeBlocks.length === 0) {
|
|
@@ -479,6 +531,7 @@ class Navigator {
|
|
|
479
531
|
if (verified) {
|
|
480
532
|
tag('success').log('Verification passed');
|
|
481
533
|
successfulCodes.push(codeBlock);
|
|
534
|
+
assertionSteps.push(...action.assertionSteps);
|
|
482
535
|
}
|
|
483
536
|
}, {
|
|
484
537
|
maxAttempts: this.MAX_ATTEMPTS,
|
|
@@ -493,7 +546,7 @@ class Navigator {
|
|
|
493
546
|
const verified = totalAttempted <= 1 ? successfulCodes.length > 0 : successfulCodes.length > totalAttempted / 2;
|
|
494
547
|
actionResult.addVerification(message, verified);
|
|
495
548
|
this.explorer.getStateManager().updateState(actionResult);
|
|
496
|
-
return { verified, successfulCodes, totalAttempted };
|
|
549
|
+
return { verified, successfulCodes, assertionSteps, totalAttempted };
|
|
497
550
|
}
|
|
498
551
|
}
|
|
499
552
|
export { Navigator };
|