explorbot 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -1
- package/bin/explorbot-cli.ts +86 -15
- package/boat/api-tester/src/ai/curler-tools.ts +3 -3
- package/boat/api-tester/src/ai/curler.ts +1 -1
- package/boat/api-tester/src/apibot.ts +2 -2
- package/boat/api-tester/src/config.ts +1 -1
- package/dist/bin/explorbot-cli.js +85 -14
- package/dist/boat/api-tester/src/ai/curler-tools.js +2 -2
- package/dist/boat/api-tester/src/apibot.js +2 -2
- package/dist/package.json +2 -2
- package/dist/rules/navigator/output.md +9 -0
- package/dist/rules/navigator/verification-actions.md +2 -0
- package/dist/src/action-result.js +23 -1
- package/dist/src/action.js +46 -38
- package/dist/src/ai/bosun.js +16 -2
- package/dist/src/ai/conversation.js +39 -0
- package/dist/src/ai/experience-compactor.js +235 -50
- package/dist/src/ai/historian/codeceptjs.js +109 -0
- package/dist/src/ai/historian/experience.js +320 -0
- package/dist/src/ai/historian/mixin.js +2 -0
- package/dist/src/ai/historian/playwright.js +145 -0
- package/dist/src/ai/historian/utils.js +18 -0
- package/dist/src/ai/historian.js +19 -398
- package/dist/src/ai/navigator.js +133 -80
- package/dist/src/ai/pilot.js +254 -13
- package/dist/src/ai/planner/subpages.js +1 -30
- package/dist/src/ai/planner.js +33 -13
- package/dist/src/ai/provider.js +55 -18
- package/dist/src/ai/rerunner.js +3 -3
- package/dist/src/ai/researcher/deep-analysis.js +1 -1
- package/dist/src/ai/researcher/fingerprint-worker.js +1 -1
- package/dist/src/ai/researcher/locators.js +1 -1
- package/dist/src/ai/researcher/sections.js +8 -1
- package/dist/src/ai/researcher.js +43 -41
- package/dist/src/ai/rules.js +26 -14
- package/dist/src/ai/tester.js +90 -26
- package/dist/src/ai/tools.js +18 -10
- package/dist/src/api/request-store.js +20 -0
- package/dist/src/api/xhr-capture.js +19 -3
- package/dist/src/browser-server.js +16 -3
- package/dist/src/command-handler.js +1 -1
- package/dist/src/commands/add-rule-command.js +12 -9
- package/dist/src/commands/base-command.js +20 -0
- package/dist/src/commands/clean-command.js +3 -2
- package/dist/src/commands/compact-command.js +138 -0
- package/dist/src/commands/context-command.js +7 -1
- package/dist/src/commands/drill-command.js +4 -1
- package/dist/src/commands/experience-command.js +104 -0
- package/dist/src/commands/explore-command.js +54 -19
- package/dist/src/commands/freesail-command.js +2 -0
- package/dist/src/commands/index.js +7 -3
- package/dist/src/commands/init-command.js +11 -10
- package/dist/src/commands/learn-command.js +1 -1
- package/dist/src/commands/navigate-command.js +4 -1
- package/dist/src/commands/plan-clear-command.js +4 -1
- package/dist/src/commands/plan-command.js +43 -4
- package/dist/src/commands/plan-edit-command.js +1 -1
- package/dist/src/commands/plan-load-command.js +4 -1
- package/dist/src/commands/plan-reload-command.js +4 -1
- package/dist/src/commands/plan-save-command.js +20 -8
- package/dist/src/commands/rerun-command.js +4 -0
- package/dist/src/commands/research-command.js +5 -2
- package/dist/src/commands/start-command.js +5 -1
- package/dist/src/commands/test-command.js +7 -1
- package/dist/src/components/App.js +15 -5
- package/dist/src/execution-controller.js +13 -2
- package/dist/src/experience-tracker.js +174 -83
- package/dist/src/explorbot.js +31 -22
- package/dist/src/explorer.js +12 -5
- package/dist/src/observability.js +50 -99
- package/dist/src/playwright-recorder.js +309 -0
- package/dist/src/reporter.js +17 -2
- package/dist/src/stats.js +2 -0
- package/dist/src/suite.js +1 -1
- package/dist/src/test-plan.js +12 -0
- package/dist/src/utils/aria.js +37 -1
- package/dist/src/utils/error-page.js +30 -7
- package/dist/src/utils/logger.js +1 -1
- package/dist/src/utils/next-steps.js +37 -0
- package/dist/src/utils/rules-loader.js +1 -1
- package/dist/src/utils/test-files.js +1 -1
- package/dist/src/utils/url-matcher.js +50 -0
- package/package.json +2 -2
- package/rules/navigator/output.md +9 -0
- package/rules/navigator/verification-actions.md +2 -0
- package/src/action-result.ts +26 -1
- package/src/action.ts +44 -37
- package/src/ai/bosun.ts +16 -2
- package/src/ai/conversation.ts +37 -0
- package/src/ai/experience-compactor.ts +270 -63
- package/src/ai/historian/codeceptjs.ts +130 -0
- package/src/ai/historian/experience.ts +383 -0
- package/src/ai/historian/mixin.ts +4 -0
- package/src/ai/historian/playwright.ts +169 -0
- package/src/ai/historian/utils.ts +23 -0
- package/src/ai/historian.ts +35 -468
- package/src/ai/navigator.ts +140 -85
- package/src/ai/pilot.ts +259 -14
- package/src/ai/planner/subpages.ts +1 -24
- package/src/ai/planner.ts +34 -14
- package/src/ai/provider.ts +52 -18
- package/src/ai/rerunner.ts +3 -3
- package/src/ai/researcher/deep-analysis.ts +1 -1
- package/src/ai/researcher/fingerprint-worker.ts +1 -1
- package/src/ai/researcher/locators.ts +2 -2
- package/src/ai/researcher/sections.ts +7 -1
- package/src/ai/researcher.ts +47 -42
- package/src/ai/rules.ts +27 -14
- package/src/ai/task-agent.ts +1 -1
- package/src/ai/tester.ts +94 -26
- package/src/ai/tools.ts +53 -29
- package/src/api/request-store.ts +22 -0
- package/src/api/xhr-capture.ts +21 -3
- package/src/browser-server.ts +17 -3
- package/src/command-handler.ts +1 -1
- package/src/commands/add-rule-command.ts +13 -9
- package/src/commands/base-command.ts +26 -1
- package/src/commands/clean-command.ts +4 -3
- package/src/commands/compact-command.ts +156 -0
- package/src/commands/context-command.ts +8 -2
- package/src/commands/drill-command.ts +5 -2
- package/src/commands/experience-command.ts +125 -0
- package/src/commands/explore-command.ts +58 -21
- package/src/commands/freesail-command.ts +2 -0
- package/src/commands/index.ts +7 -3
- package/src/commands/init-command.ts +11 -10
- package/src/commands/learn-command.ts +2 -2
- package/src/commands/navigate-command.ts +5 -2
- package/src/commands/plan-clear-command.ts +5 -2
- package/src/commands/plan-command.ts +47 -5
- package/src/commands/plan-edit-command.ts +2 -2
- package/src/commands/plan-load-command.ts +5 -2
- package/src/commands/plan-reload-command.ts +5 -2
- package/src/commands/plan-save-command.ts +20 -9
- package/src/commands/rerun-command.ts +5 -0
- package/src/commands/research-command.ts +6 -3
- package/src/commands/start-command.ts +6 -2
- package/src/commands/test-command.ts +8 -2
- package/src/components/App.tsx +16 -5
- package/src/config.ts +6 -1
- package/src/execution-controller.ts +14 -3
- package/src/experience-tracker.ts +198 -100
- package/src/explorbot.ts +33 -23
- package/src/explorer.ts +14 -5
- package/src/observability.ts +50 -109
- package/src/playwright-recorder.ts +305 -0
- package/src/reporter.ts +17 -3
- package/src/stats.ts +4 -0
- package/src/suite.ts +1 -1
- package/src/test-plan.ts +12 -0
- package/src/utils/aria.ts +38 -1
- package/src/utils/error-page.ts +32 -7
- package/src/utils/logger.ts +1 -1
- package/src/utils/next-steps.ts +51 -0
- package/src/utils/rules-loader.ts +1 -1
- package/src/utils/test-files.ts +1 -1
- package/src/utils/url-matcher.ts +43 -0
package/src/ai/historian.ts
CHANGED
|
@@ -1,447 +1,53 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import { ConfigParser } from '../config.ts';
|
|
7
|
-
import { KnowledgeTracker } from '../knowledge-tracker.ts';
|
|
8
|
-
import { ExperienceTracker, type SessionExperienceEntry, type SessionStep } from '../experience-tracker.ts';
|
|
9
|
-
import { type Reporter, type ReporterStep } from '../reporter.ts';
|
|
1
|
+
import { readFileSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import type { ExplorbotConfig } from '../config.ts';
|
|
3
|
+
import { ExperienceTracker } from '../experience-tracker.ts';
|
|
4
|
+
import type { PlaywrightRecorder } from '../playwright-recorder.ts';
|
|
5
|
+
import type { Reporter } from '../reporter.ts';
|
|
10
6
|
import type { StateManager } from '../state-manager.ts';
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import
|
|
7
|
+
import type { Plan } from '../test-plan.ts';
|
|
8
|
+
import { tag } from '../utils/logger.ts';
|
|
9
|
+
import { relativeToCwd } from '../utils/next-steps.ts';
|
|
10
|
+
import { type CodeceptJSMethods, WithCodeceptJS } from './historian/codeceptjs.ts';
|
|
11
|
+
import { type ExperienceMethods, WithExperience } from './historian/experience.ts';
|
|
12
|
+
import { type PlaywrightMethods, WithPlaywright } from './historian/playwright.ts';
|
|
14
13
|
import type { Provider } from './provider.ts';
|
|
15
|
-
import { extractStatePath } from '../utils/url-matcher.ts';
|
|
16
|
-
import { ASSERTION_TOOLS, CODECEPT_TOOLS } from './tools.ts';
|
|
17
14
|
|
|
18
|
-
|
|
15
|
+
export { isNonReusableCode } from './historian/utils.ts';
|
|
19
16
|
|
|
20
|
-
|
|
21
|
-
private provider: Provider;
|
|
22
|
-
private experienceTracker: ExperienceTracker;
|
|
23
|
-
private reporter?: Reporter;
|
|
24
|
-
private stateManager?: StateManager;
|
|
25
|
-
private savedFiles = new Set<string>();
|
|
17
|
+
const HistorianBase = WithPlaywright(WithCodeceptJS(WithExperience(Object as unknown as new (...args: any[]) => object)));
|
|
26
18
|
|
|
27
|
-
|
|
19
|
+
export interface Historian extends ExperienceMethods, CodeceptJSMethods, PlaywrightMethods {}
|
|
20
|
+
|
|
21
|
+
export class Historian extends HistorianBase {
|
|
22
|
+
declare provider: Provider;
|
|
23
|
+
declare experienceTracker: ExperienceTracker;
|
|
24
|
+
declare reporter: Reporter | undefined;
|
|
25
|
+
declare stateManager: StateManager | undefined;
|
|
26
|
+
declare config: ExplorbotConfig | undefined;
|
|
27
|
+
declare recorder: PlaywrightRecorder | undefined;
|
|
28
|
+
declare savedFiles: Set<string>;
|
|
29
|
+
|
|
30
|
+
constructor(provider: Provider, experienceTracker?: ExperienceTracker, reporter?: Reporter, stateManager?: StateManager, config?: ExplorbotConfig, recorder?: PlaywrightRecorder) {
|
|
31
|
+
super();
|
|
28
32
|
this.provider = provider;
|
|
29
33
|
this.experienceTracker = experienceTracker || new ExperienceTracker();
|
|
30
34
|
this.reporter = reporter;
|
|
31
35
|
this.stateManager = stateManager;
|
|
36
|
+
this.config = config;
|
|
37
|
+
this.recorder = recorder;
|
|
38
|
+
this.savedFiles = new Set();
|
|
32
39
|
}
|
|
33
40
|
|
|
34
|
-
|
|
35
|
-
return
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
async saveSession(task: Task, initialState: ActionResult, conversation: Conversation): Promise<void> {
|
|
39
|
-
debugLog('Saving session experience');
|
|
40
|
-
|
|
41
|
-
const result = this.determineResult(task);
|
|
42
|
-
const toolExecutions = conversation.getToolExecutions();
|
|
43
|
-
|
|
44
|
-
if (task instanceof Test) {
|
|
45
|
-
task.generatedCode = this.toCode(conversation, task.description);
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
const steps = await this.extractSteps(toolExecutions);
|
|
49
|
-
await this.detectRetryPatterns(toolExecutions, initialState);
|
|
50
|
-
const verifiedSteps = await this.verifySteps(steps, initialState);
|
|
51
|
-
|
|
52
|
-
if (verifiedSteps.length > 0) {
|
|
53
|
-
const relatedUrls = this.extractVisitedUrls(toolExecutions, initialState.url || '');
|
|
54
|
-
const entry: SessionExperienceEntry = {
|
|
55
|
-
scenario: task.description,
|
|
56
|
-
result,
|
|
57
|
-
steps: verifiedSteps,
|
|
58
|
-
relatedUrls,
|
|
59
|
-
};
|
|
60
|
-
this.experienceTracker.saveSessionExperience(initialState, entry);
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
if (task instanceof Test && result !== 'failed') {
|
|
64
|
-
await this.reportSession(task, steps);
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
tag('substep').log(`Historian saved session for: ${task.description}`);
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
private async reportSession(test: Test, steps: SessionStep[]): Promise<void> {
|
|
71
|
-
if (!this.reporter) return;
|
|
72
|
-
|
|
73
|
-
const reporterSteps: ReporterStep[] = steps.map((step) => ({
|
|
74
|
-
title: step.message,
|
|
75
|
-
status: step.status === 'passed' ? 'passed' : 'failed',
|
|
76
|
-
code: step.code ? step.code.split('\n').filter((l) => l.trim()) : [],
|
|
77
|
-
discovery: step.discovery,
|
|
78
|
-
}));
|
|
79
|
-
|
|
80
|
-
await this.reporter.reportSteps(test, reporterSteps);
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
private async extractSteps(toolExecutions: ToolExecution[]): Promise<SessionStep[]> {
|
|
84
|
-
const stepsWithDiffs: Array<{ step: SessionStep; ariaDiff: string | null; urlChanged: boolean }> = [];
|
|
85
|
-
|
|
86
|
-
for (const exec of toolExecutions) {
|
|
87
|
-
if (!CODECEPT_TOOLS.includes(exec.toolName as any)) continue;
|
|
88
|
-
if (!exec.output?.code) continue;
|
|
89
|
-
if (!exec.wasSuccessful) continue;
|
|
90
|
-
|
|
91
|
-
const message = this.getExecutionLabel(exec, `Executed ${exec.toolName}`);
|
|
92
|
-
const ariaDiff = exec.output?.pageDiff?.ariaChanges || null;
|
|
93
|
-
const urlChanged = exec.output?.pageDiff?.urlChanged || false;
|
|
94
|
-
|
|
95
|
-
const step: SessionStep = {
|
|
96
|
-
message,
|
|
97
|
-
status: 'passed',
|
|
98
|
-
tool: exec.toolName,
|
|
99
|
-
code: this.stripComments(exec.output.code),
|
|
100
|
-
};
|
|
101
|
-
|
|
102
|
-
stepsWithDiffs.push({ step, ariaDiff, urlChanged });
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
await this.analyzeDiscoveries(stepsWithDiffs);
|
|
106
|
-
|
|
107
|
-
return stepsWithDiffs.map((s) => s.step);
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
private async verifySteps(steps: SessionStep[], initialState: ActionResult): Promise<SessionStep[]> {
|
|
111
|
-
if (steps.length === 0) return [];
|
|
112
|
-
|
|
113
|
-
const existingExperience = this.experienceTracker
|
|
114
|
-
.getRelevantExperience(initialState)
|
|
115
|
-
.map((e) => e.content)
|
|
116
|
-
.filter(Boolean)
|
|
117
|
-
.join('\n');
|
|
118
|
-
|
|
119
|
-
const existingSummary = existingExperience.length > 2000 ? existingExperience.substring(0, 2000) : existingExperience;
|
|
120
|
-
|
|
121
|
-
const stepsList = steps.map((s, i) => `${i}. ${s.message}\n Code: ${s.code || 'none'}`).join('\n');
|
|
122
|
-
const prompt = dedent`
|
|
123
|
-
Review these test steps and determine which are valuable to save as experience
|
|
124
|
-
for future test executions on this page.
|
|
125
|
-
|
|
126
|
-
<steps>
|
|
127
|
-
${stepsList}
|
|
128
|
-
</steps>
|
|
129
|
-
|
|
130
|
-
${existingSummary ? `<existing_experience>\n${existingSummary}\n</existing_experience>` : ''}
|
|
131
|
-
|
|
132
|
-
For each step, determine if it is useful:
|
|
133
|
-
- NOT useful if it uses auto-generated or unstable locators (ember IDs, numeric data-testid, random IDs)
|
|
134
|
-
- NOT useful if it is already documented in existing experience
|
|
135
|
-
- NOT useful if it requires an unclear precondition that would not be reproducible
|
|
136
|
-
- NOT useful if it is trivial navigation (I.amOnPage) without meaningful context
|
|
137
|
-
- USEFUL if it demonstrates how to interact with a specific UI component (expand dropdown, fill form, etc)
|
|
138
|
-
- USEFUL if it shows a working approach for a common task on this page
|
|
139
|
-
`;
|
|
140
|
-
|
|
141
|
-
const schema = z.object({
|
|
142
|
-
steps: z.array(
|
|
143
|
-
z.object({
|
|
144
|
-
stepIndex: z.number(),
|
|
145
|
-
useful: z.boolean(),
|
|
146
|
-
})
|
|
147
|
-
),
|
|
148
|
-
});
|
|
149
|
-
|
|
150
|
-
try {
|
|
151
|
-
const response = await this.provider.generateObject(
|
|
152
|
-
[
|
|
153
|
-
{ role: 'system', content: 'Evaluate test steps for experience value. Be selective — only keep steps that teach something reusable.' },
|
|
154
|
-
{ role: 'user', content: prompt },
|
|
155
|
-
],
|
|
156
|
-
schema,
|
|
157
|
-
undefined,
|
|
158
|
-
{ telemetryFunctionId: 'historian.verifySteps' }
|
|
159
|
-
);
|
|
160
|
-
|
|
161
|
-
const usefulIndices = new Set((response?.object?.steps || []).filter((s) => s.useful).map((s) => s.stepIndex));
|
|
162
|
-
|
|
163
|
-
const verified = steps.filter((_, i) => usefulIndices.has(i));
|
|
164
|
-
debugLog('Verified %d/%d steps as useful', verified.length, steps.length);
|
|
165
|
-
return verified;
|
|
166
|
-
} catch (error: any) {
|
|
167
|
-
debugLog('Step verification failed, keeping all steps: %s', error.message);
|
|
168
|
-
return steps;
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
|
|
172
|
-
private async detectRetryPatterns(toolExecutions: ToolExecution[], initialState: ActionResult): Promise<void> {
|
|
173
|
-
if (!this.experienceTracker || !this.stateManager) return;
|
|
174
|
-
|
|
175
|
-
const failedByTool = new Map<string, ToolExecution[]>();
|
|
176
|
-
const candidates: Array<{ failed: ToolExecution[]; success: ToolExecution }> = [];
|
|
177
|
-
|
|
178
|
-
for (const exec of toolExecutions) {
|
|
179
|
-
if (!CODECEPT_TOOLS.includes(exec.toolName as any)) continue;
|
|
180
|
-
if (!exec.output?.code) continue;
|
|
181
|
-
|
|
182
|
-
if (!exec.wasSuccessful) {
|
|
183
|
-
const bucket = failedByTool.get(exec.toolName) || [];
|
|
184
|
-
bucket.push(exec);
|
|
185
|
-
failedByTool.set(exec.toolName, bucket);
|
|
186
|
-
continue;
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
const failed = failedByTool.get(exec.toolName);
|
|
190
|
-
if (failed?.length) {
|
|
191
|
-
candidates.push({ failed: [...failed], success: exec });
|
|
192
|
-
failedByTool.set(exec.toolName, []);
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
if (candidates.length === 0) return;
|
|
197
|
-
|
|
198
|
-
const prompt = dedent`
|
|
199
|
-
Analyze these retry patterns where a tool failed multiple times before succeeding.
|
|
200
|
-
For each candidate, determine which failed attempts were trying to do the same thing as the success.
|
|
201
|
-
|
|
202
|
-
${candidates
|
|
203
|
-
.map(
|
|
204
|
-
(c, i) => dedent`
|
|
205
|
-
Candidate ${i}:
|
|
206
|
-
Failed attempts:
|
|
207
|
-
${c.failed.map((f, j) => ` ${j}: ${this.getExecutionLabel(f, f.toolName)} → code: ${f.output?.code}`).join('\n')}
|
|
208
|
-
Succeeded:
|
|
209
|
-
${this.getExecutionLabel(c.success, c.success.toolName)} → code: ${c.success.output.code}
|
|
210
|
-
`
|
|
211
|
-
)
|
|
212
|
-
.join('\n\n')}
|
|
213
|
-
|
|
214
|
-
For each candidate where failures share the same intent as the success:
|
|
215
|
-
- candidateIndex: index of the candidate
|
|
216
|
-
- failedIndices: which failed attempts share the same intent
|
|
217
|
-
- intent: business-focused description of what was being done
|
|
218
|
-
- explanation: actionable tip explaining which element works and what to avoid
|
|
219
|
-
`;
|
|
220
|
-
|
|
221
|
-
const schema = z.object({
|
|
222
|
-
retryPatterns: z.array(
|
|
223
|
-
z.object({
|
|
224
|
-
candidateIndex: z.number(),
|
|
225
|
-
failedIndices: z.array(z.number()),
|
|
226
|
-
intent: z.string(),
|
|
227
|
-
explanation: z.string(),
|
|
228
|
-
})
|
|
229
|
-
),
|
|
230
|
-
});
|
|
231
|
-
|
|
232
|
-
try {
|
|
233
|
-
const response = await this.provider.generateObject(
|
|
234
|
-
[
|
|
235
|
-
{ role: 'system', content: 'Analyze retry patterns in web testing tool executions. Identify when failed attempts share the same intent as a successful one.' },
|
|
236
|
-
{ role: 'user', content: prompt },
|
|
237
|
-
],
|
|
238
|
-
schema
|
|
239
|
-
);
|
|
240
|
-
|
|
241
|
-
for (const pattern of response?.object?.retryPatterns || []) {
|
|
242
|
-
const candidate = candidates[pattern.candidateIndex];
|
|
243
|
-
if (!candidate) continue;
|
|
244
|
-
|
|
245
|
-
const url = candidate.success.output?.pageDiff?.currentUrl;
|
|
246
|
-
let state: ActionResult = initialState;
|
|
247
|
-
|
|
248
|
-
if (url && url !== initialState.url) {
|
|
249
|
-
const transition = this.stateManager.getLastVisitToPath(url);
|
|
250
|
-
if (transition) {
|
|
251
|
-
state = ActionResult.fromState(transition.toState);
|
|
252
|
-
}
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
await this.experienceTracker.saveSuccessfulResolution(state, pattern.intent, candidate.success.output.code, pattern.explanation);
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
debugLog('Detected %d retry patterns', response?.object?.retryPatterns?.length || 0);
|
|
259
|
-
} catch (error: any) {
|
|
260
|
-
debugLog('Failed to detect retry patterns: %s', error.message);
|
|
261
|
-
}
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
private async analyzeDiscoveries(stepsWithDiffs: Array<{ step: SessionStep; ariaDiff: string | null; urlChanged: boolean }>): Promise<void> {
|
|
265
|
-
if (!stepsWithDiffs.some((s) => s.ariaDiff)) return;
|
|
266
|
-
|
|
267
|
-
const prompt = this.buildDiscoveryPrompt(stepsWithDiffs);
|
|
268
|
-
|
|
269
|
-
const schema = z.object({
|
|
270
|
-
discoveries: z.array(
|
|
271
|
-
z.object({
|
|
272
|
-
stepNumber: z.number(),
|
|
273
|
-
discoveries: z.array(z.string()),
|
|
274
|
-
})
|
|
275
|
-
),
|
|
276
|
-
});
|
|
277
|
-
|
|
278
|
-
try {
|
|
279
|
-
const response = await this.provider.generateObject(
|
|
280
|
-
[
|
|
281
|
-
{ role: 'system', content: 'Analyze test execution steps and identify valuable UI discoveries. Return multiple discoveries per step when multiple new elements appear. Return no discoveries for steps with no meaningful changes.' },
|
|
282
|
-
{ role: 'user', content: prompt },
|
|
283
|
-
],
|
|
284
|
-
schema
|
|
285
|
-
);
|
|
286
|
-
|
|
287
|
-
for (const { stepNumber, discoveries } of response?.object?.discoveries || []) {
|
|
288
|
-
const stepIndex = stepNumber - 1;
|
|
289
|
-
if (!stepsWithDiffs[stepIndex]) continue;
|
|
290
|
-
if (discoveries.length === 0) continue;
|
|
291
|
-
stepsWithDiffs[stepIndex].step.discovery = discoveries.join('\n');
|
|
292
|
-
}
|
|
293
|
-
} catch (error: any) {
|
|
294
|
-
debugLog('Failed to analyze discoveries: %s', error.message);
|
|
295
|
-
}
|
|
41
|
+
isPlaywrightFramework(): boolean {
|
|
42
|
+
return this.config?.ai?.agents?.historian?.framework === 'playwright';
|
|
296
43
|
}
|
|
297
44
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
Review these test steps and their ARIA diffs. Identify new UI elements that appeared
|
|
301
|
-
which could be valuable for:
|
|
302
|
-
- Deeper testing of this feature
|
|
303
|
-
- Related features that can be triggered from this flow
|
|
304
|
-
|
|
305
|
-
IMPORTANT:
|
|
306
|
-
- Return MULTIPLE discoveries per step when multiple new elements appear (e.g., if 3 buttons appeared, return an array with 3 discoveries for that step)
|
|
307
|
-
- Return NO discoveries (empty array) for a step if nothing new appeared or if elements were already discovered in previous steps
|
|
308
|
-
- Only include steps that have discoveries
|
|
309
|
-
|
|
310
|
-
Steps:
|
|
311
|
-
`;
|
|
312
|
-
|
|
313
|
-
for (let i = 0; i < stepsWithDiffs.length; i++) {
|
|
314
|
-
const { step, ariaDiff, urlChanged } = stepsWithDiffs[i];
|
|
315
|
-
prompt += `\n\nStep ${i + 1}: ${step.message}`;
|
|
316
|
-
if (ariaDiff) {
|
|
317
|
-
prompt += `\n${ariaDiff}`;
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
prompt += dedent`
|
|
322
|
-
|
|
323
|
-
Return discoveries in format:
|
|
324
|
-
- stepNumber: which step revealed these elements
|
|
325
|
-
- discoveries: array of brief descriptions like ["A new button appeared: Publish To Twitter", "A new input field appeared: Description"]
|
|
326
|
-
|
|
327
|
-
Only return elements that are actionable and could lead to new test scenarios.
|
|
328
|
-
Ignore generic UI changes (loading spinners, timestamps, etc).
|
|
329
|
-
If errors or warnings appeared in the step, include them in the discoveries array.
|
|
330
|
-
If multiple buttons, inputs, links, or other actionable elements appeared in the same step, include all of them in the discoveries array.
|
|
331
|
-
`;
|
|
332
|
-
|
|
333
|
-
return prompt;
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
private determineResult(task: Task): 'success' | 'partial' | 'failed' {
|
|
337
|
-
if ('isSuccessful' in task && (task as any).isSuccessful) return 'success';
|
|
338
|
-
if ('hasAchievedAny' in task && (task as any).hasAchievedAny()) return 'partial';
|
|
339
|
-
|
|
340
|
-
const hasPassedNotes = Object.values(task.notes).some((n) => n.status === 'passed');
|
|
341
|
-
if (hasPassedNotes) return 'partial';
|
|
342
|
-
return 'failed';
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
private extractVisitedUrls(toolExecutions: ToolExecution[], initialUrl: string): string[] {
|
|
346
|
-
const urls = new Set<string>();
|
|
347
|
-
const initialPath = extractStatePath(initialUrl);
|
|
348
|
-
|
|
349
|
-
for (const exec of toolExecutions) {
|
|
350
|
-
const currentUrl = exec.output?.pageDiff?.currentUrl;
|
|
351
|
-
if (!currentUrl) continue;
|
|
352
|
-
|
|
353
|
-
const relativePath = extractStatePath(currentUrl);
|
|
354
|
-
if (relativePath && relativePath !== initialPath) {
|
|
355
|
-
urls.add(relativePath);
|
|
356
|
-
}
|
|
357
|
-
}
|
|
358
|
-
|
|
359
|
-
return [...urls];
|
|
360
|
-
}
|
|
361
|
-
|
|
362
|
-
toCode(conversation: Conversation, scenario: string): string {
|
|
363
|
-
const toolExecutions = conversation.getToolExecutions();
|
|
364
|
-
const TRACKABLE_TOOLS = [...CODECEPT_TOOLS, ...ASSERTION_TOOLS];
|
|
365
|
-
const successfulSteps = toolExecutions.filter((exec) => exec.wasSuccessful && TRACKABLE_TOOLS.includes(exec.toolName as any) && exec.output?.code);
|
|
366
|
-
|
|
367
|
-
if (successfulSteps.length === 0) {
|
|
368
|
-
return '';
|
|
369
|
-
}
|
|
370
|
-
|
|
371
|
-
const lines: string[] = [];
|
|
372
|
-
lines.push(`Scenario('${this.escapeString(scenario)}', ({ I }) => {`);
|
|
373
|
-
|
|
374
|
-
for (const exec of successfulSteps) {
|
|
375
|
-
const explanation = this.getExecutionLabel(exec);
|
|
376
|
-
if (explanation) {
|
|
377
|
-
lines.push('');
|
|
378
|
-
lines.push(` Section('${this.escapeString(explanation)}');`);
|
|
379
|
-
}
|
|
380
|
-
const code = this.stripComments(exec.output.code);
|
|
381
|
-
const codeLines = code.includes('\n') ? code.split('\n') : code.split('; ');
|
|
382
|
-
for (const codeLine of codeLines) {
|
|
383
|
-
const trimmed = codeLine.trim();
|
|
384
|
-
if (trimmed) {
|
|
385
|
-
lines.push(` ${trimmed}`);
|
|
386
|
-
}
|
|
387
|
-
}
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
lines.push('});');
|
|
391
|
-
return lines.join('\n');
|
|
45
|
+
getSavedFiles(): string[] {
|
|
46
|
+
return [...this.savedFiles];
|
|
392
47
|
}
|
|
393
48
|
|
|
394
49
|
savePlanToFile(plan: Plan): string {
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
lines.push(`import step, { Section } from 'codeceptjs/steps';`);
|
|
398
|
-
lines.push('');
|
|
399
|
-
lines.push(`Feature('${this.escapeString(plan.title)}')`);
|
|
400
|
-
lines.push('');
|
|
401
|
-
|
|
402
|
-
const startUrl = plan.url || plan.tests[0]?.startUrl;
|
|
403
|
-
if (startUrl) {
|
|
404
|
-
lines.push('Before(({ I }) => {');
|
|
405
|
-
lines.push(` I.amOnPage('${this.escapeString(startUrl)}');`);
|
|
406
|
-
lines.push(...this.getKnowledgeLines(startUrl));
|
|
407
|
-
lines.push('});');
|
|
408
|
-
lines.push('');
|
|
409
|
-
}
|
|
410
|
-
|
|
411
|
-
for (const test of plan.tests) {
|
|
412
|
-
if (test.generatedCode) {
|
|
413
|
-
if (test.isSuccessful) {
|
|
414
|
-
lines.push(test.generatedCode);
|
|
415
|
-
} else {
|
|
416
|
-
lines.push(`// FAILED: ${test.scenario}`);
|
|
417
|
-
lines.push(test.generatedCode.replace(/Scenario\(/, 'Scenario.skip('));
|
|
418
|
-
}
|
|
419
|
-
lines.push('');
|
|
420
|
-
continue;
|
|
421
|
-
}
|
|
422
|
-
|
|
423
|
-
lines.push(`Scenario.todo('${this.escapeString(test.scenario)}', ({ I }) => {`);
|
|
424
|
-
if (test.plannedSteps.length > 0) {
|
|
425
|
-
for (const step of test.plannedSteps) {
|
|
426
|
-
lines.push(` // ${step}`);
|
|
427
|
-
}
|
|
428
|
-
} else {
|
|
429
|
-
lines.push(` // ${test.scenario}`);
|
|
430
|
-
}
|
|
431
|
-
lines.push('});');
|
|
432
|
-
lines.push('');
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
const testsDir = ConfigParser.getInstance().getTestsDir();
|
|
436
|
-
mkdirSync(testsDir, { recursive: true });
|
|
437
|
-
|
|
438
|
-
const filename = plan.title.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase();
|
|
439
|
-
const filePath = join(testsDir, `${filename}.js`);
|
|
440
|
-
writeFileSync(filePath, lines.join('\n'));
|
|
441
|
-
this.savedFiles.add(filePath);
|
|
442
|
-
|
|
443
|
-
tag('substep').log(`Saved plan tests to: ${filePath}`);
|
|
444
|
-
return filePath;
|
|
50
|
+
return this.isPlaywrightFramework() ? this.savePlaywrightPlanToFile(plan) : this.saveCodeceptPlanToFile(plan);
|
|
445
51
|
}
|
|
446
52
|
|
|
447
53
|
rewriteScenarioInFile(filePath: string, healedSteps: Array<{ test: string; original: string; healed: string }>): void {
|
|
@@ -454,45 +60,6 @@ export class Historian {
|
|
|
454
60
|
|
|
455
61
|
writeFileSync(filePath, content);
|
|
456
62
|
this.savedFiles.add(filePath);
|
|
457
|
-
tag('substep').log(`Updated test file with healed steps: ${filePath}`);
|
|
458
|
-
}
|
|
459
|
-
|
|
460
|
-
private getExecutionLabel(exec: ToolExecution, fallback?: string): string {
|
|
461
|
-
return exec.input?.explanation || exec.input?.assertion || exec.input?.note || fallback || '';
|
|
462
|
-
}
|
|
463
|
-
|
|
464
|
-
private escapeString(str: string): string {
|
|
465
|
-
return str.replace(/'/g, "\\'").replace(/\n/g, ' ');
|
|
466
|
-
}
|
|
467
|
-
|
|
468
|
-
private getKnowledgeLines(url: string, indent = ' '): string[] {
|
|
469
|
-
const knowledgeTracker = new KnowledgeTracker();
|
|
470
|
-
const state = new ActionResult({ url });
|
|
471
|
-
const { wait, waitForElement, code } = knowledgeTracker.getStateParameters(state, ['wait', 'waitForElement', 'code']);
|
|
472
|
-
|
|
473
|
-
const lines: string[] = [];
|
|
474
|
-
if (wait !== undefined) {
|
|
475
|
-
lines.push(`${indent}I.wait(${wait});`);
|
|
476
|
-
}
|
|
477
|
-
if (waitForElement) {
|
|
478
|
-
lines.push(`${indent}I.waitForElement(${JSON.stringify(waitForElement)});`);
|
|
479
|
-
}
|
|
480
|
-
if (code) {
|
|
481
|
-
for (const codeLine of code.split('\n')) {
|
|
482
|
-
const trimmed = codeLine.trim();
|
|
483
|
-
if (trimmed) lines.push(`${indent}${trimmed}`);
|
|
484
|
-
}
|
|
485
|
-
}
|
|
486
|
-
return lines;
|
|
487
|
-
}
|
|
488
|
-
|
|
489
|
-
private stripComments(code: string): string {
|
|
490
|
-
return code
|
|
491
|
-
.split('\n')
|
|
492
|
-
.filter((line) => {
|
|
493
|
-
const trimmed = line.trim();
|
|
494
|
-
return trimmed && !trimmed.startsWith('//') && !trimmed.startsWith('/*') && !trimmed.startsWith('*');
|
|
495
|
-
})
|
|
496
|
-
.join('\n');
|
|
63
|
+
tag('substep').log(`Updated test file with healed steps: ${relativeToCwd(filePath)}`);
|
|
497
64
|
}
|
|
498
65
|
}
|