testchimp-runner-core 0.0.33 → 0.0.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/llm-facade.d.ts.map +1 -1
- package/dist/llm-facade.js +7 -7
- package/dist/llm-facade.js.map +1 -1
- package/dist/llm-provider.d.ts +9 -0
- package/dist/llm-provider.d.ts.map +1 -1
- package/dist/model-constants.d.ts +16 -5
- package/dist/model-constants.d.ts.map +1 -1
- package/dist/model-constants.js +17 -6
- package/dist/model-constants.js.map +1 -1
- package/dist/orchestrator/index.d.ts +1 -1
- package/dist/orchestrator/index.d.ts.map +1 -1
- package/dist/orchestrator/index.js +3 -2
- package/dist/orchestrator/index.js.map +1 -1
- package/dist/orchestrator/orchestrator-agent.d.ts +0 -8
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +206 -405
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts +20 -0
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -0
- package/dist/orchestrator/orchestrator-prompts.js +455 -0
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -0
- package/dist/orchestrator/tools/index.d.ts +2 -1
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js +4 -2
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/verify-action-result.d.ts +17 -0
- package/dist/orchestrator/tools/verify-action-result.d.ts.map +1 -0
- package/dist/orchestrator/tools/verify-action-result.js +140 -0
- package/dist/orchestrator/tools/verify-action-result.js.map +1 -0
- package/dist/orchestrator/types.d.ts +26 -0
- package/dist/orchestrator/types.d.ts.map +1 -1
- package/dist/orchestrator/types.js.map +1 -1
- package/dist/prompts.d.ts.map +1 -1
- package/dist/prompts.js +40 -34
- package/dist/prompts.js.map +1 -1
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +4 -1
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/utils/coordinate-converter.d.ts +32 -0
- package/dist/utils/coordinate-converter.d.ts.map +1 -0
- package/dist/utils/coordinate-converter.js +130 -0
- package/dist/utils/coordinate-converter.js.map +1 -0
- package/package.json +1 -1
- package/plandocs/BEFORE_AFTER_VERIFICATION.md +148 -0
- package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +144 -0
- package/plandocs/IMPLEMENTATION_STATUS.md +108 -0
- package/plandocs/PHASE_1_COMPLETE.md +165 -0
- package/plandocs/PHASE_1_SUMMARY.md +184 -0
- package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +120 -0
- package/plandocs/PROMPT_SANITY_CHECK.md +120 -0
- package/plandocs/SESSION_SUMMARY_v0.0.33.md +151 -0
- package/plandocs/TROUBLESHOOTING_SESSION.md +72 -0
- package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +396 -0
- package/plandocs/WHATS_NEW_v0.0.33.md +183 -0
- package/src/llm-facade.ts +8 -8
- package/src/llm-provider.ts +11 -1
- package/src/model-constants.ts +17 -5
- package/src/orchestrator/index.ts +3 -2
- package/src/orchestrator/orchestrator-agent.ts +249 -424
- package/src/orchestrator/orchestrator-agent.ts.backup +1386 -0
- package/src/orchestrator/orchestrator-prompts.ts +474 -0
- package/src/orchestrator/tools/index.ts +2 -1
- package/src/orchestrator/tools/verify-action-result.ts +159 -0
- package/src/orchestrator/types.ts +48 -0
- package/src/prompts.ts +40 -34
- package/src/scenario-worker-class.ts +7 -2
- package/src/utils/coordinate-converter.ts +162 -0
- package/testchimp-runner-core-0.0.33.tgz +0 -0
- /package/{CREDIT_CALLBACK_ARCHITECTURE.md → plandocs/CREDIT_CALLBACK_ARCHITECTURE.md} +0 -0
- /package/{INTEGRATION_COMPLETE.md → plandocs/INTEGRATION_COMPLETE.md} +0 -0
- /package/{VISION_DIAGNOSTICS_IMPROVEMENTS.md → plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md} +0 -0
|
@@ -12,6 +12,42 @@ export interface SelfReflection {
|
|
|
12
12
|
loopReasoning?: string; // "Tried text-based selectors 3 times, switching to IDs"
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
/**
|
|
16
|
+
* Note to future self - a free-form memo the agent leaves so tactical context carries into a later iteration
|
|
17
|
+
* Less rigid than the structured SelfReflection interface - only the originating iteration and the text are recorded
|
|
18
|
+
*/
|
|
19
|
+
export interface NoteToFutureSelf {
|
|
20
|
+
fromIteration: number; // presumably the iteration that wrote this note - TODO confirm numbering base against the agent loop
|
|
21
|
+
content: string; // FREE-FORM - agent writes whatever tactical info it needs for next iteration (no schema is imposed by this type)
|
|
22
|
+
// Example contents:
|
|
23
|
+
// - "Tried #menu-btn, failed. Will try SVG child next."
|
|
24
|
+
// - "Plan: Hover over menu to reveal dropdown, then click Profile option."
|
|
25
|
+
// - "Element loads async. Wait 2s after page load before clicking."
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Coordinate-based action (fallback when selectors fail)
|
|
30
|
+
* Positions are percentages of the viewport rather than pixels, for resolution independence;
|
|
31
|
+
* CoordinateConverter turns them into pixel coordinates using the page's innerWidth/innerHeight
|
|
31
|
+
*/
|
|
32
|
+
export interface CoordinateAction {
|
|
33
|
+
type: 'coordinate'; // Literal tag marking this object as a coordinate action
|
|
34
|
+
action: 'click' | 'doubleClick' | 'rightClick' | 'hover' | 'drag' | 'fill' | 'scroll'; // Which mouse/keyboard gesture to perform at the point
|
|
35
|
+
|
|
36
|
+
// Primary coordinates as percentages (0.0 to 100.0, 3 decimal precision for ~1px accuracy)
|
|
37
|
+
xPercent: number; // 0 = left edge, 100 = right edge
|
|
38
|
+
yPercent: number; // 0 = top edge, 100 = bottom edge
|
|
39
|
+
|
|
40
|
+
// For drag actions: destination point, same percentage scale as xPercent/yPercent
|
|
41
|
+
toXPercent?: number; // CoordinateConverter throws for 'drag' when this is undefined
|
|
42
|
+
toYPercent?: number; // CoordinateConverter throws for 'drag' when this is undefined
|
|
43
|
+
|
|
44
|
+
// For fill actions (click then type)
|
|
45
|
+
value?: string; // Text typed after the click; CoordinateConverter throws for 'fill' when this is missing or empty
|
|
46
|
+
|
|
47
|
+
// For scroll actions
|
|
48
|
+
scrollAmount?: number; // Positive = scroll down, negative = scroll up; CoordinateConverter uses 100 when omitted
|
|
49
|
+
}
|
|
50
|
+
|
|
15
51
|
/**
|
|
16
52
|
* Journey memory - tracks the agent's journey through the scenario
|
|
17
53
|
*/
|
|
@@ -24,6 +60,9 @@ export interface JourneyMemory {
|
|
|
24
60
|
|
|
25
61
|
// Data extracted and saved for later steps
|
|
26
62
|
extractedData: Record<string, string>;
|
|
63
|
+
|
|
64
|
+
// Latest note from agent (persists across steps for continuity)
|
|
65
|
+
latestNote?: NoteToFutureSelf;
|
|
27
66
|
}
|
|
28
67
|
|
|
29
68
|
/**
|
|
@@ -75,6 +114,12 @@ export interface AgentDecision {
|
|
|
75
114
|
// Self-reflection for next iteration
|
|
76
115
|
selfReflection?: SelfReflection;
|
|
77
116
|
|
|
117
|
+
// Note to future self (NEW - free-form iteration memory)
|
|
118
|
+
noteToFutureSelf?: string; // Free-form tactical note for next iteration
|
|
119
|
+
|
|
120
|
+
// Coordinate-based action (NEW - fallback when selectors fail)
|
|
121
|
+
coordinateAction?: CoordinateAction;
|
|
122
|
+
|
|
78
123
|
// Memory updates
|
|
79
124
|
memoryUpdate?: {
|
|
80
125
|
action: string;
|
|
@@ -130,6 +175,9 @@ export interface AgentContext {
|
|
|
130
175
|
// Self-reflection from previous iteration
|
|
131
176
|
previousIterationGuidance?: SelfReflection;
|
|
132
177
|
|
|
178
|
+
// Note from previous iteration (NEW - tactical continuity)
|
|
179
|
+
noteFromPreviousIteration?: NoteToFutureSelf;
|
|
180
|
+
|
|
133
181
|
// Tool results from this iteration (if any)
|
|
134
182
|
toolResults?: Record<string, ToolResult>;
|
|
135
183
|
}
|
package/src/prompts.ts
CHANGED
|
@@ -329,10 +329,10 @@ ${pageInfo.formattedElements}
|
|
|
329
329
|
|
|
330
330
|
CRITICAL: Extract and use exact values mentioned in the goal.
|
|
331
331
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
332
|
+
Examples:
|
|
333
|
+
- Goal: "Login with credentials: alice, pass123"
|
|
334
|
+
✅ Use: await page.fill('username', 'alice'); await page.fill('password', 'pass123');
|
|
335
|
+
❌ NOT: await page.fill('username', process.env.USERNAME);
|
|
336
336
|
|
|
337
337
|
- Goal: "Enter name: John Doe"
|
|
338
338
|
✅ Use: await page.fill('[name="name"]', 'John Doe');
|
|
@@ -362,36 +362,42 @@ ${pageInfo.formattedElements}
|
|
|
362
362
|
|
|
363
363
|
Playwright throws "strict mode violation" when a selector matches MULTIPLE elements.
|
|
364
364
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
365
|
+
**PROACTIVE DETECTION** - Check BEFORE generating command:
|
|
366
|
+
- Review the "CURRENT PAGE STATE" section below (accessibility tree / aria snapshot)
|
|
367
|
+
- Look for duplicate elements with same role/text (e.g., multiple links with "Settings")
|
|
368
|
+
- If duplicates exist, generate a MORE SPECIFIC selector from the start
|
|
369
|
+
- Don't wait for strict mode error - prevent it by analyzing the DOM structure
|
|
370
|
+
|
|
371
|
+
🚨 ERROR PATTERNS:
|
|
372
|
+
- "strict mode violation" → Your selector matched multiple elements
|
|
373
|
+
- "Multiple elements found" → Same issue
|
|
374
|
+
- "locator resolved to 2 elements" → Often one is a tooltip/popover with duplicate text
|
|
375
|
+
- Command chain with multiple strategies → Sign of selector problems
|
|
376
|
+
|
|
377
|
+
✅ SOLUTIONS (in order of preference):
|
|
378
|
+
|
|
379
|
+
**Option 1: Use Role-Based Selectors** (BEST - avoids tooltips):
|
|
380
|
+
- BAD: page.getByText('Settings').click() → Matches button AND its tooltip
|
|
381
|
+
- GOOD: page.getByRole('button', { name: 'Settings' }).click() → Only matches button role
|
|
382
|
+
- GOOD: page.locator('button').getByText('Settings').click() → Scoped to button tag
|
|
383
|
+
- GOOD: page.locator('[role="button"]').getByText('Settings').click() → Scoped to role
|
|
384
|
+
|
|
385
|
+
**Option 2: Scope to Container**:
|
|
386
|
+
- BAD: page.locator('a', { hasText: 'Settings' }).click() → Matches multiple links
|
|
387
|
+
- GOOD: page.locator('nav a', { hasText: 'Settings' }).click() → Scoped to nav
|
|
388
|
+
- GOOD: page.locator('a[href*="/settings"]').click() → Use unique attribute
|
|
389
|
+
|
|
390
|
+
**Option 3: Use Position-Based Selection**:
|
|
391
|
+
- If multiple matches are expected: page.locator('a', { hasText: 'Settings' }).first().click()
|
|
392
|
+
- Or use: .nth(0) for first, .last() for last
|
|
393
|
+
|
|
394
|
+
**Option 4: Filter by Visibility/State**:
|
|
395
|
+
- page.locator('button', { hasText: 'Submit' }).filter({ hasNotText: 'Draft' }).click()
|
|
396
|
+
|
|
397
|
+
🚫 **ANTI-PATTERNS (DON'T DO THIS)**:
|
|
398
|
+
- BAD: Chaining multiple selector strategies in one command with semicolons
|
|
399
|
+
- BAD: Using page.evaluate() to find/click elements (defeats Playwright's auto-waiting)
|
|
400
|
+
- GOOD: ONE clear, specific selector like page.locator('nav a', { hasText: 'Settings' }).click()
|
|
395
401
|
|
|
396
402
|
**When You See Strict Mode Errors:**
|
|
397
403
|
1. Analyze - Why did my selector match multiple elements?
|
|
@@ -21,7 +21,8 @@ import {
|
|
|
21
21
|
RecallHistoryTool,
|
|
22
22
|
InspectPageTool,
|
|
23
23
|
CheckPageReadyTool,
|
|
24
|
-
ExtractDataTool
|
|
24
|
+
ExtractDataTool,
|
|
25
|
+
VerifyActionResultTool
|
|
25
26
|
} from './orchestrator';
|
|
26
27
|
|
|
27
28
|
// Define a simple logging interface for compatibility
|
|
@@ -104,12 +105,16 @@ export class ScenarioWorker extends EventEmitter {
|
|
|
104
105
|
const takeScreenshotTool = new TakeScreenshotTool();
|
|
105
106
|
takeScreenshotTool.setLLMFacade(this.llmFacade); // Inject LLM for vision analysis
|
|
106
107
|
|
|
107
|
-
|
|
108
|
+
const verifyActionTool = new VerifyActionResultTool();
|
|
109
|
+
verifyActionTool.setLLMFacade(this.llmFacade); // Inject LLM for vision comparison
|
|
110
|
+
|
|
111
|
+
// Register 6 information-gathering tools (state changes via Playwright commands)
|
|
108
112
|
this.toolRegistry.register(takeScreenshotTool);
|
|
109
113
|
this.toolRegistry.register(new RecallHistoryTool());
|
|
110
114
|
this.toolRegistry.register(new InspectPageTool());
|
|
111
115
|
this.toolRegistry.register(new CheckPageReadyTool());
|
|
112
116
|
this.toolRegistry.register(new ExtractDataTool());
|
|
117
|
+
this.toolRegistry.register(verifyActionTool);
|
|
113
118
|
|
|
114
119
|
// Create orchestrator agent
|
|
115
120
|
this.orchestratorAgent = new OrchestratorAgent(
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Coordinate Converter Utility
|
|
3
|
+
* Converts percentage-based coordinates to pixel coordinates and generates Playwright commands
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { CoordinateAction } from '../orchestrator/types';
|
|
7
|
+
|
|
8
|
+
/**
 * One low-level input step derived from a coordinate action.
 * `command` is the Playwright source text for the step; `run` performs the
 * same call against a live page. Keeping both together guarantees that the
 * generated script and the directly executed action can never drift apart.
 */
type CoordinateStep = {
  command: string;
  run: (page: any) => Promise<void>;
};

export class CoordinateConverter {
  /**
   * Convert percentage coordinates to actual pixel coordinates.
   *
   * @param xPercent horizontal position, 0 = left edge, 100 = right edge
   * @param yPercent vertical position, 0 = top edge, 100 = bottom edge
   * @param viewportWidth viewport width in pixels
   * @param viewportHeight viewport height in pixels
   * @returns pixel coordinates rounded to the nearest integer
   */
  static percentToPixels(
    xPercent: number,
    yPercent: number,
    viewportWidth: number,
    viewportHeight: number
  ): { x: number; y: number } {
    return {
      x: Math.round((xPercent / 100) * viewportWidth),
      y: Math.round((yPercent / 100) * viewportHeight)
    };
  }

  /**
   * Get viewport dimensions from page.
   * Reads window.innerWidth / window.innerHeight inside the page context.
   */
  static async getViewportSize(page: any): Promise<{ width: number; height: number }> {
    return await page.evaluate((): { width: number; height: number } => {
      const win = (globalThis as any).window;
      return {
        width: win.innerWidth as number,
        height: win.innerHeight as number
      };
    });
  }

  /**
   * Convert coordinate action with percentages to Playwright commands.
   *
   * @returns array of command strings, one per low-level input step
   * @throws Error when the action is incomplete (drag without target, fill
   *   without value), has an unknown kind, or carries a percentage that is
   *   not a finite number in the range 0-100
   */
  static async generateCommands(
    action: CoordinateAction,
    page: any
  ): Promise<string[]> {
    const steps = await this.planSteps(action, page, { drag: 'Drag action', fill: 'Fill action' });
    return steps.map((step) => step.command);
  }

  /**
   * Execute coordinate action directly on page.
   * Used during agent execution (converts and runs immediately).
   *
   * @throws Error under the same conditions as generateCommands
   */
  static async executeAction(
    action: CoordinateAction,
    page: any
  ): Promise<void> {
    const steps = await this.planSteps(action, page, { drag: 'Drag', fill: 'Fill' });
    // Steps must run strictly in order (e.g. move -> down -> move -> up for a drag).
    for (const step of steps) {
      await step.run(page);
    }
  }

  /**
   * Build the ordered list of input steps for an action.
   * `labels` only controls the subject used in "requires ..." error messages
   * so each public entry point keeps its established wording.
   */
  private static async planSteps(
    action: CoordinateAction,
    page: any,
    labels: { drag: string; fill: string }
  ): Promise<CoordinateStep[]> {
    const viewport = await this.getViewportSize(page);
    const { x, y } = this.resolvePoint(action.xPercent, action.yPercent, 'xPercent', 'yPercent', viewport);

    const move = (px: number, py: number): CoordinateStep => ({
      command: `await page.mouse.move(${px}, ${py});`,
      run: (target) => target.mouse.move(px, py)
    });
    const click = (px: number, py: number): CoordinateStep => ({
      command: `await page.mouse.click(${px}, ${py});`,
      run: (target) => target.mouse.click(px, py)
    });

    switch (action.action) {
      case 'click':
        return [click(x, y)];

      case 'doubleClick':
        return [{
          command: `await page.mouse.dblclick(${x}, ${y});`,
          run: (target) => target.mouse.dblclick(x, y)
        }];

      case 'rightClick':
        return [{
          command: `await page.mouse.click(${x}, ${y}, { button: 'right' });`,
          run: (target) => target.mouse.click(x, y, { button: 'right' })
        }];

      case 'hover':
        return [move(x, y)];

      case 'drag': {
        if (action.toXPercent === undefined || action.toYPercent === undefined) {
          throw new Error(`${labels.drag} requires toXPercent and toYPercent`);
        }
        const to = this.resolvePoint(action.toXPercent, action.toYPercent, 'toXPercent', 'toYPercent', viewport);
        return [
          move(x, y),
          { command: `await page.mouse.down();`, run: (target) => target.mouse.down() },
          move(to.x, to.y),
          { command: `await page.mouse.up();`, run: (target) => target.mouse.up() }
        ];
      }

      case 'fill': {
        if (!action.value) {
          throw new Error(`${labels.fill} requires value`);
        }
        const text = action.value;
        // Click to focus, wait briefly, then type
        return [
          click(x, y),
          { command: `await page.waitForTimeout(100);`, run: (target) => target.waitForTimeout(100) },
          {
            // JSON.stringify yields a correctly escaped string literal for the generated script
            command: `await page.keyboard.type(${JSON.stringify(text)});`,
            run: (target) => target.keyboard.type(text)
          }
        ];
      }

      case 'scroll': {
        // `??` (not `||`) so an explicit scrollAmount of 0 is honoured instead of becoming 100
        const scrollAmount = action.scrollAmount ?? 100;
        // Move to position, then scroll
        return [
          move(x, y),
          {
            command: `await page.mouse.wheel(0, ${scrollAmount});`,
            run: (target) => target.mouse.wheel(0, scrollAmount)
          }
        ];
      }

      default:
        throw new Error(`Unknown coordinate action: ${action.action}`);
    }
  }

  /**
   * Validate a percentage pair and convert it to pixels.
   * Rejecting bad input here prevents `NaN` or off-viewport coordinates from
   * being silently embedded in generated commands.
   */
  private static resolvePoint(
    xPercent: number,
    yPercent: number,
    xField: string,
    yField: string,
    viewport: { width: number; height: number }
  ): { x: number; y: number } {
    this.assertPercent(xPercent, xField);
    this.assertPercent(yPercent, yField);
    return this.percentToPixels(xPercent, yPercent, viewport.width, viewport.height);
  }

  /**
   * Throw unless `value` is a finite percentage within 0-100.
   * Uses Number() coercion so numeric strings from loosely typed (e.g. LLM JSON)
   * input keep working exactly as they do in the arithmetic of percentToPixels.
   */
  private static assertPercent(value: number, field: string): void {
    const numeric = Number(value);
    if (!Number.isFinite(numeric) || numeric < 0 || numeric > 100) {
      throw new Error(`${field} must be a finite percentage between 0 and 100, got ${String(value)}`);
    }
  }
}
|
|
162
|
+
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|