testchimp-runner-core 0.0.33 → 0.0.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/dist/llm-facade.d.ts.map +1 -1
  2. package/dist/llm-facade.js +7 -7
  3. package/dist/llm-facade.js.map +1 -1
  4. package/dist/llm-provider.d.ts +9 -0
  5. package/dist/llm-provider.d.ts.map +1 -1
  6. package/dist/model-constants.d.ts +16 -5
  7. package/dist/model-constants.d.ts.map +1 -1
  8. package/dist/model-constants.js +17 -6
  9. package/dist/model-constants.js.map +1 -1
  10. package/dist/orchestrator/index.d.ts +1 -1
  11. package/dist/orchestrator/index.d.ts.map +1 -1
  12. package/dist/orchestrator/index.js +3 -2
  13. package/dist/orchestrator/index.js.map +1 -1
  14. package/dist/orchestrator/orchestrator-agent.d.ts +0 -8
  15. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  16. package/dist/orchestrator/orchestrator-agent.js +206 -405
  17. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  18. package/dist/orchestrator/orchestrator-prompts.d.ts +20 -0
  19. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -0
  20. package/dist/orchestrator/orchestrator-prompts.js +455 -0
  21. package/dist/orchestrator/orchestrator-prompts.js.map +1 -0
  22. package/dist/orchestrator/tools/index.d.ts +2 -1
  23. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  24. package/dist/orchestrator/tools/index.js +4 -2
  25. package/dist/orchestrator/tools/index.js.map +1 -1
  26. package/dist/orchestrator/tools/verify-action-result.d.ts +17 -0
  27. package/dist/orchestrator/tools/verify-action-result.d.ts.map +1 -0
  28. package/dist/orchestrator/tools/verify-action-result.js +140 -0
  29. package/dist/orchestrator/tools/verify-action-result.js.map +1 -0
  30. package/dist/orchestrator/types.d.ts +26 -0
  31. package/dist/orchestrator/types.d.ts.map +1 -1
  32. package/dist/orchestrator/types.js.map +1 -1
  33. package/dist/prompts.d.ts.map +1 -1
  34. package/dist/prompts.js +40 -34
  35. package/dist/prompts.js.map +1 -1
  36. package/dist/scenario-worker-class.d.ts.map +1 -1
  37. package/dist/scenario-worker-class.js +4 -1
  38. package/dist/scenario-worker-class.js.map +1 -1
  39. package/dist/utils/coordinate-converter.d.ts +32 -0
  40. package/dist/utils/coordinate-converter.d.ts.map +1 -0
  41. package/dist/utils/coordinate-converter.js +130 -0
  42. package/dist/utils/coordinate-converter.js.map +1 -0
  43. package/package.json +1 -1
  44. package/plandocs/BEFORE_AFTER_VERIFICATION.md +148 -0
  45. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +144 -0
  46. package/plandocs/IMPLEMENTATION_STATUS.md +108 -0
  47. package/plandocs/PHASE_1_COMPLETE.md +165 -0
  48. package/plandocs/PHASE_1_SUMMARY.md +184 -0
  49. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +120 -0
  50. package/plandocs/PROMPT_SANITY_CHECK.md +120 -0
  51. package/plandocs/SESSION_SUMMARY_v0.0.33.md +151 -0
  52. package/plandocs/TROUBLESHOOTING_SESSION.md +72 -0
  53. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +396 -0
  54. package/plandocs/WHATS_NEW_v0.0.33.md +183 -0
  55. package/src/llm-facade.ts +8 -8
  56. package/src/llm-provider.ts +11 -1
  57. package/src/model-constants.ts +17 -5
  58. package/src/orchestrator/index.ts +3 -2
  59. package/src/orchestrator/orchestrator-agent.ts +249 -424
  60. package/src/orchestrator/orchestrator-agent.ts.backup +1386 -0
  61. package/src/orchestrator/orchestrator-prompts.ts +474 -0
  62. package/src/orchestrator/tools/index.ts +2 -1
  63. package/src/orchestrator/tools/verify-action-result.ts +159 -0
  64. package/src/orchestrator/types.ts +48 -0
  65. package/src/prompts.ts +40 -34
  66. package/src/scenario-worker-class.ts +7 -2
  67. package/src/utils/coordinate-converter.ts +162 -0
  68. package/testchimp-runner-core-0.0.33.tgz +0 -0
  69. /package/{CREDIT_CALLBACK_ARCHITECTURE.md → plandocs/CREDIT_CALLBACK_ARCHITECTURE.md} +0 -0
  70. /package/{INTEGRATION_COMPLETE.md → plandocs/INTEGRATION_COMPLETE.md} +0 -0
  71. /package/{VISION_DIAGNOSTICS_IMPROVEMENTS.md → plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md} +0 -0
package/src/orchestrator/types.ts CHANGED
@@ -12,6 +12,42 @@ export interface SelfReflection {
12
12
  loopReasoning?: string; // "Tried text-based selectors 3 times, switching to IDs"
13
13
  }
14
14
 
15
+ /**
16
+ * Note to future self - iteration-specific memory for tactical continuity
17
+ * More flexible than structured SelfReflection - agent can write freely
18
+ */
19
+ export interface NoteToFutureSelf {
20
+ fromIteration: number;
21
+ content: string; // FREE-FORM - agent writes whatever tactical info it needs for next iteration
22
+ // Examples:
23
+ // - "Tried #menu-btn, failed. Will try SVG child next."
24
+ // - "Plan: Hover over menu to reveal dropdown, then click Profile option."
25
+ // - "Element loads async. Wait 2s after page load before clicking."
26
+ }
27
+
28
+ /**
29
+ * Coordinate-based action (fallback when selectors fail)
30
+ * Uses percentage-based positioning for resolution independence
31
+ */
32
+ export interface CoordinateAction {
33
+ type: 'coordinate';
34
+ action: 'click' | 'doubleClick' | 'rightClick' | 'hover' | 'drag' | 'fill' | 'scroll';
35
+
36
+ // Primary coordinates as percentages (0.0 to 100.0, 3 decimal precision for ~1px accuracy)
37
+ xPercent: number; // 0 = left edge, 100 = right edge
38
+ yPercent: number; // 0 = top edge, 100 = bottom edge
39
+
40
+ // For drag actions
41
+ toXPercent?: number;
42
+ toYPercent?: number;
43
+
44
+ // For fill actions (click then type)
45
+ value?: string;
46
+
47
+ // For scroll actions
48
+ scrollAmount?: number; // Positive = scroll down, negative = scroll up
49
+ }
50
+
15
51
  /**
16
52
  * Journey memory - tracks the agent's journey through the scenario
17
53
  */
@@ -24,6 +60,9 @@ export interface JourneyMemory {
24
60
 
25
61
  // Data extracted and saved for later steps
26
62
  extractedData: Record<string, string>;
63
+
64
+ // Latest note from agent (persists across steps for continuity)
65
+ latestNote?: NoteToFutureSelf;
27
66
  }
28
67
 
29
68
  /**
@@ -75,6 +114,12 @@ export interface AgentDecision {
75
114
  // Self-reflection for next iteration
76
115
  selfReflection?: SelfReflection;
77
116
 
117
+ // Note to future self (NEW - free-form iteration memory)
118
+ noteToFutureSelf?: string; // Free-form tactical note for next iteration
119
+
120
+ // Coordinate-based action (NEW - fallback when selectors fail)
121
+ coordinateAction?: CoordinateAction;
122
+
78
123
  // Memory updates
79
124
  memoryUpdate?: {
80
125
  action: string;
@@ -130,6 +175,9 @@ export interface AgentContext {
130
175
  // Self-reflection from previous iteration
131
176
  previousIterationGuidance?: SelfReflection;
132
177
 
178
+ // Note from previous iteration (NEW - tactical continuity)
179
+ noteFromPreviousIteration?: NoteToFutureSelf;
180
+
133
181
  // Tool results from this iteration (if any)
134
182
  toolResults?: Record<string, ToolResult>;
135
183
  }
package/src/prompts.ts CHANGED
@@ -329,10 +329,10 @@ ${pageInfo.formattedElements}
329
329
 
330
330
  CRITICAL: Extract and use exact values mentioned in the goal.
331
331
 
332
- Examples:
333
- - Goal: "Login with credentials: Willy, Willy@1234"
334
- ✅ Use: await page.fill('username', 'Willy'); await page.fill('password', 'Willy@1234');
335
- ❌ NOT: await page.fill('username', process.env.USERNAME);
332
+ Examples:
333
+ - Goal: "Login with credentials: alice, pass123"
334
+ ✅ Use: await page.fill('username', 'alice'); await page.fill('password', 'pass123');
335
+ ❌ NOT: await page.fill('username', process.env.USERNAME);
336
336
 
337
337
  - Goal: "Enter name: John Doe"
338
338
  ✅ Use: await page.fill('[name="name"]', 'John Doe');
@@ -362,36 +362,42 @@ ${pageInfo.formattedElements}
362
362
 
363
363
  Playwright throws "strict mode violation" when a selector matches MULTIPLE elements.
364
364
 
365
- **PROACTIVE DETECTION** - Check BEFORE generating command:
366
- - Review the "CURRENT PAGE STATE" section below (accessibility tree / aria snapshot)
367
- - Look for duplicate elements with same role/text (e.g., multiple links with "Employee Information")
368
- - If duplicates exist, generate a MORE SPECIFIC selector from the start
369
- - Don't wait for strict mode error - prevent it by analyzing the DOM structure
370
-
371
- 🚨 ERROR PATTERNS:
372
- - "strict mode violation" → Your selector matched multiple elements
373
- - "Multiple elements found" → Same issue
374
- - Command chain with multiple strategies → Sign of selector problems
375
-
376
- ✅ SOLUTIONS (in order of preference):
377
-
378
- **Option 1: Be More Specific** (BEST):
379
- - BAD: page.locator('a', { hasText: 'Employee Information' }).click() → Matches multiple links
380
- - GOOD: page.locator('nav a', { hasText: 'Employee Information' }).click() → Scoped to nav
381
- - GOOD: page.getByRole('navigation').getByRole('link', { name: 'Employee Information' }).click() → Role-based scoping
382
- - GOOD: page.locator('a[href*="/employee"]', { hasText: 'Employee Information' }).click() → Combined attributes
383
-
384
- **Option 2: Use Position-Based Selection**:
385
- - If multiple matches are expected: page.locator('a', { hasText: 'Employee Information' }).first().click()
386
- - Or use: .nth(0) for first, .last() for last
387
-
388
- **Option 3: Filter by Visibility/State**:
389
- - page.locator('button', { hasText: 'Submit' }).filter({ hasNotText: 'Draft' }).click()
390
-
391
- 🚫 **ANTI-PATTERNS (DON'T DO THIS)**:
392
- - BAD: Chaining multiple selector strategies in one command with semicolons
393
- - BAD: Using page.evaluate() to find/click elements (defeats Playwright's auto-waiting)
394
- - GOOD: ONE clear, specific selector like page.locator('nav a', { hasText: 'Employee Information' }).click()
365
+ **PROACTIVE DETECTION** - Check BEFORE generating command:
366
+ - Review the "CURRENT PAGE STATE" section below (accessibility tree / aria snapshot)
367
+ - Look for duplicate elements with same role/text (e.g., multiple links with "Settings")
368
+ - If duplicates exist, generate a MORE SPECIFIC selector from the start
369
+ - Don't wait for strict mode error - prevent it by analyzing the DOM structure
370
+
371
+ 🚨 ERROR PATTERNS:
372
+ - "strict mode violation" → Your selector matched multiple elements
373
+ - "Multiple elements found" → Same issue
374
+ - "locator resolved to 2 elements" → Often one is a tooltip/popover with duplicate text
375
+ - Command chain with multiple strategies → Sign of selector problems
376
+
377
+ ✅ SOLUTIONS (in order of preference):
378
+
379
+ **Option 1: Use Role-Based Selectors** (BEST - avoids tooltips):
380
+ - BAD: page.getByText('Settings').click() → Matches button AND its tooltip
381
+ - GOOD: page.getByRole('button', { name: 'Settings' }).click() → Only matches button role
382
+ - GOOD: page.locator('button').getByText('Settings').click() → Scoped to button tag
383
+ - GOOD: page.locator('[role="button"]').getByText('Settings').click() → Scoped to role
384
+
385
+ **Option 2: Scope to Container**:
386
+ - BAD: page.locator('a', { hasText: 'Settings' }).click() → Matches multiple links
387
+ - GOOD: page.locator('nav a', { hasText: 'Settings' }).click() → Scoped to nav
388
+ - GOOD: page.locator('a[href*="/settings"]').click() → Use unique attribute
389
+
390
+ **Option 3: Use Position-Based Selection**:
391
+ - If multiple matches are expected: page.locator('a', { hasText: 'Settings' }).first().click()
392
+ - Or use: .nth(0) for first, .last() for last
393
+
394
+ **Option 4: Filter by Visibility/State**:
395
+ - page.locator('button', { hasText: 'Submit' }).filter({ hasNotText: 'Draft' }).click()
396
+
397
+ 🚫 **ANTI-PATTERNS (DON'T DO THIS)**:
398
+ - BAD: Chaining multiple selector strategies in one command with semicolons
399
+ - BAD: Using page.evaluate() to find/click elements (defeats Playwright's auto-waiting)
400
+ - GOOD: ONE clear, specific selector like page.locator('nav a', { hasText: 'Settings' }).click()
395
401
 
396
402
  **When You See Strict Mode Errors:**
397
403
  1. Analyze - Why did my selector match multiple elements?
package/src/scenario-worker-class.ts CHANGED
@@ -21,7 +21,8 @@ import {
21
21
  RecallHistoryTool,
22
22
  InspectPageTool,
23
23
  CheckPageReadyTool,
24
- ExtractDataTool
24
+ ExtractDataTool,
25
+ VerifyActionResultTool
25
26
  } from './orchestrator';
26
27
 
27
28
  // Define a simple logging interface for compatibility
@@ -104,12 +105,16 @@ export class ScenarioWorker extends EventEmitter {
104
105
  const takeScreenshotTool = new TakeScreenshotTool();
105
106
  takeScreenshotTool.setLLMFacade(this.llmFacade); // Inject LLM for vision analysis
106
107
 
107
- // Register 5 information-gathering tools (state changes via Playwright commands)
108
+ const verifyActionTool = new VerifyActionResultTool();
109
+ verifyActionTool.setLLMFacade(this.llmFacade); // Inject LLM for vision comparison
110
+
111
+ // Register 6 information-gathering tools (state changes via Playwright commands)
108
112
  this.toolRegistry.register(takeScreenshotTool);
109
113
  this.toolRegistry.register(new RecallHistoryTool());
110
114
  this.toolRegistry.register(new InspectPageTool());
111
115
  this.toolRegistry.register(new CheckPageReadyTool());
112
116
  this.toolRegistry.register(new ExtractDataTool());
117
+ this.toolRegistry.register(verifyActionTool);
113
118
 
114
119
  // Create orchestrator agent
115
120
  this.orchestratorAgent = new OrchestratorAgent(
package/src/utils/coordinate-converter.ts ADDED
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Coordinate Converter Utility
3
+ * Converts percentage-based coordinates to pixel coordinates and generates Playwright commands
4
+ */
5
+
6
+ import { CoordinateAction } from '../orchestrator/types';
7
+
8
+ export class CoordinateConverter {
9
+
10
+ /**
11
+ * Convert percentage coordinates to actual pixel coordinates
12
+ */
13
+ static percentToPixels(
14
+ xPercent: number,
15
+ yPercent: number,
16
+ viewportWidth: number,
17
+ viewportHeight: number
18
+ ): { x: number; y: number } {
19
+ return {
20
+ x: Math.round((xPercent / 100) * viewportWidth),
21
+ y: Math.round((yPercent / 100) * viewportHeight)
22
+ };
23
+ }
24
+
25
+ /**
26
+ * Get viewport dimensions from page
27
+ */
28
+ static async getViewportSize(page: any): Promise<{ width: number; height: number }> {
29
+ return await page.evaluate((): { width: number; height: number } => {
30
+ const win = (globalThis as any).window;
31
+ return {
32
+ width: win.innerWidth as number,
33
+ height: win.innerHeight as number
34
+ };
35
+ });
36
+ }
37
+
38
+ /**
39
+ * Convert coordinate action with percentages to Playwright commands
40
+ * Returns array of command strings
41
+ */
42
+ static async generateCommands(
43
+ action: CoordinateAction,
44
+ page: any
45
+ ): Promise<string[]> {
46
+ const viewport = await this.getViewportSize(page);
47
+ const { x, y } = this.percentToPixels(action.xPercent, action.yPercent, viewport.width, viewport.height);
48
+
49
+ const commands: string[] = [];
50
+
51
+ switch (action.action) {
52
+ case 'click':
53
+ commands.push(`await page.mouse.click(${x}, ${y});`);
54
+ break;
55
+
56
+ case 'doubleClick':
57
+ commands.push(`await page.mouse.dblclick(${x}, ${y});`);
58
+ break;
59
+
60
+ case 'rightClick':
61
+ commands.push(`await page.mouse.click(${x}, ${y}, { button: 'right' });`);
62
+ break;
63
+
64
+ case 'hover':
65
+ commands.push(`await page.mouse.move(${x}, ${y});`);
66
+ break;
67
+
68
+ case 'drag':
69
+ if (action.toXPercent === undefined || action.toYPercent === undefined) {
70
+ throw new Error('Drag action requires toXPercent and toYPercent');
71
+ }
72
+ const to = this.percentToPixels(action.toXPercent, action.toYPercent, viewport.width, viewport.height);
73
+ commands.push(`await page.mouse.move(${x}, ${y});`);
74
+ commands.push(`await page.mouse.down();`);
75
+ commands.push(`await page.mouse.move(${to.x}, ${to.y});`);
76
+ commands.push(`await page.mouse.up();`);
77
+ break;
78
+
79
+ case 'fill':
80
+ if (!action.value) {
81
+ throw new Error('Fill action requires value');
82
+ }
83
+ // Click to focus, wait briefly, then type
84
+ commands.push(`await page.mouse.click(${x}, ${y});`);
85
+ commands.push(`await page.waitForTimeout(100);`);
86
+ commands.push(`await page.keyboard.type(${JSON.stringify(action.value)});`);
87
+ break;
88
+
89
+ case 'scroll':
90
+ const scrollAmount = action.scrollAmount || 100;
91
+ // Move to position, then scroll
92
+ commands.push(`await page.mouse.move(${x}, ${y});`);
93
+ commands.push(`await page.mouse.wheel(0, ${scrollAmount});`);
94
+ break;
95
+
96
+ default:
97
+ throw new Error(`Unknown coordinate action: ${action.action}`);
98
+ }
99
+
100
+ return commands;
101
+ }
102
+
103
+ /**
104
+ * Execute coordinate action directly on page
105
+ * Used during agent execution (converts and runs immediately)
106
+ */
107
+ static async executeAction(
108
+ action: CoordinateAction,
109
+ page: any
110
+ ): Promise<void> {
111
+ const viewport = await this.getViewportSize(page);
112
+ const { x, y } = this.percentToPixels(action.xPercent, action.yPercent, viewport.width, viewport.height);
113
+
114
+ switch (action.action) {
115
+ case 'click':
116
+ await page.mouse.click(x, y);
117
+ break;
118
+
119
+ case 'doubleClick':
120
+ await page.mouse.dblclick(x, y);
121
+ break;
122
+
123
+ case 'rightClick':
124
+ await page.mouse.click(x, y, { button: 'right' });
125
+ break;
126
+
127
+ case 'hover':
128
+ await page.mouse.move(x, y);
129
+ break;
130
+
131
+ case 'drag':
132
+ if (action.toXPercent === undefined || action.toYPercent === undefined) {
133
+ throw new Error('Drag requires toXPercent and toYPercent');
134
+ }
135
+ const to = this.percentToPixels(action.toXPercent, action.toYPercent, viewport.width, viewport.height);
136
+ await page.mouse.move(x, y);
137
+ await page.mouse.down();
138
+ await page.mouse.move(to.x, to.y);
139
+ await page.mouse.up();
140
+ break;
141
+
142
+ case 'fill':
143
+ if (!action.value) {
144
+ throw new Error('Fill requires value');
145
+ }
146
+ await page.mouse.click(x, y);
147
+ await page.waitForTimeout(100);
148
+ await page.keyboard.type(action.value);
149
+ break;
150
+
151
+ case 'scroll':
152
+ const scrollAmount = action.scrollAmount || 100;
153
+ await page.mouse.move(x, y);
154
+ await page.mouse.wheel(0, scrollAmount);
155
+ break;
156
+
157
+ default:
158
+ throw new Error(`Unknown coordinate action: ${action.action}`);
159
+ }
160
+ }
161
+ }
162
+
Binary file