testchimp-runner-core 0.0.34 → 0.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150):
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/orchestrator/decision-parser.d.ts +18 -0
  10. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  11. package/dist/orchestrator/decision-parser.js +127 -0
  12. package/dist/orchestrator/decision-parser.js.map +1 -0
  13. package/dist/orchestrator/index.d.ts +4 -2
  14. package/dist/orchestrator/index.d.ts.map +1 -1
  15. package/dist/orchestrator/index.js +14 -2
  16. package/dist/orchestrator/index.js.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
  18. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  19. package/dist/orchestrator/orchestrator-agent.js +534 -204
  20. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
  22. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  23. package/dist/orchestrator/orchestrator-prompts.js +529 -247
  24. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  25. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  26. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  27. package/dist/orchestrator/page-som-handler.js +1353 -0
  28. package/dist/orchestrator/page-som-handler.js.map +1 -0
  29. package/dist/orchestrator/som-types.d.ts +149 -0
  30. package/dist/orchestrator/som-types.d.ts.map +1 -0
  31. package/dist/orchestrator/som-types.js +87 -0
  32. package/dist/orchestrator/som-types.js.map +1 -0
  33. package/dist/orchestrator/tool-registry.d.ts +2 -0
  34. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  35. package/dist/orchestrator/tool-registry.js.map +1 -1
  36. package/dist/orchestrator/tools/index.d.ts +4 -1
  37. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  38. package/dist/orchestrator/tools/index.js +7 -2
  39. package/dist/orchestrator/tools/index.js.map +1 -1
  40. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  41. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  42. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  43. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  44. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  45. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  46. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  47. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  48. package/dist/orchestrator/types.d.ts +23 -1
  49. package/dist/orchestrator/types.d.ts.map +1 -1
  50. package/dist/orchestrator/types.js +11 -1
  51. package/dist/orchestrator/types.js.map +1 -1
  52. package/dist/scenario-service.d.ts +5 -0
  53. package/dist/scenario-service.d.ts.map +1 -1
  54. package/dist/scenario-service.js +17 -0
  55. package/dist/scenario-service.js.map +1 -1
  56. package/dist/scenario-worker-class.d.ts +4 -0
  57. package/dist/scenario-worker-class.d.ts.map +1 -1
  58. package/dist/scenario-worker-class.js +18 -3
  59. package/dist/scenario-worker-class.js.map +1 -1
  60. package/dist/testing/agent-tester.d.ts +35 -0
  61. package/dist/testing/agent-tester.d.ts.map +1 -0
  62. package/dist/testing/agent-tester.js +84 -0
  63. package/dist/testing/agent-tester.js.map +1 -0
  64. package/dist/testing/ref-translator-tester.d.ts +44 -0
  65. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  66. package/dist/testing/ref-translator-tester.js +104 -0
  67. package/dist/testing/ref-translator-tester.js.map +1 -0
  68. package/dist/utils/hierarchical-selector.d.ts +47 -0
  69. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  70. package/dist/utils/hierarchical-selector.js +212 -0
  71. package/dist/utils/hierarchical-selector.js.map +1 -0
  72. package/dist/utils/page-info-retry.d.ts +14 -0
  73. package/dist/utils/page-info-retry.d.ts.map +1 -0
  74. package/dist/utils/page-info-retry.js +60 -0
  75. package/dist/utils/page-info-retry.js.map +1 -0
  76. package/dist/utils/page-info-utils.d.ts +1 -0
  77. package/dist/utils/page-info-utils.d.ts.map +1 -1
  78. package/dist/utils/page-info-utils.js +46 -18
  79. package/dist/utils/page-info-utils.js.map +1 -1
  80. package/dist/utils/ref-attacher.d.ts +21 -0
  81. package/dist/utils/ref-attacher.d.ts.map +1 -0
  82. package/dist/utils/ref-attacher.js +149 -0
  83. package/dist/utils/ref-attacher.js.map +1 -0
  84. package/dist/utils/ref-translator.d.ts +49 -0
  85. package/dist/utils/ref-translator.d.ts.map +1 -0
  86. package/dist/utils/ref-translator.js +276 -0
  87. package/dist/utils/ref-translator.js.map +1 -0
  88. package/package.json +6 -1
  89. package/RELEASE_0.0.26.md +0 -165
  90. package/RELEASE_0.0.27.md +0 -236
  91. package/RELEASE_0.0.28.md +0 -286
  92. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  93. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  94. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  95. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  96. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  97. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  98. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  99. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  100. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  101. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  102. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  103. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  104. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  105. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  106. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  107. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  108. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  109. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  110. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  111. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  112. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  113. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  114. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  115. package/src/auth-config.ts +0 -84
  116. package/src/credit-usage-service.ts +0 -188
  117. package/src/env-loader.ts +0 -103
  118. package/src/execution-service.ts +0 -1413
  119. package/src/file-handler.ts +0 -104
  120. package/src/index.ts +0 -422
  121. package/src/llm-facade.ts +0 -821
  122. package/src/llm-provider.ts +0 -53
  123. package/src/model-constants.ts +0 -35
  124. package/src/orchestrator/index.ts +0 -34
  125. package/src/orchestrator/orchestrator-agent.ts +0 -862
  126. package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
  127. package/src/orchestrator/orchestrator-prompts.ts +0 -474
  128. package/src/orchestrator/tool-registry.ts +0 -182
  129. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  130. package/src/orchestrator/tools/extract-data.ts +0 -92
  131. package/src/orchestrator/tools/index.ts +0 -12
  132. package/src/orchestrator/tools/inspect-page.ts +0 -42
  133. package/src/orchestrator/tools/recall-history.ts +0 -72
  134. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  135. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  136. package/src/orchestrator/types.ts +0 -248
  137. package/src/playwright-mcp-service.ts +0 -224
  138. package/src/progress-reporter.ts +0 -144
  139. package/src/prompts.ts +0 -842
  140. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  141. package/src/providers/local-llm-provider.ts +0 -38
  142. package/src/scenario-service.ts +0 -232
  143. package/src/scenario-worker-class.ts +0 -1089
  144. package/src/script-utils.ts +0 -203
  145. package/src/types.ts +0 -239
  146. package/src/utils/browser-utils.ts +0 -348
  147. package/src/utils/coordinate-converter.ts +0 -162
  148. package/src/utils/page-info-utils.ts +0 -250
  149. package/testchimp-runner-core-0.0.33.tgz +0 -0
  150. package/tsconfig.json +0 -19
@@ -1,75 +0,0 @@
1
- /**
2
- * Check Page Ready Tool
3
- * Verify that page has finished loading and is interactive
4
- */
5
-
6
- import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
7
- import { ToolResult } from '../types';
8
-
9
/**
 * Tool that reports whether the current page has reached an interactive
 * document state.
 *
 * It waits for the `domcontentloaded` load state and then reads
 * `document.readyState` inside the page. The real `readyState` value is
 * returned to the caller rather than a hard-coded label.
 *
 * NOTE(review): `description` also mentions pending network requests and DOM
 * stability, but only the load state and `document.readyState` are checked
 * here. The text is left unchanged because it is consumed at runtime.
 */
export class CheckPageReadyTool implements Tool {
  /** Fallback wait budget, mirrored by the `timeout` parameter default. */
  private static readonly DEFAULT_TIMEOUT_MS = 5000;

  name = 'check_page_ready';
  description = 'Verify that the page has finished loading and is ready for interaction. Checks load state, no pending network requests, and DOM stability.';

  parameters: ToolParameter[] = [
    {
      name: 'timeout',
      type: 'number',
      description: 'Maximum time to wait for page to be ready (milliseconds)',
      required: false,
      default: 5000
    }
  ];

  /**
   * Waits for `domcontentloaded` and reports the document ready state.
   *
   * @param params - May contain `timeout` (milliseconds). Missing, zero,
   *   negative, or non-numeric values fall back to the default.
   * @param context - Supplies the `page` to inspect and an optional `logger`.
   * @returns `success: true` with `{ ready, url, title, readyState }`, or
   *   `success: false` with an error message when any page call throws.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { page, logger } = context;

    // Accept numeric strings too; reject anything that is not a positive finite number.
    const requested = Number(params.timeout);
    const timeout =
      Number.isFinite(requested) && requested > 0
        ? requested
        : CheckPageReadyTool.DEFAULT_TIMEOUT_MS;

    try {
      logger?.(`[CheckPageReady] Verifying page is ready (timeout: ${timeout}ms)`, 'log');

      await page.waitForLoadState('domcontentloaded', { timeout });

      // globalThis is used so this file compiles without DOM lib typings.
      const readyState: string = await page.evaluate(() => {
        const doc = (globalThis as any).document;
        return doc.readyState;
      });
      const ready = readyState === 'complete' || readyState === 'interactive';

      const url = page.url();
      const title = await page.title();

      if (ready) {
        logger?.(`[CheckPageReady] ✓ Page is ready: ${title} (${url})`, 'log');
      } else {
        logger?.(`[CheckPageReady] ⚠ Page not fully ready yet`, 'warn');
      }

      return {
        success: true,
        data: { ready, url, title, readyState }
      };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      logger?.(`[CheckPageReady] ✗ Failed: ${message}`, 'error');
      return {
        success: false,
        error: `Page ready check failed: ${message}`
      };
    }
  }
}
75
-
@@ -1,92 +0,0 @@
1
- /**
2
- * Extract Data Tool
3
- * Save data from page for use in later steps
4
- */
5
-
6
- import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
7
- import { ToolResult } from '../types';
8
- import { JourneyMemory } from '../types';
9
-
10
/**
 * Tool that reads text (or an attribute) from the first element matching a
 * selector and stores it in journey memory under a caller-chosen name.
 */
export class ExtractDataTool implements Tool {
  name = 'extract_data';
  description = 'Extract and save data from the page for use in later steps. Useful for capturing IDs, usernames, confirmation codes, etc.';

  parameters: ToolParameter[] = [
    {
      name: 'selector',
      type: 'string',
      description: 'CSS selector or text content to extract',
      required: true
    },
    {
      name: 'dataName',
      type: 'string',
      description: 'Name to save the data under (e.g., "userId", "confirmationCode")',
      required: true
    },
    {
      name: 'attribute',
      type: 'string',
      description: 'Optional: Extract specific attribute instead of text content (e.g., "href", "value")',
      required: false
    }
  ];

  /**
   * Extracts a value from the page and saves it to `memory.extractedData`.
   *
   * @param params - `selector` and `dataName` are required non-empty strings;
   *   `attribute` optionally selects an attribute instead of text content.
   * @param context - Supplies `page`, `memory` and an optional `logger`.
   * @returns `success: true` with `{ dataName, value, selector, attribute }`.
   *   Returns `success: false` when a required parameter is missing, no
   *   element matches, or a page call throws. An empty extracted value is
   *   still saved, with a warning logged.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { page, memory, logger } = context;
    const journeyMemory = memory as JourneyMemory;
    const { selector, dataName, attribute } = params;

    // Required parameters come from an LLM decision, so check them explicitly.
    for (const [key, value] of [['selector', selector], ['dataName', dataName]] as const) {
      if (typeof value !== 'string' || value.length === 0) {
        logger?.(`[ExtractData] ✗ Missing required parameter: ${key}`, 'error');
        return {
          success: false,
          error: `Missing required parameter: ${key}`
        };
      }
    }

    try {
      logger?.(`[ExtractData] Extracting "${dataName}" from selector: ${selector}`, 'log');

      // Build the locator once and reuse it for both the count and the read.
      const matches = page.locator(selector);

      if ((await matches.count()) === 0) {
        logger?.(`[ExtractData] ✗ Element not found: ${selector}`, 'error');
        return {
          success: false,
          error: `Element not found: ${selector}`
        };
      }

      const element = matches.first();
      const raw: string | null = attribute
        ? await element.getAttribute(attribute)
        : await element.textContent();
      const extractedValue = (raw ?? '').trim();

      if (!extractedValue) {
        logger?.(`[ExtractData] ⚠ Extracted empty value from ${selector}`, 'warn');
      }

      // Create the store on first use so a fresh memory object does not fail the extraction.
      if (!journeyMemory.extractedData) {
        journeyMemory.extractedData = {};
      }
      journeyMemory.extractedData[dataName] = extractedValue;

      logger?.(`[ExtractData] ✓ Saved "${dataName}" = "${extractedValue.substring(0, 50)}${extractedValue.length > 50 ? '...' : ''}"`, 'log');

      return {
        success: true,
        data: {
          dataName,
          value: extractedValue,
          selector,
          attribute
        }
      };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      logger?.(`[ExtractData] ✗ Failed: ${message}`, 'error');
      return {
        success: false,
        error: `Data extraction failed: ${message}`
      };
    }
  }
}
92
-
@@ -1,12 +0,0 @@
1
- /**
2
- * Tool exports - 6 information-gathering tools
3
- * Note: State changes (navigation, clicks, fills) are done via Playwright commands, not tools
4
- */
5
-
6
- export { TakeScreenshotTool } from './take-screenshot';
7
- export { RecallHistoryTool } from './recall-history';
8
- export { InspectPageTool } from './inspect-page';
9
- export { CheckPageReadyTool } from './check-page-ready';
10
- export { ExtractDataTool } from './extract-data';
11
- export { VerifyActionResultTool } from './verify-action-result';
12
-
@@ -1,42 +0,0 @@
1
- /**
2
- * Inspect Page Tool
3
- * Get current DOM snapshot (might be redundant since DOM is always-provided, but keeps extensibility)
4
- */
5
-
6
- import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
7
- import { ToolResult } from '../types';
8
- import { getEnhancedPageInfo } from '../../utils/page-info-utils';
9
-
10
/**
 * Tool that forces a fresh page snapshot via `getEnhancedPageInfo`.
 * Takes no parameters; the snapshot is returned together with the page URL.
 */
export class InspectPageTool implements Tool {
  name = 'inspect_page';
  description = 'Get current page DOM snapshot. Note: Current page info is already provided in context, so this tool is mainly for forcing a fresh snapshot if needed.';

  parameters: ToolParameter[] = [];

  /**
   * Captures a new snapshot of the page held by `context`.
   *
   * @param params - Unused; the tool declares no parameters.
   * @param context - Supplies the `page` and an optional `logger`.
   * @returns `success: true` with `{ pageInfo, url }`, or `success: false`
   *   with an error message if the snapshot helper or `page.url()` throws.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { page: activePage, logger: log } = context;

    try {
      log?.(`[InspectPage] Getting fresh DOM snapshot`, 'log');
      const snapshot = await getEnhancedPageInfo(activePage);
      log?.(`[InspectPage] ✓ DOM snapshot retrieved`, 'log');

      const outcome: ToolResult = {
        success: true,
        data: {
          pageInfo: snapshot,
          url: activePage.url()
        }
      };
      return outcome;
    } catch (failure: any) {
      const reason = failure.message;
      log?.(`[InspectPage] ✗ Failed: ${reason}`, 'error');
      return {
        success: false,
        error: `Page inspection failed: ${reason}`
      };
    }
  }
}
42
-
@@ -1,72 +0,0 @@
1
- /**
2
- * Recall History Tool
3
- * Access deeper history beyond the recent 6-7 steps always provided
4
- */
5
-
6
- import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
7
- import { ToolResult } from '../types';
8
- import { JourneyMemory } from '../types';
9
-
10
/**
 * Tool that returns older journey steps from memory, optionally filtered by a
 * case-insensitive substring match on each step's action, code or observation.
 */
export class RecallHistoryTool implements Tool {
  /** Step count used when `maxSteps` is missing or invalid. */
  private static readonly DEFAULT_MAX_STEPS = 10;

  name = 'recall_history';
  description = 'Access journey history beyond the recent 6-7 steps always provided in context. Use when you need to remember what happened earlier in the journey.';

  parameters: ToolParameter[] = [
    {
      name: 'maxSteps',
      type: 'number',
      description: 'Maximum number of historical steps to retrieve (from most recent backwards)',
      required: false,
      default: 10
    },
    {
      name: 'query',
      type: 'string',
      description: 'Optional: Filter for specific actions (e.g., "login", "form fill")',
      required: false
    }
  ];

  /**
   * Returns the most recent `maxSteps` history entries, after optional filtering.
   *
   * @param params - `maxSteps` (positive number; anything else falls back to
   *   10) and optional `query` (coerced to a lower-cased string).
   * @param context - Supplies `memory` (read for `history`) and an optional `logger`.
   * @returns `success: true` with `{ steps, totalHistorySize, filtered }`, or
   *   `success: false` with an error message on unexpected failure.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { memory, logger } = context;
    const journeyMemory = memory as JourneyMemory;

    try {
      // Parameters are normalised inside the try so malformed input cannot reject the call.
      const maxSteps = RecallHistoryTool.normalizeMaxSteps(params.maxSteps);
      const query = params.query == null ? '' : String(params.query).toLowerCase();

      logger?.(`[RecallHistory] Retrieving up to ${maxSteps} historical steps${query ? ` matching "${query}"` : ''}`, 'log');

      const history: any[] = Array.isArray(journeyMemory?.history) ? journeyMemory.history : [];

      // A step with a missing text field simply does not match; it must not abort the recall.
      const mentions = (text: unknown): boolean =>
        typeof text === 'string' && text.toLowerCase().includes(query);

      const steps = query
        ? history.filter(step => mentions(step?.action) || mentions(step?.code) || mentions(step?.observation))
        : [...history];

      // maxSteps is always a positive integer here, so the negative index
      // selects the tail of the list.
      const recentSteps = steps.slice(-maxSteps);

      logger?.(`[RecallHistory] ✓ Found ${recentSteps.length} historical steps`, 'log');

      return {
        success: true,
        data: {
          steps: recentSteps,
          totalHistorySize: history.length,
          filtered: !!query
        }
      };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      logger?.(`[RecallHistory] ✗ Failed: ${message}`, 'error');
      return {
        success: false,
        error: `History recall failed: ${message}`
      };
    }
  }

  /** Coerces the requested step count to a positive integer, else the default. */
  private static normalizeMaxSteps(value: unknown): number {
    const count = Math.floor(Number(value));
    return Number.isFinite(count) && count > 0 ? count : RecallHistoryTool.DEFAULT_MAX_STEPS;
  }
}
72
-
@@ -1,128 +0,0 @@
1
- /**
2
- * Take Screenshot Tool
3
- * Captures current page state visually and analyzes it with DOM snapshot
4
- */
5
-
6
- import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
7
- import { ToolResult } from '../types';
8
- import { getEnhancedPageInfo } from '../../utils/page-info-utils';
9
- import { VISION_MODEL } from '../../model-constants';
10
-
11
/**
 * Tool that captures a JPEG screenshot of the page and, when an LLM facade has
 * been injected, asks a vision model to correlate it with the page snapshot
 * and suggest selectors.
 *
 * Without an LLM facade, or if the vision call fails, the screenshot is still
 * returned together with a generic fallback message.
 */
export class TakeScreenshotTool implements Tool {
  name = 'take_screenshot';
  description = 'Capture a screenshot and analyze it with DOM snapshot to get actionable selector recommendations. Use when you need to see the actual page visually to find the right elements. Returns text-based analysis with selector suggestions.';

  // LLM facade is injected after construction via setLLMFacade.
  private llmFacade?: any;

  /** Injects the facade whose `llmProvider.callLLM` performs the vision analysis. */
  setLLMFacade(llmFacade: any): void {
    this.llmFacade = llmFacade;
  }

  parameters: ToolParameter[] = [
    {
      name: 'isFullPage',
      type: 'boolean',
      description: 'If true, captures entire page (scrolling). If false, captures only viewport. DEFAULT: true (recommended for finding elements that may be below fold)',
      required: false,
      default: true
    },
    {
      name: 'purpose',
      type: 'string',
      description: 'What you are trying to find or understand from the screenshot',
      required: false
    }
  ];

  /**
   * Captures the screenshot and optionally runs vision analysis on it.
   *
   * @param params - `isFullPage` (defaults to true when absent) and `purpose`
   *   (free text included in the analysis prompt).
   * @param context - Supplies the `page` and an optional `logger`.
   * @returns `success: true` with `{ screenshot, isFullPage, size,
   *   interactiveElementCount }` and the analysis text in `learning`, or
   *   `success: false` if capturing the screenshot or snapshot throws.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { page, logger } = context;
    const isFullPage = params.isFullPage ?? true;
    const purpose = params.purpose || 'Analyze page structure and elements';

    try {
      logger?.(`[TakeScreenshot] Capturing ${isFullPage ? 'full page' : 'viewport'} screenshot for: ${purpose}`, 'log');

      // JPEG at quality 60 keeps the payload small.
      const screenshotBuffer = await page.screenshot({
        fullPage: isFullPage,
        type: 'jpeg',
        quality: 60
      });

      const base64 = screenshotBuffer.toString('base64');
      const dataUrl = `data:image/jpeg;base64,${base64}`;

      logger?.(`[TakeScreenshot] ✓ Screenshot captured (${Math.round(base64.length / 1024)}KB), analyzing with DOM...`, 'log');

      // Page snapshot used to correlate what is visible with selectors.
      const pageInfo = await getEnhancedPageInfo(page);

      let analysis = 'Screenshot captured. Use DOM snapshot in context to find selectors.';

      if (this.llmFacade) {
        try {
          logger?.(`[TakeScreenshot] Calling LLM for vision analysis...`, 'log');

          // JSON.stringify returns undefined for an undefined input, so fall
          // back to an empty string before truncating to 2000 characters.
          const ariaTree = (JSON.stringify(pageInfo.ariaSnapshot, null, 2) ?? '').substring(0, 2000);

          const analysisPrompt = `Analyze screenshot WITH DOM structure to identify correct Playwright selectors.

PURPOSE: ${purpose}

INTERACTIVE ELEMENTS (with positions and suggested selectors):
${pageInfo.formattedElements}

ARIA TREE (hierarchical structure):
${ariaTree}

TASK:
1. Look at screenshot - identify the visual elements you need for: ${purpose}
2. Match visual position with bounding boxes above
3. Recommend SEMANTIC SELECTORS FIRST: getByRole, getByLabel, getByPlaceholder, getByText
4. AVOID auto-generated IDs with unicode (e.g., #«r3»-form-item)

Output format:
"For [visual element description]:
Try: [semantic selector from list - prefer getByRole/getByLabel]
Or: [alternative selector]"

Be concise. Only 2-3 recommendations. Prioritize user-friendly semantic selectors.`;

          const llmResponse = await this.llmFacade.llmProvider.callLLM({
            model: VISION_MODEL,
            systemPrompt: 'You are a vision analysis expert for web automation. Analyze screenshots with DOM snapshots to recommend working Playwright selectors. ALWAYS prioritize semantic selectors (getByRole, getByLabel, getByText) over CSS selectors with auto-generated IDs.',
            userPrompt: analysisPrompt,
            imageUrl: dataUrl
          });

          analysis = llmResponse?.answer || analysis;
          logger?.(`[TakeScreenshot] ✓ Vision analysis complete`, 'log');
        } catch (error: unknown) {
          // Best effort: a failed analysis must not discard the captured screenshot.
          const message = error instanceof Error ? error.message : String(error);
          logger?.(`[TakeScreenshot] ⚠ Vision analysis failed, returning raw screenshot: ${message}`, 'warn');
        }
      }

      return {
        success: true,
        data: {
          screenshot: dataUrl,
          isFullPage,
          size: base64.length,
          interactiveElementCount: pageInfo.interactiveElements?.length ?? 0
        },
        learning: analysis // Text-based analysis with selector recommendations
      };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      logger?.(`[TakeScreenshot] ✗ Failed: ${message}`, 'error');
      return {
        success: false,
        error: `Screenshot capture failed: ${message}`
      };
    }
  }
}
127
-
128
-
@@ -1,159 +0,0 @@
1
- /**
2
- * Verify Action Result Tool
3
- *
4
- * Compares before/after screenshots to verify if an action achieved its goal
5
- * USE SPARINGLY - expensive (sends 2 images to vision model)
6
- */
7
-
8
- import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
9
- import { ToolResult } from '../types';
10
- import { LabeledImage } from '../../llm-provider';
11
-
12
/**
 * Tool that captures two viewport screenshots 1.5 s apart and asks a vision
 * model whether an expected UI change is visible between them.
 *
 * NOTE(review): both screenshots are taken inside `execute`, i.e. after the
 * tool has been invoked. Confirm against the orchestrator whether the first
 * capture can really precede the action, as `description` states.
 */
export class VerifyActionResultTool implements Tool {
  name = 'verify_action_result';

  description = `Verify if your previous action achieved its goal by comparing before/after screenshots.

⚠️ USE SPARINGLY - EXPENSIVE (2 images sent to vision model)

When to use:
- After coordinate click to verify expected UI change occurred
- After clicking element when unsure if goal achieved (no clear feedback)
- To verify navigation or modal appeared

How it works:
1. Takes screenshot BEFORE you run your next command
2. You run command (click, fill, etc.)
3. Takes screenshot AFTER command executes
4. Compares both and tells you if expected change happened

Parameters:
- expectedChange: What should have changed (e.g., "Dashboard page loaded", "Modal appeared", "Form submitted")

Returns: { verified: boolean, reasoning: string, changes: string[] }

Example:
{
"toolCalls": [{"name": "verify_action_result", "params": {"expectedChange": "Dashboard page loaded with data grid"}}],
"toolReasoning": "Need to verify coordinate click navigated to correct page",
"needsToolResults": true // IMPORTANT: Wait for verification before deciding status
}`;

  parameters: ToolParameter[] = [
    {
      name: 'expectedChange',
      type: 'string',
      description: 'What UI change you expect to see (e.g., "Modal opened", "Page navigated", "New form appeared")',
      required: true
    }
  ];

  // LLM facade is injected after construction via setLLMFacade.
  private llmFacade?: any;

  /** Injects the facade whose `llmProvider.callLLM` performs the comparison. */
  setLLMFacade(llmFacade: any): void {
    this.llmFacade = llmFacade;
  }

  /**
   * Captures the two screenshots, sends them to the vision model and returns
   * its normalised verdict.
   *
   * @param params - `expectedChange`: description of the UI change to look for.
   * @param context - Supplies the `page` and an optional `logger`.
   * @returns `success: true` with `data` holding at least `verified`
   *   (boolean), `reasoning` (string) and `changes` (string[]). Returns
   *   `success: false` when the facade is missing, a page or LLM call throws,
   *   or the response contains no parseable JSON object.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { page, logger } = context;
    const expectedChange = params.expectedChange || 'Expected UI change';

    logger?.(`[VerifyActionResult] Capturing before/after screenshots...`);

    try {
      const beforeDataUrl = await this.captureViewportDataUrl(page);

      logger?.(`[VerifyActionResult] ✓ BEFORE screenshot captured (JPEG 60%)`);
      logger?.(`[VerifyActionResult] ⏳ Waiting 1.5s for UI to settle after your previous action...`);

      // Give the UI time to settle before the second capture.
      await page.waitForTimeout(1500);

      const afterDataUrl = await this.captureViewportDataUrl(page);

      logger?.(`[VerifyActionResult] ✓ AFTER screenshot captured (JPEG 60%)`);
      logger?.(`[VerifyActionResult] 🔍 Calling vision model to compare...`);

      if (!this.llmFacade) {
        throw new Error('LLM facade not initialized');
      }

      const request = {
        model: 'gpt-5-mini', // Vision model
        systemPrompt: 'You are a visual verification expert for web automation. Compare before/after screenshots to determine if an action achieved its expected outcome.',
        userPrompt: `Expected change: ${expectedChange}

Compare the [BEFORE] and [AFTER] screenshots.

Did the expected change occur? Respond with JSON:
{
"verified": boolean,
"reasoning": "What changed (or didn't change) between screenshots",
"changes": ["Specific UI changes observed"]
}

Be objective:
- Look for new elements, panels, forms, modals
- Check for page navigation, URL changes, content changes
- Note any visual indicators of success/failure

Be strict: Only return true if you clearly see the expected change.`,
        images: [
          { label: 'BEFORE', dataUrl: beforeDataUrl },
          { label: 'AFTER', dataUrl: afterDataUrl }
        ]
      };

      const response = await this.llmFacade.llmProvider.callLLM(request);

      const parsed = VerifyActionResultTool.parseVerdict(response?.answer);
      if (!parsed) {
        logger?.(`[VerifyActionResult] ⚠️ Could not parse verification response`, 'warn');
        return {
          success: false,
          error: 'Failed to parse verification response'
        };
      }

      // The model output is untrusted: only a literal boolean true counts as
      // verified, so the string "false" is not treated as success.
      const verified = parsed.verified === true;
      const reasoning = typeof parsed.reasoning === 'string' ? parsed.reasoning : '';
      const changes: string[] = Array.isArray(parsed.changes) ? parsed.changes.map(String) : [];

      logger?.(`[VerifyActionResult] 📊 Result: ${verified ? '✅ VERIFIED' : '❌ NOT VERIFIED'}`);
      logger?.(`[VerifyActionResult] 💭 ${reasoning}`);
      if (changes.length > 0) {
        logger?.(`[VerifyActionResult] 📝 Changes observed:`);
        changes.forEach((change: string) => logger?.(` - ${change}`));
      }

      return {
        success: true,
        data: { ...parsed, verified, reasoning, changes },
        learning: `Verification result: ${verified ? 'Goal achieved' : 'Goal not achieved'}. ${reasoning}`
      };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      logger?.(`[VerifyActionResult] ❌ Error: ${message}`, 'error');
      return {
        success: false,
        error: message
      };
    }
  }

  /**
   * Captures the viewport as a JPEG (quality 60) and returns it as a data URL.
   * Binary results are base64-encoded explicitly, because interpolating them
   * directly would use the default string conversion and corrupt the image.
   */
  private async captureViewportDataUrl(page: ToolExecutionContext['page']): Promise<string> {
    const shot: string | { toString(encoding: string): string } = await page.screenshot({
      fullPage: false,
      type: 'jpeg',
      quality: 60
    });
    const base64 = typeof shot === 'string' ? shot : shot.toString('base64');
    return `data:image/jpeg;base64,${base64}`;
  }

  /** Extracts the first `{...}` span from the model answer; undefined if absent or invalid JSON. */
  private static parseVerdict(answer: unknown): Record<string, any> | undefined {
    if (typeof answer !== 'string') {
      return undefined;
    }
    const jsonMatch = answer.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      return undefined;
    }
    try {
      return JSON.parse(jsonMatch[0]);
    } catch {
      return undefined;
    }
  }
}
159
-