testchimp-runner-core 0.0.33 → 0.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-service.d.ts +1 -4
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +155 -468
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -1
- package/dist/index.js.map +1 -1
- package/dist/llm-facade.d.ts.map +1 -1
- package/dist/llm-facade.js +7 -7
- package/dist/llm-facade.js.map +1 -1
- package/dist/llm-provider.d.ts +9 -0
- package/dist/llm-provider.d.ts.map +1 -1
- package/dist/model-constants.d.ts +16 -5
- package/dist/model-constants.d.ts.map +1 -1
- package/dist/model-constants.js +17 -6
- package/dist/model-constants.js.map +1 -1
- package/dist/orchestrator/decision-parser.d.ts +18 -0
- package/dist/orchestrator/decision-parser.d.ts.map +1 -0
- package/dist/orchestrator/decision-parser.js +127 -0
- package/dist/orchestrator/decision-parser.js.map +1 -0
- package/dist/orchestrator/index.d.ts +4 -2
- package/dist/orchestrator/index.d.ts.map +1 -1
- package/dist/orchestrator/index.js +15 -2
- package/dist/orchestrator/index.js.map +1 -1
- package/dist/orchestrator/orchestrator-agent.d.ts +17 -22
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +708 -577
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts +32 -0
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -0
- package/dist/orchestrator/orchestrator-prompts.js +737 -0
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -0
- package/dist/orchestrator/page-som-handler.d.ts +106 -0
- package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
- package/dist/orchestrator/page-som-handler.js +1353 -0
- package/dist/orchestrator/page-som-handler.js.map +1 -0
- package/dist/orchestrator/som-types.d.ts +149 -0
- package/dist/orchestrator/som-types.d.ts.map +1 -0
- package/dist/orchestrator/som-types.js +87 -0
- package/dist/orchestrator/som-types.js.map +1 -0
- package/dist/orchestrator/tool-registry.d.ts +2 -0
- package/dist/orchestrator/tool-registry.d.ts.map +1 -1
- package/dist/orchestrator/tool-registry.js.map +1 -1
- package/dist/orchestrator/tools/index.d.ts +5 -1
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js +9 -2
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
- package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
- package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
- package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
- package/dist/orchestrator/tools/verify-action-result.d.ts +17 -0
- package/dist/orchestrator/tools/verify-action-result.d.ts.map +1 -0
- package/dist/orchestrator/tools/verify-action-result.js +140 -0
- package/dist/orchestrator/tools/verify-action-result.js.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
- package/dist/orchestrator/types.d.ts +49 -1
- package/dist/orchestrator/types.d.ts.map +1 -1
- package/dist/orchestrator/types.js +11 -1
- package/dist/orchestrator/types.js.map +1 -1
- package/dist/prompts.d.ts.map +1 -1
- package/dist/prompts.js +40 -34
- package/dist/prompts.js.map +1 -1
- package/dist/scenario-service.d.ts +5 -0
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +17 -0
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +4 -0
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +21 -3
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/testing/agent-tester.d.ts +35 -0
- package/dist/testing/agent-tester.d.ts.map +1 -0
- package/dist/testing/agent-tester.js +84 -0
- package/dist/testing/agent-tester.js.map +1 -0
- package/dist/testing/ref-translator-tester.d.ts +44 -0
- package/dist/testing/ref-translator-tester.d.ts.map +1 -0
- package/dist/testing/ref-translator-tester.js +104 -0
- package/dist/testing/ref-translator-tester.js.map +1 -0
- package/dist/utils/coordinate-converter.d.ts +32 -0
- package/dist/utils/coordinate-converter.d.ts.map +1 -0
- package/dist/utils/coordinate-converter.js +130 -0
- package/dist/utils/coordinate-converter.js.map +1 -0
- package/dist/utils/hierarchical-selector.d.ts +47 -0
- package/dist/utils/hierarchical-selector.d.ts.map +1 -0
- package/dist/utils/hierarchical-selector.js +212 -0
- package/dist/utils/hierarchical-selector.js.map +1 -0
- package/dist/utils/page-info-retry.d.ts +14 -0
- package/dist/utils/page-info-retry.d.ts.map +1 -0
- package/dist/utils/page-info-retry.js +60 -0
- package/dist/utils/page-info-retry.js.map +1 -0
- package/dist/utils/page-info-utils.d.ts +1 -0
- package/dist/utils/page-info-utils.d.ts.map +1 -1
- package/dist/utils/page-info-utils.js +46 -18
- package/dist/utils/page-info-utils.js.map +1 -1
- package/dist/utils/ref-attacher.d.ts +21 -0
- package/dist/utils/ref-attacher.d.ts.map +1 -0
- package/dist/utils/ref-attacher.js +149 -0
- package/dist/utils/ref-attacher.js.map +1 -0
- package/dist/utils/ref-translator.d.ts +49 -0
- package/dist/utils/ref-translator.d.ts.map +1 -0
- package/dist/utils/ref-translator.js +276 -0
- package/dist/utils/ref-translator.js.map +1 -0
- package/package.json +1 -1
- package/plandocs/BEFORE_AFTER_VERIFICATION.md +148 -0
- package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +144 -0
- package/plandocs/IMPLEMENTATION_STATUS.md +108 -0
- package/plandocs/PHASE_1_COMPLETE.md +165 -0
- package/plandocs/PHASE_1_SUMMARY.md +184 -0
- package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +120 -0
- package/plandocs/PROMPT_SANITY_CHECK.md +120 -0
- package/plandocs/SESSION_SUMMARY_v0.0.33.md +151 -0
- package/plandocs/TROUBLESHOOTING_SESSION.md +72 -0
- package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +396 -0
- package/plandocs/WHATS_NEW_v0.0.33.md +183 -0
- package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
- package/plandocs/exploratory-mode-support.plan.md +928 -0
- package/plandocs/journey-id-tracking-addendum.md +227 -0
- package/src/execution-service.ts +179 -596
- package/src/index.ts +10 -0
- package/src/llm-facade.ts +8 -8
- package/src/llm-provider.ts +11 -1
- package/src/model-constants.ts +17 -5
- package/src/orchestrator/decision-parser.ts +139 -0
- package/src/orchestrator/index.ts +27 -2
- package/src/orchestrator/orchestrator-agent.ts +868 -623
- package/src/orchestrator/orchestrator-prompts.ts +786 -0
- package/src/orchestrator/page-som-handler.ts +1565 -0
- package/src/orchestrator/som-types.ts +188 -0
- package/src/orchestrator/tool-registry.ts +2 -0
- package/src/orchestrator/tools/index.ts +5 -1
- package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
- package/src/orchestrator/tools/verify-action-result.ts +159 -0
- package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
- package/src/orchestrator/types.ts +95 -4
- package/src/prompts.ts +40 -34
- package/src/scenario-service.ts +20 -0
- package/src/scenario-worker-class.ts +30 -4
- package/src/utils/coordinate-converter.ts +162 -0
- package/src/utils/page-info-retry.ts +65 -0
- package/src/utils/page-info-utils.ts +53 -18
- package/testchimp-runner-core-0.0.35.tgz +0 -0
- /package/{CREDIT_CALLBACK_ARCHITECTURE.md → plandocs/CREDIT_CALLBACK_ARCHITECTURE.md} +0 -0
- /package/{INTEGRATION_COMPLETE.md → plandocs/INTEGRATION_COMPLETE.md} +0 -0
- /package/{VISION_DIAGNOSTICS_IMPROVEMENTS.md → plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md} +0 -0
- /package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
- /package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
- /package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Set-of-Marks (SoM) Type Definitions
|
|
3
|
+
* Types for visual element identification and interaction
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * A point on the page expressed as percentages of the viewport
 * (not pixels).
 */
export interface Coordinate {
  /** Percentage of viewport width: 0-100 (use 3 decimal precision, e.g., 15.625). */
  x: number;
  /** Percentage of viewport height: 0-100 (use 3 decimal precision, e.g., 82.375). */
  y: number;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Every interaction a SoM command can request.
 *
 * The string values are the wire names of the actions; member order and
 * values are part of the contract and must not change.
 */
export enum InteractionAction {
  // --- Click actions ---
  CLICK = 'click',
  DOUBLE_CLICK = 'doubleClick',
  RIGHT_CLICK = 'rightClick',

  // --- Mouse actions ---
  HOVER = 'hover',
  MOUSE_DOWN = 'mouseDown',
  MOUSE_UP = 'mouseUp',
  DRAG = 'drag',

  // --- Input actions ---
  FILL = 'fill',
  TYPE = 'type',
  CLEAR = 'clear',

  // --- Keyboard actions ---
  PRESS = 'press',
  PRESS_SEQUENTIALLY = 'pressSequentially',

  // --- Select/Checkbox actions ---
  SELECT = 'select',
  CHECK = 'check',
  UNCHECK = 'uncheck',

  // --- Focus/Scroll actions ---
  FOCUS = 'focus',
  BLUR = 'blur',
  SCROLL = 'scroll',
  SCROLL_INTO_VIEW = 'scrollIntoView',

  // --- Navigation actions ---
  /** Go to URL (requires the command's `value` field). */
  NAVIGATE = 'navigate',
  GO_BACK = 'goBack',
  GO_FORWARD = 'goForward',
  RELOAD = 'reload',
}
|
|
49
|
+
|
|
50
|
+
/**
 * A single action to perform, targeted either at a marked element
 * (`elementRef`) or at a viewport position (`coord`).
 */
export interface SomCommand {
  /** Integer as string: "1", "2", "42" (optional for coord-based commands). */
  elementRef?: string;
  /** The interaction to perform. */
  action: InteractionAction;

  /**
   * Coordinate-based target; use when `elementRef` is empty/null.
   * Percentage-based (x: 0-100, y: 0-100 of viewport).
   */
  coord?: Coordinate;

  // --- Action-specific parameters ---

  /** For fill/type/select/press actions. */
  value?: string;
  /** For drag (start) - percentage-based. */
  fromCoord?: Coordinate;
  /** For drag (end) - percentage-based. */
  toCoord?: Coordinate;
  /** Force action even if not actionable. */
  force?: boolean;
  /** Pixels to scroll. */
  scrollAmount?: number;
  scrollDirection?: 'up' | 'down' | 'left' | 'right';
  button?: 'left' | 'right' | 'middle';
  clickCount?: number;
  modifiers?: ('Alt' | 'Control' | 'Meta' | 'Shift')[];
  /** Delay between keystrokes for TYPE (ms). */
  delay?: number;
  /** Override default timeout. */
  timeout?: number;
}
|
|
70
|
+
|
|
71
|
+
/** Outcome of running a command (or a single attempt at it). */
export enum CommandRunStatus {
  SUCCESS = 'success',
  FAILURE = 'failure',
}
|
|
75
|
+
|
|
76
|
+
/** Record of one attempt at executing a command. */
export interface CommandAttempt {
  /** The command that was attempted, when recorded. */
  command?: string;
  /** Whether this attempt succeeded or failed. */
  status: CommandRunStatus;
  /** Error text associated with the attempt, if any. */
  error?: string;
}
|
|
81
|
+
|
|
82
|
+
/** A single DOM change reported alongside a command result. */
export interface DomMutation {
  /** Kind of change observed. */
  type: 'added' | 'removed' | 'modified' | 'attribute_changed';
  /** Description of the element that changed. */
  elementDescription: string;
  /** Numeric timestamp of the change. NOTE(review): unit/epoch not visible here - confirm with the producer. */
  timestamp: number;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Aggregate result of running a command: the attempts that failed,
 * the attempt that succeeded (if any), and the overall status.
 */
export interface SemanticCommandResult {
  /** Attempts that did not succeed. */
  failedAttempts: CommandAttempt[];
  /** The attempt that succeeded, when there was one. */
  successAttempt?: CommandAttempt;
  /** Overall error text, if any. */
  error?: string;
  /** Overall outcome. */
  status: CommandRunStatus;
  /** Only for hover/focus, filtered for relevance. */
  mutations?: DomMutation[];
}
|
|
95
|
+
|
|
96
|
+
/**
 * Description of one marked element: its SoM id, identifying attributes,
 * bounding box, and optionally a summary of its parent.
 */
export interface SomElement {
  /** Simple integer as string: "1", "2", "3". */
  somId: string;
  tag: string;
  role: string;
  text: string;
  ariaLabel: string;
  placeholder: string;
  name: string;
  type: string;
  id: string;
  className: string;
  /** Bounding box of the element. NOTE(review): coordinate space/units are not visible here - confirm. */
  bbox: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
  /** Summary of the parent element, when captured. */
  parent?: {
    tag: string;
    role: string;
    className: string;
    text: string;
  };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Structured selector, so consumers never have to parse a selector string.
 *
 * Selectors can be chained through `parent` to scope a lookup:
 * conceptually `page.locator(parent).locator(this)`.
 */
export interface TypedSelector {
  /** Which lookup strategy `value` is meant for. */
  type:
    | 'id'
    | 'testId'
    | 'label'
    | 'role'
    | 'placeholder'
    | 'text'
    | 'title'
    | 'altText'
    | 'name'
    | 'locator';
  /** The selector value for the chosen strategy. */
  value: string;
  /** For getByRole. */
  roleOptions?: { name?: string };
  /** For chaining: page.locator(parent).locator(this). */
  parent?: TypedSelector;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Kinds of `expect`-style assertion a SoM verification can request.
 * The string values are the wire names; order and values are unchanged.
 */
export enum VerificationType {
  // --- Text verifications ---
  TEXT_CONTAINS = 'textContains',
  TEXT_EQUALS = 'textEquals',

  // --- Input verifications ---
  VALUE_EQUALS = 'valueEquals',
  VALUE_EMPTY = 'valueEmpty',

  // --- Visibility verifications ---
  IS_VISIBLE = 'isVisible',
  IS_HIDDEN = 'isHidden',

  // --- State verifications ---
  IS_ENABLED = 'isEnabled',
  IS_DISABLED = 'isDisabled',
  IS_CHECKED = 'isChecked',
  IS_UNCHECKED = 'isUnchecked',

  // --- Count verifications (for lists, tables, etc.) ---
  COUNT_EQUALS = 'countEquals',
  COUNT_GREATER_THAN = 'countGreaterThan',
  COUNT_LESS_THAN = 'countLessThan',

  // --- Attribute verifications ---
  HAS_CLASS = 'hasClass',
  HAS_ATTRIBUTE = 'hasAttribute',
}
|
|
158
|
+
|
|
159
|
+
/**
 * A verification (expect assertion) expressed against a SoM element
 * or, for count checks, against a CSS selector.
 */
export interface SomVerification {
  /** Which assertion to run. */
  verificationType: VerificationType;
  /** SoM ID (e.g., "3") - optional for count verifications. */
  elementRef?: string;
  /** Expected value/text/count. */
  expected?: string | number;
  /** Human-readable description. */
  description?: string;
  /** For count verifications on non-SoM elements (CSS selector). */
  selector?: string;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Element type of a commands array, which may mix actions
 * (`SomCommand`) and verifications (`SomVerification`).
 */
export type SomCommandOrVerification =
  | SomCommand
  | SomVerification;
|
|
174
|
+
|
|
175
|
+
/**
 * Type guard: is `cmd` a verification?
 *
 * A value counts as a verification when it is a non-null object carrying a
 * `verificationType` key. Values that are not objects (e.g. `null` or a
 * primitive that slipped through untyped/parsed input) yield `false`
 * instead of making the `in` operator throw a TypeError.
 *
 * @param cmd - Entry from a commands array.
 * @returns `true` when `cmd` has a `verificationType` property.
 */
export function isSomVerification(cmd: SomCommandOrVerification): cmd is SomVerification {
  // `in` throws on null/primitives, so check the shape first.
  return typeof cmd === 'object' && cmd !== null && 'verificationType' in cmd;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Type guard: is `cmd` an action command?
 *
 * A value counts as an action when it is a non-null object carrying an
 * `action` key. Values that are not objects (e.g. `null` or a primitive
 * that slipped through untyped/parsed input) yield `false` instead of
 * making the `in` operator throw a TypeError.
 *
 * @param cmd - Entry from a commands array.
 * @returns `true` when `cmd` has an `action` property.
 */
export function isSomCommand(cmd: SomCommandOrVerification): cmd is SomCommand {
  // `in` throws on null/primitives, so check the shape first.
  return typeof cmd === 'object' && cmd !== null && 'action' in cmd;
}
|
|
188
|
+
|
|
@@ -40,6 +40,8 @@ export interface ToolExecutionContext {
|
|
|
40
40
|
memory: any; // JourneyMemory
|
|
41
41
|
stepNumber: number;
|
|
42
42
|
logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
|
|
43
|
+
previousSomScreenshot?: string; // For view_previous_screenshot tool
|
|
44
|
+
somHandler?: any; // PageSoMHandler for refresh_som_markers tool
|
|
43
45
|
}
|
|
44
46
|
|
|
45
47
|
/**
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Tool exports -
|
|
2
|
+
* Tool exports - 8 information-gathering tools
|
|
3
3
|
* Note: State changes (navigation, clicks, fills) are done via Playwright commands, not tools
|
|
4
|
+
* Ref-based commands (getByRef) are translated to Playwright at execution time
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
7
|
export { TakeScreenshotTool } from './take-screenshot';
|
|
8
|
+
export { ViewPreviousScreenshotTool } from './view-previous-screenshot';
|
|
9
|
+
export { RefreshSomMarkersTool } from './refresh-som-markers';
|
|
7
10
|
export { RecallHistoryTool } from './recall-history';
|
|
8
11
|
export { InspectPageTool } from './inspect-page';
|
|
9
12
|
export { CheckPageReadyTool } from './check-page-ready';
|
|
10
13
|
export { ExtractDataTool } from './extract-data';
|
|
14
|
+
export { VerifyActionResultTool } from './verify-action-result';
|
|
11
15
|
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
|
|
2
|
+
import { ToolResult } from '../types';
|
|
3
|
+
|
|
4
|
+
/**
 * Tool to refresh SoM markers when they appear outdated or misaligned.
 *
 * On execution it points the SoM handler at the current page, asks it to
 * re-scan (`updateSom`), and returns a fresh viewport screenshot obtained
 * from the handler.
 */
export class RefreshSomMarkersTool implements Tool {
  name = 'refresh_som_markers';
  description = 'Manually refresh the Set-of-Marks visual markers on the page. Use when: (1) Markers appear misaligned with actual UI elements, (2) Page content has changed but markers are stale (e.g., after dynamic content loads), (3) You suspect markers are from a previous page state. Returns updated screenshot with fresh markers.';

  parameters: ToolParameter[] = [
    {
      name: 'reason',
      type: 'string',
      description: 'Why you need to refresh markers. Examples: "dropdown expanded but markers still show closed state", "new content loaded but not marked", "markers seem to point to wrong elements"',
      required: true
    }
  ];

  /**
   * Turn any thrown value into printable text. Non-Error throwables
   * (strings, plain objects) have no `.message`, which previously produced
   * "Failed to refresh markers: undefined".
   */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Refresh the markers and capture a new screenshot.
   *
   * @param params - Tool arguments; `reason` falls back to a default when empty.
   * @param context - Execution context; requires `somHandler` and `page`.
   * @returns On success, `data.screenshot` (from the handler) and `data.reason`;
   *          otherwise `success: false` with an `error` message. Never throws.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { logger, page, somHandler } = context;
    const reason = params.reason || 'Markers appear outdated';

    if (!somHandler) {
      return {
        success: false,
        error: 'SoM mode not enabled - refresh markers tool unavailable.'
      };
    }

    if (!page) {
      return {
        success: false,
        error: 'No page context available.'
      };
    }

    try {
      logger?.(`[RefreshSomMarkers] Refreshing markers due to: ${reason}`, 'log');

      // Ensure somHandler has the latest page reference
      somHandler.setPage(page);

      // Re-scan page and update markers
      await somHandler.updateSom();
      logger?.(`[RefreshSomMarkers] ✓ Markers updated`, 'log');

      // Capture fresh screenshot with new markers (viewport only - cheaper than full page)
      const freshScreenshot = await somHandler.getScreenshot(true, false, 60);
      logger?.(`[RefreshSomMarkers] ✓ Fresh screenshot captured (viewport)`, 'log');

      return {
        success: true,
        data: {
          screenshot: freshScreenshot,
          reason
        },
        learning: `SoM markers refreshed. New screenshot shows current page state with updated element markers. Reason: ${reason}`
      };
    } catch (error: unknown) {
      const message = RefreshSomMarkersTool.describeError(error);
      logger?.(`[RefreshSomMarkers] ✗ Failed: ${message}`, 'error');
      return {
        success: false,
        error: `Failed to refresh markers: ${message}`
      };
    }
  }
}
|
|
69
|
+
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verify Action Result Tool
|
|
3
|
+
*
|
|
4
|
+
* Compares before/after screenshots to verify if an action achieved its goal
|
|
5
|
+
* USE SPARINGLY - expensive (sends 2 images to vision model)
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
|
|
9
|
+
import { ToolResult } from '../types';
|
|
10
|
+
import { LabeledImage } from '../../llm-provider';
|
|
11
|
+
|
|
12
|
+
/**
 * Tool that asks a vision model whether an expected UI change is visible,
 * by sending it two viewport screenshots labelled BEFORE and AFTER.
 *
 * NOTE(review): both screenshots are captured inside a single `execute`
 * call, 1.5 s apart, i.e. both are taken after the agent's previous action.
 * The BEFORE image therefore only reflects pre-action state if the UI was
 * still settling. The `description` text below promises a true pre-action
 * capture; confirm the intended design with the orchestrator owner.
 */
export class VerifyActionResultTool implements Tool {
  name = 'verify_action_result';

  description = `Verify if your previous action achieved its goal by comparing before/after screenshots.

⚠️ USE SPARINGLY - EXPENSIVE (2 images sent to vision model)

When to use:
- After coordinate click to verify expected UI change occurred
- After clicking element when unsure if goal achieved (no clear feedback)
- To verify navigation or modal appeared

How it works:
1. Takes screenshot BEFORE you run your next command
2. You run command (click, fill, etc.)
3. Takes screenshot AFTER command executes
4. Compares both and tells you if expected change happened

Parameters:
- expectedChange: What should have changed (e.g., "Dashboard page loaded", "Modal appeared", "Form submitted")

Returns: { verified: boolean, reasoning: string, changes: string[] }

Example:
{
"toolCalls": [{"name": "verify_action_result", "params": {"expectedChange": "Dashboard page loaded with data grid"}}],
"toolReasoning": "Need to verify coordinate click navigated to correct page",
"needsToolResults": true // IMPORTANT: Wait for verification before deciding status
}`;

  parameters: ToolParameter[] = [
    {
      name: 'expectedChange',
      type: 'string',
      description: 'What UI change you expect to see (e.g., "Modal opened", "Page navigated", "New form appeared")',
      required: true
    }
  ];

  // LLM facade will be injected
  private llmFacade?: any;

  /** Inject the facade whose `llmProvider.callLLM` performs the vision call. */
  setLLMFacade(llmFacade: any): void {
    this.llmFacade = llmFacade;
  }

  /** Turn any thrown value into printable text (non-Error throwables have no `.message`). */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Normalise a screenshot payload to a base64 string.
   *
   * Playwright's `page.screenshot()` resolves to a Buffer (it has no
   * `encoding` option), and interpolating a Buffer into a template string
   * UTF-8-decodes the JPEG bytes instead of base64-encoding them. Strings
   * are passed through for drivers that already return base64 text.
   */
  private static toBase64(shot: unknown): string {
    if (typeof shot === 'string') {
      return shot;
    }
    if (shot instanceof Uint8Array) {
      return Buffer.from(shot).toString('base64');
    }
    throw new Error('Unexpected screenshot payload type');
  }

  /** Capture the viewport as a JPEG (60% quality, for cost efficiency) data URL. */
  private static async captureViewport(
    page: { screenshot(options: Record<string, unknown>): Promise<unknown> }
  ): Promise<string> {
    const shot = await page.screenshot({
      fullPage: false,
      type: 'jpeg',
      quality: 60
    });
    return `data:image/jpeg;base64,${VerifyActionResultTool.toBase64(shot)}`;
  }

  /**
   * Extract the first `{...}` span from the model's answer and parse it.
   * Returns `undefined` when the answer is not a string, contains no JSON
   * object, or the JSON is malformed.
   */
  private static parseVerdict(answer: unknown): Record<string, unknown> | undefined {
    if (typeof answer !== 'string') {
      return undefined;
    }
    const jsonMatch = answer.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      return undefined;
    }
    try {
      const candidate: unknown = JSON.parse(jsonMatch[0]);
      return candidate !== null && typeof candidate === 'object'
        ? (candidate as Record<string, unknown>)
        : undefined;
    } catch {
      return undefined;
    }
  }

  /**
   * Capture two screenshots 1.5 s apart and ask the vision model whether
   * `expectedChange` is visible between them.
   *
   * @param params - Tool arguments; `expectedChange` falls back to a default when empty.
   * @param context - Execution context; requires `page`.
   * @returns `success: true` with `data.verified` / `data.reasoning` /
   *          `data.changes` when the model's verdict could be parsed;
   *          otherwise `success: false` with an `error`. Never throws.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { page, logger } = context;
    const expectedChange = params.expectedChange || 'Expected UI change';

    // Fail fast: without these there is no point paying for screenshots and a 1.5 s wait.
    if (!this.llmFacade) {
      logger?.(`[VerifyActionResult] ❌ Error: LLM facade not initialized`, 'error');
      return {
        success: false,
        error: 'LLM facade not initialized'
      };
    }
    if (!page) {
      return {
        success: false,
        error: 'No page context available.'
      };
    }

    logger?.(`[VerifyActionResult] Capturing before/after screenshots...`);

    try {
      // Capture BEFORE screenshot (viewport only, JPEG 60% quality for cost efficiency)
      const beforeDataUrl = await VerifyActionResultTool.captureViewport(page);

      logger?.(`[VerifyActionResult] ✓ BEFORE screenshot captured (JPEG 60%)`);
      logger?.(`[VerifyActionResult] ⏳ Waiting 1.5s for UI to settle after your previous action...`);

      // Wait for UI to settle after previous action
      await page.waitForTimeout(1500);

      // Capture AFTER screenshot
      const afterDataUrl = await VerifyActionResultTool.captureViewport(page);

      logger?.(`[VerifyActionResult] ✓ AFTER screenshot captured (JPEG 60%)`);
      logger?.(`[VerifyActionResult] 🔍 Calling vision model to compare...`);

      // Call LLM with both screenshots
      const request = {
        model: 'gpt-5-mini', // Vision model
        systemPrompt: 'You are a visual verification expert for web automation. Compare before/after screenshots to determine if an action achieved its expected outcome.',
        userPrompt: `Expected change: ${expectedChange}

Compare the [BEFORE] and [AFTER] screenshots.

Did the expected change occur? Respond with JSON:
{
"verified": boolean,
"reasoning": "What changed (or didn't change) between screenshots",
"changes": ["Specific UI changes observed"]
}

Be objective:
- Look for new elements, panels, forms, modals
- Check for page navigation, URL changes, content changes
- Note any visual indicators of success/failure

Be strict: Only return true if you clearly see the expected change.`,
        images: [
          { label: 'BEFORE', dataUrl: beforeDataUrl },
          { label: 'AFTER', dataUrl: afterDataUrl }
        ]
      };

      const response = await this.llmFacade.llmProvider.callLLM(request);

      // Parse response
      const parsed = VerifyActionResultTool.parseVerdict(response?.answer);
      if (!parsed) {
        logger?.(`[VerifyActionResult] ⚠️ Could not parse verification response`, 'warn');
        return {
          success: false,
          error: 'Failed to parse verification response'
        };
      }

      // Only a literal boolean `true` counts; a truthy string such as "false" must not verify.
      const verified = parsed.verified === true;
      const reasoning = typeof parsed.reasoning === 'string' ? parsed.reasoning : '';
      const changes: string[] = Array.isArray(parsed.changes) ? parsed.changes.map(String) : [];

      logger?.(`[VerifyActionResult] 📊 Result: ${verified ? '✅ VERIFIED' : '❌ NOT VERIFIED'}`);
      logger?.(`[VerifyActionResult] 💭 ${reasoning}`);
      if (changes.length > 0) {
        logger?.(`[VerifyActionResult] 📝 Changes observed:`);
        for (const change of changes) {
          logger?.(`   - ${change}`);
        }
      }

      return {
        success: true,
        // Keep any extra fields the model returned, but expose normalised core fields.
        data: { ...parsed, verified, reasoning, changes },
        learning: `Verification result: ${verified ? 'Goal achieved' : 'Goal not achieved'}. ${reasoning}`
      };

    } catch (error: unknown) {
      const message = VerifyActionResultTool.describeError(error);
      logger?.(`[VerifyActionResult] ❌ Error: ${message}`, 'error');
      return {
        success: false,
        error: message
      };
    }
  }
}
|
|
159
|
+
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* View Previous Screenshot Tool
|
|
3
|
+
* Access the screenshot from the previous iteration for continuity reasoning
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
|
|
7
|
+
import { ToolResult } from '../types';
|
|
8
|
+
|
|
9
|
+
/**
 * Tool that lets the agent reason about the screenshot from the previous
 * iteration (`context.previousSomScreenshot`).
 *
 * When an LLM facade has been injected, the screenshot is sent to
 * `llmProvider.callLLM` and the model's answer is returned as `learning`.
 * Vision analysis is best-effort: if it is unavailable or fails, the tool
 * still succeeds with a generic `learning`, and `data.analysisPerformed`
 * tells the caller that no analysis text was produced.
 *
 * NOTE(review): the prompt asks the model to compare with the "current
 * page", but only the previous screenshot is attached to the request.
 */
export class ViewPreviousScreenshotTool implements Tool {
  name = 'view_previous_screenshot';
  description = 'View the screenshot from the PREVIOUS iteration to understand continuity. Common uses: (1) Verify coordinate-based clicks - look for magenta "clicked" marker to see where click landed, (2) Compare before/after states - see what changed after commands, (3) Check transient effects - see alerts/toasts that may have disappeared. Returns vision analysis of the previous screenshot.';

  // LLM facade for vision analysis
  private llmFacade?: any;

  /** Inject the facade whose `llmProvider.callLLM` performs the vision call. */
  setLLMFacade(llmFacade: any): void {
    this.llmFacade = llmFacade;
  }

  parameters: ToolParameter[] = [
    {
      name: 'purpose',
      type: 'string',
      description: 'Why you need to see the previous screenshot. Examples: "verify coord click accuracy", "check if error message appeared then disappeared", "compare before/after form submission"',
      required: true
    }
  ];

  /** Turn any thrown value into printable text (non-Error throwables have no `.message`). */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Analyse the previous iteration's screenshot for the stated purpose.
   *
   * @param params - Tool arguments; `purpose` falls back to a default when empty.
   * @param context - Execution context; reads `previousSomScreenshot`.
   * @returns `success: false` when no previous screenshot exists; otherwise
   *          `success: true` with `learning` set to the model's analysis (or
   *          a generic message) and `data.analysisPerformed` indicating
   *          whether the model produced that analysis. Never throws.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { logger } = context;
    const purpose = params.purpose || 'Review previous page state';

    // Access previousSomScreenshot from context (passed through)
    const previousScreenshot = context.previousSomScreenshot;

    if (!previousScreenshot) {
      return {
        success: false,
        error: 'No previous screenshot available (this is the first iteration)'
      };
    }

    try {
      logger?.(`[ViewPreviousScreenshot] Analyzing previous iteration screenshot for: ${purpose}`, 'log');

      // Analyze with vision LLM if available
      let analysis = 'Previous screenshot retrieved.';
      let analysisPerformed = false;

      if (this.llmFacade) {
        try {
          const analysisPrompt = `Analyze the screenshot from the PREVIOUS iteration (before the most recent commands executed).

PURPOSE: ${purpose}

WHAT TO LOOK FOR:
1. **Coordinate verification**: If purpose mentions "coord" or "click", look for MAGENTA "clicked" marker (circle with yellow border)
- Describe marker position relative to UI elements
- Assess accuracy: "centered on button", "5% above target", etc.

2. **Before/after comparison**: Compare visual state with current page
- What changed after commands executed?
- New elements, removed elements, state changes?

3. **Transient effects**: Elements that may have appeared and disappeared
- Alerts, toasts, error messages that are now gone
- Loading states, spinners that finished

4. **General state**: Answer the specific question from purpose

TASK: Provide concise, specific observations relevant to the purpose.`;

          const llmResponse = await this.llmFacade.llmProvider.callLLM({
            systemPrompt: 'You are analyzing a screenshot from a previous test iteration to help with continuity reasoning. Provide specific, actionable observations.',
            userPrompt: analysisPrompt,
            imageUrl: previousScreenshot
          });

          if (llmResponse?.answer) {
            analysis = llmResponse.answer;
            analysisPerformed = true;
          }
          logger?.(`[ViewPreviousScreenshot] ✓ Analysis complete`, 'log');

        } catch (error: unknown) {
          // Best-effort: keep the generic message, but the caller can see via
          // data.analysisPerformed that no analysis was produced.
          logger?.(`[ViewPreviousScreenshot] ⚠ Vision analysis failed: ${ViewPreviousScreenshotTool.describeError(error)}`, 'warn');
        }
      }

      return {
        success: true,
        data: {
          screenshotAvailable: true,
          purpose,
          analysisPerformed
        },
        learning: analysis
      };
    } catch (error: unknown) {
      const message = ViewPreviousScreenshotTool.describeError(error);
      logger?.(`[ViewPreviousScreenshot] ✗ Failed: ${message}`, 'error');
      return {
        success: false,
        error: `Failed to access previous screenshot: ${message}`
      };
    }
  }
}
|
|
103
|
+
|