testchimp-runner-core 0.0.34 → 0.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-service.d.ts +1 -4
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +155 -468
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -1
- package/dist/index.js.map +1 -1
- package/dist/orchestrator/decision-parser.d.ts +18 -0
- package/dist/orchestrator/decision-parser.d.ts.map +1 -0
- package/dist/orchestrator/decision-parser.js +127 -0
- package/dist/orchestrator/decision-parser.js.map +1 -0
- package/dist/orchestrator/index.d.ts +4 -2
- package/dist/orchestrator/index.d.ts.map +1 -1
- package/dist/orchestrator/index.js +14 -2
- package/dist/orchestrator/index.js.map +1 -1
- package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +534 -204
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.js +529 -247
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
- package/dist/orchestrator/page-som-handler.d.ts +106 -0
- package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
- package/dist/orchestrator/page-som-handler.js +1353 -0
- package/dist/orchestrator/page-som-handler.js.map +1 -0
- package/dist/orchestrator/som-types.d.ts +149 -0
- package/dist/orchestrator/som-types.d.ts.map +1 -0
- package/dist/orchestrator/som-types.js +87 -0
- package/dist/orchestrator/som-types.js.map +1 -0
- package/dist/orchestrator/tool-registry.d.ts +2 -0
- package/dist/orchestrator/tool-registry.d.ts.map +1 -1
- package/dist/orchestrator/tool-registry.js.map +1 -1
- package/dist/orchestrator/tools/index.d.ts +4 -1
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js +7 -2
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
- package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
- package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
- package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
- package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
- package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
- package/dist/orchestrator/types.d.ts +23 -1
- package/dist/orchestrator/types.d.ts.map +1 -1
- package/dist/orchestrator/types.js +11 -1
- package/dist/orchestrator/types.js.map +1 -1
- package/dist/scenario-service.d.ts +5 -0
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +17 -0
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +4 -0
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +18 -3
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/testing/agent-tester.d.ts +35 -0
- package/dist/testing/agent-tester.d.ts.map +1 -0
- package/dist/testing/agent-tester.js +84 -0
- package/dist/testing/agent-tester.js.map +1 -0
- package/dist/testing/ref-translator-tester.d.ts +44 -0
- package/dist/testing/ref-translator-tester.d.ts.map +1 -0
- package/dist/testing/ref-translator-tester.js +104 -0
- package/dist/testing/ref-translator-tester.js.map +1 -0
- package/dist/utils/hierarchical-selector.d.ts +47 -0
- package/dist/utils/hierarchical-selector.d.ts.map +1 -0
- package/dist/utils/hierarchical-selector.js +212 -0
- package/dist/utils/hierarchical-selector.js.map +1 -0
- package/dist/utils/page-info-retry.d.ts +14 -0
- package/dist/utils/page-info-retry.d.ts.map +1 -0
- package/dist/utils/page-info-retry.js +60 -0
- package/dist/utils/page-info-retry.js.map +1 -0
- package/dist/utils/page-info-utils.d.ts +1 -0
- package/dist/utils/page-info-utils.d.ts.map +1 -1
- package/dist/utils/page-info-utils.js +46 -18
- package/dist/utils/page-info-utils.js.map +1 -1
- package/dist/utils/ref-attacher.d.ts +21 -0
- package/dist/utils/ref-attacher.d.ts.map +1 -0
- package/dist/utils/ref-attacher.js +149 -0
- package/dist/utils/ref-attacher.js.map +1 -0
- package/dist/utils/ref-translator.d.ts +49 -0
- package/dist/utils/ref-translator.d.ts.map +1 -0
- package/dist/utils/ref-translator.js +276 -0
- package/dist/utils/ref-translator.js.map +1 -0
- package/package.json +1 -1
- package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
- package/plandocs/exploratory-mode-support.plan.md +928 -0
- package/plandocs/journey-id-tracking-addendum.md +227 -0
- package/src/execution-service.ts +179 -596
- package/src/index.ts +10 -0
- package/src/orchestrator/decision-parser.ts +139 -0
- package/src/orchestrator/index.ts +25 -1
- package/src/orchestrator/orchestrator-agent.ts +656 -236
- package/src/orchestrator/orchestrator-prompts.ts +559 -247
- package/src/orchestrator/page-som-handler.ts +1565 -0
- package/src/orchestrator/som-types.ts +188 -0
- package/src/orchestrator/tool-registry.ts +2 -0
- package/src/orchestrator/tools/index.ts +4 -1
- package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
- package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
- package/src/orchestrator/types.ts +49 -6
- package/src/scenario-service.ts +20 -0
- package/src/scenario-worker-class.ts +24 -3
- package/src/utils/page-info-retry.ts +65 -0
- package/src/utils/page-info-utils.ts +53 -18
- package/testchimp-runner-core-0.0.35.tgz +0 -0
- package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
- package/testchimp-runner-core-0.0.33.tgz +0 -0
- /package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
- /package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
- /package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Set-of-Marks (SoM) Type Definitions
|
|
3
|
+
* Types for visual element identification and interaction
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * A point on the page expressed as percentages of the viewport rather than
 * as absolute pixel values.
 */
export interface Coordinate {
  x: number; // Percentage of viewport width: 0-100 (use 3 decimal precision, e.g., 15.625)
  y: number; // Percentage of viewport height: 0-100 (use 3 decimal precision, e.g., 82.375)
}
|
|
10
|
+
|
|
11
|
+
/**
 * String-valued enum of the interaction kinds a `SomCommand` can carry in its
 * `action` field. Members are grouped by the kind of input they represent.
 */
export enum InteractionAction {
  // Click actions
  CLICK = 'click',
  DOUBLE_CLICK = 'doubleClick',
  RIGHT_CLICK = 'rightClick',

  // Mouse actions
  HOVER = 'hover',
  MOUSE_DOWN = 'mouseDown',
  MOUSE_UP = 'mouseUp',
  DRAG = 'drag',

  // Input actions
  FILL = 'fill',
  TYPE = 'type',
  CLEAR = 'clear',

  // Keyboard actions
  PRESS = 'press',
  PRESS_SEQUENTIALLY = 'pressSequentially',

  // Select/Checkbox actions
  SELECT = 'select',
  CHECK = 'check',
  UNCHECK = 'uncheck',

  // Focus/Scroll actions
  FOCUS = 'focus',
  BLUR = 'blur',
  SCROLL = 'scroll',
  SCROLL_INTO_VIEW = 'scrollIntoView',

  // Navigation actions
  NAVIGATE = 'navigate', // Go to URL (requires value field)
  GO_BACK = 'goBack',
  GO_FORWARD = 'goForward',
  RELOAD = 'reload'
}
|
|
49
|
+
|
|
50
|
+
/**
 * One interaction request: an `action`, a target given either as a SoM
 * element reference or as viewport coordinates, and optional parameters that
 * only apply to some actions.
 */
export interface SomCommand {
  elementRef?: string; // Integer as string: "1", "2", "42" (optional for coord-based commands)
  action: InteractionAction;

  // Coordinate-based action (use when elementRef is empty/null)
  coord?: Coordinate; // Percentage-based (x: 0-100, y: 0-100 of viewport)

  // Action-specific parameters
  value?: string; // For fill/type/select/press actions; NAVIGATE also requires it (the URL to go to)
  fromCoord?: Coordinate; // For drag (start) - percentage-based
  toCoord?: Coordinate; // For drag (end) - percentage-based
  force?: boolean; // Force action even if not actionable
  scrollAmount?: number; // Pixels to scroll
  scrollDirection?: 'up' | 'down' | 'left' | 'right';
  button?: 'left' | 'right' | 'middle';
  clickCount?: number;
  modifiers?: Array<'Alt' | 'Control' | 'Meta' | 'Shift'>;
  delay?: number; // Delay between keystrokes for TYPE (ms)
  timeout?: number; // Override default timeout (NOTE(review): presumably milliseconds - confirm against the executor)
}
|
|
70
|
+
|
|
71
|
+
/**
 * Outcome of running a command. Used as the `status` of both a single
 * `CommandAttempt` and an overall `SemanticCommandResult`.
 */
export enum CommandRunStatus {
  SUCCESS = 'success',
  FAILURE = 'failure'
}
|
|
75
|
+
|
|
76
|
+
/**
 * Record of a single attempt at running a command.
 */
export interface CommandAttempt {
  command?: string; // NOTE(review): presumably the command text that was tried - confirm against the producer
  status: CommandRunStatus; // Whether this attempt succeeded or failed
  error?: string; // Error text for the attempt, when there is one
}
|
|
81
|
+
|
|
82
|
+
/**
 * A single observed DOM change, reported on `SemanticCommandResult.mutations`.
 */
export interface DomMutation {
  type: 'added' | 'removed' | 'modified' | 'attribute_changed'; // Kind of change
  elementDescription: string; // Description of the element that changed
  timestamp: number; // NOTE(review): units are not stated here - presumably epoch milliseconds; confirm
}
|
|
87
|
+
|
|
88
|
+
/**
 * Overall result of executing a command: the attempts that failed, the
 * attempt that succeeded (if any), and the final status.
 */
export interface SemanticCommandResult {
  failedAttempts: CommandAttempt[]; // Attempts that did not succeed
  successAttempt?: CommandAttempt; // The attempt that succeeded, when one did
  error?: string; // Error text for the overall result, when there is one
  status: CommandRunStatus; // Final outcome
  mutations?: DomMutation[]; // Only for hover/focus, filtered for relevance
}
|
|
95
|
+
|
|
96
|
+
/**
 * Description of one marked element: its SoM id, the attributes captured for
 * it, its bounding box, and an optional summary of its parent element.
 */
export interface SomElement {
  somId: string; // Simple integer as string: "1", "2", "3"
  tag: string;
  role: string;
  text: string;
  ariaLabel: string;
  placeholder: string;
  name: string;
  type: string;
  id: string;
  className: string;
  // NOTE(review): units and origin of bbox are not stated here - presumably CSS pixels; confirm against the producer
  bbox: { x: number; y: number; width: number; height: number };
  // Subset of the same attributes captured for the parent element, when present
  parent?: {
    tag: string;
    role: string;
    className: string;
    text: string;
  };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Typed selector (no string parsing needed).
 *
 * `type` names the lookup strategy and `value` is its argument. Selectors
 * chain through `parent`, so a selector can be scoped inside another one.
 */
export interface TypedSelector {
  type: 'id' | 'testId' | 'label' | 'role' | 'placeholder' | 'text' | 'title' | 'altText' | 'name' | 'locator';
  value: string;
  roleOptions?: { name?: string }; // For getByRole
  parent?: TypedSelector; // For chaining: page.locator(parent).locator(this)
}
|
|
126
|
+
|
|
127
|
+
/**
 * Verification types for expect assertions.
 *
 * String-valued enum used as `SomVerification.verificationType`; members are
 * grouped by what they check.
 */
export enum VerificationType {
  // Text verifications
  TEXT_CONTAINS = 'textContains',
  TEXT_EQUALS = 'textEquals',

  // Input verifications
  VALUE_EQUALS = 'valueEquals',
  VALUE_EMPTY = 'valueEmpty',

  // Visibility verifications
  IS_VISIBLE = 'isVisible',
  IS_HIDDEN = 'isHidden',

  // State verifications
  IS_ENABLED = 'isEnabled',
  IS_DISABLED = 'isDisabled',
  IS_CHECKED = 'isChecked',
  IS_UNCHECKED = 'isUnchecked',

  // Count verifications (for lists, tables, etc.)
  COUNT_EQUALS = 'countEquals',
  COUNT_GREATER_THAN = 'countGreaterThan',
  COUNT_LESS_THAN = 'countLessThan',

  // Attribute verifications
  HAS_CLASS = 'hasClass',
  HAS_ATTRIBUTE = 'hasAttribute'
}
|
|
158
|
+
|
|
159
|
+
/**
 * SoM verification command for expect assertions.
 *
 * Distinguished from `SomCommand` by the presence of `verificationType`
 * (see `isSomVerification`).
 */
export interface SomVerification {
  verificationType: VerificationType; // Which check to perform
  elementRef?: string; // SoM ID (e.g., "3") - optional for count verifications
  expected?: string | number; // Expected value/text/count
  description?: string; // Human-readable description
  selector?: string; // For count verifications on non-SoM elements (CSS selector)
}
|
|
169
|
+
|
|
170
|
+
/**
 * Union type: commands array can contain both actions and verifications.
 * Use `isSomVerification` / `isSomCommand` to tell the two apart.
 */
export type SomCommandOrVerification = SomCommand | SomVerification;
|
|
174
|
+
|
|
175
|
+
/**
 * Type guard to check if command is a verification.
 *
 * A value counts as a verification when it is an object that has a
 * `verificationType` key. Anything that is not an object (`null`,
 * `undefined`, a string, a number, ...) yields `false` rather than the
 * `TypeError` a bare `in` check raises on such values, so the guard can be
 * applied directly to untrusted, freshly parsed data.
 *
 * @param cmd - Candidate entry from a commands array.
 * @returns `true` when `cmd` carries a `verificationType` key.
 */
export function isSomVerification(cmd: SomCommandOrVerification): cmd is SomVerification {
  // `in` throws on null and primitives, so rule those out first.
  return typeof cmd === 'object' && cmd !== null && 'verificationType' in cmd;
}
|
|
181
|
+
|
|
182
|
+
/**
 * Type guard to check if command is an action.
 *
 * A value counts as an action when it is an object that has an `action` key.
 * Anything that is not an object (`null`, `undefined`, a string, a number,
 * ...) yields `false` rather than the `TypeError` a bare `in` check raises on
 * such values, so the guard can be applied directly to untrusted, freshly
 * parsed data.
 *
 * @param cmd - Candidate entry from a commands array.
 * @returns `true` when `cmd` carries an `action` key.
 */
export function isSomCommand(cmd: SomCommandOrVerification): cmd is SomCommand {
  // `in` throws on null and primitives, so rule those out first.
  return typeof cmd === 'object' && cmd !== null && 'action' in cmd;
}
|
|
188
|
+
|
|
@@ -40,6 +40,8 @@ export interface ToolExecutionContext {
|
|
|
40
40
|
memory: any; // JourneyMemory
|
|
41
41
|
stepNumber: number;
|
|
42
42
|
logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
|
|
43
|
+
previousSomScreenshot?: string; // For view_previous_screenshot tool
|
|
44
|
+
somHandler?: any; // PageSoMHandler for refresh_som_markers tool
|
|
43
45
|
}
|
|
44
46
|
|
|
45
47
|
/**
|
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Tool exports -
|
|
2
|
+
* Tool exports - 8 information-gathering tools
|
|
3
3
|
* Note: State changes (navigation, clicks, fills) are done via Playwright commands, not tools
|
|
4
|
+
* Ref-based commands (getByRef) are translated to Playwright at execution time
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
7
|
export { TakeScreenshotTool } from './take-screenshot';
|
|
8
|
+
export { ViewPreviousScreenshotTool } from './view-previous-screenshot';
|
|
9
|
+
export { RefreshSomMarkersTool } from './refresh-som-markers';
|
|
7
10
|
export { RecallHistoryTool } from './recall-history';
|
|
8
11
|
export { InspectPageTool } from './inspect-page';
|
|
9
12
|
export { CheckPageReadyTool } from './check-page-ready';
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
|
|
2
|
+
import { ToolResult } from '../types';
|
|
3
|
+
|
|
4
|
+
/**
 * Tool to refresh SoM markers when they appear outdated or misaligned.
 *
 * Needs both `somHandler` and `page` on the execution context. On success the
 * result carries a freshly captured screenshot; on any failure it returns
 * `success: false` with an error message instead of throwing.
 */
export class RefreshSomMarkersTool implements Tool {
  name = 'refresh_som_markers';
  description = 'Manually refresh the Set-of-Marks visual markers on the page. Use when: (1) Markers appear misaligned with actual UI elements, (2) Page content has changed but markers are stale (e.g., after dynamic content loads), (3) You suspect markers are from a previous page state. Returns updated screenshot with fresh markers.';

  parameters: ToolParameter[] = [
    {
      name: 'reason',
      type: 'string',
      description: 'Why you need to refresh markers. Examples: "dropdown expanded but markers still show closed state", "new content loaded but not marked", "markers seem to point to wrong elements"',
      required: true
    }
  ];

  /**
   * Re-scan the page, update the markers and capture a new screenshot.
   *
   * @param params - Tool arguments; `reason` is echoed into the log, the
   *   result data and the learning text (a default is used when it is empty).
   * @param context - Execution context supplying `page`, `somHandler` and an
   *   optional `logger`.
   * @returns A `ToolResult`; never rejects for handler failures.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { logger, page, somHandler } = context;
    const reason = params.reason || 'Markers appear outdated';

    if (!somHandler) {
      return {
        success: false,
        error: 'SoM mode not enabled - refresh markers tool unavailable.'
      };
    }

    if (!page) {
      return {
        success: false,
        error: 'No page context available.'
      };
    }

    try {
      logger?.(`[RefreshSomMarkers] Refreshing markers due to: ${reason}`, 'log');

      // Ensure somHandler has the latest page reference
      somHandler.setPage(page);

      // Re-scan page and update markers
      await somHandler.updateSom();
      logger?.(`[RefreshSomMarkers] ✓ Markers updated`, 'log');

      // Capture fresh screenshot with new markers (viewport only - cheaper than full page)
      const freshScreenshot = await somHandler.getScreenshot(true, false, 60);
      logger?.(`[RefreshSomMarkers] ✓ Fresh screenshot captured (viewport)`, 'log');

      return {
        success: true,
        data: {
          screenshot: freshScreenshot,
          reason
        },
        learning: `SoM markers refreshed. New screenshot shows current page state with updated element markers. Reason: ${reason}`
      };
    } catch (error: unknown) {
      // A thrown value is not guaranteed to be an Error; reading `.message`
      // blindly would report "undefined" for thrown strings or objects.
      const message = error instanceof Error ? error.message : String(error);
      logger?.(`[RefreshSomMarkers] ✗ Failed: ${message}`, 'error');
      return {
        success: false,
        error: `Failed to refresh markers: ${message}`
      };
    }
  }
}
|
|
69
|
+
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* View Previous Screenshot Tool
|
|
3
|
+
* Access the screenshot from the previous iteration for continuity reasoning
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
|
|
7
|
+
import { ToolResult } from '../types';
|
|
8
|
+
|
|
9
|
+
/**
 * Tool that reports on the screenshot kept from the previous iteration.
 *
 * The screenshot is read from `context.previousSomScreenshot`. When an LLM
 * facade has been injected, the screenshot is sent to it for analysis and the
 * answer becomes the result's `learning`; otherwise, or when that call
 * fails, a fixed placeholder text is returned instead.
 */
export class ViewPreviousScreenshotTool implements Tool {
  name = 'view_previous_screenshot';
  description = 'View the screenshot from the PREVIOUS iteration to understand continuity. Common uses: (1) Verify coordinate-based clicks - look for magenta "clicked" marker to see where click landed, (2) Compare before/after states - see what changed after commands, (3) Check transient effects - see alerts/toasts that may have disappeared. Returns vision analysis of the previous screenshot.';

  // LLM facade for vision analysis
  private llmFacade?: any;

  /** Inject the facade whose `llmProvider.callLLM` performs the vision analysis. */
  setLLMFacade(llmFacade: any): void {
    this.llmFacade = llmFacade;
  }

  parameters: ToolParameter[] = [
    {
      name: 'purpose',
      type: 'string',
      description: 'Why you need to see the previous screenshot. Examples: "verify coord click accuracy", "check if error message appeared then disappeared", "compare before/after form submission"',
      required: true
    }
  ];

  /**
   * Analyze the previous iteration's screenshot for the stated purpose.
   *
   * @param params - Tool arguments; `purpose` steers the analysis prompt (a
   *   default is used when it is empty).
   * @param context - Execution context supplying `previousSomScreenshot` and
   *   an optional `logger`.
   * @returns `success: false` when no previous screenshot exists; otherwise
   *   `success: true` with the analysis (or placeholder) in `learning`.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    // previousSomScreenshot is a declared field of the context type, so no cast is needed.
    const { logger, previousSomScreenshot: previousScreenshot } = context;
    const purpose = params.purpose || 'Review previous page state';

    if (!previousScreenshot) {
      return {
        success: false,
        error: 'No previous screenshot available (this is the first iteration)'
      };
    }

    try {
      logger?.(`[ViewPreviousScreenshot] Analyzing previous iteration screenshot for: ${purpose}`, 'log');

      // Analyze with vision LLM if available
      let analysis = 'Previous screenshot retrieved.';

      if (this.llmFacade) {
        try {
          const llmResponse = await this.llmFacade.llmProvider.callLLM({
            systemPrompt: 'You are analyzing a screenshot from a previous test iteration to help with continuity reasoning. Provide specific, actionable observations.',
            userPrompt: this.buildAnalysisPrompt(purpose),
            imageUrl: previousScreenshot
          });

          analysis = llmResponse.answer || analysis;
          logger?.(`[ViewPreviousScreenshot] ✓ Analysis complete`, 'log');
        } catch (error: unknown) {
          // Best effort: a failed analysis degrades to the placeholder text.
          const message = error instanceof Error ? error.message : String(error);
          logger?.(`[ViewPreviousScreenshot] ⚠ Vision analysis failed: ${message}`, 'warn');
        }
      }

      return {
        success: true,
        data: {
          screenshotAvailable: true,
          purpose
        },
        learning: analysis
      };
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      logger?.(`[ViewPreviousScreenshot] ✗ Failed: ${message}`, 'error');
      return {
        success: false,
        error: `Failed to access previous screenshot: ${message}`
      };
    }
  }

  /**
   * Build the user prompt sent alongside the screenshot.
   *
   * NOTE(review): any leading indentation of the sub-bullets in the original
   * template is not recoverable from the view this was reconstructed from -
   * confirm against the published source if exact whitespace matters.
   */
  private buildAnalysisPrompt(purpose: string): string {
    return [
      'Analyze the screenshot from the PREVIOUS iteration (before the most recent commands executed).',
      '',
      `PURPOSE: ${purpose}`,
      '',
      'WHAT TO LOOK FOR:',
      '1. **Coordinate verification**: If purpose mentions "coord" or "click", look for MAGENTA "clicked" marker (circle with yellow border)',
      '- Describe marker position relative to UI elements',
      '- Assess accuracy: "centered on button", "5% above target", etc.',
      '',
      '2. **Before/after comparison**: Compare visual state with current page',
      '- What changed after commands executed?',
      '- New elements, removed elements, state changes?',
      '',
      '3. **Transient effects**: Elements that may have appeared and disappeared',
      '- Alerts, toasts, error messages that are now gone',
      '- Loading states, spinners that finished',
      '',
      '4. **General state**: Answer the specific question from purpose',
      '',
      'TASK: Provide concise, specific observations relevant to the purpose.'
    ].join('\n');
  }
}
|
|
103
|
+
|
|
@@ -108,7 +108,7 @@ export interface AgentDecision {
|
|
|
108
108
|
needsToolResults?: boolean; // Wait for tool results before proceeding with commands
|
|
109
109
|
|
|
110
110
|
// Command batch (executed sequentially)
|
|
111
|
-
commands?: string[];
|
|
111
|
+
commands?: string[]; // Plain Playwright commands
|
|
112
112
|
commandReasoning?: string;
|
|
113
113
|
|
|
114
114
|
// Self-reflection for next iteration
|
|
@@ -147,6 +147,12 @@ export interface AgentDecision {
|
|
|
147
147
|
issue: 'prior_incomplete' | 'already_done' | 'wrong_order' | null;
|
|
148
148
|
explanation: string; // Why agent thinks step order is off
|
|
149
149
|
};
|
|
150
|
+
|
|
151
|
+
// Meta-learning: Suggested prompt improvements based on journey learnings
|
|
152
|
+
debugInfo?: {
|
|
153
|
+
suggestedPromptUpdates?: string; // Confident suggestions for improving system/user prompts
|
|
154
|
+
reasoning?: string; // Why these updates would help
|
|
155
|
+
};
|
|
150
156
|
}
|
|
151
157
|
|
|
152
158
|
/**
|
|
@@ -172,16 +178,34 @@ export interface AgentContext {
|
|
|
172
178
|
experiences: string[];
|
|
173
179
|
extractedData: Record<string, string>;
|
|
174
180
|
|
|
175
|
-
//
|
|
176
|
-
previousIterationGuidance?: SelfReflection;
|
|
177
|
-
|
|
178
|
-
// Note from previous iteration (NEW - tactical continuity)
|
|
181
|
+
// Note from previous iteration (tactical continuity)
|
|
179
182
|
noteFromPreviousIteration?: NoteToFutureSelf;
|
|
180
183
|
|
|
184
|
+
// Test data / credentials for exploration
|
|
185
|
+
testDataPrompt?: string;
|
|
186
|
+
|
|
187
|
+
// SoM (Set-of-Marks) screenshot with visual markers
|
|
188
|
+
somScreenshot?: string; // Data URL of screenshot with SoM markers
|
|
189
|
+
somElementMap?: string; // Text map of SoM IDs to element details for disambiguation
|
|
190
|
+
|
|
191
|
+
// Repair mode context (undefined for script gen/exploration)
|
|
192
|
+
priorSteps?: string[]; // Steps completed before current (e.g., ["1. Navigate", "2. Login"])
|
|
193
|
+
nextSteps?: string[]; // Steps after current (e.g., ["5. Submit", "6. Verify"])
|
|
194
|
+
|
|
181
195
|
// Tool results from this iteration (if any)
|
|
182
196
|
toolResults?: Record<string, ToolResult>;
|
|
183
197
|
}
|
|
184
198
|
|
|
199
|
+
/**
 * Exploration mode configuration.
 *
 * Carried on `AgentConfig.explorationMode`; the defaults shipped in
 * `DEFAULT_AGENT_CONFIG` leave it disabled with an empty prompt and a
 * 50-step budget.
 */
export interface ExplorationMode {
  enabled: boolean; // Whether exploration mode is active
  explorationPrompt: string; // Journey-specific focus: "Explore Dashboard and test all widgets"
  testDataPrompt?: string; // Test data, credentials context
  maxExplorationSteps?: number; // Budget limit (default: 50) - agent can stop earlier
}
|
|
208
|
+
|
|
185
209
|
/**
|
|
186
210
|
* Configurable guardrails
|
|
187
211
|
*/
|
|
@@ -211,6 +235,15 @@ export interface AgentConfig {
|
|
|
211
235
|
// Allowed actions
|
|
212
236
|
allowedExplorationActions?: string[]; // Default: ['hover', 'click_info', 'click_menu', 'focus'] (Phase 2)
|
|
213
237
|
allowedDomains?: string[]; // For navigate_to_url validation
|
|
238
|
+
|
|
239
|
+
// Feature flags
|
|
240
|
+
enableCoordinateMode?: boolean; // Default: false (experimental - disable until stable)
|
|
241
|
+
useSoM?: boolean; // Default: true (Set-of-Marks visual mode)
|
|
242
|
+
somUseSomIdBasedCommands?: boolean; // Default: false (use semantic selectors first)
|
|
243
|
+
somRestrictCoordinates?: boolean; // Default: false (if true, strongly discourage coord commands except as absolute last resort)
|
|
244
|
+
|
|
245
|
+
// Exploration mode (NEW)
|
|
246
|
+
explorationMode?: ExplorationMode;
|
|
214
247
|
}
|
|
215
248
|
|
|
216
249
|
/**
|
|
@@ -243,6 +276,16 @@ export const DEFAULT_AGENT_CONFIG: Required<AgentConfig> = {
|
|
|
243
276
|
commandTimeout: 30000,
|
|
244
277
|
explorationTimeout: 2000,
|
|
245
278
|
allowedExplorationActions: ['hover', 'click_info', 'click_menu', 'focus'],
|
|
246
|
-
allowedDomains: []
|
|
279
|
+
allowedDomains: [],
|
|
280
|
+
enableCoordinateMode: false, // Disabled by default - experimental feature
|
|
281
|
+
useSoM: true, // Enabled by default - use Set-of-Marks visual mode
|
|
282
|
+
somUseSomIdBasedCommands: false, // Use semantic selectors first
|
|
283
|
+
somRestrictCoordinates: false, // Allow coords as valid fallback (for exploration)
|
|
284
|
+
explorationMode: {
|
|
285
|
+
enabled: false,
|
|
286
|
+
explorationPrompt: '',
|
|
287
|
+
testDataPrompt: undefined,
|
|
288
|
+
maxExplorationSteps: 50
|
|
289
|
+
}
|
|
247
290
|
};
|
|
248
291
|
|
package/src/scenario-service.ts
CHANGED
|
@@ -218,6 +218,26 @@ export class ScenarioService extends EventEmitter {
|
|
|
218
218
|
this.processNextJob();
|
|
219
219
|
}
|
|
220
220
|
|
|
221
|
+
/**
 * Execute exploration mode using orchestrator.
 * Requires orchestrator to be enabled via useOrchestrator option.
 *
 * Picks a worker that is not in `busyWorkers`, creating one when none is
 * free, and delegates to that worker's `executeExploration`.
 *
 * @param page - Page to explore; passed through to the worker unchanged.
 * @param explorationConfig - Exploration settings; passed through unchanged.
 * @param jobId - Identifier of the job this exploration belongs to.
 * @returns Whatever the worker's `executeExploration` resolves to.
 * @throws Error when the orchestrator is disabled, or when no worker is
 *   available even after creating one.
 */
async executeExploration(page: any, explorationConfig: any, jobId: string): Promise<any> {
  if (!this.useOrchestrator) {
    throw new Error('Exploration mode requires orchestrator to be enabled');
  }

  // Get an available worker (or create one if needed)
  let worker = this.workers.find(w => !this.busyWorkers.has(w));
  if (!worker) {
    await this.createWorker();
    worker = this.workers[this.workers.length - 1];
  }

  // The list can still be empty if createWorker() did not register a worker;
  // fail with a clear message rather than a TypeError on `undefined`.
  if (!worker) {
    throw new Error('Exploration mode could not obtain a worker');
  }

  // NOTE(review): the chosen worker is not added to busyWorkers for the
  // duration of the exploration, so the job queue could presumably hand it
  // another job concurrently - confirm against processNextJob().

  // Execute exploration via worker's orchestrator
  return worker.executeExploration(page, explorationConfig, jobId);
}
|
|
240
|
+
|
|
221
241
|
async shutdown(): Promise<void> {
|
|
222
242
|
this.log('Shutting down scenario service...');
|
|
223
243
|
|
|
@@ -18,6 +18,8 @@ import {
|
|
|
18
18
|
JourneyMemory,
|
|
19
19
|
AgentConfig,
|
|
20
20
|
TakeScreenshotTool,
|
|
21
|
+
ViewPreviousScreenshotTool,
|
|
22
|
+
RefreshSomMarkersTool,
|
|
21
23
|
RecallHistoryTool,
|
|
22
24
|
InspectPageTool,
|
|
23
25
|
CheckPageReadyTool,
|
|
@@ -105,11 +107,18 @@ export class ScenarioWorker extends EventEmitter {
|
|
|
105
107
|
const takeScreenshotTool = new TakeScreenshotTool();
|
|
106
108
|
takeScreenshotTool.setLLMFacade(this.llmFacade); // Inject LLM for vision analysis
|
|
107
109
|
|
|
110
|
+
const viewPreviousScreenshotTool = new ViewPreviousScreenshotTool();
|
|
111
|
+
viewPreviousScreenshotTool.setLLMFacade(this.llmFacade); // Inject LLM for vision analysis
|
|
112
|
+
|
|
113
|
+
const refreshSomMarkersTool = new RefreshSomMarkersTool();
|
|
114
|
+
|
|
108
115
|
const verifyActionTool = new VerifyActionResultTool();
|
|
109
116
|
verifyActionTool.setLLMFacade(this.llmFacade); // Inject LLM for vision comparison
|
|
110
117
|
|
|
111
|
-
// Register
|
|
118
|
+
// Register 8 information-gathering tools (state changes via Playwright commands)
|
|
112
119
|
this.toolRegistry.register(takeScreenshotTool);
|
|
120
|
+
this.toolRegistry.register(viewPreviousScreenshotTool);
|
|
121
|
+
this.toolRegistry.register(refreshSomMarkersTool);
|
|
113
122
|
this.toolRegistry.register(new RecallHistoryTool());
|
|
114
123
|
this.toolRegistry.register(new InspectPageTool());
|
|
115
124
|
this.toolRegistry.register(new CheckPageReadyTool());
|
|
@@ -317,12 +326,12 @@ export class ScenarioWorker extends EventEmitter {
|
|
|
317
326
|
page = job.existingPage;
|
|
318
327
|
} else {
|
|
319
328
|
// Create new browser (default behavior for local clients)
|
|
320
|
-
//
|
|
329
|
+
// Let the playwrightConfig control headless mode (don't override with hardcoded value)
|
|
321
330
|
// Create logger function from outputChannel for browser initialization
|
|
322
331
|
const logger = this.outputChannel ? (message: string, level?: 'log' | 'error' | 'warn') => {
|
|
323
332
|
this.outputChannel!.appendLine(`[Browser] ${message}`);
|
|
324
333
|
} : undefined;
|
|
325
|
-
const browserInstance = await initializeBrowser(job.playwrightConfig,
|
|
334
|
+
const browserInstance = await initializeBrowser(job.playwrightConfig, undefined, undefined, logger);
|
|
326
335
|
browser = browserInstance.browser;
|
|
327
336
|
context = browserInstance.context;
|
|
328
337
|
page = browserInstance.page;
|
|
@@ -1082,6 +1091,18 @@ export class ScenarioWorker extends EventEmitter {
|
|
|
1082
1091
|
|
|
1083
1092
|
|
|
1084
1093
|
|
|
1094
|
+
/**
 * Run exploration mode by delegating to this worker's orchestrator agent.
 *
 * @param page - Page to explore; forwarded unchanged.
 * @param explorationConfig - Exploration settings; forwarded unchanged.
 * @param jobId - Identifier of the job this exploration belongs to.
 * @returns Whatever the orchestrator's `executeExploration` returns.
 * @throws Error when orchestrator use is disabled or no agent exists.
 */
async executeExploration(page: any, explorationConfig: any, jobId: string): Promise<any> {
  // Treat "disabled" and "not constructed" the same way: no usable agent.
  const orchestrator = this.useOrchestrator ? this.orchestratorAgent : undefined;

  if (!orchestrator) {
    throw new Error('Orchestrator not available - exploration mode requires orchestrator');
  }

  return orchestrator.executeExploration(page, explorationConfig, jobId);
}
|
|
1105
|
+
|
|
1085
1106
|
async cleanup(): Promise<void> {
|
|
1086
1107
|
this.initialized = false;
|
|
1087
1108
|
this.sessionId = null;
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Page Info Retry Utility
|
|
3
|
+
* Handles adaptive page loading with exponential backoff
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { getEnhancedPageInfo, PageInfo } from './page-info-utils';
|
|
7
|
+
|
|
8
|
+
/**
 * Extracts page info, retrying with a growing delay until the page exposes a
 * minimum number of interactive elements or the attempt budget runs out.
 *
 * The delay schedule lives in one place (`nextBackoff` plus the constants
 * below) so the retry loop and the total-wait estimate cannot drift apart.
 */
export class PageInfoRetry {
  /** Delay before the second attempt, in milliseconds. */
  private static readonly INITIAL_BACKOFF_MS = 1000;
  /** Multiplier applied to the delay after every wait. */
  private static readonly BACKOFF_FACTOR = 1.6;
  /** Upper bound for a single delay, in milliseconds. */
  private static readonly MAX_BACKOFF_MS = 15000;
  /** Element count at which the page is considered loaded. */
  private static readonly MIN_INTERACTIVE_ELEMENTS = 3;
  /** Timeout for the initial `domcontentloaded` wait, in milliseconds. */
  private static readonly LOAD_STATE_TIMEOUT_MS = 20000;

  /**
   * Get page info with retry logic - waits for interactive elements to appear.
   *
   * @param page - Object providing `waitForLoadState` and `waitForTimeout`;
   *   it is also handed to `getEnhancedPageInfo`.
   * @param maxAttempts - Maximum number of extractions inside the retry loop.
   * @returns The first page info with enough interactive elements, or the
   *   last one extracted once the attempts are used up.
   */
  static async getWithRetry(page: any, maxAttempts: number = 6): Promise<PageInfo> {
    // Wait for initial page load (generous timeout for slow apps).
    // Both a rejected wait and a synchronous throw are deliberately ignored.
    try {
      await page
        .waitForLoadState('domcontentloaded', { timeout: PageInfoRetry.LOAD_STATE_TIMEOUT_MS })
        .catch(() => {});
    } catch {
      // Continue even if wait fails
    }

    let attempt = 0;
    let backoffMs = PageInfoRetry.INITIAL_BACKOFF_MS;

    while (attempt < maxAttempts) {
      attempt++;

      const pageInfo = await getEnhancedPageInfo(page);
      const found = pageInfo.interactiveElements?.length ?? 0;

      // If we got a reasonable number of elements, we're done
      if (found >= PageInfoRetry.MIN_INTERACTIVE_ELEMENTS) {
        if (attempt > 1) {
          console.log(`[PageInfoRetry] ✓ Page elements loaded after ${attempt} attempts`);
        }
        return pageInfo;
      }

      // If this is the last attempt, return what we have
      if (attempt >= maxAttempts) {
        const totalWait = PageInfoRetry.calculateTotalWaitTime(maxAttempts);
        console.log(`[PageInfoRetry] ⚠️ Only found ${found} elements after ${maxAttempts} attempts (total wait: ~${totalWait}ms)`);
        return pageInfo;
      }

      // Wait before retrying, then grow the delay for the next round
      console.log(`[PageInfoRetry] Only ${found} elements found (attempt ${attempt}/${maxAttempts}), waiting ${backoffMs}ms...`);
      await page.waitForTimeout(backoffMs);
      backoffMs = PageInfoRetry.nextBackoff(backoffMs);
    }

    // Only reached when the loop body never runs (maxAttempts < 1)
    return await getEnhancedPageInfo(page);
  }

  /** Delay that follows `currentMs` in the schedule (grown, then capped). */
  private static nextBackoff(currentMs: number): number {
    return Math.min(currentMs * PageInfoRetry.BACKOFF_FACTOR, PageInfoRetry.MAX_BACKOFF_MS);
  }

  /**
   * Sum of the delays slept across `maxAttempts` attempts, i.e. one delay
   * between each pair of consecutive attempts, rounded to whole milliseconds.
   */
  private static calculateTotalWaitTime(maxAttempts: number): number {
    let total = 0;
    let backoffMs = PageInfoRetry.INITIAL_BACKOFF_MS;
    for (let i = 1; i < maxAttempts; i++) {
      total += backoffMs;
      backoffMs = PageInfoRetry.nextBackoff(backoffMs);
    }
    return Math.round(total);
  }
}
|
|
65
|
+
|