testchimp-runner-core 0.0.40 → 0.0.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +1 -3
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +7 -6
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -11
- package/dist/index.js.map +1 -1
- package/dist/orchestrator/decision-parser.d.ts.map +1 -1
- package/dist/orchestrator/decision-parser.js +16 -0
- package/dist/orchestrator/decision-parser.js.map +1 -1
- package/dist/orchestrator/index.d.ts +4 -2
- package/dist/orchestrator/index.d.ts.map +1 -1
- package/dist/orchestrator/index.js +10 -8
- package/dist/orchestrator/index.js.map +1 -1
- package/dist/orchestrator/orchestrator-agent.d.ts +10 -4
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-agent.js +376 -118
- package/dist/orchestrator/orchestrator-agent.js.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.d.ts +2 -10
- package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
- package/dist/orchestrator/orchestrator-prompts.js +343 -452
- package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
- package/dist/orchestrator/page-loading-utils.d.ts +15 -0
- package/dist/orchestrator/page-loading-utils.d.ts.map +1 -0
- package/dist/orchestrator/page-loading-utils.js +115 -0
- package/dist/orchestrator/page-loading-utils.js.map +1 -0
- package/dist/orchestrator/page-som-handler.d.ts +2 -1
- package/dist/orchestrator/page-som-handler.d.ts.map +1 -1
- package/dist/orchestrator/page-som-handler.js +250 -33
- package/dist/orchestrator/page-som-handler.js.map +1 -1
- package/dist/orchestrator/site-learnings-utils.d.ts +31 -0
- package/dist/orchestrator/site-learnings-utils.d.ts.map +1 -0
- package/dist/orchestrator/site-learnings-utils.js +175 -0
- package/dist/orchestrator/site-learnings-utils.js.map +1 -0
- package/dist/orchestrator/som-types.d.ts +2 -0
- package/dist/orchestrator/som-types.d.ts.map +1 -1
- package/dist/orchestrator/som-types.js.map +1 -1
- package/dist/orchestrator/tools/index.d.ts +9 -8
- package/dist/orchestrator/tools/index.d.ts.map +1 -1
- package/dist/orchestrator/tools/index.js +10 -15
- package/dist/orchestrator/tools/index.js.map +1 -1
- package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -1
- package/dist/orchestrator/tools/take-screenshot.js +10 -1
- package/dist/orchestrator/tools/take-screenshot.js.map +1 -1
- package/dist/orchestrator/types.d.ts +54 -9
- package/dist/orchestrator/types.d.ts.map +1 -1
- package/dist/orchestrator/types.js.map +1 -1
- package/dist/progress-reporter.d.ts +23 -2
- package/dist/progress-reporter.d.ts.map +1 -1
- package/dist/progress-reporter.js.map +1 -1
- package/dist/prompts.d.ts.map +1 -1
- package/dist/prompts.js +14 -3
- package/dist/prompts.js.map +1 -1
- package/dist/scenario-service.d.ts +3 -3
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +6 -5
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +7 -3
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +94 -21
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/types.d.ts +4 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
- package/dist/testing/agent-tester.d.ts +0 -35
- package/dist/testing/agent-tester.d.ts.map +0 -1
- package/dist/testing/agent-tester.js +0 -84
- package/dist/testing/agent-tester.js.map +0 -1
- package/dist/testing/ref-translator-tester.d.ts +0 -44
- package/dist/testing/ref-translator-tester.d.ts.map +0 -1
- package/dist/testing/ref-translator-tester.js +0 -104
- package/dist/testing/ref-translator-tester.js.map +0 -1
- package/dist/utils/hierarchical-selector.d.ts +0 -47
- package/dist/utils/hierarchical-selector.d.ts.map +0 -1
- package/dist/utils/hierarchical-selector.js +0 -212
- package/dist/utils/hierarchical-selector.js.map +0 -1
- package/dist/utils/ref-attacher.d.ts +0 -21
- package/dist/utils/ref-attacher.d.ts.map +0 -1
- package/dist/utils/ref-attacher.js +0 -149
- package/dist/utils/ref-attacher.js.map +0 -1
- package/dist/utils/ref-translator.d.ts +0 -49
- package/dist/utils/ref-translator.d.ts.map +0 -1
- package/dist/utils/ref-translator.js +0 -276
- package/dist/utils/ref-translator.js.map +0 -1
|
@@ -5,120 +5,115 @@
|
|
|
5
5
|
*/
|
|
6
6
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
7
|
exports.OrchestratorPrompts = void 0;
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
8
|
+
// ========== UTILITY FUNCTIONS ==========
|
|
9
|
+
/**
|
|
10
|
+
* Truncate URL to avoid bloating prompts
|
|
11
|
+
*/
|
|
12
|
+
function truncateUrl(url, maxLength = 300) {
|
|
13
|
+
if (!url || url.length <= maxLength)
|
|
14
|
+
return url;
|
|
15
|
+
return url.substring(0, maxLength) + '...';
|
|
16
|
+
}
|
|
17
|
+
// ========== SHARED PROMPT SECTIONS (to avoid duplication) ==========
|
|
18
|
+
const DISCRETE_EXPERIENCE_LOOP = `DISCRETE EXPERIENCE LOOP (YOU ARE STATELESS - NO SCREENSHOT MEMORY):
|
|
19
|
+
You operate in iterations: receive state → decide → sleep → wake with NEW state.
|
|
20
|
+
Each iteration you receive: current screenshot, past actions, collected memories, and noteToFutureSelf you wrote in last iteration.
|
|
21
|
+
CRITICAL: You do NOT see previous screenshots (unless you specifically request) - only text descriptions of past actions!
|
|
22
|
+
Write explicit EXPECTED STATE in noteToFutureSelf so your future self can verify against the future screenshot.
|
|
23
|
+
Example: "Clicked hamburger menu (was collapsed). EXPECT: menu expanded with 'settings' items visible"`;
|
|
24
|
+
const SITE_LEARNINGS_GUIDE = `SITE LEARNINGS: Build mental model (persistent across journeys)
|
|
25
|
+
|
|
26
|
+
NAMING (check SCREEN STATE VOCABULARY first):
|
|
27
|
+
- screen: REUSE from vocabulary ("login", "dashboard") or create if new. NEVER: "about:blank", "loading"
|
|
28
|
+
- state: INFER from COMPLETED STEPS (max 3 GENERIC dims - user role/context, NOT specific data)
|
|
29
|
+
Dimensions describe USER STATE (logged-in, admin, cart-empty), NOT data values (workspace names, usernames, products)
|
|
30
|
+
✅ "logged-in,admin", "guest,cart-empty"
|
|
31
|
+
❌ "testchimp-selected" (workspace name is data!), "user-john" (username is data!)
|
|
32
|
+
|
|
33
|
+
LEARNINGS (semantic insights that persist):
|
|
34
|
+
Focus on BEHAVIOR and PATTERNS that will help on future runs, when SoM IDs are completely different.
|
|
35
|
+
|
|
36
|
+
WHY NO SOM IDS: SoM markers (1, 2, [5], [6], element 9) regenerate EVERY page load - different numbers each time!
|
|
37
|
+
A learning with "element 9" is useless on next run when that same button is "element 3".
|
|
38
|
+
|
|
39
|
+
STORE: Non-obvious behavior, interaction quirks, selector strategies
|
|
40
|
+
✅ "Dropdown opens on caret icon click, not container div"
|
|
41
|
+
✅ "Delete requires overflow menu (not directly visible)"
|
|
42
|
+
✅ "Search triggers on Enter, not auto-search while typing"
|
|
43
|
+
|
|
44
|
+
DON'T STORE: Element catalogs, SoM IDs, obvious facts, attribute documentation
|
|
45
|
+
❌ "Continue with Google button" (element listing - adds no behavioral value)
|
|
46
|
+
❌ "opener is SoM id [6]" (ephemeral - will be different ID next run!)
|
|
47
|
+
❌ "input name=emailOrUsername" (documenting HTML - not useful)
|
|
48
|
+
|
|
49
|
+
Ask: "Will this help when SoM IDs are completely different?" NO → don't store
|
|
50
|
+
|
|
51
|
+
STEP COMPLETION: Check ALL signals (memory, URL, screenshot, noteToFutureSelf) vs step goal.
|
|
52
|
+
Process: Expected (from noteToSelf) → Actual (commands success? URL changed? content visible?) → Decide
|
|
53
|
+
- Commands ✓ + URL changed + expected page → COMPLETE
|
|
54
|
+
- Commands ✓ + error shown → CONTINUE (retry)
|
|
55
|
+
- Command failed → CONTINUE (different selector)
|
|
56
|
+
|
|
57
|
+
`;
|
|
58
|
+
const NOTETOSELF_GUIDE = `NOTETOSELF: Capture thinking/intentions + EXPLICIT EXPECTED STATE for verification.
|
|
59
|
+
✅ "Clicked menu. EXPECT: expanded with 'Settings' visible"
|
|
60
|
+
❌ "Click menu" (future can't verify!)
|
|
61
|
+
Include: strategy, backups if fails, what to verify next.`;
|
|
62
|
+
// Response schema - exact TypeScript interface the agent must follow
|
|
63
|
+
const RESPONSE_SCHEMA = `
|
|
64
|
+
RESPONSE FORMAT (exact TypeScript interface):
|
|
65
|
+
|
|
66
|
+
interface AgentDecision {
|
|
67
|
+
// Required fields
|
|
68
|
+
status: 'complete' | 'stuck' | 'infeasible' | 'continue';
|
|
69
|
+
statusReasoning: string;
|
|
70
|
+
reasoning: string;
|
|
61
71
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
],
|
|
72
|
+
// Screen identification (REQUIRED - always identify current screen)
|
|
73
|
+
screenState: {
|
|
74
|
+
screen: string; // Screen name - REUSE from SCREEN STATE KNOWLEDGE if possible
|
|
75
|
+
state: string; // State dimensions: "admin", "admin,empty-cart", "" for default
|
|
76
|
+
};
|
|
68
77
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
SELECTOR STRATEGIES (use in order of preference):
|
|
100
|
-
1. getByRole: page.getByRole('button', {name: 'Login'})
|
|
101
|
-
2. getByLabel: page.getByLabel('Email address')
|
|
102
|
-
3. getByPlaceholder: page.getByPlaceholder('Enter email')
|
|
103
|
-
4. getByText: page.getByText('Sign in')
|
|
104
|
-
5. CSS: page.locator('input[name="email"]')
|
|
105
|
-
6. Test IDs: page.getByTestId('login-button')
|
|
106
|
-
|
|
107
|
-
Example login commands:
|
|
108
|
-
{
|
|
109
|
-
"commands": [
|
|
110
|
-
"await page.getByLabel('Email').fill('user@test.com')",
|
|
111
|
-
"await page.getByLabel('Password').fill('secret123')",
|
|
112
|
-
"await page.getByRole('button', {name: 'Submit'}).click()"
|
|
113
|
-
]
|
|
78
|
+
// Site learnings (OPTIONAL - only when learning something NEW/IMPORTANT)
|
|
79
|
+
siteLearningsUpdate?: {
|
|
80
|
+
screens?: {
|
|
81
|
+
[screenName: string]: {
|
|
82
|
+
states: {
|
|
83
|
+
[stateName: string]: {
|
|
84
|
+
observations?: Array<{ id?: number; text: string }>; // Add (no id) or Update (with id)
|
|
85
|
+
deleteObservationIds?: number[];
|
|
86
|
+
};
|
|
87
|
+
};
|
|
88
|
+
};
|
|
89
|
+
};
|
|
90
|
+
uxPatterns?: Array<{ id?: number; text: string }>; // Add (no id) or Update (with id)
|
|
91
|
+
deleteUxPatternIds?: number[];
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
// Commands to execute
|
|
95
|
+
commands?: Array<SomCommand | string>;
|
|
96
|
+
commandReasoning?: string;
|
|
97
|
+
|
|
98
|
+
// Note to future self (your only memory continuity)
|
|
99
|
+
noteToFutureSelf?: string;
|
|
100
|
+
|
|
101
|
+
// Other optional fields
|
|
102
|
+
toolCalls?: Array<{ name: string; params: any }>;
|
|
103
|
+
toolReasoning?: string;
|
|
104
|
+
blockerDetected?: { description: string; clearingCommands: string[] };
|
|
105
|
+
memoryUpdate?: { action: string; observation: string; extractedData?: Record<string, any> };
|
|
114
106
|
}
|
|
115
107
|
|
|
116
|
-
|
|
117
|
-
|
|
108
|
+
CRITICAL: uxPatterns array must have objects with BOTH id and text fields!
|
|
109
|
+
Example: { "id": 1, "text": "Pattern description" } or { "text": "New pattern" } (no id for new)
|
|
110
|
+
`;
|
|
111
|
+
// ===================================================================
|
|
112
|
+
class OrchestratorPrompts {
|
|
118
113
|
/**
|
|
119
114
|
* Build SoM (Set-of-Marks) system prompt for visual element identification
|
|
120
115
|
*/
|
|
121
|
-
static buildSomSystemPrompt(restrictCoordinates = false) {
|
|
116
|
+
static buildSomSystemPrompt(restrictCoordinates = false, toolDescriptions) {
|
|
122
117
|
const coordinateRestriction = restrictCoordinates ? `
|
|
123
118
|
|
|
124
119
|
CRITICAL: COORDINATE COMMANDS RESTRICTED
|
|
@@ -130,32 +125,24 @@ Strong preference order:
|
|
|
130
125
|
3. ONLY IF NO OTHER OPTION EXISTS: use coordinate commands
|
|
131
126
|
|
|
132
127
|
If you use coordinates, you MUST explain in commandReasoning why no SoM-marked alternative exists.` : '';
|
|
133
|
-
|
|
128
|
+
const toolSection = toolDescriptions ? `
|
|
134
129
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
System waits for page stability after each batch.
|
|
130
|
+
AVAILABLE TOOLS:
|
|
131
|
+
${toolDescriptions}
|
|
138
132
|
|
|
139
|
-
|
|
140
|
-
You
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
- Previous step descriptions
|
|
144
|
-
- Your noteToFutureSelf from last iteration
|
|
133
|
+
Use tools to gather additional information when needed before executing commands.` : '';
|
|
134
|
+
return `You are an intelligent test automation agent using Set-of-Marks (SoM) visual element identification.${coordinateRestriction}${toolSection}
|
|
135
|
+
|
|
136
|
+
${DISCRETE_EXPERIENCE_LOOP}
|
|
145
137
|
|
|
146
|
-
|
|
147
|
-
• Document your current intentions and strategy
|
|
148
|
-
• Record what you were thinking/planning
|
|
149
|
-
• Give specific advice to your future self about what to look for
|
|
150
|
-
• Note any observations or patterns you've discovered
|
|
151
|
-
• Suggest backup plans if current approach fails
|
|
138
|
+
${NOTETOSELF_GUIDE}
|
|
152
139
|
|
|
153
140
|
IMPORTANT: You will receive a screenshot with COLOR-CODED BOUNDING BOXES and IDs overlaid on interactive elements.
|
|
154
141
|
|
|
155
142
|
SCREENSHOT SCOPE:
|
|
156
|
-
- Shows
|
|
157
|
-
-
|
|
158
|
-
-
|
|
143
|
+
- Shows FULL PAGE (entire scrollable content, including below-fold elements)
|
|
144
|
+
- ALL interactive elements across the entire page are marked with SoM IDs
|
|
145
|
+
- You can see and interact with any element on the page without scrolling
|
|
159
146
|
|
|
160
147
|
VISUAL MARKER SYSTEM:
|
|
161
148
|
- Each interactive element has a colored bounding box with a unique color
|
|
@@ -213,180 +200,19 @@ COMMANDS ARRAY: Mix actions (has 'action') and verifications (has 'verificationT
|
|
|
213
200
|
Example: [{"elementRef":"4","action":"fill","value":"Hello"}, {"elementRef":"3","verificationType":"textContains","expected":"You: Hello"}]
|
|
214
201
|
CRITICAL: Verification steps MUST generate verification commands (never 0 commands) - don't just visually confirm!
|
|
215
202
|
|
|
216
|
-
|
|
217
|
-
Use percentage-based coords for unmarked elements:
|
|
218
|
-
{ "action": "click", "coord": { "x": 85.625, "y": 12.375 } }
|
|
219
|
-
|
|
220
|
-
Format: percentages 0-100, MUST use 3 decimals (0.000 = top-left, 50.000 = center, 100.000 = bottom-right).
|
|
221
|
-
After coord click, magenta "clicked" marker appears. Use view_previous_screenshot tool to verify if result unexpected.
|
|
222
|
-
|
|
223
|
-
NAVIGATION: Use navigate/goBack/goForward/reload actions (no elementRef needed).
|
|
224
|
-
Example: { "action": "navigate", "value": "https://..." }
|
|
225
|
-
DON'T click address bar - use navigate action. System waits for page load after navigation.
|
|
226
|
-
|
|
227
|
-
// Available actions: click, doubleClick, rightClick, hover, drag, fill, press, select, check, uncheck, focus, blur, scroll, navigate, goBack, goForward, reload
|
|
228
|
-
// Available verifications: textContains, textEquals, valueEquals, valueEmpty, isVisible, isHidden, isEnabled, isDisabled, isChecked, isUnchecked, countEquals, countGreaterThan, countLessThan, hasClass, hasAttribute
|
|
203
|
+
${RESPONSE_SCHEMA}
|
|
229
204
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
commandReasoning?: string;
|
|
235
|
-
toolCalls?: Array<{ name: string; params: Record<string, any> }>;
|
|
236
|
-
noteToFutureSelf?: string;
|
|
237
|
-
experiences?: string[];
|
|
238
|
-
blockerDetected?: { description: string; clearingCommands: SomCommand[] };
|
|
239
|
-
debugInfo?: { suggestedPromptUpdates?: string; reasoning?: string };
|
|
240
|
-
}
|
|
241
|
-
\`\`\`
|
|
242
|
-
|
|
243
|
-
NOTETOSELF: Your only continuity. Include: hypothesis, strategy, backup plans if fails, what to verify, observations.
|
|
244
|
-
Example: "Strategy: Click ID 1 for menu. Backup: try ID 2/3 or coord (8%,15%). Want to verify: menu expands."
|
|
245
|
-
|
|
246
|
-
EXAMPLE RESPONSES:
|
|
247
|
-
|
|
248
|
-
Action step:
|
|
249
|
-
\`\`\`json
|
|
250
|
-
{
|
|
251
|
-
"status": "continue",
|
|
252
|
-
"reasoning": "Need to fill login form with credentials",
|
|
253
|
-
"commands": [
|
|
254
|
-
{ "elementRef": "5", "action": "fill", "value": "user@example.com" },
|
|
255
|
-
{ "elementRef": "7", "action": "fill", "value": "password123" },
|
|
256
|
-
{ "elementRef": "12", "action": "click" }
|
|
257
|
-
],
|
|
258
|
-
"commandReasoning": "Filling email (ID 5), password (ID 7), clicking submit (ID 12)"
|
|
259
|
-
}
|
|
260
|
-
\`\`\`
|
|
261
|
-
|
|
262
|
-
Verification step:
|
|
263
|
-
\`\`\`json
|
|
264
|
-
{
|
|
265
|
-
"status": "complete",
|
|
266
|
-
"reasoning": "Message sent and verified in conversation",
|
|
267
|
-
"commands": [
|
|
268
|
-
{ "elementRef": "3", "verificationType": "textContains", "expected": "You: Hello", "description": "Message appears in thread" },
|
|
269
|
-
{ "elementRef": "4", "verificationType": "valueEmpty", "description": "Input cleared" }
|
|
270
|
-
],
|
|
271
|
-
"commandReasoning": "Verifying message visible in conversation (ID 3) and input empty (ID 4)"
|
|
272
|
-
}
|
|
273
|
-
\`\`\`
|
|
274
|
-
|
|
275
|
-
REPAIR MODE - Step already completed (DELETE case):
|
|
276
|
-
\`\`\`json
|
|
277
|
-
{
|
|
278
|
-
"status": "complete",
|
|
279
|
-
"reasoning": "Step asked to 'Dismiss welcome modal' but I see no modal in current screenshot - it was already dismissed by prior steps",
|
|
280
|
-
"commands": [],
|
|
281
|
-
"commandReasoning": "No commands needed - step goal already achieved/obsolete"
|
|
282
|
-
}
|
|
283
|
-
\`\`\`
|
|
205
|
+
COORDS: { "action": "click", "coord": { "x": 85.625, "y": 12.375 } }. Use 3 decimals, 0-100%.
|
|
206
|
+
NAVIGATE: { "action": "navigate", "value": "https://..." }
|
|
207
|
+
SCROLL: { "action": "scroll", "scrollDirection": "down", "scrollAmount": 500 }
|
|
208
|
+
PRESS: { "elementRef": "5", "action": "press", "value": "Enter" } (NO coord for press!)
|
|
284
209
|
|
|
285
|
-
OUTPUT
|
|
286
|
-
}
|
|
287
|
-
/**
|
|
288
|
-
* Build coordinate-specific system prompt (used when selectors repeatedly fail)
|
|
289
|
-
*/
|
|
290
|
-
static buildCoordinateSystemPrompt() {
|
|
291
|
-
return `You are a visual web automation expert. Selector generation has FAILED multiple times.
|
|
292
|
-
|
|
293
|
-
YOU MUST NOW USE COORDINATE-BASED ACTIONS (this is not optional).
|
|
294
|
-
|
|
295
|
-
SCREENSHOT PROVIDED:
|
|
296
|
-
You will see a screenshot with color-coded bounding boxes and ID labels attached to each element.
|
|
297
|
-
|
|
298
|
-
CRITICAL - IDENTIFY THE CORRECT ELEMENT:
|
|
299
|
-
1. READ the step goal carefully - what specific element are you looking for?
|
|
300
|
-
2. Look for the colored bounding box that matches the element description
|
|
301
|
-
3. The ID label is at TOP-RIGHT corner, ABOVE the box (bottom of label touches top of box)
|
|
302
|
-
4. Match the label color to the bounding box color
|
|
303
|
-
5. LOCATE that element in the screenshot (NOT a similar-looking element!)
|
|
304
|
-
6. VERIFY position using screen regions:
|
|
305
|
-
- Left sidebar/menu: xPercent ~5-25% (FAR LEFT)
|
|
306
|
-
- Center content: xPercent ~30-70%
|
|
307
|
-
- Right panel/sidebar: xPercent ~75-95% (FAR RIGHT)
|
|
308
|
-
7. CALCULATE percentages from element's CENTER position
|
|
309
|
-
8. SANITY CHECK your percentages:
|
|
310
|
-
- Sidebar menu item at 85%? WRONG - that's far right, not sidebar!
|
|
311
|
-
- Button in top-left at 90%? WRONG - that's top-right!
|
|
312
|
-
- Element description says "left" but x > 50%? WRONG - recheck!
|
|
313
|
-
|
|
314
|
-
Example thought process:
|
|
315
|
-
Goal: "Click Settings link in left navigation"
|
|
316
|
-
→ I see "Settings" text in LEFT navigation panel in the screenshot
|
|
317
|
-
→ Visual estimate: The link appears in the far left sidebar
|
|
318
|
-
→ Horizontal: The link center is roughly 1/8th from the left edge → ~12-13% from left
|
|
319
|
-
→ Vertical: The link center is roughly 1/3rd down from top → ~30-35% from top
|
|
320
|
-
→ xPercent: 12.500, yPercent: 32.000
|
|
321
|
-
→ Sanity check: 12.5% is FAR LEFT (NOT 80%+ which would be far right!)
|
|
322
|
-
→ Description: "Clicking center of Settings link in left sidebar"
|
|
323
|
-
|
|
324
|
-
CRITICAL VISUAL ESTIMATION TIPS:
|
|
325
|
-
- Divide screenshot mentally into quadrants/regions
|
|
326
|
-
- Left sidebar usually ~5-20% from left, center content ~30-70%, right sidebar ~75-95%
|
|
327
|
-
- Aim for CENTER of element, not edges
|
|
328
|
-
- Top bar usually 0-10% from top, footer usually 90-100%
|
|
329
|
-
- Be conservative: slightly off-center is better than way off
|
|
330
|
-
|
|
331
|
-
YOUR RESPONSE FORMAT - Output JSON matching this interface:
|
|
332
|
-
|
|
333
|
-
interface AgentDecisionLLMResponse {
|
|
334
|
-
status: string; // REQUIRED: "continue" (usually for coordinate mode)
|
|
335
|
-
reasoning: string; // REQUIRED: "I see [element] at (X%, Y%) - using coordinates"
|
|
336
|
-
coordinateAction: { // REQUIRED in coordinate mode
|
|
337
|
-
type: "coordinate";
|
|
338
|
-
action: "click" | "doubleClick" | "rightClick" | "hover" | "drag" | "fill" | "scroll";
|
|
339
|
-
xPercent: number; // 0-100, 3 decimals
|
|
340
|
-
yPercent: number; // 0-100, 3 decimals
|
|
341
|
-
toXPercent?: number; // For drag
|
|
342
|
-
toYPercent?: number; // For drag
|
|
343
|
-
value?: string; // For fill
|
|
344
|
-
scrollAmount?: number; // For scroll
|
|
345
|
-
};
|
|
346
|
-
noteToFutureSelf?: string; // Optional: What to try if this fails
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
COORDINATE REFERENCE:
|
|
350
|
-
- Top-left corner: xPercent=0, yPercent=0
|
|
351
|
-
- Top-right corner: xPercent=100, yPercent=0
|
|
352
|
-
- Bottom-left corner: xPercent=0, yPercent=100
|
|
353
|
-
- Bottom-right corner: xPercent=100, yPercent=100
|
|
354
|
-
- Center of screen: xPercent=50, yPercent=50
|
|
355
|
-
|
|
356
|
-
Use 3 decimal places for precision (e.g., 15.755, not 16).
|
|
357
|
-
|
|
358
|
-
ACTIONS:
|
|
359
|
-
|
|
360
|
-
**Physical clicks:**
|
|
361
|
-
- click: { action: "click", xPercent: 15.755, yPercent: 8.500 }
|
|
362
|
-
- doubleClick: { action: "doubleClick", xPercent: 15.755, yPercent: 8.500 }
|
|
363
|
-
- rightClick: { action: "rightClick", xPercent: 15.755, yPercent: 8.500 }
|
|
364
|
-
- hover: { action: "hover", xPercent: 15.755, yPercent: 8.500 }
|
|
365
|
-
|
|
366
|
-
**Input actions:**
|
|
367
|
-
- fill: Click then type
|
|
368
|
-
{ action: "fill", xPercent: 30.000, yPercent: 25.000, value: "alice@example.com" }
|
|
369
|
-
|
|
370
|
-
**Movement actions:**
|
|
371
|
-
- drag: From one position to another
|
|
372
|
-
{ action: "drag", xPercent: 10.000, yPercent: 50.000, toXPercent: 60.000, toYPercent: 50.000 }
|
|
373
|
-
- scroll: At position, scroll by amount
|
|
374
|
-
{ action: "scroll", xPercent: 50.000, yPercent: 50.000, scrollAmount: 500 }
|
|
375
|
-
|
|
376
|
-
CRITICAL RULES:
|
|
377
|
-
- Percentages are from viewport TOP-LEFT (not full page)
|
|
378
|
-
- Use element CENTER for coordinates, not edges
|
|
379
|
-
- Be precise with decimals - wrong coords click wrong element
|
|
380
|
-
- For fill: system will click at (x%,y%) then type value automatically
|
|
381
|
-
- For drag: toXPercent/toYPercent are REQUIRED
|
|
382
|
-
|
|
383
|
-
DO NOT try to generate selectors - that approach already failed. Use coordinates only.
|
|
384
|
-
This is a last-resort mechanism, but it WILL work if you provide accurate percentages.`;
|
|
210
|
+
OUTPUT: Return valid JSON. Example: { "status": "complete", "commands": [{"elementRef":"5","action":"fill","value":"test"}], "screenState": {"screen":"login","state":""} }`;
|
|
385
211
|
}
|
|
386
212
|
/**
|
|
387
213
|
* Build user prompt with context
|
|
388
214
|
*/
|
|
389
|
-
static buildUserPrompt(context, consecutiveFailures
|
|
215
|
+
static buildUserPrompt(context, consecutiveFailures) {
|
|
390
216
|
const parts = [];
|
|
391
217
|
// Add SoM format reminder if screenshot is present
|
|
392
218
|
if (context.somScreenshot) {
|
|
@@ -413,50 +239,36 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
|
|
|
413
239
|
}
|
|
414
240
|
parts.push(``);
|
|
415
241
|
}
|
|
416
|
-
parts.push(`
|
|
417
|
-
parts.push(`- CRITICAL: First check if this step is STILL NEEDED (may already be done by prior step or now obsolete)`);
|
|
418
|
-
parts.push(` → If step goal already achieved/no longer needed: Return 0 commands + status "complete" (DELETE case)`);
|
|
419
|
-
parts.push(` → Example: "Dismiss modal" but modal already gone → 0 commands, status "complete"`);
|
|
420
|
-
parts.push(`- Use SoM markers to identify current elements`);
|
|
421
|
-
parts.push(`- Generate commands that work with CURRENT UI (not original script)`);
|
|
422
|
-
parts.push(`- CRITICAL: Once you fix this step, return status "complete" IMMEDIATELY (control goes back to script)`);
|
|
423
|
-
parts.push(` → Repair mode = single step fix, then hand back control`);
|
|
424
|
-
parts.push(` → Don't continue to next steps - script will auto-execute them`);
|
|
425
|
-
parts.push(`- DON'T redo completed steps - only fix the blocker\n`);
|
|
242
|
+
parts.push(`STRATEGY: Check if step still needed. Fix using current UI. Return "complete" when fixed.\n`);
|
|
426
243
|
}
|
|
427
|
-
//
|
|
428
|
-
parts.push('
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
parts.push('-
|
|
244
|
+
// Concise rules for LLM caching
|
|
245
|
+
parts.push('RULES: Do step goal only. No verify commands unless step asks. Check prerequisites before advancing.');
|
|
246
|
+
// TEMPORARY: Always full-page mode during debugging
|
|
247
|
+
// TODO: Re-enable adaptive heuristic once verified working
|
|
248
|
+
parts.push('- Screenshot shows FULL PAGE (all content visible, including offscreen elements)');
|
|
249
|
+
parts.push('- All interactive elements are marked with SoM IDs, even those below the fold');
|
|
250
|
+
// Kept for future reference when re-enabling adaptive mode:
|
|
251
|
+
// const heightOk = context.pageHeight && context.viewportHeight && context.pageHeight < (context.viewportHeight * 2);
|
|
252
|
+
// const widthOk = context.pageWidth && context.viewportWidth && context.pageWidth < (context.viewportWidth * 2);
|
|
253
|
+
// const isCompactPage = heightOk && widthOk;
|
|
432
254
|
parts.push('- Screenshot tool: Use ONCE for visual context, then ACT (max 3 per step, system enforced)');
|
|
433
255
|
parts.push('- Max 5 iterations per step, then forced STUCK\n');
|
|
434
256
|
// Dynamic content follows (changes per iteration)
|
|
435
257
|
parts.push('=== CURRENT CONTEXT ===\n');
|
|
436
258
|
// Display note from previous iteration (high priority tactical info)
|
|
437
|
-
if (context.
|
|
438
|
-
const note = context.
|
|
259
|
+
if (context.journeyMemory.latestNote) {
|
|
260
|
+
const note = context.journeyMemory.latestNote;
|
|
439
261
|
parts.push(`📝 YOUR NOTE FROM PREVIOUS ITERATION:`);
|
|
440
|
-
parts.push(
|
|
441
|
-
parts.push(` ^^ READ THIS - your previous self left important tactical guidance ^^`);
|
|
442
|
-
parts.push(``);
|
|
443
|
-
parts.push(` ACTION REQUIRED:`);
|
|
444
|
-
parts.push(` 1. Did your previous action work? Check the screenshot!`);
|
|
445
|
-
parts.push(` 2. If it WORKED: Execute next step from your plan`);
|
|
446
|
-
parts.push(` 3. If it FAILED: Use your backup plan (try alternative IDs/methods)`);
|
|
447
|
-
parts.push(` 4. Write NEW noteToFutureSelf with:`);
|
|
448
|
-
parts.push(` - What worked/didn't work (learn from attempts)`);
|
|
449
|
-
parts.push(` - Updated strategy with new backup plan`);
|
|
450
|
-
parts.push(` - Next alternatives to try if this fails`);
|
|
451
|
-
parts.push(` - Build on previous note's reasoning`);
|
|
262
|
+
parts.push(`${note.content}`);
|
|
452
263
|
parts.push(``);
|
|
453
|
-
parts.push(
|
|
264
|
+
parts.push(`⚠️ Follow your own instructions above. Compare current screenshot to expected state.`);
|
|
454
265
|
parts.push('');
|
|
455
266
|
}
|
|
456
267
|
// Check for screenshot loops (analysis paralysis) - PER STEP tracking
|
|
457
|
-
const
|
|
268
|
+
const recentSteps = context.journeyMemory.history.slice(-6);
|
|
269
|
+
const screenshotsThisStep = recentSteps.filter(s => s.stepNumber === context.stepNumber &&
|
|
458
270
|
(s.code.includes('take_screenshot') || s.action.toLowerCase().includes('screenshot')));
|
|
459
|
-
const recentScreenshots =
|
|
271
|
+
const recentScreenshots = recentSteps.slice(-3).filter(s => s.code.includes('take_screenshot') || s.action.toLowerCase().includes('screenshot'));
|
|
460
272
|
if (screenshotsThisStep.length >= 3) {
|
|
461
273
|
parts.push(`[CRITICAL] SCREENSHOT LOOP DETECTED - ${screenshotsThisStep.length} SCREENSHOTS THIS STEP`);
|
|
462
274
|
parts.push(`ANALYSIS PARALYSIS! You keep gathering info but NEVER ACTING!`);
|
|
@@ -474,7 +286,8 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
|
|
|
474
286
|
parts.push(`[WARNING] SYSTEM WARNING: ${consecutiveFailures} failures!`);
|
|
475
287
|
// Only suggest screenshot if we haven't already taken multiple THIS STEP
|
|
476
288
|
if (screenshotsThisStep.length === 0) {
|
|
477
|
-
parts.push(`Take screenshot
|
|
289
|
+
parts.push(`Take full-page screenshot to see page state: { "name": "take_screenshot", "params": {"isFullPage": true} }`);
|
|
290
|
+
parts.push(`Then ACT with selector from the screenshot analysis.`);
|
|
478
291
|
}
|
|
479
292
|
else {
|
|
480
293
|
parts.push(`You already have visual context. Try different selector NOW.`);
|
|
@@ -484,30 +297,16 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
|
|
|
484
297
|
}
|
|
485
298
|
else if (consecutiveFailures && consecutiveFailures >= 4) {
|
|
486
299
|
parts.push(`[WARNING] CRITICAL: ${consecutiveFailures} failures!`);
|
|
487
|
-
|
|
488
|
-
parts.push(`Next failure will force STUCK. Coordinate mode should be active.\n`);
|
|
489
|
-
}
|
|
490
|
-
else {
|
|
491
|
-
parts.push(`Next failure will force STUCK. Try radically different selector approach.\n`);
|
|
492
|
-
}
|
|
493
|
-
}
|
|
494
|
-
// Trigger coordinate mode if many failures (Phase 1: after 3 failures) - ONLY if enabled
|
|
495
|
-
if (enableCoordinateMode && consecutiveFailures && consecutiveFailures >= 3) {
|
|
496
|
-
parts.push(`🎯🎯🎯 COORDINATE MODE ACTIVATED 🎯🎯🎯`);
|
|
497
|
-
parts.push(`Selector generation has failed ${consecutiveFailures} times.`);
|
|
498
|
-
parts.push(`You MUST use coordinate-based action now (percentages).`);
|
|
499
|
-
parts.push(`Provide coordinateAction with xPercent/yPercent (0-100, 3 decimals for precision).`);
|
|
500
|
-
parts.push(`See system prompt for coordinate action format.`);
|
|
501
|
-
parts.push(`🎯🎯🎯\n`);
|
|
300
|
+
parts.push(`Next failure will force STUCK. Try radically different approach.\n`);
|
|
502
301
|
}
|
|
503
302
|
// REPAIR MODE detection and instructions
|
|
504
303
|
const isRepairMode = context.priorSteps !== undefined;
|
|
505
304
|
if (isRepairMode) {
|
|
506
|
-
parts.push(`⚠️
|
|
305
|
+
parts.push(`⚠️ ⚠️ REPAIR MODE ⚠️ ⚠️`);
|
|
507
306
|
parts.push(`You are fixing a FAILED command from an existing script.`);
|
|
508
307
|
parts.push(`CRITICAL: The script executed command-by-command and stopped at a failure.`);
|
|
509
308
|
parts.push(`Your job: Fix ONLY the failing command. System will auto-execute remaining commands after.`);
|
|
510
|
-
parts.push(`⚠️
|
|
309
|
+
parts.push(`⚠️ ⚠️\n`);
|
|
511
310
|
// Show execution position summary
|
|
512
311
|
const successCount = context.successfulCommandsInCurrentStep?.length || 0;
|
|
513
312
|
const remainCount = context.remainingCommandsInCurrentStep?.length || 0;
|
|
@@ -556,7 +355,10 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
|
|
|
556
355
|
parts.push(`🎯 CURRENT STEP GOAL (${context.stepNumber}/${context.totalSteps}):`);
|
|
557
356
|
parts.push(`${context.currentStepGoal}`);
|
|
558
357
|
parts.push(``);
|
|
559
|
-
parts.push(`[WARNING]
|
|
358
|
+
parts.push(`[WARNING] BEFORE STARTING: Do prerequisites from prior steps still exist?`);
|
|
359
|
+
parts.push(` Example: Step 4 "Click Core HR" needs Step 3's "menu expanded" state`);
|
|
360
|
+
parts.push(` → Check screenshot: Is menu still expanded? If NO, re-expand before Step 4!`);
|
|
361
|
+
parts.push(`[WARNING] AFTER ACTING: Is THIS step's goal achieved? If YES, mark status="complete" NOW.`);
|
|
560
362
|
parts.push(`[WARNING] CRITICAL: Only interact with elements you SEE in the screenshot - no guessing/hallucinating!`);
|
|
561
363
|
parts.push(`OVERALL SCENARIO: ${context.overallGoal}\n`);
|
|
562
364
|
if (!isRepairMode) {
|
|
@@ -569,50 +371,77 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
|
|
|
569
371
|
}
|
|
570
372
|
// SoM screenshot (if available)
|
|
571
373
|
if (context.somScreenshot) {
|
|
572
|
-
parts.push(`\
|
|
573
|
-
parts.push(`Screenshot shows VIEWPORT ONLY (current visible area, not full page).`);
|
|
574
|
-
parts.push(`Color-coded bounding boxes mark interactive elements in the viewport.`);
|
|
575
|
-
parts.push(`Each element has a unique color and an ID label (1, 2, 3, etc.) at TOP-RIGHT corner, OUTSIDE the box.`);
|
|
576
|
-
parts.push(`Labels are typically positioned OUTSIDE and ABOVE the bounding box.`);
|
|
577
|
-
parts.push(`TO FIND THE CORRECT ELEMENT: match the label color with the bounding box color.`);
|
|
578
|
-
parts.push(`If target element not visible: SCROLL down/up OR use take_screenshot(isFullPage=true).`);
|
|
579
|
-
parts.push(`Reference element IDs in your commands using elementRef field (e.g., "1", "2", "42").`);
|
|
580
|
-
parts.push(`The screenshot is attached as an image - examine it to identify elements visually.`);
|
|
581
|
-
parts.push(``);
|
|
582
|
-
// SoM element map for disambiguation
|
|
374
|
+
parts.push(`\nSET-OF-MARKS: Full page with color-coded boxes + IDs. Match label color to box. Use IDs in elementRef.`);
|
|
583
375
|
if (context.somElementMap) {
|
|
584
|
-
parts.push(
|
|
585
|
-
parts.push(`If unsure which ID matches your target (e.g., is it 11 or 12?), use this map:`);
|
|
376
|
+
parts.push(`\nELEMENT MAP (for disambiguation):`);
|
|
586
377
|
parts.push(context.somElementMap);
|
|
587
|
-
parts.push(`Example: If you need a "Submit" button and see IDs 5 and 6 are both buttons, check the map to see which one says "Submit".`);
|
|
588
378
|
parts.push(``);
|
|
589
379
|
}
|
|
590
380
|
}
|
|
591
381
|
// Current page state (most variable content - at the end)
|
|
592
382
|
parts.push(`\nCURRENT PAGE:`);
|
|
593
383
|
parts.push(`URL: ${context.currentURL}`);
|
|
594
|
-
parts.push(`Title: ${context.
|
|
595
|
-
//
|
|
596
|
-
if (
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
parts.push(
|
|
384
|
+
parts.push(`Title: ${context.currentPageTitle}`);
|
|
385
|
+
// Page dimensions for scroll decisions
|
|
386
|
+
if (context.viewportWidth && context.viewportHeight && context.pageHeight) {
|
|
387
|
+
const heightOk = context.pageHeight < (context.viewportHeight * 2);
|
|
388
|
+
const widthOk = context.pageWidth && context.pageWidth < (context.viewportWidth * 2);
|
|
389
|
+
const isCompactPage = heightOk && widthOk;
|
|
390
|
+
parts.push(`\nPAGE DIMENSIONS & SCROLL POSITION:`);
|
|
391
|
+
parts.push(`Viewport: ${context.viewportWidth}x${context.viewportHeight}px`);
|
|
392
|
+
parts.push(`Full Page: ${context.pageWidth}x${context.pageHeight}px`);
|
|
393
|
+
parts.push(`Screenshot Mode: ${isCompactPage ? 'FULL PAGE (compact page, all visible)' : 'VIEWPORT ONLY (large page, full-page markers would be too small)'}`);
|
|
394
|
+
if (context.scrollY !== undefined && context.scrollY > 0) {
|
|
395
|
+
parts.push(`Current Scroll: ${context.scrollY}px from top (you've already scrolled down)`);
|
|
396
|
+
}
|
|
397
|
+
else {
|
|
398
|
+
parts.push(`Current Scroll: At top of page (scrollY = 0)`);
|
|
399
|
+
}
|
|
400
|
+
const canScrollDown = context.pageHeight > context.viewportHeight;
|
|
401
|
+
const canScrollRight = context.pageWidth && context.pageWidth > context.viewportWidth;
|
|
402
|
+
if (!isCompactPage && (canScrollDown || canScrollRight)) {
|
|
403
|
+
const remainingBelow = Math.max(0, context.pageHeight - context.viewportHeight - (context.scrollY || 0));
|
|
404
|
+
const remainingRight = context.pageWidth ? Math.max(0, context.pageWidth - context.viewportWidth - (context.scrollX || 0)) : 0;
|
|
405
|
+
const hiddenContent = [];
|
|
406
|
+
if (remainingBelow > 0)
|
|
407
|
+
hiddenContent.push(`${remainingBelow}px below`);
|
|
408
|
+
if (remainingRight > 0)
|
|
409
|
+
hiddenContent.push(`${remainingRight}px to right`);
|
|
410
|
+
if (hiddenContent.length > 0) {
|
|
411
|
+
parts.push(`Hidden content: ${hiddenContent.join(', ')}`);
|
|
412
|
+
parts.push(`💡 If element not found → Call: take_screenshot with {"isFullPage": true, "purpose": "Find X"}`);
|
|
413
|
+
parts.push(` This shows entire page (markers small but LLM can still locate elements)`);
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
else if (isCompactPage) {
|
|
417
|
+
parts.push(`All content visible in screenshot (no need for additional tools)`);
|
|
418
|
+
}
|
|
601
419
|
}
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
420
|
+
// In SoM mode, element details are in somElementMap (visual screenshot)
|
|
421
|
+
parts.push(`\nNote: Element details available in visual screenshot with SoM markers.`);
|
|
422
|
+
parts.push('');
|
|
423
|
+
// Show current URL with change detection
|
|
424
|
+
const currentUrl = truncateUrl(context.currentURL || '');
|
|
425
|
+
const lastAction = context.journeyMemory.history[context.journeyMemory.history.length - 1];
|
|
426
|
+
if (lastAction && lastAction.previousUrl && lastAction.url !== lastAction.previousUrl) {
|
|
427
|
+
const prevUrl = truncateUrl(lastAction.previousUrl);
|
|
428
|
+
const newUrl = truncateUrl(lastAction.url);
|
|
429
|
+
parts.push(`🔄 URL CHANGED: ${prevUrl} → ${newUrl}`);
|
|
430
|
+
parts.push(` ⚠️ Navigation occurred! Previous action likely succeeded and triggered page transition.\n`);
|
|
605
431
|
}
|
|
606
|
-
|
|
607
|
-
parts.push(
|
|
432
|
+
else {
|
|
433
|
+
parts.push(`📍 Current URL: ${currentUrl}\n`);
|
|
608
434
|
}
|
|
609
|
-
parts.push('');
|
|
610
435
|
// Recent steps (most variable content - at the end)
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
436
|
+
const recentStepsDisplay = context.journeyMemory.history.slice(-6);
|
|
437
|
+
if (recentStepsDisplay.length > 0) {
|
|
438
|
+
parts.push(`RECENT STEPS (last ${recentStepsDisplay.length}):`);
|
|
439
|
+
for (const step of recentStepsDisplay) {
|
|
614
440
|
const status = step.result === 'success' ? '[OK]' : '[FAIL]';
|
|
615
|
-
|
|
441
|
+
const urlChanged = step.previousUrl && step.url !== step.previousUrl
|
|
442
|
+
? ` [URL: ${step.previousUrl} → ${step.url}]`
|
|
443
|
+
: '';
|
|
444
|
+
parts.push(` ${status} ${step.stepNumber}.${step.iteration || ''} ${step.action}${urlChanged}`);
|
|
616
445
|
parts.push(` Code: ${step.code}`);
|
|
617
446
|
if (step.result === 'failure' && step.error) {
|
|
618
447
|
parts.push(` ERROR: ${step.error}`);
|
|
@@ -624,7 +453,7 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
|
|
|
624
453
|
}
|
|
625
454
|
parts.push('');
|
|
626
455
|
// Detect repeated failures
|
|
627
|
-
const recentFailures =
|
|
456
|
+
const recentFailures = recentStepsDisplay.filter(s => s.result === 'failure');
|
|
628
457
|
if (recentFailures.length >= 2) {
|
|
629
458
|
const sameSelector = recentFailures.slice(-2).every((s, i, arr) => i === 0 || s.code === arr[i - 1].code);
|
|
630
459
|
if (sameSelector) {
|
|
@@ -634,18 +463,49 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
|
|
|
634
463
|
}
|
|
635
464
|
}
|
|
636
465
|
}
|
|
637
|
-
//
|
|
638
|
-
if (context.
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
466
|
+
// Site learnings (persistent knowledge)
|
|
467
|
+
if (context.siteLearnings) {
|
|
468
|
+
const { screens, uxPatterns } = context.siteLearnings;
|
|
469
|
+
// Display UX patterns with IDs
|
|
470
|
+
const uxPatternEntries = Object.entries(uxPatterns);
|
|
471
|
+
if (uxPatternEntries.length > 0) {
|
|
472
|
+
parts.push(`\n🎯 SITE-WIDE UX PATTERNS (reference [ID] for updates/deletes):`);
|
|
473
|
+
uxPatternEntries.forEach(([id, text]) => parts.push(` [${id}] ${text}`));
|
|
474
|
+
parts.push('');
|
|
475
|
+
}
|
|
476
|
+
// Display screen/state vocabulary first (for consistent naming)
|
|
477
|
+
if (context.siteLearnings?.screenStateVocabulary && Object.keys(context.siteLearnings.screenStateVocabulary).length > 0) {
|
|
478
|
+
parts.push(`\n📋 SCREEN STATE VOCABULARY (use these names for consistency):`);
|
|
479
|
+
Object.entries(context.siteLearnings.screenStateVocabulary).forEach(([screenName, stateNames]) => {
|
|
480
|
+
const statesDisplay = stateNames.length > 0
|
|
481
|
+
? ` → States: ${stateNames.map(s => s || '""').join(', ')}`
|
|
482
|
+
: '';
|
|
483
|
+
parts.push(` • ${screenName}${statesDisplay}`);
|
|
484
|
+
});
|
|
485
|
+
parts.push('');
|
|
486
|
+
}
|
|
487
|
+
// Display screen state knowledge with IDs
|
|
488
|
+
if (screens && Object.keys(screens).length > 0) {
|
|
489
|
+
parts.push(`\n📚 SCREEN STATE KNOWLEDGE (reference [ID] for updates/deletes):`);
|
|
490
|
+
Object.entries(screens).forEach(([screenName, screenLearnings]) => {
|
|
491
|
+
Object.entries(screenLearnings.states).forEach(([state, learning]) => {
|
|
492
|
+
const stateLabel = state ? `[${state}]` : '';
|
|
493
|
+
parts.push(`\n ${screenName}${stateLabel}:`);
|
|
494
|
+
const obsEntries = Object.entries(learning.observations);
|
|
495
|
+
if (obsEntries.length > 0) {
|
|
496
|
+
obsEntries.forEach(([id, text]) => {
|
|
497
|
+
parts.push(` [${id}] ${text}`);
|
|
498
|
+
});
|
|
499
|
+
}
|
|
500
|
+
});
|
|
501
|
+
});
|
|
502
|
+
parts.push('');
|
|
642
503
|
}
|
|
643
|
-
parts.push('');
|
|
644
504
|
}
|
|
645
505
|
// Extracted data (from previous extract_data tool calls)
|
|
646
|
-
if (context.extractedData && Object.keys(context.extractedData).length > 0) {
|
|
506
|
+
if (context.journeyMemory.extractedData && Object.keys(context.journeyMemory.extractedData).length > 0) {
|
|
647
507
|
parts.push(`\nEXTRACTED DATA (available for use in commands):`);
|
|
648
|
-
parts.push(JSON.stringify(context.extractedData, null, 2));
|
|
508
|
+
parts.push(JSON.stringify(context.journeyMemory.extractedData, null, 2));
|
|
649
509
|
parts.push('');
|
|
650
510
|
}
|
|
651
511
|
return parts.join('\n');
|
|
@@ -662,19 +522,20 @@ DISCRETE EXPERIENCE LOOP:
|
|
|
662
522
|
You operate in iterations: receive state → decide → sleep → wake with new state.
|
|
663
523
|
System waits for page stability after each batch.
|
|
664
524
|
|
|
665
|
-
CRITICAL: MEMORY
|
|
666
|
-
|
|
667
|
-
- Current screenshot
|
|
668
|
-
-
|
|
669
|
-
-
|
|
525
|
+
CRITICAL: NO SCREENSHOT MEMORY (STATELESS!)
|
|
526
|
+
Each iteration you receive:
|
|
527
|
+
- Current screenshot (NOT previous screenshots!)
|
|
528
|
+
- Past actions (text descriptions, not screenshots)
|
|
529
|
+
- Ongoing memory (experiences, patterns)
|
|
670
530
|
- Your noteToFutureSelf from last iteration
|
|
531
|
+
- Current journey goal
|
|
671
532
|
|
|
672
|
-
The noteToFutureSelf is your
|
|
673
|
-
•
|
|
674
|
-
•
|
|
675
|
-
•
|
|
676
|
-
•
|
|
677
|
-
|
|
533
|
+
The noteToFutureSelf is your way to document expectations for verification. MUST include EXPLICIT EXPECTED STATE:
|
|
534
|
+
• ✅ GOOD: "Clicked sidebar menu button (was collapsed). EXPECT: expanded sidebar with 'Dashboard' and 'Reports' visible"
|
|
535
|
+
• ✅ GOOD: "Navigated to /settings. EXPECT: URL changed, 'Save Settings' button visible"
|
|
536
|
+
• ❌ BAD: "Clicked menu" (future you can't verify if it worked!)
|
|
537
|
+
• ❌ BAD: "Clicked ID 8" (ID meaningless without screenshot!)
|
|
538
|
+
Also include: strategy, observations, patterns discovered, backup plans if this fails
|
|
678
539
|
|
|
679
540
|
COMMON UX PATTERNS (critical for navigation):
|
|
680
541
|
• Disabled buttons → Fill required fields first to enable them
|
|
@@ -689,31 +550,13 @@ COMMON UX PATTERNS (critical for navigation):
|
|
|
689
550
|
• Lazy loading → Scroll down to load more content
|
|
690
551
|
• Accordions/expandable → Click header to toggle visibility
|
|
691
552
|
|
|
692
|
-
|
|
553
|
+
${RESPONSE_SCHEMA}
|
|
693
554
|
|
|
694
|
-
interface
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
// COMMANDS: Array of plain Playwright command strings
|
|
700
|
-
commands?: string[]; // Example: ["await page.fill('input[name=\"email\"]', 'test@example.com')", ...]
|
|
701
|
-
commandReasoning?: string;
|
|
702
|
-
toolCalls?: Array<{ // Tools to call (extract_data for menus, etc.)
|
|
703
|
-
name: string;
|
|
704
|
-
params: Record<string, any>;
|
|
705
|
-
}>;
|
|
706
|
-
toolReasoning?: string;
|
|
707
|
-
needsToolResults?: boolean;
|
|
708
|
-
noteToFutureSelf?: string;
|
|
709
|
-
coordinateAction?: { ... };
|
|
710
|
-
experiences?: string[]; // Use for BOTH app patterns AND exploration progress
|
|
711
|
-
blockerDetected?: { ... };
|
|
712
|
-
debugInfo?: { // Meta-learning: suggest prompt improvements (only when very confident)
|
|
713
|
-
suggestedPromptUpdates?: string;
|
|
714
|
-
reasoning?: string;
|
|
715
|
-
};
|
|
716
|
-
}
|
|
555
|
+
YOUR RESPONSE FORMAT - Output JSON matching AgentDecision interface above.
|
|
556
|
+
|
|
557
|
+
For exploration mode, also include:
|
|
558
|
+
- stepSummary: Concise 1-sentence summary of what was accomplished this iteration
|
|
559
|
+
- commands: Array of plain Playwright command strings (exploration uses string commands, not SoM)
|
|
717
560
|
|
|
718
561
|
EXPLORATION MODE GUIDELINES:
|
|
719
562
|
|
|
@@ -726,7 +569,7 @@ EXPLORATION MODE GUIDELINES:
|
|
|
726
569
|
|
|
727
570
|
3. **VISIBLE ELEMENTS ONLY**: Screenshot shows viewport only. Only interact with elements you SEE. If not visible, scroll or take_screenshot(isFullPage=true).
|
|
728
571
|
|
|
729
|
-
4. **SYSTEMATIC EXPLORATION**: Use extract_data to discover, store in extractedData, track in
|
|
572
|
+
4. **SYSTEMATIC EXPLORATION**: Use extract_data to discover, store in extractedData, track in siteLearningsUpdate, check history to avoid repeating, prioritize unexplored areas.
|
|
730
573
|
|
|
731
574
|
5. **CREATIVE TESTING**: Test functionality thoroughly - try edge cases, verify features work, look for bugs.
|
|
732
575
|
|
|
@@ -741,7 +584,25 @@ EXPLORATION MODE GUIDELINES:
|
|
|
741
584
|
|
|
742
585
|
11. **STEP SUMMARY**: When you complete actions, provide a concise 1-sentence summary of what was accomplished (e.g., "Logged in successfully", "Navigated to dashboard", "Created new widget"). This is used for step tracking, not future planning.
|
|
743
586
|
|
|
744
|
-
12. **MEMORY**:
|
|
587
|
+
12. **MEMORY (STATELESS!)**: You see only current screenshot. MUST write expected state in noteToFutureSelf:
|
|
588
|
+
- ✅ "Clicked settings button in navbar. EXPECT: settings page with 'Profile' section visible"
|
|
589
|
+
- ❌ "Clicked settings" (can't verify!)
|
|
590
|
+
- ❌ "Clicked ID 9" (ID meaningless without screenshot!)
|
|
591
|
+
- siteLearningsUpdate=persistent knowledge, extractedData=journey discoveries
|
|
592
|
+
|
|
593
|
+
SITE LEARNINGS: Build mental model (persistent across journeys)
|
|
594
|
+
- screenState: {screen, state} to identify current context (NEVER: "about:blank", "loading" states)
|
|
595
|
+
- siteLearningsUpdate: Add/update/delete observations per screen-state
|
|
596
|
+
CRITICAL: NEVER include SoM IDs ("element 9", "ID 5") - they regenerate every page load!
|
|
597
|
+
✅ "Workspace selector opens on caret icon click"
|
|
598
|
+
❌ "Element 9 opens dropdown with entries 6,7,8"
|
|
599
|
+
|
|
600
|
+
WHEN TO STORE:
|
|
601
|
+
✅ After discovering navigation (uxPatterns)
|
|
602
|
+
✅ After learning UI behavior (uxPatterns)
|
|
603
|
+
✅ When understanding screen layout (observations)
|
|
604
|
+
✅ When selector fails (observations)
|
|
605
|
+
❌ Don't store obvious/temporary things
|
|
745
606
|
|
|
746
607
|
CRITICAL: You're fully autonomous for THIS journey - no step-by-step instructions provided.
|
|
747
608
|
YOU decide the exploration path to meet the journey goal based on: journey prompt, current state, and memory.`;
|
|
@@ -773,21 +634,20 @@ YOU decide the exploration path to meet the journey goal based on: journey promp
|
|
|
773
634
|
parts.push(`PROGRESS: Step ${stepNumber}/${maxSteps} (you can complete earlier if journey goal met)\n`);
|
|
774
635
|
}
|
|
775
636
|
// Show discovered and tracked data from extractedData
|
|
776
|
-
if (context.extractedData && Object.keys(context.extractedData).length > 0) {
|
|
637
|
+
if (context.journeyMemory.extractedData && Object.keys(context.journeyMemory.extractedData).length > 0) {
|
|
777
638
|
parts.push(`\nDISCOVERED DATA (this journey):`);
|
|
778
|
-
for (const [key, value] of Object.entries(context.extractedData)) {
|
|
639
|
+
for (const [key, value] of Object.entries(context.journeyMemory.extractedData)) {
|
|
779
640
|
parts.push(` ${key}: ${value}`);
|
|
780
641
|
}
|
|
781
642
|
}
|
|
782
643
|
// SoM screenshot (if available)
|
|
783
644
|
if (context.somScreenshot) {
|
|
784
645
|
parts.push(`\n SET-OF-MARKS SCREENSHOT (with element IDs):`);
|
|
785
|
-
parts.push(`Screenshot shows
|
|
786
|
-
parts.push(`Color-coded bounding boxes mark interactive elements
|
|
646
|
+
parts.push(`Screenshot shows FULL PAGE (all content, including below-fold elements).`);
|
|
647
|
+
parts.push(`Color-coded bounding boxes mark ALL interactive elements across entire page.`);
|
|
787
648
|
parts.push(`Each element has a unique color and an ID label (1, 2, 3, etc.) at TOP-RIGHT corner, OUTSIDE the box.`);
|
|
788
649
|
parts.push(`Labels are typically positioned OUTSIDE and ABOVE the bounding box.`);
|
|
789
650
|
parts.push(`TO FIND THE CORRECT ELEMENT: match the label color with the bounding box color.`);
|
|
790
|
-
parts.push(`If target element not visible: SCROLL down/up OR use take_screenshot(isFullPage=true).`);
|
|
791
651
|
parts.push(`Reference element IDs in your commands using elementRef field (e.g., "1", "2", "42").`);
|
|
792
652
|
parts.push(`The screenshot is attached as an image - examine it to identify elements visually.`);
|
|
793
653
|
parts.push(``);
|
|
@@ -808,41 +668,72 @@ YOU decide the exploration path to meet the journey goal based on: journey promp
|
|
|
808
668
|
}
|
|
809
669
|
parts.push(`\nCURRENT PAGE:`);
|
|
810
670
|
parts.push(`URL: ${context.currentURL}`);
|
|
811
|
-
parts.push(`Title: ${context.
|
|
812
|
-
//
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
671
|
+
parts.push(`Title: ${context.currentPageTitle}`);
|
|
672
|
+
// In SoM mode, element details are in somElementMap
|
|
673
|
+
parts.push(`\nNote: Element details available in visual screenshot with SoM markers.`);
|
|
674
|
+
// Recent actions
|
|
675
|
+
// Show current URL with change detection
|
|
676
|
+
const currentUrl = truncateUrl(context.currentURL || '');
|
|
677
|
+
const lastAction = context.journeyMemory.history[context.journeyMemory.history.length - 1];
|
|
678
|
+
if (lastAction && lastAction.previousUrl && lastAction.url !== lastAction.previousUrl) {
|
|
679
|
+
const prevUrl = truncateUrl(lastAction.previousUrl);
|
|
680
|
+
const newUrl = truncateUrl(lastAction.url);
|
|
681
|
+
parts.push(`\n🔄 URL CHANGED: ${prevUrl} → ${newUrl}`);
|
|
682
|
+
parts.push(` ⚠️ Navigation occurred! Previous action likely triggered page transition.\n`);
|
|
818
683
|
}
|
|
819
684
|
else {
|
|
820
|
-
|
|
821
|
-
parts.push(`\nNote: Element details available in visual screenshot with SoM markers.`);
|
|
822
|
-
}
|
|
823
|
-
if (JSON.stringify(context.currentPageInfo.ariaSnapshot).length > 5000) {
|
|
824
|
-
parts.push('... (truncated)');
|
|
685
|
+
parts.push(`\n📍 Current URL: ${currentUrl}\n`);
|
|
825
686
|
}
|
|
826
|
-
|
|
827
|
-
if (
|
|
828
|
-
parts.push(
|
|
829
|
-
for (const step of
|
|
687
|
+
const recentActions = context.journeyMemory.history.slice(-6);
|
|
688
|
+
if (recentActions.length > 0) {
|
|
689
|
+
parts.push(`RECENT ACTIONS (last ${recentActions.length}):`);
|
|
690
|
+
for (const step of recentActions) {
|
|
830
691
|
const status = step.result === 'success' ? '[OK]' : '[FAIL]';
|
|
831
|
-
|
|
692
|
+
const urlChanged = step.previousUrl && step.url !== step.previousUrl
|
|
693
|
+
? ` [URL: ${truncateUrl(step.previousUrl)} → ${truncateUrl(step.url)}]`
|
|
694
|
+
: '';
|
|
695
|
+
parts.push(` ${status} ${step.action}${urlChanged}`);
|
|
832
696
|
parts.push(` ${step.observation}`);
|
|
833
697
|
}
|
|
834
698
|
}
|
|
835
|
-
//
|
|
836
|
-
if (context.
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
699
|
+
// Site learnings
|
|
700
|
+
if (context.siteLearnings) {
|
|
701
|
+
const { screens, uxPatterns } = context.siteLearnings;
|
|
702
|
+
const uxPatternEntries = Object.entries(uxPatterns);
|
|
703
|
+
if (uxPatternEntries.length > 0) {
|
|
704
|
+
parts.push(`\n🎯 SITE-WIDE UX PATTERNS (reference [ID] for updates/deletes):`);
|
|
705
|
+
uxPatternEntries.forEach(([id, text]) => parts.push(` [${id}] ${text}`));
|
|
706
|
+
}
|
|
707
|
+
// Display screen/state vocabulary first (for consistent naming)
|
|
708
|
+
if (context.siteLearnings?.screenStateVocabulary && Object.keys(context.siteLearnings.screenStateVocabulary).length > 0) {
|
|
709
|
+
parts.push(`\n📋 SCREEN STATE VOCABULARY (use these names for consistency):`);
|
|
710
|
+
Object.entries(context.siteLearnings.screenStateVocabulary).forEach(([screenName, stateNames]) => {
|
|
711
|
+
const statesDisplay = stateNames.length > 0
|
|
712
|
+
? ` → States: ${stateNames.map(s => s || '""').join(', ')}`
|
|
713
|
+
: '';
|
|
714
|
+
parts.push(` • ${screenName}${statesDisplay}`);
|
|
715
|
+
});
|
|
716
|
+
}
|
|
717
|
+
if (screens && Object.keys(screens).length > 0) {
|
|
718
|
+
parts.push(`\n📚 SCREEN STATE KNOWLEDGE (reference [ID] for updates/deletes):`);
|
|
719
|
+
Object.entries(screens).forEach(([screenName, screenLearnings]) => {
|
|
720
|
+
Object.entries(screenLearnings.states).forEach(([state, learning]) => {
|
|
721
|
+
const stateLabel = state ? `[${state}]` : '';
|
|
722
|
+
parts.push(`\n ${screenName}${stateLabel}:`);
|
|
723
|
+
const obsEntries = Object.entries(learning.observations);
|
|
724
|
+
if (obsEntries.length > 0) {
|
|
725
|
+
obsEntries.forEach(([id, text]) => {
|
|
726
|
+
parts.push(` [${id}] ${text}`);
|
|
727
|
+
});
|
|
728
|
+
}
|
|
729
|
+
});
|
|
730
|
+
});
|
|
840
731
|
}
|
|
841
732
|
}
|
|
842
733
|
// Note from previous iteration
|
|
843
|
-
if (context.
|
|
844
|
-
parts.push(`\nYOUR NOTE FROM LAST ITERATION: ${context.
|
|
845
|
-
parts.push(`
|
|
734
|
+
if (context.journeyMemory.latestNote) {
|
|
735
|
+
parts.push(`\nYOUR NOTE FROM LAST ITERATION: ${context.journeyMemory.latestNote.content}`);
|
|
736
|
+
parts.push(` ^^ Follow your own instructions from previous iteration ^^`);
|
|
846
737
|
}
|
|
847
738
|
parts.push(`\nDECIDE NEXT ACTION: What to explore/test next? Check history to avoid repeating. Is goal achieved? Mark complete.`);
|
|
848
739
|
return parts.join('\n');
|