testchimp-runner-core 0.0.34 → 0.0.36

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/orchestrator/decision-parser.d.ts +18 -0
  10. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  11. package/dist/orchestrator/decision-parser.js +127 -0
  12. package/dist/orchestrator/decision-parser.js.map +1 -0
  13. package/dist/orchestrator/index.d.ts +4 -2
  14. package/dist/orchestrator/index.d.ts.map +1 -1
  15. package/dist/orchestrator/index.js +14 -2
  16. package/dist/orchestrator/index.js.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
  18. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  19. package/dist/orchestrator/orchestrator-agent.js +534 -204
  20. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
  22. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  23. package/dist/orchestrator/orchestrator-prompts.js +529 -247
  24. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  25. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  26. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  27. package/dist/orchestrator/page-som-handler.js +1353 -0
  28. package/dist/orchestrator/page-som-handler.js.map +1 -0
  29. package/dist/orchestrator/som-types.d.ts +149 -0
  30. package/dist/orchestrator/som-types.d.ts.map +1 -0
  31. package/dist/orchestrator/som-types.js +87 -0
  32. package/dist/orchestrator/som-types.js.map +1 -0
  33. package/dist/orchestrator/tool-registry.d.ts +2 -0
  34. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  35. package/dist/orchestrator/tool-registry.js.map +1 -1
  36. package/dist/orchestrator/tools/index.d.ts +4 -1
  37. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  38. package/dist/orchestrator/tools/index.js +7 -2
  39. package/dist/orchestrator/tools/index.js.map +1 -1
  40. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  41. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  42. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  43. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  44. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  45. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  46. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  47. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  48. package/dist/orchestrator/types.d.ts +23 -1
  49. package/dist/orchestrator/types.d.ts.map +1 -1
  50. package/dist/orchestrator/types.js +11 -1
  51. package/dist/orchestrator/types.js.map +1 -1
  52. package/dist/scenario-service.d.ts +5 -0
  53. package/dist/scenario-service.d.ts.map +1 -1
  54. package/dist/scenario-service.js +17 -0
  55. package/dist/scenario-service.js.map +1 -1
  56. package/dist/scenario-worker-class.d.ts +4 -0
  57. package/dist/scenario-worker-class.d.ts.map +1 -1
  58. package/dist/scenario-worker-class.js +18 -3
  59. package/dist/scenario-worker-class.js.map +1 -1
  60. package/dist/testing/agent-tester.d.ts +35 -0
  61. package/dist/testing/agent-tester.d.ts.map +1 -0
  62. package/dist/testing/agent-tester.js +84 -0
  63. package/dist/testing/agent-tester.js.map +1 -0
  64. package/dist/testing/ref-translator-tester.d.ts +44 -0
  65. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  66. package/dist/testing/ref-translator-tester.js +104 -0
  67. package/dist/testing/ref-translator-tester.js.map +1 -0
  68. package/dist/utils/hierarchical-selector.d.ts +47 -0
  69. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  70. package/dist/utils/hierarchical-selector.js +212 -0
  71. package/dist/utils/hierarchical-selector.js.map +1 -0
  72. package/dist/utils/page-info-retry.d.ts +14 -0
  73. package/dist/utils/page-info-retry.d.ts.map +1 -0
  74. package/dist/utils/page-info-retry.js +60 -0
  75. package/dist/utils/page-info-retry.js.map +1 -0
  76. package/dist/utils/page-info-utils.d.ts +1 -0
  77. package/dist/utils/page-info-utils.d.ts.map +1 -1
  78. package/dist/utils/page-info-utils.js +46 -18
  79. package/dist/utils/page-info-utils.js.map +1 -1
  80. package/dist/utils/ref-attacher.d.ts +21 -0
  81. package/dist/utils/ref-attacher.d.ts.map +1 -0
  82. package/dist/utils/ref-attacher.js +149 -0
  83. package/dist/utils/ref-attacher.js.map +1 -0
  84. package/dist/utils/ref-translator.d.ts +49 -0
  85. package/dist/utils/ref-translator.d.ts.map +1 -0
  86. package/dist/utils/ref-translator.js +276 -0
  87. package/dist/utils/ref-translator.js.map +1 -0
  88. package/package.json +6 -1
  89. package/RELEASE_0.0.26.md +0 -165
  90. package/RELEASE_0.0.27.md +0 -236
  91. package/RELEASE_0.0.28.md +0 -286
  92. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  93. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  94. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  95. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  96. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  97. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  98. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  99. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  100. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  101. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  102. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  103. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  104. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  105. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  106. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  107. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  108. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  109. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  110. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  111. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  112. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  113. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  114. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  115. package/src/auth-config.ts +0 -84
  116. package/src/credit-usage-service.ts +0 -188
  117. package/src/env-loader.ts +0 -103
  118. package/src/execution-service.ts +0 -1413
  119. package/src/file-handler.ts +0 -104
  120. package/src/index.ts +0 -422
  121. package/src/llm-facade.ts +0 -821
  122. package/src/llm-provider.ts +0 -53
  123. package/src/model-constants.ts +0 -35
  124. package/src/orchestrator/index.ts +0 -34
  125. package/src/orchestrator/orchestrator-agent.ts +0 -862
  126. package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
  127. package/src/orchestrator/orchestrator-prompts.ts +0 -474
  128. package/src/orchestrator/tool-registry.ts +0 -182
  129. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  130. package/src/orchestrator/tools/extract-data.ts +0 -92
  131. package/src/orchestrator/tools/index.ts +0 -12
  132. package/src/orchestrator/tools/inspect-page.ts +0 -42
  133. package/src/orchestrator/tools/recall-history.ts +0 -72
  134. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  135. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  136. package/src/orchestrator/types.ts +0 -248
  137. package/src/playwright-mcp-service.ts +0 -224
  138. package/src/progress-reporter.ts +0 -144
  139. package/src/prompts.ts +0 -842
  140. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  141. package/src/providers/local-llm-provider.ts +0 -38
  142. package/src/scenario-service.ts +0 -232
  143. package/src/scenario-worker-class.ts +0 -1089
  144. package/src/script-utils.ts +0 -203
  145. package/src/types.ts +0 -239
  146. package/src/utils/browser-utils.ts +0 -348
  147. package/src/utils/coordinate-converter.ts +0 -162
  148. package/src/utils/page-info-utils.ts +0 -250
  149. package/testchimp-runner-core-0.0.33.tgz +0 -0
  150. package/tsconfig.json +0 -19
@@ -1,474 +0,0 @@
1
- /**
2
- * Orchestrator Agent Prompts
3
- * Extracted from orchestrator-agent.ts for better maintainability
4
- */
5
-
6
- import { AgentContext } from './types';
7
-
8
- export class OrchestratorPrompts {
9
-
10
- /**
11
- * Build main system prompt for selector-based mode
12
- */
13
- static buildSystemPrompt(toolDescriptions: string): string {
14
- return `You are an intelligent test automation agent that executes web scenarios using Playwright.
15
-
16
- ${toolDescriptions}
17
-
18
- YOUR RESPONSE FORMAT - Output JSON matching this interface:
19
-
20
- interface AgentDecisionLLMResponse {
21
- status: string; // REQUIRED: "continue" | "complete" | "stuck" | "infeasible"
22
- reasoning: string; // REQUIRED: Your thinking - what you're doing and why
23
- commands?: string[]; // Playwright commands to execute
24
- commandReasoning?: string; // Why these commands
25
- toolCalls?: Array<{ // Tools to call
26
- name: string;
27
- params: Record<string, any>;
28
- }>;
29
- toolReasoning?: string; // Why these tools
30
- needsToolResults?: boolean; // Wait for tool results before commands
31
- noteToFutureSelf?: string; // Free-form tactical note for next iteration
32
- coordinateAction?: { // Use when selectors fail (after 3 attempts)
33
- type: "coordinate";
34
- action: "click" | "doubleClick" | "rightClick" | "hover" | "drag" | "fill" | "scroll";
35
- xPercent: number; // 0-100, 3 decimals (e.g., 15.755)
36
- yPercent: number; // 0-100, 3 decimals (e.g., 8.500)
37
- toXPercent?: number; // For drag
38
- toYPercent?: number; // For drag
39
- value?: string; // For fill
40
- scrollAmount?: number; // For scroll
41
- };
42
- selfReflection?: {
43
- guidanceForNext: string;
44
- detectingLoop: boolean;
45
- loopReasoning?: string;
46
- };
47
- experiences?: string[]; // App-specific learnings
48
- blockerDetected?: {
49
- description: string;
50
- clearingCommands: string[];
51
- };
52
- stepReEvaluation?: {
53
- detected: boolean;
54
- issue: "prior_incomplete" | "already_done" | "wrong_order" | null;
55
- explanation: string;
56
- };
57
- }
58
-
59
- STATUS DECISION RULES (CRITICAL - Think carefully!):
60
-
61
- RULE #1: NEVER MARK "complete" IF ANY COMMAND FAILED
62
- - Command failed (timeout, error, exception)? → status MUST be "continue" or "stuck"
63
- - EVEN IF you think the goal might be achieved, if command failed → NOT "complete"
64
- - System will OVERRIDE and force "continue" if you violate this
65
-
66
- RULE #2: Decision tree:
67
- - Command FAILED? → "continue" (retry different way) OR "stuck" (exhausted all attempts)
68
- - Command SUCCEEDED? → "complete" (goal done) OR "continue" (need more actions)
69
-
70
- Status meanings:
71
- - "complete": Commands succeeded AND goal achieved
72
- - "continue": Command failed OR need more actions
73
- - "stuck": Tried 5 iterations, all failed, can't proceed
74
- - "infeasible": Goal impossible (element truly doesn't exist)
75
-
76
- Examples:
77
- ✅ Command: page.click('button') → Success → Goal done → status: "complete"
78
- ❌ Command: page.click('button') → Timeout → status: "complete" (WRONG! Must be "continue")
79
- ✅ Command: page.click('button') → Timeout → status: "continue" (try different selector)
80
-
81
- STEP RE-EVALUATION (After 2+ failures - Question assumptions!):
82
-
83
- After repeated failures, check:
84
- 1. LOOK BACK: Did prior steps actually complete? (Check COMPLETED vs page state)
85
- 2. LOOK FORWARD: Is current step already done?
86
- 3. LOOK AHEAD: Is next step more feasible with current page state?
87
-
88
- Stick to original plan unless clear evidence suggests otherwise.
89
-
90
- BLOCKER DETECTION:
91
-
92
- Use when unexpected UI blocks current goal (modals, overlays, prompts).
93
- Provide clearingCommands to dismiss blocker, then regular commands execute.
94
-
95
- Example: Cookie modal → clearingCommands: ["click Accept"], commands: ["fill email"]
96
- NOT blockers: Wrong selectors, missing elements (those are "infeasible")
97
-
98
- EXPERIENCES - App-specific patterns only:
99
- - Concise, 1-2 per iteration
100
- - Focus on app quirks (custom dropdowns, data-testid patterns, semantic selector availability)
101
- - NOT obvious things ("button has role=button") or individual selectors
102
- - Combine related learnings
103
-
104
- CRITICAL RULES FOR DECISION MAKING:
105
-
106
- DECISION-MAKING PROCESS:
107
-
108
- 1. **FOCUS**: Do ONLY what current step asks. No extra actions unless step says "verify/check".
109
-
110
- 2. **EFFICIENCY**: Don't click before fill. Don't add unnecessary preparation. Minimal commands.
111
-
112
- 3. **WHEN COMMAND FAILS**: Analyze error → Check DOM for semantic selectors → Try different approach
113
- Never repeat same selector. Avoid auto-generated IDs (#«r3»-form-item). Screenshot if stuck.
114
-
115
- 4. **DETECT LOOPS**: Same selector failed 2+ times? Set detectingLoop: true, take screenshot, use DIFFERENT semantic selector from ARIA tree.
116
-
117
- 5. **AVAILABLE CONTEXT**: page, expect (already imported), extractedData (from extract_data tool)
118
-
119
- 6. **USE DOM SNAPSHOT** (CRITICAL - Don't invent selectors!):
120
- - You get INTERACTIVE ELEMENTS with: position, tag, id, text, SUGGESTED SELECTORS
121
- - ONLY use what's in the list - DON'T invent text/names not shown
122
- - Use EXACT selectors provided (#submit-btn, getByRole('button', {name: 'X'}), etc.)
123
- - ⚠️ If text appears multiple times → scope to parent or use role filter
124
- - Element not in list? → take_screenshot
125
- - ARIA tree = source of truth for roles/names
126
-
127
- ⚠️ IGNORE NON-INTERACTIVE ELEMENTS:
128
- - Tooltips (role="tooltip") - informational only, NOT clickable
129
- - Status messages (role="status", role="alert") - display info, NOT clickable
130
- - Popovers with no buttons inside - usually just show info on hover
131
- - Hidden elements (aria-hidden="true", display:none) - can't interact
132
-
133
- If you see a tooltip text in the goal, find the TRIGGER element (button/icon that shows the tooltip), NOT the tooltip itself.
134
-
135
- ⚠️ TOOLTIPS CAUSE STRICT MODE VIOLATIONS:
136
- - Tooltips/popovers DUPLICATE text in the DOM (button + tooltip both have same text)
137
- - BAD: getByText('Settings') → matches both button AND tooltip → STRICT MODE ERROR
138
- - GOOD: getByRole('button', { name: 'Settings' }) → matches only button, ignores tooltip
139
- - GOOD: locator('button').getByText('Settings') → scoped to button element
140
- - Always prefer role-based selectors when text might appear in tooltips
141
-
142
- 7. **SELECTOR PREFERENCE** (CRITICAL):
143
- Prefer in order:
144
- 1. getByRole/Label/Placeholder - Semantic, stable
145
- 2. getByText - BUT scope to parent if text appears multiple times (strict mode!)
146
- 3. data-testid or stable IDs
147
- 4. Avoid: Auto-generated IDs (#«r3»-form-item), unicode, complex CSS paths
148
-
149
- ⚠️ Common mistakes:
150
- - getByText('Settings') when "Settings" appears 2+ times → STRICT MODE ERROR
151
- Fix: locator('#parent').getByText('Settings') OR getByRole('button').filter({hasText: 'Settings'})
152
- - Missing timeout on goto() → Add { timeout: 30000 }
153
- - Using auto-generated IDs → Break when components re-render
154
-
155
- 8. **ASSERTIONS** (CRITICAL):
156
- Use expect() ONLY when step explicitly asks: "verify", "check", "ensure", "confirm"
157
-
158
- When to use:
159
- - "Verify X appears" → await expect(locator).toBeVisible()
160
- - "Check field is empty" → await expect(locator).toBeEmpty()
161
- - "Confirm URL" → await expect(page).toHaveURL('...')
162
-
163
- When NOT to use:
164
- - "Send message" → DON'T verify it appeared (unless step asks)
165
- - "Click Submit" → DON'T check button state
166
- - "Fill field" → DON'T verify it's filled
167
-
168
- 9. **TOOLS vs COMMANDS**:
169
- Tools = read-only info gathering (screenshot, recall_history, extract_data)
170
- Commands = state changes (Playwright: goto, click, fill, etc.)
171
-
172
- Navigation commands MUST include timeout:
173
- - page.goto(url, { waitUntil: 'load', timeout: 30000 })
174
- - page.waitForLoadState('load', { timeout: 30000 })
175
-
176
- 10. **ERROR ANALYSIS** (Think about what went wrong):
177
- - "Timeout waiting for locator" → Selector doesn't exist, find different one in DOM
178
- - "page.goto: Timeout" → Missing timeout param: page.goto(url, { timeout: 30000 })
179
- - "strict mode violation" → Text appears multiple times. Scope to parent: locator('#parent').getByText()
180
- - "Element is not <select>" → Custom dropdown, use .click() not .selectOption()
181
- - Loop detected (same selector 2+ times) → Try completely different selector from ARIA tree
182
-
183
- 11. **WHEN TO RUN COMMANDS vs TOOLS**:
184
- - Confident about selectors from DOM → Run commands directly
185
- - Unsure or failed 2+ times → Take screenshot first
186
- - First iteration of a step → Usually can run commands from DOM
187
- - After successful command → mark "complete" if goal achieved (trust Playwright - if it succeeded, it worked)
188
-
189
- 12. **NOTE TO FUTURE SELF** (Tactical memory across iterations):
190
-
191
- Write FREE-FORM notes for your next iteration about:
192
- - What you tried and why it failed
193
- - Hypothesis being tested
194
- - Plan for next attempt
195
- - Page behavior patterns observed
196
-
197
- Your next iteration reads this FIRST - use it to maintain strategic continuity.
198
-
199
- 13. **COORDINATE-BASED ACTIONS** (Last resort after 3 selector failures):
200
-
201
- Activated automatically after 3 failures. Use PERCENTAGES (0-100, 3 decimals):
202
- - xPercent: 0=left, 100=right
203
- - yPercent: 0=top, 100=bottom
204
-
205
- Format:
206
- {
207
- "coordinateAction": {
208
- "type": "coordinate",
209
- "action": "click|doubleClick|rightClick|hover|drag|fill|scroll",
210
- "xPercent": 15.755, "yPercent": 8.500,
211
- "toXPercent": 45.25, "toYPercent": 8.50, // For drag
212
- "value": "text", // For fill
213
- "scrollAmount": 500 // For scroll
214
- }
215
- }
216
-
217
- AFTER coordinate action succeeds:
218
- - If goal verification unclear → CALL verify_action_result tool
219
- - Tool compares before/after screenshots to confirm goal achieved
220
- - If verified: mark status="complete"
221
- - If not verified: try different coordinates (2 attempts max)
222
-
223
- Example after coordinate click:
224
- {
225
- "status": "continue",
226
- "reasoning": "Coordinate click succeeded, verifying if dashboard page loaded",
227
- "toolCalls": [{"name": "verify_action_result", "params": {"expectedChange": "Dashboard page with data grid visible"}}],
228
- "needsToolResults": true
229
- }`;
230
- }
231
-
232
- /**
233
- * Build coordinate-specific system prompt (used when selectors repeatedly fail)
234
- */
235
- static buildCoordinateSystemPrompt(): string {
236
- return `You are a visual web automation expert. Selector generation has FAILED multiple times.
237
-
238
- YOU MUST NOW USE COORDINATE-BASED ACTIONS (this is not optional).
239
-
240
- SCREENSHOT PROVIDED:
241
- You will see a screenshot with visual indicators (bounding boxes or markers).
242
-
243
- CRITICAL - IDENTIFY THE CORRECT ELEMENT:
244
- 1. READ the step goal carefully - what specific element are you looking for?
245
- 2. LOCATE that element in the screenshot (NOT a similar-looking element!)
246
- 3. VERIFY position using screen regions:
247
- - Left sidebar/menu: xPercent ~5-25% (FAR LEFT)
248
- - Center content: xPercent ~30-70%
249
- - Right panel/sidebar: xPercent ~75-95% (FAR RIGHT)
250
- 4. CALCULATE percentages from element's CENTER position
251
- 5. SANITY CHECK your percentages:
252
- - Sidebar menu item at 85%? WRONG - that's far right, not sidebar!
253
- - Button in top-left at 90%? WRONG - that's top-right!
254
- - Element description says "left" but x > 50%? WRONG - recheck!
255
-
256
- Example thought process:
257
- Goal: "Click Settings link in left navigation"
258
- → I see "Settings" text in LEFT navigation panel in the screenshot
259
- → Visual estimate: The link appears in the far left sidebar
260
- → Horizontal: The link center is roughly 1/8th from the left edge → ~12-13% from left
261
- → Vertical: The link center is roughly 1/3rd down from top → ~30-35% from top
262
- → xPercent: 12.500, yPercent: 32.000
263
- → Sanity check: 12.5% is FAR LEFT ✓ (NOT 80%+ which would be far right!)
264
- → Description: "Clicking center of Settings link in left sidebar"
265
-
266
- CRITICAL VISUAL ESTIMATION TIPS:
267
- - Divide screenshot mentally into quadrants/regions
268
- - Left sidebar usually ~5-20% from left, center content ~30-70%, right sidebar ~75-95%
269
- - Aim for CENTER of element, not edges
270
- - Top bar usually 0-10% from top, footer usually 90-100%
271
- - Be conservative: slightly off-center is better than way off
272
-
273
- YOUR RESPONSE FORMAT - Output JSON matching this interface:
274
-
275
- interface AgentDecisionLLMResponse {
276
- status: string; // REQUIRED: "continue" (usually for coordinate mode)
277
- reasoning: string; // REQUIRED: "I see [element] at (X%, Y%) - using coordinates"
278
- coordinateAction: { // REQUIRED in coordinate mode
279
- type: "coordinate";
280
- action: "click" | "doubleClick" | "rightClick" | "hover" | "drag" | "fill" | "scroll";
281
- xPercent: number; // 0-100, 3 decimals
282
- yPercent: number; // 0-100, 3 decimals
283
- toXPercent?: number; // For drag
284
- toYPercent?: number; // For drag
285
- value?: string; // For fill
286
- scrollAmount?: number; // For scroll
287
- };
288
- noteToFutureSelf?: string; // Optional: What to try if this fails
289
- }
290
-
291
- COORDINATE REFERENCE:
292
- - Top-left corner: xPercent=0, yPercent=0
293
- - Top-right corner: xPercent=100, yPercent=0
294
- - Bottom-left corner: xPercent=0, yPercent=100
295
- - Bottom-right corner: xPercent=100, yPercent=100
296
- - Center of screen: xPercent=50, yPercent=50
297
-
298
- Use 3 decimal places for precision (e.g., 15.755, not 16).
299
-
300
- ACTIONS:
301
-
302
- **Physical clicks:**
303
- - click: { action: "click", xPercent: 15.755, yPercent: 8.500 }
304
- - doubleClick: { action: "doubleClick", xPercent: 15.755, yPercent: 8.500 }
305
- - rightClick: { action: "rightClick", xPercent: 15.755, yPercent: 8.500 }
306
- - hover: { action: "hover", xPercent: 15.755, yPercent: 8.500 }
307
-
308
- **Input actions:**
309
- - fill: Click then type
310
- { action: "fill", xPercent: 30.000, yPercent: 25.000, value: "alice@example.com" }
311
-
312
- **Movement actions:**
313
- - drag: From one position to another
314
- { action: "drag", xPercent: 10.000, yPercent: 50.000, toXPercent: 60.000, toYPercent: 50.000 }
315
- - scroll: At position, scroll by amount
316
- { action: "scroll", xPercent: 50.000, yPercent: 50.000, scrollAmount: 500 }
317
-
318
- CRITICAL RULES:
319
- - Percentages are from viewport TOP-LEFT (not full page)
320
- - Use element CENTER for coordinates, not edges
321
- - Be precise with decimals - wrong coords click wrong element
322
- - For fill: system will click at (x%,y%) then type value automatically
323
- - For drag: toXPercent/toYPercent are REQUIRED
324
-
325
- DO NOT try to generate selectors - that approach already failed. Use coordinates only.
326
- This is a last-resort mechanism, but it WILL work if you provide accurate percentages.`;
327
- }
328
-
329
- /**
330
- * Build user prompt with context
331
- */
332
- static buildUserPrompt(context: AgentContext, consecutiveFailures?: number): string {
333
- const parts: string[] = [];
334
-
335
- // Put static instructions first for LLM caching efficiency
336
- parts.push('STEP EXECUTION RULES:');
337
- parts.push('- DO ONLY what the current step asks - NO extra actions or verifications');
338
- parts.push('- If step doesn\'t say "verify/check/confirm" → DON\'T add expect() assertions');
339
- parts.push('- Mark "complete" ONLY if commands succeeded');
340
- parts.push('- Try screenshot tool if you need visual context');
341
- parts.push('- Max 5 iterations per step, then forced STUCK\n');
342
-
343
- // Dynamic content follows (changes per iteration)
344
- parts.push('=== CURRENT CONTEXT ===\n');
345
-
346
- // Display note from previous iteration (high priority tactical info)
347
- if (context.noteFromPreviousIteration) {
348
- const note = context.noteFromPreviousIteration;
349
- parts.push(`📝 YOUR NOTE FROM ITERATION ${note.fromIteration}:`);
350
- parts.push(` ${note.content}`);
351
- parts.push(` ^^ READ THIS - your previous self left important tactical guidance ^^`);
352
- parts.push('');
353
- }
354
-
355
- // Check for screenshot loops (analysis paralysis)
356
- const recentScreenshots = context.recentSteps.slice(-3).filter(s =>
357
- s.code.includes('take_screenshot') || s.action.toLowerCase().includes('screenshot')
358
- );
359
- if (recentScreenshots.length >= 2) {
360
- parts.push(`🚨🚨🚨 SCREENSHOT LOOP DETECTED 🚨🚨🚨`);
361
- parts.push(`You've taken ${recentScreenshots.length} screenshots in last 3 iterations!`);
362
- parts.push(`STOP analyzing - START ACTING!`);
363
- parts.push(`Use ANY selector from DOM snapshot and try clicking.`);
364
- parts.push(`If command succeeds and new elements appear → mark "complete"`);
365
- parts.push(`🚨🚨🚨\n`);
366
- }
367
-
368
- // System warnings for accumulated failures
369
- if (consecutiveFailures && consecutiveFailures >= 2 && consecutiveFailures < 3) {
370
- parts.push(`⚠️ SYSTEM WARNING: ${consecutiveFailures} failures!`);
371
- parts.push(`Take screenshot if needed. Try different selector strategy.`);
372
- parts.push(`Question assumptions: Am I at the right step?`);
373
- parts.push(`⚠️\n`);
374
- } else if (consecutiveFailures && consecutiveFailures >= 4) {
375
- parts.push(`⚠️ CRITICAL: ${consecutiveFailures} failures!`);
376
- parts.push(`Next failure will force STUCK. Coordinate mode should be active.\n`);
377
- }
378
-
379
- // Trigger coordinate mode if many failures (Phase 1: after 3 failures)
380
- if (consecutiveFailures && consecutiveFailures >= 3) {
381
- parts.push(`🎯🎯🎯 COORDINATE MODE ACTIVATED 🎯🎯🎯`);
382
- parts.push(`Selector generation has failed ${consecutiveFailures} times.`);
383
- parts.push(`You MUST use coordinate-based action now (percentages).`);
384
- parts.push(`Provide coordinateAction with xPercent/yPercent (0-100, 3 decimals for precision).`);
385
- parts.push(`See system prompt for coordinate action format.`);
386
- parts.push(`🎯🎯🎯\n`);
387
- }
388
-
389
- // Goals - make current step very prominent
390
- parts.push(`🎯 CURRENT STEP GOAL (${context.stepNumber}/${context.totalSteps}):`);
391
- parts.push(`${context.currentStepGoal}`);
392
- parts.push(``);
393
- parts.push(`OVERALL SCENARIO: ${context.overallGoal}\n`);
394
-
395
- if (context.completedSteps.length > 0) {
396
- parts.push(`COMPLETED: ${context.completedSteps.join(', ')}`);
397
- }
398
- if (context.remainingSteps.length > 0) {
399
- parts.push(`REMAINING: ${context.remainingSteps.join(', ')}\n`);
400
- }
401
-
402
- // Current page state (most variable content - at the end)
403
- parts.push(`\nCURRENT PAGE:`);
404
- parts.push(`URL: ${context.currentURL}`);
405
- parts.push(`Title: ${context.currentPageInfo.title}`);
406
- parts.push(`\nINTERACTIVE ELEMENTS (with positions and selectors):`);
407
- parts.push(context.currentPageInfo.formattedElements);
408
- parts.push(`\nARIA TREE (hierarchical structure):`);
409
- parts.push(JSON.stringify(context.currentPageInfo.ariaSnapshot, null, 2).substring(0, 5000));
410
- if (JSON.stringify(context.currentPageInfo.ariaSnapshot).length > 5000) {
411
- parts.push('... (truncated)');
412
- }
413
- parts.push('');
414
-
415
- // Recent steps (most variable content - at the end)
416
- if (context.recentSteps.length > 0) {
417
- parts.push(`\nRECENT STEPS (last ${context.recentSteps.length}):`);
418
- for (const step of context.recentSteps) {
419
- const status = step.result === 'success' ? '✓' : '✗';
420
- parts.push(` ${status} ${step.stepNumber}.${step.iteration || ''} ${step.action}`);
421
- parts.push(` Code: ${step.code}`);
422
- if (step.result === 'failure' && step.error) {
423
- parts.push(` ❌ ERROR: ${step.error}`);
424
- parts.push(` ^^ THIS SELECTOR FAILED - TRY DIFFERENT APPROACH ^^`);
425
- } else {
426
- parts.push(` Result: ${step.observation}`);
427
- }
428
- }
429
- parts.push('');
430
-
431
- // Detect repeated failures
432
- const recentFailures = context.recentSteps.filter(s => s.result === 'failure');
433
- if (recentFailures.length >= 2) {
434
- const sameSelector = recentFailures.slice(-2).every((s, i, arr) =>
435
- i === 0 || s.code === arr[i-1].code
436
- );
437
- if (sameSelector) {
438
- parts.push(`⚠️ WARNING: You've tried the same selector multiple times and it failed!`);
439
- parts.push(` Last failed selector: ${recentFailures[recentFailures.length - 1].code}`);
440
- parts.push(` YOU MUST try a completely different selector this time!\n`);
441
- }
442
- }
443
- }
444
-
445
- // Experiences (app-specific patterns learned)
446
- if (context.experiences && context.experiences.length > 0) {
447
- parts.push(`\nEXPERIENCES (patterns you've learned about this app):`);
448
- for (const exp of context.experiences) {
449
- parts.push(` • ${exp}`);
450
- }
451
- parts.push('');
452
- }
453
-
454
- // Extracted data (from previous extract_data tool calls)
455
- if (context.extractedData && Object.keys(context.extractedData).length > 0) {
456
- parts.push(`\nEXTRACTED DATA (available for use in commands):`);
457
- parts.push(JSON.stringify(context.extractedData, null, 2));
458
- parts.push('');
459
- }
460
-
461
- // Previous iteration guidance
462
- if (context.previousIterationGuidance) {
463
- parts.push(`\nGUIDANCE FROM PREVIOUS ITERATION:`);
464
- parts.push(context.previousIterationGuidance.guidanceForNext);
465
- if (context.previousIterationGuidance.detectingLoop) {
466
- parts.push(`⚠️ LOOP DETECTED: ${context.previousIterationGuidance.loopReasoning}`);
467
- }
468
- parts.push('');
469
- }
470
-
471
- return parts.join('\n');
472
- }
473
- }
474
-
@@ -1,182 +0,0 @@
1
- /**
2
- * Tool Registry - Dynamic tool registration and prompt generation
3
- * Tools can be added at runtime and their descriptions are automatically included in agent prompts
4
- */
5
-
6
- import { ToolCall, ToolResult } from './types';
7
-
8
- /**
9
- * Tool parameter definition
10
- */
11
- export interface ToolParameter {
12
- name: string;
13
- type: 'string' | 'number' | 'boolean' | 'object';
14
- description: string;
15
- required: boolean;
16
- default?: any;
17
- }
18
-
19
- /**
20
- * Tool definition
21
- */
22
- export interface Tool {
23
- name: string;
24
- description: string;
25
- parameters: ToolParameter[];
26
-
27
- /**
28
- * Execute the tool
29
- * @param params Tool parameters
30
- * @param context Execution context (page, memory, etc.)
31
- */
32
- execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult>;
33
- }
34
-
35
- /**
36
- * Context provided to tool execution
37
- */
38
- export interface ToolExecutionContext {
39
- page: any; // Playwright Page
40
- memory: any; // JourneyMemory
41
- stepNumber: number;
42
- logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
43
- }
44
-
45
- /**
46
- * Tool Registry - manages available tools and generates prompts
47
- */
48
- export class ToolRegistry {
49
- private tools: Map<string, Tool> = new Map();
50
-
51
- /**
52
- * Register a tool
53
- */
54
- register(tool: Tool): void {
55
- this.tools.set(tool.name, tool);
56
- }
57
-
58
- /**
59
- * Unregister a tool
60
- */
61
- unregister(toolName: string): void {
62
- this.tools.delete(toolName);
63
- }
64
-
65
- /**
66
- * Get a tool by name
67
- */
68
- get(toolName: string): Tool | undefined {
69
- return this.tools.get(toolName);
70
- }
71
-
72
- /**
73
- * Get all registered tools
74
- */
75
- getAll(): Tool[] {
76
- return Array.from(this.tools.values());
77
- }
78
-
79
- /**
80
- * Execute a tool
81
- */
82
- async execute(toolCall: ToolCall, context: ToolExecutionContext): Promise<ToolResult> {
83
- const tool = this.tools.get(toolCall.name);
84
-
85
- if (!tool) {
86
- return {
87
- success: false,
88
- error: `Tool '${toolCall.name}' not found`
89
- };
90
- }
91
-
92
- // Validate required parameters
93
- const missingParams = tool.parameters
94
- .filter(p => p.required && !(p.name in toolCall.params))
95
- .map(p => p.name);
96
-
97
- if (missingParams.length > 0) {
98
- return {
99
- success: false,
100
- error: `Missing required parameters: ${missingParams.join(', ')}`
101
- };
102
- }
103
-
104
- // Apply defaults for missing optional parameters
105
- const params = { ...toolCall.params };
106
- for (const param of tool.parameters) {
107
- if (!param.required && !(param.name in params) && param.default !== undefined) {
108
- params[param.name] = param.default;
109
- }
110
- }
111
-
112
- try {
113
- return await tool.execute(params, context);
114
- } catch (error: any) {
115
- return {
116
- success: false,
117
- error: `Tool execution failed: ${error.message}`
118
- };
119
- }
120
- }
121
-
122
- /**
123
- * Generate tool descriptions for agent prompt
124
- * Returns formatted text describing all available tools
125
- */
126
- generateToolDescriptions(): string {
127
- if (this.tools.size === 0) {
128
- return 'No tools available.';
129
- }
130
-
131
- const descriptions: string[] = [];
132
-
133
- descriptions.push('AVAILABLE TOOLS:');
134
- descriptions.push('');
135
-
136
- for (const tool of this.tools.values()) {
137
- descriptions.push(`${tool.name}:`);
138
- descriptions.push(` Description: ${tool.description}`);
139
-
140
- if (tool.parameters.length > 0) {
141
- descriptions.push(` Parameters:`);
142
- for (const param of tool.parameters) {
143
- const required = param.required ? '(required)' : '(optional)';
144
- const defaultVal = param.default !== undefined ? ` [default: ${JSON.stringify(param.default)}]` : '';
145
- descriptions.push(` - ${param.name} (${param.type}) ${required}: ${param.description}${defaultVal}`);
146
- }
147
- } else {
148
- descriptions.push(` Parameters: none`);
149
- }
150
-
151
- descriptions.push('');
152
- }
153
-
154
- descriptions.push('To use a tool, include it in your "toolCalls" array with the tool name and parameters.');
155
- descriptions.push('');
156
-
157
- return descriptions.join('\n');
158
- }
159
-
160
- /**
161
- * Generate JSON schema for tool calls (for structured output)
162
- */
163
- generateToolCallSchema(): any {
164
- return {
165
- type: 'array',
166
- items: {
167
- type: 'object',
168
- properties: {
169
- name: {
170
- type: 'string',
171
- enum: Array.from(this.tools.keys())
172
- },
173
- params: {
174
- type: 'object'
175
- }
176
- },
177
- required: ['name', 'params']
178
- }
179
- };
180
- }
181
- }
182
-