testchimp-runner-core 0.0.34 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/orchestrator/decision-parser.d.ts +18 -0
  10. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  11. package/dist/orchestrator/decision-parser.js +127 -0
  12. package/dist/orchestrator/decision-parser.js.map +1 -0
  13. package/dist/orchestrator/index.d.ts +4 -2
  14. package/dist/orchestrator/index.d.ts.map +1 -1
  15. package/dist/orchestrator/index.js +14 -2
  16. package/dist/orchestrator/index.js.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
  18. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  19. package/dist/orchestrator/orchestrator-agent.js +534 -204
  20. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
  22. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  23. package/dist/orchestrator/orchestrator-prompts.js +529 -247
  24. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  25. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  26. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  27. package/dist/orchestrator/page-som-handler.js +1353 -0
  28. package/dist/orchestrator/page-som-handler.js.map +1 -0
  29. package/dist/orchestrator/som-types.d.ts +149 -0
  30. package/dist/orchestrator/som-types.d.ts.map +1 -0
  31. package/dist/orchestrator/som-types.js +87 -0
  32. package/dist/orchestrator/som-types.js.map +1 -0
  33. package/dist/orchestrator/tool-registry.d.ts +2 -0
  34. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  35. package/dist/orchestrator/tool-registry.js.map +1 -1
  36. package/dist/orchestrator/tools/index.d.ts +4 -1
  37. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  38. package/dist/orchestrator/tools/index.js +7 -2
  39. package/dist/orchestrator/tools/index.js.map +1 -1
  40. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  41. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  42. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  43. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  44. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  45. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  46. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  47. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  48. package/dist/orchestrator/types.d.ts +23 -1
  49. package/dist/orchestrator/types.d.ts.map +1 -1
  50. package/dist/orchestrator/types.js +11 -1
  51. package/dist/orchestrator/types.js.map +1 -1
  52. package/dist/scenario-service.d.ts +5 -0
  53. package/dist/scenario-service.d.ts.map +1 -1
  54. package/dist/scenario-service.js +17 -0
  55. package/dist/scenario-service.js.map +1 -1
  56. package/dist/scenario-worker-class.d.ts +4 -0
  57. package/dist/scenario-worker-class.d.ts.map +1 -1
  58. package/dist/scenario-worker-class.js +18 -3
  59. package/dist/scenario-worker-class.js.map +1 -1
  60. package/dist/testing/agent-tester.d.ts +35 -0
  61. package/dist/testing/agent-tester.d.ts.map +1 -0
  62. package/dist/testing/agent-tester.js +84 -0
  63. package/dist/testing/agent-tester.js.map +1 -0
  64. package/dist/testing/ref-translator-tester.d.ts +44 -0
  65. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  66. package/dist/testing/ref-translator-tester.js +104 -0
  67. package/dist/testing/ref-translator-tester.js.map +1 -0
  68. package/dist/utils/hierarchical-selector.d.ts +47 -0
  69. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  70. package/dist/utils/hierarchical-selector.js +212 -0
  71. package/dist/utils/hierarchical-selector.js.map +1 -0
  72. package/dist/utils/page-info-retry.d.ts +14 -0
  73. package/dist/utils/page-info-retry.d.ts.map +1 -0
  74. package/dist/utils/page-info-retry.js +60 -0
  75. package/dist/utils/page-info-retry.js.map +1 -0
  76. package/dist/utils/page-info-utils.d.ts +1 -0
  77. package/dist/utils/page-info-utils.d.ts.map +1 -1
  78. package/dist/utils/page-info-utils.js +46 -18
  79. package/dist/utils/page-info-utils.js.map +1 -1
  80. package/dist/utils/ref-attacher.d.ts +21 -0
  81. package/dist/utils/ref-attacher.d.ts.map +1 -0
  82. package/dist/utils/ref-attacher.js +149 -0
  83. package/dist/utils/ref-attacher.js.map +1 -0
  84. package/dist/utils/ref-translator.d.ts +49 -0
  85. package/dist/utils/ref-translator.d.ts.map +1 -0
  86. package/dist/utils/ref-translator.js +276 -0
  87. package/dist/utils/ref-translator.js.map +1 -0
  88. package/package.json +1 -1
  89. package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
  90. package/plandocs/exploratory-mode-support.plan.md +928 -0
  91. package/plandocs/journey-id-tracking-addendum.md +227 -0
  92. package/src/execution-service.ts +179 -596
  93. package/src/index.ts +10 -0
  94. package/src/orchestrator/decision-parser.ts +139 -0
  95. package/src/orchestrator/index.ts +25 -1
  96. package/src/orchestrator/orchestrator-agent.ts +656 -236
  97. package/src/orchestrator/orchestrator-prompts.ts +559 -247
  98. package/src/orchestrator/page-som-handler.ts +1565 -0
  99. package/src/orchestrator/som-types.ts +188 -0
  100. package/src/orchestrator/tool-registry.ts +2 -0
  101. package/src/orchestrator/tools/index.ts +4 -1
  102. package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
  103. package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
  104. package/src/orchestrator/types.ts +49 -6
  105. package/src/scenario-service.ts +20 -0
  106. package/src/scenario-worker-class.ts +24 -3
  107. package/src/utils/page-info-retry.ts +65 -0
  108. package/src/utils/page-info-utils.ts +53 -18
  109. package/testchimp-runner-core-0.0.35.tgz +0 -0
  110. package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
  111. package/testchimp-runner-core-0.0.33.tgz +0 -0
  112. /package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
  113. /package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
  114. /package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0
@@ -0,0 +1,188 @@
1
/**
 * Set-of-Marks (SoM) Type Definitions
 * Types for visual element identification and interaction
 */

/** A viewport position expressed as percentages rather than pixels. */
export interface Coordinate {
  x: number; // Percentage of viewport width: 0-100 (use 3 decimal precision, e.g., 15.625)
  y: number; // Percentage of viewport height: 0-100 (use 3 decimal precision, e.g., 82.375)
}

/** Actions a SomCommand can request. Each member's value is the string used on the wire. */
export enum InteractionAction {
  // Click actions
  CLICK = 'click',
  DOUBLE_CLICK = 'doubleClick',
  RIGHT_CLICK = 'rightClick',

  // Mouse actions
  HOVER = 'hover',
  MOUSE_DOWN = 'mouseDown',
  MOUSE_UP = 'mouseUp',
  DRAG = 'drag',

  // Input actions
  FILL = 'fill',
  TYPE = 'type',
  CLEAR = 'clear',

  // Keyboard actions
  PRESS = 'press',
  PRESS_SEQUENTIALLY = 'pressSequentially',

  // Select/Checkbox actions
  SELECT = 'select',
  CHECK = 'check',
  UNCHECK = 'uncheck',

  // Focus/Scroll actions
  FOCUS = 'focus',
  BLUR = 'blur',
  SCROLL = 'scroll',
  SCROLL_INTO_VIEW = 'scrollIntoView',

  // Navigation actions
  NAVIGATE = 'navigate', // Go to URL (requires value field)
  GO_BACK = 'goBack',
  GO_FORWARD = 'goForward',
  RELOAD = 'reload'
}

/** One interaction request, targeted either by SoM element reference or by coordinate. */
export interface SomCommand {
  elementRef?: string; // Integer as string: "1", "2", "42" (optional for coord-based commands)
  action: InteractionAction;

  // Coordinate-based action (use when elementRef is empty/null)
  coord?: Coordinate; // Percentage-based (x: 0-100, y: 0-100 of viewport)

  // Action-specific parameters
  value?: string; // For fill/type/select/press actions
  fromCoord?: Coordinate; // For drag (start) - percentage-based
  toCoord?: Coordinate; // For drag (end) - percentage-based
  force?: boolean; // Force action even if not actionable
  scrollAmount?: number; // Pixels to scroll
  scrollDirection?: 'up' | 'down' | 'left' | 'right';
  button?: 'left' | 'right' | 'middle';
  clickCount?: number;
  modifiers?: Array<'Alt' | 'Control' | 'Meta' | 'Shift'>;
  delay?: number; // Delay between keystrokes for TYPE (ms)
  timeout?: number; // Override default timeout
}

/** Outcome of running a command or a single attempt at it. */
export enum CommandRunStatus {
  SUCCESS = 'success',
  FAILURE = 'failure'
}

/** Record of one attempt: the command text tried (if any), its status and any error text. */
export interface CommandAttempt {
  command?: string;
  status: CommandRunStatus;
  error?: string;
}

/** A single observed DOM change. */
export interface DomMutation {
  type: 'added' | 'removed' | 'modified' | 'attribute_changed';
  elementDescription: string;
  timestamp: number;
}

/** Aggregate result of a command: every failed attempt plus the successful one, if any. */
export interface SemanticCommandResult {
  failedAttempts: CommandAttempt[];
  successAttempt?: CommandAttempt;
  error?: string;
  status: CommandRunStatus;
  mutations?: DomMutation[]; // Only for hover/focus, filtered for relevance
}

/** Descriptive snapshot of one marked element, including its bounding box and optional parent summary. */
export interface SomElement {
  somId: string; // Simple integer as string: "1", "2", "3"
  tag: string;
  role: string;
  text: string;
  ariaLabel: string;
  placeholder: string;
  name: string;
  type: string;
  id: string;
  className: string;
  bbox: { x: number; y: number; width: number; height: number };
  parent?: {
    tag: string;
    role: string;
    className: string;
    text: string;
  };
}

/**
 * Typed selector (no string parsing needed)
 * Supports chaining: parent.child for scoped selectors
 */
export interface TypedSelector {
  type: 'id' | 'testId' | 'label' | 'role' | 'placeholder' | 'text' | 'title' | 'altText' | 'name' | 'locator';
  value: string;
  roleOptions?: { name?: string }; // For getByRole
  parent?: TypedSelector; // For chaining: page.locator(parent).locator(this)
}

/**
 * Verification types for expect assertions
 */
export enum VerificationType {
  // Text verifications
  TEXT_CONTAINS = 'textContains',
  TEXT_EQUALS = 'textEquals',

  // Input verifications
  VALUE_EQUALS = 'valueEquals',
  VALUE_EMPTY = 'valueEmpty',

  // Visibility verifications
  IS_VISIBLE = 'isVisible',
  IS_HIDDEN = 'isHidden',

  // State verifications
  IS_ENABLED = 'isEnabled',
  IS_DISABLED = 'isDisabled',
  IS_CHECKED = 'isChecked',
  IS_UNCHECKED = 'isUnchecked',

  // Count verifications (for lists, tables, etc.)
  COUNT_EQUALS = 'countEquals',
  COUNT_GREATER_THAN = 'countGreaterThan',
  COUNT_LESS_THAN = 'countLessThan',

  // Attribute verifications
  HAS_CLASS = 'hasClass',
  HAS_ATTRIBUTE = 'hasAttribute'
}

/**
 * SoM verification command for expect assertions
 */
export interface SomVerification {
  verificationType: VerificationType;
  elementRef?: string; // SoM ID (e.g., "3") - optional for count verifications
  expected?: string | number; // Expected value/text/count
  description?: string; // Human-readable description
  selector?: string; // For count verifications on non-SoM elements (CSS selector)
}

/**
 * Union type: commands array can contain both actions and verifications
 */
export type SomCommandOrVerification = SomCommand | SomVerification;

/**
 * Type guard to check if command is a verification.
 *
 * A value counts as a verification when it is a non-null object carrying a
 * `verificationType` key. Null, undefined and primitive inputs yield `false`
 * rather than making the `in` operator throw a TypeError.
 *
 * @param cmd - Candidate entry from a commands array.
 * @returns `true` when `cmd` has a `verificationType` property.
 */
export function isSomVerification(cmd: SomCommandOrVerification): cmd is SomVerification {
  return typeof cmd === 'object' && cmd !== null && 'verificationType' in cmd;
}

/**
 * Type guard to check if command is an action.
 *
 * A value counts as an action when it is a non-null object carrying an
 * `action` key. Null, undefined and primitive inputs yield `false` rather
 * than making the `in` operator throw a TypeError.
 *
 * @param cmd - Candidate entry from a commands array.
 * @returns `true` when `cmd` has an `action` property.
 */
export function isSomCommand(cmd: SomCommandOrVerification): cmd is SomCommand {
  return typeof cmd === 'object' && cmd !== null && 'action' in cmd;
}
188
+
@@ -40,6 +40,8 @@ export interface ToolExecutionContext {
40
40
  memory: any; // JourneyMemory
41
41
  stepNumber: number;
42
42
  logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
43
+ previousSomScreenshot?: string; // For view_previous_screenshot tool
44
+ somHandler?: any; // PageSoMHandler for refresh_som_markers tool
43
45
  }
44
46
 
45
47
  /**
@@ -1,9 +1,12 @@
1
1
  /**
2
- * Tool exports - 6 information-gathering tools
2
+ * Tool exports - 8 information-gathering tools
3
3
  * Note: State changes (navigation, clicks, fills) are done via Playwright commands, not tools
4
+ * Ref-based commands (getByRef) are translated to Playwright at execution time
4
5
  */
5
6
 
6
7
  export { TakeScreenshotTool } from './take-screenshot';
8
+ export { ViewPreviousScreenshotTool } from './view-previous-screenshot';
9
+ export { RefreshSomMarkersTool } from './refresh-som-markers';
7
10
  export { RecallHistoryTool } from './recall-history';
8
11
  export { InspectPageTool } from './inspect-page';
9
12
  export { CheckPageReadyTool } from './check-page-ready';
@@ -0,0 +1,69 @@
1
+ import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
2
+ import { ToolResult } from '../types';
3
+
4
/**
 * Tool to refresh SoM markers when they appear outdated or misaligned.
 *
 * Re-scans the page through the `somHandler` supplied in the execution context
 * and returns a fresh marked screenshot in the tool result.
 */
export class RefreshSomMarkersTool implements Tool {
  name = 'refresh_som_markers';
  description = 'Manually refresh the Set-of-Marks visual markers on the page. Use when: (1) Markers appear misaligned with actual UI elements, (2) Page content has changed but markers are stale (e.g., after dynamic content loads), (3) You suspect markers are from a previous page state. Returns updated screenshot with fresh markers.';

  parameters: ToolParameter[] = [
    {
      name: 'reason',
      type: 'string',
      description: 'Why you need to refresh markers. Examples: "dropdown expanded but markers still show closed state", "new content loaded but not marked", "markers seem to point to wrong elements"',
      required: true
    }
  ];

  /**
   * Refresh the markers and capture a new screenshot.
   *
   * @param params - Tool arguments; `reason` falls back to a generic message when empty.
   * @param context - Execution context; `somHandler` and `page` must both be present.
   * @returns A failed result when `somHandler` or `page` is missing or the refresh
   *          throws; otherwise a successful result whose `data` holds the new
   *          screenshot and the reason.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { logger, page, somHandler } = context;
    const reason = params.reason || 'Markers appear outdated';

    if (!somHandler) {
      return {
        success: false,
        error: 'SoM mode not enabled - refresh markers tool unavailable.'
      };
    }

    if (!page) {
      return {
        success: false,
        error: 'No page context available.'
      };
    }

    try {
      logger?.(`[RefreshSomMarkers] Refreshing markers due to: ${reason}`, 'log');

      // Ensure somHandler has the latest page reference
      somHandler.setPage(page);

      // Re-scan page and update markers
      await somHandler.updateSom();
      logger?.(`[RefreshSomMarkers] ✓ Markers updated`, 'log');

      // Capture fresh screenshot with new markers (viewport only - cheaper than full page)
      const freshScreenshot = await somHandler.getScreenshot(true, false, 60);
      logger?.(`[RefreshSomMarkers] ✓ Fresh screenshot captured (viewport)`, 'log');

      return {
        success: true,
        data: {
          screenshot: freshScreenshot,
          reason
        },
        learning: `SoM markers refreshed. New screenshot shows current page state with updated element markers. Reason: ${reason}`
      };
    } catch (error: unknown) {
      // Thrown values are not guaranteed to be Error instances; normalise before use
      // so this handler can never throw while building the failure result.
      const message = RefreshSomMarkersTool.describeError(error);
      logger?.(`[RefreshSomMarkers] ✗ Failed: ${message}`, 'error');
      return {
        success: false,
        error: `Failed to refresh markers: ${message}`
      };
    }
  }

  /** Turn any thrown value into readable text without assuming it is an Error. */
  private static describeError(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }
}
69
+
@@ -0,0 +1,103 @@
1
+ /**
2
+ * View Previous Screenshot Tool
3
+ * Access the screenshot from the previous iteration for continuity reasoning
4
+ */
5
+
6
+ import { Tool, ToolParameter, ToolExecutionContext } from '../tool-registry';
7
+ import { ToolResult } from '../types';
8
+
9
/**
 * Tool that lets the agent reason about the screenshot captured in the previous
 * iteration. When an LLM facade has been injected, the screenshot is sent for
 * vision analysis and the answer is returned as the result's `learning`;
 * otherwise a fixed placeholder message is returned. The screenshot itself is
 * not included in the result data.
 */
export class ViewPreviousScreenshotTool implements Tool {
  name = 'view_previous_screenshot';
  description = 'View the screenshot from the PREVIOUS iteration to understand continuity. Common uses: (1) Verify coordinate-based clicks - look for magenta "clicked" marker to see where click landed, (2) Compare before/after states - see what changed after commands, (3) Check transient effects - see alerts/toasts that may have disappeared. Returns vision analysis of the previous screenshot.';

  // LLM facade for vision analysis
  private llmFacade?: any;

  /** Inject the LLM facade whose `llmProvider.callLLM` performs the vision analysis. */
  setLLMFacade(llmFacade: any): void {
    this.llmFacade = llmFacade;
  }

  parameters: ToolParameter[] = [
    {
      name: 'purpose',
      type: 'string',
      description: 'Why you need to see the previous screenshot. Examples: "verify coord click accuracy", "check if error message appeared then disappeared", "compare before/after form submission"',
      required: true
    }
  ];

  /**
   * Analyse the previous iteration's screenshot.
   *
   * @param params - Tool arguments; `purpose` falls back to a generic message when empty.
   * @param context - Execution context; reads `logger` and `previousSomScreenshot`.
   * @returns A failed result when no previous screenshot exists. Otherwise a
   *          successful result whose `learning` is the vision answer, or the
   *          placeholder text when no facade is set or the vision call fails.
   */
  async execute(params: Record<string, any>, context: ToolExecutionContext): Promise<ToolResult> {
    const { logger, previousSomScreenshot: previousScreenshot } = context;
    const purpose = params.purpose || 'Review previous page state';

    if (!previousScreenshot) {
      return {
        success: false,
        error: 'No previous screenshot available (this is the first iteration)'
      };
    }

    try {
      logger?.(`[ViewPreviousScreenshot] Analyzing previous iteration screenshot for: ${purpose}`, 'log');

      // Analyze with vision LLM if available
      let analysis = 'Previous screenshot retrieved.';

      if (this.llmFacade) {
        try {
          const llmResponse = await this.llmFacade.llmProvider.callLLM({
            systemPrompt: 'You are analyzing a screenshot from a previous test iteration to help with continuity reasoning. Provide specific, actionable observations.',
            userPrompt: this.buildAnalysisPrompt(purpose),
            imageUrl: previousScreenshot
          });

          analysis = llmResponse.answer || analysis;
          logger?.(`[ViewPreviousScreenshot] ✓ Analysis complete`, 'log');

        } catch (error: unknown) {
          // Vision analysis is best-effort: keep the placeholder text and carry on,
          // whatever kind of value was thrown.
          logger?.(`[ViewPreviousScreenshot] ⚠ Vision analysis failed: ${ViewPreviousScreenshotTool.describeError(error)}`, 'warn');
        }
      }

      return {
        success: true,
        data: {
          screenshotAvailable: true,
          purpose
        },
        learning: analysis
      };
    } catch (error: unknown) {
      const message = ViewPreviousScreenshotTool.describeError(error);
      logger?.(`[ViewPreviousScreenshot] ✗ Failed: ${message}`, 'error');
      return {
        success: false,
        error: `Failed to access previous screenshot: ${message}`
      };
    }
  }

  /** Build the user prompt sent alongside the previous screenshot. */
  private buildAnalysisPrompt(purpose: string): string {
    return `Analyze the screenshot from the PREVIOUS iteration (before the most recent commands executed).

PURPOSE: ${purpose}

WHAT TO LOOK FOR:
1. **Coordinate verification**: If purpose mentions "coord" or "click", look for MAGENTA "clicked" marker (circle with yellow border)
- Describe marker position relative to UI elements
- Assess accuracy: "centered on button", "5% above target", etc.

2. **Before/after comparison**: Compare visual state with current page
- What changed after commands executed?
- New elements, removed elements, state changes?

3. **Transient effects**: Elements that may have appeared and disappeared
- Alerts, toasts, error messages that are now gone
- Loading states, spinners that finished

4. **General state**: Answer the specific question from purpose

TASK: Provide concise, specific observations relevant to the purpose.`;
  }

  /** Turn any thrown value into readable text without assuming it is an Error. */
  private static describeError(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }
}
103
+
@@ -108,7 +108,7 @@ export interface AgentDecision {
108
108
  needsToolResults?: boolean; // Wait for tool results before proceeding with commands
109
109
 
110
110
  // Command batch (executed sequentially)
111
- commands?: string[];
111
+ commands?: string[]; // Plain Playwright commands
112
112
  commandReasoning?: string;
113
113
 
114
114
  // Self-reflection for next iteration
@@ -147,6 +147,12 @@ export interface AgentDecision {
147
147
  issue: 'prior_incomplete' | 'already_done' | 'wrong_order' | null;
148
148
  explanation: string; // Why agent thinks step order is off
149
149
  };
150
+
151
+ // Meta-learning: Suggested prompt improvements based on journey learnings
152
+ debugInfo?: {
153
+ suggestedPromptUpdates?: string; // Confident suggestions for improving system/user prompts
154
+ reasoning?: string; // Why these updates would help
155
+ };
150
156
  }
151
157
 
152
158
  /**
@@ -172,16 +178,34 @@ export interface AgentContext {
172
178
  experiences: string[];
173
179
  extractedData: Record<string, string>;
174
180
 
175
- // Self-reflection from previous iteration
176
- previousIterationGuidance?: SelfReflection;
177
-
178
- // Note from previous iteration (NEW - tactical continuity)
181
+ // Note from previous iteration (tactical continuity)
179
182
  noteFromPreviousIteration?: NoteToFutureSelf;
180
183
 
184
+ // Test data / credentials for exploration
185
+ testDataPrompt?: string;
186
+
187
+ // SoM (Set-of-Marks) screenshot with visual markers
188
+ somScreenshot?: string; // Data URL of screenshot with SoM markers
189
+ somElementMap?: string; // Text map of SoM IDs to element details for disambiguation
190
+
191
+ // Repair mode context (undefined for script gen/exploration)
192
+ priorSteps?: string[]; // Steps completed before current (e.g., ["1. Navigate", "2. Login"])
193
+ nextSteps?: string[]; // Steps after current (e.g., ["5. Submit", "6. Verify"])
194
+
181
195
  // Tool results from this iteration (if any)
182
196
  toolResults?: Record<string, ToolResult>;
183
197
  }
184
198
 
199
+ /**
200
+ * Exploration mode configuration: the option bag held in AgentConfig.explorationMode
201
+ */
202
+ export interface ExplorationMode {
203
+ enabled: boolean; // Whether exploration mode is active
204
+ explorationPrompt: string; // Journey-specific focus, e.g. "Explore Dashboard and test all widgets"
205
+ testDataPrompt?: string; // Optional test data / credentials context
206
+ maxExplorationSteps?: number; // Step budget limit (DEFAULT_AGENT_CONFIG sets 50) - agent can stop earlier
207
+ }
208
+
185
209
  /**
186
210
  * Configurable guardrails
187
211
  */
@@ -211,6 +235,15 @@ export interface AgentConfig {
211
235
  // Allowed actions
212
236
  allowedExplorationActions?: string[]; // Default: ['hover', 'click_info', 'click_menu', 'focus'] (Phase 2)
213
237
  allowedDomains?: string[]; // For navigate_to_url validation
238
+
239
+ // Feature flags
240
+ enableCoordinateMode?: boolean; // Default: false (experimental - disable until stable)
241
+ useSoM?: boolean; // Default: true (Set-of-Marks visual mode)
242
+ somUseSomIdBasedCommands?: boolean; // Default: false (use semantic selectors first)
243
+ somRestrictCoordinates?: boolean; // Default: false (if true, strongly discourage coord commands except as absolute last resort)
244
+
245
+ // Exploration mode (NEW)
246
+ explorationMode?: ExplorationMode;
214
247
  }
215
248
 
216
249
  /**
@@ -243,6 +276,16 @@ export const DEFAULT_AGENT_CONFIG: Required<AgentConfig> = {
243
276
  commandTimeout: 30000,
244
277
  explorationTimeout: 2000,
245
278
  allowedExplorationActions: ['hover', 'click_info', 'click_menu', 'focus'],
246
- allowedDomains: []
279
+ allowedDomains: [],
280
+ enableCoordinateMode: false, // Disabled by default - experimental feature
281
+ useSoM: true, // Enabled by default - use Set-of-Marks visual mode
282
+ somUseSomIdBasedCommands: false, // Use semantic selectors first
283
+ somRestrictCoordinates: false, // Allow coords as valid fallback (for exploration)
284
+ explorationMode: {
285
+ enabled: false,
286
+ explorationPrompt: '',
287
+ testDataPrompt: undefined,
288
+ maxExplorationSteps: 50
289
+ }
247
290
  };
248
291
 
@@ -218,6 +218,26 @@ export class ScenarioService extends EventEmitter {
218
218
  this.processNextJob();
219
219
  }
220
220
 
221
+ /**
222
+ * Execute exploration mode by delegating to a worker's executeExploration(page, explorationConfig, jobId) and returning its result
223
+ * Requires orchestrator to be enabled via useOrchestrator option; throws an Error when it is not
224
+ */
225
+ async executeExploration(page: any, explorationConfig: any, jobId: string): Promise<any> {
226
+ if (!this.useOrchestrator) {
227
+ throw new Error('Exploration mode requires orchestrator to be enabled');
228
+ }
229
+
230
+ // Pick the first worker not in busyWorkers; if none, create one and take the last entry of this.workers (assumes createWorker appends - TODO confirm). NOTE(review): the chosen worker is not added to busyWorkers in this method - confirm it cannot be handed another job concurrently.
231
+ let worker = this.workers.find(w => !this.busyWorkers.has(w));
232
+ if (!worker) {
233
+ await this.createWorker();
234
+ worker = this.workers[this.workers.length - 1];
235
+ }
236
+
237
+ // Delegate to the worker and return its promise unchanged
238
+ return worker.executeExploration(page, explorationConfig, jobId);
239
+ }
240
+
221
241
  async shutdown(): Promise<void> {
222
242
  this.log('Shutting down scenario service...');
223
243
 
@@ -18,6 +18,8 @@ import {
18
18
  JourneyMemory,
19
19
  AgentConfig,
20
20
  TakeScreenshotTool,
21
+ ViewPreviousScreenshotTool,
22
+ RefreshSomMarkersTool,
21
23
  RecallHistoryTool,
22
24
  InspectPageTool,
23
25
  CheckPageReadyTool,
@@ -105,11 +107,18 @@ export class ScenarioWorker extends EventEmitter {
105
107
  const takeScreenshotTool = new TakeScreenshotTool();
106
108
  takeScreenshotTool.setLLMFacade(this.llmFacade); // Inject LLM for vision analysis
107
109
 
110
+ const viewPreviousScreenshotTool = new ViewPreviousScreenshotTool();
111
+ viewPreviousScreenshotTool.setLLMFacade(this.llmFacade); // Inject LLM for vision analysis
112
+
113
+ const refreshSomMarkersTool = new RefreshSomMarkersTool();
114
+
108
115
  const verifyActionTool = new VerifyActionResultTool();
109
116
  verifyActionTool.setLLMFacade(this.llmFacade); // Inject LLM for vision comparison
110
117
 
111
- // Register 6 information-gathering tools (state changes via Playwright commands)
118
+ // Register 8 information-gathering tools (state changes via Playwright commands)
112
119
  this.toolRegistry.register(takeScreenshotTool);
120
+ this.toolRegistry.register(viewPreviousScreenshotTool);
121
+ this.toolRegistry.register(refreshSomMarkersTool);
113
122
  this.toolRegistry.register(new RecallHistoryTool());
114
123
  this.toolRegistry.register(new InspectPageTool());
115
124
  this.toolRegistry.register(new CheckPageReadyTool());
@@ -317,12 +326,12 @@ export class ScenarioWorker extends EventEmitter {
317
326
  page = job.existingPage;
318
327
  } else {
319
328
  // Create new browser (default behavior for local clients)
320
- // Default to headed mode (headless: false) for better debugging
329
+ // Let the playwrightConfig control headless mode (don't override with hardcoded value)
321
330
  // Create logger function from outputChannel for browser initialization
322
331
  const logger = this.outputChannel ? (message: string, level?: 'log' | 'error' | 'warn') => {
323
332
  this.outputChannel!.appendLine(`[Browser] ${message}`);
324
333
  } : undefined;
325
- const browserInstance = await initializeBrowser(job.playwrightConfig, false, undefined, logger);
334
+ const browserInstance = await initializeBrowser(job.playwrightConfig, undefined, undefined, logger);
326
335
  browser = browserInstance.browser;
327
336
  context = browserInstance.context;
328
337
  page = browserInstance.page;
@@ -1082,6 +1091,18 @@ export class ScenarioWorker extends EventEmitter {
1082
1091
 
1083
1092
 
1084
1093
 
1094
+ /**
1095
+ * Execute exploration mode by forwarding page, explorationConfig and jobId to the orchestrator agent; throws an Error when useOrchestrator is off or no orchestratorAgent is set
1096
+ */
1097
+ async executeExploration(page: any, explorationConfig: any, jobId: string): Promise<any> {
1098
+ if (!this.useOrchestrator || !this.orchestratorAgent) {
1099
+ throw new Error('Orchestrator not available - exploration mode requires orchestrator');
1100
+ }
1101
+
1102
+ // Delegate to the orchestrator agent and return its promise unchanged
1103
+ return this.orchestratorAgent.executeExploration(page, explorationConfig, jobId);
1104
+ }
1105
+
1085
1106
  async cleanup(): Promise<void> {
1086
1107
  this.initialized = false;
1087
1108
  this.sessionId = null;
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Page Info Retry Utility
3
+ * Handles adaptive page loading with exponential backoff
4
+ */
5
+
6
+ import { getEnhancedPageInfo, PageInfo } from './page-info-utils';
7
+
8
/**
 * Repeatedly extracts page info until enough interactive elements are present,
 * sleeping between attempts with a capped exponential backoff.
 */
export class PageInfoRetry {
  /** Delay before the first retry, in milliseconds. */
  private static readonly INITIAL_BACKOFF_MS = 1000;
  /** Growth factor applied to the delay after every retry. */
  private static readonly BACKOFF_FACTOR = 1.6;
  /** Upper bound for a single delay, in milliseconds. */
  private static readonly MAX_BACKOFF_MS = 15000;
  /** Element count at which the page is considered loaded. */
  private static readonly MIN_INTERACTIVE_ELEMENTS = 3;
  /** Timeout for the initial `domcontentloaded` wait, in milliseconds. */
  private static readonly LOAD_STATE_TIMEOUT_MS = 20000;

  /**
   * Get page info with retry logic - waits for interactive elements to appear.
   *
   * @param page - Page object; must provide `waitForLoadState` and `waitForTimeout`.
   * @param maxAttempts - Maximum number of extraction attempts (default 6).
   * @returns The first page info containing at least three interactive elements,
   *          or the result of the final attempt when that threshold is never met.
   */
  static async getWithRetry(page: any, maxAttempts: number = 6): Promise<PageInfo> {
    // Wait for initial page load (generous timeout for slow apps). This is
    // best-effort: any failure here is ignored and extraction proceeds anyway.
    try {
      await page.waitForLoadState('domcontentloaded', { timeout: PageInfoRetry.LOAD_STATE_TIMEOUT_MS });
    } catch (_waitError) {
      // Continue even if wait fails
    }

    let attempt = 0;
    let backoffMs = PageInfoRetry.INITIAL_BACKOFF_MS;

    while (attempt < maxAttempts) {
      attempt++;

      const pageInfo = await getEnhancedPageInfo(page);
      const elementCount = pageInfo.interactiveElements?.length ?? 0;

      // If we got a reasonable number of elements, we're done
      if (elementCount >= PageInfoRetry.MIN_INTERACTIVE_ELEMENTS) {
        if (attempt > 1) {
          console.log(`[PageInfoRetry] ✓ Page elements loaded after ${attempt} attempts`);
        }
        return pageInfo;
      }

      // If this is the last attempt, return what we have
      if (attempt >= maxAttempts) {
        const totalWait = PageInfoRetry.calculateTotalWaitTime(maxAttempts);
        console.log(`[PageInfoRetry] ⚠️ Only found ${elementCount} elements after ${maxAttempts} attempts (total wait: ~${totalWait}ms)`);
        return pageInfo;
      }

      // Wait with exponential backoff before retrying
      console.log(`[PageInfoRetry] Only ${elementCount} elements found (attempt ${attempt}/${maxAttempts}), waiting ${backoffMs}ms...`);
      await page.waitForTimeout(backoffMs);
      backoffMs = PageInfoRetry.nextBackoff(backoffMs);
    }

    // Only reached when maxAttempts is less than 1: perform a single extraction.
    return await getEnhancedPageInfo(page);
  }

  /** Next delay in the schedule: grow by the backoff factor, capped at the maximum. */
  private static nextBackoff(currentMs: number): number {
    return Math.min(currentMs * PageInfoRetry.BACKOFF_FACTOR, PageInfoRetry.MAX_BACKOFF_MS);
  }

  /**
   * Total time slept across a run that uses every attempt, rounded to whole
   * milliseconds. There is one delay between each pair of consecutive attempts,
   * so `maxAttempts - 1` delays are summed.
   */
  private static calculateTotalWaitTime(maxAttempts: number): number {
    let total = 0;
    let backoffMs = PageInfoRetry.INITIAL_BACKOFF_MS;
    for (let i = 1; i < maxAttempts; i++) {
      total += backoffMs;
      backoffMs = PageInfoRetry.nextBackoff(backoffMs);
    }
    return Math.round(total);
  }
}
65
+