testchimp-runner-core 0.0.34 → 0.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150):
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/orchestrator/decision-parser.d.ts +18 -0
  10. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  11. package/dist/orchestrator/decision-parser.js +127 -0
  12. package/dist/orchestrator/decision-parser.js.map +1 -0
  13. package/dist/orchestrator/index.d.ts +4 -2
  14. package/dist/orchestrator/index.d.ts.map +1 -1
  15. package/dist/orchestrator/index.js +14 -2
  16. package/dist/orchestrator/index.js.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
  18. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  19. package/dist/orchestrator/orchestrator-agent.js +534 -204
  20. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
  22. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  23. package/dist/orchestrator/orchestrator-prompts.js +529 -247
  24. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  25. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  26. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  27. package/dist/orchestrator/page-som-handler.js +1353 -0
  28. package/dist/orchestrator/page-som-handler.js.map +1 -0
  29. package/dist/orchestrator/som-types.d.ts +149 -0
  30. package/dist/orchestrator/som-types.d.ts.map +1 -0
  31. package/dist/orchestrator/som-types.js +87 -0
  32. package/dist/orchestrator/som-types.js.map +1 -0
  33. package/dist/orchestrator/tool-registry.d.ts +2 -0
  34. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  35. package/dist/orchestrator/tool-registry.js.map +1 -1
  36. package/dist/orchestrator/tools/index.d.ts +4 -1
  37. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  38. package/dist/orchestrator/tools/index.js +7 -2
  39. package/dist/orchestrator/tools/index.js.map +1 -1
  40. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  41. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  42. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  43. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  44. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  45. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  46. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  47. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  48. package/dist/orchestrator/types.d.ts +23 -1
  49. package/dist/orchestrator/types.d.ts.map +1 -1
  50. package/dist/orchestrator/types.js +11 -1
  51. package/dist/orchestrator/types.js.map +1 -1
  52. package/dist/scenario-service.d.ts +5 -0
  53. package/dist/scenario-service.d.ts.map +1 -1
  54. package/dist/scenario-service.js +17 -0
  55. package/dist/scenario-service.js.map +1 -1
  56. package/dist/scenario-worker-class.d.ts +4 -0
  57. package/dist/scenario-worker-class.d.ts.map +1 -1
  58. package/dist/scenario-worker-class.js +18 -3
  59. package/dist/scenario-worker-class.js.map +1 -1
  60. package/dist/testing/agent-tester.d.ts +35 -0
  61. package/dist/testing/agent-tester.d.ts.map +1 -0
  62. package/dist/testing/agent-tester.js +84 -0
  63. package/dist/testing/agent-tester.js.map +1 -0
  64. package/dist/testing/ref-translator-tester.d.ts +44 -0
  65. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  66. package/dist/testing/ref-translator-tester.js +104 -0
  67. package/dist/testing/ref-translator-tester.js.map +1 -0
  68. package/dist/utils/hierarchical-selector.d.ts +47 -0
  69. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  70. package/dist/utils/hierarchical-selector.js +212 -0
  71. package/dist/utils/hierarchical-selector.js.map +1 -0
  72. package/dist/utils/page-info-retry.d.ts +14 -0
  73. package/dist/utils/page-info-retry.d.ts.map +1 -0
  74. package/dist/utils/page-info-retry.js +60 -0
  75. package/dist/utils/page-info-retry.js.map +1 -0
  76. package/dist/utils/page-info-utils.d.ts +1 -0
  77. package/dist/utils/page-info-utils.d.ts.map +1 -1
  78. package/dist/utils/page-info-utils.js +46 -18
  79. package/dist/utils/page-info-utils.js.map +1 -1
  80. package/dist/utils/ref-attacher.d.ts +21 -0
  81. package/dist/utils/ref-attacher.d.ts.map +1 -0
  82. package/dist/utils/ref-attacher.js +149 -0
  83. package/dist/utils/ref-attacher.js.map +1 -0
  84. package/dist/utils/ref-translator.d.ts +49 -0
  85. package/dist/utils/ref-translator.d.ts.map +1 -0
  86. package/dist/utils/ref-translator.js +276 -0
  87. package/dist/utils/ref-translator.js.map +1 -0
  88. package/package.json +6 -1
  89. package/RELEASE_0.0.26.md +0 -165
  90. package/RELEASE_0.0.27.md +0 -236
  91. package/RELEASE_0.0.28.md +0 -286
  92. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  93. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  94. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  95. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  96. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  97. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  98. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  99. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  100. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  101. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  102. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  103. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  104. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  105. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  106. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  107. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  108. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  109. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  110. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  111. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  112. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  113. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  114. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  115. package/src/auth-config.ts +0 -84
  116. package/src/credit-usage-service.ts +0 -188
  117. package/src/env-loader.ts +0 -103
  118. package/src/execution-service.ts +0 -1413
  119. package/src/file-handler.ts +0 -104
  120. package/src/index.ts +0 -422
  121. package/src/llm-facade.ts +0 -821
  122. package/src/llm-provider.ts +0 -53
  123. package/src/model-constants.ts +0 -35
  124. package/src/orchestrator/index.ts +0 -34
  125. package/src/orchestrator/orchestrator-agent.ts +0 -862
  126. package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
  127. package/src/orchestrator/orchestrator-prompts.ts +0 -474
  128. package/src/orchestrator/tool-registry.ts +0 -182
  129. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  130. package/src/orchestrator/tools/extract-data.ts +0 -92
  131. package/src/orchestrator/tools/index.ts +0 -12
  132. package/src/orchestrator/tools/inspect-page.ts +0 -42
  133. package/src/orchestrator/tools/recall-history.ts +0 -72
  134. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  135. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  136. package/src/orchestrator/types.ts +0 -248
  137. package/src/playwright-mcp-service.ts +0 -224
  138. package/src/progress-reporter.ts +0 -144
  139. package/src/prompts.ts +0 -842
  140. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  141. package/src/providers/local-llm-provider.ts +0 -38
  142. package/src/scenario-service.ts +0 -232
  143. package/src/scenario-worker-class.ts +0 -1089
  144. package/src/script-utils.ts +0 -203
  145. package/src/types.ts +0 -239
  146. package/src/utils/browser-utils.ts +0 -348
  147. package/src/utils/coordinate-converter.ts +0 -162
  148. package/src/utils/page-info-utils.ts +0 -250
  149. package/testchimp-runner-core-0.0.33.tgz +0 -0
  150. package/tsconfig.json +0 -19
@@ -1,862 +0,0 @@
1
- /**
2
- * Orchestrator Agent
3
- * Single agent with tool-use capabilities that maintains journey memory and executes scenarios
4
- */
5
-
6
- import { expect } from '@playwright/test';
7
- import { LLMFacade } from '../llm-facade';
8
- import { ProgressReporter, TokenUsage, StepExecutionStatus } from '../progress-reporter';
9
- import { getEnhancedPageInfo } from '../utils/page-info-utils';
10
- import { CoordinateConverter } from '../utils/coordinate-converter';
11
- import { ToolRegistry, ToolExecutionContext } from './tool-registry';
12
- import { DEFAULT_MODEL } from '../model-constants';
13
- import {
14
- AgentConfig,
15
- AgentContext,
16
- AgentDecision,
17
- JourneyMemory,
18
- MemoryStep,
19
- OrchestratorStepResult,
20
- SelfReflection,
21
- NoteToFutureSelf,
22
- CoordinateAction,
23
- DEFAULT_AGENT_CONFIG
24
- } from './types';
25
- import { OrchestratorPrompts } from './orchestrator-prompts';
26
-
27
- /**
28
- * Orchestrator Agent - manages step execution with tool use and memory
29
- */
30
- export class OrchestratorAgent {
31
- private llmFacade: LLMFacade;
32
- private toolRegistry: ToolRegistry;
33
- private progressReporter?: ProgressReporter;
34
- private config: Required<AgentConfig>;
35
- private logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void;
36
- private debugMode: boolean = false;
37
-
38
/**
 * Stores the agent's collaborators and resolves its effective configuration.
 *
 * @param llmFacade - facade whose `llmProvider` is used for LLM calls
 * @param toolRegistry - registry used to describe and execute tools
 * @param config - optional overrides layered on top of `DEFAULT_AGENT_CONFIG`
 * @param progressReporter - optional sink for progress and token-usage callbacks
 * @param logger - optional logging callback; when absent, nothing is logged
 * @param debugMode - initial debug flag; anything other than `true` means off
 */
constructor(
  llmFacade: LLMFacade,
  toolRegistry: ToolRegistry,
  config?: Partial<AgentConfig>,
  progressReporter?: ProgressReporter,
  logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void,
  debugMode?: boolean
) {
  // Caller-supplied keys win over the defaults, key by key.
  const resolvedConfig = { ...DEFAULT_AGENT_CONFIG, ...config };

  this.llmFacade = llmFacade;
  this.toolRegistry = toolRegistry;
  this.progressReporter = progressReporter;
  this.logger = logger;
  this.config = resolvedConfig;
  this.debugMode = debugMode === true;
}
53
-
54
/**
 * Switches debug mode on or off for this agent instance.
 *
 * @param enabled - new value of the debug flag
 */
setDebugMode(enabled: boolean): void {
  this.debugMode = enabled;
}
57
-
58
/**
 * Execute a single step of the scenario.
 *
 * Loops "build context → ask the agent → run tools → clear blocker → run commands →
 * optional coordinate fallback" until one of the following ends the step:
 *  - the agent reports a status other than `continue` (a `complete` is ignored when a
 *    command failed in the same iteration),
 *  - coordinate mode has been attempted twice without success,
 *  - five consecutive iterations failed,
 *  - `config.maxIterationsPerStep` iterations were used.
 *
 * @param page - browser page the commands run against; screenshots and load-state waits are issued on it
 * @param stepDescription - natural-language goal of this step
 * @param stepNumber - 1-based position of this step within `scenarioSteps`
 * @param totalSteps - total number of steps (used for logging and agent context)
 * @param scenarioSteps - all step descriptions of the scenario, in order
 * @param memory - journey memory; `experiences` and `latestNote` are updated in place
 * @param jobId - identifier forwarded to progress and token reporting
 * @returns the commands that were executed, the number of iterations used and the termination reason
 */
async executeStep(
  page: any,
  stepDescription: string,
  stepNumber: number,
  totalSteps: number,
  scenarioSteps: string[],
  memory: JourneyMemory,
  jobId: string
): Promise<OrchestratorStepResult> {
  this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
  this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);

  let iteration = 0;
  let previousReflection: SelfReflection | undefined = undefined;
  let noteToSelf: NoteToFutureSelf | undefined = memory.latestNote; // Start with note from previous step
  const commandsExecuted: string[] = [];
  let consecutiveFailures = 0; // Consecutive iterations with failed commands
  let coordinateAttempts = 0; // Coordinate mode attempts (max 2)

  // Result used for every "give up" exit; reads `iteration` at call time.
  const stuckResult = (error: string): OrchestratorStepResult => ({
    success: false,
    commands: commandsExecuted,
    iterations: iteration,
    terminationReason: 'agent_stuck',
    memory,
    error
  });

  // Playwright's page.screenshot() resolves to a Buffer (it has no `encoding` option), and
  // interpolating a Buffer into a template string decodes it as UTF-8 instead of base64.
  // Accept both a ready-made base64 string and raw bytes so the data URL is always valid.
  const captureScreenshotDataUrl = async (): Promise<string> => {
    const shot = await page.screenshot({ encoding: 'base64', fullPage: false, type: 'jpeg', quality: 60 });
    const base64 = typeof shot === 'string' ? shot : Buffer.from(shot).toString('base64');
    return `data:image/jpeg;base64,${base64}`;
  };

  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  while (iteration < this.config.maxIterationsPerStep) {
    iteration++;

    this.logger?.(`\n[Orchestrator] === Iteration ${iteration}/${this.config.maxIterationsPerStep} ===`);

    // Build context for agent
    const context = await this.buildAgentContext(
      page,
      stepDescription,
      stepNumber,
      totalSteps,
      scenarioSteps,
      memory,
      previousReflection,
      consecutiveFailures,
      noteToSelf
    );

    // Call agent to make decision
    const decision = await this.callAgent(context, jobId, stepNumber, iteration, consecutiveFailures);

    // Log agent's reasoning
    this.logAgentDecision(decision, iteration);

    // Report progress
    await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);

    // ANTI-LOOP: warn when the recent history is dominated by screenshots (log-only, no enforcement)
    const recentScreenshots = memory.history.slice(-3).filter(s =>
      s.code.includes('take_screenshot') || s.action.includes('Screenshot')
    );
    if (recentScreenshots.length >= 2 && iteration >= 3) {
      this.logger?.(`[Orchestrator] ⚠️ WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
      this.logger?.(`[Orchestrator] 💭 System: Stop gathering info, START ACTING with available selectors`);
    }

    // Execute tools if requested
    if (decision.toolCalls && decision.toolCalls.length > 0) {
      const toolResults: Record<string, any> = await this.executeTools(decision.toolCalls, page, memory, stepNumber);

      // If agent wants to see tool results before proceeding, call agent again and merge
      if (decision.needsToolResults) {
        const updatedContext = { ...context, toolResults };
        const continuedDecision = await this.callAgent(updatedContext, jobId, stepNumber, iteration, consecutiveFailures);

        decision.commands = continuedDecision.commands || decision.commands;
        decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
        decision.status = continuedDecision.status;
        decision.statusReasoning = continuedDecision.statusReasoning;
        decision.reasoning = continuedDecision.reasoning;
      }
    }

    let iterationHadFailure = false;

    // Handle blocker if detected (clear blocker FIRST, then proceed with main commands)
    if (decision.blockerDetected && decision.blockerDetected.clearingCommands && decision.blockerDetected.clearingCommands.length > 0) {
      this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
      this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);

      const blockerResult = await this.executeCommandsSequentially(
        decision.blockerDetected.clearingCommands,
        page,
        memory,
        stepNumber,
        iteration,
        jobId
      );

      // Add blocker commands with comment to output
      if (blockerResult.executed.length > 0) {
        commandsExecuted.push(`// Blocker: ${decision.blockerDetected.description}`);
        commandsExecuted.push(...blockerResult.executed);
      }

      if (!blockerResult.allSucceeded) {
        this.logger?.(`[Orchestrator] ❌ Failed to clear blocker - continuing anyway`);
        consecutiveFailures++;
        iterationHadFailure = true;
      }
    }

    // Execute main commands (only if no blocker failure)
    if (decision.commands && decision.commands.length > 0 && !iterationHadFailure) {
      const executeResult = await this.executeCommandsSequentially(
        decision.commands,
        page,
        memory,
        stepNumber,
        iteration,
        jobId
      );

      commandsExecuted.push(...executeResult.executed);

      if (!executeResult.allSucceeded) {
        this.logger?.(`[Orchestrator] ⚠ Command execution stopped at failure`);
        consecutiveFailures++;
        iterationHadFailure = true;
      } else {
        consecutiveFailures = 0; // Reset on success
      }
    }

    // Handle coordinate-based actions (fallback when selectors fail)
    if (decision.coordinateAction && !iterationHadFailure) {
      coordinateAttempts++;

      this.logger?.(`[Orchestrator] 🎯 Coordinate Action (attempt ${coordinateAttempts}/2): ${decision.coordinateAction.action} at (${decision.coordinateAction.xPercent}%, ${decision.coordinateAction.yPercent}%)`);

      try {
        // BEFORE screenshot for visual verification
        const beforeDataUrl = await captureScreenshotDataUrl();

        // Generate Playwright commands from coordinate action
        const coordCommands = await CoordinateConverter.generateCommands(decision.coordinateAction, page);

        this.logger?.(`[Orchestrator] Generated commands:`);
        coordCommands.forEach(cmd => this.logger?.(` ${cmd}`));

        const coordResult = await this.executeCommandsSequentially(
          coordCommands,
          page,
          memory,
          stepNumber,
          iteration,
          jobId
        );

        commandsExecuted.push(...coordResult.executed);

        if (!coordResult.allSucceeded) {
          this.logger?.(`[Orchestrator] ❌ Coordinate action failed (Playwright error)`);
          consecutiveFailures++;
          iterationHadFailure = true;

          // Give up after 2 coordinate attempts
          if (coordinateAttempts >= 2) {
            this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts) - marking stuck`);
            return stuckResult('Coordinate fallback failed after 2 attempts - unable to proceed');
          }
        } else {
          this.logger?.(`[Orchestrator] ✅ Coordinate action succeeded (no Playwright error)`);

          // A click that raised no error may still have hit the wrong element, so verify visually.
          // Wait for network idle first (max 10s) so the AFTER screenshot reflects the result.
          try {
            await page.waitForLoadState('networkidle', { timeout: 10000 });
          } catch (e) {
            // Page may still be loading, but proceed with verification
            this.logger?.(`[Orchestrator] ⚠️ Network idle timeout after 10s, proceeding with verification`, 'warn');
          }

          const afterDataUrl = await captureScreenshotDataUrl();

          this.logger?.(`[Orchestrator] 📸 Verifying coordinate action visually...`);

          // Fall back to the step description if the index is out of range for scenarioSteps.
          const verificationGoal = scenarioSteps[stepNumber - 1] ?? stepDescription;

          const verificationRequest = {
            model: 'gpt-5-mini',
            systemPrompt: 'You are a visual verification expert for web automation. Compare before/after screenshots to determine if an action achieved its goal.',
            userPrompt: `Goal: ${verificationGoal}\n\nA coordinate-based action was just executed. Compare the BEFORE and AFTER screenshots.\n\nDid the action achieve the goal? Respond with JSON:\n{\n "verified": boolean,\n "reasoning": "What changed (or didn't change) between screenshots",\n "visibleChanges": ["List of UI changes observed"],\n "clickedWrongPlace": boolean\n}\n\nBe strict: Only return verified=true if you clearly see the expected change matching the goal.`,
            images: [
              { label: 'BEFORE', dataUrl: beforeDataUrl },
              { label: 'AFTER', dataUrl: afterDataUrl }
            ]
          };

          const verificationResponse = await this.llmFacade.llmProvider.callLLM(verificationRequest);
          const jsonMatch = verificationResponse.answer.match(/\{[\s\S]*\}/);

          if (jsonMatch) {
            // A malformed payload throws here and is handled by the outer catch as a failed attempt.
            const verificationResult = JSON.parse(jsonMatch[0]);
            const verified = verificationResult.verified === true;
            const reasoning = verificationResult.reasoning || 'No reasoning provided';
            const clickedWrongPlace = verificationResult.clickedWrongPlace === true;

            this.logger?.(`[Orchestrator] 📊 Visual verification: ${verified ? '✅ VERIFIED' : '❌ NOT VERIFIED'}`);
            this.logger?.(`[Orchestrator] 💭 Reasoning: ${reasoning}`);

            if (verified) {
              consecutiveFailures = 0;

              noteToSelf = {
                fromIteration: iteration,
                content: `Coordinate action verified successful: ${reasoning}`
              };
              memory.latestNote = noteToSelf;
            } else {
              // Goal NOT achieved despite no error
              this.logger?.(`[Orchestrator] ⚠️ Coordinate click succeeded but goal NOT achieved`, 'warn');
              consecutiveFailures++;
              iterationHadFailure = true;

              // Store diagnostic note for next attempt
              const diagnostic = clickedWrongPlace
                ? `Clicked wrong place. ${reasoning}. Try different coordinates.`
                : `Action executed but goal not achieved. ${reasoning}. May need different approach.`;

              noteToSelf = {
                fromIteration: iteration,
                content: diagnostic
              };
              memory.latestNote = noteToSelf;

              // Give up after 2 coordinate attempts
              if (coordinateAttempts >= 2) {
                this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts, none achieved goal) - marking stuck`);
                return stuckResult(`Coordinate actions clicked but didn't achieve goal: ${reasoning}`);
              }
            }
          } else {
            this.logger?.(`[Orchestrator] ⚠️ Could not parse verification response - treating as unverified`, 'warn');
            consecutiveFailures++;
            iterationHadFailure = true;
          }
        }
      } catch (error: unknown) {
        this.logger?.(`[Orchestrator] ❌ Coordinate action error: ${describeError(error)}`, 'error');
        consecutiveFailures++;
        iterationHadFailure = true;

        // Give up after 2 coordinate attempts
        if (coordinateAttempts >= 2) {
          this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts) - marking stuck`);
          return stuckResult('Coordinate fallback failed after 2 attempts - unable to proceed');
        }
      }
    }

    // System-enforced stuck detection (agent might not detect it)
    // Allow 5 failures: 3 selector attempts + 2 coordinate attempts
    if (consecutiveFailures >= 5) {
      this.logger?.(`[Orchestrator] 🛑 SYSTEM: ${consecutiveFailures} consecutive failures detected - forcing stuck`, 'warn');
      return stuckResult(`Failed ${consecutiveFailures} iterations in a row - unable to proceed`);
    }

    // Update memory with experiences
    if (decision.experiences && decision.experiences.length > 0) {
      for (const exp of decision.experiences) {
        // Skip non-string and blank entries: a stored '' is a substring of every later
        // experience and would make the dedup below reject everything from then on.
        if (typeof exp !== 'string' || exp.trim() === '') {
          continue;
        }

        // Deduplicate - don't add if one experience contains the other (case-insensitive)
        const candidate = exp.toLowerCase();
        const exists = memory.experiences.some(existing => {
          const known = existing.toLowerCase();
          return known.includes(candidate) || candidate.includes(known);
        });

        if (!exists) {
          memory.experiences.push(exp);
          this.logger?.(`[Orchestrator] 📚 Experience: ${exp}`);
        }
      }

      // Cap experiences, keeping the most recent ones
      if (memory.experiences.length > this.config.maxExperiences) {
        memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
      }
    }

    // Store self-reflection for next iteration
    previousReflection = decision.selfReflection;

    // Store note to future self (persisted on memory so it carries across steps)
    if (decision.noteToFutureSelf) {
      noteToSelf = {
        fromIteration: iteration,
        content: decision.noteToFutureSelf
      };
      memory.latestNote = noteToSelf;
      this.logger?.(`[Orchestrator] 📝 Note to self: ${decision.noteToFutureSelf}`);
    }

    // Check termination
    if (decision.status !== 'continue') {
      this.logger?.(`[Orchestrator] 🎯 Status: ${decision.status}`);
      this.logger?.(`[Orchestrator] 💭 Reason: ${decision.statusReasoning}`);

      // SAFETY CHECK: Don't allow "complete" if commands failed this iteration
      if (decision.status === 'complete' && iterationHadFailure) {
        this.logger?.(`[Orchestrator] ⚠️ OVERRIDE: Agent said "complete" but commands FAILED - forcing "continue"`, 'warn');
        this.logger?.(`[Orchestrator] 💭 System: Commands must succeed before marking complete`);
        // Don't return - continue to next iteration
      } else {
        // Valid termination
        return {
          success: decision.status === 'complete',
          commands: commandsExecuted,
          iterations: iteration,
          terminationReason: decision.status === 'complete' ? 'complete' :
            decision.status === 'stuck' ? 'agent_stuck' :
            'infeasible',
          memory
        };
      }
    }
  }

  // Hit iteration limit
  this.logger?.(`[Orchestrator] ⚠ Maximum iterations reached (${this.config.maxIterationsPerStep})`);

  return {
    success: false,
    commands: commandsExecuted,
    iterations: iteration,
    terminationReason: 'system_limit',
    memory,
    error: 'Maximum iterations reached'
  };
}
433
-
434
/**
 * Assemble the context object handed to the agent for one iteration.
 *
 * Page info is fetched via `getEnhancedPageInfo(page)` and the URL via `page.url()` on every
 * call; the rest is derived from the scenario and the journey memory.
 *
 * @param page - browser page to snapshot
 * @param currentStepGoal - goal of the step being executed
 * @param stepNumber - 1-based step index; steps before it count as completed, steps after it as remaining
 * @param totalSteps - total number of steps in the scenario
 * @param scenarioSteps - all step descriptions, joined with newlines to form the overall goal
 * @param memory - journey memory supplying history, experiences and extracted data
 * @param previousReflection - self-reflection from the previous iteration, if any
 * @param consecutiveFailures - accepted for signature compatibility; not read by this method
 * @param noteFromPreviousIteration - tactical note carried over from the previous iteration, if any
 * @returns the populated agent context
 */
private async buildAgentContext(
  page: any,
  currentStepGoal: string,
  stepNumber: number,
  totalSteps: number,
  scenarioSteps: string[],
  memory: JourneyMemory,
  previousReflection?: SelfReflection,
  consecutiveFailures?: number,
  noteFromPreviousIteration?: NoteToFutureSelf
): Promise<AgentContext> {
  // Fresh DOM snapshot first, then the URL.
  const pageSnapshot = await getEnhancedPageInfo(page);
  const url = page.url();

  const { history, experiences, extractedData } = memory;
  const finishedCount = stepNumber - 1;

  const agentContext: AgentContext = {
    overallGoal: scenarioSteps.join('\n'),
    currentStepGoal,
    stepNumber,
    totalSteps,
    completedSteps: scenarioSteps.slice(0, finishedCount),
    remainingSteps: scenarioSteps.slice(stepNumber),
    currentPageInfo: pageSnapshot,
    currentURL: url,
    // Only the tail of the history is passed on, sized by config.recentStepsCount.
    recentSteps: history.slice(-this.config.recentStepsCount),
    experiences,
    extractedData,
    previousIterationGuidance: previousReflection,
    noteFromPreviousIteration
  };

  return agentContext;
}
472
-
473
/**
 * Call the agent (LLM) to obtain its decision for the current iteration.
 *
 * After 3 or more consecutive failures the coordinate-mode system prompt is used instead of
 * the regular tool-aware prompt. Token usage is forwarded to the progress reporter when the
 * response carries usage data and the reporter implements `onTokensUsed`.
 *
 * @param context - agent context for this iteration
 * @param jobId - job identifier attached to the token-usage report
 * @param stepNumber - step number attached to the token-usage report
 * @param iteration - iteration number attached to the token-usage report
 * @param consecutiveFailures - current failure streak; selects the prompt mode
 * @returns the parsed decision, or a `stuck` decision when the call (or usage reporting) throws
 */
private async callAgent(
  context: AgentContext,
  jobId: string,
  stepNumber: number,
  iteration: number,
  consecutiveFailures?: number
): Promise<AgentDecision> {
  // Phase 1: Only 2 tiers (selectors → coordinates), so activate after 3 failures
  // Phase 2: Will have 3 tiers (selectors → index → coordinates), threshold will be 5
  const useCoordinateMode = consecutiveFailures !== undefined && consecutiveFailures >= 3;

  // Build appropriate system prompt based on mode
  const toolDescriptions = this.toolRegistry.generateToolDescriptions();
  const systemPrompt = useCoordinateMode
    ? OrchestratorPrompts.buildCoordinateSystemPrompt()
    : OrchestratorPrompts.buildSystemPrompt(toolDescriptions);
  const userPrompt = OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures);

  // Log prompt lengths for monitoring
  const systemLength = systemPrompt.length;
  const userLength = userPrompt.length;
  const totalLength = systemLength + userLength;
  const estimatedTokens = Math.ceil(totalLength / 4); // Rough estimate: 4 chars per token

  this.logger?.(`[Orchestrator] 📊 Prompt lengths: system=${systemLength} chars, user=${userLength} chars, total=${totalLength} chars (~${estimatedTokens} tokens)`, 'log');

  try {
    // Call LLM directly via provider
    const response = await this.llmFacade.llmProvider.callLLM({
      model: DEFAULT_MODEL,
      systemPrompt,
      userPrompt
    });

    // Report token usage
    if (response.usage && this.progressReporter?.onTokensUsed) {
      const tokenUsage: TokenUsage = {
        jobId,
        stepNumber,
        iteration,
        inputTokens: response.usage.inputTokens,
        outputTokens: response.usage.outputTokens,
        includesImage: false,
        model: DEFAULT_MODEL,
        timestamp: Date.now()
      };
      this.logger?.(`[Orchestrator] 💰 Reporting token usage: ${tokenUsage.inputTokens} + ${tokenUsage.outputTokens}`, 'log');
      await this.progressReporter.onTokensUsed(tokenUsage);
    } else if (!response.usage) {
      this.logger?.(`[Orchestrator] ⚠ No usage data in LLM response`, 'warn');
    }

    // Parse response
    return this.parseAgentDecision(response.answer);
  } catch (error: unknown) {
    // Anything can be thrown, not only Error instances; avoid reporting "undefined" as the cause.
    const message = error instanceof Error ? error.message : String(error);
    this.logger?.(`[Orchestrator] ✗ Agent call failed: ${message}`, 'error');

    // Return fallback decision
    return {
      status: 'stuck',
      statusReasoning: `Agent call failed: ${message}`,
      reasoning: 'LLM call failed'
    };
  }
}
545
-
546
-
547
-
548
/**
 * Parse agent decision from LLM response.
 *
 * The response is searched for a JSON object. The widest `{ … }` span is tried first; if that
 * does not parse (for example because prose containing braces follows the JSON), the first
 * brace-balanced object is tried instead. The object must carry `status` and at least one of
 * `reasoning` / `statusReasoning`; a missing `reasoning` is filled from `statusReasoning`.
 *
 * @param response - raw text returned by the LLM
 * @returns the parsed decision, or a `stuck` fallback decision when parsing or validation fails
 */
private parseAgentDecision(response: string): AgentDecision {
  // Returns the first brace-balanced `{ … }` span, ignoring braces inside JSON string literals.
  const firstBalancedObject = (text: string): string | undefined => {
    const start = text.indexOf('{');
    if (start === -1) {
      return undefined;
    }
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i++) {
      const ch = text[i];
      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}' && --depth === 0) {
        return text.slice(start, i + 1);
      }
    }
    return undefined;
  };

  try {
    // Extract JSON from response
    const jsonMatch = response.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      this.logger?.(`[Orchestrator] ✗ No JSON found in LLM response`, 'error');
      this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
      throw new Error('No JSON found in response');
    }

    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- JSON.parse result, validated below
    let parsed: any;
    try {
      parsed = JSON.parse(jsonMatch[0]);
    } catch (greedyError: unknown) {
      // The greedy span runs to the LAST '}' in the response; retry with the first balanced object.
      const candidate = firstBalancedObject(response);
      if (candidate === undefined || candidate === jsonMatch[0]) {
        throw greedyError;
      }
      parsed = JSON.parse(candidate);
    }

    // Validate required fields
    // Accept either "reasoning" or "statusReasoning" (LLMs sometimes only provide one)
    if (!parsed.status || (!parsed.reasoning && !parsed.statusReasoning)) {
      this.logger?.(`[Orchestrator] ✗ Missing required fields in parsed JSON`, 'error');
      this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
      this.logger?.(`[Orchestrator] 📄 PARSED JSON:\n${JSON.stringify(parsed, null, 2)}`, 'error');
      this.logger?.(`[Orchestrator] ❌ Has status: ${!!parsed.status}, Has reasoning: ${!!parsed.reasoning}, Has statusReasoning: ${!!parsed.statusReasoning}`, 'error');
      throw new Error('Missing required fields: status and (reasoning or statusReasoning)');
    }

    // Normalize: if reasoning is missing but statusReasoning exists, use statusReasoning as reasoning
    if (!parsed.reasoning && parsed.statusReasoning) {
      parsed.reasoning = parsed.statusReasoning;
    }

    return parsed as AgentDecision;
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    this.logger?.(`[Orchestrator] ✗ Failed to parse agent decision: ${message}`, 'error');

    // Only log full response if not already logged above
    if (!message.includes('Missing required fields') && !message.includes('No JSON found')) {
      this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
    }

    // Return fallback
    return {
      status: 'stuck',
      statusReasoning: 'Failed to parse agent response',
      reasoning: `Parse error: ${message}`
    };
  }
}
596
-
597
/**
 * Execute the tool calls requested by the agent, in order.
 *
 * At most `config.maxToolCallsPerIteration` calls are run; any beyond that are dropped.
 * Results are keyed by tool name, so a later call to the same tool replaces the earlier result.
 *
 * @param toolCalls - tool invocations requested by the agent (each with `name` and optional `params`)
 * @param page - browser page made available to the tools
 * @param memory - journey memory made available to the tools
 * @param stepNumber - current step number made available to the tools
 * @returns map from tool name to that tool's execution result
 */
private async executeTools(
  toolCalls: any[],
  page: any,
  memory: JourneyMemory,
  stepNumber: number
): Promise<Record<string, any>> {
  this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);

  const results: Record<string, any> = {};
  const toolContext: ToolExecutionContext = {
    page,
    memory,
    stepNumber,
    logger: this.logger
  };

  for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
    // JSON.stringify returns undefined when params is missing; default to '' so building the
    // log preview cannot throw and abort the tool run.
    this.logger?.(`[Orchestrator] ▶ ${toolCall.name}(${(JSON.stringify(toolCall.params) ?? '').substring(0, 50)}...)`);

    const result = await this.toolRegistry.execute(toolCall, toolContext);
    results[toolCall.name] = result;

    if (result.success) {
      this.logger?.(`[Orchestrator] ✓ ${toolCall.name} succeeded`);
    } else {
      this.logger?.(`[Orchestrator] ✗ ${toolCall.name} failed: ${result.error}`, 'error');
    }
  }

  return results;
}
631
-
632
/**
 * Execute a batch of agent-generated commands inside ONE async function,
 * stopping at the first command that throws.
 *
 * Only the first `this.config.maxCommandsPerIteration` commands are run.
 * Each command is wrapped in its own `try { ... }` block, so `var`
 * declarations are visible to later commands while `let`/`const`
 * declarations are scoped to the command that made them.
 *
 * Every command that completed is appended to `memory.history` as a success
 * and returned in `executed`. The command that threw is appended as a
 * failure. `memory.history` is trimmed to `this.config.maxHistorySize` on
 * both the success and the failure path.
 *
 * @param commands - Source snippets to run; `page` and `expect` are in scope.
 * @param page - Page object exposed to the commands (also read for `url()`).
 * @param memory - Journey memory whose `history` receives one entry per command.
 * @param stepNumber - Step number stored on each history entry.
 * @param iteration - Iteration number stored on each history entry.
 * @param jobId - Accepted for interface compatibility; not read by this method.
 * @returns The commands that ran to completion, and whether all of them did.
 */
private async executeCommandsSequentially(
  commands: string[],
  page: any,
  memory: JourneyMemory,
  stepNumber: number,
  iteration: number,
  jobId: string
): Promise<{ executed: string[]; allSucceeded: boolean }> {
  this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s) in shared context`);

  const executed: string[] = [];
  const limitedCommands = commands.slice(0, this.config.maxCommandsPerIteration);
  const total = limitedCommands.length;

  // Per-command outcomes. The array is owned here and passed INTO the
  // generated function so that it is still readable after a command throws;
  // a local declared inside the generated code would be lost with the throw.
  const tracking: Array<{ index: number; success: boolean; error?: string }> = [];

  // Log, record in history and collect one successfully executed command.
  const recordSuccess = (i: number): void => {
    this.logger?.(`[Orchestrator] ✓ [${i + 1}/${total}] Success`);
    memory.history.push({
      stepNumber,
      iteration,
      action: `Command ${i + 1}/${total}`,
      code: limitedCommands[i],
      result: 'success',
      observation: 'Executed successfully',
      url: page.url(),
      timestamp: Date.now()
    });
    executed.push(limitedCommands[i]);
  };

  // Keep only the most recent `maxHistorySize` history entries.
  const capHistory = (): void => {
    if (memory.history.length > this.config.maxHistorySize) {
      memory.history = memory.history.slice(-this.config.maxHistorySize);
    }
  };

  // Build the function body: each command reports its own outcome.
  const commandsWithTracking = limitedCommands.map((cmd, i) => {
    return `
// Command ${i + 1}/${total}
try {
${cmd}
__results.push({ index: ${i}, success: true });
} catch (error) {
__results.push({ index: ${i}, success: false, error: (error && error.message) || String(error) });
throw error; // Stop on first failure
}`;
  }).join('\n');

  const wrappedCode = `
${commandsWithTracking}
return __results;
`;

  try {
    // SECURITY NOTE: the commands are executed as code via the Function
    // constructor. They must come from a trusted source; nothing here
    // sandboxes them.
    // `page` and `expect` are passed in so Playwright assertions are available.
    const func = new Function(
      'page',
      'expect',
      '__results',
      'return (async () => { ' + wrappedCode + ' })()'
    );
    await func(page, expect, tracking);

    for (let i = 0; i < total; i++) {
      if (tracking[i]?.success) {
        recordSuccess(i);
      }
    }

    capHistory();

    // A command containing a bare `return` ends the batch early without
    // throwing; report that honestly instead of claiming full success.
    return { executed, allSucceeded: executed.length === total };

  } catch (error: any) {
    const errorMessage = error?.message || String(error);

    // Capture page state for debug logging
    let pageStateDebug = '';
    if (this.debugMode) {
      try {
        const pageInfo = await getEnhancedPageInfo(page);
        pageStateDebug = `
=== DEBUG: PAGE STATE AT FAILURE ===
URL: ${page.url()}
Title: ${pageInfo.title}

INTERACTIVE ELEMENTS:
${pageInfo.formattedElements}

ARIA SNAPSHOT:
${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}

====================================`;
      } catch (debugError: any) {
        pageStateDebug = `Failed to capture page state: ${debugError.message}`;
      }
    }

    // Record the commands that completed before the failure (skipping any
    // that were already recorded before the error surfaced).
    for (const outcome of tracking) {
      if (outcome.success && outcome.index >= executed.length) {
        recordSuccess(outcome.index);
      }
    }

    // The failing command is the one that reported failure. If none did
    // (e.g. the generated code did not even compile), blame the first
    // command that has no recorded success.
    const failure = tracking.find((outcome) => !outcome.success);
    const failedIndex = failure ? failure.index : executed.length;

    if (failedIndex < total) {
      this.logger?.(`[Orchestrator] ✗ [${failedIndex + 1}/${total}] Failed: ${errorMessage}`, 'error');

      // Log detailed debug info
      if (this.debugMode && pageStateDebug) {
        this.logger?.(pageStateDebug, 'debug');
      }

      memory.history.push({
        stepNumber,
        iteration,
        action: `Command ${failedIndex + 1}/${total} - FAILED`,
        code: limitedCommands[failedIndex],
        result: 'failure',
        observation: `Failed with error: ${errorMessage}. This selector likely doesn't exist or is incorrect.`,
        error: errorMessage,
        url: page.url(),
        timestamp: Date.now()
      });

      if (failedIndex < total - 1) {
        this.logger?.(`[Orchestrator] ⚠ Skipping remaining ${total - failedIndex - 1} command(s)`, 'warn');
      }
    }

    capHistory();

    return { executed, allSucceeded: false };
  }
}
766
-
767
/**
 * Execute a single command string as the body of an async function.
 *
 * The command sees exactly two names: `page` and `expect` (so Playwright
 * assertions are available). Any error thrown by the command propagates to
 * the caller.
 *
 * @param cmd - Source snippet to run.
 * @param page - Page object exposed to the command as `page`.
 */
private async executeCommand(cmd: string, page: any): Promise<void> {
  // Run the command exactly once. Evaluating it and then re-running it on
  // failure would repeat every side effect the command had before it threw
  // (a click, a form submission, a navigation).
  //
  // SECURITY NOTE: the command is executed as code via the Function
  // constructor. It must come from a trusted source; nothing here sandboxes it.
  const func = new Function('page', 'expect', `return (async () => { ${cmd} })()`);
  await func(page, expect);
}
783
-
784
/**
 * Write a human-readable summary of an agent decision to the logger.
 *
 * Logs the reasoning, then, when present: self-reflection (with a warning
 * if a loop is flagged), requested tools, a detected blocker, a step
 * re-evaluation, and up to the first three commands (each cut to 80
 * characters). Optional nested fields are read defensively so that a
 * partially filled decision cannot make logging throw.
 *
 * @param decision - Decision to describe.
 * @param iteration - Accepted for interface compatibility; not read by this method.
 */
private logAgentDecision(decision: AgentDecision, iteration: number): void {
  this.logger?.(`[Orchestrator] 💭 REASONING: ${decision.reasoning}`);

  const reflection = decision.selfReflection;
  if (reflection) {
    this.logger?.(`[Orchestrator] 🧠 SELF-REFLECTION:`);
    this.logger?.(`[Orchestrator] Next: ${reflection.guidanceForNext}`);
    if (reflection.detectingLoop) {
      this.logger?.(`[Orchestrator] 🔄 LOOP DETECTED: ${reflection.loopReasoning}`, 'warn');
    }
  }

  if (decision.toolCalls && decision.toolCalls.length > 0) {
    this.logger?.(`[Orchestrator] 🔧 TOOLS: ${decision.toolCalls.map(t => t.name).join(', ')}`);
    if (decision.toolReasoning) {
      this.logger?.(`[Orchestrator] 📋 Why: ${decision.toolReasoning}`);
    }
  }

  if (decision.blockerDetected) {
    // A blocker may be reported without clearing commands; count those as zero.
    const clearingCount = decision.blockerDetected.clearingCommands?.length ?? 0;
    this.logger?.(`[Orchestrator] 🚧 BLOCKER: ${decision.blockerDetected.description}`, 'warn');
    this.logger?.(`[Orchestrator] 🧹 Clearing with ${clearingCount} command(s)`);
  }

  if (decision.stepReEvaluation?.detected) {
    this.logger?.(`[Orchestrator] 🔍 STEP RE-EVALUATION: ${decision.stepReEvaluation.issue}`, 'warn');
    this.logger?.(`[Orchestrator] 📝 Explanation: ${decision.stepReEvaluation.explanation}`);
  }

  if (decision.commands && decision.commands.length > 0) {
    this.logger?.(`[Orchestrator] 📝 COMMANDS (${decision.commands.length}):`);
    decision.commands.slice(0, 3).forEach((cmd, i) => {
      // String() keeps the preview working even if an entry is not a string.
      this.logger?.(`[Orchestrator] ${i + 1}. ${String(cmd).substring(0, 80)}...`);
    });
    if (decision.commands.length > 3) {
      this.logger?.(`[Orchestrator] ... and ${decision.commands.length - 3} more`);
    }
    if (decision.commandReasoning) {
      this.logger?.(`[Orchestrator] 💡 Why: ${decision.commandReasoning}`);
    }
  }

  // Experiences will be logged when added to memory, no need to log here
}
830
-
831
/**
 * Forward the current state of a step to the progress reporter.
 *
 * Does nothing when no reporter with an `onStepProgress` callback is
 * configured. The decision status is mapped to a step status:
 * `complete` -> SUCCESS, `stuck` or `infeasible` -> FAILURE, anything
 * else -> IN_PROGRESS.
 *
 * @param jobId - Job the step belongs to.
 * @param stepNumber - Number of the step being reported.
 * @param description - Description of the step.
 * @param decision - Agent decision the report is derived from.
 * @param iteration - Agent iteration that produced the decision.
 */
private async reportStepProgress(
  jobId: string,
  stepNumber: number,
  description: string,
  decision: AgentDecision,
  iteration: number
): Promise<void> {
  const reporter = this.progressReporter;
  if (!reporter?.onStepProgress) {
    return;
  }

  // Translate the agent's status into the reporter's step status.
  let stepStatus = StepExecutionStatus.IN_PROGRESS;
  if (decision.status === 'complete') {
    stepStatus = StepExecutionStatus.SUCCESS;
  } else if (decision.status === 'stuck' || decision.status === 'infeasible') {
    stepStatus = StepExecutionStatus.FAILURE;
  }

  const generatedCode = decision.commands?.join('\n');
  const toolNames = decision.toolCalls?.map(t => t.name);

  await reporter.onStepProgress({
    jobId,
    stepNumber,
    description,
    status: stepStatus,
    code: generatedCode,
    // Agent metadata, included for transparency
    agentIteration: iteration,
    agentReasoning: decision.reasoning,
    agentSelfReflection: decision.selfReflection,
    agentExperiences: decision.experiences,
    agentToolsUsed: toolNames,
    agentStatus: decision.status
  });
}
860
- }
861
-
862
-