testchimp-runner-core 0.0.40 → 0.0.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/dist/execution-service.d.ts.map +1 -1
  2. package/dist/execution-service.js +1 -3
  3. package/dist/execution-service.js.map +1 -1
  4. package/dist/index.d.ts +7 -6
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +5 -11
  7. package/dist/index.js.map +1 -1
  8. package/dist/orchestrator/decision-parser.d.ts.map +1 -1
  9. package/dist/orchestrator/decision-parser.js +16 -0
  10. package/dist/orchestrator/decision-parser.js.map +1 -1
  11. package/dist/orchestrator/index.d.ts +4 -2
  12. package/dist/orchestrator/index.d.ts.map +1 -1
  13. package/dist/orchestrator/index.js +10 -8
  14. package/dist/orchestrator/index.js.map +1 -1
  15. package/dist/orchestrator/orchestrator-agent.d.ts +10 -4
  16. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.js +376 -118
  18. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  19. package/dist/orchestrator/orchestrator-prompts.d.ts +2 -10
  20. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.js +343 -452
  22. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  23. package/dist/orchestrator/page-loading-utils.d.ts +15 -0
  24. package/dist/orchestrator/page-loading-utils.d.ts.map +1 -0
  25. package/dist/orchestrator/page-loading-utils.js +115 -0
  26. package/dist/orchestrator/page-loading-utils.js.map +1 -0
  27. package/dist/orchestrator/page-som-handler.d.ts +2 -1
  28. package/dist/orchestrator/page-som-handler.d.ts.map +1 -1
  29. package/dist/orchestrator/page-som-handler.js +250 -33
  30. package/dist/orchestrator/page-som-handler.js.map +1 -1
  31. package/dist/orchestrator/site-learnings-utils.d.ts +31 -0
  32. package/dist/orchestrator/site-learnings-utils.d.ts.map +1 -0
  33. package/dist/orchestrator/site-learnings-utils.js +175 -0
  34. package/dist/orchestrator/site-learnings-utils.js.map +1 -0
  35. package/dist/orchestrator/som-types.d.ts +2 -0
  36. package/dist/orchestrator/som-types.d.ts.map +1 -1
  37. package/dist/orchestrator/som-types.js.map +1 -1
  38. package/dist/orchestrator/tools/index.d.ts +9 -8
  39. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  40. package/dist/orchestrator/tools/index.js +10 -15
  41. package/dist/orchestrator/tools/index.js.map +1 -1
  42. package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -1
  43. package/dist/orchestrator/tools/take-screenshot.js +10 -1
  44. package/dist/orchestrator/tools/take-screenshot.js.map +1 -1
  45. package/dist/orchestrator/types.d.ts +54 -9
  46. package/dist/orchestrator/types.d.ts.map +1 -1
  47. package/dist/orchestrator/types.js.map +1 -1
  48. package/dist/progress-reporter.d.ts +23 -2
  49. package/dist/progress-reporter.d.ts.map +1 -1
  50. package/dist/progress-reporter.js.map +1 -1
  51. package/dist/prompts.d.ts.map +1 -1
  52. package/dist/prompts.js +14 -3
  53. package/dist/prompts.js.map +1 -1
  54. package/dist/scenario-service.d.ts +3 -3
  55. package/dist/scenario-service.d.ts.map +1 -1
  56. package/dist/scenario-service.js +6 -5
  57. package/dist/scenario-service.js.map +1 -1
  58. package/dist/scenario-worker-class.d.ts +7 -3
  59. package/dist/scenario-worker-class.d.ts.map +1 -1
  60. package/dist/scenario-worker-class.js +94 -21
  61. package/dist/scenario-worker-class.js.map +1 -1
  62. package/dist/types.d.ts +4 -0
  63. package/dist/types.d.ts.map +1 -1
  64. package/dist/types.js.map +1 -1
  65. package/package.json +1 -1
  66. package/dist/testing/agent-tester.d.ts +0 -35
  67. package/dist/testing/agent-tester.d.ts.map +0 -1
  68. package/dist/testing/agent-tester.js +0 -84
  69. package/dist/testing/agent-tester.js.map +0 -1
  70. package/dist/testing/ref-translator-tester.d.ts +0 -44
  71. package/dist/testing/ref-translator-tester.d.ts.map +0 -1
  72. package/dist/testing/ref-translator-tester.js +0 -104
  73. package/dist/testing/ref-translator-tester.js.map +0 -1
  74. package/dist/utils/hierarchical-selector.d.ts +0 -47
  75. package/dist/utils/hierarchical-selector.d.ts.map +0 -1
  76. package/dist/utils/hierarchical-selector.js +0 -212
  77. package/dist/utils/hierarchical-selector.js.map +0 -1
  78. package/dist/utils/ref-attacher.d.ts +0 -21
  79. package/dist/utils/ref-attacher.d.ts.map +0 -1
  80. package/dist/utils/ref-attacher.js +0 -149
  81. package/dist/utils/ref-attacher.js.map +0 -1
  82. package/dist/utils/ref-translator.d.ts +0 -49
  83. package/dist/utils/ref-translator.d.ts.map +0 -1
  84. package/dist/utils/ref-translator.js +0 -276
  85. package/dist/utils/ref-translator.js.map +0 -1
@@ -9,6 +9,10 @@ const progress_reporter_1 = require("../progress-reporter");
9
9
  const page_info_utils_1 = require("../utils/page-info-utils");
10
10
  const coordinate_converter_1 = require("../utils/coordinate-converter");
11
11
  const model_constants_1 = require("../model-constants");
12
+ // @ts-ignore - package.json exists
13
+ const package_json_1 = require("../../package.json");
14
+ const site_learnings_utils_1 = require("./site-learnings-utils");
15
+ const page_loading_utils_1 = require("./page-loading-utils");
12
16
  const types_1 = require("./types");
13
17
  const orchestrator_prompts_1 = require("./orchestrator-prompts");
14
18
  const page_info_retry_1 = require("../utils/page-info-retry");
@@ -21,6 +25,14 @@ const som_types_1 = require("./som-types");
21
25
  class OrchestratorAgent {
22
26
  constructor(llmFacade, toolRegistry, config, progressReporter, logger, debugMode) {
23
27
  this.debugMode = false;
28
+ // Debug stats tracking
29
+ this.debugStats = {
30
+ tokensUsedIn: 0,
31
+ tokensUsedOut: 0,
32
+ imagesUsed: 0,
33
+ toolsUsed: {},
34
+ promptImproveSuggestions: []
35
+ };
24
36
  this.llmFacade = llmFacade;
25
37
  this.toolRegistry = toolRegistry;
26
38
  this.config = { ...types_1.DEFAULT_AGENT_CONFIG, ...config };
@@ -43,10 +55,19 @@ class OrchestratorAgent {
43
55
  nextSteps, // For repair mode: steps after this one
44
56
  successfulCommandsInStep, // For repair mode: commands that succeeded within THIS step
45
57
  failingCommand, // For repair mode: the specific command that failed
46
- remainingCommandsInStep // For repair mode: commands after the failing one
58
+ remainingCommandsInStep, // For repair mode: commands after the failing one
59
+ existingSiteLearnings // Pre-existing site learnings from previous runs
47
60
  ) {
48
61
  this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
62
+ this.logger?.(`[Orchestrator] 🚀 runner-core v${package_json_1.version}`);
49
63
  this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);
64
+ // Site learnings (persistent across journeys) - initialize with existing or empty
65
+ const siteLearnings = existingSiteLearnings || { screens: {}, uxPatterns: {} };
66
+ if (existingSiteLearnings) {
67
+ const screenCount = Object.keys(existingSiteLearnings.screens).length;
68
+ const patternCount = Object.keys(existingSiteLearnings.uxPatterns).length;
69
+ this.logger?.(`[Orchestrator] 📚 Loaded existing learnings: ${screenCount} screens, ${patternCount} UX patterns`);
70
+ }
50
71
  let iteration = 0;
51
72
  let noteToSelf = memory.latestNote; // Start with note from previous step
52
73
  const commandsExecuted = [];
@@ -56,8 +77,8 @@ class OrchestratorAgent {
56
77
  iteration++;
57
78
  this.logger?.(`\n[Orchestrator] === Iteration ${iteration}/${this.config.maxIterationsPerStep} ===`);
58
79
  // Build context for agent
59
- const context = await this.buildAgentContext(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, consecutiveFailures, noteToSelf, // Pass note from previous iteration
60
- priorSteps, // Repair context: prior completed steps
80
+ const context = await this.buildAgentContext(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, siteLearnings, // Site learnings (persistent)
81
+ consecutiveFailures, priorSteps, // Repair context: prior completed steps
61
82
  nextSteps, // Repair context: next steps
62
83
  successfulCommandsInStep, // Repair context: successful commands in THIS step
63
84
  failingCommand, // Repair context: the failing command
@@ -67,6 +88,30 @@ class OrchestratorAgent {
67
88
  const decision = await this.callAgent(context, jobId, stepNumber, iteration, consecutiveFailures);
68
89
  // Log agent's reasoning
69
90
  this.decisionParser.log(decision, iteration);
91
+ // Handle debug info from agent
92
+ if (decision.debugInfo) {
93
+ // Collect prompt improvement suggestions
94
+ if (decision.debugInfo.suggestedPromptUpdates) {
95
+ this.debugStats.promptImproveSuggestions.push(decision.debugInfo.suggestedPromptUpdates);
96
+ this.logger?.(`[Orchestrator] 💡 Prompt suggestion collected: ${decision.debugInfo.suggestedPromptUpdates.substring(0, 80)}...`, 'log');
97
+ }
98
+ // Process tool usefulness feedback (for tools from PREVIOUS iteration)
99
+ if (decision.debugInfo.toolUsefulnessFeedback) {
100
+ for (const [toolName, rating] of Object.entries(decision.debugInfo.toolUsefulnessFeedback)) {
101
+ if (this.debugStats.toolsUsed[toolName]) {
102
+ const stats = this.debugStats.toolsUsed[toolName];
103
+ // Calculate running average: (oldAvg * oldCount + newRating) / newCount
104
+ const oldTotal = stats.averageUsefulnessScore * stats.numTimesRated;
105
+ stats.numTimesRated++;
106
+ stats.averageUsefulnessScore = (oldTotal + rating) / stats.numTimesRated;
107
+ this.logger?.(`[Orchestrator] ⭐ Tool feedback: ${toolName} rated ${rating}/5 (avg: ${stats.averageUsefulnessScore.toFixed(2)})`, 'log');
108
+ }
109
+ else {
110
+ this.logger?.(`[Orchestrator] ⚠️ Tool feedback for unknown tool: ${toolName}`, 'warn');
111
+ }
112
+ }
113
+ }
114
+ }
70
115
  // Report progress
71
116
  await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);
72
117
  // Execute tools if requested (tools are READ-ONLY, they don't change state)
@@ -82,6 +127,7 @@ class OrchestratorAgent {
82
127
  this.logger?.(`[Orchestrator] ⚠️ WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
83
128
  }
84
129
  if (decision.toolCalls && decision.toolCalls.length > 0) {
130
+ this.logger?.(`[Orchestrator] 🔧 Agent using TOOL CALLS: ${decision.toolCalls.map(tc => tc.name).join(', ')}`);
85
131
  // ENFORCE: Block screenshot tool calls if too many taken IN THIS STEP
86
132
  if (screenshotsThisStep.length >= 3) {
87
133
  decision.toolCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
@@ -96,27 +142,28 @@ class OrchestratorAgent {
96
142
  }
97
143
  }
98
144
  if (decision.toolCalls.length > 0) {
99
- toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber, context.currentPageInfo.refMap);
100
- }
101
- // If agent wants to wait for tool results before proceeding, call agent again
102
- if (decision.needsToolResults) {
103
- const updatedContext = { ...context, toolResults };
104
- const continuedDecision = await this.callAgent(updatedContext, jobId, stepNumber, iteration, consecutiveFailures);
105
- // Merge continued decision
106
- decision.commands = continuedDecision.commands || decision.commands;
107
- decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
108
- decision.status = continuedDecision.status;
109
- decision.statusReasoning = continuedDecision.statusReasoning;
110
- decision.reasoning = continuedDecision.reasoning;
145
+ toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
111
146
  }
112
147
  }
148
+ // If agent wants to wait for tool results before proceeding, call agent again
149
+ if (decision.toolCalls && decision.toolCalls.length > 0 && decision.needsToolResults) {
150
+ const updatedContext = { ...context, toolResults };
151
+ const continuedDecision = await this.callAgent(updatedContext, jobId, stepNumber, iteration, consecutiveFailures);
152
+ // Merge continued decision
153
+ decision.commands = continuedDecision.commands || decision.commands;
154
+ decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
155
+ decision.status = continuedDecision.status;
156
+ decision.statusReasoning = continuedDecision.statusReasoning;
157
+ decision.reasoning = continuedDecision.reasoning;
158
+ }
113
159
  // Execute commands sequentially
114
160
  let iterationHadFailure = false;
115
161
  // Handle blocker if detected (clear blocker FIRST, then proceed with main commands)
116
162
  if (decision.blockerDetected && decision.blockerDetected.clearingCommands && decision.blockerDetected.clearingCommands.length > 0) {
117
163
  this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
118
164
  this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);
119
- const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, iteration, jobId);
165
+ const urlBeforeBlockerClear = page.url();
166
+ const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, iteration, jobId, urlBeforeBlockerClear, decision.screenState);
120
167
  // Add blocker commands with comment to output
121
168
  if (blockerResult.executed.length > 0) {
122
169
  commandsExecuted.push(`// Blocker: ${decision.blockerDetected.description}`);
@@ -131,7 +178,8 @@ class OrchestratorAgent {
131
178
  }
132
179
  // Execute main commands (only if no blocker failure)
133
180
  if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
134
- const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, iteration, jobId);
181
+ const urlBeforeCommands = page.url();
182
+ const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, iteration, jobId, urlBeforeCommands, decision.screenState);
135
183
  commandsExecuted.push(...executeResult.executed);
136
184
  // Track failures
137
185
  if (!executeResult.allSucceeded) {
@@ -156,7 +204,8 @@ class OrchestratorAgent {
156
204
  this.logger?.(`[Orchestrator] Generated commands:`);
157
205
  coordCommands.forEach(cmd => this.logger?.(` ${cmd}`));
158
206
  // Execute coordinate commands
159
- const coordResult = await this.executeCommands(coordCommands, page, memory, stepNumber, iteration, jobId);
207
+ const urlBeforeCoord = page.url();
208
+ const coordResult = await this.executeCommands(coordCommands, page, memory, stepNumber, iteration, jobId, urlBeforeCoord, decision.screenState);
160
209
  commandsExecuted.push(...coordResult.executed);
161
210
  if (!coordResult.allSucceeded) {
162
211
  this.logger?.(`[Orchestrator] ❌ Coordinate action failed (Playwright error)`);
@@ -171,6 +220,7 @@ class OrchestratorAgent {
171
220
  iterations: iteration,
172
221
  terminationReason: 'agent_stuck',
173
222
  memory,
223
+ siteLearnings,
174
224
  error: 'Coordinate fallback failed after 2 attempts - unable to proceed'
175
225
  };
176
226
  }
@@ -240,6 +290,7 @@ class OrchestratorAgent {
240
290
  iterations: iteration,
241
291
  terminationReason: 'agent_stuck',
242
292
  memory,
293
+ siteLearnings,
243
294
  error: `Coordinate actions clicked but didn't achieve goal: ${reasoning}`
244
295
  };
245
296
  }
@@ -265,6 +316,7 @@ class OrchestratorAgent {
265
316
  iterations: iteration,
266
317
  terminationReason: 'agent_stuck',
267
318
  memory,
319
+ siteLearnings,
268
320
  error: 'Coordinate fallback failed after 2 attempts - unable to proceed'
269
321
  };
270
322
  }
@@ -280,25 +332,39 @@ class OrchestratorAgent {
280
332
  iterations: iteration,
281
333
  terminationReason: 'agent_stuck',
282
334
  memory,
335
+ siteLearnings,
283
336
  error: `Failed ${consecutiveFailures} iterations in a row - unable to proceed`
284
337
  };
285
338
  }
286
- // Update memory with experiences
287
- if (decision.experiences && decision.experiences.length > 0) {
288
- for (const exp of decision.experiences) {
289
- // Deduplicate - don't add if very similar experience exists
290
- const exists = memory.experiences.some(existing => existing.toLowerCase().includes(exp.toLowerCase()) ||
291
- exp.toLowerCase().includes(existing.toLowerCase()));
292
- if (!exists) {
293
- memory.experiences.push(exp);
294
- this.logger?.(`[Orchestrator] 📚 Experience: ${exp}`);
339
+ // Auto-track visited screen (even without explicit learnings)
340
+ // Filter out transient screens and loading states
341
+ if (decision.screenState) {
342
+ const { screen, state } = decision.screenState;
343
+ // Skip about:blank and loading states (transient, not worth persisting)
344
+ const isTransientScreen = screen === 'about:blank' ||
345
+ screen.toLowerCase().includes('blank');
346
+ const isLoadingState = state.toLowerCase().includes('loading') ||
347
+ state.toLowerCase().includes('spinner') ||
348
+ state.toLowerCase().includes('initializing');
349
+ if (!isTransientScreen && !isLoadingState) {
350
+ if (!siteLearnings.screens[screen]) {
351
+ siteLearnings.screens[screen] = { states: {} };
352
+ this.logger?.(`[📍 Auto-tracked] Screen: ${screen}`);
353
+ }
354
+ if (!siteLearnings.screens[screen].states[state]) {
355
+ siteLearnings.screens[screen].states[state] = { observations: {} };
356
+ this.logger?.(`[📍 Auto-tracked] State: ${screen}[${state}]`);
295
357
  }
296
358
  }
297
- // Cap experiences
298
- if (memory.experiences.length > this.config.maxExperiences) {
299
- memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
359
+ else {
360
+ this.logger?.(`[⏭️ Skipped] Transient screen/state: ${screen}[${state}]`);
300
361
  }
301
362
  }
363
+ // Update site learnings
364
+ if (decision.siteLearningsUpdate) {
365
+ this.logger?.(`[🔍 DEBUG] siteLearningsUpdate from LLM:\n${JSON.stringify(decision.siteLearningsUpdate, null, 2)}`);
366
+ (0, site_learnings_utils_1.mergeSiteLearnings)(siteLearnings, decision.siteLearningsUpdate, this.logger);
367
+ }
302
368
  // Store note to future self (tactical memory across iterations AND steps)
303
369
  if (decision.noteToFutureSelf) {
304
370
  noteToSelf = {
@@ -327,7 +393,8 @@ class OrchestratorAgent {
327
393
  terminationReason: decision.status === 'complete' ? 'complete' :
328
394
  decision.status === 'stuck' ? 'agent_stuck' :
329
395
  'infeasible',
330
- memory
396
+ memory,
397
+ siteLearnings
331
398
  };
332
399
  }
333
400
  }
@@ -340,21 +407,58 @@ class OrchestratorAgent {
340
407
  iterations: iteration,
341
408
  terminationReason: 'system_limit',
342
409
  memory,
410
+ siteLearnings,
343
411
  error: 'Maximum iterations reached'
344
412
  };
345
413
  }
346
414
  /**
347
415
  * Build context for agent
348
416
  */
349
- async buildAgentContext(page, currentStepGoal, stepNumber, totalSteps, scenarioSteps, memory, consecutiveFailures, noteFromPreviousIteration, priorSteps, // For repair mode: prior completed steps
417
+ async buildAgentContext(page, currentStepGoal, stepNumber, totalSteps, scenarioSteps, memory, siteLearnings, // Site learnings (persistent across journeys)
418
+ consecutiveFailures, priorSteps, // For repair mode: prior completed steps
350
419
  nextSteps, // For repair mode: next steps
351
420
  successfulCommandsInStep, // For repair mode: successful commands in THIS step
352
421
  failingCommand, // For repair mode: the failing command
353
422
  remainingCommandsInStep // For repair mode: remaining commands in THIS step
354
423
  ) {
355
- // Get fresh DOM
424
+ // Get fresh DOM (for title only, not displayed in prompts - SoM mode uses visual markers)
356
425
  const currentPageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(page);
357
426
  const currentURL = page.url();
427
+ // Get page dimensions for scroll vs screenshot decisions
428
+ // IMPORTANT: Wait for page to stabilize and retry until dimensions stop changing (fixes lazy-loaded/dynamic content)
429
+ try {
430
+ await page.waitForLoadState('domcontentloaded', { timeout: 10000 });
431
+ }
432
+ catch (e) {
433
+ // Already loaded, continue
434
+ }
435
+ // Retry approach: Measure scrollHeight multiple times until it stabilizes
436
+ // This handles React/Vue/Angular apps that expand the DOM after initial render
437
+ // Check MULTIPLE sources and use the maximum (handles edge cases like overflow:hidden)
438
+ const measureHeight = `Math.max(
439
+ document.documentElement.scrollHeight || 0,
440
+ document.body.scrollHeight || 0,
441
+ document.documentElement.offsetHeight || 0,
442
+ document.body.offsetHeight || 0
443
+ )`;
444
+ let pageHeight = await page.evaluate(measureHeight).catch(() => 0);
445
+ let previousHeight = 0;
446
+ let attempts = 0;
447
+ while (pageHeight !== previousHeight && attempts < 5) {
448
+ previousHeight = pageHeight;
449
+ await page.waitForTimeout(200); // Wait for potential expansion
450
+ pageHeight = await page.evaluate(measureHeight).catch(() => 0);
451
+ attempts++;
452
+ }
453
+ const viewport = page.viewportSize();
454
+ // @ts-expect-error - document is available in browser context during page.evaluate()
455
+ const pageWidth = await page.evaluate(() => document.documentElement.scrollWidth).catch(() => 0);
456
+ // @ts-expect-error - window is available in browser context during page.evaluate()
457
+ const scrollX = await page.evaluate(() => window.scrollX || window.pageXOffset).catch(() => 0);
458
+ // @ts-expect-error - window is available in browser context during page.evaluate()
459
+ const scrollY = await page.evaluate(() => window.scrollY || window.pageYOffset).catch(() => 0);
460
+ const pageDimensions = { width: pageWidth, height: pageHeight };
461
+ this.logger?.(`[Orchestrator] Page dimensions: ${pageWidth}x${pageHeight}px (viewport: ${viewport?.width}x${viewport?.height}px) - stabilized after ${attempts} checks`, 'log');
358
462
  // Get recent steps
359
463
  const recentSteps = memory.history.slice(-this.config.recentStepsCount);
360
464
  // SoM integration: Update markers and capture screenshot with visual IDs
@@ -375,10 +479,14 @@ class OrchestratorAgent {
375
479
  catch (error) {
376
480
  // Page already loaded or timeout - continue
377
481
  }
378
- // Update SoM markers
379
- await this.somHandler.updateSom();
380
- // Get screenshot WITH markers (viewport only - agent can scroll or use take_screenshot for full page)
381
- somScreenshot = await this.somHandler.getScreenshot(true, false, 60);
482
+ // Update SoM markers - include offscreen elements for full-page screenshots
483
+ await this.somHandler.updateSom(true);
484
+ // TEMPORARY: Always use full-page screenshot for debugging
485
+ // TODO: Re-enable heuristic once we verify full-page works correctly
486
+ const useFullPageSom = true;
487
+ this.logger?.(`[Orchestrator] SoM screenshot strategy: FULL PAGE (ALWAYS) - page: ${pageWidth}x${pageHeight}px, viewport: ${viewport?.width}x${viewport?.height}px`, 'log');
488
+ // Get screenshot WITH markers
489
+ somScreenshot = await this.somHandler.getScreenshot(true, useFullPageSom, 60);
382
490
  // Get element map for disambiguation
383
491
  somElementMap = this.somHandler.getSomElementMap();
384
492
  this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
@@ -395,12 +503,16 @@ class OrchestratorAgent {
395
503
  totalSteps,
396
504
  completedSteps: scenarioSteps.slice(0, stepNumber - 1),
397
505
  remainingSteps: scenarioSteps.slice(stepNumber),
398
- currentPageInfo,
399
506
  currentURL,
400
- recentSteps,
401
- experiences: memory.experiences,
402
- extractedData: memory.extractedData,
403
- noteFromPreviousIteration, // Tactical note from previous iteration
507
+ currentPageTitle: currentPageInfo.title,
508
+ viewportWidth: viewport?.width,
509
+ viewportHeight: viewport?.height,
510
+ pageWidth: pageDimensions.width,
511
+ pageHeight: pageDimensions.height,
512
+ scrollX,
513
+ scrollY,
514
+ journeyMemory: memory, // Journey-specific memory (includes history, extractedData, latestNote)
515
+ siteLearnings, // Site-level learnings (persistent across journeys)
404
516
  somScreenshot, // SoM screenshot with visual markers (current)
405
517
  somElementMap, // SoM element details for disambiguation
406
518
  priorSteps, // Repair: prior completed steps
@@ -419,26 +531,10 @@ class OrchestratorAgent {
419
531
  * Call agent to make decision
420
532
  */
421
533
  async callAgent(context, jobId, stepNumber, iteration, consecutiveFailures) {
422
- // Detect if coordinate mode should be activated
423
- // Phase 1: Only 2 tiers (selectors → coordinates), so activate after 3 failures
424
- // Phase 2: Will have 3 tiers (selectors → index → coordinates), threshold will be 5
425
- const useCoordinateMode = consecutiveFailures !== undefined && consecutiveFailures >= 3;
426
- // Build appropriate system prompt based on mode
534
+ // Build SoM system prompt (standard mode)
427
535
  const toolDescriptions = this.toolRegistry.generateToolDescriptions();
428
- let systemPrompt;
429
- if (this.config.useSoM) {
430
- // SoM mode: Use visual element identification
431
- systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates);
432
- }
433
- else if (useCoordinateMode) {
434
- // Coordinate mode: Fallback when selectors fail
435
- systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildCoordinateSystemPrompt();
436
- }
437
- else {
438
- // Standard mode: DOM-based selectors
439
- systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSystemPrompt(toolDescriptions, this.config.enableCoordinateMode);
440
- }
441
- const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures, this.config.enableCoordinateMode);
536
+ const systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates, toolDescriptions);
537
+ const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures);
442
538
  // Log prompt lengths for monitoring
443
539
  const systemLength = systemPrompt.length;
444
540
  const userLength = userPrompt.length;
@@ -458,22 +554,32 @@ class OrchestratorAgent {
458
554
  this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
459
555
  }
460
556
  const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
461
- // Report token usage
462
- if (response.usage && this.progressReporter?.onTokensUsed) {
463
- const tokenUsage = {
464
- jobId,
465
- stepNumber,
466
- iteration,
467
- inputTokens: response.usage.inputTokens,
468
- outputTokens: response.usage.outputTokens,
469
- includesImage: false,
470
- model: model_constants_1.DEFAULT_MODEL,
471
- timestamp: Date.now()
472
- };
473
- this.logger?.(`[Orchestrator] 💰 Reporting token usage: ${tokenUsage.inputTokens} + ${tokenUsage.outputTokens}`, 'log');
474
- await this.progressReporter.onTokensUsed(tokenUsage);
557
+ // Track token usage and images
558
+ const includesImage = !!context.somScreenshot;
559
+ if (response.usage) {
560
+ // Accumulate in debug stats
561
+ this.debugStats.tokensUsedIn += response.usage.inputTokens;
562
+ this.debugStats.tokensUsedOut += response.usage.outputTokens;
563
+ if (includesImage) {
564
+ this.debugStats.imagesUsed++;
565
+ }
566
+ // Report to progress reporter
567
+ if (this.progressReporter?.onTokensUsed) {
568
+ const tokenUsage = {
569
+ jobId,
570
+ stepNumber,
571
+ iteration,
572
+ inputTokens: response.usage.inputTokens,
573
+ outputTokens: response.usage.outputTokens,
574
+ includesImage,
575
+ model: model_constants_1.DEFAULT_MODEL,
576
+ timestamp: Date.now()
577
+ };
578
+ this.logger?.(`[Orchestrator] 💰 Reporting token usage: ${tokenUsage.inputTokens} + ${tokenUsage.outputTokens}`, 'log');
579
+ await this.progressReporter.onTokensUsed(tokenUsage);
580
+ }
475
581
  }
476
- else if (!response.usage) {
582
+ else {
477
583
  this.logger?.(`[Orchestrator] ⚠ No usage data in LLM response`, 'warn');
478
584
  }
479
585
  // Parse response
@@ -492,7 +598,7 @@ class OrchestratorAgent {
492
598
  /**
493
599
  * Execute tools
494
600
  */
495
- async executeTools(toolCalls, page, memory, stepNumber, refMap) {
601
+ async executeTools(toolCalls, page, memory, stepNumber) {
496
602
  this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);
497
603
  const results = {};
498
604
  const toolContext = {
@@ -500,12 +606,23 @@ class OrchestratorAgent {
500
606
  memory,
501
607
  stepNumber,
502
608
  logger: this.logger,
503
- refMap, // Pass refMap for interact_with_ref tool
504
609
  previousSomScreenshot: this.previousSomScreenshot, // For view_previous_screenshot tool
505
610
  somHandler: this.somHandler // For refresh_som_markers tool
506
611
  };
507
612
  for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
508
- this.logger?.(`[Orchestrator] ▶ ${toolCall.name}(${JSON.stringify(toolCall.params).substring(0, 50)}...)`);
613
+ // Log full parameters for debugging (especially for take_screenshot to see if isFullPage is set)
614
+ this.logger?.(`[Orchestrator] ▶ ${toolCall.name}(${JSON.stringify(toolCall.params)})`);
615
+ // Track tool usage in debug stats
616
+ if (!this.debugStats.toolsUsed[toolCall.name]) {
617
+ this.debugStats.toolsUsed[toolCall.name] = {
618
+ count: 0,
619
+ averageUsefulnessScore: 0,
620
+ numTimesRated: 0
621
+ };
622
+ this.logger?.(`[DebugStats] 📊 Tracking new tool: ${toolCall.name}`);
623
+ }
624
+ this.debugStats.toolsUsed[toolCall.name].count++;
625
+ this.logger?.(`[DebugStats] 📊 Tool '${toolCall.name}' used (count: ${this.debugStats.toolsUsed[toolCall.name].count})`);
509
626
  const result = await this.toolRegistry.execute(toolCall, toolContext);
510
627
  results[toolCall.name] = result;
511
628
  if (result.success) {
@@ -549,7 +666,9 @@ class OrchestratorAgent {
549
666
  /**
550
667
  * Execute commands (mix of ref and playwright commands)
551
668
  */
552
- async executeCommands(commands, page, memory, stepNumber, iteration, jobId) {
669
+ async executeCommands(commands, page, memory, stepNumber, iteration, jobId, urlBeforeAction, // URL before commands execute
670
+ screenState // Screen state for memory
671
+ ) {
553
672
  this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);
554
673
  const executed = [];
555
674
  if (commands.length === 0) {
@@ -560,6 +679,11 @@ class OrchestratorAgent {
560
679
  this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');
561
680
  for (let i = 0; i < commands.length; i++) {
562
681
  const cmd = commands[i];
682
+ // Skip if plain string (should not happen in SoM mode, but handle gracefully)
683
+ if (typeof cmd === 'string') {
684
+ this.logger?.(`[Orchestrator] ⚠️ Skipping plain string command in SoM mode: "${cmd}"`, 'warn');
685
+ continue;
686
+ }
563
687
  // Check if verification or action command
564
688
  if ((0, som_types_1.isSomVerification)(cmd)) {
565
689
  // Handle verification command
@@ -580,6 +704,8 @@ class OrchestratorAgent {
580
704
  result: 'success',
581
705
  observation: `Verified: ${cmd.description || cmd.expected}`,
582
706
  url: page.url(),
707
+ previousUrl: urlBeforeAction,
708
+ screenState,
583
709
  timestamp: Date.now()
584
710
  });
585
711
  }
@@ -594,6 +720,8 @@ class OrchestratorAgent {
594
720
  observation: `Failed: ${result.error}`,
595
721
  error: result.error,
596
722
  url: page.url(),
723
+ previousUrl: urlBeforeAction,
724
+ screenState,
597
725
  timestamp: Date.now()
598
726
  });
599
727
  // Continue anyway - verification failures are non-blocking for script generation
@@ -624,6 +752,8 @@ class OrchestratorAgent {
624
752
  result: 'success',
625
753
  observation: 'Executed successfully',
626
754
  url: page.url(),
755
+ previousUrl: urlBeforeAction,
756
+ screenState,
627
757
  timestamp: Date.now()
628
758
  });
629
759
  // Small delay for form validation/animations
@@ -644,12 +774,14 @@ class OrchestratorAgent {
644
774
  observation: `Failed: ${result.error}`,
645
775
  error: result.error,
646
776
  url: page.url(),
777
+ previousUrl: urlBeforeAction,
778
+ screenState,
647
779
  timestamp: Date.now()
648
780
  });
649
781
  // Refresh SoM after batch (DOM may have changed)
650
782
  if (this.somHandler && page) {
651
783
  this.somHandler.setPage(page);
652
- await this.somHandler.updateSom();
784
+ await this.somHandler.updateSom(true);
653
785
  }
654
786
  return { executed, allSucceeded: false };
655
787
  }
@@ -665,12 +797,14 @@ class OrchestratorAgent {
665
797
  observation: `Exception: ${error.message}`,
666
798
  error: error.message,
667
799
  url: page.url(),
800
+ previousUrl: urlBeforeAction,
801
+ screenState,
668
802
  timestamp: Date.now()
669
803
  });
670
804
  // Refresh SoM after batch (DOM may have changed)
671
805
  if (this.somHandler && page) {
672
806
  this.somHandler.setPage(page);
673
- await this.somHandler.updateSom();
807
+ await this.somHandler.updateSom(true);
674
808
  }
675
809
  return { executed, allSucceeded: false };
676
810
  }
@@ -679,19 +813,33 @@ class OrchestratorAgent {
679
813
  this.logger?.(`[Orchestrator] ⚠ [${i + 1}/${commands.length}] Not a valid SoM command/verification, skipping`, 'warn');
680
814
  }
681
815
  }
816
+ // Action-aware stabilization: Detect if commands likely triggered navigation
817
+ const isNavigationAction = (0, page_loading_utils_1.detectNavigationAction)(commands, executed);
682
818
  // Always wait for page to stabilize after command batch
683
819
  // This handles both explicit navigation AND clicks that trigger navigation/SPA routes
684
820
  try {
685
- this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
686
- // Use networkidle with short timeout to catch navigation without blocking on SPAs with continuous requests
687
- await page.waitForLoadState('networkidle', { timeout: 3000 });
688
- this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
821
+ if (isNavigationAction) {
822
+ this.logger?.(`[Orchestrator] Detected navigation action - using extended wait...`, 'log');
823
+ // Extended wait for form submissions and navigation clicks
824
+ await page.waitForLoadState('networkidle', { timeout: 15000 }); // Longer timeout for slow SPAs
825
+ await page.waitForTimeout(1000); // Initial buffer for SPA rendering
826
+ this.logger?.(`[Orchestrator] Page stabilized after navigation (networkidle + 1s buffer)`, 'log');
827
+ // Smart loading detection: Check if page still shows loading indicators
828
+ await (0, page_loading_utils_1.waitForLoadingToComplete)(page, this.logger);
829
+ }
830
+ else {
831
+ this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
832
+ // Use networkidle with short timeout for standard interactions
833
+ await page.waitForLoadState('networkidle', { timeout: 3000 });
834
+ this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
835
+ }
689
836
  }
690
837
  catch (error) {
691
- // If networkidle times out, fall back to domcontentloaded
838
+ // If networkidle times out, fall back to domcontentloaded + buffer
692
839
  try {
693
840
  await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
694
- this.logger?.(`[Orchestrator] Page loaded (domcontentloaded)`, 'log');
841
+ await page.waitForTimeout(1000);
842
+ this.logger?.(`[Orchestrator] Page loaded (domcontentloaded + buffer)`, 'log');
695
843
  }
696
844
  catch (error2) {
697
845
  this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
@@ -700,7 +848,7 @@ class OrchestratorAgent {
700
848
  // Refresh SoM after batch (DOM may have changed and page is now stable)
701
849
  if (this.somHandler && page) {
702
850
  this.somHandler.setPage(page);
703
- await this.somHandler.updateSom();
851
+ await this.somHandler.updateSom(true);
704
852
  }
705
853
  return { executed, allSucceeded: true };
706
854
  }
@@ -733,6 +881,8 @@ try {
733
881
  result: 'success',
734
882
  observation: 'Executed successfully',
735
883
  url: page.url(),
884
+ previousUrl: urlBeforeAction,
885
+ screenState,
736
886
  timestamp: Date.now()
737
887
  });
738
888
  executed.push(cmd);
@@ -755,6 +905,8 @@ try {
755
905
  observation: `Failed: ${errorMessage}`,
756
906
  error: errorMessage,
757
907
  url: page.url(),
908
+ previousUrl: urlBeforeAction,
909
+ screenState,
758
910
  timestamp: Date.now()
759
911
  });
760
912
  return { executed, allSucceeded: false };
@@ -766,6 +918,17 @@ try {
766
918
  async reportStepProgress(jobId, stepNumber, description, decision, iteration) {
767
919
  if (!this.progressReporter?.onStepProgress)
768
920
  return;
921
+ // Convert commands to strings (handle both string[] and SomCommand[])
922
+ const commandStrings = decision.commands?.map(cmd => {
923
+ if (typeof cmd === 'string') {
924
+ return cmd;
925
+ }
926
+ else if (cmd && typeof cmd === 'object') {
927
+ // SomCommand object - convert to readable string
928
+ return JSON.stringify(cmd);
929
+ }
930
+ return String(cmd);
931
+ }) || [];
769
932
  await this.progressReporter.onStepProgress({
770
933
  jobId,
771
934
  stepNumber,
@@ -773,12 +936,11 @@ try {
773
936
  status: decision.status === 'complete' ? progress_reporter_1.StepExecutionStatus.SUCCESS :
774
937
  decision.status === 'stuck' || decision.status === 'infeasible' ? progress_reporter_1.StepExecutionStatus.FAILURE :
775
938
  progress_reporter_1.StepExecutionStatus.IN_PROGRESS,
776
- code: decision.commands?.join('\n'),
939
+ code: commandStrings.join('\n'),
777
940
  // Include agent metadata for transparency
778
941
  agentIteration: iteration,
779
942
  agentReasoning: decision.reasoning,
780
943
  agentSelfReflection: decision.selfReflection,
781
- agentExperiences: decision.experiences,
782
944
  agentToolsUsed: decision.toolCalls?.map(t => t.name),
783
945
  agentStatus: decision.status
784
946
  });
@@ -787,17 +949,25 @@ try {
787
949
  * Execute exploration mode - agent autonomously explores to achieve journey goal
788
950
  * Fires onStepProgress callbacks for each autonomous action (transparent to caller)
789
951
  */
790
- async executeExploration(page, explorationConfig, jobId) {
952
+ async executeExploration(page, explorationConfig, jobId, existingSiteLearnings) {
791
953
  this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
954
+ this.logger?.(`[Orchestrator] 🚀 runner-core v${package_json_1.version}`);
792
955
  this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
793
956
  if (explorationConfig.testDataPrompt) {
794
957
  this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
795
958
  }
959
+ // Journey memory (temporal)
796
960
  const memory = {
797
961
  history: [],
798
- experiences: [],
799
962
  extractedData: {}
800
963
  };
964
+ // Site learnings (persistent across journeys) - initialize with existing or empty
965
+ const siteLearnings = existingSiteLearnings || { screens: {}, uxPatterns: {} };
966
+ if (existingSiteLearnings) {
967
+ const screenCount = Object.keys(existingSiteLearnings.screens).length;
968
+ const patternCount = Object.keys(existingSiteLearnings.uxPatterns).length;
969
+ this.logger?.(`[Orchestrator] 📚 Loaded existing learnings: ${screenCount} screens, ${patternCount} UX patterns`);
970
+ }
801
971
  const maxSteps = explorationConfig.maxExplorationSteps || 50;
802
972
  let stepNumber = 0;
803
973
  const commandsExecuted = [];
@@ -805,7 +975,7 @@ try {
805
975
  stepNumber++;
806
976
  this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);
807
977
  // Build exploratory context
808
- const context = await this.buildExploratoryContext(page, explorationConfig.explorationPrompt, explorationConfig.testDataPrompt, memory, stepNumber, maxSteps);
978
+ const context = await this.buildExploratoryContext(page, explorationConfig.explorationPrompt, explorationConfig.testDataPrompt, memory, siteLearnings, stepNumber, maxSteps);
809
979
  // Call agent with exploratory prompt
810
980
  const decision = await this.callExploratoryAgent(context, jobId, stepNumber);
811
981
  this.decisionParser.log(decision, stepNumber);
@@ -837,37 +1007,72 @@ try {
837
1007
  // Handle blocker clearing
838
1008
  if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
839
1009
  this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
840
- const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, 1, jobId);
1010
+ const urlBeforeBlocker = page.url();
1011
+ const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, 1, jobId, urlBeforeBlocker, decision.screenState);
841
1012
  commandsExecuted.push(...blockerResult.executed);
842
1013
  }
843
1014
  // Execute exploration commands
844
1015
  let commandsSucceeded = true;
845
1016
  if (decision.commands && decision.commands.length > 0) {
846
- const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, 1, jobId);
1017
+ const urlBeforeExploration = page.url();
1018
+ const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, 1, jobId, urlBeforeExploration, decision.screenState);
847
1019
  commandsExecuted.push(...executeResult.executed);
848
1020
  commandsSucceeded = executeResult.allSucceeded;
849
1021
  }
850
1022
  // Report step completion (fires JourneyRunner's onStepComplete callback)
851
1023
  if (this.progressReporter?.onStepProgress) {
1024
+ // Convert commands to strings (handle both string[] and SomCommand[])
1025
+ const commandStrings = decision.commands?.map(cmd => {
1026
+ if (typeof cmd === 'string') {
1027
+ return cmd;
1028
+ }
1029
+ else if (cmd && typeof cmd === 'object') {
1030
+ // SomCommand object - convert to readable string
1031
+ return JSON.stringify(cmd);
1032
+ }
1033
+ return String(cmd);
1034
+ }) || [];
852
1035
  const stepInfo = {
853
1036
  jobId,
854
1037
  stepNumber,
855
1038
  stepId: `exploration-${stepNumber}-${Date.now()}`,
856
1039
  description: decision.reasoning,
857
- code: decision.commands?.join('\n') || '',
1040
+ code: commandStrings.join('\n'),
858
1041
  status: commandsSucceeded ? progress_reporter_1.StepExecutionStatus.SUCCESS : progress_reporter_1.StepExecutionStatus.FAILURE,
859
1042
  error: commandsSucceeded ? undefined : 'Command execution failed',
860
1043
  wasRepaired: false
861
1044
  };
862
1045
  await this.progressReporter.onStepProgress(stepInfo);
863
1046
  }
864
- // Add experiences (both app patterns AND exploration progress)
865
- if (decision.experiences) {
866
- memory.experiences.push(...decision.experiences);
867
- if (memory.experiences.length > this.config.maxExperiences) {
868
- memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
1047
+ // Auto-track visited screen (even without explicit learnings)
1048
+ // Filter out transient screens and loading states
1049
+ if (decision.screenState) {
1050
+ const { screen, state } = decision.screenState;
1051
+ // Skip about:blank and loading states (transient, not worth persisting)
1052
+ const isTransientScreen = screen === 'about:blank' ||
1053
+ screen.toLowerCase().includes('blank');
1054
+ const isLoadingState = state.toLowerCase().includes('loading') ||
1055
+ state.toLowerCase().includes('spinner') ||
1056
+ state.toLowerCase().includes('initializing');
1057
+ if (!isTransientScreen && !isLoadingState) {
1058
+ if (!siteLearnings.screens[screen]) {
1059
+ siteLearnings.screens[screen] = { states: {} };
1060
+ this.logger?.(`[📍 Auto-tracked] Screen: ${screen}`);
1061
+ }
1062
+ if (!siteLearnings.screens[screen].states[state]) {
1063
+ siteLearnings.screens[screen].states[state] = { observations: {} };
1064
+ this.logger?.(`[📍 Auto-tracked] State: ${screen}[${state}]`);
1065
+ }
1066
+ }
1067
+ else {
1068
+ this.logger?.(`[⏭️ Skipped] Transient screen/state: ${screen}[${state}]`);
869
1069
  }
870
1070
  }
1071
+ // Update site learnings
1072
+ if (decision.siteLearningsUpdate) {
1073
+ this.logger?.(`[🔍 DEBUG] siteLearningsUpdate from LLM:\n${JSON.stringify(decision.siteLearningsUpdate, null, 2)}`);
1074
+ (0, site_learnings_utils_1.mergeSiteLearnings)(siteLearnings, decision.siteLearningsUpdate, this.logger);
1075
+ }
871
1076
  // Store note for next iteration
872
1077
  if (decision.noteToFutureSelf) {
873
1078
  memory.latestNote = {
@@ -883,7 +1088,8 @@ try {
883
1088
  commands: commandsExecuted,
884
1089
  iterations: stepNumber,
885
1090
  terminationReason: 'complete',
886
- memory
1091
+ memory,
1092
+ siteLearnings
887
1093
  };
888
1094
  }
889
1095
  else if (decision.status === 'stuck') {
@@ -894,6 +1100,7 @@ try {
894
1100
  iterations: stepNumber,
895
1101
  terminationReason: 'agent_stuck',
896
1102
  memory,
1103
+ siteLearnings,
897
1104
  error: decision.statusReasoning
898
1105
  };
899
1106
  }
@@ -905,14 +1112,47 @@ try {
905
1112
  commands: commandsExecuted,
906
1113
  iterations: stepNumber,
907
1114
  terminationReason: 'system_limit',
908
- memory
1115
+ memory,
1116
+ siteLearnings
909
1117
  };
910
1118
  }
911
- async buildExploratoryContext(page, explorationPrompt, testDataPrompt, memory, stepNumber, maxSteps) {
1119
+ async buildExploratoryContext(page, explorationPrompt, testDataPrompt, memory, siteLearnings, stepNumber, maxSteps) {
912
1120
  // Wait for page to be ready and elements to appear (especially important after navigation)
913
1121
  const currentPageInfo = await page_info_retry_1.PageInfoRetry.getWithRetry(page);
914
1122
  const currentURL = page.url();
915
- const recentSteps = memory.history.slice(-this.config.recentStepsCount);
1123
+ // Get page dimensions for scroll vs screenshot decisions
1124
+ // IMPORTANT: Wait for page to stabilize with retry (fixes lazy-loaded/dynamic content)
1125
+ try {
1126
+ await page.waitForLoadState('domcontentloaded', { timeout: 10000 });
1127
+ }
1128
+ catch (e) {
1129
+ // Already loaded, continue
1130
+ }
1131
+ // Retry approach: Measure scrollHeight multiple times until it stabilizes
1132
+ // Check MULTIPLE sources and use the maximum (handles edge cases like overflow:hidden)
1133
+ const measureHeight = `Math.max(
1134
+ document.documentElement.scrollHeight || 0,
1135
+ document.body.scrollHeight || 0,
1136
+ document.documentElement.offsetHeight || 0,
1137
+ document.body.offsetHeight || 0
1138
+ )`;
1139
+ let pageHeight = await page.evaluate(measureHeight).catch(() => 0);
1140
+ let previousHeight = 0;
1141
+ let attempts = 0;
1142
+ while (pageHeight !== previousHeight && attempts < 5) {
1143
+ previousHeight = pageHeight;
1144
+ await page.waitForTimeout(200); // Wait for potential expansion
1145
+ pageHeight = await page.evaluate(measureHeight).catch(() => 0);
1146
+ attempts++;
1147
+ }
1148
+ const viewport = page.viewportSize();
1149
+ // @ts-expect-error - document is available in browser context during page.evaluate()
1150
+ const pageWidth = await page.evaluate(() => document.documentElement.scrollWidth).catch(() => 0);
1151
+ // @ts-expect-error - window is available in browser context during page.evaluate()
1152
+ const scrollX = await page.evaluate(() => window.scrollX || window.pageXOffset).catch(() => 0);
1153
+ // @ts-expect-error - window is available in browser context during page.evaluate()
1154
+ const scrollY = await page.evaluate(() => window.scrollY || window.pageYOffset).catch(() => 0);
1155
+ this.logger?.(`[Orchestrator] Exploration page dimensions: ${pageWidth}x${pageHeight}px (viewport: ${viewport?.width}x${viewport?.height}px) - stabilized after ${attempts} checks`, 'log');
916
1156
  // SoM integration for exploratory mode
917
1157
  let somScreenshot = undefined;
918
1158
  let somElementMap = undefined;
@@ -926,9 +1166,11 @@ try {
926
1166
  catch (error) {
927
1167
  // Page already loaded or timeout - continue
928
1168
  }
929
- // Update SoM markers
930
- await this.somHandler.updateSom();
931
- somScreenshot = await this.somHandler.getScreenshot(true, false, 60); // Viewport only - agent can scroll or request full page
1169
+ // Update SoM markers after coordinate action
1170
+ await this.somHandler.updateSom(true);
1171
+ // TEMPORARY: Always use full-page screenshot for debugging
1172
+ const useFullPageSom = true;
1173
+ somScreenshot = await this.somHandler.getScreenshot(true, useFullPageSom, 60);
932
1174
  // Get element map for disambiguation
933
1175
  somElementMap = this.somHandler.getSomElementMap();
934
1176
  this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
@@ -944,12 +1186,16 @@ try {
944
1186
  totalSteps: maxSteps,
945
1187
  completedSteps: [],
946
1188
  remainingSteps: [],
947
- currentPageInfo,
948
1189
  currentURL,
949
- recentSteps,
950
- experiences: memory.experiences,
951
- extractedData: memory.extractedData,
952
- noteFromPreviousIteration: memory.latestNote,
1190
+ currentPageTitle: currentPageInfo.title,
1191
+ viewportWidth: viewport?.width,
1192
+ viewportHeight: viewport?.height,
1193
+ pageWidth,
1194
+ pageHeight,
1195
+ scrollX,
1196
+ scrollY,
1197
+ journeyMemory: memory, // Journey-specific memory
1198
+ siteLearnings, // Site-level learnings
953
1199
  testDataPrompt, // CRITICAL: Store testDataPrompt in context
954
1200
  somScreenshot, // SoM screenshot for exploratory mode (current)
955
1201
  somElementMap // SoM element details for disambiguation
@@ -962,10 +1208,8 @@ try {
962
1208
  }
963
1209
  async callExploratoryAgent(context, jobId, stepNumber) {
964
1210
  const toolDescriptions = this.toolRegistry.generateToolDescriptions();
965
- // Use SoM system prompt if in SoM mode, otherwise use standard exploratory prompt
966
- const systemPrompt = this.config.useSoM
967
- ? orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates)
968
- : orchestrator_prompts_1.OrchestratorPrompts.buildExploratorySystemPrompt(toolDescriptions);
1211
+ // Use SoM system prompt (standard mode)
1212
+ const systemPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates, toolDescriptions);
969
1213
  const userPrompt = orchestrator_prompts_1.OrchestratorPrompts.buildExploratoryUserPrompt(context, context.overallGoal, context.testDataPrompt, // Pass testDataPrompt from context
970
1214
  stepNumber, context.totalSteps);
971
1215
  const llmRequest = {
@@ -996,6 +1240,20 @@ try {
996
1240
  const decision = this.decisionParser.parse(response.answer);
997
1241
  return decision;
998
1242
  }
1243
+ /**
1244
+ * Get accumulated debug statistics
1245
+ */
1246
+ getDebugStats() {
1247
+ const stats = { ...this.debugStats };
1248
+ // Log summary of collected debug stats
1249
+ this.logger?.(`\n========== DEBUG STATS SUMMARY ==========`);
1250
+ this.logger?.(`Tokens In: ${stats.tokensUsedIn}, Tokens Out: ${stats.tokensUsedOut}`);
1251
+ this.logger?.(`Images Used: ${stats.imagesUsed}`);
1252
+ this.logger?.(`Tools Used: ${Object.keys(stats.toolsUsed).length > 0 ? JSON.stringify(stats.toolsUsed, null, 2) : 'NONE'}`);
1253
+ this.logger?.(`Prompt Suggestions: ${stats.promptImproveSuggestions.length}`);
1254
+ this.logger?.(`=========================================\n`);
1255
+ return stats;
1256
+ }
999
1257
  }
1000
1258
  exports.OrchestratorAgent = OrchestratorAgent;
1001
1259
  //# sourceMappingURL=orchestrator-agent.js.map