testchimp-runner-core 0.0.34 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/orchestrator/decision-parser.d.ts +18 -0
  10. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  11. package/dist/orchestrator/decision-parser.js +127 -0
  12. package/dist/orchestrator/decision-parser.js.map +1 -0
  13. package/dist/orchestrator/index.d.ts +4 -2
  14. package/dist/orchestrator/index.d.ts.map +1 -1
  15. package/dist/orchestrator/index.js +14 -2
  16. package/dist/orchestrator/index.js.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
  18. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  19. package/dist/orchestrator/orchestrator-agent.js +534 -204
  20. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
  22. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  23. package/dist/orchestrator/orchestrator-prompts.js +529 -247
  24. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  25. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  26. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  27. package/dist/orchestrator/page-som-handler.js +1353 -0
  28. package/dist/orchestrator/page-som-handler.js.map +1 -0
  29. package/dist/orchestrator/som-types.d.ts +149 -0
  30. package/dist/orchestrator/som-types.d.ts.map +1 -0
  31. package/dist/orchestrator/som-types.js +87 -0
  32. package/dist/orchestrator/som-types.js.map +1 -0
  33. package/dist/orchestrator/tool-registry.d.ts +2 -0
  34. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  35. package/dist/orchestrator/tool-registry.js.map +1 -1
  36. package/dist/orchestrator/tools/index.d.ts +4 -1
  37. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  38. package/dist/orchestrator/tools/index.js +7 -2
  39. package/dist/orchestrator/tools/index.js.map +1 -1
  40. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  41. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  42. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  43. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  44. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  45. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  46. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  47. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  48. package/dist/orchestrator/types.d.ts +23 -1
  49. package/dist/orchestrator/types.d.ts.map +1 -1
  50. package/dist/orchestrator/types.js +11 -1
  51. package/dist/orchestrator/types.js.map +1 -1
  52. package/dist/scenario-service.d.ts +5 -0
  53. package/dist/scenario-service.d.ts.map +1 -1
  54. package/dist/scenario-service.js +17 -0
  55. package/dist/scenario-service.js.map +1 -1
  56. package/dist/scenario-worker-class.d.ts +4 -0
  57. package/dist/scenario-worker-class.d.ts.map +1 -1
  58. package/dist/scenario-worker-class.js +18 -3
  59. package/dist/scenario-worker-class.js.map +1 -1
  60. package/dist/testing/agent-tester.d.ts +35 -0
  61. package/dist/testing/agent-tester.d.ts.map +1 -0
  62. package/dist/testing/agent-tester.js +84 -0
  63. package/dist/testing/agent-tester.js.map +1 -0
  64. package/dist/testing/ref-translator-tester.d.ts +44 -0
  65. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  66. package/dist/testing/ref-translator-tester.js +104 -0
  67. package/dist/testing/ref-translator-tester.js.map +1 -0
  68. package/dist/utils/hierarchical-selector.d.ts +47 -0
  69. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  70. package/dist/utils/hierarchical-selector.js +212 -0
  71. package/dist/utils/hierarchical-selector.js.map +1 -0
  72. package/dist/utils/page-info-retry.d.ts +14 -0
  73. package/dist/utils/page-info-retry.d.ts.map +1 -0
  74. package/dist/utils/page-info-retry.js +60 -0
  75. package/dist/utils/page-info-retry.js.map +1 -0
  76. package/dist/utils/page-info-utils.d.ts +1 -0
  77. package/dist/utils/page-info-utils.d.ts.map +1 -1
  78. package/dist/utils/page-info-utils.js +46 -18
  79. package/dist/utils/page-info-utils.js.map +1 -1
  80. package/dist/utils/ref-attacher.d.ts +21 -0
  81. package/dist/utils/ref-attacher.d.ts.map +1 -0
  82. package/dist/utils/ref-attacher.js +149 -0
  83. package/dist/utils/ref-attacher.js.map +1 -0
  84. package/dist/utils/ref-translator.d.ts +49 -0
  85. package/dist/utils/ref-translator.d.ts.map +1 -0
  86. package/dist/utils/ref-translator.js +276 -0
  87. package/dist/utils/ref-translator.js.map +1 -0
  88. package/package.json +1 -1
  89. package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
  90. package/plandocs/exploratory-mode-support.plan.md +928 -0
  91. package/plandocs/journey-id-tracking-addendum.md +227 -0
  92. package/src/execution-service.ts +179 -596
  93. package/src/index.ts +10 -0
  94. package/src/orchestrator/decision-parser.ts +139 -0
  95. package/src/orchestrator/index.ts +25 -1
  96. package/src/orchestrator/orchestrator-agent.ts +656 -236
  97. package/src/orchestrator/orchestrator-prompts.ts +559 -247
  98. package/src/orchestrator/page-som-handler.ts +1565 -0
  99. package/src/orchestrator/som-types.ts +188 -0
  100. package/src/orchestrator/tool-registry.ts +2 -0
  101. package/src/orchestrator/tools/index.ts +4 -1
  102. package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
  103. package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
  104. package/src/orchestrator/types.ts +49 -6
  105. package/src/scenario-service.ts +20 -0
  106. package/src/scenario-worker-class.ts +24 -3
  107. package/src/utils/page-info-retry.ts +65 -0
  108. package/src/utils/page-info-utils.ts +53 -18
  109. package/testchimp-runner-core-0.0.35.tgz +0 -0
  110. package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
  111. package/testchimp-runner-core-0.0.33.tgz +0 -0
  112. /package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
  113. /package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
  114. /package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0
package/src/index.ts CHANGED
@@ -45,6 +45,7 @@ export {
45
45
  SelfReflection,
46
46
  ToolCall,
47
47
  ToolResult,
48
+ ExplorationMode,
48
49
  DEFAULT_AGENT_CONFIG,
49
50
  // Tools (information-gathering only)
50
51
  TakeScreenshotTool,
@@ -58,6 +59,9 @@ export {
58
59
  export * from './types';
59
60
  export { PageInfo, InteractiveElement } from './utils/page-info-utils';
60
61
 
62
+ // Model constants
63
+ export { DEFAULT_MODEL, DEFAULT_SIMPLER_MODEL, VISION_MODEL } from './model-constants';
64
+
61
65
  // Authentication
62
66
  export * from './auth-config';
63
67
 
@@ -359,6 +363,12 @@ export class TestChimpService {
359
363
  return this.executeScript(repairRequest);
360
364
  }
361
365
 
366
+ // Exploration mode execution
367
+ async executeExploration(page: any, explorationConfig: any, jobId: string): Promise<any> {
368
+ // Delegate to scenario service's orchestrator
369
+ return this.scenarioService.executeExploration(page, explorationConfig, jobId);
370
+ }
371
+
362
372
  // Credit usage reporting methods
363
373
  async reportScriptGenerationCredit(jobId?: string): Promise<void> {
364
374
  return this.creditUsageService.reportScriptGenerationCredit(jobId);
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Decision Parser Utility
3
+ * Parses and validates agent decisions from LLM responses
4
+ */
5
+
6
+ import { AgentDecision } from './types';
7
+ import { isSomVerification, isSomCommand } from './som-types';
8
+
9
+ export class DecisionParser {
10
+ private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
11
+
12
+ constructor(logger?: (message: string, level?: 'log' | 'error' | 'warn') => void) {
13
+ this.logger = logger;
14
+ }
15
+
16
+ /**
17
+ * Parse agent decision from LLM response
18
+ */
19
+ parse(response: string): AgentDecision {
20
+ try {
21
+ // Extract JSON from response
22
+ const jsonMatch = response.match(/\{[\s\S]*\}/);
23
+ if (!jsonMatch) {
24
+ this.logger?.(`[DecisionParser] โœ— No JSON found in LLM response`, 'error');
25
+ this.logger?.(`[DecisionParser] ๐Ÿ“„ FULL LLM RESPONSE:\n${response}`, 'error');
26
+ throw new Error('No JSON found in response');
27
+ }
28
+
29
+ const parsed = JSON.parse(jsonMatch[0]);
30
+
31
+ // Validate required fields
32
+ // Accept either "reasoning" or "statusReasoning" (LLMs sometimes only provide one)
33
+ if (!parsed.status || (!parsed.reasoning && !parsed.statusReasoning)) {
34
+ this.logger?.(`[DecisionParser] โœ— Missing required fields in parsed JSON`, 'error');
35
+ this.logger?.(`[DecisionParser] ๐Ÿ“„ FULL LLM RESPONSE:\n${response}`, 'error');
36
+ this.logger?.(`[DecisionParser] ๐Ÿ“„ PARSED JSON:\n${JSON.stringify(parsed, null, 2)}`, 'error');
37
+ this.logger?.(`[DecisionParser] โŒ Has status: ${!!parsed.status}, Has reasoning: ${!!parsed.reasoning}, Has statusReasoning: ${!!parsed.statusReasoning}`, 'error');
38
+ throw new Error('Missing required fields: status and (reasoning or statusReasoning)');
39
+ }
40
+
41
+ // Normalize: if reasoning is missing but statusReasoning exists, use statusReasoning as reasoning
42
+ if (!parsed.reasoning && parsed.statusReasoning) {
43
+ parsed.reasoning = parsed.statusReasoning;
44
+ }
45
+
46
+ return parsed as AgentDecision;
47
+
48
+ } catch (error: any) {
49
+ this.logger?.(`[DecisionParser] โœ— Failed to parse agent decision: ${error.message}`, 'error');
50
+
51
+ // Only log full response if not already logged above
52
+ if (!error.message.includes('Missing required fields') && !error.message.includes('No JSON found')) {
53
+ this.logger?.(`[DecisionParser] ๐Ÿ“„ FULL LLM RESPONSE:\n${response}`, 'error');
54
+ }
55
+
56
+ // Return fallback
57
+ return {
58
+ status: 'stuck',
59
+ statusReasoning: 'Failed to parse agent response',
60
+ reasoning: `Parse error: ${error.message}`
61
+ };
62
+ }
63
+ }
64
+
65
+ /**
66
+ * Log agent decision for debugging
67
+ */
68
+ log(decision: AgentDecision, iteration: number): void {
69
+ this.logger?.(`[Orchestrator] ๐Ÿ’ญ REASONING: ${decision.reasoning}`);
70
+
71
+ if (decision.selfReflection) {
72
+ this.logger?.(`[Orchestrator] ๐Ÿง  SELF-REFLECTION:`);
73
+ this.logger?.(`[Orchestrator] Next: ${decision.selfReflection.guidanceForNext}`);
74
+ if (decision.selfReflection.detectingLoop) {
75
+ this.logger?.(`[Orchestrator] ๐Ÿ”„ LOOP DETECTED: ${decision.selfReflection.loopReasoning}`, 'warn');
76
+ }
77
+ }
78
+
79
+ if (decision.toolCalls && decision.toolCalls.length > 0) {
80
+ this.logger?.(`[Orchestrator] ๐Ÿ”ง TOOLS: ${decision.toolCalls.map(t => t.name).join(', ')}`);
81
+ if (decision.toolReasoning) {
82
+ this.logger?.(`[Orchestrator] ๐Ÿ“‹ Why: ${decision.toolReasoning}`);
83
+ }
84
+ }
85
+
86
+ if (decision.blockerDetected) {
87
+ this.logger?.(`[Orchestrator] ๐Ÿšง BLOCKER: ${decision.blockerDetected.description}`, 'warn');
88
+ this.logger?.(`[Orchestrator] ๐Ÿงน Clearing with ${decision.blockerDetected.clearingCommands.length} command(s)`);
89
+ }
90
+
91
+ if (decision.stepReEvaluation?.detected) {
92
+ this.logger?.(`[Orchestrator] ๐Ÿ” STEP RE-EVALUATION: ${decision.stepReEvaluation.issue}`, 'warn');
93
+ this.logger?.(`[Orchestrator] ๐Ÿ“ Explanation: ${decision.stepReEvaluation.explanation}`);
94
+ }
95
+
96
+ if (decision.commands && decision.commands.length > 0) {
97
+ this.logger?.(`[Orchestrator] ๐Ÿ“ COMMANDS (${decision.commands.length}):`);
98
+ decision.commands.slice(0, 3).forEach((cmd: any, i) => {
99
+ // Handle string commands (standard mode), SomCommand, and SomVerification objects
100
+ if (typeof cmd === 'string') {
101
+ this.logger?.(`[Orchestrator] ${i + 1}. ${cmd.substring(0, 80)}...`);
102
+ } else if (isSomVerification(cmd)) {
103
+ const ref = cmd.elementRef || cmd.selector || 'unknown';
104
+ this.logger?.(`[Orchestrator] ${i + 1}. [VERIFY] ${cmd.verificationType} on ${ref}: ${cmd.expected}`);
105
+ } else if (isSomCommand(cmd)) {
106
+ this.logger?.(`[Orchestrator] ${i + 1}. [ACTION] ${cmd.action} on ${cmd.elementRef || 'coord'}`);
107
+ } else {
108
+ this.logger?.(`[Orchestrator] ${i + 1}. ${JSON.stringify(cmd).substring(0, 80)}...`);
109
+ }
110
+ });
111
+ if (decision.commands.length > 3) {
112
+ this.logger?.(`[Orchestrator] ... and ${decision.commands.length - 3} more`);
113
+ }
114
+ if (decision.commandReasoning) {
115
+ this.logger?.(`[Orchestrator] ๐Ÿ’ก Why: ${decision.commandReasoning}`);
116
+ }
117
+ }
118
+
119
+ // Log meta-learning suggestions with clear demarcations
120
+ if (decision.debugInfo?.suggestedPromptUpdates) {
121
+ this.logger?.(``, 'warn');
122
+ this.logger?.(`โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•`, 'warn');
123
+ this.logger?.(`[Orchestrator] ๐Ÿง  META-LEARNING SUGGESTION FROM AGENT`, 'warn');
124
+ this.logger?.(`โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•`, 'warn');
125
+ this.logger?.(``, 'warn');
126
+ this.logger?.(`SUGGESTED PROMPT UPDATE:`, 'warn');
127
+ this.logger?.(` ${decision.debugInfo.suggestedPromptUpdates}`, 'warn');
128
+ this.logger?.(``, 'warn');
129
+ if (decision.debugInfo.reasoning) {
130
+ this.logger?.(`REASONING:`, 'warn');
131
+ this.logger?.(` ${decision.debugInfo.reasoning}`, 'warn');
132
+ this.logger?.(``, 'warn');
133
+ }
134
+ this.logger?.(`โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•`, 'warn');
135
+ this.logger?.(``, 'warn');
136
+ }
137
+ }
138
+ }
139
+
@@ -5,12 +5,17 @@
5
5
  // Core orchestrator
6
6
  export { OrchestratorAgent } from './orchestrator-agent';
7
7
 
8
+ // SoM (Set-of-Marks) handler
9
+ export { PageSoMHandler } from './page-som-handler';
10
+
8
11
  // Tool system
9
12
  export { ToolRegistry, Tool, ToolParameter, ToolExecutionContext } from './tool-registry';
10
13
 
11
- // 6 information-gathering tools (state changes via Playwright commands)
14
+ // 8 information-gathering tools (state changes via Playwright commands)
12
15
  export {
13
16
  TakeScreenshotTool,
17
+ ViewPreviousScreenshotTool,
18
+ RefreshSomMarkersTool,
14
19
  RecallHistoryTool,
15
20
  InspectPageTool,
16
21
  CheckPageReadyTool,
@@ -29,6 +34,25 @@ export {
29
34
  SelfReflection,
30
35
  ToolCall,
31
36
  ToolResult,
37
+ ExplorationMode,
32
38
  DEFAULT_AGENT_CONFIG
33
39
  } from './types';
34
40
 
41
+ // SoM Types
42
+ export {
43
+ SomCommand,
44
+ SomElement,
45
+ SemanticCommandResult,
46
+ CommandRunStatus,
47
+ CommandAttempt,
48
+ DomMutation,
49
+ InteractionAction,
50
+ Coordinate,
51
+ TypedSelector,
52
+ SomVerification,
53
+ VerificationType,
54
+ SomCommandOrVerification,
55
+ isSomVerification,
56
+ isSomCommand
57
+ } from './som-types';
58
+