testchimp-runner-core 0.0.21 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/VISION_DIAGNOSTICS_IMPROVEMENTS.md +336 -0
  2. package/dist/credit-usage-service.d.ts +9 -0
  3. package/dist/credit-usage-service.d.ts.map +1 -1
  4. package/dist/credit-usage-service.js +20 -5
  5. package/dist/credit-usage-service.js.map +1 -1
  6. package/dist/execution-service.d.ts +7 -2
  7. package/dist/execution-service.d.ts.map +1 -1
  8. package/dist/execution-service.js +91 -36
  9. package/dist/execution-service.js.map +1 -1
  10. package/dist/index.d.ts +30 -2
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +91 -26
  13. package/dist/index.js.map +1 -1
  14. package/dist/llm-facade.d.ts +64 -8
  15. package/dist/llm-facade.d.ts.map +1 -1
  16. package/dist/llm-facade.js +361 -109
  17. package/dist/llm-facade.js.map +1 -1
  18. package/dist/llm-provider.d.ts +39 -0
  19. package/dist/llm-provider.d.ts.map +1 -0
  20. package/dist/llm-provider.js +7 -0
  21. package/dist/llm-provider.js.map +1 -0
  22. package/dist/model-constants.d.ts +21 -0
  23. package/dist/model-constants.d.ts.map +1 -0
  24. package/dist/model-constants.js +24 -0
  25. package/dist/model-constants.js.map +1 -0
  26. package/dist/orchestrator/index.d.ts +8 -0
  27. package/dist/orchestrator/index.d.ts.map +1 -0
  28. package/dist/orchestrator/index.js +23 -0
  29. package/dist/orchestrator/index.js.map +1 -0
  30. package/dist/orchestrator/orchestrator-agent.d.ts +66 -0
  31. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -0
  32. package/dist/orchestrator/orchestrator-agent.js +855 -0
  33. package/dist/orchestrator/orchestrator-agent.js.map +1 -0
  34. package/dist/orchestrator/tool-registry.d.ts +74 -0
  35. package/dist/orchestrator/tool-registry.d.ts.map +1 -0
  36. package/dist/orchestrator/tool-registry.js +131 -0
  37. package/dist/orchestrator/tool-registry.js.map +1 -0
  38. package/dist/orchestrator/tools/check-page-ready.d.ts +13 -0
  39. package/dist/orchestrator/tools/check-page-ready.d.ts.map +1 -0
  40. package/dist/orchestrator/tools/check-page-ready.js +72 -0
  41. package/dist/orchestrator/tools/check-page-ready.js.map +1 -0
  42. package/dist/orchestrator/tools/extract-data.d.ts +13 -0
  43. package/dist/orchestrator/tools/extract-data.d.ts.map +1 -0
  44. package/dist/orchestrator/tools/extract-data.js +84 -0
  45. package/dist/orchestrator/tools/extract-data.js.map +1 -0
  46. package/dist/orchestrator/tools/index.d.ts +10 -0
  47. package/dist/orchestrator/tools/index.d.ts.map +1 -0
  48. package/dist/orchestrator/tools/index.js +18 -0
  49. package/dist/orchestrator/tools/index.js.map +1 -0
  50. package/dist/orchestrator/tools/inspect-page.d.ts +13 -0
  51. package/dist/orchestrator/tools/inspect-page.d.ts.map +1 -0
  52. package/dist/orchestrator/tools/inspect-page.js +39 -0
  53. package/dist/orchestrator/tools/inspect-page.js.map +1 -0
  54. package/dist/orchestrator/tools/recall-history.d.ts +13 -0
  55. package/dist/orchestrator/tools/recall-history.d.ts.map +1 -0
  56. package/dist/orchestrator/tools/recall-history.js +64 -0
  57. package/dist/orchestrator/tools/recall-history.js.map +1 -0
  58. package/dist/orchestrator/tools/take-screenshot.d.ts +15 -0
  59. package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -0
  60. package/dist/orchestrator/tools/take-screenshot.js +112 -0
  61. package/dist/orchestrator/tools/take-screenshot.js.map +1 -0
  62. package/dist/orchestrator/types.d.ts +133 -0
  63. package/dist/orchestrator/types.d.ts.map +1 -0
  64. package/dist/orchestrator/types.js +28 -0
  65. package/dist/orchestrator/types.js.map +1 -0
  66. package/dist/playwright-mcp-service.d.ts +9 -0
  67. package/dist/playwright-mcp-service.d.ts.map +1 -1
  68. package/dist/playwright-mcp-service.js +20 -5
  69. package/dist/playwright-mcp-service.js.map +1 -1
  70. package/dist/progress-reporter.d.ts +97 -0
  71. package/dist/progress-reporter.d.ts.map +1 -0
  72. package/dist/progress-reporter.js +18 -0
  73. package/dist/progress-reporter.js.map +1 -0
  74. package/dist/prompts.d.ts +24 -0
  75. package/dist/prompts.d.ts.map +1 -1
  76. package/dist/prompts.js +593 -68
  77. package/dist/prompts.js.map +1 -1
  78. package/dist/providers/backend-proxy-llm-provider.d.ts +25 -0
  79. package/dist/providers/backend-proxy-llm-provider.d.ts.map +1 -0
  80. package/dist/providers/backend-proxy-llm-provider.js +76 -0
  81. package/dist/providers/backend-proxy-llm-provider.js.map +1 -0
  82. package/dist/providers/local-llm-provider.d.ts +21 -0
  83. package/dist/providers/local-llm-provider.d.ts.map +1 -0
  84. package/dist/providers/local-llm-provider.js +35 -0
  85. package/dist/providers/local-llm-provider.js.map +1 -0
  86. package/dist/scenario-service.d.ts +27 -1
  87. package/dist/scenario-service.d.ts.map +1 -1
  88. package/dist/scenario-service.js +48 -12
  89. package/dist/scenario-service.js.map +1 -1
  90. package/dist/scenario-worker-class.d.ts +39 -2
  91. package/dist/scenario-worker-class.d.ts.map +1 -1
  92. package/dist/scenario-worker-class.js +614 -86
  93. package/dist/scenario-worker-class.js.map +1 -1
  94. package/dist/script-utils.d.ts +2 -0
  95. package/dist/script-utils.d.ts.map +1 -1
  96. package/dist/script-utils.js +44 -4
  97. package/dist/script-utils.js.map +1 -1
  98. package/dist/types.d.ts +11 -0
  99. package/dist/types.d.ts.map +1 -1
  100. package/dist/types.js.map +1 -1
  101. package/dist/utils/browser-utils.d.ts +20 -1
  102. package/dist/utils/browser-utils.d.ts.map +1 -1
  103. package/dist/utils/browser-utils.js +102 -51
  104. package/dist/utils/browser-utils.js.map +1 -1
  105. package/dist/utils/page-info-utils.d.ts +23 -4
  106. package/dist/utils/page-info-utils.d.ts.map +1 -1
  107. package/dist/utils/page-info-utils.js +174 -43
  108. package/dist/utils/page-info-utils.js.map +1 -1
  109. package/package.json +1 -2
  110. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +642 -0
  111. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +844 -0
  112. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +539 -0
  113. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +241 -0
  114. package/plandocs/PHASE1_FINAL_STATUS.md +210 -0
  115. package/plandocs/PLANNING_SESSION_SUMMARY.md +372 -0
  116. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +201 -0
  117. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +364 -0
  118. package/plandocs/SELECTOR_IMPROVEMENTS.md +139 -0
  119. package/src/credit-usage-service.ts +23 -5
  120. package/src/execution-service.ts +152 -42
  121. package/src/index.ts +169 -26
  122. package/src/llm-facade.ts +500 -126
  123. package/src/llm-provider.ts +43 -0
  124. package/src/model-constants.ts +23 -0
  125. package/src/orchestrator/index.ts +33 -0
  126. package/src/orchestrator/orchestrator-agent.ts +1037 -0
  127. package/src/orchestrator/tool-registry.ts +182 -0
  128. package/src/orchestrator/tools/check-page-ready.ts +75 -0
  129. package/src/orchestrator/tools/extract-data.ts +92 -0
  130. package/src/orchestrator/tools/index.ts +11 -0
  131. package/src/orchestrator/tools/inspect-page.ts +42 -0
  132. package/src/orchestrator/tools/recall-history.ts +72 -0
  133. package/src/orchestrator/tools/take-screenshot.ts +128 -0
  134. package/src/orchestrator/types.ts +200 -0
  135. package/src/playwright-mcp-service.ts +23 -5
  136. package/src/progress-reporter.ts +109 -0
  137. package/src/prompts.ts +606 -69
  138. package/src/providers/backend-proxy-llm-provider.ts +91 -0
  139. package/src/providers/local-llm-provider.ts +38 -0
  140. package/src/scenario-service.ts +83 -13
  141. package/src/scenario-worker-class.ts +740 -72
  142. package/src/script-utils.ts +50 -5
  143. package/src/types.ts +13 -1
  144. package/src/utils/browser-utils.ts +123 -51
  145. package/src/utils/page-info-utils.ts +210 -53
  146. package/testchimp-runner-core-0.0.22.tgz +0 -0
@@ -0,0 +1,200 @@
1
+ /**
2
+ * Orchestrator Agent Types
3
+ * Core types for the tool-using, memory-maintaining orchestrator agent
4
+ */
5
+
6
+ /**
7
+ * Self-reflection guidance the agent produces for its own next iteration (see AgentDecision.selfReflection and AgentContext.previousIterationGuidance)
8
+ */
9
+ export interface SelfReflection {
10
+ guidanceForNext: string; // Free-form advice for the next iteration, e.g. "Try data-testid selectors instead of text"
11
+ detectingLoop: boolean; // True when the agent signals that it is repeating the same approach
12
+ loopReasoning?: string; // Optional explanation of the detected loop, e.g. "Tried text-based selectors 3 times, switching to IDs"
13
+ }
14
+
15
+ /**
16
+ * Journey memory - the agent's accumulated record of its journey through the scenario (step history, learnings, extracted data)
17
+ */
18
+ export interface JourneyMemory {
19
+ // Step-by-step history, one MemoryStep per entry (TEXT only, no screenshots are stored here)
20
+ history: MemoryStep[];
21
+
22
+ // Learnings accumulated throughout the journey, as free-form strings
23
+ experiences: string[];
24
+
25
+ // Data extracted and saved for later steps; both keys and values are strings
26
+ extractedData: Record<string, string>;
27
+ }
28
+
29
+ /**
30
+ * Individual step in journey history (one element of JourneyMemory.history)
31
+ */
32
+ export interface MemoryStep {
33
+ stepNumber: number; // Scenario step this entry belongs to
34
+ iteration?: number; // NOTE(review): presumably the agent-loop iteration within the step - confirm
35
+ action: string; // Human-readable summary, e.g. "Filled login form"
36
+ code: string; // Code that was executed, e.g. "await page.fill('#email', ...)"
37
+ result: 'success' | 'failure';
38
+ observation: string; // What was observed afterwards, e.g. "Form submitted, redirected to dashboard"
39
+ error?: string; // Error details; only expected when the step failed
40
+ url: string; // Page URL after the action
41
+ timestamp: number; // NOTE(review): unit not stated here (presumably epoch milliseconds) - confirm
42
+ }
43
+
44
+ /**
45
+ * Tool call request from the agent: which tool to run and with what parameters
46
+ */
47
+ export interface ToolCall {
48
+ name: string; // Name of the requested tool
49
+ params: Record<string, any>; // Tool parameters; untyped here, so each tool must validate its own input
50
+ }
51
+
52
+ /**
53
+ * Result returned from executing a tool
54
+ */
55
+ export interface ToolResult {
56
+ success: boolean; // Whether the tool ran successfully
57
+ data?: any; // Tool-specific payload; shape is not constrained by this type
58
+ error?: string; // Error message, if any
59
+ learning?: string; // For exploratory actions: text-based learning extracted from a screenshot
60
+ }
61
+
62
+ /**
63
+ * Agent decision output for one iteration: optional tool requests, commands, reflection and memory updates, plus a mandatory status
64
+ */
65
+ export interface AgentDecision {
66
+ // Tool requests (the agent can request several tools in one decision)
67
+ toolCalls?: ToolCall[];
68
+ toolReasoning?: string;
69
+ needsToolResults?: boolean; // When set, wait for tool results before proceeding with commands
70
+
71
+ // Command batch (executed sequentially, in array order)
72
+ commands?: string[];
73
+ commandReasoning?: string;
74
+
75
+ // Self-reflection to carry into the next iteration
76
+ selfReflection?: SelfReflection;
77
+
78
+ // Memory updates for this iteration
79
+ memoryUpdate?: {
80
+ action: string;
81
+ observation: string;
82
+ extractedData?: Record<string, any>; // NOTE(review): values are `any` here but JourneyMemory.extractedData is Record<string, string> - confirm how values are converted
83
+ };
84
+
85
+ // Learnings to add to experiences
86
+ experiences?: string[];
87
+
88
+ // Termination decision (the three required fields below are the only non-optional members)
89
+ status: 'complete' | 'stuck' | 'infeasible' | 'continue';
90
+ statusReasoning: string;
91
+ reasoning: string; // Overall reasoning for this iteration
92
+
93
+ // Blocker detection (e.g., unexpected modals, tours, cookie consents)
94
+ blockerDetected?: {
95
+ description: string; // What is blocking (e.g., "Cookie consent modal")
96
+ clearingCommands: string[]; // Commands intended to dismiss/clear the blocker
97
+ };
98
+
99
+ // Step re-evaluation (last resort after repeated failures)
100
+ stepReEvaluation?: {
101
+ detected: boolean; // True if the agent is questioning the current step
102
+ issue: 'prior_incomplete' | 'already_done' | 'wrong_order' | null; // Kind of ordering problem; null is allowed by the type
103
+ explanation: string; // Why the agent thinks the step order is off
104
+ };
105
+ }
106
+
107
+ /**
108
+ * Context provided to the agent on each iteration: goals, current page state, memory excerpts and any tool results
109
+ */
110
+ export interface AgentContext {
111
+ // Goals & progress
112
+ overallGoal: string;
113
+ currentStepGoal: string;
114
+ stepNumber: number;
115
+ totalSteps: number;
116
+ completedSteps: string[];
117
+ remainingSteps: string[];
118
+
119
+ // Current state (fresh)
120
+ currentPageInfo: any; // PageInfo from getEnhancedPageInfo; typed `any` here, so its shape is not checked by the compiler
121
+ currentURL: string;
122
+
123
+ // Recent memory (6-7 steps); NOTE(review): presumably bounded by AgentConfig.recentStepsCount (default 7) - confirm
124
+ recentSteps: MemoryStep[];
125
+
126
+ // Learnings & data (same shapes as JourneyMemory.experiences / JourneyMemory.extractedData)
127
+ experiences: string[];
128
+ extractedData: Record<string, string>;
129
+
130
+ // Self-reflection produced by the previous iteration, if there was one
131
+ previousIterationGuidance?: SelfReflection;
132
+
133
+ // Tool results from this iteration (if any); NOTE(review): keys are presumably tool names - confirm
134
+ toolResults?: Record<string, ToolResult>;
135
+ }
136
+
137
+ /**
138
+ * Configurable guardrails for the agent; every field is optional and the stated defaults are those in DEFAULT_AGENT_CONFIG
139
+ */
140
+ export interface AgentConfig {
141
+ // Per-step limits
142
+ maxIterationsPerStep?: number; // Default: 8
143
+ maxToolCallsPerIteration?: number; // Default: 5
144
+ maxCommandsPerIteration?: number; // Default: 5
145
+ maxExploratoryActionsPerIteration?: number; // Default: 3 (Phase 2)
146
+ maxExploratoryActionsPerStep?: number; // Default: 10 (Phase 2)
147
+
148
+ // Scenario-wide limits
149
+ maxConsecutiveStepFailures?: number; // Default: 3 (increased from 2 to allow for UI changes)
150
+ maxTotalIterations?: number; // Default: 50 (across all steps)
151
+ continueOnStepFailure?: boolean; // Default: true (try subsequent steps even if commands fail)
152
+ // NOTE: ALWAYS stops on agent_stuck/infeasible (explicit agent decision); the enforcement itself is not in this file
153
+
154
+ // Memory limits
155
+ maxExperiences?: number; // Default: 20
156
+ maxHistorySize?: number; // Default: 100
157
+ recentStepsCount?: number; // Default: 7
158
+
159
+ // Timeouts (the defaults imply milliseconds)
160
+ commandTimeout?: number; // Default: 30000 (30s)
161
+ explorationTimeout?: number; // Default: 2000 (2s) (Phase 2)
162
+
163
+ // Allowed actions
164
+ allowedExplorationActions?: string[]; // Default: ['hover', 'click_info', 'click_menu', 'focus'] (Phase 2)
165
+ allowedDomains?: string[]; // For navigate_to_url validation; default is an empty list
166
+ }
167
+
168
+ /**
169
+ * Result of executing a single step, as returned by the orchestrator
170
+ */
171
+ export interface OrchestratorStepResult {
172
+ success: boolean;
173
+ commands: string[]; // Commands associated with this step
174
+ iterations: number; // Number of agent iterations used
175
+ terminationReason?: 'complete' | 'stuck' | 'infeasible' | 'system_limit' | 'agent_stuck'; // NOTE(review): both 'stuck' and 'agent_stuck' are allowed, while AgentDecision.status only has 'stuck' - confirm which one is emitted
176
+ memory: JourneyMemory; // Journey memory as of the end of the step
177
+ error?: string; // Error message, if any
178
+ }
179
+
180
+ /**
181
+ * Default config values; typed as Required<AgentConfig> so every optional AgentConfig field must have a default here
182
+ */
183
+ export const DEFAULT_AGENT_CONFIG: Required<AgentConfig> = {
184
+ maxIterationsPerStep: 8,
185
+ maxToolCallsPerIteration: 5,
186
+ maxCommandsPerIteration: 5,
187
+ maxExploratoryActionsPerIteration: 3,
188
+ maxExploratoryActionsPerStep: 10,
189
+ maxConsecutiveStepFailures: 3, // Increased from 2
190
+ maxTotalIterations: 50,
191
+ continueOnStepFailure: true, // Try subsequent steps even if one fails
192
+ maxExperiences: 20,
193
+ maxHistorySize: 100,
194
+ recentStepsCount: 7,
195
+ commandTimeout: 30000, // 30s, per the AgentConfig field comment
196
+ explorationTimeout: 2000, // 2s, per the AgentConfig field comment
197
+ allowedExplorationActions: ['hover', 'click_info', 'click_menu', 'focus'],
198
+ allowedDomains: [] // Empty by default; NOTE(review): whether empty means "allow all" or "allow none" is decided by the consumer - confirm
199
+ };
200
+
@@ -6,21 +6,39 @@ import { initializeBrowser } from './utils/browser-utils';
6
6
  */
7
7
  export class PlaywrightMCPService {
8
8
  private isConnected = false;
9
+ private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
9
10
 
10
11
  constructor() {
11
12
  // Intentionally empty: no initialization is needed for direct Playwright execution
12
13
  }
13
14
 
15
+ /**
16
+ * Set a logger callback for capturing execution logs; replaces any logger set earlier
17
+ */
18
+ setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
19
+ this.logger = logger;
20
+ }
21
+
22
+ /**
23
+ * Log a message through the configured logger; level defaults to 'log'
24
+ */
25
+ private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
26
+ if (this.logger) {
27
+ this.logger(message, level);
28
+ }
29
+ // No console fallback - logs are routed to the consumer, so the message is dropped when no logger has been set
30
+ }
31
+
14
32
  /**
15
33
  * Initialize the service
16
34
  */
17
35
  async initialize(): Promise<void> {
18
36
  try {
19
- console.log('Initializing Playwright service...');
37
+ this.log('Initializing Playwright service...');
20
38
 
21
39
  // No specific initialization needed for direct Playwright execution
22
40
  this.isConnected = true;
23
- console.log('Playwright service initialized successfully');
41
+ this.log('Playwright service initialized successfully');
24
42
  } catch (error) {
25
43
  throw new Error(`Failed to initialize Playwright service: ${error}`);
26
44
  }
@@ -109,7 +127,7 @@ export class PlaywrightMCPService {
109
127
 
110
128
  try {
111
129
  // Use the centralized browser initialization utility
112
- const browserInstance = await initializeBrowser(config);
130
+ const browserInstance = await initializeBrowser(config, undefined, undefined, this.logger);
113
131
  browser = browserInstance.browser;
114
132
  context = browserInstance.context;
115
133
  page = browserInstance.page;
@@ -191,9 +209,9 @@ export class PlaywrightMCPService {
191
209
  try {
192
210
  // No cleanup needed for direct Playwright execution
193
211
  this.isConnected = false;
194
- console.log('Playwright service closed');
212
+ this.log('Playwright service closed');
195
213
  } catch (error) {
196
- console.error('Error during shutdown:', error);
214
+ this.log(`Error during shutdown: ${error}`, 'error');
197
215
  }
198
216
  }
199
217
 
@@ -0,0 +1,109 @@
1
+ /**
2
+ * Progress Reporter Interface
3
+ * Allows consumers to track execution progress (logs, DB writes, etc.)
4
+ */
5
+
6
+ /**
7
+ * Execution status for a step (string enum; each member's value is the literal shown below)
8
+ * Matches scriptservice StepExecutionStatus enum, so the string values must stay in sync with it
9
+ */
10
+ export enum StepExecutionStatus {
11
+ SUCCESS = 'SUCCESS_STEP_EXECUTION',
12
+ FAILURE = 'FAILURE_STEP_EXECUTION',
13
+ IN_PROGRESS = 'IN_PROGRESS_STEP_EXECUTION'
14
+ }
15
+
16
+ /**
17
+ * Step progress report - camelCase for TypeScript; passed to ProgressReporter.onStepProgress
18
+ * Structure matches scriptservice's SmartTestExecutionStep / ScriptGenStep
19
+ */
20
+ export interface StepProgress {
21
+ jobId: string; // Job ID for DB keying
22
+ stepId?: string; // Unique step identifier
23
+ stepNumber: number; // Step index (1-based)
24
+ description: string; // Step description
25
+ code?: string; // Playwright command executed
26
+ screenshotDataUrl?: string; // Screenshot as data URL (data:image/png;base64,...)
27
+ status: StepExecutionStatus; // Execution status
28
+ error?: string; // Error message if failed
29
+ wasRepaired?: boolean; // For repair mode
30
+ subActionCount?: number; // Internal tracking
31
+ attempt?: number; // Internal tracking
32
+
33
+ // Agent transparency (for orchestrator mode); all fields below are optional
34
+ agentIteration?: number; // Which iteration of the agent loop
35
+ agentReasoning?: string; // Agent's reasoning for this iteration
36
+ agentSelfReflection?: any; // SelfReflection guidance for next iteration; NOTE(review): typed `any`, so the SelfReflection shape is not enforced here
37
+ agentExperiences?: string[]; // Learnings from this iteration
38
+ agentToolsUsed?: string[]; // Tools the agent used
39
+ agentStatus?: string; // Agent's status decision; a plain string here, not a union of allowed values
40
+ }
41
+
42
+ /**
43
+ * Job progress report - camelCase for TypeScript; passed to ProgressReporter.onJobProgress
44
+ */
45
+ export interface JobProgress {
46
+ jobId: string; // Job identifier
47
+ status: 'started' | 'in_progress' | 'completed' | 'failed'; // Overall job state
48
+ currentStep?: number; // Current step being executed
49
+ totalSteps?: number; // Total steps in scenario
50
+ testName?: string; // Test name
51
+ script?: string; // Generated/repaired script
52
+ error?: string; // Error message if failed
53
+ }
54
+
55
+ /**
56
+ * Token usage record; passed to ProgressReporter.onTokensUsed for cost tracking
57
+ */
58
+ export interface TokenUsage {
59
+ jobId: string; // Job the usage belongs to
60
+ stepNumber?: number; // Step the usage belongs to, when known
61
+ iteration?: number; // NOTE(review): presumably the agent-loop iteration - confirm
62
+ inputTokens: number; // Input token count
63
+ outputTokens: number; // Output token count
64
+ includesImage: boolean; // Whether the request included an image
65
+ model: string; // Model identifier
66
+ timestamp: number; // NOTE(review): unit not stated here (presumably epoch milliseconds) - confirm
67
+ }
68
+
69
+ /**
70
+ * Progress reporter interface for external consumers; every member is optional, so callers must check that a callback exists before invoking it
71
+ */
72
+ export interface ProgressReporter {
73
+ /**
74
+ * Called when a step starts, updates, or completes (see StepProgress.status)
75
+ * - VS Extension/GitHub: Log to console
76
+ * - Script Service: Write to DB, upload screenshot to GCS
77
+ */
78
+ onStepProgress?(progress: StepProgress): Promise<void>;
79
+
80
+ /**
81
+ * Called when the overall job status changes (see JobProgress.status)
82
+ * - VS Extension/GitHub: Log status
83
+ * - Script Service: Update job in DB
84
+ */
85
+ onJobProgress?(progress: JobProgress): Promise<void>;
86
+
87
+ /**
88
+ * Called when a script is generated/updated, with the job ID, script text and test name
89
+ */
90
+ onScriptGenerated?(jobId: string, script: string, testName: string): Promise<void>;
91
+
92
+ /**
93
+ * Called when a script is repaired/updated, with both script versions and a confidence value (NOTE(review): confidence range is not stated here - confirm)
94
+ */
95
+ onScriptRepaired?(jobId: string, originalScript: string, repairedScript: string, confidence: number): Promise<void>;
96
+
97
+ /**
98
+ * Called when tokens are used (for cost tracking)
99
+ * - VS Extension/GitHub: Log token usage
100
+ * - Script Service: Store in DB for analytics
101
+ */
102
+ onTokensUsed?(usage: TokenUsage): Promise<void>;
103
+
104
+ /**
105
+ * Generic logging (for environments that don't need structured progress); synchronous, unlike the other callbacks
106
+ */
107
+ log?(message: string, level?: 'log' | 'error' | 'warn'): void;
108
+ }
109
+