testchimp-runner-core 0.0.25 → 0.0.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/CREDIT_CALLBACK_ARCHITECTURE.md +253 -0
  2. package/INTEGRATION_COMPLETE.md +322 -0
  3. package/RELEASE_0.0.26.md +165 -0
  4. package/RELEASE_0.0.27.md +236 -0
  5. package/RELEASE_0.0.28.md +286 -0
  6. package/dist/credit-usage-service.d.ts +28 -2
  7. package/dist/credit-usage-service.d.ts.map +1 -1
  8. package/dist/credit-usage-service.js +60 -24
  9. package/dist/credit-usage-service.js.map +1 -1
  10. package/dist/env-loader.d.ts +0 -5
  11. package/dist/env-loader.d.ts.map +1 -1
  12. package/dist/env-loader.js +0 -21
  13. package/dist/env-loader.js.map +1 -1
  14. package/dist/execution-service.d.ts.map +1 -1
  15. package/dist/execution-service.js +134 -10
  16. package/dist/execution-service.js.map +1 -1
  17. package/dist/index.d.ts +14 -6
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/index.js +28 -7
  20. package/dist/index.js.map +1 -1
  21. package/dist/progress-reporter.d.ts +30 -0
  22. package/dist/progress-reporter.d.ts.map +1 -1
  23. package/dist/prompts.js +4 -4
  24. package/dist/scenario-service.d.ts +1 -1
  25. package/dist/scenario-service.d.ts.map +1 -1
  26. package/dist/scenario-service.js +7 -4
  27. package/dist/scenario-service.js.map +1 -1
  28. package/dist/scenario-worker-class.d.ts +2 -10
  29. package/dist/scenario-worker-class.d.ts.map +1 -1
  30. package/dist/scenario-worker-class.js +88 -26
  31. package/dist/scenario-worker-class.js.map +1 -1
  32. package/dist/types.d.ts +9 -0
  33. package/dist/types.d.ts.map +1 -1
  34. package/dist/types.js.map +1 -1
  35. package/package.json +1 -1
  36. package/src/credit-usage-service.ts +81 -26
  37. package/src/env-loader.ts +0 -22
  38. package/src/execution-service.ts +158 -11
  39. package/src/index.ts +54 -10
  40. package/src/progress-reporter.ts +35 -0
  41. package/src/prompts.ts +4 -4
  42. package/src/scenario-service.ts +16 -4
  43. package/src/scenario-worker-class.ts +102 -28
  44. package/src/types.ts +16 -0
  45. package/testchimp-runner-core-0.0.27.tgz +0 -0
  46. package/RELEASE_0.0.23.md +0 -120
  47. package/RELEASE_0.0.24.md +0 -161
package/src/execution-service.ts CHANGED
@@ -235,14 +235,78 @@ export class ExecutionService {
235
235
  throw new Error('Script content is required for execution. The TestChimpService should read the file and provide script content.');
236
236
  }
237
237
 
238
+ // Check if we should use existing browser or create new one
239
+ const useExistingBrowser = !!(request.existingBrowser && request.existingContext && request.existingPage);
240
+
241
+ if (useExistingBrowser) {
242
+ this.log('Using existing browser/page provided by caller');
243
+ // Single attempt with existing browser
244
+ const browser = request.existingBrowser;
245
+ const context = request.existingContext;
246
+ const page = request.existingPage;
247
+
248
+ try {
249
+ // LIFECYCLE: Call beforeStartTest if provided
250
+ if (this.progressReporter?.beforeStartTest) {
251
+ await this.progressReporter.beforeStartTest(page, browser, context);
252
+ }
253
+
254
+ // Execute the script as-is
255
+ await this.executeScriptContent(request.script, page);
256
+
257
+ // LIFECYCLE: Call afterEndTest on success
258
+ if (this.progressReporter?.afterEndTest) {
259
+ await this.progressReporter.afterEndTest('passed', undefined, page);
260
+ }
261
+
262
+ // Don't close browser - caller owns it
263
+
264
+ return {
265
+ run_status: 'success',
266
+ num_deflake_runs: 0,
267
+ executionTime: Date.now() - startTime
268
+ };
269
+ } catch (error) {
270
+ lastError = error instanceof Error ? error : new Error('Script execution failed');
271
+ this.log(`Execution failed: ${lastError.message}`);
272
+
273
+ // LIFECYCLE: Call afterEndTest on failure
274
+ if (this.progressReporter?.afterEndTest) {
275
+ try {
276
+ await this.progressReporter.afterEndTest('failed', lastError.message, page);
277
+ } catch (callbackError) {
278
+ this.log(`afterEndTest callback failed: ${callbackError}`, 'warn');
279
+ }
280
+ }
281
+
282
+ return {
283
+ run_status: 'failed',
284
+ num_deflake_runs: 0,
285
+ executionTime: Date.now() - startTime,
286
+ error: lastError.message
287
+ };
288
+ }
289
+ }
290
+
291
+ // Create our own browser (original behavior)
238
292
  for (let attempt = 1; attempt <= totalAttempts; attempt++) {
239
293
  this.log(`Attempting deflake run ${attempt}/${totalAttempts}`);
240
294
  const { browser, context, page } = await this.initializeBrowser(request.playwrightConfig, request.headless, request.playwrightConfigFilePath);
241
295
 
242
296
  try {
297
+ // LIFECYCLE: Call beforeStartTest if provided
298
+ if (this.progressReporter?.beforeStartTest) {
299
+ await this.progressReporter.beforeStartTest(page, browser, context);
300
+ }
301
+
243
302
  // Execute the script as-is
244
303
  await this.executeScriptContent(request.script, page);
245
304
 
305
+ // LIFECYCLE: Call afterEndTest on success
306
+ if (this.progressReporter?.afterEndTest) {
307
+ await this.progressReporter.afterEndTest('passed', undefined, page);
308
+ }
309
+
246
310
  await browser.close();
247
311
 
248
312
  // Success! Return immediately
@@ -255,6 +319,15 @@ export class ExecutionService {
255
319
  lastError = error instanceof Error ? error : new Error('Script execution failed');
256
320
  this.log(`Initial run failed: ${lastError.message}`);
257
321
 
322
+ // LIFECYCLE: Call afterEndTest on failure
323
+ if (this.progressReporter?.afterEndTest) {
324
+ try {
325
+ await this.progressReporter.afterEndTest('failed', lastError.message, page);
326
+ } catch (callbackError) {
327
+ this.log(`afterEndTest callback failed: ${callbackError}`, 'warn');
328
+ }
329
+ }
330
+
258
331
  try {
259
332
  await browser.close();
260
333
  } catch (closeError) {
@@ -287,6 +360,9 @@ export class ExecutionService {
287
360
  throw new Error('Script content is required for AI repair. The TestChimpService should read the file and provide script content.');
288
361
  }
289
362
 
363
+ // Check if we should use existing browser
364
+ const useExistingBrowser = !!(request.existingBrowser && request.existingContext && request.existingPage);
365
+
290
366
  // First, try runExactly (which includes deflaking if configured)
291
367
  this.log('Attempting runExactly first (with deflaking if configured)...');
292
368
  const runExactlyResult = await this.runExactly(request, startTime, model);
@@ -300,16 +376,46 @@ export class ExecutionService {
300
376
  this.log('runExactly failed, starting AI repair process...');
301
377
 
302
378
  try {
379
+ let repairBrowser, repairContext, repairPage, steps, updatedSteps;
303
380
 
304
- // Start browser initialization and script parsing in parallel for faster startup
305
- this.log('Initializing repair browser and parsing script...');
306
- const [steps, { browser: repairBrowser, context: repairContext, page: repairPage }] = await Promise.all([
307
- this.parseScriptIntoSteps(request.script, model),
308
- this.initializeBrowser(request.playwrightConfig, request.headless, request.playwrightConfigFilePath) // Use request.headless (defaults to false/headed)
309
- ]);
310
-
311
- this.log('Starting AI repair with parsed steps...');
312
- const updatedSteps = await this.repairStepsWithAI(steps, repairPage, repairFlexibility, model);
381
+ if (useExistingBrowser) {
382
+ // Use existing browser
383
+ this.log('Using existing browser for AI repair...');
384
+ repairBrowser = request.existingBrowser;
385
+ repairContext = request.existingContext;
386
+ repairPage = request.existingPage;
387
+
388
+ // Parse script into steps
389
+ steps = await this.parseScriptIntoSteps(request.script, model);
390
+
391
+ // LIFECYCLE: Call beforeStartTest if provided
392
+ if (this.progressReporter?.beforeStartTest) {
393
+ await this.progressReporter.beforeStartTest(repairPage, repairBrowser, repairContext);
394
+ }
395
+
396
+ this.log('Starting AI repair with parsed steps...');
397
+ updatedSteps = await this.repairStepsWithAI(steps, repairPage, repairFlexibility, model);
398
+ } else {
399
+ // Start browser initialization and script parsing in parallel for faster startup
400
+ this.log('Initializing repair browser and parsing script...');
401
+ const results = await Promise.all([
402
+ this.parseScriptIntoSteps(request.script, model),
403
+ this.initializeBrowser(request.playwrightConfig, request.headless, request.playwrightConfigFilePath) // Use request.headless (defaults to false/headed)
404
+ ]);
405
+
406
+ steps = results[0];
407
+ repairBrowser = results[1].browser;
408
+ repairContext = results[1].context;
409
+ repairPage = results[1].page;
410
+
411
+ // LIFECYCLE: Call beforeStartTest if provided
412
+ if (this.progressReporter?.beforeStartTest) {
413
+ await this.progressReporter.beforeStartTest(repairPage, repairBrowser, repairContext);
414
+ }
415
+
416
+ this.log('Starting AI repair with parsed steps...');
417
+ updatedSteps = await this.repairStepsWithAI(steps, repairPage, repairFlexibility, model);
418
+ }
313
419
 
314
420
  // Always generate the updated script
315
421
  const updatedScript = this.generateUpdatedScript(updatedSteps);
@@ -343,7 +449,23 @@ export class ExecutionService {
343
449
  this.log(`Failed to report credit usage for AI repair: ${error}`, 'warn');
344
450
  });
345
451
 
346
- await repairBrowser.close();
452
+ // LIFECYCLE: Call afterEndTest (partial or complete success)
453
+ if (this.progressReporter?.afterEndTest) {
454
+ try {
455
+ await this.progressReporter.afterEndTest(
456
+ allStepsSuccessful ? 'passed' : 'failed',
457
+ allStepsSuccessful ? undefined : 'Partial repair success',
458
+ repairPage
459
+ );
460
+ } catch (callbackError) {
461
+ this.log(`afterEndTest callback failed: ${callbackError}`, 'warn');
462
+ }
463
+ }
464
+
465
+ // Only close browser if we created it (not provided by caller)
466
+ if (!useExistingBrowser) {
467
+ await repairBrowser.close();
468
+ }
347
469
 
348
470
  return {
349
471
  run_status: 'failed', // Original script failed
@@ -356,7 +478,20 @@ export class ExecutionService {
356
478
  };
357
479
  } else {
358
480
  // No successful repairs at all
359
- await repairBrowser.close();
481
+
482
+ // LIFECYCLE: Call afterEndTest (complete failure)
483
+ if (this.progressReporter?.afterEndTest) {
484
+ try {
485
+ await this.progressReporter.afterEndTest('failed', 'AI repair could not fix any steps', repairPage);
486
+ } catch (callbackError) {
487
+ this.log(`afterEndTest callback failed: ${callbackError}`, 'warn');
488
+ }
489
+ }
490
+
491
+ // Only close browser if we created it (not provided by caller)
492
+ if (!useExistingBrowser) {
493
+ await repairBrowser.close();
494
+ }
360
495
 
361
496
  return {
362
497
  run_status: 'failed', // Original script failed
@@ -469,6 +604,18 @@ export class ExecutionService {
469
604
  this.log(`Loop iteration: i=${i}, step description="${step.description}", total steps=${updatedSteps.length}`);
470
605
 
471
606
  try {
607
+ // LIFECYCLE: Call beforeStepStart if provided
608
+ if (this.progressReporter?.beforeStepStart) {
609
+ await this.progressReporter.beforeStepStart(
610
+ {
611
+ stepNumber: i + 1,
612
+ description: step.description,
613
+ code: step.code
614
+ },
615
+ page
616
+ );
617
+ }
618
+
472
619
  // Try to execute the step directly without context replay
473
620
  this.log(`Attempting Step ${i + 1}: ${step.description}`);
474
621
  this.log(` Code: ${step.code}`);
package/src/index.ts CHANGED
@@ -10,9 +10,9 @@ import { ScenarioWorker } from './scenario-worker-class';
10
10
  import { PlaywrightMCPService } from './playwright-mcp-service';
11
11
  import { LLMFacade } from './llm-facade';
12
12
  import { AuthConfig } from './auth-config';
13
- import { CreditUsageService } from './credit-usage-service';
13
+ import { CreditUsageService, CreditUsageCallback, CreditUsage, CreditUsageReason } from './credit-usage-service';
14
14
 
15
- export { ExecutionService, ScenarioService, ScenarioWorker, PlaywrightMCPService, LLMFacade, CreditUsageService };
15
+ export { ExecutionService, ScenarioService, ScenarioWorker, PlaywrightMCPService, LLMFacade, CreditUsageService, CreditUsageCallback, CreditUsage, CreditUsageReason };
16
16
 
17
17
  // File handlers
18
18
  import { FileHandler, LocalFileHandler, CIFileHandler, NoOpFileHandler } from './file-handler';
@@ -20,12 +20,12 @@ export { FileHandler, LocalFileHandler, CIFileHandler, NoOpFileHandler };
20
20
 
21
21
  // LLM Provider interfaces
22
22
  import { LLMProvider, LLMRequest, LLMResponse } from './llm-provider';
23
- import { ProgressReporter, StepProgress, JobProgress, StepExecutionStatus } from './progress-reporter';
23
+ import { ProgressReporter, StepProgress, JobProgress, StepExecutionStatus, StepInfo } from './progress-reporter';
24
24
  import { BackendProxyLLMProvider } from './providers/backend-proxy-llm-provider';
25
25
  import { LocalLLMProvider } from './providers/local-llm-provider';
26
26
 
27
27
  export { LLMProvider, LLMRequest, LLMResponse };
28
- export { ProgressReporter, StepProgress, JobProgress, StepExecutionStatus };
28
+ export { ProgressReporter, StepProgress, JobProgress, StepExecutionStatus, StepInfo };
29
29
  export { BackendProxyLLMProvider, LocalLLMProvider };
30
30
 
31
31
  // Orchestrator (tool-using agent)
@@ -80,6 +80,8 @@ export class TestChimpService {
80
80
  private backendUrl: string;
81
81
  private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
82
82
  private orchestratorOptions?: { useOrchestrator?: boolean; orchestratorConfig?: Partial<AgentConfig>; debugMode?: boolean };
83
+ private outputChannel?: any; // Store outputChannel to preserve it across service recreations
84
+ private creditUsageCallback?: CreditUsageCallback; // Store credit callback to preserve it across service recreations
83
85
 
84
86
  constructor(
85
87
  fileHandler?: FileHandler,
@@ -88,19 +90,22 @@ export class TestChimpService {
88
90
  maxWorkers?: number,
89
91
  llmProvider?: LLMProvider,
90
92
  progressReporter?: ProgressReporter,
91
- orchestratorOptions?: { useOrchestrator?: boolean; orchestratorConfig?: Partial<AgentConfig>; debugMode?: boolean }
93
+ orchestratorOptions?: { useOrchestrator?: boolean; orchestratorConfig?: Partial<AgentConfig>; debugMode?: boolean },
94
+ creditUsageCallback?: CreditUsageCallback
92
95
  ) {
93
96
  this.fileHandler = fileHandler || new NoOpFileHandler();
94
97
  this.authConfig = authConfig || null;
95
98
  this.backendUrl = backendUrl || 'https://featureservice.testchimp.io'; // Default to production
96
99
  this.progressReporter = progressReporter;
97
100
  this.orchestratorOptions = orchestratorOptions;
101
+ this.creditUsageCallback = creditUsageCallback;
98
102
 
99
103
  // Use provided LLM provider or default to backend proxy (backward compatible)
100
104
  this.llmProvider = llmProvider || new BackendProxyLLMProvider(authConfig, backendUrl);
101
105
 
102
106
  this.playwrightService = new PlaywrightMCPService();
103
- this.creditUsageService = new CreditUsageService(this.authConfig || undefined, this.backendUrl);
107
+ // Pass credit callback to constructor - preserved across recreations via this.creditUsageCallback
108
+ this.creditUsageService = new CreditUsageService(this.authConfig || undefined, this.backendUrl, this.creditUsageCallback);
104
109
 
105
110
  // Create services with providers
106
111
  this.executionService = new ExecutionService(
@@ -163,6 +168,11 @@ export class TestChimpService {
163
168
  this.scenarioService.setLogger(this.logger);
164
169
  }
165
170
 
171
+ // Reapply outputChannel if we have one (critical for orchestrator logs)
172
+ if (this.outputChannel) {
173
+ this.scenarioService.setOutputChannel(this.outputChannel);
174
+ }
175
+
166
176
  // Reinitialize the services
167
177
  await this.executionService.initialize();
168
178
  await this.scenarioService.initialize();
@@ -183,8 +193,8 @@ export class TestChimpService {
183
193
  this.llmProvider.setLogger?.(this.logger);
184
194
  }
185
195
 
186
- // Recreate services with new provider
187
- this.creditUsageService = new CreditUsageService(this.authConfig || undefined, this.backendUrl);
196
+ // Recreate services with new provider (preserve credit callback)
197
+ this.creditUsageService = new CreditUsageService(this.authConfig || undefined, this.backendUrl, this.creditUsageCallback);
188
198
  this.executionService = new ExecutionService(
189
199
  this.authConfig || undefined,
190
200
  this.backendUrl,
@@ -208,6 +218,11 @@ export class TestChimpService {
208
218
  this.executionService.setLogger(this.logger);
209
219
  this.scenarioService.setLogger(this.logger);
210
220
  }
221
+
222
+ // Reapply outputChannel if we have one (critical for orchestrator logs)
223
+ if (this.outputChannel) {
224
+ this.scenarioService.setOutputChannel(this.outputChannel);
225
+ }
211
226
  }
212
227
 
213
228
  /**
@@ -226,11 +241,22 @@ export class TestChimpService {
226
241
  * This enables orchestrator thinking logs to appear in output console
227
242
  */
228
243
  setOutputChannel(outputChannel: any): void {
244
+ this.outputChannel = outputChannel; // Store for future service recreations
229
245
  if (typeof this.scenarioService?.setOutputChannel === 'function') {
230
246
  this.scenarioService.setOutputChannel(outputChannel);
231
247
  }
232
248
  }
233
249
 
250
+ /**
251
+ * Set credit usage callback
252
+ * Server-side: Use callback to update DB directly (no axios calls)
253
+ * Client-side: Don't set callback, uses auth for axios calls to backend
254
+ */
255
+ setCreditUsageCallback(callback: CreditUsageCallback): void {
256
+ this.creditUsageCallback = callback; // Store for future service recreations
257
+ this.creditUsageService.setCreditUsageCallback(callback);
258
+ }
259
+
234
260
  /**
235
261
  * Log a message using the configured logger
236
262
  */
@@ -259,8 +285,26 @@ export class TestChimpService {
259
285
  }
260
286
 
261
287
  // Scenario generation
262
- async generateScript(scenario: string, testName?: string, config?: string, model?: string, scenarioFileName?: string): Promise<string> {
263
- return this.scenarioService.processScenario(scenario, testName, config, model, scenarioFileName);
288
+ async generateScript(
289
+ scenario: string,
290
+ testName?: string,
291
+ config?: string,
292
+ model?: string,
293
+ scenarioFileName?: string,
294
+ existingBrowser?: any,
295
+ existingContext?: any,
296
+ existingPage?: any
297
+ ): Promise<string> {
298
+ return this.scenarioService.processScenario(
299
+ scenario,
300
+ testName,
301
+ config,
302
+ model,
303
+ scenarioFileName,
304
+ existingBrowser,
305
+ existingContext,
306
+ existingPage
307
+ );
264
308
  }
265
309
 
266
310
  // Test execution
package/src/progress-reporter.ts CHANGED
@@ -66,6 +66,16 @@ export interface TokenUsage {
66
66
  timestamp: number;
67
67
  }
68
68
 
69
+ /**
70
+ * Additional step info for lifecycle callbacks
71
+ */
72
+ export interface StepInfo {
73
+ stepId?: string;
74
+ stepNumber: number;
75
+ description: string;
76
+ code?: string;
77
+ }
78
+
69
79
  /**
70
80
  * Progress reporter interface for external consumers
71
81
  */
@@ -105,5 +115,30 @@ export interface ProgressReporter {
105
115
  * Generic logging (for environments that don't need structured progress)
106
116
  */
107
117
  log?(message: string, level?: 'log' | 'error' | 'warn'): void;
118
+
119
+ /**
120
+ * LIFECYCLE CALLBACKS (optional - used by scriptservice, ignored by local clients)
121
+ */
122
+
123
+ /**
124
+ * Called before test execution starts
125
+ * - Script Service: Initialize browser context, set up DB records
126
+ * - VS Extension/GitHub: Not used (ignore)
127
+ */
128
+ beforeStartTest?(page: any, browser: any, context: any): Promise<void>;
129
+
130
+ /**
131
+ * Called before each step execution
132
+ * - Script Service: Update step status to IN_PROGRESS in DB
133
+ * - VS Extension/GitHub: Not used (ignore)
134
+ */
135
+ beforeStepStart?(step: StepInfo, page: any): Promise<void>;
136
+
137
+ /**
138
+ * Called after test execution completes (success or failure)
139
+ * - Script Service: Write final status to DB, cleanup resources
140
+ * - VS Extension/GitHub: Not used (return value is sufficient)
141
+ */
142
+ afterEndTest?(status: 'passed' | 'failed', error?: string, page?: any): Promise<void>;
108
143
  }
109
144
 
package/src/prompts.ts CHANGED
@@ -757,12 +757,12 @@ ${script}
757
757
 
758
758
  YOUR TASK (MINOR ADJUSTMENTS ONLY):
759
759
  1. Remove duplicate/redundant expect() assertions (e.g., same assertion repeated twice)
760
- 2. Remove duplicate step comments without code
761
- 3. Fix obvious formatting issues (inconsistent spacing, etc.)
762
- 4. Consolidate multiple identical assertions into one
763
- 5. Remove any obviously redundant waits or checks
760
+ 2. Fix obvious formatting issues (inconsistent spacing, etc.)
761
+ 3. Consolidate multiple identical assertions into one
762
+ 4. Remove any obviously redundant waits or checks
764
763
 
765
764
  DO NOT:
765
+ - Remove step comments (e.g., "// Step 1: ..." or "// Navigate to...") - these are important for readability
766
766
  - Change the test logic or flow
767
767
  - Remove legitimate assertions
768
768
  - Restructure the code
package/src/scenario-service.ts CHANGED
@@ -111,16 +111,25 @@ export class ScenarioService extends EventEmitter {
111
111
  });
112
112
 
113
113
  this.workers.push(worker);
114
- this.log(`Scenario worker initialized${this.useOrchestrator ? ' (Orchestrator Mode)' : ''} with session: ${worker['sessionId']}`);
114
+ // Internal initialization - no need to log worker details
115
115
  }
116
116
 
117
117
  async initialize(): Promise<void> {
118
118
  // Wait for workers to be initialized
119
119
  await this.initializeWorkers();
120
- this.log('Scenario service initialized');
120
+ // Internal initialization - consumer doesn't need to see this
121
121
  }
122
122
 
123
- processScenario(scenario: string, testName?: string, config?: PlaywrightConfig, model?: string, scenarioFileName?: string): string {
123
+ processScenario(
124
+ scenario: string,
125
+ testName?: string,
126
+ config?: PlaywrightConfig,
127
+ model?: string,
128
+ scenarioFileName?: string,
129
+ existingBrowser?: any,
130
+ existingContext?: any,
131
+ existingPage?: any
132
+ ): string {
124
133
  const jobId = `scenario_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
125
134
 
126
135
  // Add job to queue
@@ -130,7 +139,10 @@ export class ScenarioService extends EventEmitter {
130
139
  testName,
131
140
  playwrightConfig: config,
132
141
  model,
133
- scenarioFileName
142
+ scenarioFileName,
143
+ existingBrowser,
144
+ existingContext,
145
+ existingPage
134
146
  };
135
147
 
136
148
  this.jobQueue.push(job);