@telnyx/voice-agent-tester 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -238,6 +238,7 @@ export class VoiceAgentTester {
238
238
  } else {
239
239
  errorMessage += '\n (Could not collect browser diagnostics)';
240
240
  }
241
+
241
242
  }
242
243
 
243
244
  reject(new Error(errorMessage));
@@ -363,6 +364,7 @@ export class VoiceAgentTester {
363
364
  console.error(error.stack);
364
365
  }
365
366
  });
367
+
366
368
  }
367
369
 
368
370
  async close() {
@@ -488,7 +490,7 @@ export class VoiceAgentTester {
488
490
  }
489
491
  }
490
492
 
491
- async executeStep(step, stepIndex, appName = '', scenarioName = '', repetition = 1) {
493
+ async executeStep(step, stepIndex, appName = '', scenarioName = '', repetition = 1, scenarioStepIndex = null) {
492
494
  if (!this.page) {
493
495
  throw new Error('Browser not launched. Call launch() first.');
494
496
  }
@@ -551,13 +553,13 @@ export class VoiceAgentTester {
551
553
  // Record metrics for report if enabled and step has metrics attribute
552
554
  if (this.reportGenerator && step.metrics) {
553
555
  if (step.metrics.includes('elapsed_time')) {
554
- this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, 'elapsed_time', elapsedTimeMs);
556
+ this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, 'elapsed_time', elapsedTimeMs, scenarioStepIndex);
555
557
  }
556
558
  // Record any additional metrics returned by the handler
557
559
  if (handlerResult && typeof handlerResult === 'object') {
558
560
  for (const [metricName, metricValue] of Object.entries(handlerResult)) {
559
561
  if (step.metrics.includes(metricName)) {
560
- this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, metricName, metricValue);
562
+ this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, metricName, metricValue, scenarioStepIndex);
561
563
  }
562
564
  }
563
565
  }
@@ -866,10 +868,40 @@ export class VoiceAgentTester {
866
868
 
867
869
  // Wait for speech to complete by listening for speechend event
868
870
  try {
869
- await this.waitForAudioEvent('speechend');
871
+ // Use a shorter timeout for speechend (15s) since we have a safety fallback in the browser
872
+ await this.waitForAudioEvent('speechend', 15000);
870
873
  } catch (error) {
871
- console.error('Timeout waiting for speech to complete:', error.message);
872
- throw error;
874
+ // speechend timeout is recoverable — the audio likely finished but the event was lost
875
+ // (e.g., agent started responding and disrupted the audio element)
876
+ if (this.debug) {
877
+ // Check the state of the speak audio in the browser
878
+ const speakState = await this.page.evaluate(() => {
879
+ const info = {
880
+ currentSpeakAudio: null,
881
+ audioContextState: null,
882
+ };
883
+ try {
884
+ if (window.currentSpeakAudio) {
885
+ info.currentSpeakAudio = {
886
+ paused: window.currentSpeakAudio.paused,
887
+ ended: window.currentSpeakAudio.ended,
888
+ currentTime: window.currentSpeakAudio.currentTime,
889
+ duration: window.currentSpeakAudio.duration,
890
+ readyState: window.currentSpeakAudio.readyState,
891
+ };
892
+ }
893
+ if (window.globalAudioContext) {
894
+ info.audioContextState = window.globalAudioContext.state;
895
+ }
896
+ } catch (e) { /* ignore */ }
897
+ return info;
898
+ }).catch(() => null);
899
+
900
+ console.warn(`\t⚠️ speechend timeout (recovered) — speak audio state:`, JSON.stringify(speakState));
901
+ } else {
902
+ console.warn(`\t⚠️ speechend timeout — continuing (audio likely finished)`);
903
+ }
904
+ // Don't throw — treat speechend timeout as recoverable
873
905
  }
874
906
  }
875
907
 
@@ -1234,10 +1266,14 @@ export class VoiceAgentTester {
1234
1266
  }
1235
1267
 
1236
1268
  // Execute all configured steps
1269
+ const appStepCount = appSteps.length;
1237
1270
  for (let i = 0; i < steps.length; i++) {
1238
1271
  const step = steps[i];
1239
1272
  console.log(`Executing step ${i + 1}: ${JSON.stringify(step)}`);
1240
- await this.executeStep(step, i, appName, scenarioName, repetition);
1273
+ // For scenario steps (after app steps), pass the 1-based scenario step index
1274
+ // so metrics can be aligned across providers with different app setup steps
1275
+ const scenarioStepIndex = i >= appStepCount ? (i - appStepCount + 1) : null;
1276
+ await this.executeStep(step, i, appName, scenarioName, repetition, scenarioStepIndex);
1241
1277
  }
1242
1278
 
1243
1279
  // Keep the browser open for a bit after all steps
@@ -44,8 +44,9 @@ describe('Integration Tests', () => {
44
44
  this.text = text;
45
45
  };
46
46
 
47
- // Mock __speak function that will be called by the tester
48
- // This needs to be in the page itself since evaluateOnNewDocument runs before navigation
47
+ // Mock __speak and __waitForMediaStream functions
48
+ // These override the injected audio hooks since inline scripts run after evaluateOnNewDocument
49
+ window.__waitForMediaStream = () => Promise.resolve();
49
50
  window.__speak = (text) => {
50
51
  document.getElementById('speech-output').textContent = text;
51
52
  // Signal speech end after a small delay to allow waitForAudioEvent to be set up
@@ -75,7 +76,7 @@ describe('Integration Tests', () => {
75
76
 
76
77
  // The scenario should complete without throwing errors
77
78
  expect(true).toBe(true);
78
- });
79
+ }, 15000);
79
80
 
80
81
  test('should handle scenario with wait step', async () => {
81
82
  const testPageContent = `
@@ -1,5 +1,6 @@
1
1
  import { describe, test, expect, beforeEach, afterEach } from '@jest/globals';
2
2
  import { VoiceAgentTester } from '../src/voice-agent-tester.js';
3
+ import { ReportGenerator } from '../src/report.js';
3
4
  import fs from 'fs';
4
5
  import path from 'path';
5
6
 
@@ -187,4 +188,136 @@ describe('VoiceAgentTester', () => {
187
188
  await expect(tester.executeStep({ action: 'speak' }, 0, 'scenario'))
188
189
  .rejects.toThrow('No text or file specified for speak action');
189
190
  });
191
+ });
192
+
193
+ describe('ReportGenerator - Comparison Step Alignment', () => {
194
+ test('should align metrics by scenario step index across providers with different app steps', () => {
195
+ // Simulate: Vapi has 5 app steps, Telnyx has 3 app steps
196
+ // Both share the same 7 scenario steps with metrics on scenario steps 4 and 7
197
+ const providerReport = new ReportGenerator('/tmp/test_provider.csv');
198
+ const telnyxReport = new ReportGenerator('/tmp/test_telnyx.csv');
199
+
200
+ // Provider (Vapi): 5 app steps + 7 scenario steps = 12 total
201
+ // Metric steps at absolute indices 8 (scenario step 4) and 11 (scenario step 7)
202
+ providerReport.beginRun('vapi', 'appointment', 0);
203
+ providerReport.recordStepMetric('vapi', 'appointment', 0, 8, 'wait_for_voice', 'elapsed_time', 2849, 4);
204
+ providerReport.recordStepMetric('vapi', 'appointment', 0, 11, 'wait_for_voice', 'elapsed_time', 3307, 7);
205
+ providerReport.endRun('vapi', 'appointment', 0);
206
+
207
+ // Telnyx: 3 app steps + 7 scenario steps = 10 total
208
+ // Metric steps at absolute indices 6 (scenario step 4) and 9 (scenario step 7)
209
+ telnyxReport.beginRun('telnyx', 'appointment', 0);
210
+ telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 6, 'wait_for_voice', 'elapsed_time', 1552, 4);
211
+ telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 9, 'wait_for_voice', 'elapsed_time', 704, 7);
212
+ telnyxReport.endRun('telnyx', 'appointment', 0);
213
+
214
+ // Get scenario-aligned metrics
215
+ const providerMetrics = providerReport.getAggregatedMetricsByScenarioStep();
216
+ const telnyxMetrics = telnyxReport.getAggregatedMetricsByScenarioStep();
217
+
218
+ // Both should have metrics at scenario steps 4 and 7
219
+ expect(providerMetrics.has(4)).toBe(true);
220
+ expect(providerMetrics.has(7)).toBe(true);
221
+ expect(telnyxMetrics.has(4)).toBe(true);
222
+ expect(telnyxMetrics.has(7)).toBe(true);
223
+
224
+ // Verify values are correct
225
+ expect(providerMetrics.get(4).get('elapsed_time').avg).toBe(2849);
226
+ expect(providerMetrics.get(7).get('elapsed_time').avg).toBe(3307);
227
+ expect(telnyxMetrics.get(4).get('elapsed_time').avg).toBe(1552);
228
+ expect(telnyxMetrics.get(7).get('elapsed_time').avg).toBe(704);
229
+
230
+ // The comparison should now have 2 comparable steps (not 4 separate unmatched ones)
231
+ const allScenarioSteps = new Set([
232
+ ...providerMetrics.keys(),
233
+ ...telnyxMetrics.keys()
234
+ ]);
235
+ expect(allScenarioSteps.size).toBe(2);
236
+ });
237
+
238
+ test('should generate comparison summary with single headline number', () => {
239
+ const providerReport = new ReportGenerator('/tmp/test_provider.csv');
240
+ const telnyxReport = new ReportGenerator('/tmp/test_telnyx.csv');
241
+
242
+ providerReport.beginRun('vapi', 'appointment', 0);
243
+ providerReport.recordStepMetric('vapi', 'appointment', 0, 8, 'wait_for_voice', 'elapsed_time', 2849, 4);
244
+ providerReport.recordStepMetric('vapi', 'appointment', 0, 11, 'wait_for_voice', 'elapsed_time', 3307, 7);
245
+ providerReport.endRun('vapi', 'appointment', 0);
246
+
247
+ telnyxReport.beginRun('telnyx', 'appointment', 0);
248
+ telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 6, 'wait_for_voice', 'elapsed_time', 1552, 4);
249
+ telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 9, 'wait_for_voice', 'elapsed_time', 704, 7);
250
+ telnyxReport.endRun('telnyx', 'appointment', 0);
251
+
252
+ // Capture console output
253
+ const logs = [];
254
+ const originalLog = console.log;
255
+ console.log = (msg) => logs.push(msg);
256
+
257
+ ReportGenerator.generateComparisonSummary(providerReport, telnyxReport, 'vapi');
258
+
259
+ console.log = originalLog;
260
+
261
+ const output = logs.join('\n');
262
+
263
+ // Should show averaged headline numbers: vapi avg = (2849+3307)/2 = 3078, telnyx avg = (1552+704)/2 = 1128
264
+ expect(output).toContain('3078ms');
265
+ expect(output).toContain('1128ms');
266
+ // Should show "2 matched responses"
267
+ expect(output).toContain('2 matched responses');
268
+ // Should declare Telnyx the winner
269
+ expect(output).toContain('🏆 Telnyx');
270
+ // Should NOT contain per-response breakdown without debug
271
+ expect(output).not.toContain('Per-response breakdown');
272
+ expect(output).not.toContain('#1');
273
+ expect(output).not.toContain('#2');
274
+ });
275
+
276
+ test('should show per-response breakdown with debug flag', () => {
277
+ const providerReport = new ReportGenerator('/tmp/test_provider.csv');
278
+ const telnyxReport = new ReportGenerator('/tmp/test_telnyx.csv');
279
+
280
+ providerReport.beginRun('vapi', 'appointment', 0);
281
+ providerReport.recordStepMetric('vapi', 'appointment', 0, 8, 'wait_for_voice', 'elapsed_time', 2849, 4);
282
+ providerReport.recordStepMetric('vapi', 'appointment', 0, 11, 'wait_for_voice', 'elapsed_time', 3307, 7);
283
+ providerReport.endRun('vapi', 'appointment', 0);
284
+
285
+ telnyxReport.beginRun('telnyx', 'appointment', 0);
286
+ telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 6, 'wait_for_voice', 'elapsed_time', 1552, 4);
287
+ telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 9, 'wait_for_voice', 'elapsed_time', 704, 7);
288
+ telnyxReport.endRun('telnyx', 'appointment', 0);
289
+
290
+ const logs = [];
291
+ const originalLog = console.log;
292
+ console.log = (msg) => logs.push(msg);
293
+
294
+ ReportGenerator.generateComparisonSummary(providerReport, telnyxReport, 'vapi', { debug: true });
295
+
296
+ console.log = originalLog;
297
+
298
+ const output = logs.join('\n');
299
+
300
+ // Should contain per-response breakdown
301
+ expect(output).toContain('Per-response breakdown');
302
+ expect(output).toContain('#1');
303
+ expect(output).toContain('#2');
304
+ expect(output).toContain('2849ms');
305
+ expect(output).toContain('1552ms');
306
+ expect(output).toContain('3307ms');
307
+ expect(output).toContain('704ms');
308
+ // Should ALSO contain the headline average
309
+ expect(output).toContain('3078ms');
310
+ expect(output).toContain('1128ms');
311
+ });
312
+
313
+ test('getAggregatedMetricsByScenarioStep returns empty map when no scenario steps', () => {
314
+ const report = new ReportGenerator('/tmp/test.csv');
315
+ report.beginRun('test', 'scenario', 0);
316
+ // Record without scenarioStepIndex (app step)
317
+ report.recordStepMetric('test', 'scenario', 0, 0, 'click', 'elapsed_time', 100);
318
+ report.endRun('test', 'scenario', 0);
319
+
320
+ const metrics = report.getAggregatedMetricsByScenarioStep();
321
+ expect(metrics.size).toBe(0);
322
+ });
190
323
  });