npm - @telnyx/voice-agent-tester - Versions diffs - 0.4.3 → 0.4.5 - Mend

@telnyx/voice-agent-tester 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/CHANGELOG.md +23 -0
package/README.md +185 -161
package/applications/elevenlabs.yaml +1 -1
package/javascript/audio_input_hooks.js +89 -19
package/javascript/audio_output_hooks.js +92 -2
package/package.json +1 -1
package/src/index.js +79 -28
package/src/report.js +169 -90
package/src/voice-agent-tester.js +43 -7
package/tests/integration.test.js +4 -3
package/tests/voice-agent-tester.test.js +133 -0

package/src/voice-agent-tester.js CHANGED

@@ -238,6 +238,7 @@ export class VoiceAgentTester {
           } else {
             errorMessage += '\n  (Could not collect browser diagnostics)';
           }
         }
         reject(new Error(errorMessage));
@@ -363,6 +364,7 @@ export class VoiceAgentTester {
         console.error(error.stack);
       }
     });
   }
   async close() {
@@ -488,7 +490,7 @@ export class VoiceAgentTester {
     }
   }
-  async executeStep(step, stepIndex, appName = '', scenarioName = '', repetition = 1) {
+  async executeStep(step, stepIndex, appName = '', scenarioName = '', repetition = 1, scenarioStepIndex = null) {
     if (!this.page) {
       throw new Error('Browser not launched. Call launch() first.');
     }
@@ -551,13 +553,13 @@ export class VoiceAgentTester {
       // Record metrics for report if enabled and step has metrics attribute
       if (this.reportGenerator && step.metrics) {
         if (step.metrics.includes('elapsed_time')) {
-          this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, 'elapsed_time', elapsedTimeMs);
+          this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, 'elapsed_time', elapsedTimeMs, scenarioStepIndex);
         }
         // Record any additional metrics returned by the handler
         if (handlerResult && typeof handlerResult === 'object') {
           for (const [metricName, metricValue] of Object.entries(handlerResult)) {
             if (step.metrics.includes(metricName)) {
-              this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, metricName, metricValue);
+              this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, metricName, metricValue, scenarioStepIndex);
             }
           }
         }
@@ -866,10 +868,40 @@ export class VoiceAgentTester {
     // Wait for speech to complete by listening for speechend event
     try {
-      await this.waitForAudioEvent('speechend');
+      // Use a shorter timeout for speechend (15s) since we have safety fallback in browser
+      await this.waitForAudioEvent('speechend', 15000);
     } catch (error) {
-      console.error('Timeout waiting for speech to complete:', error.message);
-      throw error;
+      // speechend timeout is recoverable — the audio likely finished but the event was lost
+      // (e.g., agent started responding and disrupted the audio element)
+      if (this.debug) {
+        // Check the state of the speak audio in the browser
+        const speakState = await this.page.evaluate(() => {
+          const info = {
+            currentSpeakAudio: null,
+            audioContextState: null,
+          };
+          try {
+            if (window.currentSpeakAudio) {
+              info.currentSpeakAudio = {
+                paused: window.currentSpeakAudio.paused,
+                ended: window.currentSpeakAudio.ended,
+                currentTime: window.currentSpeakAudio.currentTime,
+                duration: window.currentSpeakAudio.duration,
+                readyState: window.currentSpeakAudio.readyState,
+              };
+            }
+            if (window.globalAudioContext) {
+              info.audioContextState = window.globalAudioContext.state;
+            }
+          } catch (e) { /* ignore */ }
+          return info;
+        }).catch(() => null);
+        console.warn(`\t⚠️ speechend timeout (recovered) — speak audio state:`, JSON.stringify(speakState));
+      } else {
+        console.warn(`\t⚠️ speechend timeout — continuing (audio likely finished)`);
+      }
+      // Don't throw — treat speechend timeout as recoverable
     }
   }
@@ -1234,10 +1266,14 @@ export class VoiceAgentTester {
       }
       // Execute all configured steps
+      const appStepCount = appSteps.length;
       for (let i = 0; i < steps.length; i++) {
         const step = steps[i];
         console.log(`Executing step ${i + 1}: ${JSON.stringify(step)}`);
-        await this.executeStep(step, i, appName, scenarioName, repetition);
+        // For scenario steps (after app steps), pass the 1-based scenario step index
+        // so metrics can be aligned across providers with different app setup steps
+        const scenarioStepIndex = i >= appStepCount ? (i - appStepCount + 1) : null;
+        await this.executeStep(step, i, appName, scenarioName, repetition, scenarioStepIndex);
       }
       // Keep the browser open for a bit after all steps

package/tests/integration.test.js CHANGED

@@ -44,8 +44,9 @@ describe('Integration Tests', () => {
               this.text = text;
             };
-            // Mock __speak function that will be called by the tester
-            // This needs to be in the page itself since evaluateOnNewDocument runs before navigation
+            // Mock __speak and __waitForMediaStream functions
+            // These override the injected audio hooks since inline scripts run after evaluateOnNewDocument
+            window.__waitForMediaStream = () => Promise.resolve();
             window.__speak = (text) => {
               document.getElementById('speech-output').textContent = text;
               // Signal speech end after a small delay to allow waitForAudioEvent to be set up
@@ -75,7 +76,7 @@ describe('Integration Tests', () => {
     // The scenario should complete without throwing errors
     expect(true).toBe(true);
-  });
+  }, 15000);
   test('should handle scenario with wait step', async () => {
     const testPageContent = `

package/tests/voice-agent-tester.test.js CHANGED

@@ -1,5 +1,6 @@
 import { describe, test, expect, beforeEach, afterEach } from '@jest/globals';
 import { VoiceAgentTester } from '../src/voice-agent-tester.js';
+import { ReportGenerator } from '../src/report.js';
 import fs from 'fs';
 import path from 'path';
@@ -187,4 +188,136 @@ describe('VoiceAgentTester', () => {
     await expect(tester.executeStep({ action: 'speak' }, 0, 'scenario'))
       .rejects.toThrow('No text or file specified for speak action');
   });
+});
+describe('ReportGenerator - Comparison Step Alignment', () => {
+  test('should align metrics by scenario step index across providers with different app steps', () => {
+    // Simulate: Vapi has 5 app steps, Telnyx has 3 app steps
+    // Both share the same 7 scenario steps with metrics on scenario steps 4 and 7
+    const providerReport = new ReportGenerator('/tmp/test_provider.csv');
+    const telnyxReport = new ReportGenerator('/tmp/test_telnyx.csv');
+    // Provider (Vapi): 5 app steps + 7 scenario steps = 12 total
+    // Metric steps at absolute indices 8 (scenario step 4) and 11 (scenario step 7)
+    providerReport.beginRun('vapi', 'appointment', 0);
+    providerReport.recordStepMetric('vapi', 'appointment', 0, 8, 'wait_for_voice', 'elapsed_time', 2849, 4);
+    providerReport.recordStepMetric('vapi', 'appointment', 0, 11, 'wait_for_voice', 'elapsed_time', 3307, 7);
+    providerReport.endRun('vapi', 'appointment', 0);
+    // Telnyx: 3 app steps + 7 scenario steps = 10 total
+    // Metric steps at absolute indices 6 (scenario step 4) and 9 (scenario step 7)
+    telnyxReport.beginRun('telnyx', 'appointment', 0);
+    telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 6, 'wait_for_voice', 'elapsed_time', 1552, 4);
+    telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 9, 'wait_for_voice', 'elapsed_time', 704, 7);
+    telnyxReport.endRun('telnyx', 'appointment', 0);
+    // Get scenario-aligned metrics
+    const providerMetrics = providerReport.getAggregatedMetricsByScenarioStep();
+    const telnyxMetrics = telnyxReport.getAggregatedMetricsByScenarioStep();
+    // Both should have metrics at scenario steps 4 and 7
+    expect(providerMetrics.has(4)).toBe(true);
+    expect(providerMetrics.has(7)).toBe(true);
+    expect(telnyxMetrics.has(4)).toBe(true);
+    expect(telnyxMetrics.has(7)).toBe(true);
+    // Verify values are correct
+    expect(providerMetrics.get(4).get('elapsed_time').avg).toBe(2849);
+    expect(providerMetrics.get(7).get('elapsed_time').avg).toBe(3307);
+    expect(telnyxMetrics.get(4).get('elapsed_time').avg).toBe(1552);
+    expect(telnyxMetrics.get(7).get('elapsed_time').avg).toBe(704);
+    // The comparison should now have 2 comparable steps (not 4 separate unmatched ones)
+    const allScenarioSteps = new Set([
+      ...providerMetrics.keys(),
+      ...telnyxMetrics.keys()
+    ]);
+    expect(allScenarioSteps.size).toBe(2);
+  });
+  test('should generate comparison summary with single headline number', () => {
+    const providerReport = new ReportGenerator('/tmp/test_provider.csv');
+    const telnyxReport = new ReportGenerator('/tmp/test_telnyx.csv');
+    providerReport.beginRun('vapi', 'appointment', 0);
+    providerReport.recordStepMetric('vapi', 'appointment', 0, 8, 'wait_for_voice', 'elapsed_time', 2849, 4);
+    providerReport.recordStepMetric('vapi', 'appointment', 0, 11, 'wait_for_voice', 'elapsed_time', 3307, 7);
+    providerReport.endRun('vapi', 'appointment', 0);
+    telnyxReport.beginRun('telnyx', 'appointment', 0);
+    telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 6, 'wait_for_voice', 'elapsed_time', 1552, 4);
+    telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 9, 'wait_for_voice', 'elapsed_time', 704, 7);
+    telnyxReport.endRun('telnyx', 'appointment', 0);
+    // Capture console output
+    const logs = [];
+    const originalLog = console.log;
+    console.log = (msg) => logs.push(msg);
+    ReportGenerator.generateComparisonSummary(providerReport, telnyxReport, 'vapi');
+    console.log = originalLog;
+    const output = logs.join('\n');
+    // Should show averaged headline numbers: vapi avg = (2849+3307)/2 = 3078, telnyx avg = (1552+704)/2 = 1128
+    expect(output).toContain('3078ms');
+    expect(output).toContain('1128ms');
+    // Should show "2 matched responses"
+    expect(output).toContain('2 matched responses');
+    // Should declare Telnyx the winner
+    expect(output).toContain('🏆 Telnyx');
+    // Should NOT contain per-response breakdown without debug
+    expect(output).not.toContain('Per-response breakdown');
+    expect(output).not.toContain('#1');
+    expect(output).not.toContain('#2');
+  });
+  test('should show per-response breakdown with debug flag', () => {
+    const providerReport = new ReportGenerator('/tmp/test_provider.csv');
+    const telnyxReport = new ReportGenerator('/tmp/test_telnyx.csv');
+    providerReport.beginRun('vapi', 'appointment', 0);
+    providerReport.recordStepMetric('vapi', 'appointment', 0, 8, 'wait_for_voice', 'elapsed_time', 2849, 4);
+    providerReport.recordStepMetric('vapi', 'appointment', 0, 11, 'wait_for_voice', 'elapsed_time', 3307, 7);
+    providerReport.endRun('vapi', 'appointment', 0);
+    telnyxReport.beginRun('telnyx', 'appointment', 0);
+    telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 6, 'wait_for_voice', 'elapsed_time', 1552, 4);
+    telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 9, 'wait_for_voice', 'elapsed_time', 704, 7);
+    telnyxReport.endRun('telnyx', 'appointment', 0);
+    const logs = [];
+    const originalLog = console.log;
+    console.log = (msg) => logs.push(msg);
+    ReportGenerator.generateComparisonSummary(providerReport, telnyxReport, 'vapi', { debug: true });
+    console.log = originalLog;
+    const output = logs.join('\n');
+    // Should contain per-response breakdown
+    expect(output).toContain('Per-response breakdown');
+    expect(output).toContain('#1');
+    expect(output).toContain('#2');
+    expect(output).toContain('2849ms');
+    expect(output).toContain('1552ms');
+    expect(output).toContain('3307ms');
+    expect(output).toContain('704ms');
+    // Should ALSO contain the headline average
+    expect(output).toContain('3078ms');
+    expect(output).toContain('1128ms');
+  });
+  test('getAggregatedMetricsByScenarioStep returns empty map when no scenario steps', () => {
+    const report = new ReportGenerator('/tmp/test.csv');
+    report.beginRun('test', 'scenario', 0);
+    // Record without scenarioStepIndex (app step)
+    report.recordStepMetric('test', 'scenario', 0, 0, 'click', 'elapsed_time', 100);
+    report.endRun('test', 'scenario', 0);
+    const metrics = report.getAggregatedMetricsByScenarioStep();
+    expect(metrics.size).toBe(0);
+  });
 });