@telnyx/voice-agent-tester 0.4.3 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/README.md +185 -161
- package/applications/elevenlabs.yaml +1 -1
- package/javascript/audio_input_hooks.js +89 -19
- package/javascript/audio_output_hooks.js +92 -2
- package/package.json +1 -1
- package/src/index.js +79 -28
- package/src/report.js +169 -90
- package/src/voice-agent-tester.js +43 -7
- package/tests/integration.test.js +4 -3
- package/tests/voice-agent-tester.test.js +133 -0
|
@@ -238,6 +238,7 @@ export class VoiceAgentTester {
|
|
|
238
238
|
} else {
|
|
239
239
|
errorMessage += '\n (Could not collect browser diagnostics)';
|
|
240
240
|
}
|
|
241
|
+
|
|
241
242
|
}
|
|
242
243
|
|
|
243
244
|
reject(new Error(errorMessage));
|
|
@@ -363,6 +364,7 @@ export class VoiceAgentTester {
|
|
|
363
364
|
console.error(error.stack);
|
|
364
365
|
}
|
|
365
366
|
});
|
|
367
|
+
|
|
366
368
|
}
|
|
367
369
|
|
|
368
370
|
async close() {
|
|
@@ -488,7 +490,7 @@ export class VoiceAgentTester {
|
|
|
488
490
|
}
|
|
489
491
|
}
|
|
490
492
|
|
|
491
|
-
async executeStep(step, stepIndex, appName = '', scenarioName = '', repetition = 1) {
|
|
493
|
+
async executeStep(step, stepIndex, appName = '', scenarioName = '', repetition = 1, scenarioStepIndex = null) {
|
|
492
494
|
if (!this.page) {
|
|
493
495
|
throw new Error('Browser not launched. Call launch() first.');
|
|
494
496
|
}
|
|
@@ -551,13 +553,13 @@ export class VoiceAgentTester {
|
|
|
551
553
|
// Record metrics for report if enabled and step has metrics attribute
|
|
552
554
|
if (this.reportGenerator && step.metrics) {
|
|
553
555
|
if (step.metrics.includes('elapsed_time')) {
|
|
554
|
-
this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, 'elapsed_time', elapsedTimeMs);
|
|
556
|
+
this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, 'elapsed_time', elapsedTimeMs, scenarioStepIndex);
|
|
555
557
|
}
|
|
556
558
|
// Record any additional metrics returned by the handler
|
|
557
559
|
if (handlerResult && typeof handlerResult === 'object') {
|
|
558
560
|
for (const [metricName, metricValue] of Object.entries(handlerResult)) {
|
|
559
561
|
if (step.metrics.includes(metricName)) {
|
|
560
|
-
this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, metricName, metricValue);
|
|
562
|
+
this.reportGenerator.recordStepMetric(appName, scenarioName, repetition, stepIndex, step.action, metricName, metricValue, scenarioStepIndex);
|
|
561
563
|
}
|
|
562
564
|
}
|
|
563
565
|
}
|
|
@@ -866,10 +868,40 @@ export class VoiceAgentTester {
|
|
|
866
868
|
|
|
867
869
|
// Wait for speech to complete by listening for speechend event
|
|
868
870
|
try {
|
|
869
|
-
|
|
871
|
+
// Use a shorter timeout for speechend (15s) since we have safety fallback in browser
|
|
872
|
+
await this.waitForAudioEvent('speechend', 15000);
|
|
870
873
|
} catch (error) {
|
|
871
|
-
|
|
872
|
-
|
|
874
|
+
// speechend timeout is recoverable — the audio likely finished but the event was lost
|
|
875
|
+
// (e.g., agent started responding and disrupted the audio element)
|
|
876
|
+
if (this.debug) {
|
|
877
|
+
// Check the state of the speak audio in the browser
|
|
878
|
+
const speakState = await this.page.evaluate(() => {
|
|
879
|
+
const info = {
|
|
880
|
+
currentSpeakAudio: null,
|
|
881
|
+
audioContextState: null,
|
|
882
|
+
};
|
|
883
|
+
try {
|
|
884
|
+
if (window.currentSpeakAudio) {
|
|
885
|
+
info.currentSpeakAudio = {
|
|
886
|
+
paused: window.currentSpeakAudio.paused,
|
|
887
|
+
ended: window.currentSpeakAudio.ended,
|
|
888
|
+
currentTime: window.currentSpeakAudio.currentTime,
|
|
889
|
+
duration: window.currentSpeakAudio.duration,
|
|
890
|
+
readyState: window.currentSpeakAudio.readyState,
|
|
891
|
+
};
|
|
892
|
+
}
|
|
893
|
+
if (window.globalAudioContext) {
|
|
894
|
+
info.audioContextState = window.globalAudioContext.state;
|
|
895
|
+
}
|
|
896
|
+
} catch (e) { /* ignore */ }
|
|
897
|
+
return info;
|
|
898
|
+
}).catch(() => null);
|
|
899
|
+
|
|
900
|
+
console.warn(`\t⚠️ speechend timeout (recovered) — speak audio state:`, JSON.stringify(speakState));
|
|
901
|
+
} else {
|
|
902
|
+
console.warn(`\t⚠️ speechend timeout — continuing (audio likely finished)`);
|
|
903
|
+
}
|
|
904
|
+
// Don't throw — treat speechend timeout as recoverable
|
|
873
905
|
}
|
|
874
906
|
}
|
|
875
907
|
|
|
@@ -1234,10 +1266,14 @@ export class VoiceAgentTester {
|
|
|
1234
1266
|
}
|
|
1235
1267
|
|
|
1236
1268
|
// Execute all configured steps
|
|
1269
|
+
const appStepCount = appSteps.length;
|
|
1237
1270
|
for (let i = 0; i < steps.length; i++) {
|
|
1238
1271
|
const step = steps[i];
|
|
1239
1272
|
console.log(`Executing step ${i + 1}: ${JSON.stringify(step)}`);
|
|
1240
|
-
|
|
1273
|
+
// For scenario steps (after app steps), pass the 1-based scenario step index
|
|
1274
|
+
// so metrics can be aligned across providers with different app setup steps
|
|
1275
|
+
const scenarioStepIndex = i >= appStepCount ? (i - appStepCount + 1) : null;
|
|
1276
|
+
await this.executeStep(step, i, appName, scenarioName, repetition, scenarioStepIndex);
|
|
1241
1277
|
}
|
|
1242
1278
|
|
|
1243
1279
|
// Keep the browser open for a bit after all steps
|
|
@@ -44,8 +44,9 @@ describe('Integration Tests', () => {
|
|
|
44
44
|
this.text = text;
|
|
45
45
|
};
|
|
46
46
|
|
|
47
|
-
// Mock __speak
|
|
48
|
-
//
|
|
47
|
+
// Mock __speak and __waitForMediaStream functions
|
|
48
|
+
// These override the injected audio hooks since inline scripts run after evaluateOnNewDocument
|
|
49
|
+
window.__waitForMediaStream = () => Promise.resolve();
|
|
49
50
|
window.__speak = (text) => {
|
|
50
51
|
document.getElementById('speech-output').textContent = text;
|
|
51
52
|
// Signal speech end after a small delay to allow waitForAudioEvent to be set up
|
|
@@ -75,7 +76,7 @@ describe('Integration Tests', () => {
|
|
|
75
76
|
|
|
76
77
|
// The scenario should complete without throwing errors
|
|
77
78
|
expect(true).toBe(true);
|
|
78
|
-
});
|
|
79
|
+
}, 15000);
|
|
79
80
|
|
|
80
81
|
test('should handle scenario with wait step', async () => {
|
|
81
82
|
const testPageContent = `
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { describe, test, expect, beforeEach, afterEach } from '@jest/globals';
|
|
2
2
|
import { VoiceAgentTester } from '../src/voice-agent-tester.js';
|
|
3
|
+
import { ReportGenerator } from '../src/report.js';
|
|
3
4
|
import fs from 'fs';
|
|
4
5
|
import path from 'path';
|
|
5
6
|
|
|
@@ -187,4 +188,136 @@ describe('VoiceAgentTester', () => {
|
|
|
187
188
|
await expect(tester.executeStep({ action: 'speak' }, 0, 'scenario'))
|
|
188
189
|
.rejects.toThrow('No text or file specified for speak action');
|
|
189
190
|
});
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
describe('ReportGenerator - Comparison Step Alignment', () => {
|
|
194
|
+
test('should align metrics by scenario step index across providers with different app steps', () => {
|
|
195
|
+
// Simulate: Vapi has 5 app steps, Telnyx has 3 app steps
|
|
196
|
+
// Both share the same 7 scenario steps with metrics on scenario steps 4 and 7
|
|
197
|
+
const providerReport = new ReportGenerator('/tmp/test_provider.csv');
|
|
198
|
+
const telnyxReport = new ReportGenerator('/tmp/test_telnyx.csv');
|
|
199
|
+
|
|
200
|
+
// Provider (Vapi): 5 app steps + 7 scenario steps = 12 total
|
|
201
|
+
// Metric steps at absolute indices 8 (scenario step 4) and 11 (scenario step 7)
|
|
202
|
+
providerReport.beginRun('vapi', 'appointment', 0);
|
|
203
|
+
providerReport.recordStepMetric('vapi', 'appointment', 0, 8, 'wait_for_voice', 'elapsed_time', 2849, 4);
|
|
204
|
+
providerReport.recordStepMetric('vapi', 'appointment', 0, 11, 'wait_for_voice', 'elapsed_time', 3307, 7);
|
|
205
|
+
providerReport.endRun('vapi', 'appointment', 0);
|
|
206
|
+
|
|
207
|
+
// Telnyx: 3 app steps + 7 scenario steps = 10 total
|
|
208
|
+
// Metric steps at absolute indices 6 (scenario step 4) and 9 (scenario step 7)
|
|
209
|
+
telnyxReport.beginRun('telnyx', 'appointment', 0);
|
|
210
|
+
telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 6, 'wait_for_voice', 'elapsed_time', 1552, 4);
|
|
211
|
+
telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 9, 'wait_for_voice', 'elapsed_time', 704, 7);
|
|
212
|
+
telnyxReport.endRun('telnyx', 'appointment', 0);
|
|
213
|
+
|
|
214
|
+
// Get scenario-aligned metrics
|
|
215
|
+
const providerMetrics = providerReport.getAggregatedMetricsByScenarioStep();
|
|
216
|
+
const telnyxMetrics = telnyxReport.getAggregatedMetricsByScenarioStep();
|
|
217
|
+
|
|
218
|
+
// Both should have metrics at scenario steps 4 and 7
|
|
219
|
+
expect(providerMetrics.has(4)).toBe(true);
|
|
220
|
+
expect(providerMetrics.has(7)).toBe(true);
|
|
221
|
+
expect(telnyxMetrics.has(4)).toBe(true);
|
|
222
|
+
expect(telnyxMetrics.has(7)).toBe(true);
|
|
223
|
+
|
|
224
|
+
// Verify values are correct
|
|
225
|
+
expect(providerMetrics.get(4).get('elapsed_time').avg).toBe(2849);
|
|
226
|
+
expect(providerMetrics.get(7).get('elapsed_time').avg).toBe(3307);
|
|
227
|
+
expect(telnyxMetrics.get(4).get('elapsed_time').avg).toBe(1552);
|
|
228
|
+
expect(telnyxMetrics.get(7).get('elapsed_time').avg).toBe(704);
|
|
229
|
+
|
|
230
|
+
// The comparison should now have 2 comparable steps (not 4 separate unmatched ones)
|
|
231
|
+
const allScenarioSteps = new Set([
|
|
232
|
+
...providerMetrics.keys(),
|
|
233
|
+
...telnyxMetrics.keys()
|
|
234
|
+
]);
|
|
235
|
+
expect(allScenarioSteps.size).toBe(2);
|
|
236
|
+
});
|
|
237
|
+
|
|
238
|
+
test('should generate comparison summary with single headline number', () => {
|
|
239
|
+
const providerReport = new ReportGenerator('/tmp/test_provider.csv');
|
|
240
|
+
const telnyxReport = new ReportGenerator('/tmp/test_telnyx.csv');
|
|
241
|
+
|
|
242
|
+
providerReport.beginRun('vapi', 'appointment', 0);
|
|
243
|
+
providerReport.recordStepMetric('vapi', 'appointment', 0, 8, 'wait_for_voice', 'elapsed_time', 2849, 4);
|
|
244
|
+
providerReport.recordStepMetric('vapi', 'appointment', 0, 11, 'wait_for_voice', 'elapsed_time', 3307, 7);
|
|
245
|
+
providerReport.endRun('vapi', 'appointment', 0);
|
|
246
|
+
|
|
247
|
+
telnyxReport.beginRun('telnyx', 'appointment', 0);
|
|
248
|
+
telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 6, 'wait_for_voice', 'elapsed_time', 1552, 4);
|
|
249
|
+
telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 9, 'wait_for_voice', 'elapsed_time', 704, 7);
|
|
250
|
+
telnyxReport.endRun('telnyx', 'appointment', 0);
|
|
251
|
+
|
|
252
|
+
// Capture console output
|
|
253
|
+
const logs = [];
|
|
254
|
+
const originalLog = console.log;
|
|
255
|
+
console.log = (msg) => logs.push(msg);
|
|
256
|
+
|
|
257
|
+
ReportGenerator.generateComparisonSummary(providerReport, telnyxReport, 'vapi');
|
|
258
|
+
|
|
259
|
+
console.log = originalLog;
|
|
260
|
+
|
|
261
|
+
const output = logs.join('\n');
|
|
262
|
+
|
|
263
|
+
// Should show averaged headline numbers: vapi avg = (2849+3307)/2 = 3078, telnyx avg = (1552+704)/2 = 1128
|
|
264
|
+
expect(output).toContain('3078ms');
|
|
265
|
+
expect(output).toContain('1128ms');
|
|
266
|
+
// Should show "2 matched responses"
|
|
267
|
+
expect(output).toContain('2 matched responses');
|
|
268
|
+
// Should declare Telnyx the winner
|
|
269
|
+
expect(output).toContain('🏆 Telnyx');
|
|
270
|
+
// Should NOT contain per-response breakdown without debug
|
|
271
|
+
expect(output).not.toContain('Per-response breakdown');
|
|
272
|
+
expect(output).not.toContain('#1');
|
|
273
|
+
expect(output).not.toContain('#2');
|
|
274
|
+
});
|
|
275
|
+
|
|
276
|
+
test('should show per-response breakdown with debug flag', () => {
|
|
277
|
+
const providerReport = new ReportGenerator('/tmp/test_provider.csv');
|
|
278
|
+
const telnyxReport = new ReportGenerator('/tmp/test_telnyx.csv');
|
|
279
|
+
|
|
280
|
+
providerReport.beginRun('vapi', 'appointment', 0);
|
|
281
|
+
providerReport.recordStepMetric('vapi', 'appointment', 0, 8, 'wait_for_voice', 'elapsed_time', 2849, 4);
|
|
282
|
+
providerReport.recordStepMetric('vapi', 'appointment', 0, 11, 'wait_for_voice', 'elapsed_time', 3307, 7);
|
|
283
|
+
providerReport.endRun('vapi', 'appointment', 0);
|
|
284
|
+
|
|
285
|
+
telnyxReport.beginRun('telnyx', 'appointment', 0);
|
|
286
|
+
telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 6, 'wait_for_voice', 'elapsed_time', 1552, 4);
|
|
287
|
+
telnyxReport.recordStepMetric('telnyx', 'appointment', 0, 9, 'wait_for_voice', 'elapsed_time', 704, 7);
|
|
288
|
+
telnyxReport.endRun('telnyx', 'appointment', 0);
|
|
289
|
+
|
|
290
|
+
const logs = [];
|
|
291
|
+
const originalLog = console.log;
|
|
292
|
+
console.log = (msg) => logs.push(msg);
|
|
293
|
+
|
|
294
|
+
ReportGenerator.generateComparisonSummary(providerReport, telnyxReport, 'vapi', { debug: true });
|
|
295
|
+
|
|
296
|
+
console.log = originalLog;
|
|
297
|
+
|
|
298
|
+
const output = logs.join('\n');
|
|
299
|
+
|
|
300
|
+
// Should contain per-response breakdown
|
|
301
|
+
expect(output).toContain('Per-response breakdown');
|
|
302
|
+
expect(output).toContain('#1');
|
|
303
|
+
expect(output).toContain('#2');
|
|
304
|
+
expect(output).toContain('2849ms');
|
|
305
|
+
expect(output).toContain('1552ms');
|
|
306
|
+
expect(output).toContain('3307ms');
|
|
307
|
+
expect(output).toContain('704ms');
|
|
308
|
+
// Should ALSO contain the headline average
|
|
309
|
+
expect(output).toContain('3078ms');
|
|
310
|
+
expect(output).toContain('1128ms');
|
|
311
|
+
});
|
|
312
|
+
|
|
313
|
+
test('getAggregatedMetricsByScenarioStep returns empty map when no scenario steps', () => {
|
|
314
|
+
const report = new ReportGenerator('/tmp/test.csv');
|
|
315
|
+
report.beginRun('test', 'scenario', 0);
|
|
316
|
+
// Record without scenarioStepIndex (app step)
|
|
317
|
+
report.recordStepMetric('test', 'scenario', 0, 0, 'click', 'elapsed_time', 100);
|
|
318
|
+
report.endRun('test', 'scenario', 0);
|
|
319
|
+
|
|
320
|
+
const metrics = report.getAggregatedMetricsByScenarioStep();
|
|
321
|
+
expect(metrics.size).toBe(0);
|
|
322
|
+
});
|
|
190
323
|
});
|