@telnyx/voice-agent-tester 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/javascript/audio_input_hooks.js +89 -19
- package/package.json +1 -1
- package/src/index.js +48 -16
- package/src/voice-agent-tester.js +35 -3
- package/tests/integration.test.js +4 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.4.4](https://github.com/team-telnyx/voice-agent-tester/compare/v0.4.3...v0.4.4) (2026-03-11)
|
|
4
|
+
|
|
5
|
+
### Features
|
|
6
|
+
|
|
7
|
+
* fix speechend race condition, add --retries flag ([#21](https://github.com/team-telnyx/voice-agent-tester/issues/21)) ([09e3b65](https://github.com/team-telnyx/voice-agent-tester/commit/09e3b6578face6c407d058991ab5495d9463e544))
|
|
8
|
+
|
|
9
|
+
### Chores
|
|
10
|
+
|
|
11
|
+
* release v0.4.3 ([#20](https://github.com/team-telnyx/voice-agent-tester/issues/20)) ([bdeb87b](https://github.com/team-telnyx/voice-agent-tester/commit/bdeb87bed502919a9fed9950e69242b1c2aefcfc))
|
|
12
|
+
|
|
3
13
|
## [0.4.3](https://github.com/team-telnyx/voice-agent-tester/compare/v0.4.2...v0.4.3) (2026-03-11)
|
|
4
14
|
|
|
5
15
|
### Features
|
|
@@ -62,20 +62,24 @@ function createControlledMediaStream() {
|
|
|
62
62
|
}
|
|
63
63
|
|
|
64
64
|
// Replace getUserMedia to return our controlled stream
|
|
65
|
-
|
|
66
|
-
navigator.mediaDevices.getUserMedia
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
65
|
+
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
|
|
66
|
+
const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
|
|
67
|
+
navigator.mediaDevices.getUserMedia = function (constraints) {
|
|
68
|
+
console.log("🎤 Intercepted getUserMedia call with constraints:", constraints);
|
|
69
|
+
|
|
70
|
+
// If audio is requested, return our controlled stream
|
|
71
|
+
if (constraints && constraints.audio) {
|
|
72
|
+
console.log("🎤 Returning controlled MediaStream instead of real microphone");
|
|
73
|
+
const controlledStream = createControlledMediaStream();
|
|
74
|
+
return Promise.resolve(controlledStream);
|
|
75
|
+
}
|
|
75
76
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
};
|
|
77
|
+
// For video-only or other requests, use original implementation
|
|
78
|
+
return originalGetUserMedia(constraints);
|
|
79
|
+
};
|
|
80
|
+
} else {
|
|
81
|
+
console.warn("🎤 navigator.mediaDevices.getUserMedia not available, skipping microphone intercept");
|
|
82
|
+
}
|
|
79
83
|
|
|
80
84
|
// Expose __speak method to be called from voice-agent-tester.js
|
|
81
85
|
window.__speak = function (textOrUrl) {
|
|
@@ -152,6 +156,24 @@ function playAudioInMediaStream(url) {
|
|
|
152
156
|
const audio = new Audio(url);
|
|
153
157
|
audio.crossOrigin = 'anonymous'; // Enable CORS if needed
|
|
154
158
|
|
|
159
|
+
// Keep a strong reference so the element is not garbage collected
|
|
160
|
+
currentSpeakAudio = audio;
|
|
161
|
+
|
|
162
|
+
let speechEndFired = false;
|
|
163
|
+
let safetyTimeoutId = null;
|
|
164
|
+
|
|
165
|
+
function fireSpeechEnd(reason) {
|
|
166
|
+
if (speechEndFired) return;
|
|
167
|
+
speechEndFired = true;
|
|
168
|
+
if (safetyTimeoutId) clearTimeout(safetyTimeoutId);
|
|
169
|
+
console.log(`🎤 Audio playback ended (${reason})`);
|
|
170
|
+
if (typeof __publishEvent === 'function') {
|
|
171
|
+
__publishEvent('speechend', { url: url, reason: reason });
|
|
172
|
+
}
|
|
173
|
+
// Release reference
|
|
174
|
+
if (currentSpeakAudio === audio) currentSpeakAudio = null;
|
|
175
|
+
}
|
|
176
|
+
|
|
155
177
|
// Set up audio routing through all MediaStreams
|
|
156
178
|
audio.addEventListener('canplaythrough', function () {
|
|
157
179
|
console.log(`🎤 Audio ready to play, routing to ${mediaStreams.length} MediaStreams`);
|
|
@@ -181,7 +203,33 @@ function playAudioInMediaStream(url) {
|
|
|
181
203
|
}
|
|
182
204
|
|
|
183
205
|
// Play the audio
|
|
184
|
-
audio.play()
|
|
206
|
+
audio.play().then(() => {
|
|
207
|
+
// Set up safety timeout based on audio duration
|
|
208
|
+
// audio.duration should be available after canplaythrough
|
|
209
|
+
const duration = audio.duration;
|
|
210
|
+
if (duration && isFinite(duration)) {
|
|
211
|
+
const safetyMs = Math.max((duration * 1000) + 5000, 15000);
|
|
212
|
+
console.log(`🎤 Audio duration: ${duration.toFixed(1)}s, safety timeout: ${(safetyMs / 1000).toFixed(1)}s`);
|
|
213
|
+
safetyTimeoutId = setTimeout(() => {
|
|
214
|
+
if (!speechEndFired) {
|
|
215
|
+
console.warn(`🎤 Safety timeout: speechend not fired after ${(safetyMs / 1000).toFixed(1)}s (audio paused=${audio.paused}, ended=${audio.ended}, currentTime=${audio.currentTime.toFixed(1)})`);
|
|
216
|
+
fireSpeechEnd('safety_timeout');
|
|
217
|
+
}
|
|
218
|
+
}, safetyMs);
|
|
219
|
+
} else {
|
|
220
|
+
// Unknown duration — use 20s fallback
|
|
221
|
+
console.warn('🎤 Audio duration unknown, using 20s safety timeout');
|
|
222
|
+
safetyTimeoutId = setTimeout(() => {
|
|
223
|
+
if (!speechEndFired) {
|
|
224
|
+
console.warn('🎤 Safety timeout: speechend not fired after 20s');
|
|
225
|
+
fireSpeechEnd('safety_timeout');
|
|
226
|
+
}
|
|
227
|
+
}, 20000);
|
|
228
|
+
}
|
|
229
|
+
}).catch(error => {
|
|
230
|
+
console.error('Error playing audio:', error);
|
|
231
|
+
fireSpeechEnd('play_error');
|
|
232
|
+
});
|
|
185
233
|
} catch (error) {
|
|
186
234
|
console.error('Error setting up audio source:', error);
|
|
187
235
|
if (typeof __publishEvent === 'function') {
|
|
@@ -190,11 +238,19 @@ function playAudioInMediaStream(url) {
|
|
|
190
238
|
}
|
|
191
239
|
});
|
|
192
240
|
|
|
193
|
-
// Handle audio end
|
|
241
|
+
// Handle audio end — primary path
|
|
194
242
|
audio.addEventListener('ended', function () {
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
243
|
+
fireSpeechEnd('ended');
|
|
244
|
+
});
|
|
245
|
+
|
|
246
|
+
// Handle pause — if something pauses the audio externally
|
|
247
|
+
audio.addEventListener('pause', function () {
|
|
248
|
+
// Only treat as speechend if the audio is past 90% of its duration (near end)
|
|
249
|
+
// or has already ended; a pause earlier in playback is only logged as a warning, not treated as speechend
|
|
250
|
+
if (audio.ended || (audio.duration && audio.currentTime >= audio.duration * 0.9)) {
|
|
251
|
+
fireSpeechEnd('pause_near_end');
|
|
252
|
+
} else {
|
|
253
|
+
console.warn(`🎤 Audio paused at ${audio.currentTime.toFixed(1)}s / ${(audio.duration || 0).toFixed(1)}s`);
|
|
198
254
|
}
|
|
199
255
|
});
|
|
200
256
|
|
|
@@ -204,17 +260,31 @@ function playAudioInMediaStream(url) {
|
|
|
204
260
|
if (typeof __publishEvent === 'function') {
|
|
205
261
|
__publishEvent('speecherror', { error: 'Audio playback failed', url: url });
|
|
206
262
|
}
|
|
263
|
+
fireSpeechEnd('error');
|
|
207
264
|
});
|
|
208
265
|
|
|
209
266
|
// Start loading the audio
|
|
210
267
|
audio.load();
|
|
211
268
|
}
|
|
212
269
|
|
|
270
|
+
// Keep a reference to the current speak Audio element so it doesn't get GC'd
|
|
271
|
+
let currentSpeakAudio = null;
|
|
272
|
+
|
|
213
273
|
// Helper function to stop current audio and reset to silence
|
|
214
274
|
function stopCurrentAudio() {
|
|
275
|
+
// Stop the speak audio element if playing
|
|
276
|
+
if (currentSpeakAudio) {
|
|
277
|
+
try {
|
|
278
|
+
currentSpeakAudio.pause();
|
|
279
|
+
currentSpeakAudio.currentTime = 0;
|
|
280
|
+
} catch (e) {
|
|
281
|
+
console.warn('Error stopping speak audio:', e);
|
|
282
|
+
}
|
|
283
|
+
currentSpeakAudio = null;
|
|
284
|
+
}
|
|
285
|
+
|
|
215
286
|
currentPlaybackNodes.forEach((sourceNode, index) => {
|
|
216
287
|
try {
|
|
217
|
-
sourceNode.stop();
|
|
218
288
|
sourceNode.disconnect();
|
|
219
289
|
console.log(`🎤 Stopped audio source ${index}`);
|
|
220
290
|
} catch (e) {
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -327,6 +327,11 @@ const argv = yargs(hideBin(process.argv))
|
|
|
327
327
|
description: 'Volume level for audio input (0.0 to 1.0)',
|
|
328
328
|
default: 1.0
|
|
329
329
|
})
|
|
330
|
+
.option('retries', {
|
|
331
|
+
type: 'number',
|
|
332
|
+
description: 'Number of retries for failed test runs (0 = no retries)',
|
|
333
|
+
default: 0
|
|
334
|
+
})
|
|
330
335
|
.help()
|
|
331
336
|
.argv;
|
|
332
337
|
|
|
@@ -409,22 +414,49 @@ async function runBenchmark({ applications, scenarios, repeat, concurrency, argv
|
|
|
409
414
|
audioVolume: argv.audioVolume
|
|
410
415
|
});
|
|
411
416
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
417
|
+
const maxAttempts = (argv.retries || 0) + 1;
|
|
418
|
+
|
|
419
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
420
|
+
// Reuse the existing tester on the first attempt; create a fresh tester for each retry (after the first attempt, the original tester is closed)
|
|
421
|
+
const currentTester = attempt === 1 ? tester : new VoiceAgentTester({
|
|
422
|
+
verbose: argv.verbose,
|
|
423
|
+
headless: argv.headless,
|
|
424
|
+
assetsServerUrl: argv.assetsServer,
|
|
425
|
+
reportGenerator: reportGenerator,
|
|
426
|
+
record: argv.record,
|
|
427
|
+
debug: argv.debug,
|
|
428
|
+
audioUrl: argv.audioUrl,
|
|
429
|
+
audioVolume: argv.audioVolume
|
|
430
|
+
});
|
|
431
|
+
|
|
432
|
+
try {
|
|
433
|
+
await currentTester.runScenario(targetUrl, app.steps, scenario.steps, app.name, scenario.name, repetition);
|
|
434
|
+
console.log(`✅ Completed successfully (Run ${runNumber}/${totalRuns})`);
|
|
435
|
+
return { success: true };
|
|
436
|
+
} catch (error) {
|
|
437
|
+
const shortMessage = error.message.split('\n')[0];
|
|
438
|
+
|
|
439
|
+
if (attempt < maxAttempts) {
|
|
440
|
+
console.warn(`\n⚠️ Attempt ${attempt}/${maxAttempts} failed: ${shortMessage}`);
|
|
441
|
+
console.warn(`🔄 Retrying in 3s... (${maxAttempts - attempt} retries left)\n`);
|
|
442
|
+
await new Promise(r => setTimeout(r, 3000));
|
|
443
|
+
continue;
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
// Final attempt failed
|
|
447
|
+
const errorInfo = {
|
|
448
|
+
app: app.name,
|
|
449
|
+
scenario: scenario.name,
|
|
450
|
+
repetition,
|
|
451
|
+
error: shortMessage
|
|
452
|
+
};
|
|
453
|
+
// Print full diagnostics here (only place they appear)
|
|
454
|
+
console.error(`❌ Error (Run ${runNumber}/${totalRuns}):\n${error.message}`);
|
|
455
|
+
if (maxAttempts > 1) {
|
|
456
|
+
console.error(` Failed after ${maxAttempts} attempts`);
|
|
457
|
+
}
|
|
458
|
+
return { success: false, error: errorInfo };
|
|
459
|
+
}
|
|
428
460
|
}
|
|
429
461
|
}
|
|
430
462
|
|
|
@@ -238,6 +238,7 @@ export class VoiceAgentTester {
|
|
|
238
238
|
} else {
|
|
239
239
|
errorMessage += '\n (Could not collect browser diagnostics)';
|
|
240
240
|
}
|
|
241
|
+
|
|
241
242
|
}
|
|
242
243
|
|
|
243
244
|
reject(new Error(errorMessage));
|
|
@@ -363,6 +364,7 @@ export class VoiceAgentTester {
|
|
|
363
364
|
console.error(error.stack);
|
|
364
365
|
}
|
|
365
366
|
});
|
|
367
|
+
|
|
366
368
|
}
|
|
367
369
|
|
|
368
370
|
async close() {
|
|
@@ -866,10 +868,40 @@ export class VoiceAgentTester {
|
|
|
866
868
|
|
|
867
869
|
// Wait for speech to complete by listening for speechend event
|
|
868
870
|
try {
|
|
869
|
-
|
|
871
|
+
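// Use a shorter timeout for speechend (15s) since we have safety fallback in browser
// NOTE(review): the browser-side safety timeout is itself at least 15s, so it will generally not fire before this wait expires — confirm the intended ordering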
|
|
872
|
+
await this.waitForAudioEvent('speechend', 15000);
|
|
870
873
|
} catch (error) {
|
|
871
|
-
|
|
872
|
-
|
|
874
|
+
// speechend timeout is recoverable — the audio likely finished but the event was lost
|
|
875
|
+
// (e.g., agent started responding and disrupted the audio element)
|
|
876
|
+
if (this.debug) {
|
|
877
|
+
// Check the state of the speak audio in the browser
|
|
878
|
+
const speakState = await this.page.evaluate(() => {
|
|
879
|
+
const info = {
|
|
880
|
+
currentSpeakAudio: null,
|
|
881
|
+
audioContextState: null,
|
|
882
|
+
};
|
|
883
|
+
try {
|
|
884
|
+
if (window.currentSpeakAudio) {
|
|
885
|
+
info.currentSpeakAudio = {
|
|
886
|
+
paused: window.currentSpeakAudio.paused,
|
|
887
|
+
ended: window.currentSpeakAudio.ended,
|
|
888
|
+
currentTime: window.currentSpeakAudio.currentTime,
|
|
889
|
+
duration: window.currentSpeakAudio.duration,
|
|
890
|
+
readyState: window.currentSpeakAudio.readyState,
|
|
891
|
+
};
|
|
892
|
+
}
|
|
893
|
+
if (window.globalAudioContext) {
|
|
894
|
+
info.audioContextState = window.globalAudioContext.state;
|
|
895
|
+
}
|
|
896
|
+
} catch (e) { /* ignore */ }
|
|
897
|
+
return info;
|
|
898
|
+
}).catch(() => null);
|
|
899
|
+
|
|
900
|
+
console.warn(`\t⚠️ speechend timeout (recovered) — speak audio state:`, JSON.stringify(speakState));
|
|
901
|
+
} else {
|
|
902
|
+
console.warn(`\t⚠️ speechend timeout — continuing (audio likely finished)`);
|
|
903
|
+
}
|
|
904
|
+
// Don't throw — treat speechend timeout as recoverable
|
|
873
905
|
}
|
|
874
906
|
}
|
|
875
907
|
|
|
@@ -44,8 +44,9 @@ describe('Integration Tests', () => {
|
|
|
44
44
|
this.text = text;
|
|
45
45
|
};
|
|
46
46
|
|
|
47
|
-
// Mock __speak
|
|
48
|
-
//
|
|
47
|
+
// Mock __speak and __waitForMediaStream functions
|
|
48
|
+
// These override the injected audio hooks since inline scripts run after evaluateOnNewDocument
|
|
49
|
+
window.__waitForMediaStream = () => Promise.resolve();
|
|
49
50
|
window.__speak = (text) => {
|
|
50
51
|
document.getElementById('speech-output').textContent = text;
|
|
51
52
|
// Signal speech end after a small delay to allow waitForAudioEvent to be set up
|
|
@@ -75,7 +76,7 @@ describe('Integration Tests', () => {
|
|
|
75
76
|
|
|
76
77
|
// The scenario should complete without throwing errors
|
|
77
78
|
expect(true).toBe(true);
|
|
78
|
-
});
|
|
79
|
+
}, 15000);
|
|
79
80
|
|
|
80
81
|
test('should handle scenario with wait step', async () => {
|
|
81
82
|
const testPageContent = `
|