@telnyx/voice-agent-tester 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +4 -0
- package/.github/workflows/ci.yml +29 -0
- package/.github/workflows/draft-release.yml +72 -0
- package/.github/workflows/publish-release.yml +39 -0
- package/.release-it.json +31 -0
- package/CHANGELOG.md +30 -0
- package/CLAUDE.md +72 -0
- package/LICENSE +21 -0
- package/README.md +92 -0
- package/assets/appointment_data.mp3 +0 -0
- package/assets/confirmation.mp3 +0 -0
- package/assets/greet_me_angry.mp3 +0 -0
- package/assets/hello_make_an_appointment.mp3 +0 -0
- package/assets/name_lebron_james.mp3 +0 -0
- package/assets/recording-processor.js +86 -0
- package/assets/tell_me_joke_laugh.mp3 +0 -0
- package/assets/tell_me_something_funny.mp3 +0 -0
- package/assets/tell_me_something_sad.mp3 +0 -0
- package/benchmarks/applications/elevenlabs.yaml +10 -0
- package/benchmarks/applications/telnyx.yaml +10 -0
- package/benchmarks/applications/vapi.yaml +10 -0
- package/benchmarks/scenarios/appointment.yaml +16 -0
- package/javascript/audio_input_hooks.js +291 -0
- package/javascript/audio_output_hooks.js +876 -0
- package/package.json +61 -0
- package/src/index.js +560 -0
- package/src/provider-import.js +315 -0
- package/src/report.js +228 -0
- package/src/server.js +31 -0
- package/src/transcription.js +138 -0
- package/src/voice-agent-tester.js +1033 -0
- package/tests/integration.test.js +138 -0
- package/tests/voice-agent-tester.test.js +190 -0
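
The largest browser-side addition is `javascript/audio_input_hooks.js`, reproduced in full below. It replaces `navigator.mediaDevices.getUserMedia` with a Web Audio `MediaStreamDestination` that the tester can feed with silence, synthesized speech, or decoded audio files, and it reports progress through a `__publishEvent` function if one is defined. For orientation, here is a minimal sketch of how such a hook file is typically injected ahead of the page's own scripts — assuming a Playwright-style harness; `launchWithHooks` and the event sink are hypothetical, and the package's actual wiring lives in `src/voice-agent-tester.js`, which is not shown in this hunk:

```js
// Hypothetical sketch (not part of the package): injecting the hooks so the
// getUserMedia override is in place before the voice app's own scripts run.
const { chromium } = require('playwright');

async function launchWithHooks(appUrl) {
  const browser = await chromium.launch();
  const page = await browser.newPage();

  // audio_input_hooks.js calls __publishEvent(...) only if it is defined;
  // expose a Node-side sink so speechstart/speechend/speecherror are visible.
  await page.exposeFunction('__publishEvent', (name, data) => {
    console.log(`[page event] ${name}`, data);
  });

  // Init scripts run before any page script on every navigation.
  await page.addInitScript({ path: 'javascript/audio_input_hooks.js' });

  await page.goto(appUrl);
  return { browser, page };
}
```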
--- /dev/null
+++ package/javascript/audio_input_hooks.js
@@ -0,0 +1,291 @@
+console.log("🎤 audio_input_hooks.js loaded and executing");
+console.log("Setting up audio input monitoring...");
+console.log("Audio input hooks ready for voice detection");
+
+// Configuration flag to control whether speak audio should be audible
+const MAKE_SPEAK_AUDIO_AUDIBLE = true;
+
+// Global variables for MediaStream control
+let globalAudioContext = null;
+let mediaStreams = []; // Array to store multiple MediaStream instances
+let currentPlaybackNodes = []; // Array to store current playback nodes for all streams
+let mediaStreamWaiters = []; // Array of resolve functions waiting for a stream
+
+function checkMediaStreamWaiters() {
+  if (mediaStreams.length > 0) {
+    const waiters = [...mediaStreamWaiters];
+    mediaStreamWaiters = [];
+    waiters.forEach(waiter => waiter());
+  }
+}
+
+// Create AudioContext and setup silence generation (multiple streams)
+function createControlledMediaStream() {
+  // Always create a new stream instead of returning existing one
+  if (!globalAudioContext) {
+    globalAudioContext = new (window.AudioContext || window.webkitAudioContext)();
+  }
+
+  // Create a MediaStreamDestination to output our controlled audio
+  const destination = globalAudioContext.createMediaStreamDestination();
+
+  // Create gain node for volume control
+  const gainNode = globalAudioContext.createGain();
+  gainNode.connect(destination);
+
+  // Start with silence - create an oscillator with zero gain
+  const silenceSourceNode = globalAudioContext.createOscillator();
+  const silenceGain = globalAudioContext.createGain();
+  silenceGain.gain.setValueAtTime(0, globalAudioContext.currentTime);
+
+  silenceSourceNode.connect(silenceGain);
+  silenceGain.connect(gainNode);
+  silenceSourceNode.start();
+
+  const mediaStream = destination.stream;
+
+  // Store the stream and its associated nodes
+  const streamData = {
+    stream: mediaStream,
+    gainNode: gainNode,
+    destination: destination,
+    silenceSourceNode: silenceSourceNode,
+    silenceGain: silenceGain,
+    currentSourceNode: null,
+    id: `stream_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`
+  };
+
+  mediaStreams.push(streamData);
+  console.log(`🎤 Created new controlled MediaStream: ${streamData.id} (Total: ${mediaStreams.length})`);
+  checkMediaStreamWaiters();
+  return mediaStream;
+}
+
+// Replace getUserMedia to return our controlled stream
+const originalGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
+navigator.mediaDevices.getUserMedia = function (constraints) {
+  console.log("🎤 Intercepted getUserMedia call with constraints:", constraints);
+
+  // If audio is requested, return our controlled stream
+  if (constraints && constraints.audio) {
+    console.log("🎤 Returning controlled MediaStream instead of real microphone");
+    const controlledStream = createControlledMediaStream();
+    return Promise.resolve(controlledStream);
+  }
+
+  // For video-only or other requests, use original implementation
+  return originalGetUserMedia(constraints);
+};
+
+// Expose __speak method to be called from voice-agent-tester.js
+window.__speak = function (textOrUrl) {
+  console.log(`Speaking: ${textOrUrl}`);
+
+  // Check if input is a URL
+  if (textOrUrl.startsWith('http')) {
+    console.log(`Detected URL, playing audio in MediaStream: ${textOrUrl}`);
+    playAudioInMediaStream(textOrUrl);
+  } else {
+    console.log(`Detected text, converting to speech in MediaStream: ${textOrUrl}`);
+    speakTextInMediaStream(textOrUrl);
+  }
+};
+
+// Expose dedicated __speakFromUrl method for file-based speech
+window.__speakFromUrl = function (url) {
+  console.log(`Playing audio from URL in MediaStream: ${url}`);
+  playAudioInMediaStream(url);
+};
+
+function speakTextInMediaStream(text) {
+  console.log(`🎤 Converting text to speech in all MediaStreams: ${text}`);
+
+  if (!globalAudioContext || mediaStreams.length === 0) {
+    console.error('AudioContext not initialized or no MediaStreams available');
+    return;
+  }
+
+  // Create a temporary audio element for speech synthesis
+  const utterance = new SpeechSynthesisUtterance(text);
+
+  // Notify when speech starts
+  utterance.onstart = function () {
+    console.log('🎤 Speech synthesis started');
+    if (typeof __publishEvent === 'function') {
+      __publishEvent('speechstart', { text: text });
+    }
+  };
+
+  // Notify when speech ends
+  utterance.onend = function () {
+    console.log('🎤 Speech synthesis ended');
+    if (typeof __publishEvent === 'function') {
+      __publishEvent('speechend', { text: text });
+    }
+  };
+
+  // Handle speech errors
+  utterance.onerror = function (event) {
+    console.error('Speech synthesis error:', event.error);
+    if (typeof __publishEvent === 'function') {
+      __publishEvent('speecherror', { error: event.error, text: text });
+    }
+  };
+
+  // Use speech synthesis but we'll need a different approach for MediaStream
+  // For now, we'll use the original method but this could be enhanced
+  window.speechSynthesis.speak(utterance);
+}
+
+function playAudioInMediaStream(url) {
+  console.log(`🎤 Playing audio in all MediaStreams (${mediaStreams.length} streams): ${url}`);
+
+  if (!globalAudioContext || mediaStreams.length === 0) {
+    console.error('AudioContext not initialized or no MediaStreams available');
+    return;
+  }
+
+  // Stop current audio sources in all streams
+  stopCurrentAudio();
+
+  // Create new audio element
+  const audio = new Audio(url);
+  audio.crossOrigin = 'anonymous'; // Enable CORS if needed
+
+  // Set up audio routing through all MediaStreams
+  audio.addEventListener('canplaythrough', function () {
+    console.log(`🎤 Audio ready to play, routing to ${mediaStreams.length} MediaStreams`);
+
+    try {
+      // Create media element source
+      const sourceNode = globalAudioContext.createMediaElementSource(audio);
+
+      // Connect to all MediaStream gain nodes
+      mediaStreams.forEach((streamData, index) => {
+        sourceNode.connect(streamData.gainNode);
+        console.log(`🎤 Connected audio to stream ${streamData.id}`);
+      });
+
+      // Store the source node for cleanup
+      currentPlaybackNodes.push(sourceNode);
+
+      // If flag is enabled, also make it audible by connecting to destination
+      if (MAKE_SPEAK_AUDIO_AUDIBLE) {
+        sourceNode.connect(globalAudioContext.destination);
+        console.log('🎤 Audio will be audible through speakers');
+      }
+
+      // Notify when audio starts
+      if (typeof __publishEvent === 'function') {
+        __publishEvent('speechstart', { url: url, streamCount: mediaStreams.length });
+      }
+
+      // Play the audio
+      audio.play();
+    } catch (error) {
+      console.error('Error setting up audio source:', error);
+      if (typeof __publishEvent === 'function') {
+        __publishEvent('speecherror', { error: error.message, url: url });
+      }
+    }
+  });
+
+  // Handle audio end
+  audio.addEventListener('ended', function () {
+    console.log('🎤 Audio playback ended');
+    if (typeof __publishEvent === 'function') {
+      __publishEvent('speechend', { url: url });
+    }
+  });
+
+  // Handle errors
+  audio.addEventListener('error', function (event) {
+    console.error('Audio playback error:', event);
+    if (typeof __publishEvent === 'function') {
+      __publishEvent('speecherror', { error: 'Audio playback failed', url: url });
+    }
+  });
+
+  // Start loading the audio
+  audio.load();
+}
+
+// Helper function to stop current audio and reset to silence
+function stopCurrentAudio() {
+  currentPlaybackNodes.forEach((sourceNode, index) => {
+    try {
+      sourceNode.stop();
+      sourceNode.disconnect();
+      console.log(`🎤 Stopped audio source ${index}`);
+    } catch (e) {
+      console.warn(`Error stopping audio source ${index}:`, e);
+    }
+  });
+  currentPlaybackNodes = [];
+  console.log('🎤 Stopped all current audio sources');
+}
+
+// Helper function to get information about all MediaStreams
+window.__getMediaStreamInfo = function () {
+  return {
+    totalStreams: mediaStreams.length,
+    streams: mediaStreams.map(streamData => ({
+      id: streamData.id,
+      streamId: streamData.stream.id,
+      active: streamData.stream.active,
+      tracks: streamData.stream.getTracks().length
+    }))
+  };
+};
+
+// Helper function to remove a specific MediaStream
+window.__removeMediaStream = function (streamId) {
+  const index = mediaStreams.findIndex(streamData => streamData.id === streamId || streamData.stream.id === streamId);
+  if (index !== -1) {
+    const streamData = mediaStreams[index];
+    try {
+      streamData.silenceSourceNode.stop();
+      streamData.silenceSourceNode.disconnect();
+      streamData.gainNode.disconnect();
+      streamData.stream.getTracks().forEach(track => track.stop());
+    } catch (e) {
+      console.warn('Error cleaning up MediaStream:', e);
+    }
+    mediaStreams.splice(index, 1);
+    console.log(`🎤 Removed MediaStream: ${streamId} (Remaining: ${mediaStreams.length})`);
+    return true;
+  }
+  return false;
+};
+
+// Expose helper function for external control
+window.__stopAudio = stopCurrentAudio;
+
+window.__waitForMediaStream = function (timeout = 10000) {
+  if (mediaStreams.length > 0) {
+    return Promise.resolve();
+  }
+
+  console.log(`🎤 Waiting for MediaStream (timeout: ${timeout}ms)...`);
+  return new Promise((resolve, reject) => {
+    let settled = false;
+
+    const timeoutId = setTimeout(() => {
+      if (settled) return;
+      settled = true;
+      const index = mediaStreamWaiters.indexOf(onStreamReady);
+      if (index > -1) mediaStreamWaiters.splice(index, 1);
+      reject(new Error("Timeout waiting for MediaStream initialization. The application has not requested microphone access yet."));
+    }, timeout);
+
+    const onStreamReady = () => {
+      if (settled) return;
+      settled = true;
+      clearTimeout(timeoutId);
+      resolve();
+    };
+
+    mediaStreamWaiters.push(onStreamReady);
+  });
+};
+
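Once the page under test has requested microphone access, the exposed `window.__*` helpers drive the conversation from Node. A minimal usage sketch, again assuming the hypothetical Playwright `page` from the earlier sketch:

```js
// Hypothetical sketch: speaking to the agent under test from Node.
async function speakToAgent(page, textOrUrl) {
  // Resolves once the app has called getUserMedia and received a controlled
  // stream; rejects after the default 10 s timeout otherwise.
  await page.evaluate(() => window.__waitForMediaStream());

  // __speak treats anything starting with "http" as an audio URL to route
  // into every controlled MediaStream; any other input goes to speech
  // synthesis. Note that __speak returns before playback finishes.
  await page.evaluate((input) => window.__speak(input), textOrUrl);
}
```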
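Two behaviors are worth noting when reading the hooks themselves. Text input still goes through `window.speechSynthesis`, which plays through the system output rather than into the controlled streams; the inline comment in `speakTextInMediaStream` flags this as a known gap, which is presumably why the package ships prerecorded `assets/*.mp3` prompts for `__speakFromUrl`. And in `stopCurrentAudio`, each `sourceNode` is a `MediaElementAudioSourceNode`, which has no `stop()` method, so that call throws into the `catch` and the subsequent `disconnect()` never runs; pausing the underlying `Audio` element is the reliable way to silence playback.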