@telnyx/voice-agent-tester 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +4 -0
- package/.github/workflows/ci.yml +29 -0
- package/.github/workflows/draft-release.yml +72 -0
- package/.github/workflows/publish-release.yml +39 -0
- package/.release-it.json +31 -0
- package/CHANGELOG.md +30 -0
- package/CLAUDE.md +72 -0
- package/LICENSE +21 -0
- package/README.md +92 -0
- package/assets/appointment_data.mp3 +0 -0
- package/assets/confirmation.mp3 +0 -0
- package/assets/greet_me_angry.mp3 +0 -0
- package/assets/hello_make_an_appointment.mp3 +0 -0
- package/assets/name_lebron_james.mp3 +0 -0
- package/assets/recording-processor.js +86 -0
- package/assets/tell_me_joke_laugh.mp3 +0 -0
- package/assets/tell_me_something_funny.mp3 +0 -0
- package/assets/tell_me_something_sad.mp3 +0 -0
- package/benchmarks/applications/elevenlabs.yaml +10 -0
- package/benchmarks/applications/telnyx.yaml +10 -0
- package/benchmarks/applications/vapi.yaml +10 -0
- package/benchmarks/scenarios/appointment.yaml +16 -0
- package/javascript/audio_input_hooks.js +291 -0
- package/javascript/audio_output_hooks.js +876 -0
- package/package.json +61 -0
- package/src/index.js +560 -0
- package/src/provider-import.js +315 -0
- package/src/report.js +228 -0
- package/src/server.js +31 -0
- package/src/transcription.js +138 -0
- package/src/voice-agent-tester.js +1033 -0
- package/tests/integration.test.js +138 -0
- package/tests/voice-agent-tester.test.js +190 -0
@@ -0,0 +1,876 @@
let i = 0;

// Store original Audio constructor and createElement
const OriginalAudio = window.Audio;
const originalCreateElement = document.createElement;

// Set to track programmatically created Audio instances
const programmaticAudioInstances = new Set();

// Track RTCPeerConnections for RTP stats
const rtcPeerConnections = new Set();
const OriginalRTCPeerConnection = window.RTCPeerConnection;

// Intercept RTCPeerConnection creation
window.RTCPeerConnection = function(...args) {
  const pc = new OriginalRTCPeerConnection(...args);
  rtcPeerConnections.add(pc);
  console.log(`🔗 RTCPeerConnection created (total: ${rtcPeerConnections.size})`);

  // Remove from set when connection is closed
  const originalClose = pc.close.bind(pc);
  pc.close = function() {
    rtcPeerConnections.delete(pc);
    console.log(`🔗 RTCPeerConnection closed (remaining: ${rtcPeerConnections.size})`);
    return originalClose();
  };

  // Also track connection state changes
  pc.addEventListener('connectionstatechange', () => {
    if (pc.connectionState === 'closed' || pc.connectionState === 'failed') {
      rtcPeerConnections.delete(pc);
    }
  });

  return pc;
};

// Preserve prototype chain
window.RTCPeerConnection.prototype = OriginalRTCPeerConnection.prototype;
Object.setPrototypeOf(window.RTCPeerConnection, OriginalRTCPeerConnection);
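// Both steps matter: reassigning .prototype keeps `pc instanceof
// RTCPeerConnection` checks working for wrapped connections, while
// setPrototypeOf lets static members such as
// RTCPeerConnection.generateCertificate resolve through the original
// constructor.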

// Function to get RTP stats from all active peer connections
window.__getRtpStats = async function() {
  const allStats = [];

  for (const pc of rtcPeerConnections) {
    try {
      const stats = await pc.getStats();
      const pcStats = {
        connectionState: pc.connectionState,
        iceConnectionState: pc.iceConnectionState,
        signalingState: pc.signalingState,
        inboundAudio: [],
        outboundAudio: [],
        candidatePairs: []
      };

      stats.forEach(report => {
        if (report.type === 'inbound-rtp' && report.kind === 'audio') {
          pcStats.inboundAudio.push({
            packetsReceived: report.packetsReceived,
            packetsLost: report.packetsLost,
            bytesReceived: report.bytesReceived,
            jitter: report.jitter,
            audioLevel: report.audioLevel,
            totalAudioEnergy: report.totalAudioEnergy,
            totalSamplesReceived: report.totalSamplesReceived,
            concealedSamples: report.concealedSamples,
            silentConcealedSamples: report.silentConcealedSamples,
            codecId: report.codecId
          });
        } else if (report.type === 'outbound-rtp' && report.kind === 'audio') {
          pcStats.outboundAudio.push({
            packetsSent: report.packetsSent,
            bytesSent: report.bytesSent,
            targetBitrate: report.targetBitrate,
            codecId: report.codecId
          });
        } else if (report.type === 'candidate-pair' && report.state === 'succeeded') {
          pcStats.candidatePairs.push({
            state: report.state,
            localCandidateId: report.localCandidateId,
            remoteCandidateId: report.remoteCandidateId,
            currentRoundTripTime: report.currentRoundTripTime,
            availableOutgoingBitrate: report.availableOutgoingBitrate
          });
        }
      });

      allStats.push(pcStats);
    } catch (error) {
      allStats.push({
        error: error.message,
        connectionState: pc.connectionState
      });
    }
  }

  return {
    timestamp: Date.now(),
    connectionCount: rtcPeerConnections.size,
    connections: allStats
  };
};
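// Usage sketch (assumption: the tester injects this script into the page and
// polls the hook from Node, e.g. with Playwright's page.evaluate; the calls
// below are illustrative, not part of this file):
//
//   const rtp = await page.evaluate(() => window.__getRtpStats());
//   for (const conn of rtp.connections) {
//     console.log(conn.connectionState, conn.inboundAudio[0]?.packetsLost);
//   }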

class AudioElementMonitor {
  constructor() {
    this.monitoredElements = new Map();
    this.audioContext = null;
    this.bodyMutationObserver = null; // Observer for new elements added to body
    this.audioElementObservers = new Map(); // Map of individual observers for each audio element
    this.init();
  }

  init() {
    this.setupAudioContext();
    this.setupBodyMutationObserver();
    this.scanExistingAudioElements();
    this.setupProgrammaticAudioInterception();
    this.setupShadowDomInterception();
    console.log("AudioElementMonitor initialized");
  }

  setupShadowDomInterception() {
    const monitor = this;
    const originalAttachShadow = Element.prototype.attachShadow;
    Element.prototype.attachShadow = function(init) {
      const shadowRoot = originalAttachShadow.call(this, init);
      monitor.observeNode(shadowRoot);
      // Also scan for existing elements in the new shadow root
      monitor.checkForNewAudioElements(shadowRoot);
      return shadowRoot;
    };
  }

  setupAudioContext() {
    try {
      this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
      console.log("AudioContext created successfully");
    } catch (error) {
      console.error("Failed to create AudioContext:", error);
    }
  }

  setupBodyMutationObserver() {
    if (this.bodyMutationObserver) {
      // Already set up
      return;
    }

    if (!document.body) {
      console.log("document.body not available yet, waiting for DOMContentLoaded");
      window.addEventListener('DOMContentLoaded', () => {
        this.setupBodyMutationObserver();
        this.scanExistingAudioElements();
      });
      return;
    }

    this.bodyMutationObserver = new MutationObserver((mutations) => {
      mutations.forEach((mutation) => {
        if (mutation.type === 'childList') {
          mutation.addedNodes.forEach((node) => {
            if (node.nodeType === Node.ELEMENT_NODE) {
              this.checkForNewAudioElements(node);
            }
          });
        }
      });
    });

    // Start the stored observer on the body so destroy() can disconnect it
    this.bodyMutationObserver.observe(document.body, {
      childList: true,
      subtree: true
    });
    console.log("Body MutationObserver setup complete");
  }

  observeNode(node) {
    const observer = new MutationObserver((mutations) => {
      mutations.forEach((mutation) => {
        if (mutation.type === 'childList') {
          mutation.addedNodes.forEach((addedNode) => {
            if (addedNode.nodeType === Node.ELEMENT_NODE) {
              this.checkForNewAudioElements(addedNode);
            }
          });
        }
      });
    });

    observer.observe(node, {
      childList: true,
      subtree: true
    });
  }

  setupProgrammaticAudioInterception() {
    const monitor = this;

    // Override Audio constructor
    window.Audio = function (src) {
      const audioElement = new OriginalAudio(src);
      const elementId = monitor.getElementId(audioElement);

      programmaticAudioInstances.add(audioElement);
      monitor.setupProgrammaticAudioElement(audioElement, elementId);

      console.log(`Programmatic Audio created: ${elementId} with src: ${src || 'none'}`);
      return audioElement;
    };

    // window.Audio.prototype.play = function () {
    //   console.log(`Programmatic audio play called: ${this.src || this.srcObject}`);
    //   monitor.handleProgrammaticAudioPlay(this, this.srcObject);
    //   return originalPlay.apply(this, arguments);
    // };

    // Override document.createElement to catch audio elements
    document.createElement = function (tagName) {
      const element = originalCreateElement.call(this, tagName);

      if (tagName.toLowerCase() === 'audio') {
        const elementId = monitor.getElementId(element);
        programmaticAudioInstances.add(element);
        monitor.setupProgrammaticAudioElement(element, elementId);
        console.log(`Programmatic audio element created via createElement: ${elementId}`);
      }

      return element;
    };

    // Preserve original constructor properties
    Object.setPrototypeOf(window.Audio, OriginalAudio);
    Object.defineProperty(window.Audio, 'prototype', {
      value: OriginalAudio.prototype,
      writable: false
    });

    console.log("Programmatic Audio interception setup complete");
  }

  setupAudioElementObserver(audioElement) {
    const elementId = this.getElementId(audioElement);

    if (this.audioElementObservers.has(elementId)) {
      // Already observing this element
      return;
    }

    // Since srcObject is a property, not an attribute, we need to use a different approach
    // We'll override the srcObject setter to detect changes
    const originalDescriptor = Object.getOwnPropertyDescriptor(audioElement, 'srcObject') ||
      Object.getOwnPropertyDescriptor(HTMLMediaElement.prototype, 'srcObject');

    // Create a property descriptor that intercepts srcObject changes
    const monitor = this;
    Object.defineProperty(audioElement, 'srcObject', {
      get() {
        return originalDescriptor ? originalDescriptor.get.call(this) : this._srcObject;
      },
      set(value) {
        const previousValue = this.srcObject;

        // Set the actual srcObject using the original setter
        if (originalDescriptor && originalDescriptor.set) {
          originalDescriptor.set.call(this, value);
        } else {
          this._srcObject = value;
        }

        console.log(`Audio element srcObject changed: ${elementId} from ${previousValue} to ${value}`);

        // Trigger handler when srcObject changes
        if (previousValue !== value) {
          monitor.handleAudioElement(audioElement);
        }
      },
      configurable: true,
      enumerable: true
    });

    // Also set up a mutation observer for other attribute changes (like src)
    const observer = new MutationObserver((mutations) => {
      mutations.forEach((mutation) => {
        console.log(`Audio element attribute changed: ${elementId} ${mutation.attributeName}`);
        if (mutation.type === 'attributes' && mutation.attributeName === 'src') {
          this.handleAudioElement(mutation.target);
        }
      });
    });

    observer.observe(audioElement, {
      attributes: true,
      attributeFilter: ['src']
    });

    this.audioElementObservers.set(elementId, observer);
    console.log(`Set up srcObject observer for audio element: ${audioElement.tagName} ${elementId} ${audioElement.srcObject}`);
  }
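
  // Note: defining srcObject directly on the element shadows the
  // HTMLMediaElement.prototype accessor for that one instance only, so pages
  // assigning el.srcObject = stream are observed without patching the
  // prototype for every media element on the page.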

  checkForNewAudioElements(element) {
    if (element.tagName === 'AUDIO') {
      this.setupAudioElementObserver(element);
      if (element.srcObject) {
        this.handleAudioElement(element);
      }
    }

    const audioElements = element.querySelectorAll('audio');
    audioElements.forEach(audioEl => {
      this.setupAudioElementObserver(audioEl);
      if (audioEl.srcObject) {
        this.handleAudioElement(audioEl);
      }
    });
  }

  scanExistingAudioElements() {
    const existingAudio = document.querySelectorAll('audio');
    console.log(`Scanning ${existingAudio.length} existing audio elements`);
    existingAudio.forEach(audioEl => {
      this.setupAudioElementObserver(audioEl);
      if (audioEl.srcObject) {
        this.handleAudioElement(audioEl);
      }
    });
  }

  handleAudioElement(audioElement) {
    const elementId = this.getElementId(audioElement);

    if (this.monitoredElements.has(elementId)) {
      console.log(`Audio element ${elementId} already monitored`);
      return;
    }

    console.log(`New audio element with src detected: ${elementId} ${audioElement.srcObject}`);
    this.monitorAudioElement(audioElement, elementId);
  }

  getElementId(element) {
    if (!element._customId) {
      element._customId = element.id || `audio_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
    }
    return element._customId;
  }

  setupProgrammaticAudioElement(audioElement, elementId) {
    const monitor = this;

    // Set up property interceptors for src and srcObject changes
    this.setupProgrammaticPropertyInterceptors(audioElement, elementId);

    // Override methods specifically for this instance
    const originalPlay = audioElement.play;
    const originalPause = audioElement.pause;
    const originalLoad = audioElement.load;

    audioElement.play = function () {
      console.log(`Programmatic audio play called: ${elementId}, src: ${this.src || this.srcObject}`);
      monitor.handleProgrammaticAudioPlay(this, elementId);
      return originalPlay.apply(this, arguments);
    };

    audioElement.pause = function () {
      console.log(`Programmatic audio pause called: ${elementId}`);
      monitor.handleProgrammaticAudioPause(this, elementId);
      return originalPause.apply(this, arguments);
    };

    audioElement.load = function () {
      console.log(`Programmatic audio load called: ${elementId}, src: ${this.src || this.srcObject}`);
      monitor.handleProgrammaticAudioLoad(this, elementId);
      return originalLoad.apply(this, arguments);
    };

    // Set up event listeners
    const events = ['play', 'pause', 'ended', 'loadstart', 'canplay', 'loadeddata'];
    events.forEach(eventType => {
      audioElement.addEventListener(eventType, (event) => {
        console.log(`Programmatic audio element ${elementId} event: ${eventType}`);
        this.dispatchAudioEvent(`element${eventType}`, elementId, audioElement, event);
      });
    });

    audioElement.addEventListener('ended', () => {
      const monitorData = this.monitoredElements.get(elementId);
      if (monitorData && monitorData.isPlaying) {
        monitorData.isPlaying = false;
        this.dispatchAudioEvent('audiostop', elementId, audioElement);
      }
    });
  }

  setupProgrammaticPropertyInterceptors(audioElement, elementId) {
    const monitor = this;

    // Intercept src property (the src accessor lives on HTMLMediaElement.prototype)
    const originalSrcDescriptor = Object.getOwnPropertyDescriptor(audioElement, 'src') ||
      Object.getOwnPropertyDescriptor(HTMLMediaElement.prototype, 'src');

    Object.defineProperty(audioElement, 'src', {
      get() {
        return originalSrcDescriptor ? originalSrcDescriptor.get.call(this) : this._src;
      },
      set(value) {
        const previousValue = this.src;

        if (originalSrcDescriptor && originalSrcDescriptor.set) {
          originalSrcDescriptor.set.call(this, value);
        } else {
          this._src = value;
        }

        console.log(`Programmatic audio src changed: ${elementId} from ${previousValue} to ${value}`);

        if (previousValue !== value && value) {
          monitor.handleProgrammaticAudioSrcChange(audioElement, elementId);
        }
      },
      configurable: true,
      enumerable: true
    });

    // Intercept srcObject property
    const originalSrcObjectDescriptor = Object.getOwnPropertyDescriptor(audioElement, 'srcObject') ||
      Object.getOwnPropertyDescriptor(HTMLMediaElement.prototype, 'srcObject');

    Object.defineProperty(audioElement, 'srcObject', {
      get() {
        return originalSrcObjectDescriptor ? originalSrcObjectDescriptor.get.call(this) : this._srcObject;
      },
      set(value) {
        const previousValue = this.srcObject;

        if (originalSrcObjectDescriptor && originalSrcObjectDescriptor.set) {
          originalSrcObjectDescriptor.set.call(this, value);
        } else {
          this._srcObject = value;
        }

        console.log(`Programmatic audio srcObject changed: ${elementId} from ${previousValue} to ${value}`);

        if (previousValue !== value && value) {
          monitor.handleProgrammaticAudioSrcChange(audioElement, elementId);
        }
      },
      configurable: true,
      enumerable: true
    });
  }

  handleProgrammaticAudioPlay(audioElement, elementId) {
    if (audioElement.src || audioElement.srcObject) {
      this.handleProgrammaticAudioSrcChange(audioElement, elementId);
    }
  }

  handleProgrammaticAudioPause(audioElement, elementId) {
    const monitorData = this.monitoredElements.get(elementId);
    if (monitorData && monitorData.isPlaying) {
      monitorData.isPlaying = false;
      this.dispatchAudioEvent('audiostop', elementId, audioElement);
    }
  }

  handleProgrammaticAudioLoad(audioElement, elementId) {
    if (audioElement.src || audioElement.srcObject) {
      this.handleProgrammaticAudioSrcChange(audioElement, elementId);
    }
  }

  handleProgrammaticAudioSrcChange(audioElement, elementId) {
    if (this.monitoredElements.has(elementId)) {
      console.log(`Programmatic audio element ${elementId} already monitored`);
      return;
    }

    console.log(`New programmatic audio element with src detected: ${elementId} ${audioElement.src || audioElement.srcObject}`);
    this.monitorProgrammaticAudioElement(audioElement, elementId);
  }

  monitorProgrammaticAudioElement(audioElement, elementId) {
    if (!this.audioContext) {
      console.warn("AudioContext not available, cannot monitor programmatic audio");
      return;
    }

    try {
      let source;
      if (audioElement.srcObject && audioElement.srcObject instanceof MediaStream) {
        // For MediaStream sources
        source = this.audioContext.createMediaStreamSource(audioElement.srcObject);
      } else if (audioElement.src) {
        // For regular audio sources, we need to create a media element source
        source = this.audioContext.createMediaElementSource(audioElement);
      } else {
        console.warn(`Cannot monitor programmatic audio element ${elementId}: no valid source`);
        return;
      }

      const analyser = this.audioContext.createAnalyser();
      analyser.fftSize = 2048;

      source.connect(analyser);
      // Only connect to destination for MediaElementSource to avoid double audio
      if (audioElement.src) {
        analyser.connect(this.audioContext.destination);
      }

      const bufferLength = analyser.frequencyBinCount;
      const dataArray = new Uint8Array(bufferLength);

      const monitorData = {
        element: audioElement,
        source: source,
        analyser: analyser,
        dataArray: dataArray,
        isPlaying: false,
        lastAudioTime: 0,
        silenceThreshold: 10,
        checkInterval: null,
        isProgrammatic: true
      };

      this.monitoredElements.set(elementId, monitorData);
      this.startAudioAnalysis(elementId, monitorData);

      console.log(`Started monitoring programmatic audio element: ${elementId}`);
    } catch (error) {
      console.error(`Failed to monitor programmatic audio element ${elementId}:`, error);
    }
  }

  monitorAudioElement(audioElement, elementId) {
    if (!this.audioContext) {
      console.warn("AudioContext not available, cannot monitor audio");
      return;
    }

    try {
      const source = this.audioContext.createMediaStreamSource(audioElement.srcObject);
      const analyser = this.audioContext.createAnalyser();
      analyser.fftSize = 2048;

      source.connect(analyser);
      analyser.connect(this.audioContext.destination);

      const bufferLength = analyser.frequencyBinCount;
      const dataArray = new Uint8Array(bufferLength);

      const monitorData = {
        element: audioElement,
        source: source,
        analyser: analyser,
        dataArray: dataArray,
        isPlaying: false,
        lastAudioTime: 0,
        silenceThreshold: 10,
        checkInterval: null
      };

      this.monitoredElements.set(elementId, monitorData);
      this.startAudioAnalysis(elementId, monitorData);
      this.setupAudioEventListeners(audioElement, elementId, monitorData);

      console.log(`Started monitoring audio element: ${elementId}`);
    } catch (error) {
      console.error(`Failed to monitor audio element ${elementId}:`, error);
    }
  }

  startAudioAnalysis(elementId, monitorData) {
    const { analyser, dataArray, silenceThreshold } = monitorData;

    monitorData.checkInterval = setInterval(() => {
      analyser.getByteFrequencyData(dataArray);

      const average = dataArray.reduce((sum, value) => sum + value, 0) / dataArray.length;
      const hasAudio = average > silenceThreshold;

      // if (i++ % 10 == 0) {
      //   console.log(`Average: ${average} hasAudio: ${hasAudio} elementId: ${elementId}`);
      // }

      if (hasAudio && !monitorData.isPlaying) {
        monitorData.isPlaying = true;
        monitorData.lastAudioTime = Date.now();
        this.dispatchAudioEvent('audiostart', elementId, monitorData.element);
        // Notify Node.js via exposed function
        if (typeof window.__publishEvent === 'function') {
          window.__publishEvent('audiostart', { elementId, timestamp: Date.now() });
        }
        console.log(`Audio started: ${elementId}`);
      } else if (!hasAudio && monitorData.isPlaying) {
        const silenceDuration = Date.now() - monitorData.lastAudioTime;
        if (silenceDuration > 1000) {
          monitorData.isPlaying = false;
          this.dispatchAudioEvent('audiostop', elementId, monitorData.element);
          // Notify Node.js via exposed function
          if (typeof window.__publishEvent === 'function') {
            window.__publishEvent('audiostop', { elementId, timestamp: Date.now() });
          }
          console.log(`Audio stopped: ${elementId}`);
        }
      } else if (hasAudio) {
        monitorData.lastAudioTime = Date.now();
      }
    }, 20);
  }
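
  // The detector above is a level gate with hysteresis: every 20 ms it
  // averages the byte-scaled frequency bins (0-255) and compares the result
  // to silenceThreshold. 'audiostart' fires on the first loud frame;
  // 'audiostop' fires only after a full second below the threshold, so brief
  // pauses between words do not split one utterance into several events.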

  setupAudioEventListeners(audioElement, elementId, monitorData) {
    const events = ['play', 'pause', 'ended', 'loadstart', 'canplay'];

    events.forEach(eventType => {
      audioElement.addEventListener(eventType, (event) => {
        console.log(`Audio element ${elementId} event: ${eventType}`);
        this.dispatchAudioEvent(`element${eventType}`, elementId, audioElement, event);
      });
    });

    audioElement.addEventListener('ended', () => {
      if (monitorData.isPlaying) {
        monitorData.isPlaying = false;
        this.dispatchAudioEvent('audiostop', elementId, audioElement);
      }
    });
  }

  dispatchAudioEvent(eventType, elementId, audioElement, originalEvent = null) {
    const customEvent = new CustomEvent(`audio-monitor-${eventType}`, {
      detail: {
        elementId: elementId,
        audioElement: audioElement,
        timestamp: Date.now(),
        originalEvent: originalEvent
      }
    });

    document.dispatchEvent(customEvent);
    console.log(`Dispatched event: audio-monitor-${eventType} for ${elementId}`);
  }

  stopMonitoring(elementId) {
    const monitorData = this.monitoredElements.get(elementId);
    if (monitorData) {
      if (monitorData.checkInterval) {
        clearInterval(monitorData.checkInterval);
      }
      this.monitoredElements.delete(elementId);
      console.log(`Stopped monitoring audio element: ${elementId}`);
    }
  }

  destroy() {
    this.monitoredElements.forEach((monitorData, elementId) => {
      this.stopMonitoring(elementId);
    });

    if (this.bodyMutationObserver) {
      this.bodyMutationObserver.disconnect();
      this.bodyMutationObserver = null;
    }

    this.audioElementObservers.forEach((observer, elementId) => {
      observer.disconnect();
    });
    this.audioElementObservers.clear();

    // Restore original Audio constructor
    if (OriginalAudio) {
      window.Audio = OriginalAudio;
    }

    // Restore original createElement
    if (originalCreateElement) {
      document.createElement = originalCreateElement;
    }

    // Clear programmatic instances
    programmaticAudioInstances.clear();

    if (this.audioContext) {
      this.audioContext.close();
    }

    console.log("AudioElementMonitor destroyed");
  }
}
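
// Teardown sketch (hypothetical, for harnesses that re-inject the hooks):
//   window.audioMonitor.destroy();  // clears intervals, observers, patches
// Note that destroy() restores window.Audio and document.createElement but
// leaves the per-element src/srcObject property shadows and the patched
// Element.prototype.attachShadow in place.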

const audioMonitor = new AudioElementMonitor();

document.addEventListener('audio-monitor-audiostart', (event) => {
  console.log('🔊 Audio playback started:', event.detail);
});

document.addEventListener('audio-monitor-audiostop', (event) => {
  console.log('🔇 Audio playback stopped:', event.detail);
});

window.audioMonitor = audioMonitor;

// Expose a diagnostic function to get detailed audio monitoring state
window.__getAudioDiagnostics = function() {
  const diagnostics = {
    timestamp: Date.now(),
    audioContextState: audioMonitor.audioContext ? audioMonitor.audioContext.state : 'not-created',
    monitoredElementsCount: audioMonitor.monitoredElements.size,
    elements: []
  };

  audioMonitor.monitoredElements.forEach((monitorData, elementId) => {
    const { analyser, dataArray, silenceThreshold, isPlaying, lastAudioTime, isProgrammatic } = monitorData;

    // Get current audio level if analyser is available
    let currentLevel = null;
    let currentMaxLevel = null;
    if (analyser && dataArray) {
      analyser.getByteFrequencyData(dataArray);
      currentLevel = dataArray.reduce((sum, value) => sum + value, 0) / dataArray.length;
      currentMaxLevel = Math.max(...dataArray);
    }

    diagnostics.elements.push({
      elementId,
      isPlaying,
      isProgrammatic: !!isProgrammatic,
      silenceThreshold,
      currentAudioLevel: currentLevel !== null ? currentLevel.toFixed(2) : 'unavailable',
      currentMaxLevel: currentMaxLevel !== null ? currentMaxLevel : 'unavailable',
      wouldTriggerAudioStart: currentLevel !== null ? currentLevel > silenceThreshold : 'unknown',
      lastAudioTime,
      timeSinceLastAudio: lastAudioTime ? Date.now() - lastAudioTime : null
    });
  });

  return diagnostics;
};
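
// Example result shape (illustrative values, not captured output):
//   {
//     timestamp: 1712345678901,
//     audioContextState: 'running',
//     monitoredElementsCount: 1,
//     elements: [{ elementId: 'audio_...', isPlaying: true,
//                  currentAudioLevel: '23.41', wouldTriggerAudioStart: true }]
//   }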

// Recording functionality
let isRecording = false;
let recordingWorkletNode = null;
let recordingSampleRate = 16000;

window.__startRecording = async function () {
  if (isRecording) {
    console.log("Recording already in progress");
    return;
  }

  // Find the first monitored audio element to record from
  const monitorData = Array.from(audioMonitor.monitoredElements.values())[0];
  if (!monitorData) {
    console.error("No monitored audio elements found for recording");
    return;
  }

  try {
    const audioContext = audioMonitor.audioContext;
    if (!audioContext) {
      console.error("AudioContext not available for recording");
      return;
    }

    recordingSampleRate = audioContext.sampleRate;

    // Load the AudioWorklet processor module
    const assetsServerUrl = window.__assetsServerUrl || window.location.origin;
    const workletUrl = `${assetsServerUrl}/assets/recording-processor.js`;
    await audioContext.audioWorklet.addModule(workletUrl);

    // Create the AudioWorklet node
    recordingWorkletNode = new AudioWorkletNode(audioContext, 'recording-processor');

    // Set up message handling from the worklet
    recordingWorkletNode.port.onmessage = (event) => {
      const { command, audioData, sampleRate } = event.data;

      if (command === 'recordingComplete') {
        // Convert Float32Array to PCM16 and then to base64
        const pcm16Buffer = floatTo16BitPCM(audioData);
        const base64Audio = arrayBufferToBase64(pcm16Buffer);

        // Publish the recording event with PCM audio data
        if (typeof window.__publishEvent === 'function') {
          window.__publishEvent('recordingcomplete', {
            audioData: base64Audio,
            mimeType: 'audio/pcm',
            sampleRate: sampleRate,
            channels: 1,
            bitsPerSample: 16,
            timestamp: Date.now()
          });
        }

        console.log('Recording completed and published');
      }
    };

    // Connect the recording worklet to the existing audio chain
    // Insert it between the source and analyser
    monitorData.source.disconnect();
    monitorData.source.connect(recordingWorkletNode);
    recordingWorkletNode.connect(monitorData.analyser);

    // Start recording
    recordingWorkletNode.port.postMessage({ command: 'start' });
    isRecording = true;
    console.log('Recording started from monitored audio stream using AudioWorklet');

    // Publish recording start event
    if (typeof window.__publishEvent === 'function') {
      window.__publishEvent('recordingstart', { timestamp: Date.now() });
    }

  } catch (error) {
    console.error('Error starting recording:', error);
    isRecording = false;
  }
};

window.__stopRecording = function () {
  if (!isRecording || !recordingWorkletNode) {
    console.log("No recording in progress");
    return;
  }

  isRecording = false;

  try {
    // Send stop command to the worklet
    recordingWorkletNode.port.postMessage({ command: 'stop' });

    // Disconnect and clean up the recording worklet
    recordingWorkletNode.disconnect();

    // Reconnect the original audio chain
    const monitorData = Array.from(audioMonitor.monitoredElements.values())[0];
    if (monitorData) {
      monitorData.source.connect(monitorData.analyser);
    }

    // Clean up
    recordingWorkletNode = null;

    console.log('Recording stop requested');

  } catch (error) {
    console.error('Error stopping recording:', error);
  }
};
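
// Usage sketch from the Node side (assumption: the tester serves
// /assets/recording-processor.js and exposes window.__publishEvent; the
// page.evaluate calls below are illustrative):
//
//   await page.evaluate(() => window.__startRecording());
//   // ... let the voice agent speak ...
//   await page.evaluate(() => window.__stopRecording());
//   // a 'recordingcomplete' event then arrives carrying base64 PCM16 mono
//   // audio at the AudioContext's sample rate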

// Helper function to convert Float32Array to 16-bit PCM
function floatTo16BitPCM(float32Array) {
  const buffer = new ArrayBuffer(float32Array.length * 2);
  const view = new DataView(buffer);
  let offset = 0;

  for (let i = 0; i < float32Array.length; i++, offset += 2) {
    const s = Math.max(-1, Math.min(1, float32Array[i]));
    view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
  }

  return buffer;
}

// Helper function to convert ArrayBuffer to base64
function arrayBufferToBase64(buffer) {
  const bytes = new Uint8Array(buffer);
  let binary = '';
  for (let i = 0; i < bytes.byteLength; i++) {
    binary += String.fromCharCode(bytes[i]);
  }
  return btoa(binary);
}
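
// Worked example: a sample of -0.5 clamps to -0.5 and scales by 0x8000 to
// -16384, written little-endian; +1.0 scales by 0x7FFF to 32767, so the
// float range [-1, 1] maps onto signed 16-bit PCM without overflow. btoa is
// safe here because 'binary' only ever holds code points 0-255.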

// Add a visible indicator that this script loaded
console.log("Audio output hooks initialization complete");