browser-pilot 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -1
- package/dist/actions.cjs +485 -9
- package/dist/actions.d.cts +24 -5
- package/dist/actions.d.ts +24 -5
- package/dist/actions.mjs +5 -3
- package/dist/browser.cjs +1761 -102
- package/dist/browser.d.cts +8 -4
- package/dist/browser.d.ts +8 -4
- package/dist/browser.mjs +6 -5
- package/dist/{chunk-PCNEJAJ7.mjs → chunk-7OSR2CAE.mjs} +1756 -46
- package/dist/chunk-KKW2SZLV.mjs +741 -0
- package/dist/cli.mjs +7576 -265
- package/dist/index.cjs +2434 -108
- package/dist/index.d.cts +142 -6
- package/dist/index.d.ts +142 -6
- package/dist/index.mjs +360 -13
- package/dist/providers.d.cts +2 -2
- package/dist/providers.d.ts +2 -2
- package/dist/{types-D_uDqh0Z.d.cts → types--wXNHUwt.d.cts} +1 -1
- package/dist/{types-D_uDqh0Z.d.ts → types--wXNHUwt.d.ts} +1 -1
- package/dist/{types-TVlTA7nH.d.cts → types-CYw-7vx1.d.cts} +280 -3
- package/dist/{types-CbdmaocU.d.ts → types-DOGsEYQa.d.ts} +280 -3
- package/package.json +3 -3
- package/dist/chunk-6RB3GKQP.mjs +0 -251
- package/dist/chunk-ZIQA4JOT.mjs +0 -226
- package/dist/cli.cjs +0 -4792
- package/dist/cli.d.cts +0 -25
- package/dist/cli.d.ts +0 -25
|
@@ -5,8 +5,1315 @@ import {
|
|
|
5
5
|
createProvider
|
|
6
6
|
} from "./chunk-R3PS4PCM.mjs";
|
|
7
7
|
import {
|
|
8
|
-
BatchExecutor
|
|
9
|
-
|
|
8
|
+
BatchExecutor,
|
|
9
|
+
ElementNotFoundError,
|
|
10
|
+
TimeoutError
|
|
11
|
+
} from "./chunk-KKW2SZLV.mjs";
|
|
12
|
+
|
|
13
|
+
// src/audio/encoding.ts
|
|
14
|
+
function bufferToBase64(data) {
|
|
15
|
+
const bytes = data instanceof Uint8Array ? data : new Uint8Array(data);
|
|
16
|
+
let binary = "";
|
|
17
|
+
for (let i = 0; i < bytes.length; i++) {
|
|
18
|
+
binary += String.fromCharCode(bytes[i]);
|
|
19
|
+
}
|
|
20
|
+
return btoa(binary);
|
|
21
|
+
}
|
|
22
|
+
function base64ToBuffer(b64) {
|
|
23
|
+
const binary = atob(b64);
|
|
24
|
+
const bytes = new Uint8Array(binary.length);
|
|
25
|
+
for (let i = 0; i < binary.length; i++) {
|
|
26
|
+
bytes[i] = binary.charCodeAt(i);
|
|
27
|
+
}
|
|
28
|
+
return bytes;
|
|
29
|
+
}
|
|
30
|
+
function calculateRMS(samples) {
|
|
31
|
+
if (samples.length === 0) return 0;
|
|
32
|
+
let sum = 0;
|
|
33
|
+
for (let i = 0; i < samples.length; i++) {
|
|
34
|
+
sum += samples[i] * samples[i];
|
|
35
|
+
}
|
|
36
|
+
return Math.sqrt(sum / samples.length);
|
|
37
|
+
}
|
|
38
|
+
function pcmToWav(options) {
|
|
39
|
+
const { left, right, sampleRate } = options;
|
|
40
|
+
const numChannels = right ? 2 : 1;
|
|
41
|
+
const numSamples = left.length;
|
|
42
|
+
const bitsPerSample = 16;
|
|
43
|
+
const bytesPerSample = bitsPerSample / 8;
|
|
44
|
+
const blockAlign = numChannels * bytesPerSample;
|
|
45
|
+
const dataLength = numSamples * blockAlign;
|
|
46
|
+
const headerLength = 44;
|
|
47
|
+
const buffer = new ArrayBuffer(headerLength + dataLength);
|
|
48
|
+
const view = new DataView(buffer);
|
|
49
|
+
writeString(view, 0, "RIFF");
|
|
50
|
+
view.setUint32(4, 36 + dataLength, true);
|
|
51
|
+
writeString(view, 8, "WAVE");
|
|
52
|
+
writeString(view, 12, "fmt ");
|
|
53
|
+
view.setUint32(16, 16, true);
|
|
54
|
+
view.setUint16(20, 1, true);
|
|
55
|
+
view.setUint16(22, numChannels, true);
|
|
56
|
+
view.setUint32(24, sampleRate, true);
|
|
57
|
+
view.setUint32(28, sampleRate * blockAlign, true);
|
|
58
|
+
view.setUint16(32, blockAlign, true);
|
|
59
|
+
view.setUint16(34, bitsPerSample, true);
|
|
60
|
+
writeString(view, 36, "data");
|
|
61
|
+
view.setUint32(40, dataLength, true);
|
|
62
|
+
let offset = 44;
|
|
63
|
+
for (let i = 0; i < numSamples; i++) {
|
|
64
|
+
const leftSample = Math.max(-1, Math.min(1, left[i]));
|
|
65
|
+
view.setInt16(offset, leftSample < 0 ? leftSample * 32768 : leftSample * 32767, true);
|
|
66
|
+
offset += 2;
|
|
67
|
+
if (right) {
|
|
68
|
+
const rightSample = Math.max(-1, Math.min(1, right[i]));
|
|
69
|
+
view.setInt16(offset, rightSample < 0 ? rightSample * 32768 : rightSample * 32767, true);
|
|
70
|
+
offset += 2;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
return buffer;
|
|
74
|
+
}
|
|
75
|
+
function parseWavHeader(data) {
|
|
76
|
+
const view = new DataView(data);
|
|
77
|
+
if (data.byteLength < 44) {
|
|
78
|
+
throw new Error("Invalid WAV: file too small");
|
|
79
|
+
}
|
|
80
|
+
const riff = readString(view, 0, 4);
|
|
81
|
+
const wave = readString(view, 8, 4);
|
|
82
|
+
if (riff !== "RIFF" || wave !== "WAVE") {
|
|
83
|
+
throw new Error("Invalid WAV: missing RIFF/WAVE header");
|
|
84
|
+
}
|
|
85
|
+
const fmt = readString(view, 12, 4);
|
|
86
|
+
if (fmt !== "fmt ") {
|
|
87
|
+
throw new Error("Invalid WAV: missing fmt chunk");
|
|
88
|
+
}
|
|
89
|
+
const channels = view.getUint16(22, true);
|
|
90
|
+
const sampleRate = view.getUint32(24, true);
|
|
91
|
+
const bitsPerSample = view.getUint16(34, true);
|
|
92
|
+
let dataOffset = 36;
|
|
93
|
+
while (dataOffset < data.byteLength - 8) {
|
|
94
|
+
const chunkId = readString(view, dataOffset, 4);
|
|
95
|
+
const chunkSize = view.getUint32(dataOffset + 4, true);
|
|
96
|
+
if (chunkId === "data") {
|
|
97
|
+
return {
|
|
98
|
+
sampleRate,
|
|
99
|
+
channels,
|
|
100
|
+
bitsPerSample,
|
|
101
|
+
dataOffset: dataOffset + 8,
|
|
102
|
+
dataLength: chunkSize
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
dataOffset += 8 + chunkSize;
|
|
106
|
+
}
|
|
107
|
+
throw new Error("Invalid WAV: missing data chunk");
|
|
108
|
+
}
|
|
109
|
+
function generateSilence(durationMs, sampleRate = 48e3) {
|
|
110
|
+
return new Float32Array(Math.ceil(sampleRate * durationMs / 1e3));
|
|
111
|
+
}
|
|
112
|
+
function generateTone(frequency, durationMs, sampleRate = 48e3, amplitude = 0.5) {
|
|
113
|
+
const numSamples = Math.ceil(sampleRate * durationMs / 1e3);
|
|
114
|
+
const samples = new Float32Array(numSamples);
|
|
115
|
+
for (let i = 0; i < numSamples; i++) {
|
|
116
|
+
samples[i] = amplitude * Math.sin(2 * Math.PI * frequency * i / sampleRate);
|
|
117
|
+
}
|
|
118
|
+
return samples;
|
|
119
|
+
}
|
|
120
|
+
function writeString(view, offset, str) {
|
|
121
|
+
for (let i = 0; i < str.length; i++) {
|
|
122
|
+
view.setUint8(offset + i, str.charCodeAt(i));
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
function readString(view, offset, length) {
|
|
126
|
+
let str = "";
|
|
127
|
+
for (let i = 0; i < length; i++) {
|
|
128
|
+
str += String.fromCharCode(view.getUint8(offset + i));
|
|
129
|
+
}
|
|
130
|
+
return str;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// src/audio/permissions.ts
|
|
134
|
+
async function grantAudioPermissions(cdp, origin) {
|
|
135
|
+
await cdp.send("Browser.grantPermissions", {
|
|
136
|
+
permissions: ["audioCapture"],
|
|
137
|
+
origin: origin ?? ""
|
|
138
|
+
});
|
|
139
|
+
await cdp.send("Page.addScriptToEvaluateOnNewDocument", {
|
|
140
|
+
source: PERMISSIONS_OVERRIDE_SCRIPT
|
|
141
|
+
});
|
|
142
|
+
}
|
|
143
|
+
var PERMISSIONS_OVERRIDE_SCRIPT = `
|
|
144
|
+
(function() {
|
|
145
|
+
if (window.__bpPermissionsPatched) return;
|
|
146
|
+
window.__bpPermissionsPatched = true;
|
|
147
|
+
|
|
148
|
+
var origQuery = navigator.permissions.query.bind(navigator.permissions);
|
|
149
|
+
navigator.permissions.query = function(desc) {
|
|
150
|
+
if (desc && (desc.name === 'microphone' || desc.name === 'audio-capture')) {
|
|
151
|
+
return Promise.resolve({
|
|
152
|
+
state: 'granted',
|
|
153
|
+
onchange: null,
|
|
154
|
+
addEventListener: function() {},
|
|
155
|
+
removeEventListener: function() {},
|
|
156
|
+
dispatchEvent: function() { return true; }
|
|
157
|
+
});
|
|
158
|
+
}
|
|
159
|
+
return origQuery(desc);
|
|
160
|
+
};
|
|
161
|
+
})();
|
|
162
|
+
`;
|
|
163
|
+
|
|
164
|
+
// src/audio/input.ts
|
|
165
|
+
var INPUT_BINDING = "__bpAudioInputDone";
|
|
166
|
+
var AUDIO_INPUT_SCRIPT = `
|
|
167
|
+
(function() {
|
|
168
|
+
if (window.__bpAudioInput) return;
|
|
169
|
+
|
|
170
|
+
var audioCtx = null;
|
|
171
|
+
var sourceNode = null;
|
|
172
|
+
var destinationNode = null;
|
|
173
|
+
var fakeStream = null;
|
|
174
|
+
var silenceGain = null;
|
|
175
|
+
var silenceOsc = null;
|
|
176
|
+
var isPlaying = false;
|
|
177
|
+
|
|
178
|
+
function ensureFakeStream() {
|
|
179
|
+
if (fakeStream) return fakeStream;
|
|
180
|
+
// Use the original AudioContext to avoid being tracked by our output override
|
|
181
|
+
var CtorToUse = window.__bpOrigAudioContext || window.AudioContext || window.webkitAudioContext;
|
|
182
|
+
audioCtx = new CtorToUse({ sampleRate: 48000 });
|
|
183
|
+
// Auto-resume if suspended (CDP automation has no user gesture)
|
|
184
|
+
if (audioCtx.state === 'suspended') {
|
|
185
|
+
console.log('[bp:input] AudioContext suspended, auto-resuming...');
|
|
186
|
+
audioCtx.resume().then(function() {
|
|
187
|
+
console.log('[bp:input] AudioContext resumed (' + audioCtx.state + ')');
|
|
188
|
+
}).catch(function(e) {
|
|
189
|
+
console.warn('[bp:input] AudioContext resume failed:', e);
|
|
190
|
+
});
|
|
191
|
+
}
|
|
192
|
+
destinationNode = audioCtx.createMediaStreamDestination();
|
|
193
|
+
|
|
194
|
+
// Start with silence so the stream always has active tracks
|
|
195
|
+
silenceGain = audioCtx.createGain();
|
|
196
|
+
silenceGain.gain.value = 0;
|
|
197
|
+
silenceOsc = audioCtx.createOscillator();
|
|
198
|
+
silenceOsc.connect(silenceGain);
|
|
199
|
+
silenceGain.connect(destinationNode);
|
|
200
|
+
silenceOsc.start();
|
|
201
|
+
|
|
202
|
+
fakeStream = destinationNode.stream;
|
|
203
|
+
console.log('[bp:input] Fake mic stream created (48kHz, ' + fakeStream.getAudioTracks().length + ' tracks)');
|
|
204
|
+
return fakeStream;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
function playAudio(base64Data) {
|
|
208
|
+
ensureFakeStream();
|
|
209
|
+
|
|
210
|
+
var resumePromise = audioCtx.state === 'suspended'
|
|
211
|
+
? audioCtx.resume()
|
|
212
|
+
: Promise.resolve();
|
|
213
|
+
|
|
214
|
+
return resumePromise.then(function() {
|
|
215
|
+
if (sourceNode) {
|
|
216
|
+
try { sourceNode.stop(); } catch(e) {}
|
|
217
|
+
sourceNode.disconnect();
|
|
218
|
+
sourceNode = null;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
var binaryStr = atob(base64Data);
|
|
222
|
+
var bytes = new Uint8Array(binaryStr.length);
|
|
223
|
+
for (var i = 0; i < binaryStr.length; i++) {
|
|
224
|
+
bytes[i] = binaryStr.charCodeAt(i);
|
|
225
|
+
}
|
|
226
|
+
console.log('[bp:input] Decoding audio (' + bytes.length + ' bytes)...');
|
|
227
|
+
|
|
228
|
+
return audioCtx.decodeAudioData(bytes.buffer.slice(0));
|
|
229
|
+
}).then(function(audioBuffer) {
|
|
230
|
+
sourceNode = audioCtx.createBufferSource();
|
|
231
|
+
sourceNode.buffer = audioBuffer;
|
|
232
|
+
sourceNode.connect(destinationNode);
|
|
233
|
+
|
|
234
|
+
var durationMs = Math.round(audioBuffer.duration * 1000);
|
|
235
|
+
console.log('[bp:input] Playing ' + durationMs + 'ms audio (' + audioBuffer.sampleRate + 'Hz, ' + audioBuffer.numberOfChannels + 'ch)');
|
|
236
|
+
|
|
237
|
+
return new Promise(function(resolve) {
|
|
238
|
+
sourceNode.onended = function() {
|
|
239
|
+
isPlaying = false;
|
|
240
|
+
console.log('[bp:input] Playback ended');
|
|
241
|
+
resolve(true);
|
|
242
|
+
try {
|
|
243
|
+
if (typeof window.__bpAudioInputDone === 'function') {
|
|
244
|
+
window.__bpAudioInputDone('done');
|
|
245
|
+
}
|
|
246
|
+
} catch(e) {}
|
|
247
|
+
};
|
|
248
|
+
isPlaying = true;
|
|
249
|
+
sourceNode.start();
|
|
250
|
+
});
|
|
251
|
+
});
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
function stopAudio() {
|
|
255
|
+
if (sourceNode) {
|
|
256
|
+
try { sourceNode.stop(); } catch(e) {}
|
|
257
|
+
sourceNode.disconnect();
|
|
258
|
+
sourceNode = null;
|
|
259
|
+
}
|
|
260
|
+
isPlaying = false;
|
|
261
|
+
console.log('[bp:input] Stopped');
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
var origGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);
|
|
265
|
+
|
|
266
|
+
navigator.mediaDevices.getUserMedia = function(constraints) {
|
|
267
|
+
if (constraints && constraints.audio) {
|
|
268
|
+
var stream = ensureFakeStream();
|
|
269
|
+
console.log('[bp:input] getUserMedia intercepted \u2014 returning fake mic' + (constraints.video ? ' + real video' : ''));
|
|
270
|
+
|
|
271
|
+
if (constraints.video) {
|
|
272
|
+
// Get real video + our fake audio
|
|
273
|
+
return origGetUserMedia({ video: constraints.video }).then(function(realStream) {
|
|
274
|
+
var combined = new MediaStream(
|
|
275
|
+
stream.getAudioTracks().concat(realStream.getVideoTracks())
|
|
276
|
+
);
|
|
277
|
+
return combined;
|
|
278
|
+
});
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// Return a clone so consumers can't stop our source track
|
|
282
|
+
return Promise.resolve(stream.clone());
|
|
283
|
+
}
|
|
284
|
+
return origGetUserMedia(constraints);
|
|
285
|
+
};
|
|
286
|
+
|
|
287
|
+
var origEnumerate = navigator.mediaDevices.enumerateDevices.bind(navigator.mediaDevices);
|
|
288
|
+
navigator.mediaDevices.enumerateDevices = function() {
|
|
289
|
+
return origEnumerate().then(function(devices) {
|
|
290
|
+
var hasMic = devices.some(function(d) { return d.kind === 'audioinput'; });
|
|
291
|
+
if (!hasMic) {
|
|
292
|
+
devices.push({
|
|
293
|
+
deviceId: 'bp-fake-mic',
|
|
294
|
+
kind: 'audioinput',
|
|
295
|
+
label: 'Default Audio Input',
|
|
296
|
+
groupId: 'bp-audio',
|
|
297
|
+
toJSON: function() {
|
|
298
|
+
return { deviceId: this.deviceId, kind: this.kind, label: this.label, groupId: this.groupId };
|
|
299
|
+
}
|
|
300
|
+
});
|
|
301
|
+
}
|
|
302
|
+
return devices;
|
|
303
|
+
});
|
|
304
|
+
};
|
|
305
|
+
|
|
306
|
+
window.__bpAudioInput = {
|
|
307
|
+
play: playAudio,
|
|
308
|
+
stop: stopAudio,
|
|
309
|
+
isPlaying: function() { return isPlaying; },
|
|
310
|
+
getState: function() {
|
|
311
|
+
return {
|
|
312
|
+
contextState: audioCtx ? audioCtx.state : 'not-created',
|
|
313
|
+
isPlaying: isPlaying,
|
|
314
|
+
sampleRate: audioCtx ? audioCtx.sampleRate : 0
|
|
315
|
+
};
|
|
316
|
+
},
|
|
317
|
+
getContext: function() { return audioCtx; }
|
|
318
|
+
};
|
|
319
|
+
|
|
320
|
+
console.log('[bp:input] Audio input override installed (getUserMedia + enumerateDevices)');
|
|
321
|
+
})();
|
|
322
|
+
`;
|
|
323
|
+
var AudioInput = class {
|
|
324
|
+
cdp;
|
|
325
|
+
injected = false;
|
|
326
|
+
bindingRegistered = false;
|
|
327
|
+
bindingHandler = null;
|
|
328
|
+
constructor(cdp) {
|
|
329
|
+
this.cdp = cdp;
|
|
330
|
+
}
|
|
331
|
+
/** Whether the audio input system has been set up */
|
|
332
|
+
get isSetup() {
|
|
333
|
+
return this.injected;
|
|
334
|
+
}
|
|
335
|
+
/**
|
|
336
|
+
* Set up audio input injection.
|
|
337
|
+
* Must be called before navigating to the page that will use getUserMedia.
|
|
338
|
+
* Grants permissions and injects the getUserMedia override.
|
|
339
|
+
*/
|
|
340
|
+
async setup() {
|
|
341
|
+
if (this.injected) return;
|
|
342
|
+
try {
|
|
343
|
+
const resp = await this.cdp.send("Runtime.evaluate", {
|
|
344
|
+
expression: "location.href",
|
|
345
|
+
returnByValue: true
|
|
346
|
+
});
|
|
347
|
+
const href = resp.result?.value;
|
|
348
|
+
if (typeof href === "string" && (href === "about:blank" || href === "about:srcdoc")) {
|
|
349
|
+
throw new Error(
|
|
350
|
+
'Cannot set up audio on about:blank. Navigate to a page first.\nExample: await page.goto("https://your-voice-app.com")'
|
|
351
|
+
);
|
|
352
|
+
}
|
|
353
|
+
} catch (e) {
|
|
354
|
+
if (e instanceof Error && e.message.includes("Cannot set up audio")) throw e;
|
|
355
|
+
}
|
|
356
|
+
let origin;
|
|
357
|
+
try {
|
|
358
|
+
const resp = await this.cdp.send("Runtime.evaluate", {
|
|
359
|
+
expression: "location.origin",
|
|
360
|
+
returnByValue: true
|
|
361
|
+
});
|
|
362
|
+
const val = resp.result?.value;
|
|
363
|
+
if (typeof val === "string" && val !== "null") {
|
|
364
|
+
origin = val;
|
|
365
|
+
}
|
|
366
|
+
} catch {
|
|
367
|
+
}
|
|
368
|
+
await grantAudioPermissions(this.cdp, origin);
|
|
369
|
+
if (!this.bindingRegistered) {
|
|
370
|
+
await this.cdp.send("Runtime.addBinding", { name: INPUT_BINDING });
|
|
371
|
+
this.bindingRegistered = true;
|
|
372
|
+
}
|
|
373
|
+
await this.cdp.send("Page.addScriptToEvaluateOnNewDocument", {
|
|
374
|
+
source: AUDIO_INPUT_SCRIPT
|
|
375
|
+
});
|
|
376
|
+
await this.cdp.send("Runtime.evaluate", {
|
|
377
|
+
expression: AUDIO_INPUT_SCRIPT,
|
|
378
|
+
awaitPromise: false,
|
|
379
|
+
userGesture: true
|
|
380
|
+
});
|
|
381
|
+
this.injected = true;
|
|
382
|
+
}
|
|
383
|
+
/**
|
|
384
|
+
* Play audio bytes into the page's fake microphone.
|
|
385
|
+
* Accepts any format that Web Audio API can decode (WAV, MP3, OGG, etc.).
|
|
386
|
+
*
|
|
387
|
+
* @param audioData - Raw audio file bytes
|
|
388
|
+
* @param options - Playback options
|
|
389
|
+
*/
|
|
390
|
+
async play(audioData, options) {
|
|
391
|
+
if (!this.injected) {
|
|
392
|
+
await this.setup();
|
|
393
|
+
}
|
|
394
|
+
await this.cdp.send("Runtime.evaluate", {
|
|
395
|
+
expression: `(function() {
|
|
396
|
+
var resumed = [];
|
|
397
|
+
(window.__bpTrackedAudioContexts || []).forEach(function(ctx) {
|
|
398
|
+
if (ctx.state === 'suspended') {
|
|
399
|
+
ctx.resume().then(function() {
|
|
400
|
+
console.log('[bp:input] Resumed suspended AudioContext (' + ctx.sampleRate + 'Hz)');
|
|
401
|
+
});
|
|
402
|
+
resumed.push(ctx.sampleRate);
|
|
403
|
+
}
|
|
404
|
+
});
|
|
405
|
+
// Also resume the input context itself
|
|
406
|
+
if (window.__bpAudioInput && window.__bpAudioInput.getContext) {
|
|
407
|
+
var inputCtx = window.__bpAudioInput.getContext();
|
|
408
|
+
if (inputCtx && inputCtx.state === 'suspended') {
|
|
409
|
+
inputCtx.resume().then(function() {
|
|
410
|
+
console.log('[bp:input] Resumed input AudioContext (' + inputCtx.sampleRate + 'Hz)');
|
|
411
|
+
});
|
|
412
|
+
resumed.push('input-' + inputCtx.sampleRate);
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
return resumed.length > 0 ? 'resumed: ' + resumed.join(',') : 'all running';
|
|
416
|
+
})()`,
|
|
417
|
+
awaitPromise: false,
|
|
418
|
+
userGesture: true
|
|
419
|
+
});
|
|
420
|
+
const base64 = bufferToBase64(audioData);
|
|
421
|
+
const waitForEnd = options?.waitForEnd ?? true;
|
|
422
|
+
const timeout = options?.timeout ?? 6e4;
|
|
423
|
+
if (waitForEnd) {
|
|
424
|
+
const donePromise = this.waitForBinding(timeout);
|
|
425
|
+
await this.cdp.send("Runtime.evaluate", {
|
|
426
|
+
expression: `window.__bpAudioInput.play('${base64}')`,
|
|
427
|
+
awaitPromise: false
|
|
428
|
+
});
|
|
429
|
+
await donePromise;
|
|
430
|
+
} else {
|
|
431
|
+
await this.cdp.send("Runtime.evaluate", {
|
|
432
|
+
expression: `window.__bpAudioInput.play('${base64}')`,
|
|
433
|
+
awaitPromise: false
|
|
434
|
+
});
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
/**
|
|
438
|
+
* Stop any currently playing audio.
|
|
439
|
+
*/
|
|
440
|
+
async stop() {
|
|
441
|
+
if (!this.injected) return;
|
|
442
|
+
await this.cdp.send("Runtime.evaluate", {
|
|
443
|
+
expression: "window.__bpAudioInput && window.__bpAudioInput.stop()",
|
|
444
|
+
awaitPromise: false
|
|
445
|
+
});
|
|
446
|
+
}
|
|
447
|
+
/**
|
|
448
|
+
* Get current state of the injected audio input system.
|
|
449
|
+
*/
|
|
450
|
+
async getState() {
|
|
451
|
+
if (!this.injected) {
|
|
452
|
+
return { contextState: "not-created", isPlaying: false, sampleRate: 0 };
|
|
453
|
+
}
|
|
454
|
+
const result = await this.cdp.send("Runtime.evaluate", {
|
|
455
|
+
expression: "window.__bpAudioInput ? window.__bpAudioInput.getState() : null",
|
|
456
|
+
returnByValue: true
|
|
457
|
+
});
|
|
458
|
+
return result.result.value ?? { contextState: "not-created", isPlaying: false, sampleRate: 0 };
|
|
459
|
+
}
|
|
460
|
+
/**
|
|
461
|
+
* Clean up: remove binding handler.
|
|
462
|
+
*/
|
|
463
|
+
async teardown() {
|
|
464
|
+
if (this.bindingHandler) {
|
|
465
|
+
this.cdp.off("Runtime.bindingCalled", this.bindingHandler);
|
|
466
|
+
this.bindingHandler = null;
|
|
467
|
+
}
|
|
468
|
+
await this.stop();
|
|
469
|
+
this.injected = false;
|
|
470
|
+
this.bindingRegistered = false;
|
|
471
|
+
}
|
|
472
|
+
/**
|
|
473
|
+
* Wait for the playback-complete binding to fire.
|
|
474
|
+
*/
|
|
475
|
+
waitForBinding(timeout) {
|
|
476
|
+
return new Promise((resolve, reject) => {
|
|
477
|
+
const timer = setTimeout(() => {
|
|
478
|
+
if (this.bindingHandler) {
|
|
479
|
+
this.cdp.off("Runtime.bindingCalled", this.bindingHandler);
|
|
480
|
+
this.bindingHandler = null;
|
|
481
|
+
}
|
|
482
|
+
reject(new Error(`AudioInput: playback timed out after ${timeout}ms`));
|
|
483
|
+
}, timeout);
|
|
484
|
+
if (this.bindingHandler) {
|
|
485
|
+
this.cdp.off("Runtime.bindingCalled", this.bindingHandler);
|
|
486
|
+
}
|
|
487
|
+
this.bindingHandler = (params) => {
|
|
488
|
+
if (params["name"] === INPUT_BINDING) {
|
|
489
|
+
clearTimeout(timer);
|
|
490
|
+
if (this.bindingHandler) {
|
|
491
|
+
this.cdp.off("Runtime.bindingCalled", this.bindingHandler);
|
|
492
|
+
this.bindingHandler = null;
|
|
493
|
+
}
|
|
494
|
+
resolve();
|
|
495
|
+
}
|
|
496
|
+
};
|
|
497
|
+
this.cdp.on("Runtime.bindingCalled", this.bindingHandler);
|
|
498
|
+
});
|
|
499
|
+
}
|
|
500
|
+
};
|
|
501
|
+
|
|
502
|
+
// src/audio/output.ts
|
|
503
|
+
var OUTPUT_BINDING = "__bpAudioOutputData";
|
|
504
|
+
var AUDIO_OUTPUT_SCRIPT = `
|
|
505
|
+
(function() {
|
|
506
|
+
// If already installed, stop any active capture but allow re-initialization
|
|
507
|
+
// so that updated scripts (e.g. with new capture strategies) take effect.
|
|
508
|
+
if (window.__bpAudioOutput) {
|
|
509
|
+
if (window.__bpAudioOutput.isCapturing()) window.__bpAudioOutput.stop();
|
|
510
|
+
// Keep existing allAudioContexts if available (preserves pre-override tracking)
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
var BUFFER_SIZE = 4096;
|
|
514
|
+
var FLUSH_SAMPLES = 48000; // flush every ~1s at 48kHz (scales with sample rate)
|
|
515
|
+
var capturing = false;
|
|
516
|
+
var capturedChunks = [];
|
|
517
|
+
var totalSamples = 0;
|
|
518
|
+
var flushCount = 0;
|
|
519
|
+
var pendingTracks = [];
|
|
520
|
+
var tappedTrackIds = {};
|
|
521
|
+
|
|
522
|
+
// --- Per-context tap infrastructure ---
|
|
523
|
+
// Preserve any AudioContexts tracked by a previous script version
|
|
524
|
+
var allAudioContexts = window.__bpTrackedAudioContexts || [];
|
|
525
|
+
// Use a WeakMap to associate taps with AudioContext instances
|
|
526
|
+
// (native objects like AudioContext may not support custom properties)
|
|
527
|
+
var contextTapMap = typeof WeakMap !== 'undefined' ? new WeakMap() : null;
|
|
528
|
+
var contextTapList = []; // fallback: [{ctx, proc}]
|
|
529
|
+
|
|
530
|
+
var OrigAudioContext = window.__bpOrigAudioContext || window.AudioContext || window.webkitAudioContext;
|
|
531
|
+
// Save the native connect function once; on re-injection, reuse it to avoid double-wrapping
|
|
532
|
+
var origConnect = window.__bpOrigConnect || AudioNode.prototype.connect;
|
|
533
|
+
window.__bpOrigConnect = origConnect;
|
|
534
|
+
|
|
535
|
+
// Our own capture context (48kHz) for WebRTC tracks and media elements
|
|
536
|
+
var captureCtx = null;
|
|
537
|
+
var captureProcessor = null;
|
|
538
|
+
|
|
539
|
+
// Save original AudioContext constructor once
|
|
540
|
+
if (!window.__bpOrigAudioContext) {
|
|
541
|
+
window.__bpOrigAudioContext = OrigAudioContext;
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
// Override AudioContext constructor to track all instances (skip if already overridden)
|
|
545
|
+
if (OrigAudioContext && !window.__bpAudioContextOverridden) {
|
|
546
|
+
window.__bpAudioContextOverridden = true;
|
|
547
|
+
window.AudioContext = function() {
|
|
548
|
+
var ctx = new (Function.prototype.bind.apply(OrigAudioContext, [null].concat(Array.prototype.slice.call(arguments))))();
|
|
549
|
+
allAudioContexts.push(ctx);
|
|
550
|
+
// Auto-resume suspended contexts \u2014 CDP automation has no user gesture,
|
|
551
|
+
// so Chrome suspends new AudioContexts by default. Without this, voice
|
|
552
|
+
// agents' ScriptProcessorNodes never fire and no audio flows.
|
|
553
|
+
if (ctx.state === 'suspended') {
|
|
554
|
+
console.log('[bp:output] AudioContext created suspended (' + ctx.sampleRate + 'Hz), auto-resuming...');
|
|
555
|
+
ctx.resume().then(function() {
|
|
556
|
+
console.log('[bp:output] AudioContext resumed successfully (' + ctx.sampleRate + 'Hz, state: ' + ctx.state + ')');
|
|
557
|
+
}).catch(function(e) {
|
|
558
|
+
console.warn('[bp:output] AudioContext resume failed (' + ctx.sampleRate + 'Hz):', e);
|
|
559
|
+
});
|
|
560
|
+
} else {
|
|
561
|
+
console.log('[bp:output] AudioContext created (' + ctx.sampleRate + 'Hz, state: ' + ctx.state + ')');
|
|
562
|
+
}
|
|
563
|
+
return ctx;
|
|
564
|
+
};
|
|
565
|
+
window.AudioContext.prototype = OrigAudioContext.prototype;
|
|
566
|
+
Object.keys(OrigAudioContext).forEach(function(k) {
|
|
567
|
+
try { window.AudioContext[k] = OrigAudioContext[k]; } catch(e) {}
|
|
568
|
+
});
|
|
569
|
+
if (window.webkitAudioContext) {
|
|
570
|
+
window.webkitAudioContext = window.AudioContext;
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
// Expose tracked contexts on window so re-injections preserve them
|
|
575
|
+
window.__bpTrackedAudioContexts = allAudioContexts;
|
|
576
|
+
|
|
577
|
+
// Look up an existing tap for a given AudioContext
|
|
578
|
+
function findTap(ctx) {
|
|
579
|
+
if (contextTapMap) return contextTapMap.get(ctx) || null;
|
|
580
|
+
for (var i = 0; i < contextTapList.length; i++) {
|
|
581
|
+
if (contextTapList[i].ctx === ctx) return contextTapList[i].proc;
|
|
582
|
+
}
|
|
583
|
+
return null;
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
// Store a tap for a given AudioContext
|
|
587
|
+
function storeTap(ctx, proc) {
|
|
588
|
+
if (contextTapMap) { contextTapMap.set(ctx, proc); }
|
|
589
|
+
else { contextTapList.push({ ctx: ctx, proc: proc }); }
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
// Count stored taps
|
|
593
|
+
function tapCount() {
|
|
594
|
+
if (contextTapMap) {
|
|
595
|
+
var count = 0;
|
|
596
|
+
for (var i = 0; i < allAudioContexts.length; i++) {
|
|
597
|
+
if (contextTapMap.has(allAudioContexts[i])) count++;
|
|
598
|
+
}
|
|
599
|
+
return count;
|
|
600
|
+
}
|
|
601
|
+
return contextTapList.length;
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
// Create or retrieve a ScriptProcessorNode tap for a specific AudioContext.
|
|
605
|
+
// The tap lives in the SAME context as the source, avoiding cross-context errors.
|
|
606
|
+
function getOrCreateTap(ctx) {
|
|
607
|
+
var existing = findTap(ctx);
|
|
608
|
+
if (existing) return existing;
|
|
609
|
+
|
|
610
|
+
try {
|
|
611
|
+
if (ctx.state === 'closed') return null;
|
|
612
|
+
var channels = Math.min(ctx.destination.channelCount || 2, 2);
|
|
613
|
+
if (channels < 1) channels = 1;
|
|
614
|
+
var proc = ctx.createScriptProcessor(BUFFER_SIZE, channels, channels);
|
|
615
|
+
proc.onaudioprocess = function(e) {
|
|
616
|
+
if (!capturing) return;
|
|
617
|
+
var left = new Float32Array(e.inputBuffer.getChannelData(0));
|
|
618
|
+
var right = e.inputBuffer.numberOfChannels > 1
|
|
619
|
+
? new Float32Array(e.inputBuffer.getChannelData(1))
|
|
620
|
+
: new Float32Array(left.length);
|
|
621
|
+
capturedChunks.push({ left: left, right: right, sampleRate: ctx.sampleRate });
|
|
622
|
+
totalSamples += left.length;
|
|
623
|
+
if (totalSamples >= FLUSH_SAMPLES) {
|
|
624
|
+
flushToNodeJs();
|
|
625
|
+
}
|
|
626
|
+
};
|
|
627
|
+
// Must connect to destination to keep ScriptProcessorNode alive
|
|
628
|
+
origConnect.call(proc, ctx.destination);
|
|
629
|
+
storeTap(ctx, proc);
|
|
630
|
+
return proc;
|
|
631
|
+
} catch(e) {
|
|
632
|
+
return null;
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
// Override AudioNode.prototype.connect to tap connections to any AudioDestinationNode
|
|
637
|
+
AudioNode.prototype.connect = function(destination) {
|
|
638
|
+
var result = origConnect.apply(this, arguments);
|
|
639
|
+
|
|
640
|
+
if (capturing && destination instanceof AudioDestinationNode) {
|
|
641
|
+
try {
|
|
642
|
+
var tap = getOrCreateTap(destination.context);
|
|
643
|
+
// Don't connect the tap to itself
|
|
644
|
+
if (tap && tap !== this) {
|
|
645
|
+
origConnect.call(this, tap);
|
|
646
|
+
}
|
|
647
|
+
} catch(e) {}
|
|
648
|
+
}
|
|
649
|
+
return result;
|
|
650
|
+
};
|
|
651
|
+
|
|
652
|
+
var origPlay = window.__bpOrigPlay || HTMLMediaElement.prototype.play;
|
|
653
|
+
window.__bpOrigPlay = origPlay;
|
|
654
|
+
HTMLMediaElement.prototype.play = function() {
|
|
655
|
+
if (capturing && !this.__bpCaptured) {
|
|
656
|
+
this.__bpCaptured = true;
|
|
657
|
+
try {
|
|
658
|
+
if (!captureCtx) initCaptureCtx();
|
|
659
|
+
var stream = this.captureStream ? this.captureStream() : null;
|
|
660
|
+
if (stream && captureCtx) {
|
|
661
|
+
var source = captureCtx.createMediaStreamSource(stream);
|
|
662
|
+
origConnect.call(source, captureProcessor);
|
|
663
|
+
}
|
|
664
|
+
} catch(e) {}
|
|
665
|
+
}
|
|
666
|
+
return origPlay.apply(this, arguments);
|
|
667
|
+
};
|
|
668
|
+
|
|
669
|
+
// Intercept srcObject assignment to catch WebRTC streams attached to media elements
|
|
670
|
+
var origSrcObjectDesc = Object.getOwnPropertyDescriptor(HTMLMediaElement.prototype, 'srcObject');
|
|
671
|
+
if (origSrcObjectDesc && origSrcObjectDesc.set) {
|
|
672
|
+
Object.defineProperty(HTMLMediaElement.prototype, 'srcObject', {
|
|
673
|
+
set: function(stream) {
|
|
674
|
+
origSrcObjectDesc.set.call(this, stream);
|
|
675
|
+
if (stream && stream.getAudioTracks) {
|
|
676
|
+
var tracks = stream.getAudioTracks();
|
|
677
|
+
for (var i = 0; i < tracks.length; i++) {
|
|
678
|
+
if (capturing) {
|
|
679
|
+
tapAudioTrack(tracks[i]);
|
|
680
|
+
} else {
|
|
681
|
+
pendingTracks.push(tracks[i]);
|
|
682
|
+
}
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
},
|
|
686
|
+
get: origSrcObjectDesc.get,
|
|
687
|
+
configurable: true
|
|
688
|
+
});
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
// Initialize our own 48kHz capture context for WebRTC and media element tapping
|
|
692
|
+
function initCaptureCtx() {
|
|
693
|
+
captureCtx = new OrigAudioContext({ sampleRate: 48000 });
|
|
694
|
+
captureProcessor = captureCtx.createScriptProcessor(BUFFER_SIZE, 2, 2);
|
|
695
|
+
captureProcessor.onaudioprocess = function(e) {
|
|
696
|
+
if (!capturing) return;
|
|
697
|
+
var left = new Float32Array(e.inputBuffer.getChannelData(0));
|
|
698
|
+
var right = new Float32Array(e.inputBuffer.getChannelData(1));
|
|
699
|
+
capturedChunks.push({ left: left, right: right, sampleRate: 48000 });
|
|
700
|
+
totalSamples += left.length;
|
|
701
|
+
if (totalSamples >= FLUSH_SAMPLES) {
|
|
702
|
+
flushToNodeJs();
|
|
703
|
+
}
|
|
704
|
+
};
|
|
705
|
+
origConnect.call(captureProcessor, captureCtx.destination);
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
function uint8ToBase64(bytes) {
|
|
709
|
+
var CHUNK = 8192;
|
|
710
|
+
var parts = [];
|
|
711
|
+
for (var i = 0; i < bytes.length; i += CHUNK) {
|
|
712
|
+
var slice = bytes.subarray(i, Math.min(i + CHUNK, bytes.length));
|
|
713
|
+
var binary = '';
|
|
714
|
+
for (var j = 0; j < slice.length; j++) {
|
|
715
|
+
binary += String.fromCharCode(slice[j]);
|
|
716
|
+
}
|
|
717
|
+
parts.push(binary);
|
|
718
|
+
}
|
|
719
|
+
return btoa(parts.join(''));
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
function flushGroup(chunks, rate) {
|
|
723
|
+
var totalLen = 0;
|
|
724
|
+
for (var i = 0; i < chunks.length; i++) {
|
|
725
|
+
totalLen += chunks[i].left.length;
|
|
726
|
+
}
|
|
727
|
+
if (totalLen === 0) return;
|
|
728
|
+
|
|
729
|
+
var left = new Float32Array(totalLen);
|
|
730
|
+
var right = new Float32Array(totalLen);
|
|
731
|
+
var offset = 0;
|
|
732
|
+
for (var i = 0; i < chunks.length; i++) {
|
|
733
|
+
left.set(chunks[i].left, offset);
|
|
734
|
+
right.set(chunks[i].right, offset);
|
|
735
|
+
offset += chunks[i].left.length;
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
var leftB64 = uint8ToBase64(new Uint8Array(left.buffer));
|
|
739
|
+
var rightB64 = uint8ToBase64(new Uint8Array(right.buffer));
|
|
740
|
+
|
|
741
|
+
flushCount++;
|
|
742
|
+
|
|
743
|
+
try {
|
|
744
|
+
if (typeof window.__bpAudioOutputData === 'function') {
|
|
745
|
+
window.__bpAudioOutputData(JSON.stringify({
|
|
746
|
+
left: leftB64,
|
|
747
|
+
right: rightB64,
|
|
748
|
+
sampleRate: rate,
|
|
749
|
+
samples: totalLen
|
|
750
|
+
}));
|
|
751
|
+
}
|
|
752
|
+
} catch(e) {}
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
function flushToNodeJs() {
|
|
756
|
+
if (capturedChunks.length === 0) return;
|
|
757
|
+
|
|
758
|
+
// Group chunks by sample rate to avoid mixing different-rate audio
|
|
759
|
+
var byRate = {};
|
|
760
|
+
for (var i = 0; i < capturedChunks.length; i++) {
|
|
761
|
+
var rate = capturedChunks[i].sampleRate || 48000;
|
|
762
|
+
if (!byRate[rate]) byRate[rate] = [];
|
|
763
|
+
byRate[rate].push(capturedChunks[i]);
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
// Flush each sample rate group separately
|
|
767
|
+
for (var rateKey in byRate) {
|
|
768
|
+
if (byRate.hasOwnProperty(rateKey)) {
|
|
769
|
+
flushGroup(byRate[rateKey], Number(rateKey));
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
capturedChunks = [];
|
|
774
|
+
totalSamples = 0;
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
// --- WebRTC interception (for apps that use RTCPeerConnection) ---
|
|
778
|
+
var rtcTrackedStreams = [];
|
|
779
|
+
var rtcPeerConnections = [];
|
|
780
|
+
|
|
781
|
+
function tapAudioTrack(track) {
|
|
782
|
+
try {
|
|
783
|
+
if (tappedTrackIds[track.id]) return;
|
|
784
|
+
tappedTrackIds[track.id] = true;
|
|
785
|
+
if (!captureCtx) initCaptureCtx();
|
|
786
|
+
var stream = new MediaStream([track]);
|
|
787
|
+
var source = captureCtx.createMediaStreamSource(stream);
|
|
788
|
+
origConnect.call(source, captureProcessor);
|
|
789
|
+
rtcTrackedStreams.push(source);
|
|
790
|
+
} catch(e) {}
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
function tapExistingPeerConnection(pc) {
|
|
794
|
+
try {
|
|
795
|
+
var receivers = pc.getReceivers ? pc.getReceivers() : [];
|
|
796
|
+
for (var i = 0; i < receivers.length; i++) {
|
|
797
|
+
if (receivers[i].track && receivers[i].track.kind === 'audio') {
|
|
798
|
+
tapAudioTrack(receivers[i].track);
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
} catch(e) {}
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
if (typeof RTCPeerConnection !== 'undefined') {
|
|
805
|
+
var OrigRTC = RTCPeerConnection;
|
|
806
|
+
|
|
807
|
+
window.RTCPeerConnection = function() {
|
|
808
|
+
var pc = new (Function.prototype.bind.apply(OrigRTC, [null].concat(Array.prototype.slice.call(arguments))))();
|
|
809
|
+
rtcPeerConnections.push(pc);
|
|
810
|
+
|
|
811
|
+
pc.addEventListener('track', function(event) {
|
|
812
|
+
if (event.track && event.track.kind === 'audio') {
|
|
813
|
+
if (capturing) {
|
|
814
|
+
tapAudioTrack(event.track);
|
|
815
|
+
} else {
|
|
816
|
+
pendingTracks.push(event.track);
|
|
817
|
+
}
|
|
818
|
+
}
|
|
819
|
+
});
|
|
820
|
+
|
|
821
|
+
return pc;
|
|
822
|
+
};
|
|
823
|
+
window.RTCPeerConnection.prototype = OrigRTC.prototype;
|
|
824
|
+
Object.keys(OrigRTC).forEach(function(k) {
|
|
825
|
+
try { window.RTCPeerConnection[k] = OrigRTC[k]; } catch(e) {}
|
|
826
|
+
});
|
|
827
|
+
|
|
828
|
+
window.__bpTrackedPCs = rtcPeerConnections;
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
window.__bpAudioOutput = {
|
|
832
|
+
start: function() {
|
|
833
|
+
capturing = true;
|
|
834
|
+
capturedChunks = [];
|
|
835
|
+
totalSamples = 0;
|
|
836
|
+
flushCount = 0;
|
|
837
|
+
tappedTrackIds = {};
|
|
838
|
+
|
|
839
|
+
// Resume any suspended capture context
|
|
840
|
+
if (captureCtx && captureCtx.state === 'suspended') captureCtx.resume();
|
|
841
|
+
|
|
842
|
+
// Create taps for all tracked AudioContexts (catches contexts created before capture)
|
|
843
|
+
for (var i = 0; i < allAudioContexts.length; i++) {
|
|
844
|
+
var ctx = allAudioContexts[i];
|
|
845
|
+
if (ctx.state !== 'closed') {
|
|
846
|
+
getOrCreateTap(ctx);
|
|
847
|
+
}
|
|
848
|
+
}
|
|
849
|
+
|
|
850
|
+
// Drain pending WebRTC tracks
|
|
851
|
+
for (var j = 0; j < pendingTracks.length; j++) {
|
|
852
|
+
tapAudioTrack(pendingTracks[j]);
|
|
853
|
+
}
|
|
854
|
+
pendingTracks = [];
|
|
855
|
+
|
|
856
|
+
// Tap existing peer connections
|
|
857
|
+
for (var k = 0; k < rtcPeerConnections.length; k++) {
|
|
858
|
+
tapExistingPeerConnection(rtcPeerConnections[k]);
|
|
859
|
+
}
|
|
860
|
+
|
|
861
|
+
// Scan existing media elements for srcObject with audio tracks
|
|
862
|
+
var mediaEls = document.querySelectorAll('audio, video');
|
|
863
|
+
for (var i = 0; i < mediaEls.length; i++) {
|
|
864
|
+
var el = mediaEls[i];
|
|
865
|
+
if (el.srcObject && el.srcObject.getAudioTracks && !el.__bpCaptured) {
|
|
866
|
+
el.__bpCaptured = true;
|
|
867
|
+
var tracks = el.srcObject.getAudioTracks();
|
|
868
|
+
for (var j = 0; j < tracks.length; j++) {
|
|
869
|
+
tapAudioTrack(tracks[j]);
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
// Watch for dynamically added media elements with srcObject
|
|
875
|
+
if (typeof MutationObserver !== 'undefined') {
|
|
876
|
+
if (window.__bpMediaObserver) {
|
|
877
|
+
window.__bpMediaObserver.disconnect();
|
|
878
|
+
}
|
|
879
|
+
window.__bpMediaObserver = new MutationObserver(function(mutations) {
|
|
880
|
+
for (var i = 0; i < mutations.length; i++) {
|
|
881
|
+
var added = mutations[i].addedNodes;
|
|
882
|
+
for (var j = 0; j < added.length; j++) {
|
|
883
|
+
var node = added[j];
|
|
884
|
+
if (node.nodeType !== 1) continue;
|
|
885
|
+
var els = [];
|
|
886
|
+
if (node.tagName === 'AUDIO' || node.tagName === 'VIDEO') els.push(node);
|
|
887
|
+
else if (node.querySelectorAll) {
|
|
888
|
+
var nested = node.querySelectorAll('audio, video');
|
|
889
|
+
for (var k = 0; k < nested.length; k++) els.push(nested[k]);
|
|
890
|
+
}
|
|
891
|
+
for (var m = 0; m < els.length; m++) {
|
|
892
|
+
var el = els[m];
|
|
893
|
+
if (el.srcObject && el.srcObject.getAudioTracks && !el.__bpCaptured) {
|
|
894
|
+
el.__bpCaptured = true;
|
|
895
|
+
var tracks = el.srcObject.getAudioTracks();
|
|
896
|
+
for (var t = 0; t < tracks.length; t++) tapAudioTrack(tracks[t]);
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
}
|
|
901
|
+
});
|
|
902
|
+
window.__bpMediaObserver.observe(document, { childList: true, subtree: true });
|
|
903
|
+
}
|
|
904
|
+
},
|
|
905
|
+
stop: function() {
|
|
906
|
+
capturing = false;
|
|
907
|
+
flushToNodeJs();
|
|
908
|
+
// Disconnect MutationObserver
|
|
909
|
+
if (window.__bpMediaObserver) {
|
|
910
|
+
window.__bpMediaObserver.disconnect();
|
|
911
|
+
window.__bpMediaObserver = null;
|
|
912
|
+
}
|
|
913
|
+
},
|
|
914
|
+
isCapturing: function() { return capturing; },
|
|
915
|
+
getBufferedSamples: function() { return totalSamples; },
|
|
916
|
+
tapPC: function(pc) {
|
|
917
|
+
if (!pc || typeof pc.getReceivers !== 'function') return false;
|
|
918
|
+
if (rtcPeerConnections.indexOf(pc) === -1) {
|
|
919
|
+
rtcPeerConnections.push(pc);
|
|
920
|
+
}
|
|
921
|
+
if (capturing) {
|
|
922
|
+
tapExistingPeerConnection(pc);
|
|
923
|
+
}
|
|
924
|
+
pc.addEventListener('track', function(event) {
|
|
925
|
+
if (event.track && event.track.kind === 'audio') {
|
|
926
|
+
if (capturing) {
|
|
927
|
+
tapAudioTrack(event.track);
|
|
928
|
+
} else {
|
|
929
|
+
pendingTracks.push(event.track);
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
});
|
|
933
|
+
return true;
|
|
934
|
+
},
|
|
935
|
+
getStats: function() {
|
|
936
|
+
return {
|
|
937
|
+
audioContexts: allAudioContexts.filter(function(c) { return c.state !== 'closed'; }).length,
|
|
938
|
+
contextTaps: tapCount(),
|
|
939
|
+
audioNodes: captureCtx ? captureCtx.destination.numberOfInputs : 0,
|
|
940
|
+
rtcConnections: rtcPeerConnections.length,
|
|
941
|
+
mediaElements: document.querySelectorAll('audio, video').length,
|
|
942
|
+
pendingTracks: pendingTracks.length,
|
|
943
|
+
tappedTracks: Object.keys(tappedTrackIds).length,
|
|
944
|
+
capturing: capturing,
|
|
945
|
+
bufferedSamples: totalSamples,
|
|
946
|
+
rtcDetails: rtcPeerConnections.map(function(pc) {
|
|
947
|
+
try {
|
|
948
|
+
var receivers = pc.getReceivers ? pc.getReceivers() : [];
|
|
949
|
+
var senders = pc.getSenders ? pc.getSenders() : [];
|
|
950
|
+
var audioReceivers = receivers.filter(function(r) { return r.track && r.track.kind === 'audio'; }).length;
|
|
951
|
+
var audioSenders = senders.filter(function(s) { return s.track && s.track.kind === 'audio'; }).length;
|
|
952
|
+
return {
|
|
953
|
+
state: pc.connectionState || pc.iceConnectionState || 'unknown',
|
|
954
|
+
audioReceivers: audioReceivers,
|
|
955
|
+
audioSenders: audioSenders,
|
|
956
|
+
tapped: receivers.some(function(r) { return r.track && tappedTrackIds[r.track.id]; })
|
|
957
|
+
};
|
|
958
|
+
} catch(e) { return { state: 'error', audioReceivers: 0, audioSenders: 0, tapped: false }; }
|
|
959
|
+
}),
|
|
960
|
+
mediaElementDetails: (function() {
|
|
961
|
+
try {
|
|
962
|
+
var els = document.querySelectorAll('audio, video');
|
|
963
|
+
var details = [];
|
|
964
|
+
for (var i = 0; i < els.length; i++) {
|
|
965
|
+
var el = els[i];
|
|
966
|
+
var hasSrcObject = !!(el.srcObject);
|
|
967
|
+
var audioTracks = 0;
|
|
968
|
+
if (el.srcObject && el.srcObject.getAudioTracks) {
|
|
969
|
+
audioTracks = el.srcObject.getAudioTracks().length;
|
|
970
|
+
}
|
|
971
|
+
details.push({
|
|
972
|
+
tag: el.tagName.toLowerCase(),
|
|
973
|
+
hasSrcObject: hasSrcObject,
|
|
974
|
+
hasSrc: !!(el.src || el.currentSrc),
|
|
975
|
+
audioTracks: audioTracks,
|
|
976
|
+
tapped: !!(el.__bpCaptured)
|
|
977
|
+
});
|
|
978
|
+
}
|
|
979
|
+
return details;
|
|
980
|
+
} catch(e) { return []; }
|
|
981
|
+
})()
|
|
982
|
+
};
|
|
983
|
+
}
|
|
984
|
+
};
|
|
985
|
+
})();
|
|
986
|
+
`;
|
|
987
|
+
/**
 * Captures audio output (what the page plays) from a browser tab via CDP.
 *
 * The injected AUDIO_OUTPUT_SCRIPT taps Web Audio contexts, WebRTC receiver
 * tracks and media elements inside the page, and ships stereo Float32 PCM
 * back to Node as base64-encoded JSON payloads through the OUTPUT_BINDING
 * Runtime binding. This class installs that script, decodes and collects
 * the chunks, and merges them into one stereo buffer on stop().
 */
var AudioOutput = class {
  // CDP session used for Runtime/Page commands and binding events.
  cdp;
  // Decoded PCM chunks received from the page, in arrival order.
  chunks = [];
  // True once the capture script and binding are installed.
  injected = false;
  // True between start() and stop().
  capturing = false;
  // Listener attached to "Runtime.bindingCalled"; kept so teardown() can detach it.
  bindingHandler = null;
  // Optional real-time chunk subscriber (see onData()).
  onChunkHandler;
  // Optional diagnostics subscriber (see onDiag()).
  onDiagHandler;
  /** Timestamp of the first non-silent chunk received */
  firstChunkTime = null;
  constructor(cdp) {
    this.cdp = cdp;
  }
  /** Whether the audio output system has been set up */
  get isSetup() {
    return this.injected;
  }
  /** Whether audio is currently being captured */
  get isCapturing() {
    return this.capturing;
  }
  /**
   * Set up audio output capture.
   * Registers bindings and injects the capture script.
   */
  async setup() {
    if (this.injected) return;
    await this.cdp.send("Runtime.addBinding", { name: OUTPUT_BINDING });
    this.bindingHandler = (params) => {
      if (params["name"] === OUTPUT_BINDING) {
        this.handleAudioData(params["payload"]);
      }
    };
    this.cdp.on("Runtime.bindingCalled", this.bindingHandler);
    // Inject for future navigations AND evaluate in the current document,
    // so capture works no matter when setup() is called.
    await this.cdp.send("Page.addScriptToEvaluateOnNewDocument", {
      source: AUDIO_OUTPUT_SCRIPT
    });
    await this.cdp.send("Runtime.evaluate", {
      expression: AUDIO_OUTPUT_SCRIPT,
      awaitPromise: false,
      userGesture: true
    });
    this.injected = true;
  }
  /**
   * Start capturing audio output.
   */
  async start() {
    if (!this.injected) {
      await this.setup();
    }
    this.chunks = [];
    this.firstChunkTime = null;
    this.capturing = true;
    // Resume any suspended AudioContexts first — autoplay policy can leave
    // them suspended; userGesture: true lets resume() succeed.
    await this.cdp.send("Runtime.evaluate", {
      expression: `(function() {
  var resumed = [];
  (window.__bpTrackedAudioContexts || []).forEach(function(ctx) {
    if (ctx.state === 'suspended') {
      ctx.resume().then(function() {
        console.log('[bp:output] Resumed AudioContext (' + ctx.sampleRate + 'Hz) before capture');
      });
      resumed.push(ctx.sampleRate);
    }
  });
  if (window.__bpAudioInput && window.__bpAudioInput.getContext) {
    var inputCtx = window.__bpAudioInput.getContext();
    if (inputCtx && inputCtx.state === 'suspended') {
      inputCtx.resume();
      resumed.push('input-' + inputCtx.sampleRate);
    }
  }
  if (resumed.length) console.log('[bp:output] Resumed ' + resumed.length + ' contexts: ' + resumed.join(', '));
})()`,
      awaitPromise: false,
      userGesture: true
    });
    await this.cdp.send("Runtime.evaluate", {
      expression: "window.__bpAudioOutput && window.__bpAudioOutput.start()",
      awaitPromise: false
    });
    await this.discoverExistingPeerConnections();
    if (this.onDiagHandler) {
      try {
        const statsResult = await this.cdp.send(
          "Runtime.evaluate",
          {
            expression: "window.__bpAudioOutput && window.__bpAudioOutput.getStats()",
            returnByValue: true
          }
        );
        const stats = statsResult.result.value;
        if (stats) {
          this.onDiagHandler(
            `started \u2014 ${stats["audioContexts"]} AudioContexts, ${stats["contextTaps"]} taps, ${stats["rtcConnections"]} RTCPeerConnections, ${stats["mediaElements"]} MediaElements, ${stats["tappedTracks"]} tapped tracks`
          );
        }
      } catch {
      }
    }
  }
  /**
   * Stop capturing and return all collected audio.
   */
  async stop() {
    if (!this.injected) {
      return emptyCaptureResult();
    }
    await this.cdp.send("Runtime.evaluate", {
      expression: "window.__bpAudioOutput && window.__bpAudioOutput.stop()",
      awaitPromise: false
    });
    this.capturing = false;
    // Give the page's final flush time to arrive over the binding before merging.
    await sleep(250);
    return this.mergeChunks();
  }
  /**
   * Capture audio until silence is detected.
   *
   * Two-phase approach:
   * 1. **Wait phase**: Wait up to `maxDuration` for the first non-silent chunk.
   *    The silence countdown does NOT tick during this phase, so slow voice agents
   *    (STT → LLM → TTS can take 5-15s) don't cause premature timeout.
   * 2. **Capture phase**: Once audio is detected, capture until `silenceTimeout` ms
   *    of consecutive silence pass, then stop.
   */
  async captureUntilSilence(options) {
    const silenceTimeout = options?.silenceTimeout ?? 1500;
    const silenceThreshold = options?.silenceThreshold ?? 0.01;
    const maxDuration = options?.maxDuration ?? 3e5;
    const noAudioTimeout = options?.noAudioTimeout ?? 15e3;
    if (!this.capturing) {
      await this.start();
    }
    return new Promise((resolve) => {
      let heardAudio = false;
      let lastSoundTime = 0;
      const startTime = Date.now();
      // Poll the newest chunk every 200ms; RMS above the threshold counts as sound.
      const checkInterval = setInterval(async () => {
        const elapsed = Date.now() - startTime;
        if (elapsed > maxDuration) {
          clearInterval(checkInterval);
          this.onDiagHandler?.(`max duration reached (${maxDuration}ms), stopping`);
          resolve(await this.stop());
          return;
        }
        const latest = this.chunks[this.chunks.length - 1];
        if (latest) {
          const rms = calculateRMS(latest.left);
          if (rms > silenceThreshold) {
            if (!heardAudio) {
              heardAudio = true;
              this.onDiagHandler?.("first audio detected \u2014 silence countdown begins");
            }
            lastSoundTime = Date.now();
          }
        }
        if (!heardAudio && elapsed > noAudioTimeout) {
          clearInterval(checkInterval);
          this.onDiagHandler?.(`no audio detected after ${noAudioTimeout}ms, stopping early`);
          resolve(await this.stop());
          return;
        }
        if (heardAudio && Date.now() - lastSoundTime > silenceTimeout) {
          clearInterval(checkInterval);
          resolve(await this.stop());
        }
      }, 200);
    });
  }
  /**
   * Subscribe to real-time audio chunks as they arrive.
   */
  onData(handler) {
    this.onChunkHandler = handler;
  }
  /**
   * Subscribe to diagnostic messages (for --verbose).
   */
  onDiag(handler) {
    this.onDiagHandler = handler;
  }
  /**
   * Clean up: remove binding handler.
   */
  async teardown() {
    if (this.capturing) {
      await this.stop();
    }
    if (this.bindingHandler) {
      this.cdp.off("Runtime.bindingCalled", this.bindingHandler);
      this.bindingHandler = null;
    }
    this.onChunkHandler = void 0;
    this.onDiagHandler = void 0;
    this.injected = false;
  }
  /**
   * Use CDP Runtime.queryObjects to find RTCPeerConnection instances
   * that were created before our override was injected, and tap their audio tracks.
   */
  async discoverExistingPeerConnections() {
    try {
      const protoResult = await this.cdp.send("Runtime.evaluate", {
        expression: 'typeof RTCPeerConnection !== "undefined" ? RTCPeerConnection.prototype : null',
        returnByValue: false
      });
      const protoId = protoResult.result.objectId;
      if (!protoId) return;
      // Every live object whose prototype chain includes RTCPeerConnection.prototype.
      const queryResult = await this.cdp.send("Runtime.queryObjects", {
        prototypeObjectId: protoId
      });
      const arrayId = queryResult.objects.objectId;
      if (!arrayId) return;
      const propsResult = await this.cdp.send("Runtime.getProperties", {
        objectId: arrayId,
        ownProperties: true
      });
      let tapped = 0;
      for (const prop of propsResult.result) {
        if (prop.name === "length" || prop.name === "__proto__") continue;
        const pcObjectId = prop.value?.objectId;
        if (!pcObjectId) continue;
        // Hand each discovered connection to the in-page tap helper.
        await this.cdp.send("Runtime.callFunctionOn", {
          objectId: pcObjectId,
          functionDeclaration: "function() { if (window.__bpAudioOutput && window.__bpAudioOutput.tapPC) { return window.__bpAudioOutput.tapPC(this); } return false; }",
          returnByValue: true
        });
        tapped++;
      }
      if (tapped > 0) {
        this.onDiagHandler?.(`retroactively discovered ${tapped} existing RTCPeerConnection(s)`);
      }
      // Release remote handles so the page can GC them.
      await this.cdp.send("Runtime.releaseObject", { objectId: arrayId });
      await this.cdp.send("Runtime.releaseObject", { objectId: protoId });
    } catch {
    }
  }
  /**
   * Decode one base64 JSON payload from the page into a PCM chunk.
   * Malformed payloads are silently dropped (best-effort capture).
   */
  handleAudioData(payload) {
    try {
      const data = JSON.parse(payload);
      const leftBytes = base64ToBuffer(data.left);
      const rightBytes = base64ToBuffer(data.right);
      const chunk = {
        left: new Float32Array(leftBytes.buffer),
        right: new Float32Array(rightBytes.buffer),
        sampleRate: data.sampleRate,
        samples: data.samples,
        timestamp: Date.now()
      };
      this.chunks.push(chunk);
      if (this.onDiagHandler) {
        const rms = calculateRMS(chunk.left);
        const label = rms > 0.01 ? "audio" : "silence";
        this.onDiagHandler(`chunk: ${chunk.samples} samples, RMS=${rms.toFixed(4)} (${label})`);
      }
      // Record the arrival time of the first non-silent chunk.
      if (this.firstChunkTime === null) {
        const rms = calculateRMS(chunk.left);
        if (rms > 1e-3) {
          this.firstChunkTime = Date.now();
        }
      }
      this.onChunkHandler?.(chunk);
    } catch {
    }
  }
  /**
   * Merge collected chunks into one contiguous stereo buffer.
   *
   * Chunks are grouped by sample rate (different taps can report different
   * rates); the group holding the most non-silent samples (RMS > 0.01) is
   * chosen and concatenated — mixing rates would corrupt the audio.
   */
  mergeChunks() {
    if (this.chunks.length === 0) {
      return emptyCaptureResult();
    }
    const byRate = /* @__PURE__ */ new Map();
    for (const chunk of this.chunks) {
      const rate = chunk.sampleRate;
      if (!byRate.has(rate)) byRate.set(rate, []);
      byRate.get(rate).push(chunk);
    }
    let bestRate = this.chunks[0].sampleRate;
    let bestNonSilentSamples = 0;
    for (const [rate, chunks] of byRate) {
      let nonSilentSamples = 0;
      for (const chunk of chunks) {
        const rms = calculateRMS(chunk.left);
        if (rms > 0.01) {
          nonSilentSamples += chunk.left.length;
        }
      }
      if (nonSilentSamples > bestNonSilentSamples) {
        bestNonSilentSamples = nonSilentSamples;
        bestRate = rate;
      }
    }
    const bestChunks = byRate.get(bestRate);
    let totalLen = 0;
    for (const chunk of bestChunks) {
      totalLen += chunk.left.length;
    }
    const left = new Float32Array(totalLen);
    const right = new Float32Array(totalLen);
    let offset = 0;
    for (const chunk of bestChunks) {
      left.set(chunk.left, offset);
      right.set(chunk.right, offset);
      offset += chunk.left.length;
    }
    if (byRate.size > 1) {
      this.onDiagHandler?.(
        `mergeChunks: ${byRate.size} sample rates detected, using ${bestRate}Hz (${bestNonSilentSamples} non-silent samples)`
      );
    }
    return {
      left,
      right,
      sampleRate: bestRate,
      durationMs: totalLen / bestRate * 1e3,
      chunkCount: bestChunks.length
    };
  }
};
|
|
1305
|
+
/**
 * Build the zero-length capture result returned when nothing was recorded:
 * empty stereo buffers at the default 48 kHz rate.
 */
function emptyCaptureResult() {
  const silentLeft = new Float32Array(0);
  const silentRight = new Float32Array(0);
  return {
    left: silentLeft,
    right: silentRight,
    sampleRate: 48000,
    durationMs: 0,
    chunkCount: 0
  };
}
|
|
1314
|
+
/**
 * Promise-based delay: resolves after `ms` milliseconds.
 */
function sleep(ms) {
  return new Promise(function (resolve) {
    setTimeout(resolve, ms);
  });
}
|
|
10
1317
|
|
|
11
1318
|
// src/network/interceptor.ts
|
|
12
1319
|
var RequestInterceptor = class {
|
|
@@ -258,7 +1565,7 @@ async function isElementAttached(cdp, selector, contextId) {
|
|
|
258
1565
|
const result = await cdp.send("Runtime.evaluate", params);
|
|
259
1566
|
return result.result.value === true;
|
|
260
1567
|
}
|
|
261
|
-
function
|
|
1568
|
+
function sleep2(ms) {
|
|
262
1569
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
263
1570
|
}
|
|
264
1571
|
async function waitForElement(cdp, selector, options = {}) {
|
|
@@ -284,7 +1591,7 @@ async function waitForElement(cdp, selector, options = {}) {
|
|
|
284
1591
|
if (conditionMet) {
|
|
285
1592
|
return { success: true, waitedMs: Date.now() - startTime };
|
|
286
1593
|
}
|
|
287
|
-
await
|
|
1594
|
+
await sleep2(pollInterval);
|
|
288
1595
|
}
|
|
289
1596
|
return { success: false, waitedMs: Date.now() - startTime };
|
|
290
1597
|
}
|
|
@@ -313,7 +1620,7 @@ async function waitForAnyElement(cdp, selectors, options = {}) {
|
|
|
313
1620
|
return { success: true, selector, waitedMs: Date.now() - startTime };
|
|
314
1621
|
}
|
|
315
1622
|
}
|
|
316
|
-
await
|
|
1623
|
+
await sleep2(pollInterval);
|
|
317
1624
|
}
|
|
318
1625
|
return { success: false, waitedMs: Date.now() - startTime };
|
|
319
1626
|
}
|
|
@@ -362,7 +1669,7 @@ async function waitForNavigation(cdp, options = {}) {
|
|
|
362
1669
|
}
|
|
363
1670
|
const pollUrl = async () => {
|
|
364
1671
|
while (!resolved && Date.now() < startTime + timeout) {
|
|
365
|
-
await
|
|
1672
|
+
await sleep2(100);
|
|
366
1673
|
if (resolved) return;
|
|
367
1674
|
try {
|
|
368
1675
|
const currentUrl = await getCurrentUrl(cdp);
|
|
@@ -422,33 +1729,256 @@ async function waitForNetworkIdle(cdp, options = {}) {
|
|
|
422
1729
|
});
|
|
423
1730
|
}
|
|
424
1731
|
|
|
425
|
-
// src/browser/
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
1732
|
+
// src/browser/fuzzy-match.ts
|
|
1733
|
+
/**
 * Jaro-Winkler similarity between two strings, case-insensitive.
 *
 * Returns a score in [0, 1]: 0 when either string is empty (or nothing
 * matches), 1 for identical strings. The Winkler bonus rewards a shared
 * prefix of up to four characters with scaling factor 0.1.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} similarity score in [0, 1]
 */
function jaroWinkler(a, b) {
  // One emptiness check covers both "both empty" and "one empty"; the
  // previous extra `a.length === 0 && b.length === 0` clause was dead code.
  if (a.length === 0 || b.length === 0) return 0;
  if (a === b) return 1;
  const s1 = a.toLowerCase();
  const s2 = b.toLowerCase();
  // Characters only count as matches within this sliding window.
  const matchWindow = Math.max(0, Math.floor(Math.max(s1.length, s2.length) / 2) - 1);
  const s1Matches = new Array(s1.length).fill(false);
  const s2Matches = new Array(s2.length).fill(false);
  let matches = 0;
  let transpositions = 0;
  for (let i = 0; i < s1.length; i++) {
    const start = Math.max(0, i - matchWindow);
    const end = Math.min(i + matchWindow + 1, s2.length);
    for (let j = start; j < end; j++) {
      if (s2Matches[j] || s1[i] !== s2[j]) continue;
      s1Matches[i] = true;
      s2Matches[j] = true;
      matches++;
      break;
    }
  }
  if (matches === 0) return 0;
  // Count transpositions: matched characters appearing in a different order.
  let k = 0;
  for (let i = 0; i < s1.length; i++) {
    if (!s1Matches[i]) continue;
    while (!s2Matches[k]) k++;
    if (s1[i] !== s2[k]) transpositions++;
    k++;
  }
  const jaro = (matches / s1.length + matches / s2.length + (matches - transpositions / 2) / matches) / 3;
  // Winkler boost: common prefix of up to 4 characters.
  let prefix = 0;
  for (let i = 0; i < Math.min(4, Math.min(s1.length, s2.length)); i++) {
    if (s1[i] === s2[i]) {
      prefix++;
    } else {
      break;
    }
  }
  const WINKLER_SCALING = 0.1;
  return jaro + prefix * WINKLER_SCALING * (1 - jaro);
}
|
|
1775
|
+
/**
 * Similarity in [0, 1] combining Jaro-Winkler with a substring-containment
 * bonus (+0.2 when b contains a, +0.1 when a contains b), capped at 1.
 * Comparison is case-insensitive.
 */
function stringSimilarity(a, b) {
  if (a.length === 0 || b.length === 0) return 0;
  const la = a.toLowerCase();
  const lb = b.toLowerCase();
  if (la === lb) return 1;
  const base = jaroWinkler(a, b);
  let bonus = 0;
  if (lb.includes(la)) {
    bonus = 0.2;
  } else if (la.includes(lb)) {
    bonus = 0.1;
  }
  return Math.min(1, base + bonus);
}
|
|
1789
|
+
/**
 * Score how well an accessibility element matches a free-text query.
 * Weighted blend of name (60%), role (25%) and selector (15%) evidence;
 * the result lies in [0, 1].
 */
function scoreElement(query, element) {
  const normalized = query.toLowerCase();
  const tokens = normalized.split(/\s+/).filter((token) => token.length > 0);

  // Name evidence: exact > substring > per-word overlap > fuzzy similarity.
  let byName = 0;
  if (element.name) {
    const name = element.name.toLowerCase();
    if (name === normalized) {
      byName = 1;
    } else if (name.includes(normalized)) {
      byName = 0.8;
    } else if (tokens.length > 0) {
      const hits = tokens.filter((token) => name.includes(token));
      byName = hits.length / tokens.length * 0.7;
    } else {
      byName = stringSimilarity(query, element.name) * 0.6;
    }
  }

  // Role evidence: the query names the role outright, or shares a word with it.
  const role = element.role.toLowerCase();
  let byRole = 0;
  if (role === normalized || normalized.includes(role)) {
    byRole = 0.3;
  } else if (tokens.some((token) => role.includes(token))) {
    byRole = 0.2;
  }

  // Selector evidence: any query word appears in the element's selector.
  const selector = element.selector.toLowerCase();
  const bySelector = tokens.some((token) => selector.includes(token)) ? 0.2 : 0;

  return byName * 0.6 + byRole * 0.25 + bySelector * 0.15;
}
|
|
1821
|
+
/**
 * Produce a human-readable, comma-joined explanation of why an element
 * matched a query; falls back to reporting the raw fuzzy score when no
 * concrete reason applies.
 */
function explainMatch(query, element, score) {
  const q = query.toLowerCase();
  const terms = q.split(/\s+/).filter((t) => t.length > 0);
  const parts = [];

  if (element.name) {
    const n = element.name.toLowerCase();
    if (n === q) {
      parts.push("exact name match");
    } else if (n.includes(q)) {
      parts.push("name contains query");
    } else if (terms.some((t) => n.includes(t))) {
      const hits = terms.filter((t) => n.includes(t));
      parts.push(`name contains: ${hits.join(", ")}`);
    } else if (stringSimilarity(query, element.name) > 0.5) {
      parts.push("similar name");
    }
  }

  const role = element.role.toLowerCase();
  if (role === q || terms.some((t) => t === role)) {
    parts.push(`role: ${element.role}`);
  }
  if (terms.some((t) => element.selector.toLowerCase().includes(t))) {
    parts.push("selector match");
  }
  if (parts.length === 0) {
    parts.push(`fuzzy match (score: ${score.toFixed(2)})`);
  }
  return parts.join(", ");
}
|
|
1850
|
+
/**
 * Rank elements by fuzzy relevance to `query`, keeping at most `maxResults`
 * candidates whose score clears the 0.3 threshold, best first. Each result
 * carries the element, its score, and a human-readable match reason.
 */
function fuzzyMatchElements(query, elements, maxResults = 5) {
  if (!query || query.length === 0) {
    return [];
  }
  const MIN_SCORE = 0.3;
  const candidates = [];
  for (const element of elements) {
    const score = scoreElement(query, element);
    if (score >= MIN_SCORE) {
      candidates.push({ element, score });
    }
  }
  candidates.sort((a, b) => b.score - a.score);
  return candidates.slice(0, maxResults).map(({ element, score }) => ({
    element,
    score,
    matchReason: explainMatch(query, element, score)
  }));
}
|
|
1865
|
+
|
|
1866
|
+
// src/browser/hint-generator.ts
|
|
1867
|
+
// Maps each supported action verb to the ARIA roles that can sensibly
// receive it. An empty list means the action is not restricted by role.
var ACTION_ROLE_MAP = {
  click: ["button", "link", "menuitem", "menuitemcheckbox", "menuitemradio", "tab", "option"],
  fill: ["textbox", "searchbox", "textarea"],
  type: ["textbox", "searchbox", "textarea"],
  submit: ["button", "form"],
  select: ["combobox", "listbox", "option"],
  check: ["checkbox", "radio", "switch"],
  uncheck: ["checkbox", "switch"],
  focus: [],
  // Any focusable element
  hover: [],
  // Any element
  clear: ["textbox", "searchbox", "textarea"]
};
|
|
1881
|
+
/**
 * Pull human-meaningful tokens (intent) out of a list of failed selectors.
 *
 * For each non-`ref:` selector, extracts the id, aria-label, data-testid
 * and class-name fragments (first occurrence of each). The longest
 * extracted token becomes the primary intent text; if nothing could be
 * extracted, falls back to the first raw selector, then to "".
 *
 * @param {string[]} selectors - Selectors that failed to match.
 * @returns {{text: string, patterns: string[]}} Primary intent text plus
 *   all extracted tokens sorted longest-first.
 */
function extractIntent(selectors) {
  // Regexes that pull a meaningful token out of a CSS selector, in the
  // order their matches are collected: id, aria-label, data-testid, class.
  const extractors = [
    /#([a-zA-Z0-9_-]+)/,
    /\[aria-label=["']([^"']+)["']\]/,
    /\[data-testid=["']([^"']+)["']\]/,
    /\.([a-zA-Z0-9_-]+)/
  ];
  const patterns = [];
  for (const selector of selectors) {
    // ref: selectors are opaque element handles — nothing to extract.
    if (selector.startsWith("ref:")) {
      continue;
    }
    for (const re of extractors) {
      const m = selector.match(re);
      if (m) {
        patterns.push(m[1]);
      }
    }
  }
  // Longest token first: longer fragments tend to carry more intent.
  patterns.sort((a, b) => b.length - a.length);
  const text = patterns[0] ?? selectors[0] ?? "";
  return { text, patterns };
}
|
|
1909
|
+
/**
 * Classify a selector string into a hint type bucket.
 *
 * Checks are ordered by specificity: ref handles first, then
 * data-testid, aria-label, id, and finally generic CSS.
 *
 * @param {string} selector - Selector to classify.
 * @returns {"ref"|"testid"|"aria"|"id"|"css"}
 */
function getHintType(selector) {
  if (selector.startsWith("ref:")) {
    return "ref";
  }
  if (selector.includes("data-testid")) {
    return "testid";
  }
  if (selector.includes("aria-label")) {
    return "aria";
  }
  return selector.startsWith("#") ? "id" : "css";
}
|
|
1916
|
+
/**
 * Bucket a fuzzy-match score into a confidence label.
 *
 * Thresholds: >= 0.8 is "high", >= 0.5 is "medium", anything else
 * (including NaN, which fails both comparisons) is "low".
 *
 * @param {number} score - Match score, nominally in [0, 1].
 * @returns {"high"|"medium"|"low"}
 */
function getConfidence(score) {
  if (score >= 0.8) {
    return "high";
  }
  return score >= 0.5 ? "medium" : "low";
}
|
|
1921
|
+
/**
 * Convert ranked match candidates into at most `maxHints` hint objects.
 *
 * NOTE(review): the previous implementation tracked "hint types" via
 * getHintType() to diversify results, but every selector built here is
 * `ref:`-prefixed, so the type was always "ref" and both branches pushed a
 * byte-identical hint. That dead branching is removed; the returned hints
 * are unchanged: the first `maxHints` candidates, in ranked order.
 *
 * @param {Array<{element: object, score: number, matchReason: string}>} candidates
 *   Ranked candidates from fuzzyMatchElements (best first).
 * @param {number} maxHints - Maximum number of hints to return.
 * @returns {Array<{selector: string, reason: string, confidence: string, element: object}>}
 */
function diversifyHints(candidates, maxHints) {
  return candidates.slice(0, maxHints).map((candidate) => ({
    selector: `ref:${candidate.element.ref}`,
    reason: candidate.matchReason,
    confidence: getConfidence(candidate.score),
    element: {
      ref: candidate.element.ref,
      role: candidate.element.role,
      name: candidate.element.name,
      disabled: candidate.element.disabled
    }
  }));
}
|
|
1957
|
+
/**
 * Generate "did you mean?" hints for selectors that failed to match.
 *
 * Takes a fresh accessibility snapshot, narrows the interactive elements to
 * roles relevant for the attempted action (per ACTION_ROLE_MAP), fuzzy-matches
 * them against the intent extracted from the failed selectors, and returns up
 * to `maxHints` hint objects. Returns [] if the snapshot fails or nothing
 * matches.
 *
 * @param {object} page - Page to snapshot.
 * @param {string[]} failedSelectors - Selectors that did not match.
 * @param {string} actionType - Action key into ACTION_ROLE_MAP.
 * @param {number} [maxHints=3] - Maximum hints to return.
 * @returns {Promise<Array>} Hint objects, possibly empty.
 */
async function generateHints(page, failedSelectors, actionType, maxHints = 3) {
  let snapshot;
  try {
    snapshot = await page.snapshot();
  } catch {
    // Best-effort: if the snapshot fails, simply offer no hints.
    return [];
  }
  const { text } = extractIntent(failedSelectors);
  const allowedRoles = ACTION_ROLE_MAP[actionType] ?? [];
  // An empty role list means the action accepts any element.
  const pool = allowedRoles.length > 0
    ? snapshot.interactiveElements.filter((el) => allowedRoles.includes(el.role))
    : snapshot.interactiveElements;
  // Over-fetch so diversifyHints has room to choose from.
  const matches = fuzzyMatchElements(text, pool, maxHints * 2);
  return matches.length > 0 ? diversifyHints(matches, maxHints) : [];
}
|
|
447
1976
|
|
|
448
1977
|
// src/browser/page.ts
|
|
449
1978
|
var DEFAULT_TIMEOUT = 3e4;
|
|
450
1979
|
var Page = class {
|
|
451
1980
|
cdp;
|
|
1981
|
+
_targetId;
|
|
452
1982
|
rootNodeId = null;
|
|
453
1983
|
batchExecutor;
|
|
454
1984
|
emulationState = {};
|
|
@@ -467,10 +1997,23 @@ var Page = class {
|
|
|
467
1997
|
frameExecutionContexts = /* @__PURE__ */ new Map();
|
|
468
1998
|
/** Current frame's execution context ID (null = main frame default) */
|
|
469
1999
|
currentFrameContextId = null;
|
|
470
|
-
|
|
2000
|
+
/** Last matched selector from findElement (for selectorUsed tracking) */
|
|
2001
|
+
_lastMatchedSelector;
|
|
2002
|
+
/** Audio input controller (lazy-initialized) */
|
|
2003
|
+
_audioInput;
|
|
2004
|
+
/** Audio output controller (lazy-initialized) */
|
|
2005
|
+
_audioOutput;
|
|
2006
|
+
constructor(cdp, targetId) {
|
|
471
2007
|
this.cdp = cdp;
|
|
2008
|
+
this._targetId = targetId;
|
|
472
2009
|
this.batchExecutor = new BatchExecutor(this);
|
|
473
2010
|
}
|
|
2011
|
+
/**
|
|
2012
|
+
* Get the CDP target ID for this page
|
|
2013
|
+
*/
|
|
2014
|
+
get targetId() {
|
|
2015
|
+
return this._targetId;
|
|
2016
|
+
}
|
|
474
2017
|
/**
|
|
475
2018
|
* Get the underlying CDP client for advanced operations.
|
|
476
2019
|
* Use with caution - prefer high-level Page methods when possible.
|
|
@@ -478,6 +2021,13 @@ var Page = class {
|
|
|
478
2021
|
get cdpClient() {
|
|
479
2022
|
return this.cdp;
|
|
480
2023
|
}
|
|
2024
|
+
/**
|
|
2025
|
+
* Get the last matched selector from findElement (for selectorUsed tracking).
|
|
2026
|
+
* Returns undefined if no selector has been matched yet.
|
|
2027
|
+
*/
|
|
2028
|
+
getLastMatchedSelector() {
|
|
2029
|
+
return this._lastMatchedSelector;
|
|
2030
|
+
}
|
|
481
2031
|
/**
|
|
482
2032
|
* Initialize the page (enable required CDP domains)
|
|
483
2033
|
*/
|
|
@@ -597,7 +2147,9 @@ var Page = class {
|
|
|
597
2147
|
const element = await this.findElement(selector, options);
|
|
598
2148
|
if (!element) {
|
|
599
2149
|
if (options.optional) return false;
|
|
600
|
-
|
|
2150
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
2151
|
+
const hints = await generateHints(this, selectorList, "click");
|
|
2152
|
+
throw new ElementNotFoundError(selector, hints);
|
|
601
2153
|
}
|
|
602
2154
|
await this.scrollIntoView(element.nodeId);
|
|
603
2155
|
const submitResult = await this.evaluateInFrame(
|
|
@@ -633,7 +2185,9 @@ var Page = class {
|
|
|
633
2185
|
const element = await this.findElement(selector, options);
|
|
634
2186
|
if (!element) {
|
|
635
2187
|
if (options.optional) return false;
|
|
636
|
-
|
|
2188
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
2189
|
+
const hints = await generateHints(this, selectorList, "fill");
|
|
2190
|
+
throw new ElementNotFoundError(selector, hints);
|
|
637
2191
|
}
|
|
638
2192
|
await this.cdp.send("DOM.focus", { nodeId: element.nodeId });
|
|
639
2193
|
if (clear) {
|
|
@@ -696,7 +2250,7 @@ var Page = class {
|
|
|
696
2250
|
key: char
|
|
697
2251
|
});
|
|
698
2252
|
if (delay > 0) {
|
|
699
|
-
await
|
|
2253
|
+
await sleep3(delay);
|
|
700
2254
|
}
|
|
701
2255
|
}
|
|
702
2256
|
return true;
|
|
@@ -711,7 +2265,9 @@ var Page = class {
|
|
|
711
2265
|
const element = await this.findElement(selector, options);
|
|
712
2266
|
if (!element) {
|
|
713
2267
|
if (options.optional) return false;
|
|
714
|
-
|
|
2268
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
2269
|
+
const hints = await generateHints(this, selectorList, "select");
|
|
2270
|
+
throw new ElementNotFoundError(selector, hints);
|
|
715
2271
|
}
|
|
716
2272
|
const values = Array.isArray(value) ? value : [value];
|
|
717
2273
|
await this.cdp.send("Runtime.evaluate", {
|
|
@@ -735,7 +2291,7 @@ var Page = class {
|
|
|
735
2291
|
async selectCustom(config, options = {}) {
|
|
736
2292
|
const { trigger, option, value, match = "text" } = config;
|
|
737
2293
|
await this.click(trigger, options);
|
|
738
|
-
await
|
|
2294
|
+
await sleep3(100);
|
|
739
2295
|
let optionSelector;
|
|
740
2296
|
const optionSelectors = Array.isArray(option) ? option : [option];
|
|
741
2297
|
if (match === "contains") {
|
|
@@ -772,7 +2328,9 @@ var Page = class {
|
|
|
772
2328
|
const element = await this.findElement(selector, options);
|
|
773
2329
|
if (!element) {
|
|
774
2330
|
if (options.optional) return false;
|
|
775
|
-
|
|
2331
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
2332
|
+
const hints = await generateHints(this, selectorList, "check");
|
|
2333
|
+
throw new ElementNotFoundError(selector, hints);
|
|
776
2334
|
}
|
|
777
2335
|
const result = await this.cdp.send("Runtime.evaluate", {
|
|
778
2336
|
expression: `(() => {
|
|
@@ -792,7 +2350,9 @@ var Page = class {
|
|
|
792
2350
|
const element = await this.findElement(selector, options);
|
|
793
2351
|
if (!element) {
|
|
794
2352
|
if (options.optional) return false;
|
|
795
|
-
|
|
2353
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
2354
|
+
const hints = await generateHints(this, selectorList, "uncheck");
|
|
2355
|
+
throw new ElementNotFoundError(selector, hints);
|
|
796
2356
|
}
|
|
797
2357
|
const result = await this.cdp.send("Runtime.evaluate", {
|
|
798
2358
|
expression: `(() => {
|
|
@@ -812,13 +2372,40 @@ var Page = class {
|
|
|
812
2372
|
* - 'auto' (default): Attempt to detect navigation for 1 second, then assume client-side handling
|
|
813
2373
|
* - true: Wait for full navigation (traditional forms)
|
|
814
2374
|
* - false: Return immediately (AJAX forms where you'll wait for something else)
|
|
2375
|
+
*
|
|
2376
|
+
* When targeting a <form> element directly, uses form.requestSubmit() which fires
|
|
2377
|
+
* the submit event and triggers HTML5 validation.
|
|
815
2378
|
*/
|
|
816
2379
|
async submit(selector, options = {}) {
|
|
817
2380
|
const { method = "enter+click", waitForNavigation: shouldWait = "auto" } = options;
|
|
818
2381
|
const element = await this.findElement(selector, options);
|
|
819
2382
|
if (!element) {
|
|
820
2383
|
if (options.optional) return false;
|
|
821
|
-
|
|
2384
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
2385
|
+
const hints = await generateHints(this, selectorList, "submit");
|
|
2386
|
+
throw new ElementNotFoundError(selector, hints);
|
|
2387
|
+
}
|
|
2388
|
+
const isFormElement = await this.evaluateInFrame(
|
|
2389
|
+
`(() => {
|
|
2390
|
+
const el = document.querySelector(${JSON.stringify(element.selector)});
|
|
2391
|
+
return el instanceof HTMLFormElement;
|
|
2392
|
+
})()`
|
|
2393
|
+
);
|
|
2394
|
+
if (isFormElement.result.value) {
|
|
2395
|
+
await this.evaluateInFrame(
|
|
2396
|
+
`(() => {
|
|
2397
|
+
const form = document.querySelector(${JSON.stringify(element.selector)});
|
|
2398
|
+
if (form && form instanceof HTMLFormElement) {
|
|
2399
|
+
form.requestSubmit();
|
|
2400
|
+
}
|
|
2401
|
+
})()`
|
|
2402
|
+
);
|
|
2403
|
+
if (shouldWait === true) {
|
|
2404
|
+
await this.waitForNavigation({ timeout: options.timeout ?? DEFAULT_TIMEOUT });
|
|
2405
|
+
} else if (shouldWait === "auto") {
|
|
2406
|
+
await Promise.race([this.waitForNavigation({ timeout: 1e3, optional: true }), sleep3(500)]);
|
|
2407
|
+
}
|
|
2408
|
+
return true;
|
|
822
2409
|
}
|
|
823
2410
|
await this.cdp.send("DOM.focus", { nodeId: element.nodeId });
|
|
824
2411
|
if (method.includes("enter")) {
|
|
@@ -834,7 +2421,7 @@ var Page = class {
|
|
|
834
2421
|
this.waitForNavigation({ timeout: 1e3, optional: true }).then(
|
|
835
2422
|
(success) => success ? "nav" : null
|
|
836
2423
|
),
|
|
837
|
-
|
|
2424
|
+
sleep3(500).then(() => "timeout")
|
|
838
2425
|
]);
|
|
839
2426
|
if (navigationDetected === "nav") {
|
|
840
2427
|
return true;
|
|
@@ -848,7 +2435,7 @@ var Page = class {
|
|
|
848
2435
|
if (shouldWait === true) {
|
|
849
2436
|
await this.waitForNavigation({ timeout: options.timeout ?? DEFAULT_TIMEOUT });
|
|
850
2437
|
} else if (shouldWait === "auto") {
|
|
851
|
-
await
|
|
2438
|
+
await sleep3(100);
|
|
852
2439
|
}
|
|
853
2440
|
}
|
|
854
2441
|
return true;
|
|
@@ -889,7 +2476,9 @@ var Page = class {
|
|
|
889
2476
|
const element = await this.findElement(selector, options);
|
|
890
2477
|
if (!element) {
|
|
891
2478
|
if (options.optional) return false;
|
|
892
|
-
|
|
2479
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
2480
|
+
const hints = await generateHints(this, selectorList, "focus");
|
|
2481
|
+
throw new ElementNotFoundError(selector, hints);
|
|
893
2482
|
}
|
|
894
2483
|
await this.cdp.send("DOM.focus", { nodeId: element.nodeId });
|
|
895
2484
|
return true;
|
|
@@ -902,7 +2491,9 @@ var Page = class {
|
|
|
902
2491
|
const element = await this.findElement(selector, options);
|
|
903
2492
|
if (!element) {
|
|
904
2493
|
if (options.optional) return false;
|
|
905
|
-
|
|
2494
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
2495
|
+
const hints = await generateHints(this, selectorList, "hover");
|
|
2496
|
+
throw new ElementNotFoundError(selector, hints);
|
|
906
2497
|
}
|
|
907
2498
|
await this.scrollIntoView(element.nodeId);
|
|
908
2499
|
const box = await this.getBoxModel(element.nodeId);
|
|
@@ -1841,7 +3432,7 @@ var Page = class {
|
|
|
1841
3432
|
lastError = e;
|
|
1842
3433
|
if (attempt < retries) {
|
|
1843
3434
|
this.rootNodeId = null;
|
|
1844
|
-
await
|
|
3435
|
+
await sleep3(delay);
|
|
1845
3436
|
continue;
|
|
1846
3437
|
}
|
|
1847
3438
|
}
|
|
@@ -1857,6 +3448,7 @@ var Page = class {
|
|
|
1857
3448
|
async findElement(selectors, options = {}) {
|
|
1858
3449
|
const { timeout = DEFAULT_TIMEOUT } = options;
|
|
1859
3450
|
const selectorList = Array.isArray(selectors) ? selectors : [selectors];
|
|
3451
|
+
this._lastMatchedSelector = void 0;
|
|
1860
3452
|
for (const selector of selectorList) {
|
|
1861
3453
|
if (selector.startsWith("ref:")) {
|
|
1862
3454
|
const ref = selector.slice(4);
|
|
@@ -1873,6 +3465,7 @@ var Page = class {
|
|
|
1873
3465
|
}
|
|
1874
3466
|
);
|
|
1875
3467
|
if (pushResult.nodeIds?.[0]) {
|
|
3468
|
+
this._lastMatchedSelector = selector;
|
|
1876
3469
|
return {
|
|
1877
3470
|
nodeId: pushResult.nodeIds[0],
|
|
1878
3471
|
backendNodeId,
|
|
@@ -1906,6 +3499,7 @@ var Page = class {
|
|
|
1906
3499
|
"DOM.describeNode",
|
|
1907
3500
|
{ nodeId: queryResult.nodeId }
|
|
1908
3501
|
);
|
|
3502
|
+
this._lastMatchedSelector = result.selector;
|
|
1909
3503
|
return {
|
|
1910
3504
|
nodeId: queryResult.nodeId,
|
|
1911
3505
|
backendNodeId: describeResult2.node.backendNodeId,
|
|
@@ -1933,6 +3527,7 @@ var Page = class {
|
|
|
1933
3527
|
"DOM.describeNode",
|
|
1934
3528
|
{ nodeId: nodeResult.nodeId }
|
|
1935
3529
|
);
|
|
3530
|
+
this._lastMatchedSelector = result.selector;
|
|
1936
3531
|
return {
|
|
1937
3532
|
nodeId: nodeResult.nodeId,
|
|
1938
3533
|
backendNodeId: describeResult.node.backendNodeId,
|
|
@@ -2009,8 +3604,107 @@ var Page = class {
|
|
|
2009
3604
|
clickCount: 1
|
|
2010
3605
|
});
|
|
2011
3606
|
}
|
|
3607
|
+
// ============ Audio I/O ============
|
|
3608
|
+
/**
|
|
3609
|
+
* Audio input controller (fake microphone).
|
|
3610
|
+
* Lazy-initialized on first access.
|
|
3611
|
+
*/
|
|
3612
|
+
get audioInput() {
|
|
3613
|
+
if (!this._audioInput) {
|
|
3614
|
+
this._audioInput = new AudioInput(this.cdp);
|
|
3615
|
+
}
|
|
3616
|
+
return this._audioInput;
|
|
3617
|
+
}
|
|
3618
|
+
/**
|
|
3619
|
+
* Audio output capture controller.
|
|
3620
|
+
* Lazy-initialized on first access.
|
|
3621
|
+
*/
|
|
3622
|
+
get audioOutput() {
|
|
3623
|
+
if (!this._audioOutput) {
|
|
3624
|
+
this._audioOutput = new AudioOutput(this.cdp);
|
|
3625
|
+
}
|
|
3626
|
+
return this._audioOutput;
|
|
3627
|
+
}
|
|
3628
|
+
/**
 * Set up both audio input (fake microphone) and output (capture).
 * Must be called before navigating to the page that will use audio.
 *
 * Dispatches a synthetic click at (0,0) before setup — presumably to
 * register a user gesture so the page may start audio playback/capture
 * (autoplay policies) — TODO confirm. The click is best-effort: failures
 * are deliberately swallowed and must not block audio setup.
 */
async setupAudio() {
  try {
    // Synthetic left-click press + release at the top-left corner.
    await this.cdp.send("Input.dispatchMouseEvent", {
      type: "mousePressed",
      x: 0,
      y: 0,
      button: "left",
      clickCount: 1
    });
    await this.cdp.send("Input.dispatchMouseEvent", {
      type: "mouseReleased",
      x: 0,
      y: 0,
      button: "left",
      clickCount: 1
    });
  } catch {
    // Intentionally ignored: the synthetic gesture is optional.
  }
  // Initialize the fake microphone and the output-capture pipeline.
  await this.audioInput.setup();
  await this.audioOutput.setup();
}
|
|
3653
|
+
/**
 * Full audio round-trip: feed input audio, capture the response.
 *
 * 1. Starts capturing output
 * 2. Feeds input audio as microphone data
 * 3. Waits for the page to respond and then go silent
 * 4. Returns the captured response audio with latency metrics
 *
 * @param options.input - Audio bytes to play as microphone input.
 * @param options.preDelay - Optional ms to wait after capture starts
 *   before playing input.
 * @param options.sendSelector - Optional selector to click after input
 *   playback finishes (e.g. a "send" button for push-to-talk UIs).
 * @param options.silenceTimeout - Ms of silence that ends capture
 *   (default 1500).
 * @param options.silenceThreshold - RMS level treated as silence
 *   (default 0.01).
 * @param options.timeout - Max capture duration in ms (default 120000).
 * @returns { audio, latencyMs, totalMs } — latencyMs is time from start to
 *   the first captured output chunk, or -1 if no chunk arrived.
 *
 * @example
 * ```typescript
 * await page.setupAudio();
 * await page.goto('https://voice-agent.example.com');
 * const result = await page.audioRoundTrip({
 *   input: wavFileBytes,
 *   silenceTimeout: 3000,
 * });
 * console.log(`Response: ${result.audio.durationMs}ms, latency: ${result.latencyMs}ms`);
 * ```
 */
async audioRoundTrip(options) {
  // Lazily run setup if either side hasn't been initialized yet.
  if (!this.audioInput.isSetup || !this.audioOutput.isSetup) {
    await this.setupAudio();
  }
  const start = Date.now();
  // Start capturing BEFORE playing input so the response's first chunk
  // cannot be missed.
  await this.audioOutput.start();
  if (options.preDelay && options.preDelay > 0) {
    await sleep3(options.preDelay);
  }
  // waitForEnd only when a send button must be clicked after playback.
  const inputDone = this.audioInput.play(options.input, {
    waitForEnd: !!options.sendSelector
  });
  if (options.sendSelector) {
    // Wait for playback to finish (errors ignored), then click "send".
    await inputDone.catch(() => {
    });
    await this.click(options.sendSelector);
  }
  const audio = await this.audioOutput.captureUntilSilence({
    silenceTimeout: options.silenceTimeout ?? 1500,
    silenceThreshold: options.silenceThreshold ?? 0.01,
    maxDuration: options.timeout ?? 12e4
  });
  await this.audioInput.stop();
  if (!options.sendSelector) {
    // inputDone was not awaited earlier in this mode; settle it now so the
    // promise never floats (errors ignored).
    await inputDone.catch(() => {
    });
  }
  const firstChunkTime = this.audioOutput.firstChunkTime;
  return {
    audio,
    // -1 signals that the page never produced any output audio.
    latencyMs: firstChunkTime !== null ? firstChunkTime - start : -1,
    totalMs: Date.now() - start
  };
}
|
|
2012
3706
|
};
|
|
2013
|
-
function
|
|
3707
|
+
/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep3(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
2016
3710
|
|
|
@@ -2039,14 +3733,24 @@ var Browser = class _Browser {
|
|
|
2039
3733
|
* Get or create a page by name
|
|
2040
3734
|
* If no name is provided, returns the first available page or creates a new one
|
|
2041
3735
|
*/
|
|
2042
|
-
async page(name) {
|
|
3736
|
+
async page(name, options) {
|
|
2043
3737
|
const pageName = name ?? "default";
|
|
2044
3738
|
const cached = this.pages.get(pageName);
|
|
2045
3739
|
if (cached) return cached;
|
|
2046
3740
|
const targets = await this.cdp.send("Target.getTargets");
|
|
2047
3741
|
const pageTargets = targets.targetInfos.filter((t) => t.type === "page");
|
|
2048
3742
|
let targetId;
|
|
2049
|
-
if (
|
|
3743
|
+
if (options?.targetId) {
|
|
3744
|
+
const targetExists = pageTargets.some((t) => t.targetId === options.targetId);
|
|
3745
|
+
if (targetExists) {
|
|
3746
|
+
targetId = options.targetId;
|
|
3747
|
+
} else {
|
|
3748
|
+
console.warn(`[browser-pilot] Target ${options.targetId} no longer exists, falling back`);
|
|
3749
|
+
targetId = pageTargets.length > 0 ? pageTargets[0].targetId : (await this.cdp.send("Target.createTarget", {
|
|
3750
|
+
url: "about:blank"
|
|
3751
|
+
})).targetId;
|
|
3752
|
+
}
|
|
3753
|
+
} else if (pageTargets.length > 0) {
|
|
2050
3754
|
targetId = pageTargets[0].targetId;
|
|
2051
3755
|
} else {
|
|
2052
3756
|
const result = await this.cdp.send("Target.createTarget", {
|
|
@@ -2055,7 +3759,7 @@ var Browser = class _Browser {
|
|
|
2055
3759
|
targetId = result.targetId;
|
|
2056
3760
|
}
|
|
2057
3761
|
await this.cdp.attachToTarget(targetId);
|
|
2058
|
-
const page = new Page(this.cdp);
|
|
3762
|
+
const page = new Page(this.cdp, targetId);
|
|
2059
3763
|
await page.init();
|
|
2060
3764
|
this.pages.set(pageName, page);
|
|
2061
3765
|
return page;
|
|
@@ -2068,7 +3772,7 @@ var Browser = class _Browser {
|
|
|
2068
3772
|
url
|
|
2069
3773
|
});
|
|
2070
3774
|
await this.cdp.attachToTarget(result.targetId);
|
|
2071
|
-
const page = new Page(this.cdp);
|
|
3775
|
+
const page = new Page(this.cdp, result.targetId);
|
|
2072
3776
|
await page.init();
|
|
2073
3777
|
const name = `page-${this.pages.size + 1}`;
|
|
2074
3778
|
this.pages.set(name, page);
|
|
@@ -2140,14 +3844,20 @@ function connect(options) {
|
|
|
2140
3844
|
}
|
|
2141
3845
|
|
|
2142
3846
|
export {
|
|
3847
|
+
bufferToBase64,
|
|
3848
|
+
calculateRMS,
|
|
3849
|
+
pcmToWav,
|
|
3850
|
+
parseWavHeader,
|
|
3851
|
+
generateSilence,
|
|
3852
|
+
generateTone,
|
|
3853
|
+
grantAudioPermissions,
|
|
3854
|
+
AudioInput,
|
|
3855
|
+
AudioOutput,
|
|
2143
3856
|
RequestInterceptor,
|
|
2144
3857
|
waitForElement,
|
|
2145
3858
|
waitForAnyElement,
|
|
2146
3859
|
waitForNavigation,
|
|
2147
3860
|
waitForNetworkIdle,
|
|
2148
|
-
ElementNotFoundError,
|
|
2149
|
-
TimeoutError,
|
|
2150
|
-
NavigationError,
|
|
2151
3861
|
Page,
|
|
2152
3862
|
Browser,
|
|
2153
3863
|
connect
|