browser-pilot 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -1
- package/dist/actions.cjs +485 -9
- package/dist/actions.d.cts +24 -5
- package/dist/actions.d.ts +24 -5
- package/dist/actions.mjs +5 -3
- package/dist/browser.cjs +1761 -102
- package/dist/browser.d.cts +8 -4
- package/dist/browser.d.ts +8 -4
- package/dist/browser.mjs +6 -5
- package/dist/{chunk-PCNEJAJ7.mjs → chunk-7OSR2CAE.mjs} +1756 -46
- package/dist/chunk-KKW2SZLV.mjs +741 -0
- package/dist/cli.mjs +7576 -265
- package/dist/index.cjs +2434 -108
- package/dist/index.d.cts +142 -6
- package/dist/index.d.ts +142 -6
- package/dist/index.mjs +360 -13
- package/dist/providers.d.cts +2 -2
- package/dist/providers.d.ts +2 -2
- package/dist/{types-D_uDqh0Z.d.cts → types--wXNHUwt.d.cts} +1 -1
- package/dist/{types-D_uDqh0Z.d.ts → types--wXNHUwt.d.ts} +1 -1
- package/dist/{types-TVlTA7nH.d.cts → types-CYw-7vx1.d.cts} +280 -3
- package/dist/{types-CbdmaocU.d.ts → types-DOGsEYQa.d.ts} +280 -3
- package/package.json +3 -3
- package/dist/chunk-6RB3GKQP.mjs +0 -251
- package/dist/chunk-ZIQA4JOT.mjs +0 -226
- package/dist/cli.cjs +0 -4792
- package/dist/cli.d.cts +0 -25
- package/dist/cli.d.ts +0 -25
package/dist/index.cjs
CHANGED
|
@@ -20,6 +20,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var src_exports = {};
|
|
22
22
|
__export(src_exports, {
|
|
23
|
+
AudioInput: () => AudioInput,
|
|
24
|
+
AudioOutput: () => AudioOutput,
|
|
23
25
|
BatchExecutor: () => BatchExecutor,
|
|
24
26
|
Browser: () => Browser,
|
|
25
27
|
BrowserBaseProvider: () => BrowserBaseProvider,
|
|
@@ -33,6 +35,8 @@ __export(src_exports, {
|
|
|
33
35
|
TimeoutError: () => TimeoutError,
|
|
34
36
|
Tracer: () => Tracer,
|
|
35
37
|
addBatchToPage: () => addBatchToPage,
|
|
38
|
+
bufferToBase64: () => bufferToBase64,
|
|
39
|
+
calculateRMS: () => calculateRMS,
|
|
36
40
|
connect: () => connect,
|
|
37
41
|
createCDPClient: () => createCDPClient,
|
|
38
42
|
createProvider: () => createProvider,
|
|
@@ -40,8 +44,17 @@ __export(src_exports, {
|
|
|
40
44
|
disableTracing: () => disableTracing,
|
|
41
45
|
discoverTargets: () => discoverTargets,
|
|
42
46
|
enableTracing: () => enableTracing,
|
|
47
|
+
generateSilence: () => generateSilence,
|
|
48
|
+
generateTone: () => generateTone,
|
|
49
|
+
getAudioChromeFlags: () => getAudioChromeFlags,
|
|
43
50
|
getBrowserWebSocketUrl: () => getBrowserWebSocketUrl,
|
|
44
51
|
getTracer: () => getTracer,
|
|
52
|
+
grantAudioPermissions: () => grantAudioPermissions,
|
|
53
|
+
isTranscriptionAvailable: () => isTranscriptionAvailable,
|
|
54
|
+
parseWavHeader: () => parseWavHeader,
|
|
55
|
+
pcmToWav: () => pcmToWav,
|
|
56
|
+
transcribe: () => transcribe,
|
|
57
|
+
validateSteps: () => validateSteps,
|
|
45
58
|
waitForAnyElement: () => waitForAnyElement,
|
|
46
59
|
waitForElement: () => waitForElement,
|
|
47
60
|
waitForNavigation: () => waitForNavigation,
|
|
@@ -49,6 +62,31 @@ __export(src_exports, {
|
|
|
49
62
|
});
|
|
50
63
|
module.exports = __toCommonJS(src_exports);
|
|
51
64
|
|
|
65
|
+
// src/browser/types.ts
|
|
66
|
+
/**
 * Error thrown when no element could be located for the given selector(s).
 *
 * The message lists every selector that was supplied, joined with ", ".
 * `selectors` is always an array on the instance (a non-array argument is
 * wrapped); `hints` is stored exactly as passed in.
 */
var ElementNotFoundError = class extends Error {
  selectors;
  hints;
  /**
   * @param {*} selectors - One selector, or an array of selectors.
   * @param {*} [hints] - Extra diagnostic data kept on the instance unchanged.
   */
  constructor(selectors, hints) {
    let tried;
    if (Array.isArray(selectors)) {
      tried = selectors;
    } else {
      tried = [selectors];
    }
    super("Element not found: " + tried.join(", "));
    Object.assign(this, {
      name: "ElementNotFoundError",
      selectors: tried,
      hints
    });
  }
};
|
|
77
|
+
/**
 * Error signalling that an operation did not finish in time.
 * When no message is given the text defaults to "Operation timed out".
 */
var TimeoutError = class extends Error {
  // Own `name` property so the error is labelled by its class.
  name = "TimeoutError";
  /**
   * @param {string} [message="Operation timed out"] - Human-readable description.
   */
  constructor(message = "Operation timed out") {
    super(message);
  }
};
|
|
83
|
+
/**
 * Error type used for navigation failures; carries only a message and the
 * class-specific `name`.
 */
var NavigationError = class extends Error {
  // Own `name` property so the error is labelled by its class.
  name = "NavigationError";
  /**
   * @param {string} message - Human-readable description of the failure.
   */
  constructor(message) {
    super(message);
  }
};
|
|
89
|
+
|
|
52
90
|
// src/actions/executor.ts
|
|
53
91
|
var DEFAULT_TIMEOUT = 3e4;
|
|
54
92
|
var BatchExecutor = class {
|
|
@@ -80,13 +118,15 @@ var BatchExecutor = class {
|
|
|
80
118
|
});
|
|
81
119
|
} catch (error) {
|
|
82
120
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
121
|
+
const hints = error instanceof ElementNotFoundError ? error.hints : void 0;
|
|
83
122
|
results.push({
|
|
84
123
|
index: i,
|
|
85
124
|
action: step.action,
|
|
86
125
|
selector: step.selector,
|
|
87
126
|
success: false,
|
|
88
127
|
durationMs: Date.now() - stepStart,
|
|
89
|
-
error: errorMessage
|
|
128
|
+
error: errorMessage,
|
|
129
|
+
hints
|
|
90
130
|
});
|
|
91
131
|
if (onFail === "stop" && !step.optional) {
|
|
92
132
|
return {
|
|
@@ -214,86 +254,1962 @@ var BatchExecutor = class {
|
|
|
214
254
|
await this.page.evaluate(`window.scrollBy(${deltaX}, ${deltaY})`);
|
|
215
255
|
return {};
|
|
216
256
|
}
|
|
217
|
-
if (!step.selector) throw new Error("scroll requires selector, coordinates, or direction");
|
|
218
|
-
await this.page.scroll(step.selector, { timeout, optional });
|
|
219
|
-
return { selectorUsed: this.getUsedSelector(step.selector) };
|
|
220
|
-
}
|
|
221
|
-
case "wait": {
|
|
222
|
-
if (!step.selector && !step.waitFor) {
|
|
223
|
-
const delay = step.timeout ?? 1e3;
|
|
224
|
-
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
225
|
-
return {};
|
|
257
|
+
if (!step.selector) throw new Error("scroll requires selector, coordinates, or direction");
|
|
258
|
+
await this.page.scroll(step.selector, { timeout, optional });
|
|
259
|
+
return { selectorUsed: this.getUsedSelector(step.selector) };
|
|
260
|
+
}
|
|
261
|
+
case "wait": {
|
|
262
|
+
if (!step.selector && !step.waitFor) {
|
|
263
|
+
const delay = step.timeout ?? 1e3;
|
|
264
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
265
|
+
return {};
|
|
266
|
+
}
|
|
267
|
+
if (step.waitFor === "navigation") {
|
|
268
|
+
await this.page.waitForNavigation({ timeout, optional });
|
|
269
|
+
return {};
|
|
270
|
+
}
|
|
271
|
+
if (step.waitFor === "networkIdle") {
|
|
272
|
+
await this.page.waitForNetworkIdle({ timeout, optional });
|
|
273
|
+
return {};
|
|
274
|
+
}
|
|
275
|
+
if (!step.selector)
|
|
276
|
+
throw new Error(
|
|
277
|
+
"wait requires selector (or waitFor: navigation/networkIdle, or timeout for simple delay)"
|
|
278
|
+
);
|
|
279
|
+
await this.page.waitFor(step.selector, {
|
|
280
|
+
timeout,
|
|
281
|
+
optional,
|
|
282
|
+
state: step.waitFor ?? "visible"
|
|
283
|
+
});
|
|
284
|
+
return { selectorUsed: this.getUsedSelector(step.selector) };
|
|
285
|
+
}
|
|
286
|
+
case "snapshot": {
|
|
287
|
+
const snapshot = await this.page.snapshot();
|
|
288
|
+
return { value: snapshot };
|
|
289
|
+
}
|
|
290
|
+
case "screenshot": {
|
|
291
|
+
const data = await this.page.screenshot({
|
|
292
|
+
format: step.format,
|
|
293
|
+
quality: step.quality,
|
|
294
|
+
fullPage: step.fullPage
|
|
295
|
+
});
|
|
296
|
+
return { value: data };
|
|
297
|
+
}
|
|
298
|
+
case "evaluate": {
|
|
299
|
+
if (typeof step.value !== "string")
|
|
300
|
+
throw new Error("evaluate requires string value (expression)");
|
|
301
|
+
const result = await this.page.evaluate(step.value);
|
|
302
|
+
return { value: result };
|
|
303
|
+
}
|
|
304
|
+
case "text": {
|
|
305
|
+
const selector = Array.isArray(step.selector) ? step.selector[0] : step.selector;
|
|
306
|
+
const text = await this.page.text(selector);
|
|
307
|
+
return { text, selectorUsed: selector };
|
|
308
|
+
}
|
|
309
|
+
case "switchFrame": {
|
|
310
|
+
if (!step.selector) throw new Error("switchFrame requires selector");
|
|
311
|
+
await this.page.switchToFrame(step.selector, { timeout, optional });
|
|
312
|
+
return { selectorUsed: this.getUsedSelector(step.selector) };
|
|
313
|
+
}
|
|
314
|
+
case "switchToMain": {
|
|
315
|
+
await this.page.switchToMain();
|
|
316
|
+
return {};
|
|
317
|
+
}
|
|
318
|
+
default: {
|
|
319
|
+
const action = step.action;
|
|
320
|
+
const aliases = {
|
|
321
|
+
execute: "evaluate",
|
|
322
|
+
navigate: "goto",
|
|
323
|
+
input: "fill",
|
|
324
|
+
tap: "click",
|
|
325
|
+
go: "goto",
|
|
326
|
+
run: "evaluate",
|
|
327
|
+
capture: "screenshot",
|
|
328
|
+
inspect: "snapshot",
|
|
329
|
+
enter: "press",
|
|
330
|
+
open: "goto",
|
|
331
|
+
visit: "goto",
|
|
332
|
+
eval: "evaluate",
|
|
333
|
+
js: "evaluate",
|
|
334
|
+
snap: "snapshot",
|
|
335
|
+
frame: "switchFrame"
|
|
336
|
+
};
|
|
337
|
+
const suggestion = aliases[action.toLowerCase()];
|
|
338
|
+
const hint = suggestion ? ` Did you mean "${suggestion}"?` : "";
|
|
339
|
+
const valid = "goto, click, fill, type, select, check, uncheck, submit, press, focus, hover, scroll, wait, snapshot, screenshot, evaluate, text, switchFrame, switchToMain";
|
|
340
|
+
throw new Error(`Unknown action "${action}".${hint}
|
|
341
|
+
|
|
342
|
+
Valid actions: ${valid}`);
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
/**
|
|
347
|
+
* Get the actual selector that matched the element.
|
|
348
|
+
* Uses the last matched selector tracked by Page, falls back to first selector if unavailable.
|
|
349
|
+
*/
|
|
350
|
+
getUsedSelector(selector) {
|
|
351
|
+
const matched = this.page.getLastMatchedSelector();
|
|
352
|
+
if (matched) return matched;
|
|
353
|
+
return Array.isArray(selector) ? selector[0] : selector;
|
|
354
|
+
}
|
|
355
|
+
};
|
|
356
|
+
/**
 * Attach a `batch(steps, options)` method to an existing page object.
 *
 * The page is mutated in place and returned; `batch` forwards to a
 * BatchExecutor created once for that page.
 *
 * @param {object} page - Page instance to extend.
 * @returns {object} The same page object, now carrying `batch`.
 */
function addBatchToPage(page) {
  const runner = new BatchExecutor(page);
  page.batch = (steps, options) => runner.execute(steps, options);
  return page;
}
|
|
362
|
+
|
|
363
|
+
// src/actions/validate.ts
|
|
364
|
+
/**
 * Levenshtein edit distance between two strings (unit cost for insert,
 * delete and substitute), compared by UTF-16 code unit.
 *
 * Only the previous and current rows of the distance table are kept.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Minimum number of single-character edits turning a into b.
 */
function levenshtein(a, b) {
  // Row 0: turning the empty prefix of `a` into the first j chars of `b`.
  let previous = Array.from({ length: b.length + 1 }, (_, j) => j);
  for (let i = 1; i <= a.length; i++) {
    // Column 0: deleting the first i chars of `a`.
    const current = [i];
    for (let j = 1; j <= b.length; j++) {
      if (a[i - 1] === b[j - 1]) {
        current.push(previous[j - 1]);
      } else {
        current.push(1 + Math.min(previous[j], current[j - 1], previous[j - 1]));
      }
    }
    previous = current;
  }
  return previous[b.length];
}
|
|
377
|
+
// Maps commonly guessed (lower-case) action names to the real action they
// most likely stand for. Used to build "Did you mean ...?" suggestions.
var ACTION_ALIASES = {
  // -> goto
  navigate: "goto",
  go: "goto",
  nav: "goto",
  open: "goto",
  visit: "goto",
  browse: "goto",
  load: "goto",
  // -> click
  tap: "click",
  // -> fill
  input: "fill",
  write: "fill",
  set: "fill",
  // -> select
  pick: "select",
  choose: "select",
  // -> press
  enter: "press",
  keypress: "press",
  send: "press",
  // -> evaluate
  execute: "evaluate",
  run: "evaluate",
  eval: "evaluate",
  js: "evaluate",
  script: "evaluate",
  // -> screenshot
  capture: "screenshot",
  image: "screenshot",
  pic: "screenshot",
  // -> snapshot
  inspect: "snapshot",
  snap: "snapshot",
  accessibility: "snapshot",
  a11y: "snapshot",
  // -> switchFrame
  frame: "switchFrame",
  iframe: "switchFrame"
};
|
|
409
|
+
// Maps commonly guessed step property names to the real step field they most
// likely stand for. Used to build "Did you mean ...?" suggestions.
var PROPERTY_ALIASES = {
  // -> value
  expression: "value",
  code: "value",
  script: "value",
  text: "value",
  input: "value",
  content: "value",
  // -> url
  href: "url",
  src: "url",
  link: "url",
  address: "url",
  page: "url",
  path: "url",
  // -> selector
  target: "selector",
  element: "selector",
  query: "selector",
  el: "selector",
  elem: "selector",
  css: "selector",
  xpath: "selector",
  // -> key
  char: "key",
  keys: "key",
  button: "key"
};
|
|
433
|
+
// Per-action validation schema: for each action, which step fields are
// required and which are optional, with the type (and allowed values) of each.
// Built with small factories so every rule gets its own fresh objects; the
// key order below defines the order of VALID_ACTIONS.
var ACTION_RULES = (() => {
  const selectorType = () => ({ type: "string|string[]" });
  const stringType = () => ({ type: "string" });
  const numberType = () => ({ type: "number" });
  const booleanType = () => ({ type: "boolean" });
  const oneOf = (...choices) => ({ type: "string", enum: choices });
  // Rule for actions whose only required field is `selector`.
  const selectorOnly = (optional = {}) => ({
    required: { selector: selectorType() },
    optional
  });
  // Rule for actions with no field constraints at all.
  const unconstrained = () => ({ required: {}, optional: {} });

  return {
    goto: { required: { url: stringType() }, optional: {} },
    click: selectorOnly({ waitForNavigation: booleanType() }),
    fill: {
      required: { selector: selectorType(), value: stringType() },
      optional: { clear: booleanType(), blur: booleanType() }
    },
    type: {
      required: { selector: selectorType(), value: stringType() },
      optional: { delay: numberType() }
    },
    select: {
      required: {},
      optional: {
        selector: selectorType(),
        value: selectorType(),
        trigger: selectorType(),
        option: selectorType(),
        match: oneOf("text", "value", "contains")
      }
    },
    check: selectorOnly(),
    uncheck: selectorOnly(),
    submit: selectorOnly({ method: oneOf("enter", "click", "enter+click") }),
    press: { required: { key: stringType() }, optional: {} },
    focus: selectorOnly(),
    hover: selectorOnly(),
    scroll: {
      required: {},
      optional: {
        selector: selectorType(),
        x: numberType(),
        y: numberType(),
        direction: oneOf("up", "down", "left", "right"),
        amount: numberType()
      }
    },
    wait: {
      required: {},
      optional: {
        selector: selectorType(),
        waitFor: oneOf("visible", "hidden", "attached", "detached", "navigation", "networkIdle")
      }
    },
    snapshot: unconstrained(),
    screenshot: {
      required: {},
      optional: {
        format: oneOf("png", "jpeg", "webp"),
        quality: numberType(),
        fullPage: booleanType()
      }
    },
    evaluate: { required: { value: stringType() }, optional: {} },
    text: { required: {}, optional: { selector: selectorType() } },
    switchFrame: selectorOnly(),
    switchToMain: unconstrained()
  };
})();
// Names of all supported actions, in declaration order.
var VALID_ACTIONS = Object.keys(ACTION_RULES);
// The same list pre-joined for use in error messages.
var VALID_ACTIONS_LIST = VALID_ACTIONS.join(", ");
|
|
546
|
+
// Every property name a step object may carry. Anything else is reported as
// an unknown property. Insertion order is kept as-is because the fuzzy
// property suggestion iterates this set and keeps the first closest match.
var KNOWN_STEP_FIELDS = /* @__PURE__ */ new Set([
  // core fields
  "action", "selector", "url", "value", "key",
  // waiting / tolerance
  "waitFor", "timeout", "optional",
  // submit / fill / type / click options
  "method", "clear", "blur", "delay", "waitForNavigation",
  // custom select
  "trigger", "option", "match",
  // scroll
  "x", "y", "direction", "amount",
  // screenshot
  "format", "quality", "fullPage"
]);
|
|
571
|
+
/**
 * Resolve a user-supplied action name to a supported action.
 *
 * Resolution order:
 *   1. exact match against VALID_ACTIONS -> `{ action }` (no suggestion);
 *   2. lower-cased name found in ACTION_ALIASES -> the aliased action plus a
 *      "Did you mean ...?" suggestion;
 *   3. closest valid action by edit distance, accepted when the distance is
 *      at most 2 -> that action plus a suggestion;
 *   4. otherwise `null`.
 *
 * @param {string} name - Action name as written in the step.
 * @returns {{action: string, suggestion?: string} | null}
 */
function resolveAction(name) {
  if (VALID_ACTIONS.includes(name)) {
    return { action: name };
  }
  const lower = name.toLowerCase();
  // Only consult the table's own keys: a plain `ACTION_ALIASES[lower]` would
  // also find inherited members, so names such as "constructor" or
  // "__proto__" would resolve to a function/object instead of a string.
  const alias = Object.prototype.hasOwnProperty.call(ACTION_ALIASES, lower) ? ACTION_ALIASES[lower] : void 0;
  if (alias) {
    return {
      action: alias,
      suggestion: `Did you mean "${alias}"?`
    };
  }
  // Fuzzy fallback: keep the first valid action with the smallest distance.
  let best = null;
  let bestDist = Infinity;
  for (const valid of VALID_ACTIONS) {
    const dist = levenshtein(lower, valid);
    if (dist < bestDist) {
      bestDist = dist;
      best = valid;
    }
  }
  if (best && bestDist <= 2) {
    return { action: best, suggestion: `Did you mean "${best}"?` };
  }
  return null;
}
|
|
596
|
+
/**
 * Suggest the real step field an unknown property name probably meant.
 *
 * Looks the name up in PROPERTY_ALIASES first; failing that, picks the known
 * step field (excluding "action") with the smallest edit distance, accepted
 * only when that distance is at most 2.
 *
 * @param {string} name - Unknown property name found on a step.
 * @returns {string | undefined} Suggested field name, or undefined when
 *   nothing is close enough.
 */
function suggestProperty(name) {
  // Own-key check only: a bare `PROPERTY_ALIASES[name]` also matches inherited
  // members, so a step key like "toString" or "constructor" would return a
  // function and end up printed inside the error message.
  if (Object.prototype.hasOwnProperty.call(PROPERTY_ALIASES, name) && PROPERTY_ALIASES[name]) {
    return PROPERTY_ALIASES[name];
  }
  // Fuzzy fallback: keep the first known field with the smallest distance.
  let best = null;
  let bestDist = Infinity;
  for (const known of KNOWN_STEP_FIELDS) {
    if (known === "action") continue;
    const dist = levenshtein(name, known);
    if (dist < bestDist) {
      bestDist = dist;
      best = known;
    }
  }
  if (best && bestDist <= 2) {
    return best;
  }
  return void 0;
}
|
|
615
|
+
/**
 * Check one step field value against its rule.
 *
 * Supported `rule.type` values: "string" (optionally restricted by
 * `rule.enum`), "string|string[]", "number" and "boolean".
 *
 * @param {*} value - The field value taken from the step.
 * @param {{type: string, enum?: string[]}} rule - Expected type description.
 * @returns {string | null | undefined} A message fragment describing the
 *   mismatch, `null` when the value is acceptable, or `undefined` when
 *   `rule.type` is not one of the supported kinds.
 */
function checkFieldType(value, rule) {
  const actual = typeof value;
  if (rule.type === "string") {
    if (actual !== "string") return `expected string, got ${actual}`;
    const allowed = rule.enum;
    return allowed && !allowed.includes(value) ? `must be one of: ${allowed.join(", ")}` : null;
  }
  if (rule.type === "string|string[]") {
    if (Array.isArray(value)) {
      const allStrings = value.every((item) => typeof item === "string");
      return allStrings ? null : "array elements must be strings";
    }
    return actual === "string" ? null : `expected string or string[], got ${actual}`;
  }
  if (rule.type === "number") {
    return actual === "number" ? null : `expected number, got ${actual}`;
  }
  if (rule.type === "boolean") {
    return actual === "boolean" ? null : `expected boolean, got ${actual}`;
  }
}
|
|
639
|
+
/**
 * Validate a list of batch steps before they are executed.
 *
 * For every step this checks, in order: that it is a plain (non-array)
 * object, that it has a string `action`, that the action is known (aliases
 * and near-misses are reported with a suggestion rather than accepted),
 * that it carries no unknown properties, that required fields are present
 * and well-typed, that optional fields are well-typed, that the shared
 * `timeout` / `optional` fields have the right type, and that `select`
 * supplies one of its two accepted field combinations.
 *
 * @param {Array<*>} steps - Step objects to validate.
 * @returns {{valid: boolean, errors: Array<{stepIndex: number, field: string,
 *   message: string, suggestion?: string}>, formatted: () => string}}
 *   `valid` is true when no errors were collected; `formatted()` renders the
 *   errors as a multi-line report ("" when there are none).
 */
function validateSteps(steps) {
  const errors = [];
  for (let i = 0; i < steps.length; i++) {
    const step = steps[i];
    if (!step || typeof step !== "object" || Array.isArray(step)) {
      errors.push({
        stepIndex: i,
        field: "step",
        message: "step must be a JSON object."
      });
      continue;
    }
    const obj = step;
    if (!("action" in obj)) {
      errors.push({
        stepIndex: i,
        field: "action",
        message: 'missing required "action" field.'
      });
      continue;
    }
    const actionName = obj["action"];
    if (typeof actionName !== "string") {
      errors.push({
        stepIndex: i,
        field: "action",
        message: `"action" must be a string, got ${typeof actionName}.`
      });
      continue;
    }
    const resolved = resolveAction(actionName);
    if (!resolved) {
      errors.push({
        stepIndex: i,
        field: "action",
        message: `unknown action "${actionName}".`,
        suggestion: `Valid actions: ${VALID_ACTIONS_LIST}`
      });
      continue;
    }
    // A suggestion means the name was only an alias or a near-miss: report it
    // as an error instead of silently running the guessed action.
    if (resolved.suggestion) {
      errors.push({
        stepIndex: i,
        field: "action",
        message: `unknown action "${actionName}". ${resolved.suggestion}`,
        suggestion: resolved.suggestion
      });
      continue;
    }
    const action = resolved.action;
    const rule = ACTION_RULES[action];
    // Unknown properties (typos, wrong field names).
    for (const key of Object.keys(obj)) {
      if (key === "action") continue;
      if (!KNOWN_STEP_FIELDS.has(key)) {
        const suggestion = suggestProperty(key);
        errors.push({
          stepIndex: i,
          field: key,
          message: suggestion ? `unknown property "${key}". Did you mean "${suggestion}"?` : `unknown property "${key}".`,
          suggestion: suggestion ? `Did you mean "${suggestion}"?` : void 0
        });
      }
    }
    // Required fields: must be present and of the right type.
    for (const [field, fieldRule] of Object.entries(rule.required)) {
      if (!(field in obj) || obj[field] === void 0) {
        errors.push({
          stepIndex: i,
          field,
          message: `missing required "${field}" (${fieldRule.type}).`
        });
      } else {
        const typeErr = checkFieldType(obj[field], fieldRule);
        if (typeErr) {
          errors.push({
            stepIndex: i,
            field,
            message: `"${field}" ${typeErr}.`
          });
        }
      }
    }
    // Optional fields: only type-checked when actually supplied.
    for (const [field, fieldRule] of Object.entries(rule.optional)) {
      if (field in obj && obj[field] !== void 0) {
        const typeErr = checkFieldType(obj[field], fieldRule);
        if (typeErr) {
          errors.push({
            stepIndex: i,
            field,
            message: `"${field}" ${typeErr}.`
          });
        }
      }
    }
    // Fields shared by every action.
    if ("timeout" in obj && obj["timeout"] !== void 0) {
      if (typeof obj["timeout"] !== "number") {
        errors.push({
          stepIndex: i,
          field: "timeout",
          message: `"timeout" expected number, got ${typeof obj["timeout"]}.`
        });
      }
    }
    if ("optional" in obj && obj["optional"] !== void 0) {
      if (typeof obj["optional"] !== "boolean") {
        errors.push({
          stepIndex: i,
          field: "optional",
          message: `"optional" expected boolean, got ${typeof obj["optional"]}.`
        });
      }
    }
    // `select` has no single required field; it needs one of two combinations.
    if (action === "select") {
      const hasNative = "selector" in obj && "value" in obj;
      const hasCustom = "trigger" in obj && "option" in obj && "value" in obj;
      if (!hasNative && !hasCustom) {
        errors.push({
          stepIndex: i,
          field: "selector",
          message: "select requires either (selector + value) for native select, or (trigger + option + value) for custom select."
        });
      }
    }
  }
  return {
    valid: errors.length === 0,
    errors,
    formatted() {
      if (errors.length === 0) return "";
      const lines = [`Validation failed (${errors.length} error${errors.length > 1 ? "s" : ""}):`];
      for (const err of errors) {
        // The label is the same for every kind of error (the previous
        // conditional had two identical branches).
        const stepLabel = `Step ${err.stepIndex}`;
        lines.push("");
        lines.push(` ${stepLabel}: ${err.message}`);
        // Skip the suggestion line when the message already contains it.
        if (err.suggestion && !err.message.includes(err.suggestion)) {
          lines.push(` ${err.suggestion}`);
        }
        const step = steps[err.stepIndex];
        if (step && typeof step === "object") {
          lines.push(` Got: ${JSON.stringify(step)}`);
        }
      }
      // Extra tip when any failing step was an `evaluate` step.
      const hasEvaluateError = errors.some((err) => {
        const step = steps[err.stepIndex];
        return step && typeof step === "object" && step["action"] === "evaluate";
      });
      if (hasEvaluateError) {
        lines.push("");
        lines.push(
          "Tip: For JavaScript evaluation, use 'bp eval' instead \u2014 no JSON wrapping needed:"
        );
        lines.push(" bp eval 'your.expression.here'");
      }
      lines.push("");
      lines.push(`Valid actions: ${VALID_ACTIONS_LIST}`);
      return lines.join("\n");
    }
  };
}
|
|
797
|
+
|
|
798
|
+
// src/audio/encoding.ts
|
|
799
|
+
/**
 * Encode raw bytes as a base64 string.
 *
 * @param {Uint8Array | ArrayBuffer} data - Bytes to encode; anything that is
 *   not already a Uint8Array is wrapped in one.
 * @returns {string} Base64 text produced by `btoa`.
 */
function bufferToBase64(data) {
  const octets = data instanceof Uint8Array ? data : new Uint8Array(data);
  // btoa expects a "binary string": one character per byte.
  let binaryText = "";
  for (const octet of octets) {
    binaryText += String.fromCharCode(octet);
  }
  return btoa(binaryText);
}
|
|
807
|
+
/**
 * Decode a base64 string into its raw bytes.
 *
 * @param {string} b64 - Base64 text accepted by `atob`.
 * @returns {Uint8Array} One element per decoded byte.
 */
function base64ToBuffer(b64) {
  // atob yields one character (code 0-255) per decoded byte.
  return Uint8Array.from(atob(b64), (ch) => ch.charCodeAt(0));
}
|
|
815
|
+
/**
 * Root-mean-square of a sequence of numeric samples.
 *
 * @param {ArrayLike<number>} samples - Indexable sample values.
 * @returns {number} sqrt(mean(sample^2)); 0 for an empty input.
 */
function calculateRMS(samples) {
  const count = samples.length;
  if (count === 0) return 0;
  let squares = 0;
  let index = 0;
  while (index < count) {
    const sample = samples[index];
    squares += sample * sample;
    index += 1;
  }
  return Math.sqrt(squares / count);
}
|
|
823
|
+
/**
 * Build a 16-bit PCM WAV file from floating-point samples.
 *
 * Output layout: a 44-byte RIFF/WAVE header (format tag 1, little-endian
 * fields) followed by the samples, interleaved left/right when a right
 * channel is given. Samples are clamped to [-1, 1] before scaling.
 *
 * @param {{left: ArrayLike<number>, right?: ArrayLike<number>, sampleRate: number}} options
 *   `left` determines the number of frames; `right`, when truthy, makes the
 *   output two-channel.
 * @returns {ArrayBuffer} The complete WAV file.
 */
function pcmToWav(options) {
  const { left, right, sampleRate } = options;
  const HEADER_BYTES = 44;
  const BITS = 16;
  const channelCount = right ? 2 : 1;
  const frameCount = left.length;
  const frameBytes = channelCount * (BITS / 8);
  const payloadBytes = frameCount * frameBytes;
  const wav = new ArrayBuffer(HEADER_BYTES + payloadBytes);
  const out = new DataView(wav);

  // Clamp to [-1, 1], then scale asymmetrically so -1 -> -32768 and 1 -> 32767.
  const scale = (sample) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    return clamped < 0 ? clamped * 32768 : clamped * 32767;
  };

  // RIFF container header.
  writeString(out, 0, "RIFF");
  out.setUint32(4, 36 + payloadBytes, true);
  writeString(out, 8, "WAVE");
  // "fmt " chunk: 16-byte body, format tag 1.
  writeString(out, 12, "fmt ");
  out.setUint32(16, 16, true);
  out.setUint16(20, 1, true);
  out.setUint16(22, channelCount, true);
  out.setUint32(24, sampleRate, true);
  out.setUint32(28, sampleRate * frameBytes, true);
  out.setUint16(32, frameBytes, true);
  out.setUint16(34, BITS, true);
  // "data" chunk header.
  writeString(out, 36, "data");
  out.setUint32(40, payloadBytes, true);

  let cursor = HEADER_BYTES;
  for (let frame = 0; frame < frameCount; frame++) {
    out.setInt16(cursor, scale(left[frame]), true);
    cursor += 2;
    if (right) {
      out.setInt16(cursor, scale(right[frame]), true);
      cursor += 2;
    }
  }
  return wav;
}
|
|
860
|
+
/**
 * Parse the header of a RIFF/WAVE file and locate its "data" chunk.
 *
 * Reads channels, sample rate and bits-per-sample from the "fmt " chunk
 * (which must be the first chunk after the RIFF header), then walks the
 * following chunks until "data" is found.
 *
 * Differences from a naive fixed-offset parser:
 *  - the chunk walk starts after the *declared* fmt chunk size, so files
 *    whose fmt chunk is longer than 16 bytes are handled;
 *  - chunks with an odd size are followed by one pad byte, which is skipped;
 *  - a data chunk whose 8-byte header ends exactly at the end of the file
 *    (an empty recording) is found instead of being reported missing;
 *  - typed arrays / other ArrayBuffer views are accepted as well as a plain
 *    ArrayBuffer; offsets are then relative to the start of the view.
 *
 * @param {ArrayBuffer | ArrayBufferView} data - The WAV file bytes.
 * @returns {{sampleRate: number, channels: number, bitsPerSample: number,
 *   dataOffset: number, dataLength: number}} `dataOffset` is the byte offset
 *   of the first sample; `dataLength` is the size declared by the data chunk.
 * @throws {Error} When the input is shorter than 44 bytes, lacks the
 *   RIFF/WAVE or fmt markers, or contains no data chunk.
 */
function parseWavHeader(data) {
  const view = ArrayBuffer.isView(data) ? new DataView(data.buffer, data.byteOffset, data.byteLength) : new DataView(data);
  if (view.byteLength < 44) {
    throw new Error("Invalid WAV: file too small");
  }
  // Read a four-character chunk tag at the given byte offset.
  const tagAt = (offset) => String.fromCharCode(
    view.getUint8(offset),
    view.getUint8(offset + 1),
    view.getUint8(offset + 2),
    view.getUint8(offset + 3)
  );
  if (tagAt(0) !== "RIFF" || tagAt(8) !== "WAVE") {
    throw new Error("Invalid WAV: missing RIFF/WAVE header");
  }
  if (tagAt(12) !== "fmt ") {
    throw new Error("Invalid WAV: missing fmt chunk");
  }
  const channels = view.getUint16(22, true);
  const sampleRate = view.getUint32(24, true);
  const bitsPerSample = view.getUint16(34, true);
  // The fmt body starts at byte 20. Never trust a declared size below the
  // 16 bytes already read above, so the scan cannot start inside them.
  const fmtSize = Math.max(16, view.getUint32(16, true));
  let chunkOffset = 20 + fmtSize + (fmtSize & 1);
  // A chunk header needs 8 bytes (tag + size); `<=` lets a header that ends
  // exactly at the end of the file be read.
  while (chunkOffset + 8 <= view.byteLength) {
    const chunkId = tagAt(chunkOffset);
    const chunkSize = view.getUint32(chunkOffset + 4, true);
    if (chunkId === "data") {
      return {
        sampleRate,
        channels,
        bitsPerSample,
        dataOffset: chunkOffset + 8,
        dataLength: chunkSize
      };
    }
    // RIFF chunks are word-aligned: an odd-sized body has one pad byte.
    chunkOffset += 8 + chunkSize + (chunkSize & 1);
  }
  throw new Error("Invalid WAV: missing data chunk");
}
|
|
894
|
+
/**
 * Create a zero-filled sample buffer of the requested duration.
 *
 * @param {number} durationMs - Length of the silence in milliseconds.
 * @param {number} [sampleRate=48000] - Samples per second.
 * @returns {Float32Array} ceil(sampleRate * durationMs / 1000) zero samples.
 */
function generateSilence(durationMs, sampleRate = 48e3) {
  const sampleCount = Math.ceil(sampleRate * durationMs / 1e3);
  return new Float32Array(sampleCount);
}
|
|
897
|
+
/**
 * Generate a sine wave as floating-point samples.
 *
 * @param {number} frequency - Tone frequency in Hz.
 * @param {number} durationMs - Length of the tone in milliseconds.
 * @param {number} [sampleRate=48000] - Samples per second.
 * @param {number} [amplitude=0.5] - Peak value of the wave.
 * @returns {Float32Array} ceil(sampleRate * durationMs / 1000) samples where
 *   sample i is amplitude * sin(2π * frequency * i / sampleRate).
 */
function generateTone(frequency, durationMs, sampleRate = 48e3, amplitude = 0.5) {
  const sampleCount = Math.ceil(sampleRate * durationMs / 1e3);
  return new Float32Array(sampleCount).map(
    (_, i) => amplitude * Math.sin(2 * Math.PI * frequency * i / sampleRate)
  );
}
|
|
905
|
+
/**
 * Write a string into a DataView one byte per UTF-16 code unit.
 *
 * @param {DataView} view - Destination view.
 * @param {number} offset - Byte offset of the first character.
 * @param {string} str - Text to write; each char code is stored with setUint8.
 */
function writeString(view, offset, str) {
  str.split("").forEach((ch, position) => {
    view.setUint8(offset + position, ch.charCodeAt(0));
  });
}
|
|
910
|
+
/**
 * Read `length` bytes from a DataView as a string, one character per byte.
 *
 * @param {DataView} view - Source view.
 * @param {number} offset - Byte offset of the first character.
 * @param {number} length - Number of bytes to read.
 * @returns {string} The decoded characters.
 */
function readString(view, offset, length) {
  const chars = [];
  let position = 0;
  while (position < length) {
    chars.push(String.fromCharCode(view.getUint8(offset + position)));
    position += 1;
  }
  return chars.join("");
}
|
|
917
|
+
|
|
918
|
+
// src/audio/flags.ts
|
|
919
|
+
/**
 * Build the Chrome command-line flags used for audio automation.
 *
 * Always returns the three base flags (fake media device, fake media UI,
 * autoplay without user gesture). When `options.inputWavPath` is set, a
 * `--use-file-for-fake-audio-capture=` flag is appended; with `options.noLoop`
 * the path gets a "%noloop" suffix.
 *
 * @param {{inputWavPath?: string, noLoop?: boolean}} [options]
 * @returns {string[]} A fresh array of flags.
 */
function getAudioChromeFlags(options) {
  const baseFlags = [
    "--use-fake-device-for-media-stream",
    "--use-fake-ui-for-media-stream",
    "--autoplay-policy=no-user-gesture-required"
  ];
  const wavPath = options?.inputWavPath;
  if (!wavPath) {
    return baseFlags;
  }
  const capturePath = options.noLoop ? wavPath + "%noloop" : wavPath;
  return [...baseFlags, `--use-file-for-fake-audio-capture=${capturePath}`];
}
|
|
934
|
+
|
|
935
|
+
// src/audio/permissions.ts
|
|
936
|
+
/**
 * Grant audio-capture permission over CDP and install the page-side
 * permissions override script for every new document.
 *
 * Sends two commands in sequence through `cdp.send`:
 *   1. "Browser.grantPermissions" with the "audioCapture" permission;
 *   2. "Page.addScriptToEvaluateOnNewDocument" with PERMISSIONS_OVERRIDE_SCRIPT.
 *
 * The `origin` parameter of Browser.grantPermissions is optional and, per the
 * protocol documentation, the grant applies to all origins when it is not
 * specified. It is therefore left out entirely when no origin is given,
 * instead of being sent as an empty string.
 * NOTE(review): an empty-string origin is believed to be rejected by current
 * Chromium as an opaque origin — confirm against the target browser version.
 *
 * @param {{send: (method: string, params?: object) => Promise<*>}} cdp - CDP client.
 * @param {string} [origin] - Origin to grant the permission to; omitted or
 *   empty means "do not restrict to an origin".
 * @returns {Promise<void>}
 */
async function grantAudioPermissions(cdp, origin) {
  const grant = { permissions: ["audioCapture"] };
  if (origin != null && origin !== "") {
    grant.origin = origin;
  }
  await cdp.send("Browser.grantPermissions", grant);
  await cdp.send("Page.addScriptToEvaluateOnNewDocument", {
    source: PERMISSIONS_OVERRIDE_SCRIPT
  });
}
|
|
945
|
+
var PERMISSIONS_OVERRIDE_SCRIPT = `
|
|
946
|
+
(function() {
|
|
947
|
+
if (window.__bpPermissionsPatched) return;
|
|
948
|
+
window.__bpPermissionsPatched = true;
|
|
949
|
+
|
|
950
|
+
var origQuery = navigator.permissions.query.bind(navigator.permissions);
|
|
951
|
+
navigator.permissions.query = function(desc) {
|
|
952
|
+
if (desc && (desc.name === 'microphone' || desc.name === 'audio-capture')) {
|
|
953
|
+
return Promise.resolve({
|
|
954
|
+
state: 'granted',
|
|
955
|
+
onchange: null,
|
|
956
|
+
addEventListener: function() {},
|
|
957
|
+
removeEventListener: function() {},
|
|
958
|
+
dispatchEvent: function() { return true; }
|
|
959
|
+
});
|
|
960
|
+
}
|
|
961
|
+
return origQuery(desc);
|
|
962
|
+
};
|
|
963
|
+
})();
|
|
964
|
+
`;
|
|
965
|
+
|
|
966
|
+
// src/audio/input.ts
|
|
967
|
+
var INPUT_BINDING = "__bpAudioInputDone";
|
|
968
|
+
// Page-side script that replaces the microphone with a Web Audio generated
// stream. It overrides getUserMedia/enumerateDevices and exposes
// window.__bpAudioInput ({ play, stop, isPlaying, getState, getContext }).
//
// Differences from a naive implementation that matter to callers:
//  - Replacing a playing clip with a new play() retires the old source
//    silently: its onended handler is detached first, so it neither flips
//    isPlaying nor fires the __bpAudioInputDone binding for the new clip.
//    The replaced play() promise resolves with false.
//  - stop() still lets onended fire, so a waiter on the binding is released.
//  - The audio+video getUserMedia branch hands out cloned audio tracks, like
//    the audio-only branch, so consumers cannot stop the shared source track.
var AUDIO_INPUT_SCRIPT = `
(function() {
  if (window.__bpAudioInput) return;

  var audioCtx = null;
  var sourceNode = null;
  var sourceResolve = null;
  var destinationNode = null;
  var fakeStream = null;
  var silenceGain = null;
  var silenceOsc = null;
  var isPlaying = false;

  function ensureFakeStream() {
    if (fakeStream) return fakeStream;
    // Use the original AudioContext to avoid being tracked by our output override
    var CtorToUse = window.__bpOrigAudioContext || window.AudioContext || window.webkitAudioContext;
    audioCtx = new CtorToUse({ sampleRate: 48000 });
    // Auto-resume if suspended (CDP automation has no user gesture)
    if (audioCtx.state === 'suspended') {
      console.log('[bp:input] AudioContext suspended, auto-resuming...');
      audioCtx.resume().then(function() {
        console.log('[bp:input] AudioContext resumed (' + audioCtx.state + ')');
      }).catch(function(e) {
        console.warn('[bp:input] AudioContext resume failed:', e);
      });
    }
    destinationNode = audioCtx.createMediaStreamDestination();

    // Start with silence so the stream always has active tracks
    silenceGain = audioCtx.createGain();
    silenceGain.gain.value = 0;
    silenceOsc = audioCtx.createOscillator();
    silenceOsc.connect(silenceGain);
    silenceGain.connect(destinationNode);
    silenceOsc.start();

    fakeStream = destinationNode.stream;
    console.log('[bp:input] Fake mic stream created (48kHz, ' + fakeStream.getAudioTracks().length + ' tracks)');
    return fakeStream;
  }

  // Retire the active source because a newer play() replaces it. The handler is
  // detached BEFORE stop() so the old clip cannot report 'done' for the new one.
  function discardSource() {
    if (!sourceNode) return;
    var node = sourceNode;
    var settle = sourceResolve;
    sourceNode = null;
    sourceResolve = null;
    node.onended = null;
    try { node.stop(); } catch(e) {}
    try { node.disconnect(); } catch(e) {}
    isPlaying = false;
    if (settle) settle(false);
  }

  function playAudio(base64Data) {
    ensureFakeStream();

    var resumePromise = audioCtx.state === 'suspended'
      ? audioCtx.resume()
      : Promise.resolve();

    return resumePromise.then(function() {
      discardSource();

      var binaryStr = atob(base64Data);
      var bytes = new Uint8Array(binaryStr.length);
      for (var i = 0; i < binaryStr.length; i++) {
        bytes[i] = binaryStr.charCodeAt(i);
      }
      console.log('[bp:input] Decoding audio (' + bytes.length + ' bytes)...');

      return audioCtx.decodeAudioData(bytes.buffer.slice(0));
    }).then(function(audioBuffer) {
      // Another play() may have started a source while this clip was decoding.
      discardSource();

      var node = audioCtx.createBufferSource();
      node.buffer = audioBuffer;
      node.connect(destinationNode);

      var durationMs = Math.round(audioBuffer.duration * 1000);
      console.log('[bp:input] Playing ' + durationMs + 'ms audio (' + audioBuffer.sampleRate + 'Hz, ' + audioBuffer.numberOfChannels + 'ch)');

      return new Promise(function(resolve) {
        node.onended = function() {
          if (sourceNode === node) {
            sourceNode = null;
            sourceResolve = null;
          }
          // Do not clobber the flag of a clip that started in the meantime.
          if (!sourceNode) isPlaying = false;
          console.log('[bp:input] Playback ended');
          resolve(true);
          try {
            if (typeof window.__bpAudioInputDone === 'function') {
              window.__bpAudioInputDone('done');
            }
          } catch(e) {}
        };
        sourceNode = node;
        sourceResolve = resolve;
        isPlaying = true;
        node.start();
      });
    }).catch(function(err) {
      console.warn('[bp:input] Playback failed:', err);
      throw err;
    });
  }

  function stopAudio() {
    if (sourceNode) {
      // onended stays attached on purpose: it releases whoever waits for 'done'.
      try { sourceNode.stop(); } catch(e) {}
      try { sourceNode.disconnect(); } catch(e) {}
      sourceNode = null;
      sourceResolve = null;
    }
    isPlaying = false;
    console.log('[bp:input] Stopped');
  }

  var origGetUserMedia = navigator.mediaDevices.getUserMedia.bind(navigator.mediaDevices);

  navigator.mediaDevices.getUserMedia = function(constraints) {
    if (constraints && constraints.audio) {
      var stream = ensureFakeStream();
      console.log('[bp:input] getUserMedia intercepted \u2014 returning fake mic' + (constraints.video ? ' + real video' : ''));

      if (constraints.video) {
        // Get real video + our fake audio (cloned so consumers can't stop our source track)
        return origGetUserMedia({ video: constraints.video }).then(function(realStream) {
          var combined = new MediaStream(
            stream.clone().getAudioTracks().concat(realStream.getVideoTracks())
          );
          return combined;
        });
      }

      // Return a clone so consumers can't stop our source track
      return Promise.resolve(stream.clone());
    }
    return origGetUserMedia(constraints);
  };

  var origEnumerate = navigator.mediaDevices.enumerateDevices.bind(navigator.mediaDevices);
  navigator.mediaDevices.enumerateDevices = function() {
    return origEnumerate().then(function(devices) {
      var hasMic = devices.some(function(d) { return d.kind === 'audioinput'; });
      if (!hasMic) {
        devices.push({
          deviceId: 'bp-fake-mic',
          kind: 'audioinput',
          label: 'Default Audio Input',
          groupId: 'bp-audio',
          toJSON: function() {
            return { deviceId: this.deviceId, kind: this.kind, label: this.label, groupId: this.groupId };
          }
        });
      }
      return devices;
    });
  };

  window.__bpAudioInput = {
    play: playAudio,
    stop: stopAudio,
    isPlaying: function() { return isPlaying; },
    getState: function() {
      return {
        contextState: audioCtx ? audioCtx.state : 'not-created',
        isPlaying: isPlaying,
        sampleRate: audioCtx ? audioCtx.sampleRate : 0
      };
    },
    getContext: function() { return audioCtx; }
  };

  console.log('[bp:input] Audio input override installed (getUserMedia + enumerateDevices)');
})();
`;
|
|
1125
|
+
/**
 * Feeds audio into a page's fake microphone over CDP.
 *
 * The page side is the script in AUDIO_INPUT_SCRIPT; playback completion is
 * reported back through the INPUT_BINDING runtime binding.
 */
var AudioInput = class {
  cdp;
  injected = false;
  bindingRegistered = false;
  /** Listener of the wait currently in flight (null when nothing is waiting). */
  bindingHandler = null;
  /** Rejects and cleans up the wait currently in flight (null when nothing is waiting). */
  cancelPendingWait = null;
  constructor(cdp) {
    this.cdp = cdp;
  }
  /** Whether the audio input system has been set up */
  get isSetup() {
    return this.injected;
  }
  /**
   * Set up audio input injection.
   * Must be called before navigating to the page that will use getUserMedia.
   * Grants permissions and injects the getUserMedia override.
   *
   * @throws Error when the current page is about:blank or about:srcdoc.
   */
  async setup() {
    if (this.injected) return;
    const href = await this.readPageString("location.href");
    if (href === "about:blank" || href === "about:srcdoc") {
      throw new Error(
        'Cannot set up audio on about:blank. Navigate to a page first.\nExample: await page.goto("https://your-voice-app.com")'
      );
    }
    const pageOrigin = await this.readPageString("location.origin");
    // location.origin is the string "null" for opaque origins; treat that as unknown.
    const origin = pageOrigin !== "null" ? pageOrigin : void 0;
    await grantAudioPermissions(this.cdp, origin);
    if (!this.bindingRegistered) {
      await this.cdp.send("Runtime.addBinding", { name: INPUT_BINDING });
      this.bindingRegistered = true;
    }
    // Install for future documents, then run once in the current document.
    await this.cdp.send("Page.addScriptToEvaluateOnNewDocument", {
      source: AUDIO_INPUT_SCRIPT
    });
    await this.cdp.send("Runtime.evaluate", {
      expression: AUDIO_INPUT_SCRIPT,
      awaitPromise: false,
      userGesture: true
    });
    this.injected = true;
  }
  /**
   * Play audio bytes into the page's fake microphone.
   * Accepts any format that Web Audio API can decode (WAV, MP3, OGG, etc.).
   *
   * @param audioData - Raw audio file bytes
   * @param options - Playback options: `waitForEnd` (default true) waits for
   *   the page to report the end of playback, `timeout` (default 60000 ms)
   *   bounds that wait.
   * @throws Error when the page throws while starting playback, when the wait
   *   times out, or when the wait is cancelled by teardown() or a newer play().
   */
  async play(audioData, options) {
    if (!this.injected) {
      await this.setup();
    }
    // Resume every suspended AudioContext under a synthetic user gesture.
    await this.cdp.send("Runtime.evaluate", {
      expression: `(function() {
        var resumed = [];
        (window.__bpTrackedAudioContexts || []).forEach(function(ctx) {
          if (ctx.state === 'suspended') {
            ctx.resume().then(function() {
              console.log('[bp:input] Resumed suspended AudioContext (' + ctx.sampleRate + 'Hz)');
            });
            resumed.push(ctx.sampleRate);
          }
        });
        // Also resume the input context itself
        if (window.__bpAudioInput && window.__bpAudioInput.getContext) {
          var inputCtx = window.__bpAudioInput.getContext();
          if (inputCtx && inputCtx.state === 'suspended') {
            inputCtx.resume().then(function() {
              console.log('[bp:input] Resumed input AudioContext (' + inputCtx.sampleRate + 'Hz)');
            });
            resumed.push('input-' + inputCtx.sampleRate);
          }
        }
        return resumed.length > 0 ? 'resumed: ' + resumed.join(',') : 'all running';
      })()`,
      awaitPromise: false,
      userGesture: true
    });
    const base64 = bufferToBase64(audioData);
    const waitForEnd = options?.waitForEnd ?? true;
    const timeout = options?.timeout ?? 6e4;
    // Listen BEFORE starting playback so a very short clip cannot finish unnoticed.
    const wait = waitForEnd ? this.createBindingWait(timeout) : null;
    try {
      const resp = await this.cdp.send("Runtime.evaluate", {
        expression: `window.__bpAudioInput.play('${base64}')`,
        awaitPromise: false
      });
      const details = resp?.exceptionDetails;
      if (details) {
        // The page threw synchronously (e.g. the override is missing); no
        // 'done' signal will ever arrive, so fail now instead of timing out.
        throw new Error(
          `AudioInput: failed to start playback: ${details.exception?.description ?? details.text ?? "unknown error"}`
        );
      }
    } catch (err) {
      if (wait) {
        // Nobody will await the wait promise any more; keep its rejection handled.
        wait.promise.catch(() => {
        });
        wait.cancel(err);
      }
      throw err;
    }
    if (wait) {
      await wait.promise;
    }
  }
  /**
   * Stop any currently playing audio.
   */
  async stop() {
    if (!this.injected) return;
    await this.cdp.send("Runtime.evaluate", {
      expression: "window.__bpAudioInput && window.__bpAudioInput.stop()",
      awaitPromise: false
    });
  }
  /**
   * Get current state of the injected audio input system.
   *
   * @returns `{ contextState, isPlaying, sampleRate }`; a "not-created"
   *   placeholder when setup has not run or the page object is missing.
   */
  async getState() {
    if (!this.injected) {
      return { contextState: "not-created", isPlaying: false, sampleRate: 0 };
    }
    const result = await this.cdp.send("Runtime.evaluate", {
      expression: "window.__bpAudioInput ? window.__bpAudioInput.getState() : null",
      returnByValue: true
    });
    return result.result.value ?? { contextState: "not-created", isPlaying: false, sampleRate: 0 };
  }
  /**
   * Clean up: cancel a pending playback wait (its promise rejects right away
   * and its timer is cleared), remove the binding handler and stop playback.
   */
  async teardown() {
    this.cancelPendingWait?.(new Error("AudioInput: torn down before playback finished"));
    if (this.bindingHandler) {
      this.cdp.off("Runtime.bindingCalled", this.bindingHandler);
      this.bindingHandler = null;
    }
    try {
      await this.stop();
    } finally {
      this.injected = false;
      this.bindingRegistered = false;
    }
  }
  /**
   * Wait for the playback-complete binding to fire.
   *
   * @param timeout - Milliseconds before the returned promise rejects.
   * @returns Promise that resolves when the page calls the INPUT_BINDING binding.
   */
  waitForBinding(timeout) {
    return this.createBindingWait(timeout).promise;
  }
  /**
   * Build one playback wait. Every wait owns its listener and timer, so an
   * older wait can never remove the listener of a newer one. Starting a new
   * wait rejects the previous one immediately.
   *
   * @returns `{ promise, cancel }`; `cancel(reason)` rejects the promise and
   *   releases the listener and timer (no-op once the wait has settled).
   */
  createBindingWait(timeout) {
    this.cancelPendingWait?.(new Error("AudioInput: playback wait superseded by a newer play() call"));
    let cancel;
    const promise = new Promise((resolve, reject) => {
      let timer;
      const release = () => {
        clearTimeout(timer);
        this.cdp.off("Runtime.bindingCalled", handler);
        if (this.bindingHandler === handler) {
          this.bindingHandler = null;
          this.cancelPendingWait = null;
        }
      };
      const handler = (params) => {
        if (params["name"] !== INPUT_BINDING) return;
        release();
        resolve();
      };
      timer = setTimeout(() => {
        release();
        reject(new Error(`AudioInput: playback timed out after ${timeout}ms`));
      }, timeout);
      cancel = (reason) => {
        release();
        reject(reason);
      };
      this.bindingHandler = handler;
      this.cancelPendingWait = cancel;
      this.cdp.on("Runtime.bindingCalled", handler);
    });
    return { promise, cancel };
  }
  /**
   * Evaluate an expression in the page and return its value when it is a
   * string. Evaluation failures are treated as "unknown" (undefined).
   */
  async readPageString(expression) {
    try {
      const resp = await this.cdp.send("Runtime.evaluate", {
        expression,
        returnByValue: true
      });
      const value = resp.result?.value;
      return typeof value === "string" ? value : void 0;
    } catch {
      return void 0;
    }
  }
};
|
|
1303
|
+
|
|
1304
|
+
// src/audio/output.ts
|
|
1305
|
+
// Name of the binding the page-side capture script calls as
// window.__bpAudioOutputData(json) to hand over flushed audio chunks.
// NOTE(review): presumably registered via Runtime.addBinding by AudioOutput — confirm.
var OUTPUT_BINDING = "__bpAudioOutputData";
|
|
1306
|
+
// Page-side script that captures the audio a page produces and ships it to
// Node.js through the window.__bpAudioOutputData binding as JSON
// ({ left, right, sampleRate, samples }, channels as base64 Float32 data).
//
// Capture paths: a ScriptProcessorNode tap per page AudioContext (hooked via
// AudioNode.prototype.connect), plus an own 48kHz context for WebRTC tracks
// and media elements. Exposes window.__bpAudioOutput
// ({ start, stop, isCapturing, getBufferedSamples, tapPC, getStats }).
//
// The script is safe to inject repeatedly: every patched native (connect,
// play, the srcObject accessor, RTCPeerConnection) is saved on window once and
// re-wrapped from that saved original, and tracked AudioContexts and peer
// connections are carried over. Tapped track ids survive stop()/start()
// because their source nodes stay connected; re-tapping would double the signal.
var AUDIO_OUTPUT_SCRIPT = `
(function() {
  // If already installed, stop any active capture but allow re-initialization
  // so that updated scripts (e.g. with new capture strategies) take effect.
  if (window.__bpAudioOutput) {
    if (window.__bpAudioOutput.isCapturing()) window.__bpAudioOutput.stop();
  }

  var BUFFER_SIZE = 4096;
  // Flush to Node.js once this many samples are buffered (~1s of audio at 48kHz)
  var FLUSH_SAMPLES = 48000;
  var capturing = false;
  var capturedChunks = [];
  var totalSamples = 0;
  var flushCount = 0;
  var pendingTracks = [];
  var tappedTrackIds = {};

  // --- Per-context tap infrastructure ---
  // Preserve any AudioContexts tracked by a previous script version
  var allAudioContexts = window.__bpTrackedAudioContexts || [];
  // Use a WeakMap to associate taps with AudioContext instances
  // (native objects like AudioContext may not support custom properties)
  var contextTapMap = typeof WeakMap !== 'undefined' ? new WeakMap() : null;
  var contextTapList = []; // fallback: [{ctx, proc}]

  var OrigAudioContext = window.__bpOrigAudioContext || window.AudioContext || window.webkitAudioContext;
  // Save the native connect function once; on re-injection, reuse it to avoid double-wrapping
  var origConnect = window.__bpOrigConnect || AudioNode.prototype.connect;
  window.__bpOrigConnect = origConnect;

  // Our own capture context (48kHz) for WebRTC tracks and media elements
  var captureCtx = null;
  var captureProcessor = null;

  // Save original AudioContext constructor once
  if (!window.__bpOrigAudioContext) {
    window.__bpOrigAudioContext = OrigAudioContext;
  }

  // Override AudioContext constructor to track all instances (skip if already overridden)
  if (OrigAudioContext && !window.__bpAudioContextOverridden) {
    window.__bpAudioContextOverridden = true;
    window.AudioContext = function() {
      var ctx = new (Function.prototype.bind.apply(OrigAudioContext, [null].concat(Array.prototype.slice.call(arguments))))();
      allAudioContexts.push(ctx);
      // Auto-resume suspended contexts: CDP automation has no user gesture,
      // so Chrome suspends new AudioContexts by default. Without this, voice
      // agents' ScriptProcessorNodes never fire and no audio flows.
      if (ctx.state === 'suspended') {
        console.log('[bp:output] AudioContext created suspended (' + ctx.sampleRate + 'Hz), auto-resuming...');
        ctx.resume().then(function() {
          console.log('[bp:output] AudioContext resumed successfully (' + ctx.sampleRate + 'Hz, state: ' + ctx.state + ')');
        }).catch(function(e) {
          console.warn('[bp:output] AudioContext resume failed (' + ctx.sampleRate + 'Hz):', e);
        });
      } else {
        console.log('[bp:output] AudioContext created (' + ctx.sampleRate + 'Hz, state: ' + ctx.state + ')');
      }
      return ctx;
    };
    window.AudioContext.prototype = OrigAudioContext.prototype;
    Object.keys(OrigAudioContext).forEach(function(k) {
      try { window.AudioContext[k] = OrigAudioContext[k]; } catch(e) {}
    });
    if (window.webkitAudioContext) {
      window.webkitAudioContext = window.AudioContext;
    }
  }

  // Expose tracked contexts on window so re-injections preserve them
  window.__bpTrackedAudioContexts = allAudioContexts;

  // Look up an existing tap for a given AudioContext
  function findTap(ctx) {
    if (contextTapMap) return contextTapMap.get(ctx) || null;
    for (var i = 0; i < contextTapList.length; i++) {
      if (contextTapList[i].ctx === ctx) return contextTapList[i].proc;
    }
    return null;
  }

  // Store a tap for a given AudioContext
  function storeTap(ctx, proc) {
    if (contextTapMap) { contextTapMap.set(ctx, proc); }
    else { contextTapList.push({ ctx: ctx, proc: proc }); }
  }

  // Count stored taps
  function tapCount() {
    if (contextTapMap) {
      var count = 0;
      for (var i = 0; i < allAudioContexts.length; i++) {
        if (contextTapMap.has(allAudioContexts[i])) count++;
      }
      return count;
    }
    return contextTapList.length;
  }

  // Create or retrieve a ScriptProcessorNode tap for a specific AudioContext.
  // The tap lives in the SAME context as the source, avoiding cross-context errors.
  function getOrCreateTap(ctx) {
    var existing = findTap(ctx);
    if (existing) return existing;

    try {
      if (ctx.state === 'closed') return null;
      var channels = Math.min(ctx.destination.channelCount || 2, 2);
      if (channels < 1) channels = 1;
      var proc = ctx.createScriptProcessor(BUFFER_SIZE, channels, channels);
      proc.onaudioprocess = function(e) {
        if (!capturing) return;
        var left = new Float32Array(e.inputBuffer.getChannelData(0));
        var right = e.inputBuffer.numberOfChannels > 1
          ? new Float32Array(e.inputBuffer.getChannelData(1))
          : new Float32Array(left.length);
        capturedChunks.push({ left: left, right: right, sampleRate: ctx.sampleRate });
        totalSamples += left.length;
        if (totalSamples >= FLUSH_SAMPLES) {
          flushToNodeJs();
        }
      };
      // Must connect to destination to keep ScriptProcessorNode alive
      origConnect.call(proc, ctx.destination);
      storeTap(ctx, proc);
      return proc;
    } catch(e) {
      return null;
    }
  }

  // Override AudioNode.prototype.connect to tap connections to any AudioDestinationNode
  AudioNode.prototype.connect = function(destination) {
    var result = origConnect.apply(this, arguments);

    if (capturing && destination instanceof AudioDestinationNode) {
      try {
        var tap = getOrCreateTap(destination.context);
        // Don't connect the tap to itself
        if (tap && tap !== this) {
          origConnect.call(this, tap);
        }
      } catch(e) {}
    }
    return result;
  };

  var origPlay = window.__bpOrigPlay || HTMLMediaElement.prototype.play;
  window.__bpOrigPlay = origPlay;
  HTMLMediaElement.prototype.play = function() {
    if (capturing && !this.__bpCaptured) {
      this.__bpCaptured = true;
      try {
        if (!captureCtx) initCaptureCtx();
        var stream = this.captureStream ? this.captureStream() : null;
        if (stream && captureCtx) {
          var source = captureCtx.createMediaStreamSource(stream);
          origConnect.call(source, captureProcessor);
        }
      } catch(e) {}
    }
    return origPlay.apply(this, arguments);
  };

  // Intercept srcObject assignment to catch WebRTC streams attached to media elements.
  // The native descriptor is saved once so re-injection does not wrap our own wrapper.
  var origSrcObjectDesc = window.__bpOrigSrcObjectDesc || Object.getOwnPropertyDescriptor(HTMLMediaElement.prototype, 'srcObject');
  window.__bpOrigSrcObjectDesc = origSrcObjectDesc;
  if (origSrcObjectDesc && origSrcObjectDesc.set) {
    Object.defineProperty(HTMLMediaElement.prototype, 'srcObject', {
      set: function(stream) {
        origSrcObjectDesc.set.call(this, stream);
        if (stream && stream.getAudioTracks) {
          var tracks = stream.getAudioTracks();
          for (var i = 0; i < tracks.length; i++) {
            if (capturing) {
              tapAudioTrack(tracks[i]);
            } else {
              pendingTracks.push(tracks[i]);
            }
          }
        }
      },
      get: origSrcObjectDesc.get,
      configurable: true
    });
  }

  // Initialize our own 48kHz capture context for WebRTC and media element tapping
  function initCaptureCtx() {
    captureCtx = new OrigAudioContext({ sampleRate: 48000 });
    captureProcessor = captureCtx.createScriptProcessor(BUFFER_SIZE, 2, 2);
    captureProcessor.onaudioprocess = function(e) {
      if (!capturing) return;
      var left = new Float32Array(e.inputBuffer.getChannelData(0));
      var right = new Float32Array(e.inputBuffer.getChannelData(1));
      capturedChunks.push({ left: left, right: right, sampleRate: 48000 });
      totalSamples += left.length;
      if (totalSamples >= FLUSH_SAMPLES) {
        flushToNodeJs();
      }
    };
    origConnect.call(captureProcessor, captureCtx.destination);
  }

  function uint8ToBase64(bytes) {
    var CHUNK = 8192;
    var parts = [];
    for (var i = 0; i < bytes.length; i += CHUNK) {
      var slice = bytes.subarray(i, Math.min(i + CHUNK, bytes.length));
      var binary = '';
      for (var j = 0; j < slice.length; j++) {
        binary += String.fromCharCode(slice[j]);
      }
      parts.push(binary);
    }
    return btoa(parts.join(''));
  }

  function flushGroup(chunks, rate) {
    var totalLen = 0;
    for (var i = 0; i < chunks.length; i++) {
      totalLen += chunks[i].left.length;
    }
    if (totalLen === 0) return;

    var left = new Float32Array(totalLen);
    var right = new Float32Array(totalLen);
    var offset = 0;
    for (var i = 0; i < chunks.length; i++) {
      left.set(chunks[i].left, offset);
      right.set(chunks[i].right, offset);
      offset += chunks[i].left.length;
    }

    var leftB64 = uint8ToBase64(new Uint8Array(left.buffer));
    var rightB64 = uint8ToBase64(new Uint8Array(right.buffer));

    flushCount++;

    try {
      if (typeof window.__bpAudioOutputData === 'function') {
        window.__bpAudioOutputData(JSON.stringify({
          left: leftB64,
          right: rightB64,
          sampleRate: rate,
          samples: totalLen
        }));
      }
    } catch(e) {}
  }

  function flushToNodeJs() {
    if (capturedChunks.length === 0) return;

    // Group chunks by sample rate to avoid mixing different-rate audio
    var byRate = {};
    for (var i = 0; i < capturedChunks.length; i++) {
      var rate = capturedChunks[i].sampleRate || 48000;
      if (!byRate[rate]) byRate[rate] = [];
      byRate[rate].push(capturedChunks[i]);
    }

    // Flush each sample rate group separately
    for (var rateKey in byRate) {
      if (byRate.hasOwnProperty(rateKey)) {
        flushGroup(byRate[rateKey], Number(rateKey));
      }
    }

    capturedChunks = [];
    totalSamples = 0;
  }

  // --- WebRTC interception (for apps that use RTCPeerConnection) ---
  var rtcTrackedStreams = [];
  // Preserve peer connections tracked by a previous injection
  var rtcPeerConnections = window.__bpTrackedPCs || [];

  function tapAudioTrack(track) {
    try {
      if (tappedTrackIds[track.id]) return;
      tappedTrackIds[track.id] = true;
      if (!captureCtx) initCaptureCtx();
      var stream = new MediaStream([track]);
      var source = captureCtx.createMediaStreamSource(stream);
      origConnect.call(source, captureProcessor);
      rtcTrackedStreams.push(source);
    } catch(e) {}
  }

  function tapExistingPeerConnection(pc) {
    try {
      var receivers = pc.getReceivers ? pc.getReceivers() : [];
      for (var i = 0; i < receivers.length; i++) {
        if (receivers[i].track && receivers[i].track.kind === 'audio') {
          tapAudioTrack(receivers[i].track);
        }
      }
    } catch(e) {}
  }

  // Tap (or queue, while not capturing) every audio track a peer connection receives
  function watchPeerConnection(pc) {
    pc.addEventListener('track', function(event) {
      if (event.track && event.track.kind === 'audio') {
        if (capturing) {
          tapAudioTrack(event.track);
        } else {
          pendingTracks.push(event.track);
        }
      }
    });
  }

  // Connections carried over from a previous injection report to this instance too
  for (var p = 0; p < rtcPeerConnections.length; p++) {
    try { watchPeerConnection(rtcPeerConnections[p]); } catch(e) {}
  }

  // Save the native constructor once; on re-injection, reuse it to avoid double-wrapping
  var OrigRTC = window.__bpOrigRTCPeerConnection || (typeof RTCPeerConnection !== 'undefined' ? RTCPeerConnection : null);
  if (OrigRTC) {
    window.__bpOrigRTCPeerConnection = OrigRTC;

    window.RTCPeerConnection = function() {
      var pc = new (Function.prototype.bind.apply(OrigRTC, [null].concat(Array.prototype.slice.call(arguments))))();
      rtcPeerConnections.push(pc);
      watchPeerConnection(pc);
      return pc;
    };
    window.RTCPeerConnection.prototype = OrigRTC.prototype;
    Object.keys(OrigRTC).forEach(function(k) {
      try { window.RTCPeerConnection[k] = OrigRTC[k]; } catch(e) {}
    });

    window.__bpTrackedPCs = rtcPeerConnections;
  }

  window.__bpAudioOutput = {
    start: function() {
      capturing = true;
      capturedChunks = [];
      totalSamples = 0;
      flushCount = 0;
      // tappedTrackIds is intentionally NOT reset: sources of tapped tracks stay
      // connected across stop()/start(), so tapping them again would double the audio.

      // Resume any suspended capture context
      if (captureCtx && captureCtx.state === 'suspended') captureCtx.resume();

      // Create taps for all tracked AudioContexts (catches contexts created before capture)
      for (var i = 0; i < allAudioContexts.length; i++) {
        var ctx = allAudioContexts[i];
        if (ctx.state !== 'closed') {
          getOrCreateTap(ctx);
        }
      }

      // Drain pending WebRTC tracks
      for (var j = 0; j < pendingTracks.length; j++) {
        tapAudioTrack(pendingTracks[j]);
      }
      pendingTracks = [];

      // Tap existing peer connections
      for (var k = 0; k < rtcPeerConnections.length; k++) {
        tapExistingPeerConnection(rtcPeerConnections[k]);
      }

      // Scan existing media elements for srcObject with audio tracks
      var mediaEls = document.querySelectorAll('audio, video');
      for (var i = 0; i < mediaEls.length; i++) {
        var el = mediaEls[i];
        if (el.srcObject && el.srcObject.getAudioTracks && !el.__bpCaptured) {
          el.__bpCaptured = true;
          var tracks = el.srcObject.getAudioTracks();
          for (var j = 0; j < tracks.length; j++) {
            tapAudioTrack(tracks[j]);
          }
        }
      }

      // Watch for dynamically added media elements with srcObject
      if (typeof MutationObserver !== 'undefined') {
        if (window.__bpMediaObserver) {
          window.__bpMediaObserver.disconnect();
        }
        window.__bpMediaObserver = new MutationObserver(function(mutations) {
          for (var i = 0; i < mutations.length; i++) {
            var added = mutations[i].addedNodes;
            for (var j = 0; j < added.length; j++) {
              var node = added[j];
              if (node.nodeType !== 1) continue;
              var els = [];
              if (node.tagName === 'AUDIO' || node.tagName === 'VIDEO') els.push(node);
              else if (node.querySelectorAll) {
                var nested = node.querySelectorAll('audio, video');
                for (var k = 0; k < nested.length; k++) els.push(nested[k]);
              }
              for (var m = 0; m < els.length; m++) {
                var el = els[m];
                if (el.srcObject && el.srcObject.getAudioTracks && !el.__bpCaptured) {
                  el.__bpCaptured = true;
                  var tracks = el.srcObject.getAudioTracks();
                  for (var t = 0; t < tracks.length; t++) tapAudioTrack(tracks[t]);
                }
              }
            }
          }
        });
        window.__bpMediaObserver.observe(document, { childList: true, subtree: true });
      }
    },
    stop: function() {
      capturing = false;
      flushToNodeJs();
      // Disconnect MutationObserver
      if (window.__bpMediaObserver) {
        window.__bpMediaObserver.disconnect();
        window.__bpMediaObserver = null;
      }
    },
    isCapturing: function() { return capturing; },
    getBufferedSamples: function() { return totalSamples; },
    tapPC: function(pc) {
      if (!pc || typeof pc.getReceivers !== 'function') return false;
      if (rtcPeerConnections.indexOf(pc) === -1) {
        rtcPeerConnections.push(pc);
      }
      if (capturing) {
        tapExistingPeerConnection(pc);
      }
      watchPeerConnection(pc);
      return true;
    },
    getStats: function() {
      return {
        audioContexts: allAudioContexts.filter(function(c) { return c.state !== 'closed'; }).length,
        contextTaps: tapCount(),
        audioNodes: captureCtx ? captureCtx.destination.numberOfInputs : 0,
        rtcConnections: rtcPeerConnections.length,
        mediaElements: document.querySelectorAll('audio, video').length,
        pendingTracks: pendingTracks.length,
        tappedTracks: Object.keys(tappedTrackIds).length,
        capturing: capturing,
        bufferedSamples: totalSamples,
        rtcDetails: rtcPeerConnections.map(function(pc) {
          try {
            var receivers = pc.getReceivers ? pc.getReceivers() : [];
            var senders = pc.getSenders ? pc.getSenders() : [];
            var audioReceivers = receivers.filter(function(r) { return r.track && r.track.kind === 'audio'; }).length;
            var audioSenders = senders.filter(function(s) { return s.track && s.track.kind === 'audio'; }).length;
            return {
              state: pc.connectionState || pc.iceConnectionState || 'unknown',
              audioReceivers: audioReceivers,
              audioSenders: audioSenders,
              tapped: receivers.some(function(r) { return r.track && tappedTrackIds[r.track.id]; })
            };
          } catch(e) { return { state: 'error', audioReceivers: 0, audioSenders: 0, tapped: false }; }
        }),
        mediaElementDetails: (function() {
          try {
            var els = document.querySelectorAll('audio, video');
            var details = [];
            for (var i = 0; i < els.length; i++) {
              var el = els[i];
              var hasSrcObject = !!(el.srcObject);
              var audioTracks = 0;
              if (el.srcObject && el.srcObject.getAudioTracks) {
                audioTracks = el.srcObject.getAudioTracks().length;
              }
              details.push({
                tag: el.tagName.toLowerCase(),
                hasSrcObject: hasSrcObject,
                hasSrc: !!(el.src || el.currentSrc),
                audioTracks: audioTracks,
                tapped: !!(el.__bpCaptured)
              });
            }
            return details;
          } catch(e) { return []; }
        })()
      };
    }
  };
})();
`;
|
|
1789
|
+
/**
 * Captures audio produced by the page and collects it on the Node.js side.
 *
 * The page-side script (AUDIO_OUTPUT_SCRIPT) is driven through
 * `window.__bpAudioOutput` and delivers PCM data through the
 * OUTPUT_BINDING CDP binding as JSON payloads with base64-encoded
 * `left` / `right` channels plus `sampleRate` and `samples`.
 */
var AudioOutput = class {
  /** CDP client used for every protocol call */
  cdp;
  /** Chunks received since the last start() */
  chunks = [];
  injected = false;
  capturing = false;
  bindingHandler = null;
  onChunkHandler;
  onDiagHandler;
  /** Timestamp of the first non-silent chunk received */
  firstChunkTime = null;
  /** Identifier returned by Page.addScriptToEvaluateOnNewDocument, kept so teardown() can unregister it */
  scriptIdentifier = null;
  /**
   * @param cdp CDP client exposing `send`, `on` and `off`
   */
  constructor(cdp) {
    this.cdp = cdp;
  }
  /** Whether the audio output system has been set up */
  get isSetup() {
    return this.injected;
  }
  /** Whether audio is currently being captured */
  get isCapturing() {
    return this.capturing;
  }
  /**
   * Set up audio output capture.
   * Registers bindings and injects the capture script.
   * Safe to call again after a failed attempt: the binding listener is
   * only attached once.
   */
  async setup() {
    if (this.injected) return;
    await this.cdp.send("Runtime.addBinding", { name: OUTPUT_BINDING });
    if (!this.bindingHandler) {
      // Guarded so that a retry after a partial failure does not register a
      // second listener (which would store every chunk twice).
      this.bindingHandler = (params) => {
        if (params["name"] === OUTPUT_BINDING) {
          this.handleAudioData(params["payload"]);
        }
      };
      this.cdp.on("Runtime.bindingCalled", this.bindingHandler);
    }
    const registration = await this.cdp.send("Page.addScriptToEvaluateOnNewDocument", {
      source: AUDIO_OUTPUT_SCRIPT
    });
    this.scriptIdentifier = registration?.identifier ?? null;
    // Also run the script in the current document; the registration above
    // only applies to documents created afterwards.
    await this.cdp.send("Runtime.evaluate", {
      expression: AUDIO_OUTPUT_SCRIPT,
      awaitPromise: false,
      userGesture: true
    });
    this.injected = true;
  }
  /**
   * Start capturing audio output.
   * Discards previously collected chunks, resumes suspended AudioContexts,
   * starts the page-side capture and taps already-existing peer connections.
   */
  async start() {
    if (!this.injected) {
      await this.setup();
    }
    this.chunks = [];
    this.firstChunkTime = null;
    this.capturing = true;
    await this.cdp.send("Runtime.evaluate", {
      expression: `(function() {
        var resumed = [];
        (window.__bpTrackedAudioContexts || []).forEach(function(ctx) {
          if (ctx.state === 'suspended') {
            ctx.resume().then(function() {
              console.log('[bp:output] Resumed AudioContext (' + ctx.sampleRate + 'Hz) before capture');
            });
            resumed.push(ctx.sampleRate);
          }
        });
        if (window.__bpAudioInput && window.__bpAudioInput.getContext) {
          var inputCtx = window.__bpAudioInput.getContext();
          if (inputCtx && inputCtx.state === 'suspended') {
            inputCtx.resume();
            resumed.push('input-' + inputCtx.sampleRate);
          }
        }
        if (resumed.length) console.log('[bp:output] Resumed ' + resumed.length + ' contexts: ' + resumed.join(', '));
      })()`,
      awaitPromise: false,
      userGesture: true
    });
    await this.cdp.send("Runtime.evaluate", {
      expression: "window.__bpAudioOutput && window.__bpAudioOutput.start()",
      awaitPromise: false
    });
    await this.discoverExistingPeerConnections();
    if (this.onDiagHandler) {
      try {
        const statsResult = await this.cdp.send(
          "Runtime.evaluate",
          {
            expression: "window.__bpAudioOutput && window.__bpAudioOutput.getStats()",
            returnByValue: true
          }
        );
        const stats = statsResult.result.value;
        if (stats) {
          this.onDiagHandler(
            `started \u2014 ${stats["audioContexts"]} AudioContexts, ${stats["contextTaps"]} taps, ${stats["rtcConnections"]} RTCPeerConnections, ${stats["mediaElements"]} MediaElements, ${stats["tappedTracks"]} tapped tracks`
          );
        }
      } catch {
        // Diagnostics are optional; a failed stats query must not fail start().
      }
    }
  }
  /**
   * Stop capturing and return all collected audio.
   * @returns merged capture result (empty when setup() never ran)
   */
  async stop() {
    if (!this.injected) {
      return emptyCaptureResult();
    }
    try {
      await this.cdp.send("Runtime.evaluate", {
        expression: "window.__bpAudioOutput && window.__bpAudioOutput.stop()",
        awaitPromise: false
      });
    } finally {
      // Even when the page can no longer be reached we are not capturing anymore.
      this.capturing = false;
    }
    // Give the final flush from the page time to arrive over the binding.
    await sleep(250);
    return this.mergeChunks();
  }
  /**
   * Capture audio until silence is detected.
   *
   * Two-phase approach:
   * 1. **Wait phase**: Wait up to `noAudioTimeout` for the first non-silent chunk.
   *    The silence countdown does NOT tick during this phase, so slow voice agents
   *    (STT → LLM → TTS can take 5-15s) don't cause premature timeout.
   * 2. **Capture phase**: Once audio is detected, capture until `silenceTimeout` ms
   *    pass without a non-silent chunk, then stop.
   *
   * `maxDuration` caps the total time in either phase.
   *
   * Every chunk that arrived since the previous poll is inspected exactly once,
   * using the chunk's own arrival timestamp. This keeps short bursts between two
   * polls from being missed and keeps a stale loud chunk from postponing the
   * silence timeout when no further chunks arrive.
   *
   * @param options optional `silenceTimeout` (default 1500 ms), `silenceThreshold`
   *   (RMS, default 0.01), `maxDuration` (default 300000 ms) and `noAudioTimeout`
   *   (default 15000 ms)
   * @returns promise resolving to the merged capture; rejects if stopping fails
   */
  async captureUntilSilence(options) {
    const silenceTimeout = options?.silenceTimeout ?? 1500;
    const silenceThreshold = options?.silenceThreshold ?? 0.01;
    const maxDuration = options?.maxDuration ?? 3e5;
    const noAudioTimeout = options?.noAudioTimeout ?? 15e3;
    if (!this.capturing) {
      await this.start();
    }
    return new Promise((resolve, reject) => {
      let heardAudio = false;
      let lastSoundTime = 0;
      const startTime = Date.now();
      // Begin with the newest chunk already present (if any), like the first poll always did.
      let cursor = Math.max(0, this.chunks.length - 1);
      let checkInterval;
      const finish = (message) => {
        clearInterval(checkInterval);
        if (message) {
          this.onDiagHandler?.(message);
        }
        // Propagate a failing stop() instead of leaving the promise pending forever.
        this.stop().then(resolve, reject);
      };
      checkInterval = setInterval(() => {
        const now = Date.now();
        const elapsed = now - startTime;
        if (elapsed > maxDuration) {
          finish(`max duration reached (${maxDuration}ms), stopping`);
          return;
        }
        if (cursor > this.chunks.length) {
          // The chunk list was reset (start() was called again) while polling.
          cursor = 0;
        }
        for (; cursor < this.chunks.length; cursor++) {
          const chunk = this.chunks[cursor];
          if (calculateRMS(chunk.left) > silenceThreshold) {
            if (!heardAudio) {
              heardAudio = true;
              this.onDiagHandler?.("first audio detected \u2014 silence countdown begins");
            }
            // Clamp to startTime so a chunk from before this call does not
            // immediately count as "long ago".
            lastSoundTime = Math.max(lastSoundTime, startTime, chunk.timestamp);
          }
        }
        if (!heardAudio && elapsed > noAudioTimeout) {
          finish(`no audio detected after ${noAudioTimeout}ms, stopping early`);
          return;
        }
        if (heardAudio && now - lastSoundTime > silenceTimeout) {
          finish();
        }
      }, 200);
    });
  }
  /**
   * Subscribe to real-time audio chunks as they arrive.
   */
  onData(handler) {
    this.onChunkHandler = handler;
  }
  /**
   * Subscribe to diagnostic messages (for --verbose).
   */
  onDiag(handler) {
    this.onDiagHandler = handler;
  }
  /**
   * Clean up: stop a running capture, remove the binding handler and
   * unregister the injected new-document script.
   */
  async teardown() {
    if (this.capturing) {
      await this.stop();
    }
    if (this.bindingHandler) {
      this.cdp.off("Runtime.bindingCalled", this.bindingHandler);
      this.bindingHandler = null;
    }
    if (this.scriptIdentifier) {
      const identifier = this.scriptIdentifier;
      this.scriptIdentifier = null;
      try {
        // Without this every setup()/teardown() cycle would leave one more
        // copy of the capture script registered for future documents.
        await this.cdp.send("Page.removeScriptToEvaluateOnNewDocument", { identifier });
      } catch {
        // Best effort: the target may already be gone.
      }
    }
    this.onChunkHandler = void 0;
    this.onDiagHandler = void 0;
    this.injected = false;
  }
  /**
   * Use CDP Runtime.queryObjects to find RTCPeerConnection instances
   * that were created before our override was injected, and tap their audio tracks.
   * Failures are ignored (discovery is best effort); the remote handles obtained
   * along the way are released in every case.
   */
  async discoverExistingPeerConnections() {
    let protoId;
    let arrayId;
    try {
      const protoResult = await this.cdp.send("Runtime.evaluate", {
        expression: 'typeof RTCPeerConnection !== "undefined" ? RTCPeerConnection.prototype : null',
        returnByValue: false
      });
      protoId = protoResult.result.objectId;
      if (!protoId) return;
      const queryResult = await this.cdp.send("Runtime.queryObjects", {
        prototypeObjectId: protoId
      });
      arrayId = queryResult.objects.objectId;
      if (!arrayId) return;
      const propsResult = await this.cdp.send("Runtime.getProperties", {
        objectId: arrayId,
        ownProperties: true
      });
      let tapped = 0;
      for (const prop of propsResult.result) {
        if (prop.name === "length" || prop.name === "__proto__") continue;
        const pcObjectId = prop.value?.objectId;
        if (!pcObjectId) continue;
        const tapResult = await this.cdp.send("Runtime.callFunctionOn", {
          objectId: pcObjectId,
          functionDeclaration: "function() { if (window.__bpAudioOutput && window.__bpAudioOutput.tapPC) { return window.__bpAudioOutput.tapPC(this); } return false; }",
          returnByValue: true
        });
        // Do not count connections the page explicitly refused to tap.
        if (tapResult?.result?.value !== false) {
          tapped++;
        }
      }
      if (tapped > 0) {
        this.onDiagHandler?.(`retroactively discovered ${tapped} existing RTCPeerConnection(s)`);
      }
    } catch {
      // Best effort: capture still works for connections created after injection.
    } finally {
      for (const objectId of [arrayId, protoId]) {
        if (!objectId) continue;
        try {
          await this.cdp.send("Runtime.releaseObject", { objectId });
        } catch {
          // Nothing more we can do about a handle that cannot be released.
        }
      }
    }
  }
  /**
   * Decode one binding payload into a chunk, store it and notify subscribers.
   * Malformed payloads are dropped silently.
   * @param payload JSON string with base64 `left` / `right`, `sampleRate`, `samples`
   */
  handleAudioData(payload) {
    // Copy exactly the bytes of the view: `bytes.buffer` alone may be larger
    // than the view (e.g. a pooled Node.js Buffer) or start at another offset.
    const toFloat32 = (bytes) => new Float32Array(
      bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength)
    );
    try {
      const data = JSON.parse(payload);
      const chunk = {
        left: toFloat32(base64ToBuffer(data.left)),
        right: toFloat32(base64ToBuffer(data.right)),
        sampleRate: data.sampleRate,
        samples: data.samples,
        timestamp: Date.now()
      };
      this.chunks.push(chunk);
      if (this.onDiagHandler || this.firstChunkTime === null) {
        const rms = calculateRMS(chunk.left);
        if (this.onDiagHandler) {
          const label = rms > 0.01 ? "audio" : "silence";
          this.onDiagHandler(`chunk: ${chunk.samples} samples, RMS=${rms.toFixed(4)} (${label})`);
        }
        if (this.firstChunkTime === null && rms > 1e-3) {
          this.firstChunkTime = Date.now();
        }
      }
      this.onChunkHandler?.(chunk);
    } catch {
      // Deliberately ignored: one bad payload must not break the capture.
    }
  }
  /**
   * Concatenate the collected chunks into one stereo buffer.
   * When chunks with several sample rates were received, only the rate with the
   * most non-silent samples (RMS above 0.01) is kept; the rate of the first
   * chunk wins when everything is silent.
   */
  mergeChunks() {
    if (this.chunks.length === 0) {
      return emptyCaptureResult();
    }
    const byRate = /* @__PURE__ */ new Map();
    for (const chunk of this.chunks) {
      const rate = chunk.sampleRate;
      if (!byRate.has(rate)) byRate.set(rate, []);
      byRate.get(rate).push(chunk);
    }
    let bestRate = this.chunks[0].sampleRate;
    let bestNonSilentSamples = 0;
    for (const [rate, chunks] of byRate) {
      let nonSilentSamples = 0;
      for (const chunk of chunks) {
        if (calculateRMS(chunk.left) > 0.01) {
          nonSilentSamples += chunk.left.length;
        }
      }
      if (nonSilentSamples > bestNonSilentSamples) {
        bestNonSilentSamples = nonSilentSamples;
        bestRate = rate;
      }
    }
    const bestChunks = byRate.get(bestRate);
    let totalLen = 0;
    for (const chunk of bestChunks) {
      totalLen += chunk.left.length;
    }
    const left = new Float32Array(totalLen);
    const right = new Float32Array(totalLen);
    let offset = 0;
    for (const chunk of bestChunks) {
      left.set(chunk.left, offset);
      right.set(chunk.right, offset);
      offset += chunk.left.length;
    }
    if (byRate.size > 1) {
      this.onDiagHandler?.(
        `mergeChunks: ${byRate.size} sample rates detected, using ${bestRate}Hz (${bestNonSilentSamples} non-silent samples)`
      );
    }
    return {
      left,
      right,
      sampleRate: bestRate,
      durationMs: totalLen / bestRate * 1e3,
      chunkCount: bestChunks.length
    };
  }
};
|
|
292
|
-
function
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
2107
|
+
/**
 * Build the capture result used when no audio was collected:
 * two empty channels at a nominal 48 kHz sample rate.
 */
function emptyCaptureResult() {
  const noSamples = () => new Float32Array(0);
  const result = {
    left: noSamples(),
    right: noSamples(),
    sampleRate: 48e3,
    durationMs: 0,
    chunkCount: 0
  };
  return result;
}
|
|
2116
|
+
/**
 * Resolve after roughly `ms` milliseconds.
 * @param ms delay passed to setTimeout
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
2119
|
+
|
|
2120
|
+
// src/audio/transcribe.ts
|
|
2121
|
+
/**
 * Transcribe captured audio with the OpenAI transcription endpoint.
 *
 * The audio is converted to WAV and uploaded as a hand-built
 * multipart/form-data request.
 *
 * @param audio capture result with `left`, optional `right`, `sampleRate` and `durationMs`
 * @param options optional `apiKey` (falls back to OPENAI_API_KEY), `model`
 *   (default "whisper-1"), `responseFormat` (default "text"), `language`, `prompt`
 * @returns `{ text, audioDurationMs, apiDurationMs }`; an empty result without
 *   any network call when `audio.left` is empty
 * @throws Error when no API key is available or the API answers with a non-2xx status
 */
async function transcribe(audio, options) {
  const apiKey = options?.apiKey ?? getEnvVar("OPENAI_API_KEY");
  if (!apiKey) {
    throw new Error(
      "OpenAI API key required for transcription. Set OPENAI_API_KEY environment variable or pass apiKey option."
    );
  }
  if (audio.left.length === 0) {
    return { text: "", audioDurationMs: 0, apiDurationMs: 0 };
  }
  const model = options?.model ?? "whisper-1";
  const responseFormat = options?.responseFormat ?? "text";
  const wavBuffer = pcmToWav({
    left: audio.left,
    // A missing or empty right channel means mono.
    right: audio.right?.length > 0 ? audio.right : void 0,
    sampleRate: audio.sampleRate
  });
  const boundary = `----bpAudio${Date.now()}`;
  const parts = [];
  appendFormField(parts, boundary, "file", new Uint8Array(wavBuffer), "audio.wav", "audio/wav");
  appendFormTextField(parts, boundary, "model", model);
  appendFormTextField(parts, boundary, "response_format", responseFormat);
  if (options?.language) {
    appendFormTextField(parts, boundary, "language", options.language);
  }
  if (options?.prompt) {
    appendFormTextField(parts, boundary, "prompt", options.prompt);
  }
  parts.push(new TextEncoder().encode(`\r\n--${boundary}--\r\n`));
  const totalLength = parts.reduce((sum, p) => sum + p.length, 0);
  const body = new Uint8Array(totalLength);
  let offset = 0;
  for (const part of parts) {
    body.set(part, offset);
    offset += part.length;
  }
  const start = Date.now();
  const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": `multipart/form-data; boundary=${boundary}`
    },
    body
  });
  const apiDurationMs = Date.now() - start;
  if (!response.ok) {
    const errorBody = await response.text().catch(() => "");
    throw new Error(`Whisper API error (${response.status}): ${errorBody}`);
  }
  // "srt" and "vtt" are subtitle formats delivered as plain text, just like
  // "text"; only the JSON formats carry the transcript in a `text` field.
  const plainTextFormats = ["text", "srt", "vtt"];
  let text;
  if (plainTextFormats.includes(responseFormat)) {
    text = (await response.text()).trim();
  } else {
    const json = await response.json();
    text = json.text ?? "";
  }
  return {
    text,
    audioDurationMs: audio.durationMs,
    apiDurationMs
  };
}
|
|
2187
|
+
/**
 * Report whether an OpenAI API key is configured in the environment,
 * i.e. whether transcription can run without an explicit `apiKey` option.
 */
function isTranscriptionAvailable() {
  const configuredKey = getEnvVar("OPENAI_API_KEY");
  return Boolean(configuredKey);
}
|
|
2190
|
+
/**
 * Read an environment variable without assuming a Node.js runtime.
 * @param name variable name
 * @returns the value, or undefined when it is unset or `process.env` does not exist
 */
function getEnvVar(name) {
  const runtime = globalThis.process;
  const hasEnv = typeof runtime !== "undefined" && runtime.env;
  return hasEnv ? runtime.env[name] : void 0;
}
|
|
2196
|
+
/**
 * Append a plain text multipart/form-data field to `parts`.
 * The part starts with the CRLF that terminates the previous part and ends
 * right after the value (the next part supplies the trailing CRLF).
 * @param parts array of Uint8Array segments, mutated in place
 * @param boundary multipart boundary without the leading dashes
 * @param name form field name
 * @param value field value
 */
function appendFormTextField(parts, boundary, name, value) {
  const CRLF = "\r\n";
  const head = [
    "",
    `--${boundary}`,
    `Content-Disposition: form-data; name="${name}"`,
    "",
    ""
  ].join(CRLF);
  const encoded = new TextEncoder().encode(`${head}${value}`);
  parts.push(encoded);
}
|
|
2204
|
+
/**
 * Append a binary (file) multipart/form-data field to `parts`:
 * first the encoded part header, then the raw data segment.
 * @param parts array of Uint8Array segments, mutated in place
 * @param boundary multipart boundary without the leading dashes
 * @param name form field name
 * @param data file content
 * @param filename file name announced in Content-Disposition
 * @param contentType MIME type announced in Content-Type
 */
function appendFormField(parts, boundary, name, data, filename, contentType) {
  const CRLF = "\r\n";
  const headerLines = [
    "",
    `--${boundary}`,
    `Content-Disposition: form-data; name="${name}"; filename="${filename}"`,
    `Content-Type: ${contentType}`,
    "",
    ""
  ];
  parts.push(new TextEncoder().encode(headerLines.join(CRLF)), data);
}
|
|
298
2214
|
|
|
299
2215
|
// src/cdp/protocol.ts
|
|
@@ -991,7 +2907,7 @@ async function isElementAttached(cdp, selector, contextId) {
|
|
|
991
2907
|
const result = await cdp.send("Runtime.evaluate", params);
|
|
992
2908
|
return result.result.value === true;
|
|
993
2909
|
}
|
|
994
|
-
function
|
|
2910
|
+
/**
 * Resolve after roughly `ms` milliseconds (used by the polling wait helpers).
 * @param ms delay passed to setTimeout
 */
function sleep2(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
997
2913
|
async function waitForElement(cdp, selector, options = {}) {
|
|
@@ -1017,7 +2933,7 @@ async function waitForElement(cdp, selector, options = {}) {
|
|
|
1017
2933
|
if (conditionMet) {
|
|
1018
2934
|
return { success: true, waitedMs: Date.now() - startTime };
|
|
1019
2935
|
}
|
|
1020
|
-
await
|
|
2936
|
+
await sleep2(pollInterval);
|
|
1021
2937
|
}
|
|
1022
2938
|
return { success: false, waitedMs: Date.now() - startTime };
|
|
1023
2939
|
}
|
|
@@ -1046,7 +2962,7 @@ async function waitForAnyElement(cdp, selectors, options = {}) {
|
|
|
1046
2962
|
return { success: true, selector, waitedMs: Date.now() - startTime };
|
|
1047
2963
|
}
|
|
1048
2964
|
}
|
|
1049
|
-
await
|
|
2965
|
+
await sleep2(pollInterval);
|
|
1050
2966
|
}
|
|
1051
2967
|
return { success: false, waitedMs: Date.now() - startTime };
|
|
1052
2968
|
}
|
|
@@ -1095,7 +3011,7 @@ async function waitForNavigation(cdp, options = {}) {
|
|
|
1095
3011
|
}
|
|
1096
3012
|
const pollUrl = async () => {
|
|
1097
3013
|
while (!resolved && Date.now() < startTime + timeout) {
|
|
1098
|
-
await
|
|
3014
|
+
await sleep2(100);
|
|
1099
3015
|
if (resolved) return;
|
|
1100
3016
|
try {
|
|
1101
3017
|
const currentUrl = await getCurrentUrl(cdp);
|
|
@@ -1155,33 +3071,256 @@ async function waitForNetworkIdle(cdp, options = {}) {
|
|
|
1155
3071
|
});
|
|
1156
3072
|
}
|
|
1157
3073
|
|
|
1158
|
-
// src/browser/
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
3074
|
+
// src/browser/fuzzy-match.ts
|
|
3075
|
+
/**
 * Case-insensitive Jaro-Winkler similarity of two strings.
 *
 * @param a first string
 * @param b second string
 * @returns a value in [0, 1]; 0 when either string is empty (including both),
 *   1 for identical strings
 */
function jaroWinkler(a, b) {
  if (a.length === 0 || b.length === 0) return 0;
  if (a === b) return 1;
  const left = a.toLowerCase();
  const right = b.toLowerCase();
  // Characters only count as matching when they are at most this far apart.
  const reach = Math.max(0, Math.floor(Math.max(left.length, right.length) / 2) - 1);
  const leftUsed = Array.from({ length: left.length }, () => false);
  const rightUsed = Array.from({ length: right.length }, () => false);
  let matches = 0;
  for (let i = 0; i < left.length; i++) {
    const from = Math.max(0, i - reach);
    const to = Math.min(i + reach + 1, right.length);
    let j = from;
    while (j < to && (rightUsed[j] || left[i] !== right[j])) {
      j++;
    }
    if (j < to) {
      leftUsed[i] = true;
      rightUsed[j] = true;
      matches++;
    }
  }
  if (matches === 0) return 0;
  // Walk both strings' matched characters in order; each out-of-order pair
  // is counted once here and halved in the formula below.
  let transpositions = 0;
  let cursor = 0;
  for (let i = 0; i < left.length; i++) {
    if (!leftUsed[i]) continue;
    while (!rightUsed[cursor]) cursor++;
    if (left[i] !== right[cursor]) transpositions++;
    cursor++;
  }
  const jaro = (matches / left.length + matches / right.length + (matches - transpositions / 2) / matches) / 3;
  // Winkler bonus: reward a common prefix of up to four characters.
  const prefixLimit = Math.min(4, Math.min(left.length, right.length));
  let prefix = 0;
  while (prefix < prefixLimit && left[prefix] === right[prefix]) {
    prefix++;
  }
  const WINKLER_SCALING = 0.1;
  return jaro + prefix * WINKLER_SCALING * (1 - jaro);
}
|
|
3117
|
+
/**
 * Similarity of two strings in [0, 1]: Jaro-Winkler plus a bonus when one
 * string contains the other (0.2 when `b` contains `a`, 0.1 for the reverse).
 * Comparison is case-insensitive; an empty string never matches.
 */
function stringSimilarity(a, b) {
  if (a.length === 0 || b.length === 0) return 0;
  const needle = a.toLowerCase();
  const haystack = b.toLowerCase();
  if (needle === haystack) return 1;
  const base = jaroWinkler(a, b);
  const bonus = haystack.includes(needle) ? 0.2 : needle.includes(haystack) ? 0.1 : 0;
  return Math.min(1, base + bonus);
}
|
|
3131
|
+
/**
 * Score how well an interactive element matches a free-text query.
 *
 * The result is a weighted sum of three partial scores:
 *   name (weight 0.6), role (weight 0.25) and selector (weight 0.15).
 *
 * Name scoring, in order of precedence:
 *   1. exact (case-insensitive) match            -> 1
 *   2. name contains the whole query             -> 0.8
 *   3. name contains some of the query's words   -> matched / total * 0.7
 *   4. otherwise fuzzy string similarity         -> similarity * 0.6
 *
 * Step 4 used to be reachable only for blank queries, so a misspelled name
 * always scored 0; it now applies whenever no query word occurs in the name.
 *
 * @param query text to look for
 * @param element object with `name` (may be empty), `role` and `selector`
 * @returns the combined score (higher is better)
 */
function scoreElement(query, element) {
  const lowerQuery = query.toLowerCase();
  const words = lowerQuery.split(/\s+/).filter((w) => w.length > 0);
  let nameScore = 0;
  if (element.name) {
    const lowerName = element.name.toLowerCase();
    if (lowerName === lowerQuery) {
      nameScore = 1;
    } else if (lowerName.includes(lowerQuery)) {
      nameScore = 0.8;
    } else {
      const matchedCount = words.filter((w) => lowerName.includes(w)).length;
      nameScore = matchedCount > 0
        ? matchedCount / words.length * 0.7
        : stringSimilarity(query, element.name) * 0.6;
    }
  }
  let roleScore = 0;
  const lowerRole = element.role.toLowerCase();
  if (lowerRole === lowerQuery || lowerQuery.includes(lowerRole)) {
    roleScore = 0.3;
  } else if (words.some((w) => lowerRole.includes(w))) {
    roleScore = 0.2;
  }
  let selectorScore = 0;
  const lowerSelector = element.selector.toLowerCase();
  if (words.some((w) => lowerSelector.includes(w))) {
    selectorScore = 0.2;
  }
  return nameScore * 0.6 + roleScore * 0.25 + selectorScore * 0.15;
}
|
|
3163
|
+
/**
 * Produce a short human-readable explanation of why `element` matched `query`.
 * Reasons are collected for the name, the role and the selector and joined
 * with ", "; when none applies the numeric score is reported instead.
 *
 * @param query text that was searched for
 * @param element object with `name`, `role` and `selector`
 * @param score score of the match, shown only in the fallback reason
 */
function explainMatch(query, element, score) {
  const needle = query.toLowerCase();
  const tokens = needle.split(/\s+/).filter((token) => token.length > 0);
  const reasons = [];
  if (element.name) {
    const nameLower = element.name.toLowerCase();
    const tokensInName = tokens.filter((token) => nameLower.includes(token));
    if (nameLower === needle) {
      reasons.push("exact name match");
    } else if (nameLower.includes(needle)) {
      reasons.push("name contains query");
    } else if (tokensInName.length > 0) {
      reasons.push(`name contains: ${tokensInName.join(", ")}`);
    } else if (stringSimilarity(query, element.name) > 0.5) {
      reasons.push("similar name");
    }
  }
  const roleLower = element.role.toLowerCase();
  if (roleLower === needle || tokens.includes(roleLower)) {
    reasons.push(`role: ${element.role}`);
  }
  const selectorLower = element.selector.toLowerCase();
  if (tokens.some((token) => selectorLower.includes(token))) {
    reasons.push("selector match");
  }
  if (reasons.length === 0) {
    reasons.push(`fuzzy match (score: ${score.toFixed(2)})`);
  }
  return reasons.join(", ");
}
|
|
3192
|
+
/**
 * Rank elements against a query and return the best matches.
 *
 * Elements scoring below 0.3 are dropped; the rest are sorted by descending
 * score and truncated to `maxResults`.
 *
 * @param query text to look for; an empty query yields no matches
 * @param elements candidate elements
 * @param maxResults maximum number of matches to return (default 5)
 * @returns array of `{ element, score, matchReason }`
 */
function fuzzyMatchElements(query, elements, maxResults = 5) {
  if (!query || query.length === 0) {
    return [];
  }
  const THRESHOLD = 0.3;
  const ranked = [];
  for (const element of elements) {
    const score = scoreElement(query, element);
    if (score >= THRESHOLD) {
      ranked.push({ element, score });
    }
  }
  ranked.sort((first, second) => second.score - first.score);
  return ranked.slice(0, maxResults).map(({ element, score }) => ({
    element,
    score,
    matchReason: explainMatch(query, element, score)
  }));
}
|
|
3207
|
+
|
|
3208
|
+
// src/browser/hint-generator.ts
|
|
3209
|
+
/**
 * Element roles that are plausible targets for each action type.
 * An empty list means the action is not restricted to particular roles.
 */
var ACTION_ROLE_MAP = (() => {
  // fill, type and clear all target text-entry roles; each gets its own array.
  const textEntryRoles = () => ["textbox", "searchbox", "textarea"];
  return {
    click: ["button", "link", "menuitem", "menuitemcheckbox", "menuitemradio", "tab", "option"],
    fill: textEntryRoles(),
    type: textEntryRoles(),
    submit: ["button", "form"],
    select: ["combobox", "listbox", "option"],
    check: ["checkbox", "radio", "switch"],
    uncheck: ["checkbox", "switch"],
    // Any focusable element
    focus: [],
    // Any element
    hover: [],
    clear: textEntryRoles()
  };
})();
|
|
3223
|
+
/**
 * Derive search text from the selectors that failed to resolve.
 *
 * For every selector that is not a `ref:` selector, the first id, aria-label,
 * data-testid and class name (in that order) are collected as patterns. The
 * longest pattern becomes the search text; without patterns the first selector
 * is used verbatim, and the text is empty when there are no selectors at all.
 *
 * @param selectors selectors that were tried
 * @returns `{ text, patterns }` with patterns sorted longest first
 */
function extractIntent(selectors) {
  const extractors = [
    /#([a-zA-Z0-9_-]+)/,
    /\[aria-label=["']([^"']+)["']\]/,
    /\[data-testid=["']([^"']+)["']\]/,
    /\.([a-zA-Z0-9_-]+)/
  ];
  const patterns = [];
  for (const selector of selectors) {
    if (selector.startsWith("ref:")) continue;
    for (const extractor of extractors) {
      const found = selector.match(extractor);
      if (found) {
        patterns.push(found[1]);
      }
    }
  }
  patterns.sort((first, second) => second.length - first.length);
  const text = patterns[0] ?? selectors[0] ?? "";
  return { text, patterns };
}
|
|
3251
|
+
/**
 * Classify a selector string as "ref", "testid", "aria", "id" or "css".
 * The checks run in that order and the first one that applies wins.
 */
function getHintType(selector) {
  const rules = [
    ["ref", (s) => s.startsWith("ref:")],
    ["testid", (s) => s.includes("data-testid")],
    ["aria", (s) => s.includes("aria-label")],
    ["id", (s) => s.startsWith("#")]
  ];
  for (const [type, applies] of rules) {
    if (applies(selector)) return type;
  }
  return "css";
}
|
|
3258
|
+
/**
 * Translate a numeric match score into a confidence label:
 * "high" from 0.8, "medium" from 0.5, otherwise "low".
 */
function getConfidence(score) {
  return score >= 0.8 ? "high" : score >= 0.5 ? "medium" : "low";
}
|
|
3263
|
+
/**
 * Turn ranked match candidates into at most `maxHints` hint objects.
 *
 * Every hint uses a `ref:` selector, so all hints share one selector type.
 * The earlier per-type bookkeeping therefore never changed the outcome, and
 * its two branches built the same object; both were folded into this single
 * loop, which keeps the candidates in their given order.
 *
 * @param candidates matches as `{ element, score, matchReason }`, best first
 * @param maxHints maximum number of hints to return (a number)
 * @returns array of `{ selector, reason, confidence, element }`
 */
function diversifyHints(candidates, maxHints) {
  const hints = [];
  for (const candidate of candidates) {
    if (hints.length >= maxHints) break;
    const { ref, role, name, disabled } = candidate.element;
    hints.push({
      selector: `ref:${ref}`,
      reason: candidate.matchReason,
      confidence: getConfidence(candidate.score),
      element: { ref, role, name, disabled }
    });
  }
  return hints;
}
|
|
3299
|
+
/**
 * Suggest alternative elements after selectors failed to resolve.
 *
 * Takes a page snapshot, limits its interactive elements to the roles that
 * make sense for `actionType` (no restriction for unknown actions or actions
 * with an empty role list), fuzzy-matches them against the text extracted
 * from the failed selectors and converts the best matches into hints.
 *
 * @param page object providing an async `snapshot()` with `interactiveElements`
 * @param failedSelectors selectors that were tried
 * @param actionType action name used to look up the allowed roles
 * @param maxHints maximum number of hints (default 3)
 * @returns hints, or an empty array when the snapshot fails or nothing matches
 */
async function generateHints(page, failedSelectors, actionType, maxHints = 3) {
  let snapshot;
  try {
    snapshot = await page.snapshot();
  } catch {
    // Hints are a convenience; without a snapshot there is nothing to suggest.
    return [];
  }
  const { text: searchText } = extractIntent(failedSelectors);
  const allowedRoles = ACTION_ROLE_MAP[actionType] ?? [];
  const everyElement = snapshot.interactiveElements;
  const pool = allowedRoles.length > 0
    ? everyElement.filter((el) => allowedRoles.includes(el.role))
    : everyElement;
  // Ask for twice as many matches as hints are wanted.
  const matches = fuzzyMatchElements(searchText, pool, maxHints * 2);
  return matches.length === 0 ? [] : diversifyHints(matches, maxHints);
}
|
|
1180
3318
|
|
|
1181
3319
|
// src/browser/page.ts
|
|
1182
3320
|
var DEFAULT_TIMEOUT2 = 3e4;
|
|
1183
3321
|
var Page = class {
|
|
1184
3322
|
cdp;
|
|
3323
|
+
_targetId;
|
|
1185
3324
|
rootNodeId = null;
|
|
1186
3325
|
batchExecutor;
|
|
1187
3326
|
emulationState = {};
|
|
@@ -1200,10 +3339,23 @@ var Page = class {
|
|
|
1200
3339
|
frameExecutionContexts = /* @__PURE__ */ new Map();
|
|
1201
3340
|
/** Current frame's execution context ID (null = main frame default) */
|
|
1202
3341
|
currentFrameContextId = null;
|
|
1203
|
-
|
|
3342
|
+
/** Last matched selector from findElement (for selectorUsed tracking) */
|
|
3343
|
+
_lastMatchedSelector;
|
|
3344
|
+
/** Audio input controller (lazy-initialized) */
|
|
3345
|
+
_audioInput;
|
|
3346
|
+
/** Audio output controller (lazy-initialized) */
|
|
3347
|
+
_audioOutput;
|
|
3348
|
+
constructor(cdp, targetId) {
|
|
1204
3349
|
this.cdp = cdp;
|
|
3350
|
+
this._targetId = targetId;
|
|
1205
3351
|
this.batchExecutor = new BatchExecutor(this);
|
|
1206
3352
|
}
|
|
3353
|
+
/**
|
|
3354
|
+
* Get the CDP target ID for this page
|
|
3355
|
+
*/
|
|
3356
|
+
get targetId() {
|
|
3357
|
+
return this._targetId;
|
|
3358
|
+
}
|
|
1207
3359
|
/**
|
|
1208
3360
|
* Get the underlying CDP client for advanced operations.
|
|
1209
3361
|
* Use with caution - prefer high-level Page methods when possible.
|
|
@@ -1211,6 +3363,13 @@ var Page = class {
|
|
|
1211
3363
|
get cdpClient() {
|
|
1212
3364
|
return this.cdp;
|
|
1213
3365
|
}
|
|
3366
|
+
/**
|
|
3367
|
+
* Get the last matched selector from findElement (for selectorUsed tracking).
|
|
3368
|
+
* Returns undefined if no selector has been matched yet.
|
|
3369
|
+
*/
|
|
3370
|
+
getLastMatchedSelector() {
|
|
3371
|
+
return this._lastMatchedSelector;
|
|
3372
|
+
}
|
|
1214
3373
|
/**
|
|
1215
3374
|
* Initialize the page (enable required CDP domains)
|
|
1216
3375
|
*/
|
|
@@ -1330,7 +3489,9 @@ var Page = class {
|
|
|
1330
3489
|
const element = await this.findElement(selector, options);
|
|
1331
3490
|
if (!element) {
|
|
1332
3491
|
if (options.optional) return false;
|
|
1333
|
-
|
|
3492
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
3493
|
+
const hints = await generateHints(this, selectorList, "click");
|
|
3494
|
+
throw new ElementNotFoundError(selector, hints);
|
|
1334
3495
|
}
|
|
1335
3496
|
await this.scrollIntoView(element.nodeId);
|
|
1336
3497
|
const submitResult = await this.evaluateInFrame(
|
|
@@ -1366,7 +3527,9 @@ var Page = class {
|
|
|
1366
3527
|
const element = await this.findElement(selector, options);
|
|
1367
3528
|
if (!element) {
|
|
1368
3529
|
if (options.optional) return false;
|
|
1369
|
-
|
|
3530
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
3531
|
+
const hints = await generateHints(this, selectorList, "fill");
|
|
3532
|
+
throw new ElementNotFoundError(selector, hints);
|
|
1370
3533
|
}
|
|
1371
3534
|
await this.cdp.send("DOM.focus", { nodeId: element.nodeId });
|
|
1372
3535
|
if (clear) {
|
|
@@ -1429,7 +3592,7 @@ var Page = class {
|
|
|
1429
3592
|
key: char
|
|
1430
3593
|
});
|
|
1431
3594
|
if (delay > 0) {
|
|
1432
|
-
await
|
|
3595
|
+
await sleep3(delay);
|
|
1433
3596
|
}
|
|
1434
3597
|
}
|
|
1435
3598
|
return true;
|
|
@@ -1444,7 +3607,9 @@ var Page = class {
|
|
|
1444
3607
|
const element = await this.findElement(selector, options);
|
|
1445
3608
|
if (!element) {
|
|
1446
3609
|
if (options.optional) return false;
|
|
1447
|
-
|
|
3610
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
3611
|
+
const hints = await generateHints(this, selectorList, "select");
|
|
3612
|
+
throw new ElementNotFoundError(selector, hints);
|
|
1448
3613
|
}
|
|
1449
3614
|
const values = Array.isArray(value) ? value : [value];
|
|
1450
3615
|
await this.cdp.send("Runtime.evaluate", {
|
|
@@ -1468,7 +3633,7 @@ var Page = class {
|
|
|
1468
3633
|
async selectCustom(config, options = {}) {
|
|
1469
3634
|
const { trigger, option, value, match = "text" } = config;
|
|
1470
3635
|
await this.click(trigger, options);
|
|
1471
|
-
await
|
|
3636
|
+
await sleep3(100);
|
|
1472
3637
|
let optionSelector;
|
|
1473
3638
|
const optionSelectors = Array.isArray(option) ? option : [option];
|
|
1474
3639
|
if (match === "contains") {
|
|
@@ -1505,7 +3670,9 @@ var Page = class {
|
|
|
1505
3670
|
const element = await this.findElement(selector, options);
|
|
1506
3671
|
if (!element) {
|
|
1507
3672
|
if (options.optional) return false;
|
|
1508
|
-
|
|
3673
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
3674
|
+
const hints = await generateHints(this, selectorList, "check");
|
|
3675
|
+
throw new ElementNotFoundError(selector, hints);
|
|
1509
3676
|
}
|
|
1510
3677
|
const result = await this.cdp.send("Runtime.evaluate", {
|
|
1511
3678
|
expression: `(() => {
|
|
@@ -1525,7 +3692,9 @@ var Page = class {
|
|
|
1525
3692
|
const element = await this.findElement(selector, options);
|
|
1526
3693
|
if (!element) {
|
|
1527
3694
|
if (options.optional) return false;
|
|
1528
|
-
|
|
3695
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
3696
|
+
const hints = await generateHints(this, selectorList, "uncheck");
|
|
3697
|
+
throw new ElementNotFoundError(selector, hints);
|
|
1529
3698
|
}
|
|
1530
3699
|
const result = await this.cdp.send("Runtime.evaluate", {
|
|
1531
3700
|
expression: `(() => {
|
|
@@ -1545,13 +3714,40 @@ var Page = class {
|
|
|
1545
3714
|
* - 'auto' (default): Attempt to detect navigation for 1 second, then assume client-side handling
|
|
1546
3715
|
* - true: Wait for full navigation (traditional forms)
|
|
1547
3716
|
* - false: Return immediately (AJAX forms where you'll wait for something else)
|
|
3717
|
+
*
|
|
3718
|
+
* When targeting a <form> element directly, uses form.requestSubmit() which fires
|
|
3719
|
+
* the submit event and triggers HTML5 validation.
|
|
1548
3720
|
*/
|
|
1549
3721
|
async submit(selector, options = {}) {
|
|
1550
3722
|
const { method = "enter+click", waitForNavigation: shouldWait = "auto" } = options;
|
|
1551
3723
|
const element = await this.findElement(selector, options);
|
|
1552
3724
|
if (!element) {
|
|
1553
3725
|
if (options.optional) return false;
|
|
1554
|
-
|
|
3726
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
3727
|
+
const hints = await generateHints(this, selectorList, "submit");
|
|
3728
|
+
throw new ElementNotFoundError(selector, hints);
|
|
3729
|
+
}
|
|
3730
|
+
const isFormElement = await this.evaluateInFrame(
|
|
3731
|
+
`(() => {
|
|
3732
|
+
const el = document.querySelector(${JSON.stringify(element.selector)});
|
|
3733
|
+
return el instanceof HTMLFormElement;
|
|
3734
|
+
})()`
|
|
3735
|
+
);
|
|
3736
|
+
if (isFormElement.result.value) {
|
|
3737
|
+
await this.evaluateInFrame(
|
|
3738
|
+
`(() => {
|
|
3739
|
+
const form = document.querySelector(${JSON.stringify(element.selector)});
|
|
3740
|
+
if (form && form instanceof HTMLFormElement) {
|
|
3741
|
+
form.requestSubmit();
|
|
3742
|
+
}
|
|
3743
|
+
})()`
|
|
3744
|
+
);
|
|
3745
|
+
if (shouldWait === true) {
|
|
3746
|
+
await this.waitForNavigation({ timeout: options.timeout ?? DEFAULT_TIMEOUT2 });
|
|
3747
|
+
} else if (shouldWait === "auto") {
|
|
3748
|
+
await Promise.race([this.waitForNavigation({ timeout: 1e3, optional: true }), sleep3(500)]);
|
|
3749
|
+
}
|
|
3750
|
+
return true;
|
|
1555
3751
|
}
|
|
1556
3752
|
await this.cdp.send("DOM.focus", { nodeId: element.nodeId });
|
|
1557
3753
|
if (method.includes("enter")) {
|
|
@@ -1567,7 +3763,7 @@ var Page = class {
|
|
|
1567
3763
|
this.waitForNavigation({ timeout: 1e3, optional: true }).then(
|
|
1568
3764
|
(success) => success ? "nav" : null
|
|
1569
3765
|
),
|
|
1570
|
-
|
|
3766
|
+
sleep3(500).then(() => "timeout")
|
|
1571
3767
|
]);
|
|
1572
3768
|
if (navigationDetected === "nav") {
|
|
1573
3769
|
return true;
|
|
@@ -1581,7 +3777,7 @@ var Page = class {
|
|
|
1581
3777
|
if (shouldWait === true) {
|
|
1582
3778
|
await this.waitForNavigation({ timeout: options.timeout ?? DEFAULT_TIMEOUT2 });
|
|
1583
3779
|
} else if (shouldWait === "auto") {
|
|
1584
|
-
await
|
|
3780
|
+
await sleep3(100);
|
|
1585
3781
|
}
|
|
1586
3782
|
}
|
|
1587
3783
|
return true;
|
|
@@ -1622,7 +3818,9 @@ var Page = class {
|
|
|
1622
3818
|
const element = await this.findElement(selector, options);
|
|
1623
3819
|
if (!element) {
|
|
1624
3820
|
if (options.optional) return false;
|
|
1625
|
-
|
|
3821
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
3822
|
+
const hints = await generateHints(this, selectorList, "focus");
|
|
3823
|
+
throw new ElementNotFoundError(selector, hints);
|
|
1626
3824
|
}
|
|
1627
3825
|
await this.cdp.send("DOM.focus", { nodeId: element.nodeId });
|
|
1628
3826
|
return true;
|
|
@@ -1635,7 +3833,9 @@ var Page = class {
|
|
|
1635
3833
|
const element = await this.findElement(selector, options);
|
|
1636
3834
|
if (!element) {
|
|
1637
3835
|
if (options.optional) return false;
|
|
1638
|
-
|
|
3836
|
+
const selectorList = Array.isArray(selector) ? selector : [selector];
|
|
3837
|
+
const hints = await generateHints(this, selectorList, "hover");
|
|
3838
|
+
throw new ElementNotFoundError(selector, hints);
|
|
1639
3839
|
}
|
|
1640
3840
|
await this.scrollIntoView(element.nodeId);
|
|
1641
3841
|
const box = await this.getBoxModel(element.nodeId);
|
|
@@ -2574,7 +4774,7 @@ var Page = class {
|
|
|
2574
4774
|
lastError = e;
|
|
2575
4775
|
if (attempt < retries) {
|
|
2576
4776
|
this.rootNodeId = null;
|
|
2577
|
-
await
|
|
4777
|
+
await sleep3(delay);
|
|
2578
4778
|
continue;
|
|
2579
4779
|
}
|
|
2580
4780
|
}
|
|
@@ -2590,6 +4790,7 @@ var Page = class {
|
|
|
2590
4790
|
async findElement(selectors, options = {}) {
|
|
2591
4791
|
const { timeout = DEFAULT_TIMEOUT2 } = options;
|
|
2592
4792
|
const selectorList = Array.isArray(selectors) ? selectors : [selectors];
|
|
4793
|
+
this._lastMatchedSelector = void 0;
|
|
2593
4794
|
for (const selector of selectorList) {
|
|
2594
4795
|
if (selector.startsWith("ref:")) {
|
|
2595
4796
|
const ref = selector.slice(4);
|
|
@@ -2606,6 +4807,7 @@ var Page = class {
|
|
|
2606
4807
|
}
|
|
2607
4808
|
);
|
|
2608
4809
|
if (pushResult.nodeIds?.[0]) {
|
|
4810
|
+
this._lastMatchedSelector = selector;
|
|
2609
4811
|
return {
|
|
2610
4812
|
nodeId: pushResult.nodeIds[0],
|
|
2611
4813
|
backendNodeId,
|
|
@@ -2639,6 +4841,7 @@ var Page = class {
|
|
|
2639
4841
|
"DOM.describeNode",
|
|
2640
4842
|
{ nodeId: queryResult.nodeId }
|
|
2641
4843
|
);
|
|
4844
|
+
this._lastMatchedSelector = result.selector;
|
|
2642
4845
|
return {
|
|
2643
4846
|
nodeId: queryResult.nodeId,
|
|
2644
4847
|
backendNodeId: describeResult2.node.backendNodeId,
|
|
@@ -2666,6 +4869,7 @@ var Page = class {
|
|
|
2666
4869
|
"DOM.describeNode",
|
|
2667
4870
|
{ nodeId: nodeResult.nodeId }
|
|
2668
4871
|
);
|
|
4872
|
+
this._lastMatchedSelector = result.selector;
|
|
2669
4873
|
return {
|
|
2670
4874
|
nodeId: nodeResult.nodeId,
|
|
2671
4875
|
backendNodeId: describeResult.node.backendNodeId,
|
|
@@ -2742,8 +4946,107 @@ var Page = class {
|
|
|
2742
4946
|
clickCount: 1
|
|
2743
4947
|
});
|
|
2744
4948
|
}
|
|
4949
|
+
// ============ Audio I/O ============
|
|
4950
|
+
/**
|
|
4951
|
+
* Audio input controller (fake microphone).
|
|
4952
|
+
* Lazy-initialized on first access.
|
|
4953
|
+
*/
|
|
4954
|
+
get audioInput() {
|
|
4955
|
+
if (!this._audioInput) {
|
|
4956
|
+
this._audioInput = new AudioInput(this.cdp);
|
|
4957
|
+
}
|
|
4958
|
+
return this._audioInput;
|
|
4959
|
+
}
|
|
4960
|
+
/**
|
|
4961
|
+
* Audio output capture controller.
|
|
4962
|
+
* Lazy-initialized on first access.
|
|
4963
|
+
*/
|
|
4964
|
+
get audioOutput() {
|
|
4965
|
+
if (!this._audioOutput) {
|
|
4966
|
+
this._audioOutput = new AudioOutput(this.cdp);
|
|
4967
|
+
}
|
|
4968
|
+
return this._audioOutput;
|
|
4969
|
+
}
|
|
4970
|
+
/**
|
|
4971
|
+
* Set up both audio input (fake microphone) and output (capture).
|
|
4972
|
+
* Must be called before navigating to the page that will use audio.
|
|
4973
|
+
*/
|
|
4974
|
+
async setupAudio() {
|
|
4975
|
+
try {
|
|
4976
|
+
await this.cdp.send("Input.dispatchMouseEvent", {
|
|
4977
|
+
type: "mousePressed",
|
|
4978
|
+
x: 0,
|
|
4979
|
+
y: 0,
|
|
4980
|
+
button: "left",
|
|
4981
|
+
clickCount: 1
|
|
4982
|
+
});
|
|
4983
|
+
await this.cdp.send("Input.dispatchMouseEvent", {
|
|
4984
|
+
type: "mouseReleased",
|
|
4985
|
+
x: 0,
|
|
4986
|
+
y: 0,
|
|
4987
|
+
button: "left",
|
|
4988
|
+
clickCount: 1
|
|
4989
|
+
});
|
|
4990
|
+
} catch {
|
|
4991
|
+
}
|
|
4992
|
+
await this.audioInput.setup();
|
|
4993
|
+
await this.audioOutput.setup();
|
|
4994
|
+
}
|
|
4995
|
+
/**
|
|
4996
|
+
* Full audio round-trip: feed input audio, capture the response.
|
|
4997
|
+
*
|
|
4998
|
+
* 1. Starts capturing output
|
|
4999
|
+
* 2. Feeds input audio as microphone data
|
|
5000
|
+
* 3. Waits for the page to respond and then go silent
|
|
5001
|
+
* 4. Returns the captured response audio with latency metrics
|
|
5002
|
+
*
|
|
5003
|
+
* @example
|
|
5004
|
+
* ```typescript
|
|
5005
|
+
* await page.setupAudio();
|
|
5006
|
+
* await page.goto('https://voice-agent.example.com');
|
|
5007
|
+
* const result = await page.audioRoundTrip({
|
|
5008
|
+
* input: wavFileBytes,
|
|
5009
|
+
* silenceTimeout: 3000,
|
|
5010
|
+
* });
|
|
5011
|
+
* console.log(`Response: ${result.audio.durationMs}ms, latency: ${result.latencyMs}ms`);
|
|
5012
|
+
* ```
|
|
5013
|
+
*/
|
|
5014
|
+
async audioRoundTrip(options) {
|
|
5015
|
+
if (!this.audioInput.isSetup || !this.audioOutput.isSetup) {
|
|
5016
|
+
await this.setupAudio();
|
|
5017
|
+
}
|
|
5018
|
+
const start = Date.now();
|
|
5019
|
+
await this.audioOutput.start();
|
|
5020
|
+
if (options.preDelay && options.preDelay > 0) {
|
|
5021
|
+
await sleep3(options.preDelay);
|
|
5022
|
+
}
|
|
5023
|
+
const inputDone = this.audioInput.play(options.input, {
|
|
5024
|
+
waitForEnd: !!options.sendSelector
|
|
5025
|
+
});
|
|
5026
|
+
if (options.sendSelector) {
|
|
5027
|
+
await inputDone.catch(() => {
|
|
5028
|
+
});
|
|
5029
|
+
await this.click(options.sendSelector);
|
|
5030
|
+
}
|
|
5031
|
+
const audio = await this.audioOutput.captureUntilSilence({
|
|
5032
|
+
silenceTimeout: options.silenceTimeout ?? 1500,
|
|
5033
|
+
silenceThreshold: options.silenceThreshold ?? 0.01,
|
|
5034
|
+
maxDuration: options.timeout ?? 12e4
|
|
5035
|
+
});
|
|
5036
|
+
await this.audioInput.stop();
|
|
5037
|
+
if (!options.sendSelector) {
|
|
5038
|
+
await inputDone.catch(() => {
|
|
5039
|
+
});
|
|
5040
|
+
}
|
|
5041
|
+
const firstChunkTime = this.audioOutput.firstChunkTime;
|
|
5042
|
+
return {
|
|
5043
|
+
audio,
|
|
5044
|
+
latencyMs: firstChunkTime !== null ? firstChunkTime - start : -1,
|
|
5045
|
+
totalMs: Date.now() - start
|
|
5046
|
+
};
|
|
5047
|
+
}
|
|
2745
5048
|
};
|
|
2746
|
-
function
|
|
5049
|
+
/**
 * Pause for a number of milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that fulfills with undefined when the timer fires.
 */
function sleep3(ms) {
  const startTimer = (done) => {
    setTimeout(done, ms);
  };
  return new Promise(startTimer);
}
|
|
2749
5052
|
|
|
@@ -2772,14 +5075,24 @@ var Browser = class _Browser {
|
|
|
2772
5075
|
* Get or create a page by name
|
|
2773
5076
|
* If no name is provided, returns the first available page or creates a new one
|
|
2774
5077
|
*/
|
|
2775
|
-
async page(name) {
|
|
5078
|
+
async page(name, options) {
|
|
2776
5079
|
const pageName = name ?? "default";
|
|
2777
5080
|
const cached = this.pages.get(pageName);
|
|
2778
5081
|
if (cached) return cached;
|
|
2779
5082
|
const targets = await this.cdp.send("Target.getTargets");
|
|
2780
5083
|
const pageTargets = targets.targetInfos.filter((t) => t.type === "page");
|
|
2781
5084
|
let targetId;
|
|
2782
|
-
if (
|
|
5085
|
+
if (options?.targetId) {
|
|
5086
|
+
const targetExists = pageTargets.some((t) => t.targetId === options.targetId);
|
|
5087
|
+
if (targetExists) {
|
|
5088
|
+
targetId = options.targetId;
|
|
5089
|
+
} else {
|
|
5090
|
+
console.warn(`[browser-pilot] Target ${options.targetId} no longer exists, falling back`);
|
|
5091
|
+
targetId = pageTargets.length > 0 ? pageTargets[0].targetId : (await this.cdp.send("Target.createTarget", {
|
|
5092
|
+
url: "about:blank"
|
|
5093
|
+
})).targetId;
|
|
5094
|
+
}
|
|
5095
|
+
} else if (pageTargets.length > 0) {
|
|
2783
5096
|
targetId = pageTargets[0].targetId;
|
|
2784
5097
|
} else {
|
|
2785
5098
|
const result = await this.cdp.send("Target.createTarget", {
|
|
@@ -2788,7 +5101,7 @@ var Browser = class _Browser {
|
|
|
2788
5101
|
targetId = result.targetId;
|
|
2789
5102
|
}
|
|
2790
5103
|
await this.cdp.attachToTarget(targetId);
|
|
2791
|
-
const page = new Page(this.cdp);
|
|
5104
|
+
const page = new Page(this.cdp, targetId);
|
|
2792
5105
|
await page.init();
|
|
2793
5106
|
this.pages.set(pageName, page);
|
|
2794
5107
|
return page;
|
|
@@ -2801,7 +5114,7 @@ var Browser = class _Browser {
|
|
|
2801
5114
|
url
|
|
2802
5115
|
});
|
|
2803
5116
|
await this.cdp.attachToTarget(result.targetId);
|
|
2804
|
-
const page = new Page(this.cdp);
|
|
5117
|
+
const page = new Page(this.cdp, result.targetId);
|
|
2805
5118
|
await page.init();
|
|
2806
5119
|
const name = `page-${this.pages.size + 1}`;
|
|
2807
5120
|
this.pages.set(name, page);
|
|
@@ -3092,6 +5405,8 @@ function disableTracing() {
|
|
|
3092
5405
|
}
|
|
3093
5406
|
// Annotate the CommonJS export names for ESM import in node:
|
|
3094
5407
|
0 && (module.exports = {
|
|
5408
|
+
AudioInput,
|
|
5409
|
+
AudioOutput,
|
|
3095
5410
|
BatchExecutor,
|
|
3096
5411
|
Browser,
|
|
3097
5412
|
BrowserBaseProvider,
|
|
@@ -3105,6 +5420,8 @@ function disableTracing() {
|
|
|
3105
5420
|
TimeoutError,
|
|
3106
5421
|
Tracer,
|
|
3107
5422
|
addBatchToPage,
|
|
5423
|
+
bufferToBase64,
|
|
5424
|
+
calculateRMS,
|
|
3108
5425
|
connect,
|
|
3109
5426
|
createCDPClient,
|
|
3110
5427
|
createProvider,
|
|
@@ -3112,8 +5429,17 @@ function disableTracing() {
|
|
|
3112
5429
|
disableTracing,
|
|
3113
5430
|
discoverTargets,
|
|
3114
5431
|
enableTracing,
|
|
5432
|
+
generateSilence,
|
|
5433
|
+
generateTone,
|
|
5434
|
+
getAudioChromeFlags,
|
|
3115
5435
|
getBrowserWebSocketUrl,
|
|
3116
5436
|
getTracer,
|
|
5437
|
+
grantAudioPermissions,
|
|
5438
|
+
isTranscriptionAvailable,
|
|
5439
|
+
parseWavHeader,
|
|
5440
|
+
pcmToWav,
|
|
5441
|
+
transcribe,
|
|
5442
|
+
validateSteps,
|
|
3117
5443
|
waitForAnyElement,
|
|
3118
5444
|
waitForElement,
|
|
3119
5445
|
waitForNavigation,
|