@absolutejs/voice 0.0.22-beta.319 → 0.0.22-beta.320

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2159,6 +2159,412 @@ var serverMessageToAction = (message) => {
2159
2159
  }
2160
2160
  };
2161
2161
 
2162
+ // node_modules/@absolutejs/media/dist/index.js
2163
+ var formatLabel = (format) => `${format.container}/${format.encoding}/${String(format.sampleRateHz)}hz/${String(format.channels)}ch`;
2164
+ var formatMatches = (actual, expected) => actual.container === expected.container && actual.encoding === expected.encoding && actual.sampleRateHz === expected.sampleRateHz && actual.channels === expected.channels;
2165
+ var pushIssue = (issues, severity, code, message) => {
2166
+ issues.push({ code, message, severity });
2167
+ };
2168
+ var numericMetadata = (frame, key) => {
2169
+ const value = frame.metadata?.[key];
2170
+ return typeof value === "number" && Number.isFinite(value) ? value : undefined;
2171
+ };
2172
+ var average3 = (values) => values.length === 0 ? undefined : values.reduce((total, value) => total + value, 0) / values.length;
2173
+ var max = (values) => values.length === 0 ? undefined : Math.max(...values);
2174
+ var min = (values) => values.length === 0 ? undefined : Math.min(...values);
2175
+ var numericStat = (stat, key) => {
2176
+ const value = stat[key];
2177
+ return typeof value === "number" && Number.isFinite(value) ? value : undefined;
2178
+ };
2179
+ var booleanStat = (stat, key) => {
2180
+ const value = stat[key];
2181
+ return typeof value === "boolean" ? value : undefined;
2182
+ };
2183
+ var stringStat = (stat, key) => {
2184
+ const value = stat[key];
2185
+ return typeof value === "string" ? value : undefined;
2186
+ };
2187
+ var secondsToMs = (value) => value === undefined ? undefined : value * 1000;
2188
+ var normalizeWebRTCStat = (stat) => {
2189
+ const sample = {};
2190
+ for (const [key, value] of Object.entries(stat)) {
2191
+ if (value === null || typeof value === "boolean" || typeof value === "number" || typeof value === "string") {
2192
+ sample[key] = value;
2193
+ }
2194
+ }
2195
+ return sample;
2196
+ };
2197
+ var buildMediaResamplingPlan = (input) => {
2198
+ const required = !formatMatches(input.inputFormat, input.outputFormat);
2199
+ return {
2200
+ inputFormat: input.inputFormat,
2201
+ outputFormat: input.outputFormat,
2202
+ ratio: input.outputFormat.sampleRateHz / input.inputFormat.sampleRateHz,
2203
+ required,
2204
+ status: input.inputFormat.container === input.outputFormat.container && input.inputFormat.encoding === input.outputFormat.encoding && input.inputFormat.channels === input.outputFormat.channels ? "pass" : "warn"
2205
+ };
2206
+ };
2207
+ var speechProbability = (frame) => {
2208
+ if (frame.metadata?.isSpeech === true) {
2209
+ return 1;
2210
+ }
2211
+ if (frame.metadata?.isSpeech === false) {
2212
+ return 0;
2213
+ }
2214
+ for (const key of ["speechProbability", "voiceProbability", "rms", "energy"]) {
2215
+ const value = numericMetadata(frame, key);
2216
+ if (value !== undefined) {
2217
+ return value;
2218
+ }
2219
+ }
2220
+ return 0;
2221
+ };
2222
+ var buildMediaVadReport = (input = {}) => {
2223
+ const frames = (input.frames ?? []).filter((frame) => frame.kind === "input-audio");
2224
+ const speechStartThreshold = input.speechStartThreshold ?? 0.6;
2225
+ const speechEndThreshold = input.speechEndThreshold ?? 0.35;
2226
+ const minSpeechFrames = input.minSpeechFrames ?? 1;
2227
+ const maxSilenceFrames = input.maxSilenceFrames ?? 1;
2228
+ const segments = [];
2229
+ let activeFrames = [];
2230
+ let silenceFrames = 0;
2231
+ const closeSegment = () => {
2232
+ if (activeFrames.length < minSpeechFrames) {
2233
+ activeFrames = [];
2234
+ silenceFrames = 0;
2235
+ return;
2236
+ }
2237
+ const first = activeFrames[0];
2238
+ const last = activeFrames.at(-1);
2239
+ if (!first) {
2240
+ return;
2241
+ }
2242
+ segments.push({
2243
+ durationMs: first.at !== undefined && last?.at !== undefined ? last.at - first.at + (last.durationMs ?? 0) : undefined,
2244
+ endAt: last?.at !== undefined ? last.at + (last.durationMs ?? 0) : undefined,
2245
+ frameCount: activeFrames.length,
2246
+ segmentId: `vad:${String(segments.length + 1)}`,
2247
+ sessionId: first.sessionId,
2248
+ startAt: first.at,
2249
+ turnId: first.turnId
2250
+ });
2251
+ activeFrames = [];
2252
+ silenceFrames = 0;
2253
+ };
2254
+ for (const frame of frames) {
2255
+ const probability = speechProbability(frame);
2256
+ if (activeFrames.length === 0) {
2257
+ if (probability >= speechStartThreshold) {
2258
+ activeFrames.push(frame);
2259
+ }
2260
+ continue;
2261
+ }
2262
+ activeFrames.push(frame);
2263
+ if (probability <= speechEndThreshold) {
2264
+ silenceFrames += 1;
2265
+ } else {
2266
+ silenceFrames = 0;
2267
+ }
2268
+ if (silenceFrames > maxSilenceFrames) {
2269
+ closeSegment();
2270
+ }
2271
+ }
2272
+ closeSegment();
2273
+ return {
2274
+ checkedAt: Date.now(),
2275
+ inputAudioFrames: frames.length,
2276
+ segments,
2277
+ status: frames.length === 0 ? "warn" : "pass"
2278
+ };
2279
+ };
2280
+ var buildMediaInterruptionReport = (input = {}) => {
2281
+ const issues = [];
2282
+ const interruptionFrames = (input.frames ?? []).filter((frame) => frame.kind === "interruption");
2283
+ const latenciesMs = interruptionFrames.map((frame) => frame.latencyMs).filter((latency) => typeof latency === "number");
2284
+ const maxInterruptionLatencyMs = input.maxInterruptionLatencyMs;
2285
+ if (interruptionFrames.length === 0) {
2286
+ pushIssue(issues, "warning", "media.interruption_missing", "No interruption frame was observed.");
2287
+ }
2288
+ if (maxInterruptionLatencyMs !== undefined && latenciesMs.some((latency) => latency > maxInterruptionLatencyMs)) {
2289
+ pushIssue(issues, "error", "media.interruption_latency", `Interruption latency exceeded ${String(maxInterruptionLatencyMs)}ms.`);
2290
+ }
2291
+ return {
2292
+ checkedAt: Date.now(),
2293
+ interruptionFrames: interruptionFrames.length,
2294
+ issues,
2295
+ latenciesMs,
2296
+ status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass"
2297
+ };
2298
+ };
2299
+ var buildMediaQualityReport = (input = {}) => {
2300
+ const frames = [...input.frames ?? []].sort((a, b) => (a.at ?? 0) - (b.at ?? 0));
2301
+ const audioFrames = frames.filter((frame) => frame.kind === "input-audio" || frame.kind === "assistant-audio");
2302
+ const inputAudioFrames = frames.filter((frame) => frame.kind === "input-audio");
2303
+ const assistantAudioFrames = frames.filter((frame) => frame.kind === "assistant-audio");
2304
+ const issues = [];
2305
+ const gapsMs = [];
2306
+ for (const [index, frame] of audioFrames.entries()) {
2307
+ const previous = audioFrames[index - 1];
2308
+ if (previous?.at === undefined || frame.at === undefined || previous.durationMs === undefined) {
2309
+ continue;
2310
+ }
2311
+ const gap = frame.at - (previous.at + previous.durationMs);
2312
+ if (gap > 0) {
2313
+ gapsMs.push(gap);
2314
+ }
2315
+ }
2316
+ const jitterMs = audioFrames.map((frame) => numericMetadata(frame, "jitterMs")).filter((value) => value !== undefined).at(-1) ?? max(gapsMs);
2317
+ const first = audioFrames.find((frame) => frame.at !== undefined);
2318
+ const last = audioFrames.toReversed().find((frame) => frame.at !== undefined);
2319
+ const durationMs = first?.at !== undefined && last?.at !== undefined ? last.at - first.at + (last.durationMs ?? 0) : undefined;
2320
+ const expectedDurationMs = audioFrames.length > 0 ? audioFrames.reduce((total, frame) => total + (frame.durationMs ?? 0), 0) : undefined;
2321
+ const timestampDriftMs = durationMs !== undefined && expectedDurationMs !== undefined ? Math.max(0, durationMs - expectedDurationMs) : undefined;
2322
+ const speechScores = inputAudioFrames.map(speechProbability);
2323
+ const speechFrames = speechScores.filter((score) => score >= 0.6).length;
2324
+ const silenceFrames = speechScores.filter((score) => score <= 0.35).length;
2325
+ const unknownSpeechFrames = Math.max(0, inputAudioFrames.length - speechFrames - silenceFrames);
2326
+ const speechRatio = inputAudioFrames.length === 0 ? 0 : speechFrames / inputAudioFrames.length;
2327
+ const silenceRatio = inputAudioFrames.length === 0 ? 0 : silenceFrames / inputAudioFrames.length;
2328
+ const levels = audioFrames.map((frame) => numericMetadata(frame, "level") ?? numericMetadata(frame, "rms") ?? numericMetadata(frame, "energy")).filter((value) => value !== undefined);
2329
+ const backpressureEvents = input.transport?.backpressureEvents ?? 0;
2330
+ const maxGapMs = input.maxGapMs;
2331
+ if (maxGapMs !== undefined && gapsMs.some((gap) => gap > maxGapMs)) {
2332
+ pushIssue(issues, "warning", "media.quality_gap", `Observed media gap above ${String(maxGapMs)}ms.`);
2333
+ }
2334
+ if (input.maxJitterMs !== undefined && jitterMs !== undefined && jitterMs > input.maxJitterMs) {
2335
+ pushIssue(issues, "warning", "media.quality_jitter", `Observed jitter ${String(jitterMs)}ms above ${String(input.maxJitterMs)}ms.`);
2336
+ }
2337
+ if (input.maxTimestampDriftMs !== undefined && timestampDriftMs !== undefined && timestampDriftMs > input.maxTimestampDriftMs) {
2338
+ pushIssue(issues, "warning", "media.quality_timestamp_drift", `Observed timestamp drift ${String(timestampDriftMs)}ms above ${String(input.maxTimestampDriftMs)}ms.`);
2339
+ }
2340
+ if (input.minSpeechRatio !== undefined && inputAudioFrames.length > 0 && speechRatio < input.minSpeechRatio) {
2341
+ pushIssue(issues, "warning", "media.quality_speech_ratio", `Observed speech ratio ${String(speechRatio)} below ${String(input.minSpeechRatio)}.`);
2342
+ }
2343
+ if (input.maxBackpressureEvents !== undefined && backpressureEvents > input.maxBackpressureEvents) {
2344
+ pushIssue(issues, "warning", "media.quality_backpressure", `Observed ${String(backpressureEvents)} backpressure event(s), above ${String(input.maxBackpressureEvents)}.`);
2345
+ }
2346
+ return {
2347
+ assistantAudioFrames: assistantAudioFrames.length,
2348
+ backpressureEvents,
2349
+ checkedAt: Date.now(),
2350
+ durationMs,
2351
+ gapCount: gapsMs.length,
2352
+ gapsMs,
2353
+ inputAudioFrames: inputAudioFrames.length,
2354
+ issues,
2355
+ jitterMs,
2356
+ levelAverage: average3(levels),
2357
+ levelMax: max(levels),
2358
+ levelMin: min(levels),
2359
+ silenceFrames,
2360
+ silenceRatio,
2361
+ speechFrames,
2362
+ speechRatio,
2363
+ status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass",
2364
+ timestampDriftMs,
2365
+ totalFrames: frames.length,
2366
+ unknownSpeechFrames
2367
+ };
2368
+ };
2369
+ var buildMediaWebRTCStatsReport = (input = {}) => {
2370
+ const stats = input.stats ?? [];
2371
+ const issues = [];
2372
+ const inbound = stats.filter((stat) => stat.type === "inbound-rtp" && stringStat(stat, "kind") !== "video");
2373
+ const outbound = stats.filter((stat) => stat.type === "outbound-rtp" && stringStat(stat, "kind") !== "video");
2374
+ const candidatePairs = stats.filter((stat) => stat.type === "candidate-pair");
2375
+ const audioTracks = stats.filter((stat) => (stat.type === "track" || stat.type === "media-source") && stringStat(stat, "kind") === "audio");
2376
+ const activeCandidatePairs = candidatePairs.filter((stat) => booleanStat(stat, "selected") === true || booleanStat(stat, "nominated") === true || stringStat(stat, "state") === "succeeded").length;
2377
+ const liveAudioTracks = audioTracks.filter((stat) => stringStat(stat, "readyState") !== "ended" && stringStat(stat, "trackState") !== "ended" && booleanStat(stat, "ended") !== true).length;
2378
+ const endedAudioTracks = audioTracks.filter((stat) => stringStat(stat, "readyState") === "ended" || stringStat(stat, "trackState") === "ended" || booleanStat(stat, "ended") === true).length;
2379
+ const inboundPackets = inbound.reduce((total, stat) => total + (numericStat(stat, "packetsReceived") ?? 0), 0);
2380
+ const outboundPackets = outbound.reduce((total, stat) => total + (numericStat(stat, "packetsSent") ?? 0), 0);
2381
+ const packetsLost = [...inbound, ...outbound].reduce((total, stat) => total + Math.max(0, numericStat(stat, "packetsLost") ?? 0), 0);
2382
+ const packetLossDenominator = inboundPackets + packetsLost;
2383
+ const packetLossRatio = packetLossDenominator === 0 ? 0 : packetsLost / packetLossDenominator;
2384
+ const bytesReceived = inbound.reduce((total, stat) => total + (numericStat(stat, "bytesReceived") ?? 0), 0);
2385
+ const bytesSent = outbound.reduce((total, stat) => total + (numericStat(stat, "bytesSent") ?? 0), 0);
2386
+ const roundTripTimeMs = max(candidatePairs.map((stat) => secondsToMs(numericStat(stat, "currentRoundTripTime") ?? numericStat(stat, "roundTripTime"))).filter((value) => value !== undefined));
2387
+ const jitterMs = max([...inbound, ...outbound].map((stat) => secondsToMs(numericStat(stat, "jitter"))).filter((value) => value !== undefined));
2388
+ const jitterBufferDelayMs = max(inbound.map((stat) => {
2389
+ const delay = numericStat(stat, "jitterBufferDelay");
2390
+ const emitted = numericStat(stat, "jitterBufferEmittedCount");
2391
+ return delay !== undefined && emitted !== undefined && emitted > 0 ? delay / emitted * 1000 : undefined;
2392
+ }).filter((value) => value !== undefined));
2393
+ const audioLevels = audioTracks.map((stat) => numericStat(stat, "audioLevel")).filter((value) => value !== undefined);
2394
+ if (input.requireConnectedCandidatePair && candidatePairs.length > 0 && activeCandidatePairs === 0) {
2395
+ pushIssue(issues, "error", "media.webrtc_candidate_pair_missing", "No active WebRTC candidate pair was observed.");
2396
+ }
2397
+ if (input.requireLiveAudioTrack && liveAudioTracks === 0) {
2398
+ pushIssue(issues, "error", "media.webrtc_audio_track_missing", "No live WebRTC audio track was observed.");
2399
+ }
2400
+ if (input.maxPacketLossRatio !== undefined && packetLossRatio > input.maxPacketLossRatio) {
2401
+ pushIssue(issues, "warning", "media.webrtc_packet_loss", `Observed WebRTC packet loss ratio ${String(packetLossRatio)} above ${String(input.maxPacketLossRatio)}.`);
2402
+ }
2403
+ if (input.maxRoundTripTimeMs !== undefined && roundTripTimeMs !== undefined && roundTripTimeMs > input.maxRoundTripTimeMs) {
2404
+ pushIssue(issues, "warning", "media.webrtc_round_trip_time", `Observed WebRTC RTT ${String(roundTripTimeMs)}ms above ${String(input.maxRoundTripTimeMs)}ms.`);
2405
+ }
2406
+ if (input.maxJitterMs !== undefined && jitterMs !== undefined && jitterMs > input.maxJitterMs) {
2407
+ pushIssue(issues, "warning", "media.webrtc_jitter", `Observed WebRTC jitter ${String(jitterMs)}ms above ${String(input.maxJitterMs)}ms.`);
2408
+ }
2409
+ return {
2410
+ activeCandidatePairs,
2411
+ audioLevelAverage: average3(audioLevels),
2412
+ bytesReceived,
2413
+ bytesSent,
2414
+ checkedAt: Date.now(),
2415
+ endedAudioTracks,
2416
+ inboundPackets,
2417
+ issues,
2418
+ jitterBufferDelayMs,
2419
+ jitterMs,
2420
+ liveAudioTracks,
2421
+ outboundPackets,
2422
+ packetLossRatio,
2423
+ packetsLost,
2424
+ roundTripTimeMs,
2425
+ status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass",
2426
+ totalStats: stats.length
2427
+ };
2428
+ };
2429
+ var collectMediaWebRTCStats = async (input) => {
2430
+ const report = await input.peerConnection.getStats(input.selector ?? null);
2431
+ return [...report.values()].map(normalizeWebRTCStat);
2432
+ };
2433
+ var collectMediaWebRTCStatsReport = async (input) => {
2434
+ const stats = await collectMediaWebRTCStats(input);
2435
+ return buildMediaWebRTCStatsReport({
2436
+ ...input,
2437
+ stats
2438
+ });
2439
+ };
2440
+ var buildMediaPipelineCalibrationReport = (input = {}) => {
2441
+ const frames = input.frames ?? [];
2442
+ const issues = [];
2443
+ const inputFrames = frames.filter((frame) => frame.kind === "input-audio");
2444
+ const assistantFrames = frames.filter((frame) => frame.kind === "assistant-audio");
2445
+ const turnCommitFrames = frames.filter((frame) => frame.kind === "turn-commit");
2446
+ const interruptionFrameRecords = frames.filter((frame) => frame.kind === "interruption");
2447
+ const traceLinkedFrames = frames.filter((frame) => frame.traceEventId).length;
2448
+ const backpressureFrames = frames.filter((frame) => Boolean(frame.metadata?.backpressure)).length;
2449
+ const audioLatencies = assistantFrames.map((frame) => frame.latencyMs).filter((latency) => typeof latency === "number");
2450
+ const firstAudioLatencyMs = audioLatencies.length > 0 ? Math.min(...audioLatencies) : undefined;
2451
+ const jitterValues = frames.map((frame) => numericMetadata(frame, "jitterMs")).filter((value) => value !== undefined);
2452
+ const jitterMs = jitterValues.length > 0 ? Math.max(...jitterValues) : undefined;
2453
+ const inputFormat = input.inputFormat ?? inputFrames.find((frame) => frame.format)?.format;
2454
+ const outputFormat = input.outputFormat ?? assistantFrames.find((frame) => frame.format)?.format;
2455
+ const resamplingRequired = Boolean(input.expectedInputFormat && inputFormat && inputFormat.sampleRateHz !== input.expectedInputFormat.sampleRateHz) || Boolean(input.expectedOutputFormat && outputFormat && outputFormat.sampleRateHz !== input.expectedOutputFormat.sampleRateHz);
2456
+ const resamplingTargetHz = resamplingRequired && input.expectedInputFormat ? input.expectedInputFormat.sampleRateHz : resamplingRequired ? input.expectedOutputFormat?.sampleRateHz : undefined;
2457
+ if (inputFrames.length === 0) {
2458
+ pushIssue(issues, "warning", "media.input_audio_missing", "No input audio frames were observed.");
2459
+ }
2460
+ if (assistantFrames.length === 0) {
2461
+ pushIssue(issues, "warning", "media.assistant_audio_missing", "No assistant audio frames were observed.");
2462
+ }
2463
+ if (input.expectedInputFormat && inputFormat && !formatMatches(inputFormat, input.expectedInputFormat)) {
2464
+ pushIssue(issues, inputFormat.sampleRateHz === input.expectedInputFormat.sampleRateHz ? "warning" : "error", "media.input_format_mismatch", `Input format ${formatLabel(inputFormat)} does not match expected ${formatLabel(input.expectedInputFormat)}.`);
2465
+ }
2466
+ if (input.expectedOutputFormat && outputFormat && !formatMatches(outputFormat, input.expectedOutputFormat)) {
2467
+ pushIssue(issues, outputFormat.sampleRateHz === input.expectedOutputFormat.sampleRateHz ? "warning" : "error", "media.output_format_mismatch", `Output format ${formatLabel(outputFormat)} does not match expected ${formatLabel(input.expectedOutputFormat)}.`);
2468
+ }
2469
+ if (firstAudioLatencyMs !== undefined && input.maxFirstAudioLatencyMs !== undefined && firstAudioLatencyMs > input.maxFirstAudioLatencyMs) {
2470
+ pushIssue(issues, "error", "media.first_audio_latency", `First audio latency ${String(firstAudioLatencyMs)}ms exceeds budget ${String(input.maxFirstAudioLatencyMs)}ms.`);
2471
+ }
2472
+ if (jitterMs !== undefined && input.maxJitterMs !== undefined && jitterMs > input.maxJitterMs) {
2473
+ pushIssue(issues, "warning", "media.jitter", `Media jitter ${String(jitterMs)}ms exceeds budget ${String(input.maxJitterMs)}ms.`);
2474
+ }
2475
+ if (input.maxBackpressureFrames !== undefined && backpressureFrames > input.maxBackpressureFrames) {
2476
+ pushIssue(issues, "warning", "media.backpressure", `Backpressure frame count ${String(backpressureFrames)} exceeds budget ${String(input.maxBackpressureFrames)}.`);
2477
+ }
2478
+ if (input.requireInterruptionFrame && interruptionFrameRecords.length === 0) {
2479
+ pushIssue(issues, "warning", "media.interruption_missing", "No interruption frame was observed.");
2480
+ }
2481
+ if (input.requireTraceEvidence && traceLinkedFrames === 0) {
2482
+ pushIssue(issues, "warning", "media.trace_evidence_missing", "No media frames were linked to trace evidence.");
2483
+ }
2484
+ return {
2485
+ assistantAudioFrames: assistantFrames.length,
2486
+ backpressureFrames,
2487
+ checkedAt: Date.now(),
2488
+ firstAudioLatencyMs,
2489
+ inputAudioFrames: inputFrames.length,
2490
+ inputFormat,
2491
+ interruptionFrames: interruptionFrameRecords.length,
2492
+ issues,
2493
+ jitterMs,
2494
+ outputFormat,
2495
+ resamplingRequired,
2496
+ resamplingTargetHz,
2497
+ status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass",
2498
+ surface: input.surface ?? "media-pipeline",
2499
+ traceLinkedFrames,
2500
+ turnCommitFrames: turnCommitFrames.length
2501
+ };
2502
+ };
2503
+
2504
+ // src/client/browserMedia.ts
2505
/** Request path used for browser media reports when `options.path` is not set. */
var DEFAULT_BROWSER_MEDIA_PATH = "/api/voice/browser-media";
/** Delay between periodic reports, in milliseconds, when `options.intervalMs` is not set. */
var DEFAULT_BROWSER_MEDIA_INTERVAL_MS = 5 * 1000;
2507
+ var resolvePeerConnection = async (options) => options.peerConnection ?? await options.getPeerConnection?.() ?? null;
2508
+ var postBrowserMediaReport = async (payload, options) => {
2509
+ const requestFetch = options.fetch ?? globalThis.fetch;
2510
+ if (!requestFetch) {
2511
+ return;
2512
+ }
2513
+ await requestFetch(options.path ?? DEFAULT_BROWSER_MEDIA_PATH, {
2514
+ body: JSON.stringify(payload),
2515
+ headers: {
2516
+ "Content-Type": "application/json"
2517
+ },
2518
+ keepalive: true,
2519
+ method: "POST"
2520
+ });
2521
+ };
2522
+ var createVoiceBrowserMediaReporter = (options) => {
2523
+ let interval = null;
2524
+ const reportOnce = async () => {
2525
+ const peerConnection = await resolvePeerConnection(options);
2526
+ if (!peerConnection) {
2527
+ return;
2528
+ }
2529
+ const report = await collectMediaWebRTCStatsReport({
2530
+ ...options,
2531
+ peerConnection
2532
+ });
2533
+ const payload = {
2534
+ at: Date.now(),
2535
+ report,
2536
+ scenarioId: options.getScenarioId?.() ?? null,
2537
+ sessionId: options.getSessionId?.() ?? null
2538
+ };
2539
+ options.onReport?.(payload);
2540
+ await postBrowserMediaReport(payload, options);
2541
+ return payload;
2542
+ };
2543
+ const run = () => {
2544
+ reportOnce().catch((error) => {
2545
+ options.onError?.(error);
2546
+ });
2547
+ };
2548
+ const stop = () => {
2549
+ if (interval) {
2550
+ clearInterval(interval);
2551
+ interval = null;
2552
+ }
2553
+ };
2554
+ return {
2555
+ close: stop,
2556
+ reportOnce,
2557
+ start: () => {
2558
+ if (interval) {
2559
+ return;
2560
+ }
2561
+ run();
2562
+ interval = setInterval(run, options.intervalMs ?? DEFAULT_BROWSER_MEDIA_INTERVAL_MS);
2563
+ },
2564
+ stop
2565
+ };
2566
+ };
2567
+
2162
2568
  // src/client/connection.ts
2163
2569
  var WS_OPEN = 1;
2164
2570
  var WS_CLOSED = 3;
@@ -2591,12 +2997,18 @@ var createVoiceStreamStore = () => {
2591
2997
  var createVoiceStream = (path, options = {}) => {
2592
2998
  const connection = createVoiceConnection(path, options);
2593
2999
  const store = createVoiceStreamStore();
3000
+ const browserMediaReporter = options.browserMedia && typeof window !== "undefined" ? createVoiceBrowserMediaReporter({
3001
+ ...options.browserMedia,
3002
+ getScenarioId: () => options.browserMedia ? options.browserMedia.getScenarioId?.() ?? connection.getScenarioId() : connection.getScenarioId(),
3003
+ getSessionId: () => options.browserMedia ? options.browserMedia.getSessionId?.() ?? connection.getSessionId() : connection.getSessionId()
3004
+ }) : null;
2594
3005
  const subscribers = new Set;
2595
3006
  const start = (input) => Promise.resolve().then(() => {
2596
3007
  if (!input?.sessionId && !input?.scenarioId) {
2597
3008
  return;
2598
3009
  }
2599
3010
  connection.start(input);
3011
+ browserMediaReporter?.start();
2600
3012
  });
2601
3013
  const notify = () => {
2602
3014
  subscribers.forEach((subscriber) => subscriber());
@@ -2638,6 +3050,7 @@ var createVoiceStream = (path, options = {}) => {
2638
3050
  },
2639
3051
  close() {
2640
3052
  unsubscribeConnection();
3053
+ browserMediaReporter?.close();
2641
3054
  connection.close();
2642
3055
  store.dispatch({ type: "disconnected" });
2643
3056
  notify();
@@ -7806,16 +8219,16 @@ var renderVoiceCallReviewHTML = (artifact) => {
7806
8219
  </html>`;
7807
8220
  };
7808
8221
  // src/testing/sessionBenchmark.ts
7809
- var average3 = (values) => values.length > 0 ? values.reduce((sum, value) => sum + value, 0) / values.length : 0;
8222
+ var average4 = (values) => values.length > 0 ? values.reduce((sum, value) => sum + value, 0) / values.length : 0;
7810
8223
  var normalizeTurnText = (value) => value.toLowerCase().replace(/[^\p{L}\p{N}\s']/gu, " ").replace(/\s+/g, " ").trim();
7811
8224
  var countPassedTurns = (turnResults) => turnResults.reduce((count, result) => count + (result.passes ? 1 : 0), 0);
7812
8225
  var calculateTurnPassRate = (turnResults) => turnResults.length > 0 ? countPassedTurns(turnResults) / turnResults.length : 0;
7813
8226
/**
 * Averages the cost estimates attached to a scenario's turn results.
 *
 * Only turns with a defined `quality.cost` contribute. Each figure is the mean
 * of the corresponding estimate field passed through `roundMetric5`; the two
 * audio figures pass 2 as its second argument.
 *
 * @param turnResults Array of turn results.
 * @returns `{ averageRelativeCostUnits, fallbackReplayAudioMs, primaryAudioMs }`.
 */
var summarizeScenarioCosts = (turnResults) => {
  const costEstimates = turnResults.map((turn) => turn.quality?.cost).filter((value) => value !== undefined);
  const meanOf = (pick) => average4(costEstimates.map((estimate) => pick(estimate)));
  const relativeCostUnits = meanOf((estimate) => estimate.estimatedRelativeCostUnits);
  const fallbackReplayMs = meanOf((estimate) => estimate.fallbackReplayAudioMs);
  const primaryMs = meanOf((estimate) => estimate.primaryAudioMs);
  return {
    averageRelativeCostUnits: roundMetric5(relativeCostUnits),
    fallbackReplayAudioMs: roundMetric5(fallbackReplayMs, 2),
    primaryAudioMs: roundMetric5(primaryMs, 2)
  };
};
7821
8234
  var roundMetric5 = (value, digits = 4) => {
@@ -8134,13 +8547,13 @@ var summarizeVoiceSessionBenchmark = (adapterId, scenarios) => {
8134
8547
  const turnAccuracies = scenarios.flatMap((scenario) => scenario.turnResults.map((turn) => turn.accuracy?.wordErrorRate).filter((value) => typeof value === "number"));
8135
8548
  return {
8136
8549
  adapterId,
8137
- averageElapsedMs: roundMetric5(average3(scenarios.map((scenario) => scenario.elapsedMs)), 2),
8138
- averageFallbackReplayAudioMs: roundMetric5(average3(scenarios.map((scenario) => scenario.fallbackReplayAudioMs)), 2),
8139
- averagePrimaryAudioMs: roundMetric5(average3(scenarios.map((scenario) => scenario.primaryAudioMs)), 2),
8140
- averageReconnectCount: roundMetric5(average3(scenarios.map((scenario) => scenario.reconnectCount))),
8141
- averageRelativeCostUnits: roundMetric5(average3(scenarios.map((scenario) => scenario.averageRelativeCostUnits))),
8142
- averageTurnPassRate: roundMetric5(average3(scenarios.map((scenario) => scenario.turnPassRate))),
8143
- averageWordErrorRate: roundMetric5(average3(turnAccuracies)),
8550
+ averageElapsedMs: roundMetric5(average4(scenarios.map((scenario) => scenario.elapsedMs)), 2),
8551
+ averageFallbackReplayAudioMs: roundMetric5(average4(scenarios.map((scenario) => scenario.fallbackReplayAudioMs)), 2),
8552
+ averagePrimaryAudioMs: roundMetric5(average4(scenarios.map((scenario) => scenario.primaryAudioMs)), 2),
8553
+ averageReconnectCount: roundMetric5(average4(scenarios.map((scenario) => scenario.reconnectCount))),
8554
+ averageRelativeCostUnits: roundMetric5(average4(scenarios.map((scenario) => scenario.averageRelativeCostUnits))),
8555
+ averageTurnPassRate: roundMetric5(average4(scenarios.map((scenario) => scenario.turnPassRate))),
8556
+ averageWordErrorRate: roundMetric5(average4(turnAccuracies)),
8144
8557
  duplicateTurnRate: roundMetric5(scenarios.length > 0 ? scenarios.filter((scenario) => scenario.duplicateTurnCount > 0).length / scenarios.length : 0),
8145
8558
  passCount,
8146
8559
  passRate: roundMetric5(scenarios.length > 0 ? passCount / scenarios.length : 0),
@@ -8166,13 +8579,13 @@ var summarizeVoiceSessionBenchmarkSeries = (input) => {
8166
8579
  const passCount = results.filter((scenario) => scenario.passes).length;
8167
8580
  const sample = results[0];
8168
8581
  return {
8169
- averageElapsedMs: roundMetric5(average3(results.map((scenario) => scenario.elapsedMs)), 2),
8170
- averageFallbackReplayAudioMs: roundMetric5(average3(results.map((scenario) => scenario.fallbackReplayAudioMs)), 2),
8171
- averagePrimaryAudioMs: roundMetric5(average3(results.map((scenario) => scenario.primaryAudioMs)), 2),
8172
- averageReconnectCount: roundMetric5(average3(results.map((scenario) => scenario.reconnectCount))),
8173
- averageRelativeCostUnits: roundMetric5(average3(results.map((scenario) => scenario.averageRelativeCostUnits))),
8174
- averageTurnPassRate: roundMetric5(average3(results.map((scenario) => scenario.turnPassRate))),
8175
- averageWordErrorRate: roundMetric5(average3(wordErrorRates)),
8582
+ averageElapsedMs: roundMetric5(average4(results.map((scenario) => scenario.elapsedMs)), 2),
8583
+ averageFallbackReplayAudioMs: roundMetric5(average4(results.map((scenario) => scenario.fallbackReplayAudioMs)), 2),
8584
+ averagePrimaryAudioMs: roundMetric5(average4(results.map((scenario) => scenario.primaryAudioMs)), 2),
8585
+ averageReconnectCount: roundMetric5(average4(results.map((scenario) => scenario.reconnectCount))),
8586
+ averageRelativeCostUnits: roundMetric5(average4(results.map((scenario) => scenario.averageRelativeCostUnits))),
8587
+ averageTurnPassRate: roundMetric5(average4(results.map((scenario) => scenario.turnPassRate))),
8588
+ averageWordErrorRate: roundMetric5(average4(wordErrorRates)),
8176
8589
  bestWordErrorRate: roundMetric5(wordErrorRates.length > 0 ? Math.min(...wordErrorRates) : 0),
8177
8590
  fixtureId,
8178
8591
  passCount,
@@ -8195,18 +8608,18 @@ var summarizeVoiceSessionBenchmarkSeries = (input) => {
8195
8608
  scenarios: scenarioAggregates,
8196
8609
  summary: {
8197
8610
  adapterId: input.adapterId,
8198
- averageElapsedMs: roundMetric5(average3(scenarioAggregates.map((scenario) => scenario.averageElapsedMs)), 2),
8199
- averageFallbackReplayAudioMs: roundMetric5(average3(scenarioAggregates.map((scenario) => scenario.averageFallbackReplayAudioMs)), 2),
8200
- averagePassRate: roundMetric5(average3(scenarioAggregates.map((scenario) => scenario.passRate))),
8201
- averagePrimaryAudioMs: roundMetric5(average3(scenarioAggregates.map((scenario) => scenario.averagePrimaryAudioMs)), 2),
8202
- averageReconnectCount: roundMetric5(average3(scenarioAggregates.map((scenario) => scenario.averageReconnectCount))),
8203
- averageRelativeCostUnits: roundMetric5(average3(scenarioAggregates.map((scenario) => scenario.averageRelativeCostUnits))),
8204
- averageTurnPassRate: roundMetric5(average3(scenarioAggregates.map((scenario) => scenario.averageTurnPassRate))),
8205
- averageWordErrorRate: roundMetric5(average3(scenarioAggregates.map((scenario) => scenario.averageWordErrorRate))),
8611
+ averageElapsedMs: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageElapsedMs)), 2),
8612
+ averageFallbackReplayAudioMs: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageFallbackReplayAudioMs)), 2),
8613
+ averagePassRate: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.passRate))),
8614
+ averagePrimaryAudioMs: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averagePrimaryAudioMs)), 2),
8615
+ averageReconnectCount: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageReconnectCount))),
8616
+ averageRelativeCostUnits: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageRelativeCostUnits))),
8617
+ averageTurnPassRate: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageTurnPassRate))),
8618
+ averageWordErrorRate: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageWordErrorRate))),
8206
8619
  flakyScenarioCount: scenarioAggregates.filter((scenario) => scenario.passRate > 0 && scenario.passRate < 1).length,
8207
8620
  generatedRunCount: input.reports.length,
8208
- reconnectCoverageRate: roundMetric5(average3(reconnectCoverageRates)),
8209
- reconnectSuccessRate: roundMetric5(average3(reconnectRates)),
8621
+ reconnectCoverageRate: roundMetric5(average4(reconnectCoverageRates)),
8622
+ reconnectSuccessRate: roundMetric5(average4(reconnectRates)),
8210
8623
  scenarioCount: scenarioAggregates.length,
8211
8624
  stableScenarioCount: scenarioAggregates.filter((scenario) => scenario.passRate === 1).length,
8212
8625
  totalPassCount,
package/dist/trace.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { S3Client, S3Options } from 'bun';
2
- export type VoiceTraceEventType = 'assistant.guardrail' | 'assistant.memory' | 'assistant.run' | 'agent.context' | 'agent.handoff' | 'agent.model' | 'agent.result' | 'agent.tool' | 'call.handoff' | 'call.lifecycle' | 'client.barge_in' | 'client.live_latency' | 'client.reconnect' | 'operator.action' | 'provider.decision' | 'session.error' | 'turn.assistant' | 'turn.committed' | 'turn.cost' | 'turn_latency.stage' | 'turn.transcript' | 'workflow.contract';
2
+ export type VoiceTraceEventType = 'assistant.guardrail' | 'assistant.memory' | 'assistant.run' | 'agent.context' | 'agent.handoff' | 'agent.model' | 'agent.result' | 'agent.tool' | 'call.handoff' | 'call.lifecycle' | 'client.barge_in' | 'client.browser_media' | 'client.live_latency' | 'client.reconnect' | 'operator.action' | 'provider.decision' | 'session.error' | 'turn.assistant' | 'turn.committed' | 'turn.cost' | 'turn_latency.stage' | 'turn.transcript' | 'workflow.contract';
3
3
  export type VoiceTraceEvent<TPayload extends Record<string, unknown> = Record<string, unknown>> = {
4
4
  at: number;
5
5
  id?: string;
package/dist/types.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import type { SessionStore } from '@absolutejs/absolute';
2
+ import type { MediaWebRTCStatsCollector, MediaWebRTCStatsReport, MediaWebRTCStatsReportInput } from '@absolutejs/media';
2
3
  import type { VoiceOpsDispositionTaskPolicies, VoiceOpsTaskAssignmentRule, VoiceOpsTaskAssignmentRules, VoiceIntegrationWebhookConfig, StoredVoiceIntegrationEvent, StoredVoiceOpsTask, VoiceIntegrationEventStore, VoiceOpsTaskPolicy, VoiceOpsTask, VoiceOpsTaskStore } from './ops';
3
4
  import type { VoiceIntegrationSink } from './opsSinks';
4
5
  import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from './testing/review';
@@ -762,6 +763,7 @@ export type VoiceServerConnectionMessage = {
762
763
  };
763
764
  export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerReplayMessage<TResult> | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerAudioMessage | VoiceServerCallLifecycleMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage | VoiceServerConnectionMessage;
764
765
  export type VoiceConnectionOptions = {
766
+ browserMedia?: false | VoiceBrowserMediaReporterOptions;
765
767
  protocols?: string[];
766
768
  scenarioId?: string;
767
769
  reconnect?: boolean;
@@ -770,6 +772,23 @@ export type VoiceConnectionOptions = {
770
772
  pingInterval?: number;
771
773
  sessionId?: string;
772
774
  };
775
+ export type VoiceBrowserMediaReportPayload = {
776
+ at: number;
777
+ report: MediaWebRTCStatsReport;
778
+ scenarioId?: string | null;
779
+ sessionId?: string | null;
780
+ };
781
+ export type VoiceBrowserMediaReporterOptions = Omit<MediaWebRTCStatsReportInput, 'peerConnection'> & {
782
+ fetch?: typeof fetch;
783
+ getPeerConnection?: (() => MediaWebRTCStatsCollector | null | undefined) | (() => Promise<MediaWebRTCStatsCollector | null | undefined>);
784
+ getScenarioId?: () => string | null | undefined;
785
+ getSessionId?: () => string | null | undefined;
786
+ intervalMs?: number;
787
+ onError?: (error: unknown) => void;
788
+ onReport?: (payload: VoiceBrowserMediaReportPayload) => void;
789
+ path?: string;
790
+ peerConnection?: MediaWebRTCStatsCollector;
791
+ };
773
792
  export type VoiceCaptureOptions = {
774
793
  channelCount?: 1 | 2;
775
794
  onAudio?: (audio: Uint8Array | ArrayBuffer, sendAudio: (audio: Uint8Array | ArrayBuffer) => void) => void;