@absolutejs/voice 0.0.22-beta.319 → 0.0.22-beta.320

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -242,6 +242,170 @@ var serverMessageToAction = (message) => {
242
242
  }
243
243
  };
244
244
 
245
+ // node_modules/@absolutejs/media/dist/index.js
246
/**
 * Records one diagnostic on the caller's `issues` array (mutated in place).
 * Each entry has the shape `{ code, message, severity }`.
 */
var pushIssue = (issues, severity, code, message) => {
  const entry = { code, message, severity };
  issues.push(entry);
};
249
/**
 * Arithmetic mean of `values`, or `undefined` when the array is empty.
 */
var average = (values) => {
  if (values.length === 0) {
    return undefined;
  }
  let sum = 0;
  values.forEach((value) => {
    sum = sum + value;
  });
  return sum / values.length;
};
250
/**
 * Largest value in `values`, or `undefined` when the array is empty.
 *
 * Folds pairwise instead of spreading into `Math.max(...values)`: spreading
 * passes every element as a separate call argument and throws a RangeError
 * once the array is larger than the engine's argument limit.
 */
var max = (values) => values.length === 0 ? undefined : values.reduce((largest, value) => Math.max(largest, value), -Infinity);
251
/**
 * Reads `stat[key]` and returns it only when it is a finite number;
 * any other value (missing, NaN, Infinity, non-number) yields `undefined`.
 */
var numericStat = (stat, key) => {
  const candidate = stat[key];
  if (typeof candidate !== "number") {
    return undefined;
  }
  return Number.isFinite(candidate) ? candidate : undefined;
};
255
/**
 * Reads `stat[key]` and returns it only when it is a real boolean;
 * every other value yields `undefined`.
 */
var booleanStat = (stat, key) => {
  const candidate = stat[key];
  if (typeof candidate === "boolean") {
    return candidate;
  }
  return undefined;
};
259
/**
 * Reads `stat[key]` and returns it only when it is a string;
 * every other value yields `undefined`.
 */
var stringStat = (stat, key) => {
  const candidate = stat[key];
  if (typeof candidate === "string") {
    return candidate;
  }
  return undefined;
};
263
/**
 * Converts seconds to milliseconds, passing `undefined` through untouched.
 */
var secondsToMs = (value) => {
  if (value === undefined) {
    return undefined;
  }
  return value * 1000;
};
264
/**
 * Returns a shallow copy of `stat` containing only the entries whose value is
 * `null`, a boolean, a number or a string; everything else is dropped.
 */
var normalizeWebRTCStat = (stat) => {
  const isPlainValue = (value) => value === null || ["boolean", "number", "string"].includes(typeof value);
  const sample = {};
  Object.entries(stat).forEach(([key, value]) => {
    if (isPlainValue(value)) {
      sample[key] = value;
    }
  });
  return sample;
};
273
/**
 * Summarises a list of normalized WebRTC stats samples into a report.
 *
 * Reads `input.stats` (defaults to an empty list) and the optional checks
 * `requireConnectedCandidatePair`, `requireLiveAudioTrack`,
 * `maxPacketLossRatio`, `maxRoundTripTimeMs` and `maxJitterMs`.
 * Returns counters, derived timing values (milliseconds), the collected
 * `issues`, and `status`: "fail" when any issue is an error, "warn" when only
 * warnings exist, otherwise "pass".
 */
var buildMediaWebRTCStatsReport = (input = {}) => {
  const stats = input.stats ?? [];
  const issues = [];

  // Small local predicates/folds shared by the derivations below.
  const isNonVideoRtp = (stat, type) => stat.type === type && stringStat(stat, "kind") !== "video";
  const hasEnded = (stat) => stringStat(stat, "readyState") === "ended" || stringStat(stat, "trackState") === "ended" || booleanStat(stat, "ended") === true;
  const isActivePair = (stat) => booleanStat(stat, "selected") === true || booleanStat(stat, "nominated") === true || stringStat(stat, "state") === "succeeded";
  const sumOf = (rows, key) => rows.reduce((total, row) => total + (numericStat(row, key) ?? 0), 0);
  const largestDefined = (values) => max(values.filter((value) => value !== undefined));
  const averageBufferDelayMs = (stat) => {
    const delay = numericStat(stat, "jitterBufferDelay");
    const emitted = numericStat(stat, "jitterBufferEmittedCount");
    if (delay === undefined || emitted === undefined || emitted <= 0) {
      return undefined;
    }
    return delay / emitted * 1000;
  };

  const inbound = stats.filter((stat) => isNonVideoRtp(stat, "inbound-rtp"));
  const outbound = stats.filter((stat) => isNonVideoRtp(stat, "outbound-rtp"));
  const rtpStreams = [...inbound, ...outbound];
  const candidatePairs = stats.filter((stat) => stat.type === "candidate-pair");
  const audioTracks = stats.filter((stat) => {
    const trackLike = stat.type === "track" || stat.type === "media-source";
    return trackLike && stringStat(stat, "kind") === "audio";
  });

  const activeCandidatePairs = candidatePairs.filter((stat) => isActivePair(stat)).length;
  const endedAudioTracks = audioTracks.filter((stat) => hasEnded(stat)).length;
  // "Live" is the exact complement of "ended" over the same track list.
  const liveAudioTracks = audioTracks.length - endedAudioTracks;

  const inboundPackets = sumOf(inbound, "packetsReceived");
  const outboundPackets = sumOf(outbound, "packetsSent");
  // Negative loss counters are clamped to zero before summing.
  const packetsLost = rtpStreams.reduce((total, stat) => total + Math.max(0, numericStat(stat, "packetsLost") ?? 0), 0);
  const observedPackets = inboundPackets + packetsLost;
  const packetLossRatio = observedPackets === 0 ? 0 : packetsLost / observedPackets;
  const bytesReceived = sumOf(inbound, "bytesReceived");
  const bytesSent = sumOf(outbound, "bytesSent");

  const roundTripTimeMs = largestDefined(candidatePairs.map((stat) => secondsToMs(numericStat(stat, "currentRoundTripTime") ?? numericStat(stat, "roundTripTime"))));
  const jitterMs = largestDefined(rtpStreams.map((stat) => secondsToMs(numericStat(stat, "jitter"))));
  const jitterBufferDelayMs = largestDefined(inbound.map((stat) => averageBufferDelayMs(stat)));
  const audioLevels = audioTracks.map((stat) => numericStat(stat, "audioLevel")).filter((value) => value !== undefined);

  // The candidate-pair check only applies when at least one pair was reported.
  if (input.requireConnectedCandidatePair && candidatePairs.length > 0 && activeCandidatePairs === 0) {
    pushIssue(issues, "error", "media.webrtc_candidate_pair_missing", "No active WebRTC candidate pair was observed.");
  }
  if (input.requireLiveAudioTrack && liveAudioTracks === 0) {
    pushIssue(issues, "error", "media.webrtc_audio_track_missing", "No live WebRTC audio track was observed.");
  }
  if (input.maxPacketLossRatio !== undefined && packetLossRatio > input.maxPacketLossRatio) {
    pushIssue(issues, "warning", "media.webrtc_packet_loss", `Observed WebRTC packet loss ratio ${String(packetLossRatio)} above ${String(input.maxPacketLossRatio)}.`);
  }
  if (input.maxRoundTripTimeMs !== undefined && roundTripTimeMs !== undefined && roundTripTimeMs > input.maxRoundTripTimeMs) {
    pushIssue(issues, "warning", "media.webrtc_round_trip_time", `Observed WebRTC RTT ${String(roundTripTimeMs)}ms above ${String(input.maxRoundTripTimeMs)}ms.`);
  }
  if (input.maxJitterMs !== undefined && jitterMs !== undefined && jitterMs > input.maxJitterMs) {
    pushIssue(issues, "warning", "media.webrtc_jitter", `Observed WebRTC jitter ${String(jitterMs)}ms above ${String(input.maxJitterMs)}ms.`);
  }

  let status = "pass";
  if (issues.some((issue) => issue.severity === "error")) {
    status = "fail";
  } else if (issues.length > 0) {
    status = "warn";
  }

  return {
    activeCandidatePairs,
    audioLevelAverage: average(audioLevels),
    bytesReceived,
    bytesSent,
    checkedAt: Date.now(),
    endedAudioTracks,
    inboundPackets,
    issues,
    jitterBufferDelayMs,
    jitterMs,
    liveAudioTracks,
    outboundPackets,
    packetLossRatio,
    packetsLost,
    roundTripTimeMs,
    status,
    totalStats: stats.length
  };
};
333
/**
 * Calls `input.peerConnection.getStats(...)` with `input.selector` (or `null`
 * when none is given) and returns every entry of the resulting report passed
 * through `normalizeWebRTCStat`.
 */
var collectMediaWebRTCStats = async (input) => {
  const selector = input.selector ?? null;
  const report = await input.peerConnection.getStats(selector);
  return Array.from(report.values(), (entry) => normalizeWebRTCStat(entry));
};
337
/**
 * Collects stats via `collectMediaWebRTCStats(input)` and builds the report
 * from `input` with the collected samples supplied as `stats`.
 */
var collectMediaWebRTCStatsReport = async (input) => {
  const collected = await collectMediaWebRTCStats(input);
  const reportInput = { ...input, stats: collected };
  return buildMediaWebRTCStatsReport(reportInput);
};
344
+
345
+ // src/client/browserMedia.ts
346
// Fallback endpoint used when the reporter options carry no `path`.
var DEFAULT_BROWSER_MEDIA_PATH = "/api/voice/browser-media";
// Fallback delay between scheduled reports (five seconds, in milliseconds).
var DEFAULT_BROWSER_MEDIA_INTERVAL_MS = 5 * 1000;
348
/**
 * Resolves the peer connection to inspect: `options.peerConnection` when it is
 * set, otherwise the (awaited) result of `options.getPeerConnection()`, and
 * `null` when neither yields a value. The getter is not called when a
 * connection is given directly.
 */
var resolvePeerConnection = async (options) => {
  const direct = options.peerConnection;
  if (direct !== null && direct !== undefined) {
    return direct;
  }
  const looked = await options.getPeerConnection?.();
  return looked ?? null;
};
349
/**
 * POSTs `payload` as JSON to `options.path` (or the default browser-media
 * path) using `options.fetch`, falling back to `globalThis.fetch`.
 * Does nothing when no fetch implementation is available. The response is
 * awaited but not inspected.
 */
var postBrowserMediaReport = async (payload, options) => {
  const requestFetch = options.fetch ?? globalThis.fetch;
  if (!requestFetch) {
    return;
  }
  const target = options.path ?? DEFAULT_BROWSER_MEDIA_PATH;
  const init = {
    body: JSON.stringify(payload),
    headers: { "Content-Type": "application/json" },
    keepalive: true,
    method: "POST"
  };
  await requestFetch(target, init);
};
363
/**
 * Creates a reporter that periodically collects a WebRTC stats report and
 * posts it.
 *
 * Returned API:
 * - `reportOnce()` resolves the peer connection, builds a report, hands the
 *   payload to `options.onReport` (if any), posts it and resolves with the
 *   payload; resolves with `undefined` when no peer connection is available.
 * - `start()` runs one report immediately and then on an interval of
 *   `options.intervalMs` (or the default); a second `start()` while running
 *   is ignored.
 * - `stop()` / `close()` clear the interval (same function).
 * Failures of scheduled reports are passed to `options.onError` (if any).
 */
var createVoiceBrowserMediaReporter = (options) => {
  let interval = null;

  async function reportOnce() {
    const peerConnection = await resolvePeerConnection(options);
    if (!peerConnection) {
      return;
    }
    const report = await collectMediaWebRTCStatsReport({ ...options, peerConnection });
    const payload = {
      at: Date.now(),
      report,
      scenarioId: options.getScenarioId?.() ?? null,
      sessionId: options.getSessionId?.() ?? null
    };
    options.onReport?.(payload);
    await postBrowserMediaReport(payload, options);
    return payload;
  }

  // Fire-and-forget wrapper used for the immediate and the scheduled runs.
  function tick() {
    reportOnce().catch((error) => {
      options.onError?.(error);
    });
  }

  function stop() {
    if (!interval) {
      return;
    }
    clearInterval(interval);
    interval = null;
  }

  function start() {
    if (interval) {
      return;
    }
    tick();
    interval = setInterval(tick, options.intervalMs ?? DEFAULT_BROWSER_MEDIA_INTERVAL_MS);
  }

  return {
    close: stop,
    reportOnce,
    start,
    stop
  };
};
408
+
245
409
  // src/client/connection.ts
246
410
  var WS_OPEN = 1;
247
411
  var WS_CLOSED = 3;
@@ -674,12 +838,18 @@ var createVoiceStreamStore = () => {
674
838
  var createVoiceStream = (path, options = {}) => {
675
839
  const connection = createVoiceConnection(path, options);
676
840
  const store = createVoiceStreamStore();
841
+ const browserMediaReporter = options.browserMedia && typeof window !== "undefined" ? createVoiceBrowserMediaReporter({
842
+ ...options.browserMedia,
843
+ getScenarioId: () => options.browserMedia ? options.browserMedia.getScenarioId?.() ?? connection.getScenarioId() : connection.getScenarioId(),
844
+ getSessionId: () => options.browserMedia ? options.browserMedia.getSessionId?.() ?? connection.getSessionId() : connection.getSessionId()
845
+ }) : null;
677
846
  const subscribers = new Set;
678
847
  const start = (input) => Promise.resolve().then(() => {
679
848
  if (!input?.sessionId && !input?.scenarioId) {
680
849
  return;
681
850
  }
682
851
  connection.start(input);
852
+ browserMediaReporter?.start();
683
853
  });
684
854
  const notify = () => {
685
855
  subscribers.forEach((subscriber) => subscriber());
@@ -721,6 +891,7 @@ var createVoiceStream = (path, options = {}) => {
721
891
  },
722
892
  close() {
723
893
  unsubscribeConnection();
894
+ browserMediaReporter?.close();
724
895
  connection.close();
725
896
  store.dispatch({ type: "disconnected" });
726
897
  notify();
@@ -1733,7 +1904,7 @@ var DEFAULT_GUIDED_PROMPTS = [
1733
1904
  "Now describe what you are trying to do or test.",
1734
1905
  "Finish with any detail that feels blocked, risky, or unclear."
1735
1906
  ];
1736
- var clamp = (value, min, max) => Math.min(max, Math.max(min, value));
1907
/**
 * Restricts `value` to the range [`min`, `max2`]: the lower bound is applied
 * first, then the upper bound.
 */
var clamp = (value, min, max2) => {
  const raised = Math.max(min, value);
  return Math.min(max2, raised);
};
1737
1908
  var escapeHtml = (value) => value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
1738
1909
  var readErrorField = (value, key) => {
1739
1910
  const candidate = value[key];
@@ -1,6 +1,8 @@
1
1
  export { createVoiceConnection } from './connection';
2
2
  export { createVoiceAudioPlayer, decodeVoiceAudioChunk } from './audioPlayer';
3
3
  export { createVoiceStream } from './createVoiceStream';
4
+ export { createVoiceBrowserMediaReporter } from './browserMedia';
5
+ export type { VoiceBrowserMediaReporter } from './browserMedia';
4
6
  export { createVoiceController } from './controller';
5
7
  export { bindVoiceBargeIn, createVoiceDuplexController } from './duplex';
6
8
  export { bindVoiceHTMX } from './htmx';
@@ -780,6 +780,412 @@ var serverMessageToAction = (message) => {
780
780
  }
781
781
  };
782
782
 
783
+ // node_modules/@absolutejs/media/dist/index.js
784
/**
 * Renders an audio format as `container/encoding/<rate>hz/<channels>ch`.
 */
var formatLabel = (format) => {
  const { container, encoding, sampleRateHz, channels } = format;
  return `${container}/${encoding}/${String(sampleRateHz)}hz/${String(channels)}ch`;
};
785
/**
 * True when both formats agree (strict equality) on container, encoding,
 * sample rate and channel count.
 */
var formatMatches = (actual, expected) => ["container", "encoding", "sampleRateHz", "channels"].every((field) => actual[field] === expected[field]);
786
/**
 * Records one diagnostic on the caller's `issues` array (mutated in place).
 * Each entry has the shape `{ code, message, severity }`.
 */
var pushIssue = (issues, severity, code, message) => {
  const entry = { code, message, severity };
  issues.push(entry);
};
789
/**
 * Reads `frame.metadata[key]` (tolerating a missing `metadata`) and returns it
 * only when it is a finite number; otherwise `undefined`.
 */
var numericMetadata = (frame, key) => {
  const candidate = frame.metadata?.[key];
  if (typeof candidate !== "number") {
    return undefined;
  }
  return Number.isFinite(candidate) ? candidate : undefined;
};
793
/**
 * Arithmetic mean of `values`, or `undefined` when the array is empty.
 */
var average = (values) => {
  if (values.length === 0) {
    return undefined;
  }
  let sum = 0;
  values.forEach((value) => {
    sum = sum + value;
  });
  return sum / values.length;
};
794
/**
 * Largest value in `values`, or `undefined` when the array is empty.
 *
 * Folds pairwise instead of spreading into `Math.max(...values)`: spreading
 * passes every element as a separate call argument and throws a RangeError
 * once the array is larger than the engine's argument limit.
 */
var max = (values) => values.length === 0 ? undefined : values.reduce((largest, value) => Math.max(largest, value), -Infinity);
795
/**
 * Smallest value in `values`, or `undefined` when the array is empty.
 *
 * Folds pairwise instead of spreading into `Math.min(...values)`: spreading
 * passes every element as a separate call argument and throws a RangeError
 * once the array is larger than the engine's argument limit.
 */
var min = (values) => values.length === 0 ? undefined : values.reduce((smallest, value) => Math.min(smallest, value), Infinity);
796
/**
 * Reads `stat[key]` and returns it only when it is a finite number;
 * any other value (missing, NaN, Infinity, non-number) yields `undefined`.
 */
var numericStat = (stat, key) => {
  const candidate = stat[key];
  if (typeof candidate !== "number") {
    return undefined;
  }
  return Number.isFinite(candidate) ? candidate : undefined;
};
800
/**
 * Reads `stat[key]` and returns it only when it is a real boolean;
 * every other value yields `undefined`.
 */
var booleanStat = (stat, key) => {
  const candidate = stat[key];
  if (typeof candidate === "boolean") {
    return candidate;
  }
  return undefined;
};
804
/**
 * Reads `stat[key]` and returns it only when it is a string;
 * every other value yields `undefined`.
 */
var stringStat = (stat, key) => {
  const candidate = stat[key];
  if (typeof candidate === "string") {
    return candidate;
  }
  return undefined;
};
808
/**
 * Converts seconds to milliseconds, passing `undefined` through untouched.
 */
var secondsToMs = (value) => {
  if (value === undefined) {
    return undefined;
  }
  return value * 1000;
};
809
/**
 * Returns a shallow copy of `stat` containing only the entries whose value is
 * `null`, a boolean, a number or a string; everything else is dropped.
 */
var normalizeWebRTCStat = (stat) => {
  const isPlainValue = (value) => value === null || ["boolean", "number", "string"].includes(typeof value);
  const sample = {};
  Object.entries(stat).forEach(([key, value]) => {
    if (isPlainValue(value)) {
      sample[key] = value;
    }
  });
  return sample;
};
818
/**
 * Describes the conversion from `input.inputFormat` to `input.outputFormat`.
 *
 * - `required` is true when the formats differ in container, encoding,
 *   sample rate or channel count.
 * - `ratio` is output sample rate divided by input sample rate.
 * - `status` is "pass" when container, encoding and channels agree (only the
 *   sample rate may differ), otherwise "warn".
 */
var buildMediaResamplingPlan = (input) => {
  const { inputFormat, outputFormat } = input;
  const sameLayout = inputFormat.container === outputFormat.container && inputFormat.encoding === outputFormat.encoding && inputFormat.channels === outputFormat.channels;
  const sameRate = inputFormat.sampleRateHz === outputFormat.sampleRateHz;
  return {
    inputFormat,
    outputFormat,
    ratio: outputFormat.sampleRateHz / inputFormat.sampleRateHz,
    required: !(sameLayout && sameRate),
    status: sameLayout ? "pass" : "warn"
  };
};
828
/**
 * Scores how likely a frame contains speech from its metadata.
 *
 * An explicit boolean `isSpeech` wins (true -> 1, false -> 0). Otherwise the
 * first finite number among `speechProbability`, `voiceProbability`, `rms`
 * and `energy` (in that order) is returned as-is; with none present the
 * score is 0.
 */
var speechProbability = (frame) => {
  const metadata = frame.metadata;
  if (metadata?.isSpeech === true) {
    return 1;
  }
  if (metadata?.isSpeech === false) {
    return 0;
  }
  const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);
  const score = ["speechProbability", "voiceProbability", "rms", "energy"].map((key) => metadata?.[key]).find(isFiniteNumber);
  return score ?? 0;
};
843
/**
 * Groups `input-audio` frames into speech segments using per-frame speech
 * scores from `speechProbability`.
 *
 * A segment opens on a frame scoring at least `speechStartThreshold`
 * (default 0.6). While open, every frame is appended; frames scoring at or
 * below `speechEndThreshold` (default 0.35) extend a silence run, any other
 * frame resets it. The segment closes once the run exceeds
 * `maxSilenceFrames` (default 1), and once more at the end of input.
 * Candidates shorter than `minSpeechFrames` (default 1) are discarded.
 * `status` is "warn" when no input-audio frame was seen, otherwise "pass".
 */
var buildMediaVadReport = (input = {}) => {
  const frames = (input.frames ?? []).filter((frame) => frame.kind === "input-audio");
  const speechStartThreshold = input.speechStartThreshold ?? 0.6;
  const speechEndThreshold = input.speechEndThreshold ?? 0.35;
  const minSpeechFrames = input.minSpeechFrames ?? 1;
  const maxSilenceFrames = input.maxSilenceFrames ?? 1;
  const segments = [];
  let pending = [];
  let quietRun = 0;

  const resetPending = () => {
    pending = [];
    quietRun = 0;
  };

  // Emits the pending frames as a segment (when long enough) and resets state.
  const flushPending = () => {
    if (pending.length < minSpeechFrames) {
      resetPending();
      return;
    }
    const [first] = pending;
    const last = pending.at(-1);
    if (!first) {
      return;
    }
    const lastAt = last?.at;
    const tailMs = last?.durationMs ?? 0;
    segments.push({
      durationMs: first.at !== undefined && lastAt !== undefined ? lastAt - first.at + tailMs : undefined,
      endAt: lastAt !== undefined ? lastAt + tailMs : undefined,
      frameCount: pending.length,
      segmentId: `vad:${String(segments.length + 1)}`,
      sessionId: first.sessionId,
      startAt: first.at,
      turnId: first.turnId
    });
    resetPending();
  };

  frames.forEach((frame) => {
    const probability = speechProbability(frame);
    if (pending.length === 0) {
      // No open segment: only a frame at or above the start threshold opens one.
      if (probability >= speechStartThreshold) {
        pending.push(frame);
      }
      return;
    }
    pending.push(frame);
    quietRun = probability <= speechEndThreshold ? quietRun + 1 : 0;
    if (quietRun > maxSilenceFrames) {
      flushPending();
    }
  });
  flushPending();

  return {
    checkedAt: Date.now(),
    inputAudioFrames: frames.length,
    segments,
    status: frames.length === 0 ? "warn" : "pass"
  };
};
901
/**
 * Reports on `interruption` frames in `input.frames`.
 *
 * Adds a warning when no interruption frame exists, and an error when
 * `input.maxInterruptionLatencyMs` is set and any numeric `latencyMs`
 * exceeds it. `status` is "fail" with an error, "warn" with only warnings,
 * otherwise "pass".
 */
var buildMediaInterruptionReport = (input = {}) => {
  const issues = [];
  const interruptions = (input.frames ?? []).filter((frame) => frame.kind === "interruption");
  const latenciesMs = [];
  for (const frame of interruptions) {
    const latency = frame.latencyMs;
    if (typeof latency === "number") {
      latenciesMs.push(latency);
    }
  }
  const budgetMs = input.maxInterruptionLatencyMs;

  if (interruptions.length === 0) {
    issues.push({ code: "media.interruption_missing", message: "No interruption frame was observed.", severity: "warning" });
  }
  const overBudget = budgetMs !== undefined && latenciesMs.some((latency) => latency > budgetMs);
  if (overBudget) {
    issues.push({ code: "media.interruption_latency", message: `Interruption latency exceeded ${String(budgetMs)}ms.`, severity: "error" });
  }

  let status = "pass";
  if (issues.some((issue) => issue.severity === "error")) {
    status = "fail";
  } else if (issues.length > 0) {
    status = "warn";
  }

  return {
    checkedAt: Date.now(),
    interruptionFrames: interruptions.length,
    issues,
    latenciesMs,
    status
  };
};
920
/**
 * Builds a quality report over `input.frames` (copied and sorted by `at`,
 * frames without `at` sorting as 0; the caller's array is not mutated).
 *
 * Derived values:
 * - `gapsMs`: positive gaps between consecutive audio frames, computed from
 *   the previous frame's `at + durationMs`.
 * - `jitterMs`: the last finite `jitterMs` metadata value, else the largest gap.
 * - `durationMs`: span from the first to the last timestamped audio frame,
 *   plus the last frame's duration.
 * - `timestampDriftMs`: how far `durationMs` exceeds the summed frame
 *   durations (never negative).
 * - speech/silence counts use fixed score cut-offs of 0.6 and 0.35 on
 *   `input-audio` frames.
 * Optional budgets (`maxGapMs`, `maxJitterMs`, `maxTimestampDriftMs`,
 * `minSpeechRatio`, `maxBackpressureEvents`) each add a warning when violated.
 * `status` is "fail" with an error, "warn" with only warnings, else "pass".
 */
var buildMediaQualityReport = (input = {}) => {
  const frames = [...input.frames ?? []].sort((a, b) => (a.at ?? 0) - (b.at ?? 0));
  const audioFrames = frames.filter((frame) => frame.kind === "input-audio" || frame.kind === "assistant-audio");
  const inputAudioFrames = frames.filter((frame) => frame.kind === "input-audio");
  const assistantAudioFrames = frames.filter((frame) => frame.kind === "assistant-audio");
  const issues = [];
  const gapsMs = [];
  for (const [index, frame] of audioFrames.entries()) {
    const previous = audioFrames[index - 1];
    // A gap needs both timestamps and the previous frame's duration.
    if (previous?.at === undefined || frame.at === undefined || previous.durationMs === undefined) {
      continue;
    }
    const gap = frame.at - (previous.at + previous.durationMs);
    if (gap > 0) {
      gapsMs.push(gap);
    }
  }
  const jitterMs = audioFrames.map((frame) => numericMetadata(frame, "jitterMs")).filter((value) => value !== undefined).at(-1) ?? max(gapsMs);
  const first = audioFrames.find((frame) => frame.at !== undefined);
  // Scan backwards for the last timestamped frame instead of copying the whole
  // array with `toReversed()` (an ES2023-only API) just to search it.
  let last;
  for (let index = audioFrames.length - 1; index >= 0; index -= 1) {
    if (audioFrames[index].at !== undefined) {
      last = audioFrames[index];
      break;
    }
  }
  const durationMs = first?.at !== undefined && last?.at !== undefined ? last.at - first.at + (last.durationMs ?? 0) : undefined;
  const expectedDurationMs = audioFrames.length > 0 ? audioFrames.reduce((total, frame) => total + (frame.durationMs ?? 0), 0) : undefined;
  const timestampDriftMs = durationMs !== undefined && expectedDurationMs !== undefined ? Math.max(0, durationMs - expectedDurationMs) : undefined;
  const speechScores = inputAudioFrames.map(speechProbability);
  const speechFrames = speechScores.filter((score) => score >= 0.6).length;
  const silenceFrames = speechScores.filter((score) => score <= 0.35).length;
  const unknownSpeechFrames = Math.max(0, inputAudioFrames.length - speechFrames - silenceFrames);
  const speechRatio = inputAudioFrames.length === 0 ? 0 : speechFrames / inputAudioFrames.length;
  const silenceRatio = inputAudioFrames.length === 0 ? 0 : silenceFrames / inputAudioFrames.length;
  const levels = audioFrames.map((frame) => numericMetadata(frame, "level") ?? numericMetadata(frame, "rms") ?? numericMetadata(frame, "energy")).filter((value) => value !== undefined);
  const backpressureEvents = input.transport?.backpressureEvents ?? 0;
  const maxGapMs = input.maxGapMs;
  if (maxGapMs !== undefined && gapsMs.some((gap) => gap > maxGapMs)) {
    pushIssue(issues, "warning", "media.quality_gap", `Observed media gap above ${String(maxGapMs)}ms.`);
  }
  if (input.maxJitterMs !== undefined && jitterMs !== undefined && jitterMs > input.maxJitterMs) {
    pushIssue(issues, "warning", "media.quality_jitter", `Observed jitter ${String(jitterMs)}ms above ${String(input.maxJitterMs)}ms.`);
  }
  if (input.maxTimestampDriftMs !== undefined && timestampDriftMs !== undefined && timestampDriftMs > input.maxTimestampDriftMs) {
    pushIssue(issues, "warning", "media.quality_timestamp_drift", `Observed timestamp drift ${String(timestampDriftMs)}ms above ${String(input.maxTimestampDriftMs)}ms.`);
  }
  // The speech-ratio budget is skipped when there were no input frames at all.
  if (input.minSpeechRatio !== undefined && inputAudioFrames.length > 0 && speechRatio < input.minSpeechRatio) {
    pushIssue(issues, "warning", "media.quality_speech_ratio", `Observed speech ratio ${String(speechRatio)} below ${String(input.minSpeechRatio)}.`);
  }
  if (input.maxBackpressureEvents !== undefined && backpressureEvents > input.maxBackpressureEvents) {
    pushIssue(issues, "warning", "media.quality_backpressure", `Observed ${String(backpressureEvents)} backpressure event(s), above ${String(input.maxBackpressureEvents)}.`);
  }
  return {
    assistantAudioFrames: assistantAudioFrames.length,
    backpressureEvents,
    checkedAt: Date.now(),
    durationMs,
    gapCount: gapsMs.length,
    gapsMs,
    inputAudioFrames: inputAudioFrames.length,
    issues,
    jitterMs,
    levelAverage: average(levels),
    levelMax: max(levels),
    levelMin: min(levels),
    silenceFrames,
    silenceRatio,
    speechFrames,
    speechRatio,
    status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass",
    timestampDriftMs,
    totalFrames: frames.length,
    unknownSpeechFrames
  };
};
990
/**
 * Summarises a list of normalized WebRTC stats samples into a report.
 *
 * Reads `input.stats` (defaults to an empty list) and the optional checks
 * `requireConnectedCandidatePair`, `requireLiveAudioTrack`,
 * `maxPacketLossRatio`, `maxRoundTripTimeMs` and `maxJitterMs`.
 * Returns counters, derived timing values (milliseconds), the collected
 * `issues`, and `status`: "fail" when any issue is an error, "warn" when only
 * warnings exist, otherwise "pass".
 */
var buildMediaWebRTCStatsReport = (input = {}) => {
  const stats = input.stats ?? [];
  const issues = [];

  // Small local predicates/folds shared by the derivations below.
  const isNonVideoRtp = (stat, type) => stat.type === type && stringStat(stat, "kind") !== "video";
  const hasEnded = (stat) => stringStat(stat, "readyState") === "ended" || stringStat(stat, "trackState") === "ended" || booleanStat(stat, "ended") === true;
  const isActivePair = (stat) => booleanStat(stat, "selected") === true || booleanStat(stat, "nominated") === true || stringStat(stat, "state") === "succeeded";
  const sumOf = (rows, key) => rows.reduce((total, row) => total + (numericStat(row, key) ?? 0), 0);
  const largestDefined = (values) => max(values.filter((value) => value !== undefined));
  const averageBufferDelayMs = (stat) => {
    const delay = numericStat(stat, "jitterBufferDelay");
    const emitted = numericStat(stat, "jitterBufferEmittedCount");
    if (delay === undefined || emitted === undefined || emitted <= 0) {
      return undefined;
    }
    return delay / emitted * 1000;
  };

  const inbound = stats.filter((stat) => isNonVideoRtp(stat, "inbound-rtp"));
  const outbound = stats.filter((stat) => isNonVideoRtp(stat, "outbound-rtp"));
  const rtpStreams = [...inbound, ...outbound];
  const candidatePairs = stats.filter((stat) => stat.type === "candidate-pair");
  const audioTracks = stats.filter((stat) => {
    const trackLike = stat.type === "track" || stat.type === "media-source";
    return trackLike && stringStat(stat, "kind") === "audio";
  });

  const activeCandidatePairs = candidatePairs.filter((stat) => isActivePair(stat)).length;
  const endedAudioTracks = audioTracks.filter((stat) => hasEnded(stat)).length;
  // "Live" is the exact complement of "ended" over the same track list.
  const liveAudioTracks = audioTracks.length - endedAudioTracks;

  const inboundPackets = sumOf(inbound, "packetsReceived");
  const outboundPackets = sumOf(outbound, "packetsSent");
  // Negative loss counters are clamped to zero before summing.
  const packetsLost = rtpStreams.reduce((total, stat) => total + Math.max(0, numericStat(stat, "packetsLost") ?? 0), 0);
  const observedPackets = inboundPackets + packetsLost;
  const packetLossRatio = observedPackets === 0 ? 0 : packetsLost / observedPackets;
  const bytesReceived = sumOf(inbound, "bytesReceived");
  const bytesSent = sumOf(outbound, "bytesSent");

  const roundTripTimeMs = largestDefined(candidatePairs.map((stat) => secondsToMs(numericStat(stat, "currentRoundTripTime") ?? numericStat(stat, "roundTripTime"))));
  const jitterMs = largestDefined(rtpStreams.map((stat) => secondsToMs(numericStat(stat, "jitter"))));
  const jitterBufferDelayMs = largestDefined(inbound.map((stat) => averageBufferDelayMs(stat)));
  const audioLevels = audioTracks.map((stat) => numericStat(stat, "audioLevel")).filter((value) => value !== undefined);

  // The candidate-pair check only applies when at least one pair was reported.
  if (input.requireConnectedCandidatePair && candidatePairs.length > 0 && activeCandidatePairs === 0) {
    pushIssue(issues, "error", "media.webrtc_candidate_pair_missing", "No active WebRTC candidate pair was observed.");
  }
  if (input.requireLiveAudioTrack && liveAudioTracks === 0) {
    pushIssue(issues, "error", "media.webrtc_audio_track_missing", "No live WebRTC audio track was observed.");
  }
  if (input.maxPacketLossRatio !== undefined && packetLossRatio > input.maxPacketLossRatio) {
    pushIssue(issues, "warning", "media.webrtc_packet_loss", `Observed WebRTC packet loss ratio ${String(packetLossRatio)} above ${String(input.maxPacketLossRatio)}.`);
  }
  if (input.maxRoundTripTimeMs !== undefined && roundTripTimeMs !== undefined && roundTripTimeMs > input.maxRoundTripTimeMs) {
    pushIssue(issues, "warning", "media.webrtc_round_trip_time", `Observed WebRTC RTT ${String(roundTripTimeMs)}ms above ${String(input.maxRoundTripTimeMs)}ms.`);
  }
  if (input.maxJitterMs !== undefined && jitterMs !== undefined && jitterMs > input.maxJitterMs) {
    pushIssue(issues, "warning", "media.webrtc_jitter", `Observed WebRTC jitter ${String(jitterMs)}ms above ${String(input.maxJitterMs)}ms.`);
  }

  let status = "pass";
  if (issues.some((issue) => issue.severity === "error")) {
    status = "fail";
  } else if (issues.length > 0) {
    status = "warn";
  }

  return {
    activeCandidatePairs,
    audioLevelAverage: average(audioLevels),
    bytesReceived,
    bytesSent,
    checkedAt: Date.now(),
    endedAudioTracks,
    inboundPackets,
    issues,
    jitterBufferDelayMs,
    jitterMs,
    liveAudioTracks,
    outboundPackets,
    packetLossRatio,
    packetsLost,
    roundTripTimeMs,
    status,
    totalStats: stats.length
  };
};
1050
/**
 * Calls `input.peerConnection.getStats(...)` with `input.selector` (or `null`
 * when none is given) and returns every entry of the resulting report passed
 * through `normalizeWebRTCStat`.
 */
var collectMediaWebRTCStats = async (input) => {
  const selector = input.selector ?? null;
  const report = await input.peerConnection.getStats(selector);
  return Array.from(report.values(), (entry) => normalizeWebRTCStat(entry));
};
1054
/**
 * Collects stats via `collectMediaWebRTCStats(input)` and builds the report
 * from `input` with the collected samples supplied as `stats`.
 */
var collectMediaWebRTCStatsReport = async (input) => {
  const collected = await collectMediaWebRTCStats(input);
  const reportInput = { ...input, stats: collected };
  return buildMediaWebRTCStatsReport(reportInput);
};
1061
/**
 * Builds a calibration report for a media pipeline from `input.frames`.
 *
 * Counts frames per kind, derives the smallest assistant-audio latency
 * (`firstAudioLatencyMs`) and the largest `jitterMs` metadata value, resolves
 * the observed input/output formats (explicit `input.inputFormat` /
 * `input.outputFormat`, else the first frame carrying a `format`), and checks
 * them against the optional expectations and budgets in `input`.
 *
 * `resamplingTargetHz` is the expected sample rate of the side whose rate
 * actually mismatches: the expected input rate when the input rate differs,
 * otherwise the expected output rate when the output rate differs, otherwise
 * `undefined`. (Previously the expected input rate was reported whenever
 * `expectedInputFormat` was set, even if only the output rate mismatched.)
 *
 * `status` is "fail" with an error issue, "warn" with only warnings, else "pass".
 */
var buildMediaPipelineCalibrationReport = (input = {}) => {
  const frames = input.frames ?? [];
  const issues = [];
  const inputFrames = frames.filter((frame) => frame.kind === "input-audio");
  const assistantFrames = frames.filter((frame) => frame.kind === "assistant-audio");
  const turnCommitFrames = frames.filter((frame) => frame.kind === "turn-commit");
  const interruptionFrameRecords = frames.filter((frame) => frame.kind === "interruption");
  const traceLinkedFrames = frames.filter((frame) => frame.traceEventId).length;
  const backpressureFrames = frames.filter((frame) => Boolean(frame.metadata?.backpressure)).length;
  const audioLatencies = assistantFrames.map((frame) => frame.latencyMs).filter((latency) => typeof latency === "number");
  // The shared helpers already return undefined for an empty list.
  const firstAudioLatencyMs = min(audioLatencies);
  const jitterValues = frames.map((frame) => numericMetadata(frame, "jitterMs")).filter((value) => value !== undefined);
  const jitterMs = max(jitterValues);
  const inputFormat = input.inputFormat ?? inputFrames.find((frame) => frame.format)?.format;
  const outputFormat = input.outputFormat ?? assistantFrames.find((frame) => frame.format)?.format;
  // Track each side separately so the target rate matches the side that needs it.
  const inputResampling = Boolean(input.expectedInputFormat && inputFormat && inputFormat.sampleRateHz !== input.expectedInputFormat.sampleRateHz);
  const outputResampling = Boolean(input.expectedOutputFormat && outputFormat && outputFormat.sampleRateHz !== input.expectedOutputFormat.sampleRateHz);
  const resamplingRequired = inputResampling || outputResampling;
  const resamplingTargetHz = inputResampling ? input.expectedInputFormat.sampleRateHz : outputResampling ? input.expectedOutputFormat.sampleRateHz : undefined;
  if (inputFrames.length === 0) {
    pushIssue(issues, "warning", "media.input_audio_missing", "No input audio frames were observed.");
  }
  if (assistantFrames.length === 0) {
    pushIssue(issues, "warning", "media.assistant_audio_missing", "No assistant audio frames were observed.");
  }
  // A format mismatch is only a warning when the sample rates still agree.
  if (input.expectedInputFormat && inputFormat && !formatMatches(inputFormat, input.expectedInputFormat)) {
    pushIssue(issues, inputFormat.sampleRateHz === input.expectedInputFormat.sampleRateHz ? "warning" : "error", "media.input_format_mismatch", `Input format ${formatLabel(inputFormat)} does not match expected ${formatLabel(input.expectedInputFormat)}.`);
  }
  if (input.expectedOutputFormat && outputFormat && !formatMatches(outputFormat, input.expectedOutputFormat)) {
    pushIssue(issues, outputFormat.sampleRateHz === input.expectedOutputFormat.sampleRateHz ? "warning" : "error", "media.output_format_mismatch", `Output format ${formatLabel(outputFormat)} does not match expected ${formatLabel(input.expectedOutputFormat)}.`);
  }
  if (firstAudioLatencyMs !== undefined && input.maxFirstAudioLatencyMs !== undefined && firstAudioLatencyMs > input.maxFirstAudioLatencyMs) {
    pushIssue(issues, "error", "media.first_audio_latency", `First audio latency ${String(firstAudioLatencyMs)}ms exceeds budget ${String(input.maxFirstAudioLatencyMs)}ms.`);
  }
  if (jitterMs !== undefined && input.maxJitterMs !== undefined && jitterMs > input.maxJitterMs) {
    pushIssue(issues, "warning", "media.jitter", `Media jitter ${String(jitterMs)}ms exceeds budget ${String(input.maxJitterMs)}ms.`);
  }
  if (input.maxBackpressureFrames !== undefined && backpressureFrames > input.maxBackpressureFrames) {
    pushIssue(issues, "warning", "media.backpressure", `Backpressure frame count ${String(backpressureFrames)} exceeds budget ${String(input.maxBackpressureFrames)}.`);
  }
  if (input.requireInterruptionFrame && interruptionFrameRecords.length === 0) {
    pushIssue(issues, "warning", "media.interruption_missing", "No interruption frame was observed.");
  }
  if (input.requireTraceEvidence && traceLinkedFrames === 0) {
    pushIssue(issues, "warning", "media.trace_evidence_missing", "No media frames were linked to trace evidence.");
  }
  return {
    assistantAudioFrames: assistantFrames.length,
    backpressureFrames,
    checkedAt: Date.now(),
    firstAudioLatencyMs,
    inputAudioFrames: inputFrames.length,
    inputFormat,
    interruptionFrames: interruptionFrameRecords.length,
    issues,
    jitterMs,
    outputFormat,
    resamplingRequired,
    resamplingTargetHz,
    status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass",
    surface: input.surface ?? "media-pipeline",
    traceLinkedFrames,
    turnCommitFrames: turnCommitFrames.length
  };
};
1124
+
1125
+ // src/client/browserMedia.ts
1126
// Fallback endpoint used when the reporter options carry no `path`.
var DEFAULT_BROWSER_MEDIA_PATH = "/api/voice/browser-media";
// Fallback delay between scheduled reports (five seconds, in milliseconds).
var DEFAULT_BROWSER_MEDIA_INTERVAL_MS = 5 * 1000;
1128
/**
 * Resolves the peer connection to inspect: `options.peerConnection` when it is
 * set, otherwise the (awaited) result of `options.getPeerConnection()`, and
 * `null` when neither yields a value. The getter is not called when a
 * connection is given directly.
 */
var resolvePeerConnection = async (options) => {
  const direct = options.peerConnection;
  if (direct !== null && direct !== undefined) {
    return direct;
  }
  const looked = await options.getPeerConnection?.();
  return looked ?? null;
};
1129
/**
 * POSTs `payload` as JSON to `options.path` (or the default browser-media
 * path) using `options.fetch`, falling back to `globalThis.fetch`.
 * Does nothing when no fetch implementation is available. The response is
 * awaited but not inspected.
 */
var postBrowserMediaReport = async (payload, options) => {
  const requestFetch = options.fetch ?? globalThis.fetch;
  if (!requestFetch) {
    return;
  }
  const target = options.path ?? DEFAULT_BROWSER_MEDIA_PATH;
  const init = {
    body: JSON.stringify(payload),
    headers: { "Content-Type": "application/json" },
    keepalive: true,
    method: "POST"
  };
  await requestFetch(target, init);
};
1143
/**
 * Creates a reporter that periodically collects a WebRTC stats report and
 * posts it.
 *
 * Returned API:
 * - `reportOnce()` resolves the peer connection, builds a report, hands the
 *   payload to `options.onReport` (if any), posts it and resolves with the
 *   payload; resolves with `undefined` when no peer connection is available.
 * - `start()` runs one report immediately and then on an interval of
 *   `options.intervalMs` (or the default); a second `start()` while running
 *   is ignored.
 * - `stop()` / `close()` clear the interval (same function).
 * Failures of scheduled reports are passed to `options.onError` (if any).
 */
var createVoiceBrowserMediaReporter = (options) => {
  let interval = null;

  async function reportOnce() {
    const peerConnection = await resolvePeerConnection(options);
    if (!peerConnection) {
      return;
    }
    const report = await collectMediaWebRTCStatsReport({ ...options, peerConnection });
    const payload = {
      at: Date.now(),
      report,
      scenarioId: options.getScenarioId?.() ?? null,
      sessionId: options.getSessionId?.() ?? null
    };
    options.onReport?.(payload);
    await postBrowserMediaReport(payload, options);
    return payload;
  }

  // Fire-and-forget wrapper used for the immediate and the scheduled runs.
  function tick() {
    reportOnce().catch((error) => {
      options.onError?.(error);
    });
  }

  function stop() {
    if (!interval) {
      return;
    }
    clearInterval(interval);
    interval = null;
  }

  function start() {
    if (interval) {
      return;
    }
    tick();
    interval = setInterval(tick, options.intervalMs ?? DEFAULT_BROWSER_MEDIA_INTERVAL_MS);
  }

  return {
    close: stop,
    reportOnce,
    start,
    stop
  };
};
1188
+
783
1189
  // src/client/store.ts
784
1190
  var createInitialReconnectState = () => ({
785
1191
  attempts: 0,
@@ -948,12 +1354,18 @@ var createVoiceStreamStore = () => {
948
1354
  var createVoiceStream = (path, options = {}) => {
949
1355
  const connection = createVoiceConnection(path, options);
950
1356
  const store = createVoiceStreamStore();
1357
+ const browserMediaReporter = options.browserMedia && typeof window !== "undefined" ? createVoiceBrowserMediaReporter({
1358
+ ...options.browserMedia,
1359
+ getScenarioId: () => options.browserMedia ? options.browserMedia.getScenarioId?.() ?? connection.getScenarioId() : connection.getScenarioId(),
1360
+ getSessionId: () => options.browserMedia ? options.browserMedia.getSessionId?.() ?? connection.getSessionId() : connection.getSessionId()
1361
+ }) : null;
951
1362
  const subscribers = new Set;
952
1363
  const start = (input) => Promise.resolve().then(() => {
953
1364
  if (!input?.sessionId && !input?.scenarioId) {
954
1365
  return;
955
1366
  }
956
1367
  connection.start(input);
1368
+ browserMediaReporter?.start();
957
1369
  });
958
1370
  const notify = () => {
959
1371
  subscribers.forEach((subscriber) => subscriber());
@@ -995,6 +1407,7 @@ var createVoiceStream = (path, options = {}) => {
995
1407
  },
996
1408
  close() {
997
1409
  unsubscribeConnection();
1410
+ browserMediaReporter?.close();
998
1411
  connection.close();
999
1412
  store.dispatch({ type: "disconnected" });
1000
1413
  notify();
@@ -6943,6 +7356,7 @@ export {
6943
7356
  createVoiceController,
6944
7357
  createVoiceConnection,
6945
7358
  createVoiceCampaignDialerProofStore,
7359
+ createVoiceBrowserMediaReporter,
6946
7360
  createVoiceBargeInMonitor,
6947
7361
  createVoiceAudioPlayer,
6948
7362
  createVoiceAgentSquadStatusViewModel,
package/dist/index.d.ts CHANGED
@@ -16,7 +16,9 @@ export { assertVoiceRealtimeProviderContractEvidence, buildVoiceRealtimeProvider
16
16
  export type { VoiceRealtimeProviderContractAssertionInput, VoiceRealtimeProviderContractAssertionReport, VoiceRealtimeProviderContractCapability, VoiceRealtimeProviderContractCheck, VoiceRealtimeProviderContractDefinition, VoiceRealtimeProviderContractMatrixPresetOptions, VoiceRealtimeProviderContractMatrixInput, VoiceRealtimeProviderContractMatrixReport, VoiceRealtimeProviderContractRoutesOptions, VoiceRealtimeProviderContractRow, VoiceRealtimeProviderPresetProvider, VoiceRealtimeProviderContractStatus } from './realtimeProviderContracts';
17
17
  export { buildVoiceDiagnosticsMarkdown, createVoiceDiagnosticsRoutes, resolveVoiceDiagnosticsTraceFilter } from './diagnosticsRoutes';
18
18
  export { assertVoiceMediaPipelineEvidence, buildVoiceMediaPipelineReport, createVoiceMediaPipelineRoutes, evaluateVoiceMediaPipelineEvidence, renderVoiceMediaPipelineHTML, renderVoiceMediaPipelineMarkdown } from './mediaPipelineRoutes';
19
+ export { createVoiceBrowserMediaRoutes, getLatestVoiceBrowserMediaReport, renderVoiceBrowserMediaHTML, summarizeVoiceBrowserMedia } from './browserMediaRoutes';
19
20
  export type { VoiceMediaPipelineAssertionInput, VoiceMediaPipelineAssertionReport, VoiceMediaPipelineReport, VoiceMediaPipelineReportOptions, VoiceMediaPipelineRoutesOptions } from './mediaPipelineRoutes';
21
+ export type { VoiceBrowserMediaReport, VoiceBrowserMediaRoutesOptions, VoiceBrowserMediaSample, VoiceBrowserMediaStatus } from './browserMediaRoutes';
20
22
  export { buildVoiceDemoReadyReport, createVoiceDemoReadyRoutes, renderVoiceDemoReadyHTML } from './demoReadyRoutes';
21
23
  export { buildVoiceDeliverySinkReport, createVoiceDeliverySinkDescriptor, createVoiceDeliverySinkPair, createVoiceDeliverySinkRoutes, createVoiceFileDeliverySink, createVoicePostgresDeliverySink, createVoiceS3DeliverySink, createVoiceSQLiteDeliverySink, createVoiceWebhookDeliverySink, renderVoiceDeliverySinkHTML } from './deliverySinkRoutes';
22
24
  export { buildVoiceOpsActionHistoryReport, createVoiceOpsActionAuditRoutes, recordVoiceOpsActionAudit, renderVoiceOpsActionHistoryHTML } from './opsActionAuditRoutes';