@absolutejs/voice 0.0.22-beta.319 → 0.0.22-beta.320
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/angular/index.js +413 -0
- package/dist/browserMediaRoutes.d.ts +61 -0
- package/dist/client/browserMedia.d.ts +8 -0
- package/dist/client/htmxBootstrap.js +172 -1
- package/dist/client/index.d.ts +2 -0
- package/dist/client/index.js +414 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +587 -404
- package/dist/react/index.js +413 -0
- package/dist/svelte/index.js +413 -0
- package/dist/testing/index.js +441 -28
- package/dist/trace.d.ts +1 -1
- package/dist/types.d.ts +19 -0
- package/dist/vue/index.js +413 -0
- package/package.json +1 -1
package/dist/testing/index.js
CHANGED
|
@@ -2159,6 +2159,412 @@ var serverMessageToAction = (message) => {
|
|
|
2159
2159
|
}
|
|
2160
2160
|
};
|
|
2161
2161
|
|
|
2162
|
+
// node_modules/@absolutejs/media/dist/index.js
|
|
2163
|
+
/**
 * Renders an audio format descriptor as a compact label of the form
 * `container/encoding/<sampleRateHz>hz/<channels>ch`.
 *
 * @param format Object exposing `container`, `encoding`, `sampleRateHz` and `channels`.
 * @returns The slash-separated label string.
 */
var formatLabel = (format) => {
  const rateLabel = `${String(format.sampleRateHz)}hz`;
  const channelLabel = `${String(format.channels)}ch`;
  return `${format.container}/${format.encoding}/${rateLabel}/${channelLabel}`;
};
|
|
2164
|
+
/**
 * Tells whether two audio format descriptors are identical on all four
 * compared fields (strict equality on each).
 *
 * @param actual Observed format.
 * @param expected Format to compare against.
 * @returns `true` only when container, encoding, sample rate and channel count all match.
 */
var formatMatches = (actual, expected) => {
  const comparedFields = ["container", "encoding", "sampleRateHz", "channels"];
  return comparedFields.every((field) => actual[field] === expected[field]);
};
|
|
2165
|
+
/**
 * Appends one diagnostic record to `issues`, mutating the array in place.
 *
 * @param issues Array receiving the record.
 * @param severity Severity label stored on the record.
 * @param code Issue code stored on the record.
 * @param message Description stored on the record.
 */
var pushIssue = (issues, severity, code, message) => {
  const record = { code, message, severity };
  issues.push(record);
};
|
|
2168
|
+
/**
 * Reads `frame.metadata[key]` and returns it only when it is a finite number.
 *
 * @param frame Frame whose optional `metadata` bag is inspected.
 * @param key Metadata property name.
 * @returns The finite numeric value, or `undefined` when the metadata bag or key is
 *          missing, or the value is not a finite number.
 */
var numericMetadata = (frame, key) => {
  const candidate = frame.metadata?.[key];
  // Number.isFinite is false for every non-number, so it also covers the type check.
  return Number.isFinite(candidate) ? candidate : undefined;
};
|
|
2172
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param values Numbers to average.
 * @returns The mean, or `undefined` for an empty list.
 */
var average3 = (values) => {
  if (values.length === 0) {
    return undefined;
  }
  let sum = 0;
  values.forEach((value) => {
    sum += value;
  });
  return sum / values.length;
};
|
|
2173
|
+
/**
 * Largest value in a list of numbers.
 *
 * Folds pairwise through `Math.max` instead of spreading the list into a single
 * call: spreading passes every element as a call argument, which can throw a
 * `RangeError` once the list is large enough to exceed the engine's argument limit.
 * The `-Infinity` seed keeps `Math.max` semantics (including NaN propagation).
 *
 * @param values Numbers to scan.
 * @returns The maximum, or `undefined` for an empty list.
 */
var max = (values) => values.length === 0 ? undefined : values.reduce((highest, value) => Math.max(highest, value), -Infinity);
|
|
2174
|
+
/**
 * Smallest value in a list of numbers.
 *
 * Folds pairwise through `Math.min` instead of spreading the list into a single
 * call: spreading passes every element as a call argument, which can throw a
 * `RangeError` once the list is large enough to exceed the engine's argument limit.
 * The `Infinity` seed keeps `Math.min` semantics (including NaN propagation).
 *
 * @param values Numbers to scan.
 * @returns The minimum, or `undefined` for an empty list.
 */
var min = (values) => values.length === 0 ? undefined : values.reduce((lowest, value) => Math.min(lowest, value), Infinity);
|
|
2175
|
+
/**
 * Reads `stat[key]` and returns it only when it is a finite number.
 *
 * @param stat Stats record to read from.
 * @param key Property name.
 * @returns The finite numeric value, otherwise `undefined`.
 */
var numericStat = (stat, key) => {
  const candidate = stat[key];
  // Number.isFinite is false for every non-number, so it also covers the type check.
  return Number.isFinite(candidate) ? candidate : undefined;
};
|
|
2179
|
+
/**
 * Reads `stat[key]` and returns it only when it is a boolean.
 *
 * @param stat Stats record to read from.
 * @param key Property name.
 * @returns The boolean value, otherwise `undefined`.
 */
var booleanStat = (stat, key) => {
  const candidate = stat[key];
  if (typeof candidate === "boolean") {
    return candidate;
  }
  return undefined;
};
|
|
2183
|
+
/**
 * Reads `stat[key]` and returns it only when it is a string.
 *
 * @param stat Stats record to read from.
 * @param key Property name.
 * @returns The string value, otherwise `undefined`.
 */
var stringStat = (stat, key) => {
  const candidate = stat[key];
  if (typeof candidate === "string") {
    return candidate;
  }
  return undefined;
};
|
|
2187
|
+
/**
 * Multiplies a value by 1000, passing `undefined` through untouched.
 *
 * @param value Number to scale, or `undefined`.
 * @returns `value * 1000`, or `undefined` when no value was given.
 */
var secondsToMs = (value) => {
  if (value === undefined) {
    return undefined;
  }
  return value * 1000;
};
|
|
2188
|
+
/**
 * Copies a stats record into a fresh plain object, keeping only properties whose
 * value is `null`, a boolean, a number or a string. Anything else (nested objects,
 * arrays, functions, `undefined`, ...) is dropped.
 *
 * @param stat Stats record whose own enumerable properties are inspected.
 * @returns A new object holding only the primitive-valued properties.
 */
var normalizeWebRTCStat = (stat) => {
  const keptTypes = new Set(["boolean", "number", "string"]);
  const sample = {};
  Object.entries(stat).forEach(([key, value]) => {
    if (value === null || keptTypes.has(typeof value)) {
      sample[key] = value;
    }
  });
  return sample;
};
|
|
2197
|
+
/**
 * Describes how an input audio format relates to a desired output format.
 *
 * @param input Object with `inputFormat` and `outputFormat` descriptors.
 * @returns A plan holding both formats, the output/input sample-rate `ratio`,
 *          `required` (true when the formats differ on any compared field) and a
 *          `status` of "pass" when container, encoding and channel count agree,
 *          otherwise "warn".
 */
var buildMediaResamplingPlan = (input) => {
  const { inputFormat, outputFormat } = input;
  // A sample-rate-only difference still yields "pass"; only layout differences warn.
  const layoutAgrees = ["container", "encoding", "channels"].every((field) => inputFormat[field] === outputFormat[field]);
  return {
    inputFormat,
    outputFormat,
    ratio: outputFormat.sampleRateHz / inputFormat.sampleRateHz,
    required: !formatMatches(inputFormat, outputFormat),
    status: layoutAgrees ? "pass" : "warn"
  };
};
|
|
2207
|
+
/**
 * Derives a speech score for a frame from its metadata.
 *
 * An explicit boolean `metadata.isSpeech` wins (true -> 1, false -> 0). Otherwise the
 * first finite numeric value among `speechProbability`, `voiceProbability`, `rms`
 * and `energy` (checked in that order) is returned as-is. With none present the
 * score is 0.
 *
 * @param frame Frame whose optional `metadata` bag is inspected.
 * @returns The derived score.
 */
var speechProbability = (frame) => {
  const explicitFlag = frame.metadata?.isSpeech;
  if (explicitFlag === true) {
    return 1;
  }
  if (explicitFlag === false) {
    return 0;
  }
  const firstScore = ["speechProbability", "voiceProbability", "rms", "energy"]
    .map((key) => numericMetadata(frame, key))
    .find((value) => value !== undefined);
  return firstScore ?? 0;
};
|
|
2222
|
+
/**
 * Groups "input-audio" frames into voice-activity segments using two thresholds.
 *
 * A segment opens on a frame whose speech score is >= `speechStartThreshold`
 * (default 0.6). Every following frame is added to the open segment; frames scoring
 * <= `speechEndThreshold` (default 0.35) count as consecutive silence, and the
 * segment closes once that run exceeds `maxSilenceFrames` (default 1). A segment
 * still open after the last frame is closed as well.
 *
 * A closed segment is kept only when it holds at least `minSpeechFrames` (default 1)
 * frames *excluding the trailing silence run*. The trailing silence frames are what
 * triggered the close, so counting them would let a single speech blip followed by
 * silence satisfy a minimum of 2 or 3. Kept segments still span the trailing silence
 * in `frameCount`, `endAt` and `durationMs`.
 *
 * @param input Optional `frames` plus the four tuning options described above.
 * @returns `{ checkedAt, inputAudioFrames, segments, status }`; status is "warn"
 *          when no input-audio frame was supplied, otherwise "pass".
 */
var buildMediaVadReport = (input = {}) => {
  const frames = (input.frames ?? []).filter((frame) => frame.kind === "input-audio");
  const speechStartThreshold = input.speechStartThreshold ?? 0.6;
  const speechEndThreshold = input.speechEndThreshold ?? 0.35;
  const minSpeechFrames = input.minSpeechFrames ?? 1;
  const maxSilenceFrames = input.maxSilenceFrames ?? 1;
  const segments = [];
  let activeFrames = [];
  let silenceFrames = 0;
  const resetActive = () => {
    activeFrames = [];
    silenceFrames = 0;
  };
  const closeSegment = () => {
    const first = activeFrames[0];
    const last = activeFrames.at(-1);
    // silenceFrames is the length of the trailing silent run; it is not speech.
    const voicedFrames = activeFrames.length - silenceFrames;
    if (!first || voicedFrames < minSpeechFrames) {
      resetActive();
      return;
    }
    segments.push({
      durationMs: first.at !== undefined && last?.at !== undefined ? last.at - first.at + (last.durationMs ?? 0) : undefined,
      endAt: last?.at !== undefined ? last.at + (last.durationMs ?? 0) : undefined,
      frameCount: activeFrames.length,
      segmentId: `vad:${String(segments.length + 1)}`,
      sessionId: first.sessionId,
      startAt: first.at,
      turnId: first.turnId
    });
    resetActive();
  };
  for (const frame of frames) {
    const probability = speechProbability(frame);
    if (activeFrames.length === 0) {
      // Idle: only a frame at or above the start threshold opens a segment.
      if (probability >= speechStartThreshold) {
        activeFrames.push(frame);
      }
      continue;
    }
    activeFrames.push(frame);
    if (probability <= speechEndThreshold) {
      silenceFrames += 1;
    } else {
      silenceFrames = 0;
    }
    if (silenceFrames > maxSilenceFrames) {
      closeSegment();
    }
  }
  // Flush a segment that was still open when the frames ran out.
  closeSegment();
  return {
    checkedAt: Date.now(),
    inputAudioFrames: frames.length,
    segments,
    status: frames.length === 0 ? "warn" : "pass"
  };
};
|
|
2280
|
+
/**
 * Summarises the "interruption" frames found in `input.frames`.
 *
 * Raises a warning when no interruption frame is present, and an error when
 * `input.maxInterruptionLatencyMs` is set and any numeric `latencyMs` exceeds it.
 *
 * @param input Optional `frames` and `maxInterruptionLatencyMs`.
 * @returns `{ checkedAt, interruptionFrames, issues, latenciesMs, status }` where
 *          status is "fail" with any error, "warn" with any other issue, else "pass".
 */
var buildMediaInterruptionReport = (input = {}) => {
  const issues = [];
  const observed = (input.frames ?? []).filter((frame) => frame.kind === "interruption");
  const latenciesMs = [];
  for (const frame of observed) {
    const latency = frame.latencyMs;
    if (typeof latency === "number") {
      latenciesMs.push(latency);
    }
  }
  const budgetMs = input.maxInterruptionLatencyMs;
  if (observed.length === 0) {
    pushIssue(issues, "warning", "media.interruption_missing", "No interruption frame was observed.");
  }
  const overBudget = budgetMs !== undefined && latenciesMs.some((latency) => latency > budgetMs);
  if (overBudget) {
    pushIssue(issues, "error", "media.interruption_latency", `Interruption latency exceeded ${String(budgetMs)}ms.`);
  }
  let status = "pass";
  if (issues.some((issue) => issue.severity === "error")) {
    status = "fail";
  } else if (issues.length > 0) {
    status = "warn";
  }
  return {
    checkedAt: Date.now(),
    interruptionFrames: observed.length,
    issues,
    latenciesMs,
    status
  };
};
|
|
2299
|
+
/**
 * Builds a quality summary (gaps, jitter, timestamp drift, speech/silence ratios,
 * levels, backpressure) for a list of media frames.
 *
 * Frames are copied and sorted by `at` (missing `at` sorts as 0). Gap, duration,
 * drift and level figures use "input-audio" and "assistant-audio" frames; speech
 * figures use "input-audio" frames only.
 *
 * Speech classification uses `input.speechStartThreshold` (default 0.6) and
 * `input.speechEndThreshold` (default 0.35): a frame scoring >= start counts as
 * speech, <= end as silence, anything between as unknown. The defaults match the
 * previously hard-coded values and the defaults of `buildMediaVadReport`.
 *
 * Optional budgets (`maxGapMs`, `maxJitterMs`, `maxTimestampDriftMs`,
 * `minSpeechRatio`, `maxBackpressureEvents`) each add a warning when violated.
 *
 * @param input Optional `frames`, `transport.backpressureEvents`, thresholds and budgets.
 * @returns The report object; `status` is "fail" with any error issue, "warn" with
 *          any other issue, else "pass".
 */
var buildMediaQualityReport = (input = {}) => {
  const frames = [...input.frames ?? []].sort((a, b) => (a.at ?? 0) - (b.at ?? 0));
  const audioFrames = frames.filter((frame) => frame.kind === "input-audio" || frame.kind === "assistant-audio");
  const inputAudioFrames = frames.filter((frame) => frame.kind === "input-audio");
  const assistantAudioFrames = frames.filter((frame) => frame.kind === "assistant-audio");
  const speechStartThreshold = input.speechStartThreshold ?? 0.6;
  const speechEndThreshold = input.speechEndThreshold ?? 0.35;
  const issues = [];
  const gapsMs = [];
  for (const [index, frame] of audioFrames.entries()) {
    const previous = audioFrames[index - 1];
    if (previous?.at === undefined || frame.at === undefined || previous.durationMs === undefined) {
      continue;
    }
    // Positive distance between the end of the previous frame and the start of this one.
    const gap = frame.at - (previous.at + previous.durationMs);
    if (gap > 0) {
      gapsMs.push(gap);
    }
  }
  // Prefer the most recent reported jitter; fall back to the largest observed gap.
  const jitterMs = audioFrames.map((frame) => numericMetadata(frame, "jitterMs")).filter((value) => value !== undefined).at(-1) ?? max(gapsMs);
  const first = audioFrames.find((frame) => frame.at !== undefined);
  // Backward scan for the last timestamped frame; avoids copying the array.
  let last;
  for (let index = audioFrames.length - 1; index >= 0; index -= 1) {
    if (audioFrames[index].at !== undefined) {
      last = audioFrames[index];
      break;
    }
  }
  const durationMs = first?.at !== undefined && last?.at !== undefined ? last.at - first.at + (last.durationMs ?? 0) : undefined;
  const expectedDurationMs = audioFrames.length > 0 ? audioFrames.reduce((total, frame) => total + (frame.durationMs ?? 0), 0) : undefined;
  const timestampDriftMs = durationMs !== undefined && expectedDurationMs !== undefined ? Math.max(0, durationMs - expectedDurationMs) : undefined;
  const speechScores = inputAudioFrames.map(speechProbability);
  const speechFrames = speechScores.filter((score) => score >= speechStartThreshold).length;
  const silenceFrames = speechScores.filter((score) => score <= speechEndThreshold).length;
  const unknownSpeechFrames = Math.max(0, inputAudioFrames.length - speechFrames - silenceFrames);
  const speechRatio = inputAudioFrames.length === 0 ? 0 : speechFrames / inputAudioFrames.length;
  const silenceRatio = inputAudioFrames.length === 0 ? 0 : silenceFrames / inputAudioFrames.length;
  const levels = audioFrames.map((frame) => numericMetadata(frame, "level") ?? numericMetadata(frame, "rms") ?? numericMetadata(frame, "energy")).filter((value) => value !== undefined);
  const backpressureEvents = input.transport?.backpressureEvents ?? 0;
  const maxGapMs = input.maxGapMs;
  if (maxGapMs !== undefined && gapsMs.some((gap) => gap > maxGapMs)) {
    pushIssue(issues, "warning", "media.quality_gap", `Observed media gap above ${String(maxGapMs)}ms.`);
  }
  if (input.maxJitterMs !== undefined && jitterMs !== undefined && jitterMs > input.maxJitterMs) {
    pushIssue(issues, "warning", "media.quality_jitter", `Observed jitter ${String(jitterMs)}ms above ${String(input.maxJitterMs)}ms.`);
  }
  if (input.maxTimestampDriftMs !== undefined && timestampDriftMs !== undefined && timestampDriftMs > input.maxTimestampDriftMs) {
    pushIssue(issues, "warning", "media.quality_timestamp_drift", `Observed timestamp drift ${String(timestampDriftMs)}ms above ${String(input.maxTimestampDriftMs)}ms.`);
  }
  if (input.minSpeechRatio !== undefined && inputAudioFrames.length > 0 && speechRatio < input.minSpeechRatio) {
    pushIssue(issues, "warning", "media.quality_speech_ratio", `Observed speech ratio ${String(speechRatio)} below ${String(input.minSpeechRatio)}.`);
  }
  if (input.maxBackpressureEvents !== undefined && backpressureEvents > input.maxBackpressureEvents) {
    pushIssue(issues, "warning", "media.quality_backpressure", `Observed ${String(backpressureEvents)} backpressure event(s), above ${String(input.maxBackpressureEvents)}.`);
  }
  return {
    assistantAudioFrames: assistantAudioFrames.length,
    backpressureEvents,
    checkedAt: Date.now(),
    durationMs,
    gapCount: gapsMs.length,
    gapsMs,
    inputAudioFrames: inputAudioFrames.length,
    issues,
    jitterMs,
    levelAverage: average3(levels),
    levelMax: max(levels),
    levelMin: min(levels),
    silenceFrames,
    silenceRatio,
    speechFrames,
    speechRatio,
    status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass",
    timestampDriftMs,
    totalFrames: frames.length,
    unknownSpeechFrames
  };
};
|
|
2369
|
+
/**
 * Aggregates a list of normalised WebRTC stats records into a health report.
 *
 * Records are bucketed by `type`: non-video "inbound-rtp" / "outbound-rtp" streams,
 * "candidate-pair" records, and audio "track" / "media-source" records. Packet and
 * byte counters are summed, timing values are scaled by 1000 through `secondsToMs`,
 * and the largest value is reported for RTT, jitter and jitter-buffer delay.
 *
 * Checks, each only applied when its option is set:
 *  - `requireConnectedCandidatePair`: error when candidate pairs exist but none is
 *    selected, nominated or in state "succeeded".
 *    NOTE(review): no issue is raised when there are zero candidate-pair records at
 *    all — confirm that is intended.
 *  - `requireLiveAudioTrack`: error when no audio track is live.
 *  - `maxPacketLossRatio`, `maxRoundTripTimeMs`, `maxJitterMs`: warning when exceeded.
 *
 * @param input Optional `stats` list plus the options above.
 * @returns The report object; `status` is "fail" with any error issue, "warn" with
 *          any other issue, else "pass".
 */
var buildMediaWebRTCStatsReport = (input = {}) => {
  const stats = input.stats ?? [];
  const issues = [];
  const isDefined = (value) => value !== undefined;
  const sumOf = (records, pick) => records.reduce((total, stat) => total + pick(stat), 0);
  const counter = (key) => (stat) => numericStat(stat, key) ?? 0;
  const nonVideoOfType = (type) => stats.filter((stat) => stat.type === type && stringStat(stat, "kind") !== "video");

  const inbound = nonVideoOfType("inbound-rtp");
  const outbound = nonVideoOfType("outbound-rtp");
  const rtpStreams = [...inbound, ...outbound];
  const candidatePairs = stats.filter((stat) => stat.type === "candidate-pair");
  const audioTracks = stats.filter((stat) => ["track", "media-source"].includes(stat.type) && stringStat(stat, "kind") === "audio");

  const isActivePair = (stat) => booleanStat(stat, "selected") === true || booleanStat(stat, "nominated") === true || stringStat(stat, "state") === "succeeded";
  const hasEnded = (stat) => stringStat(stat, "readyState") === "ended" || stringStat(stat, "trackState") === "ended" || booleanStat(stat, "ended") === true;
  const activeCandidatePairs = candidatePairs.filter((stat) => isActivePair(stat)).length;
  const endedAudioTracks = audioTracks.filter((stat) => hasEnded(stat)).length;
  // "Live" is exactly the complement of "ended" within the audio tracks.
  const liveAudioTracks = audioTracks.length - endedAudioTracks;

  const inboundPackets = sumOf(inbound, counter("packetsReceived"));
  const outboundPackets = sumOf(outbound, counter("packetsSent"));
  // Negative loss counters are clamped to zero before summing.
  const packetsLost = sumOf(rtpStreams, (stat) => Math.max(0, numericStat(stat, "packetsLost") ?? 0));
  const packetLossDenominator = inboundPackets + packetsLost;
  const packetLossRatio = packetLossDenominator === 0 ? 0 : packetsLost / packetLossDenominator;
  const bytesReceived = sumOf(inbound, counter("bytesReceived"));
  const bytesSent = sumOf(outbound, counter("bytesSent"));

  const roundTripSamples = candidatePairs.map((stat) => secondsToMs(numericStat(stat, "currentRoundTripTime") ?? numericStat(stat, "roundTripTime")));
  const roundTripTimeMs = max(roundTripSamples.filter(isDefined));
  const jitterSamples = rtpStreams.map((stat) => secondsToMs(numericStat(stat, "jitter")));
  const jitterMs = max(jitterSamples.filter(isDefined));
  const bufferDelaySamples = inbound.map((stat) => {
    const delay = numericStat(stat, "jitterBufferDelay");
    const emitted = numericStat(stat, "jitterBufferEmittedCount");
    if (delay === undefined || emitted === undefined || !(emitted > 0)) {
      return undefined;
    }
    // Average delay per emitted sample, scaled by 1000.
    return delay / emitted * 1000;
  });
  const jitterBufferDelayMs = max(bufferDelaySamples.filter(isDefined));
  const audioLevels = audioTracks.map((stat) => numericStat(stat, "audioLevel")).filter(isDefined);

  if (input.requireConnectedCandidatePair && candidatePairs.length > 0 && activeCandidatePairs === 0) {
    pushIssue(issues, "error", "media.webrtc_candidate_pair_missing", "No active WebRTC candidate pair was observed.");
  }
  if (input.requireLiveAudioTrack && liveAudioTracks === 0) {
    pushIssue(issues, "error", "media.webrtc_audio_track_missing", "No live WebRTC audio track was observed.");
  }
  if (input.maxPacketLossRatio !== undefined && packetLossRatio > input.maxPacketLossRatio) {
    pushIssue(issues, "warning", "media.webrtc_packet_loss", `Observed WebRTC packet loss ratio ${String(packetLossRatio)} above ${String(input.maxPacketLossRatio)}.`);
  }
  if (input.maxRoundTripTimeMs !== undefined && roundTripTimeMs !== undefined && roundTripTimeMs > input.maxRoundTripTimeMs) {
    pushIssue(issues, "warning", "media.webrtc_round_trip_time", `Observed WebRTC RTT ${String(roundTripTimeMs)}ms above ${String(input.maxRoundTripTimeMs)}ms.`);
  }
  if (input.maxJitterMs !== undefined && jitterMs !== undefined && jitterMs > input.maxJitterMs) {
    pushIssue(issues, "warning", "media.webrtc_jitter", `Observed WebRTC jitter ${String(jitterMs)}ms above ${String(input.maxJitterMs)}ms.`);
  }

  let status = "pass";
  if (issues.some((issue) => issue.severity === "error")) {
    status = "fail";
  } else if (issues.length > 0) {
    status = "warn";
  }
  return {
    activeCandidatePairs,
    audioLevelAverage: average3(audioLevels),
    bytesReceived,
    bytesSent,
    checkedAt: Date.now(),
    endedAudioTracks,
    inboundPackets,
    issues,
    jitterBufferDelayMs,
    jitterMs,
    liveAudioTracks,
    outboundPackets,
    packetLossRatio,
    packetsLost,
    roundTripTimeMs,
    status,
    totalStats: stats.length
  };
};
|
|
2429
|
+
/**
 * Fetches stats from a peer connection and normalises every record.
 *
 * @param input Object with `peerConnection` (must expose `getStats`) and an optional
 *              `selector`; `null` is passed to `getStats` when no selector is given.
 * @returns Promise of the records yielded by the stats report's `values()`, each
 *          passed through `normalizeWebRTCStat`.
 */
var collectMediaWebRTCStats = async (input) => {
  const selector = input.selector ?? null;
  const statsReport = await input.peerConnection.getStats(selector);
  return Array.from(statsReport.values(), (stat) => normalizeWebRTCStat(stat));
};
|
|
2433
|
+
/**
 * Collects stats via `collectMediaWebRTCStats` and feeds them straight into
 * `buildMediaWebRTCStatsReport`.
 *
 * Every property of `input` is forwarded to the report builder; `stats` is
 * overwritten with the freshly collected records.
 *
 * @param input Same object accepted by `collectMediaWebRTCStats`, plus any report options.
 * @returns Promise of the built report.
 */
var collectMediaWebRTCStatsReport = async (input) => {
  const collected = await collectMediaWebRTCStats(input);
  const reportInput = { ...input, stats: collected };
  return buildMediaWebRTCStatsReport(reportInput);
};
|
|
2440
|
+
/**
 * Checks a recorded media pipeline run against calibration expectations.
 *
 * Counts frames by `kind`, derives the lowest assistant-audio latency and the
 * highest reported `jitterMs`, resolves the effective input/output formats (explicit
 * option first, otherwise the first frame of that direction carrying a `format`) and
 * compares them with the expected formats.
 *
 * `resamplingRequired` is true when either direction's sample rate differs from its
 * expected rate. `resamplingTargetHz` is the expected rate of the direction that
 * actually mismatches: the input side when it mismatches, otherwise the output side.
 * Previously the expected input rate was reported whenever `expectedInputFormat` was
 * set, even if only the output needed resampling.
 *
 * Issues: missing input/assistant audio (warning), format mismatch (error when the
 * sample rate differs, otherwise warning), first-audio latency over budget (error),
 * and jitter, backpressure, missing interruption frame, missing trace evidence
 * (warnings).
 *
 * @param input Optional `frames`, formats, expected formats, budgets, `require*`
 *              flags and `surface`.
 * @returns The calibration report; `status` is "fail" with any error issue, "warn"
 *          with any other issue, else "pass". `surface` defaults to "media-pipeline".
 */
var buildMediaPipelineCalibrationReport = (input = {}) => {
  const frames = input.frames ?? [];
  const issues = [];
  const inputFrames = frames.filter((frame) => frame.kind === "input-audio");
  const assistantFrames = frames.filter((frame) => frame.kind === "assistant-audio");
  const turnCommitFrames = frames.filter((frame) => frame.kind === "turn-commit");
  const interruptionFrameRecords = frames.filter((frame) => frame.kind === "interruption");
  const traceLinkedFrames = frames.filter((frame) => frame.traceEventId).length;
  const backpressureFrames = frames.filter((frame) => Boolean(frame.metadata?.backpressure)).length;
  const audioLatencies = assistantFrames.map((frame) => frame.latencyMs).filter((latency) => typeof latency === "number");
  // Pairwise folds instead of Math.min(...)/Math.max(...) so long recordings cannot
  // exceed the engine's call-argument limit.
  const firstAudioLatencyMs = audioLatencies.length > 0 ? audioLatencies.reduce((lowest, latency) => Math.min(lowest, latency), Infinity) : undefined;
  const jitterValues = frames.map((frame) => numericMetadata(frame, "jitterMs")).filter((value) => value !== undefined);
  const jitterMs = jitterValues.length > 0 ? jitterValues.reduce((highest, value) => Math.max(highest, value), -Infinity) : undefined;
  const inputFormat = input.inputFormat ?? inputFrames.find((frame) => frame.format)?.format;
  const outputFormat = input.outputFormat ?? assistantFrames.find((frame) => frame.format)?.format;
  const inputNeedsResampling = Boolean(input.expectedInputFormat && inputFormat && inputFormat.sampleRateHz !== input.expectedInputFormat.sampleRateHz);
  const outputNeedsResampling = Boolean(input.expectedOutputFormat && outputFormat && outputFormat.sampleRateHz !== input.expectedOutputFormat.sampleRateHz);
  const resamplingRequired = inputNeedsResampling || outputNeedsResampling;
  // Report the rate of the side that mismatches; the input side wins when both do.
  const resamplingTargetHz = inputNeedsResampling ? input.expectedInputFormat.sampleRateHz : outputNeedsResampling ? input.expectedOutputFormat.sampleRateHz : undefined;
  if (inputFrames.length === 0) {
    pushIssue(issues, "warning", "media.input_audio_missing", "No input audio frames were observed.");
  }
  if (assistantFrames.length === 0) {
    pushIssue(issues, "warning", "media.assistant_audio_missing", "No assistant audio frames were observed.");
  }
  if (input.expectedInputFormat && inputFormat && !formatMatches(inputFormat, input.expectedInputFormat)) {
    pushIssue(issues, inputFormat.sampleRateHz === input.expectedInputFormat.sampleRateHz ? "warning" : "error", "media.input_format_mismatch", `Input format ${formatLabel(inputFormat)} does not match expected ${formatLabel(input.expectedInputFormat)}.`);
  }
  if (input.expectedOutputFormat && outputFormat && !formatMatches(outputFormat, input.expectedOutputFormat)) {
    pushIssue(issues, outputFormat.sampleRateHz === input.expectedOutputFormat.sampleRateHz ? "warning" : "error", "media.output_format_mismatch", `Output format ${formatLabel(outputFormat)} does not match expected ${formatLabel(input.expectedOutputFormat)}.`);
  }
  if (firstAudioLatencyMs !== undefined && input.maxFirstAudioLatencyMs !== undefined && firstAudioLatencyMs > input.maxFirstAudioLatencyMs) {
    pushIssue(issues, "error", "media.first_audio_latency", `First audio latency ${String(firstAudioLatencyMs)}ms exceeds budget ${String(input.maxFirstAudioLatencyMs)}ms.`);
  }
  if (jitterMs !== undefined && input.maxJitterMs !== undefined && jitterMs > input.maxJitterMs) {
    pushIssue(issues, "warning", "media.jitter", `Media jitter ${String(jitterMs)}ms exceeds budget ${String(input.maxJitterMs)}ms.`);
  }
  if (input.maxBackpressureFrames !== undefined && backpressureFrames > input.maxBackpressureFrames) {
    pushIssue(issues, "warning", "media.backpressure", `Backpressure frame count ${String(backpressureFrames)} exceeds budget ${String(input.maxBackpressureFrames)}.`);
  }
  if (input.requireInterruptionFrame && interruptionFrameRecords.length === 0) {
    pushIssue(issues, "warning", "media.interruption_missing", "No interruption frame was observed.");
  }
  if (input.requireTraceEvidence && traceLinkedFrames === 0) {
    pushIssue(issues, "warning", "media.trace_evidence_missing", "No media frames were linked to trace evidence.");
  }
  return {
    assistantAudioFrames: assistantFrames.length,
    backpressureFrames,
    checkedAt: Date.now(),
    firstAudioLatencyMs,
    inputAudioFrames: inputFrames.length,
    inputFormat,
    interruptionFrames: interruptionFrameRecords.length,
    issues,
    jitterMs,
    outputFormat,
    resamplingRequired,
    resamplingTargetHz,
    status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass",
    surface: input.surface ?? "media-pipeline",
    traceLinkedFrames,
    turnCommitFrames: turnCommitFrames.length
  };
};
|
|
2503
|
+
|
|
2504
|
+
// src/client/browserMedia.ts
|
|
2505
|
+
// Endpoint path used by postBrowserMediaReport when `options.path` is not provided.
var DEFAULT_BROWSER_MEDIA_PATH = "/api/voice/browser-media";
|
|
2506
|
+
// Delay passed to setInterval by the browser media reporter's start() when
// `options.intervalMs` is not provided.
var DEFAULT_BROWSER_MEDIA_INTERVAL_MS = 5000;
|
|
2507
|
+
/**
 * Resolves the peer connection to report on.
 *
 * Uses `options.peerConnection` when it is neither `null` nor `undefined`; otherwise
 * awaits the optional `options.getPeerConnection()` factory.
 *
 * @param options Reporter options.
 * @returns Promise of the peer connection, or `null` when none is available.
 */
var resolvePeerConnection = async (options) => {
  const direct = options.peerConnection;
  if (direct !== undefined && direct !== null) {
    return direct;
  }
  const provided = await options.getPeerConnection?.();
  return provided ?? null;
};
|
|
2508
|
+
/**
 * POSTs a browser media report as JSON.
 *
 * Uses `options.fetch` when supplied, otherwise `globalThis.fetch`; when neither
 * exists the call is a silent no-op. The request goes to `options.path` (default
 * `DEFAULT_BROWSER_MEDIA_PATH`) with `keepalive: true`. The response is awaited but
 * not inspected.
 *
 * @param payload Value serialised with `JSON.stringify` as the request body.
 * @param options Reporter options (`fetch`, `path`).
 * @returns Promise resolving to `undefined`.
 */
var postBrowserMediaReport = async (payload, options) => {
  const send = options.fetch ?? globalThis.fetch;
  if (send) {
    const endpoint = options.path ?? DEFAULT_BROWSER_MEDIA_PATH;
    const request = {
      body: JSON.stringify(payload),
      headers: {
        "Content-Type": "application/json"
      },
      keepalive: true,
      method: "POST"
    };
    await send(endpoint, request);
  }
};
|
|
2522
|
+
/**
 * Creates a reporter that periodically collects WebRTC stats from a peer connection
 * and posts them through `postBrowserMediaReport`.
 *
 * @param options Reporter options: peer connection source, optional `getScenarioId` /
 *                `getSessionId`, `onReport`, `onError`, `intervalMs`, plus whatever
 *                the stats report builder and the poster read from the same object.
 * @returns An object with:
 *          - `reportOnce()`: collects and posts a single report; resolves to the
 *            payload, or `undefined` when no peer connection is available.
 *          - `start()`: reports immediately, then every `intervalMs` (default
 *            `DEFAULT_BROWSER_MEDIA_INTERVAL_MS`); a no-op while already running.
 *          - `stop()` / `close()`: the same function; clears the interval.
 */
var createVoiceBrowserMediaReporter = (options) => {
  let timer = null;

  const reportOnce = async () => {
    const peerConnection = await resolvePeerConnection(options);
    if (!peerConnection) {
      return;
    }
    const report = await collectMediaWebRTCStatsReport({ ...options, peerConnection });
    const payload = {
      at: Date.now(),
      report,
      scenarioId: options.getScenarioId?.() ?? null,
      sessionId: options.getSessionId?.() ?? null
    };
    // The local listener is notified before the network post is attempted.
    options.onReport?.(payload);
    await postBrowserMediaReport(payload, options);
    return payload;
  };

  // Timer tick: failures are routed to onError instead of surfacing as rejections.
  const tick = () => {
    reportOnce().then(undefined, (error) => {
      options.onError?.(error);
    });
  };

  const stop = () => {
    if (!timer) {
      return;
    }
    clearInterval(timer);
    timer = null;
  };

  const start = () => {
    if (timer) {
      return;
    }
    tick();
    timer = setInterval(tick, options.intervalMs ?? DEFAULT_BROWSER_MEDIA_INTERVAL_MS);
  };

  return {
    close: stop,
    reportOnce,
    start,
    stop
  };
};
|
|
2567
|
+
|
|
2162
2568
|
// src/client/connection.ts
|
|
2163
2569
|
var WS_OPEN = 1;
|
|
2164
2570
|
var WS_CLOSED = 3;
|
|
@@ -2591,12 +2997,18 @@ var createVoiceStreamStore = () => {
|
|
|
2591
2997
|
var createVoiceStream = (path, options = {}) => {
|
|
2592
2998
|
const connection = createVoiceConnection(path, options);
|
|
2593
2999
|
const store = createVoiceStreamStore();
|
|
3000
|
+
const browserMediaReporter = options.browserMedia && typeof window !== "undefined" ? createVoiceBrowserMediaReporter({
|
|
3001
|
+
...options.browserMedia,
|
|
3002
|
+
getScenarioId: () => options.browserMedia ? options.browserMedia.getScenarioId?.() ?? connection.getScenarioId() : connection.getScenarioId(),
|
|
3003
|
+
getSessionId: () => options.browserMedia ? options.browserMedia.getSessionId?.() ?? connection.getSessionId() : connection.getSessionId()
|
|
3004
|
+
}) : null;
|
|
2594
3005
|
const subscribers = new Set;
|
|
2595
3006
|
const start = (input) => Promise.resolve().then(() => {
|
|
2596
3007
|
if (!input?.sessionId && !input?.scenarioId) {
|
|
2597
3008
|
return;
|
|
2598
3009
|
}
|
|
2599
3010
|
connection.start(input);
|
|
3011
|
+
browserMediaReporter?.start();
|
|
2600
3012
|
});
|
|
2601
3013
|
const notify = () => {
|
|
2602
3014
|
subscribers.forEach((subscriber) => subscriber());
|
|
@@ -2638,6 +3050,7 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
2638
3050
|
},
|
|
2639
3051
|
close() {
|
|
2640
3052
|
unsubscribeConnection();
|
|
3053
|
+
browserMediaReporter?.close();
|
|
2641
3054
|
connection.close();
|
|
2642
3055
|
store.dispatch({ type: "disconnected" });
|
|
2643
3056
|
notify();
|
|
@@ -7806,16 +8219,16 @@ var renderVoiceCallReviewHTML = (artifact) => {
|
|
|
7806
8219
|
</html>`;
|
|
7807
8220
|
};
|
|
7808
8221
|
// src/testing/sessionBenchmark.ts
|
|
7809
|
-
var
|
|
8222
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param values Numbers to average.
 * @returns The mean, or 0 for an empty list.
 */
var average4 = (values) => {
  if (!(values.length > 0)) {
    return 0;
  }
  let sum = 0;
  values.forEach((value) => {
    sum += value;
  });
  return sum / values.length;
};
|
|
7810
8223
|
/**
 * Normalises turn text for comparison: lower-cases it, replaces every character that
 * is not a Unicode letter, Unicode number, whitespace or apostrophe with a space,
 * collapses whitespace runs to a single space and trims both ends.
 *
 * @param value Text to normalise.
 * @returns The normalised text.
 */
var normalizeTurnText = (value) => {
  const lowered = value.toLowerCase();
  const withoutPunctuation = lowered.replace(/[^\p{L}\p{N}\s']/gu, " ");
  const collapsed = withoutPunctuation.replace(/\s+/g, " ");
  return collapsed.trim();
};
|
|
7811
8224
|
/**
 * Counts the turn results whose `passes` flag is truthy.
 *
 * @param turnResults Turn result records.
 * @returns Number of passing turns.
 */
var countPassedTurns = (turnResults) => {
  const passing = turnResults.filter((result) => result.passes);
  return passing.length;
};
|
|
7812
8225
|
/**
 * Fraction of turn results that pass.
 *
 * @param turnResults Turn result records.
 * @returns Passing turns divided by total turns, or 0 for an empty list.
 */
var calculateTurnPassRate = (turnResults) => {
  const totalTurns = turnResults.length;
  if (!(totalTurns > 0)) {
    return 0;
  }
  return countPassedTurns(turnResults) / totalTurns;
};
|
|
7813
8226
|
var summarizeScenarioCosts = (turnResults) => {
|
|
7814
8227
|
const costEstimates = turnResults.map((turn) => turn.quality?.cost).filter((value) => value !== undefined);
|
|
7815
8228
|
return {
|
|
7816
|
-
averageRelativeCostUnits: roundMetric5(
|
|
7817
|
-
fallbackReplayAudioMs: roundMetric5(
|
|
7818
|
-
primaryAudioMs: roundMetric5(
|
|
8229
|
+
averageRelativeCostUnits: roundMetric5(average4(costEstimates.map((estimate) => estimate.estimatedRelativeCostUnits))),
|
|
8230
|
+
fallbackReplayAudioMs: roundMetric5(average4(costEstimates.map((estimate) => estimate.fallbackReplayAudioMs)), 2),
|
|
8231
|
+
primaryAudioMs: roundMetric5(average4(costEstimates.map((estimate) => estimate.primaryAudioMs)), 2)
|
|
7819
8232
|
};
|
|
7820
8233
|
};
|
|
7821
8234
|
var roundMetric5 = (value, digits = 4) => {
|
|
@@ -8134,13 +8547,13 @@ var summarizeVoiceSessionBenchmark = (adapterId, scenarios) => {
|
|
|
8134
8547
|
const turnAccuracies = scenarios.flatMap((scenario) => scenario.turnResults.map((turn) => turn.accuracy?.wordErrorRate).filter((value) => typeof value === "number"));
|
|
8135
8548
|
return {
|
|
8136
8549
|
adapterId,
|
|
8137
|
-
averageElapsedMs: roundMetric5(
|
|
8138
|
-
averageFallbackReplayAudioMs: roundMetric5(
|
|
8139
|
-
averagePrimaryAudioMs: roundMetric5(
|
|
8140
|
-
averageReconnectCount: roundMetric5(
|
|
8141
|
-
averageRelativeCostUnits: roundMetric5(
|
|
8142
|
-
averageTurnPassRate: roundMetric5(
|
|
8143
|
-
averageWordErrorRate: roundMetric5(
|
|
8550
|
+
averageElapsedMs: roundMetric5(average4(scenarios.map((scenario) => scenario.elapsedMs)), 2),
|
|
8551
|
+
averageFallbackReplayAudioMs: roundMetric5(average4(scenarios.map((scenario) => scenario.fallbackReplayAudioMs)), 2),
|
|
8552
|
+
averagePrimaryAudioMs: roundMetric5(average4(scenarios.map((scenario) => scenario.primaryAudioMs)), 2),
|
|
8553
|
+
averageReconnectCount: roundMetric5(average4(scenarios.map((scenario) => scenario.reconnectCount))),
|
|
8554
|
+
averageRelativeCostUnits: roundMetric5(average4(scenarios.map((scenario) => scenario.averageRelativeCostUnits))),
|
|
8555
|
+
averageTurnPassRate: roundMetric5(average4(scenarios.map((scenario) => scenario.turnPassRate))),
|
|
8556
|
+
averageWordErrorRate: roundMetric5(average4(turnAccuracies)),
|
|
8144
8557
|
duplicateTurnRate: roundMetric5(scenarios.length > 0 ? scenarios.filter((scenario) => scenario.duplicateTurnCount > 0).length / scenarios.length : 0),
|
|
8145
8558
|
passCount,
|
|
8146
8559
|
passRate: roundMetric5(scenarios.length > 0 ? passCount / scenarios.length : 0),
|
|
@@ -8166,13 +8579,13 @@ var summarizeVoiceSessionBenchmarkSeries = (input) => {
|
|
|
8166
8579
|
const passCount = results.filter((scenario) => scenario.passes).length;
|
|
8167
8580
|
const sample = results[0];
|
|
8168
8581
|
return {
|
|
8169
|
-
averageElapsedMs: roundMetric5(
|
|
8170
|
-
averageFallbackReplayAudioMs: roundMetric5(
|
|
8171
|
-
averagePrimaryAudioMs: roundMetric5(
|
|
8172
|
-
averageReconnectCount: roundMetric5(
|
|
8173
|
-
averageRelativeCostUnits: roundMetric5(
|
|
8174
|
-
averageTurnPassRate: roundMetric5(
|
|
8175
|
-
averageWordErrorRate: roundMetric5(
|
|
8582
|
+
averageElapsedMs: roundMetric5(average4(results.map((scenario) => scenario.elapsedMs)), 2),
|
|
8583
|
+
averageFallbackReplayAudioMs: roundMetric5(average4(results.map((scenario) => scenario.fallbackReplayAudioMs)), 2),
|
|
8584
|
+
averagePrimaryAudioMs: roundMetric5(average4(results.map((scenario) => scenario.primaryAudioMs)), 2),
|
|
8585
|
+
averageReconnectCount: roundMetric5(average4(results.map((scenario) => scenario.reconnectCount))),
|
|
8586
|
+
averageRelativeCostUnits: roundMetric5(average4(results.map((scenario) => scenario.averageRelativeCostUnits))),
|
|
8587
|
+
averageTurnPassRate: roundMetric5(average4(results.map((scenario) => scenario.turnPassRate))),
|
|
8588
|
+
averageWordErrorRate: roundMetric5(average4(wordErrorRates)),
|
|
8176
8589
|
bestWordErrorRate: roundMetric5(wordErrorRates.length > 0 ? Math.min(...wordErrorRates) : 0),
|
|
8177
8590
|
fixtureId,
|
|
8178
8591
|
passCount,
|
|
@@ -8195,18 +8608,18 @@ var summarizeVoiceSessionBenchmarkSeries = (input) => {
|
|
|
8195
8608
|
scenarios: scenarioAggregates,
|
|
8196
8609
|
summary: {
|
|
8197
8610
|
adapterId: input.adapterId,
|
|
8198
|
-
averageElapsedMs: roundMetric5(
|
|
8199
|
-
averageFallbackReplayAudioMs: roundMetric5(
|
|
8200
|
-
averagePassRate: roundMetric5(
|
|
8201
|
-
averagePrimaryAudioMs: roundMetric5(
|
|
8202
|
-
averageReconnectCount: roundMetric5(
|
|
8203
|
-
averageRelativeCostUnits: roundMetric5(
|
|
8204
|
-
averageTurnPassRate: roundMetric5(
|
|
8205
|
-
averageWordErrorRate: roundMetric5(
|
|
8611
|
+
averageElapsedMs: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageElapsedMs)), 2),
|
|
8612
|
+
averageFallbackReplayAudioMs: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageFallbackReplayAudioMs)), 2),
|
|
8613
|
+
averagePassRate: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.passRate))),
|
|
8614
|
+
averagePrimaryAudioMs: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averagePrimaryAudioMs)), 2),
|
|
8615
|
+
averageReconnectCount: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageReconnectCount))),
|
|
8616
|
+
averageRelativeCostUnits: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageRelativeCostUnits))),
|
|
8617
|
+
averageTurnPassRate: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageTurnPassRate))),
|
|
8618
|
+
averageWordErrorRate: roundMetric5(average4(scenarioAggregates.map((scenario) => scenario.averageWordErrorRate))),
|
|
8206
8619
|
flakyScenarioCount: scenarioAggregates.filter((scenario) => scenario.passRate > 0 && scenario.passRate < 1).length,
|
|
8207
8620
|
generatedRunCount: input.reports.length,
|
|
8208
|
-
reconnectCoverageRate: roundMetric5(
|
|
8209
|
-
reconnectSuccessRate: roundMetric5(
|
|
8621
|
+
reconnectCoverageRate: roundMetric5(average4(reconnectCoverageRates)),
|
|
8622
|
+
reconnectSuccessRate: roundMetric5(average4(reconnectRates)),
|
|
8210
8623
|
scenarioCount: scenarioAggregates.length,
|
|
8211
8624
|
stableScenarioCount: scenarioAggregates.filter((scenario) => scenario.passRate === 1).length,
|
|
8212
8625
|
totalPassCount,
|
package/dist/trace.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { S3Client, S3Options } from 'bun';
|
|
2
|
-
export type VoiceTraceEventType = 'assistant.guardrail' | 'assistant.memory' | 'assistant.run' | 'agent.context' | 'agent.handoff' | 'agent.model' | 'agent.result' | 'agent.tool' | 'call.handoff' | 'call.lifecycle' | 'client.barge_in' | 'client.live_latency' | 'client.reconnect' | 'operator.action' | 'provider.decision' | 'session.error' | 'turn.assistant' | 'turn.committed' | 'turn.cost' | 'turn_latency.stage' | 'turn.transcript' | 'workflow.contract';
|
|
2
|
+
export type VoiceTraceEventType = 'assistant.guardrail' | 'assistant.memory' | 'assistant.run' | 'agent.context' | 'agent.handoff' | 'agent.model' | 'agent.result' | 'agent.tool' | 'call.handoff' | 'call.lifecycle' | 'client.barge_in' | 'client.browser_media' | 'client.live_latency' | 'client.reconnect' | 'operator.action' | 'provider.decision' | 'session.error' | 'turn.assistant' | 'turn.committed' | 'turn.cost' | 'turn_latency.stage' | 'turn.transcript' | 'workflow.contract';
|
|
3
3
|
export type VoiceTraceEvent<TPayload extends Record<string, unknown> = Record<string, unknown>> = {
|
|
4
4
|
at: number;
|
|
5
5
|
id?: string;
|
package/dist/types.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { SessionStore } from '@absolutejs/absolute';
|
|
2
|
+
import type { MediaWebRTCStatsCollector, MediaWebRTCStatsReport, MediaWebRTCStatsReportInput } from '@absolutejs/media';
|
|
2
3
|
import type { VoiceOpsDispositionTaskPolicies, VoiceOpsTaskAssignmentRule, VoiceOpsTaskAssignmentRules, VoiceIntegrationWebhookConfig, StoredVoiceIntegrationEvent, StoredVoiceOpsTask, VoiceIntegrationEventStore, VoiceOpsTaskPolicy, VoiceOpsTask, VoiceOpsTaskStore } from './ops';
|
|
3
4
|
import type { VoiceIntegrationSink } from './opsSinks';
|
|
4
5
|
import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from './testing/review';
|
|
@@ -762,6 +763,7 @@ export type VoiceServerConnectionMessage = {
|
|
|
762
763
|
};
|
|
763
764
|
export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerReplayMessage<TResult> | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerAudioMessage | VoiceServerCallLifecycleMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage | VoiceServerConnectionMessage;
|
|
764
765
|
export type VoiceConnectionOptions = {
|
|
766
|
+
browserMedia?: false | VoiceBrowserMediaReporterOptions;
|
|
765
767
|
protocols?: string[];
|
|
766
768
|
scenarioId?: string;
|
|
767
769
|
reconnect?: boolean;
|
|
@@ -770,6 +772,23 @@ export type VoiceConnectionOptions = {
|
|
|
770
772
|
pingInterval?: number;
|
|
771
773
|
sessionId?: string;
|
|
772
774
|
};
|
|
775
|
+
export type VoiceBrowserMediaReportPayload = {
|
|
776
|
+
at: number;
|
|
777
|
+
report: MediaWebRTCStatsReport;
|
|
778
|
+
scenarioId?: string | null;
|
|
779
|
+
sessionId?: string | null;
|
|
780
|
+
};
|
|
781
|
+
export type VoiceBrowserMediaReporterOptions = Omit<MediaWebRTCStatsReportInput, 'peerConnection'> & {
|
|
782
|
+
fetch?: typeof fetch;
|
|
783
|
+
getPeerConnection?: (() => MediaWebRTCStatsCollector | null | undefined) | (() => Promise<MediaWebRTCStatsCollector | null | undefined>);
|
|
784
|
+
getScenarioId?: () => string | null | undefined;
|
|
785
|
+
getSessionId?: () => string | null | undefined;
|
|
786
|
+
intervalMs?: number;
|
|
787
|
+
onError?: (error: unknown) => void;
|
|
788
|
+
onReport?: (payload: VoiceBrowserMediaReportPayload) => void;
|
|
789
|
+
path?: string;
|
|
790
|
+
peerConnection?: MediaWebRTCStatsCollector;
|
|
791
|
+
};
|
|
773
792
|
export type VoiceCaptureOptions = {
|
|
774
793
|
channelCount?: 1 | 2;
|
|
775
794
|
onAudio?: (audio: Uint8Array | ArrayBuffer, sendAudio: (audio: Uint8Array | ArrayBuffer) => void) => void;
|