@absolutejs/voice 0.0.22-beta.314 → 0.0.22-beta.315

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -11622,6 +11622,9 @@ var numericMetadata = (frame, key) => {
11622
11622
  const value = frame.metadata?.[key];
11623
11623
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
11624
11624
  };
11625
+ var average = (values) => values.length === 0 ? undefined : values.reduce((total, value) => total + value, 0) / values.length;
11626
+ var max = (values) => values.length === 0 ? undefined : Math.max(...values);
11627
+ var min = (values) => values.length === 0 ? undefined : Math.min(...values);
11625
11628
  var buildMediaResamplingPlan = (input) => {
11626
11629
  const required = !formatMatches2(input.inputFormat, input.outputFormat);
11627
11630
  return {
@@ -11724,6 +11727,76 @@ var buildMediaInterruptionReport = (input = {}) => {
11724
11727
  status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass"
11725
11728
  };
11726
11729
  };
11730
+ var buildMediaQualityReport = (input = {}) => {
11731
+ const frames = [...input.frames ?? []].sort((a, b) => (a.at ?? 0) - (b.at ?? 0));
11732
+ const audioFrames = frames.filter((frame) => frame.kind === "input-audio" || frame.kind === "assistant-audio");
11733
+ const inputAudioFrames = frames.filter((frame) => frame.kind === "input-audio");
11734
+ const assistantAudioFrames = frames.filter((frame) => frame.kind === "assistant-audio");
11735
+ const issues = [];
11736
+ const gapsMs = [];
11737
+ for (const [index, frame] of audioFrames.entries()) {
11738
+ const previous = audioFrames[index - 1];
11739
+ if (previous?.at === undefined || frame.at === undefined || previous.durationMs === undefined) {
11740
+ continue;
11741
+ }
11742
+ const gap = frame.at - (previous.at + previous.durationMs);
11743
+ if (gap > 0) {
11744
+ gapsMs.push(gap);
11745
+ }
11746
+ }
11747
+ const jitterMs = audioFrames.map((frame) => numericMetadata(frame, "jitterMs")).filter((value) => value !== undefined).at(-1) ?? max(gapsMs);
11748
+ const first = audioFrames.find((frame) => frame.at !== undefined);
11749
+ const last = audioFrames.toReversed().find((frame) => frame.at !== undefined);
11750
+ const durationMs = first?.at !== undefined && last?.at !== undefined ? last.at - first.at + (last.durationMs ?? 0) : undefined;
11751
+ const expectedDurationMs = audioFrames.length > 0 ? audioFrames.reduce((total, frame) => total + (frame.durationMs ?? 0), 0) : undefined;
11752
+ const timestampDriftMs = durationMs !== undefined && expectedDurationMs !== undefined ? Math.max(0, durationMs - expectedDurationMs) : undefined;
11753
+ const speechScores = inputAudioFrames.map(speechProbability);
11754
+ const speechFrames = speechScores.filter((score) => score >= 0.6).length;
11755
+ const silenceFrames = speechScores.filter((score) => score <= 0.35).length;
11756
+ const unknownSpeechFrames = Math.max(0, inputAudioFrames.length - speechFrames - silenceFrames);
11757
+ const speechRatio = inputAudioFrames.length === 0 ? 0 : speechFrames / inputAudioFrames.length;
11758
+ const silenceRatio = inputAudioFrames.length === 0 ? 0 : silenceFrames / inputAudioFrames.length;
11759
+ const levels = audioFrames.map((frame) => numericMetadata(frame, "level") ?? numericMetadata(frame, "rms") ?? numericMetadata(frame, "energy")).filter((value) => value !== undefined);
11760
+ const backpressureEvents = input.transport?.backpressureEvents ?? 0;
11761
+ const maxGapMs = input.maxGapMs;
11762
+ if (maxGapMs !== undefined && gapsMs.some((gap) => gap > maxGapMs)) {
11763
+ pushIssue(issues, "warning", "media.quality_gap", `Observed media gap above ${String(maxGapMs)}ms.`);
11764
+ }
11765
+ if (input.maxJitterMs !== undefined && jitterMs !== undefined && jitterMs > input.maxJitterMs) {
11766
+ pushIssue(issues, "warning", "media.quality_jitter", `Observed jitter ${String(jitterMs)}ms above ${String(input.maxJitterMs)}ms.`);
11767
+ }
11768
+ if (input.maxTimestampDriftMs !== undefined && timestampDriftMs !== undefined && timestampDriftMs > input.maxTimestampDriftMs) {
11769
+ pushIssue(issues, "warning", "media.quality_timestamp_drift", `Observed timestamp drift ${String(timestampDriftMs)}ms above ${String(input.maxTimestampDriftMs)}ms.`);
11770
+ }
11771
+ if (input.minSpeechRatio !== undefined && inputAudioFrames.length > 0 && speechRatio < input.minSpeechRatio) {
11772
+ pushIssue(issues, "warning", "media.quality_speech_ratio", `Observed speech ratio ${String(speechRatio)} below ${String(input.minSpeechRatio)}.`);
11773
+ }
11774
+ if (input.maxBackpressureEvents !== undefined && backpressureEvents > input.maxBackpressureEvents) {
11775
+ pushIssue(issues, "warning", "media.quality_backpressure", `Observed ${String(backpressureEvents)} backpressure event(s), above ${String(input.maxBackpressureEvents)}.`);
11776
+ }
11777
+ return {
11778
+ assistantAudioFrames: assistantAudioFrames.length,
11779
+ backpressureEvents,
11780
+ checkedAt: Date.now(),
11781
+ durationMs,
11782
+ gapCount: gapsMs.length,
11783
+ gapsMs,
11784
+ inputAudioFrames: inputAudioFrames.length,
11785
+ issues,
11786
+ jitterMs,
11787
+ levelAverage: average(levels),
11788
+ levelMax: max(levels),
11789
+ levelMin: min(levels),
11790
+ silenceFrames,
11791
+ silenceRatio,
11792
+ speechFrames,
11793
+ speechRatio,
11794
+ status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass",
11795
+ timestampDriftMs,
11796
+ totalFrames: frames.length,
11797
+ unknownSpeechFrames
11798
+ };
11799
+ };
11727
11800
  var buildMediaPipelineCalibrationReport = (input = {}) => {
11728
11801
  const frames = input.frames ?? [];
11729
11802
  const issues = [];
@@ -11810,6 +11883,15 @@ var buildVoiceMediaPipelineReport = (options = {}) => {
11810
11883
  frames,
11811
11884
  maxInterruptionLatencyMs: options.maxInterruptionLatencyMs
11812
11885
  });
11886
+ const quality = buildMediaQualityReport({
11887
+ frames,
11888
+ maxBackpressureEvents: options.maxMediaBackpressureEvents,
11889
+ maxGapMs: options.maxMediaGapMs,
11890
+ maxJitterMs: options.maxMediaJitterMs,
11891
+ maxTimestampDriftMs: options.maxMediaTimestampDriftMs,
11892
+ minSpeechRatio: options.minMediaSpeechRatio,
11893
+ transport: options.transport
11894
+ });
11813
11895
  const resampling = calibration.inputFormat && calibration.outputFormat ? buildMediaResamplingPlan({
11814
11896
  inputFormat: calibration.inputFormat,
11815
11897
  outputFormat: calibration.outputFormat
@@ -11818,6 +11900,7 @@ var buildVoiceMediaPipelineReport = (options = {}) => {
11818
11900
  calibration.status,
11819
11901
  vad.status,
11820
11902
  interruption.status,
11903
+ quality.status,
11821
11904
  resampling?.status ?? "pass",
11822
11905
  options.processorGraph?.status ?? "pass",
11823
11906
  options.transport?.status ?? "pass"
@@ -11829,6 +11912,7 @@ var buildVoiceMediaPipelineReport = (options = {}) => {
11829
11912
  interruption,
11830
11913
  ok: status === "pass",
11831
11914
  processorGraph: options.processorGraph,
11915
+ quality,
11832
11916
  resampling,
11833
11917
  status,
11834
11918
  surface: options.surface ?? "voice-media-pipeline",
@@ -11865,6 +11949,25 @@ var evaluateVoiceMediaPipelineEvidence = (report, input = {}) => {
11865
11949
  if (input.requireResamplingReady && report.calibration.resamplingRequired && !report.resampling) {
11866
11950
  issues.push("Expected resampling plan when calibration requires resampling.");
11867
11951
  }
11952
+ if (input.requireQualityPass && report.quality.status !== "pass") {
11953
+ issues.push(`Expected media quality proof to pass, found ${report.quality.status}.`);
11954
+ }
11955
+ const maxMediaGapMs = input.maxMediaGapMs;
11956
+ if (maxMediaGapMs !== undefined && report.quality.gapsMs.some((gap) => gap > maxMediaGapMs)) {
11957
+ issues.push(`Expected media gaps at or below ${String(maxMediaGapMs)}ms.`);
11958
+ }
11959
+ if (input.maxMediaJitterMs !== undefined && report.quality.jitterMs !== undefined && report.quality.jitterMs > input.maxMediaJitterMs) {
11960
+ issues.push(`Expected media jitter at or below ${String(input.maxMediaJitterMs)}ms, found ${String(report.quality.jitterMs)}ms.`);
11961
+ }
11962
+ if (input.maxMediaTimestampDriftMs !== undefined && report.quality.timestampDriftMs !== undefined && report.quality.timestampDriftMs > input.maxMediaTimestampDriftMs) {
11963
+ issues.push(`Expected media timestamp drift at or below ${String(input.maxMediaTimestampDriftMs)}ms, found ${String(report.quality.timestampDriftMs)}ms.`);
11964
+ }
11965
+ if (input.minMediaSpeechRatio !== undefined && report.quality.speechRatio < input.minMediaSpeechRatio) {
11966
+ issues.push(`Expected media speech ratio at or above ${String(input.minMediaSpeechRatio)}, found ${String(report.quality.speechRatio)}.`);
11967
+ }
11968
+ if (input.maxMediaBackpressureEvents !== undefined && report.quality.backpressureEvents > input.maxMediaBackpressureEvents) {
11969
+ issues.push(`Expected at most ${String(input.maxMediaBackpressureEvents)} media backpressure event(s), found ${String(report.quality.backpressureEvents)}.`);
11970
+ }
11868
11971
  if (input.requireProcessorGraph && !report.processorGraph) {
11869
11972
  issues.push("Expected media processor graph evidence.");
11870
11973
  }
@@ -11913,6 +12016,10 @@ var renderVoiceMediaPipelineMarkdown = (report) => [
11913
12016
  `- Resampling required: ${report.calibration.resamplingRequired ? "yes" : "no"}`,
11914
12017
  `- VAD segments: ${String(report.vad.segments.length)}`,
11915
12018
  `- Interruption frames: ${String(report.interruption.interruptionFrames)}`,
12019
+ `- Media quality: ${report.quality.status}`,
12020
+ `- Media quality gaps: ${String(report.quality.gapCount)}`,
12021
+ `- Media quality jitter: ${String(report.quality.jitterMs ?? "n/a")}ms`,
12022
+ `- Media quality speech ratio: ${String(report.quality.speechRatio)}`,
11916
12023
  `- Processor graph: ${report.processorGraph ? `${report.processorGraph.name} (${String(report.processorGraph.nodes.length)} nodes)` : "n/a"}`,
11917
12024
  `- Processor graph emitted frames: ${String(report.processorGraph?.emittedFrames ?? 0)}`,
11918
12025
  `- Processor graph dropped frames: ${String(report.processorGraph?.droppedFrames ?? 0)}`,
@@ -11925,13 +12032,18 @@ var renderVoiceMediaPipelineMarkdown = (report) => [
11925
12032
  "",
11926
12033
  ...[
11927
12034
  ...report.calibration.issues,
11928
- ...report.interruption.issues
12035
+ ...report.interruption.issues,
12036
+ ...report.quality.issues
11929
12037
  ].map((issue) => `- ${issue.severity.toUpperCase()} ${issue.code}: ${issue.message}`),
11930
- ...report.calibration.issues.length + report.interruption.issues.length === 0 ? ["- None"] : []
12038
+ ...report.calibration.issues.length + report.interruption.issues.length + report.quality.issues.length === 0 ? ["- None"] : []
11931
12039
  ].join(`
11932
12040
  `);
11933
12041
  var renderVoiceMediaPipelineHTML = (report, title = "Voice Media Pipeline Proof") => {
11934
- const issues = [...report.calibration.issues, ...report.interruption.issues].map((issue) => `<li class="${escapeHtml15(issue.severity)}"><strong>${escapeHtml15(issue.code)}</strong>: ${escapeHtml15(issue.message)}</li>`).join("");
12042
+ const issues = [
12043
+ ...report.calibration.issues,
12044
+ ...report.interruption.issues,
12045
+ ...report.quality.issues
12046
+ ].map((issue) => `<li class="${escapeHtml15(issue.severity)}"><strong>${escapeHtml15(issue.code)}</strong>: ${escapeHtml15(issue.message)}</li>`).join("");
11935
12047
  const segments = report.vad.segments.map((segment) => `<tr><td>${escapeHtml15(segment.segmentId)}</td><td>${escapeHtml15(segment.frameCount)}</td><td>${escapeHtml15(segment.durationMs ?? "n/a")}</td><td>${escapeHtml15(segment.turnId ?? "n/a")}</td></tr>`).join("");
11936
12048
  return `<!doctype html><html lang="en"><head><meta charset="utf-8" /><meta name="viewport" content="width=device-width,initial-scale=1" /><title>${escapeHtml15(title)}</title><style>body{background:#101418;color:#f7f3e8;font-family:ui-sans-serif,system-ui,sans-serif;margin:0}main{margin:auto;max-width:1100px;padding:32px}.hero,.card{background:#17201d;border:1px solid #2e3d36;border-radius:24px;margin-bottom:16px;padding:22px}.hero{background:linear-gradient(135deg,rgba(20,184,166,.18),rgba(245,158,11,.12))}.eyebrow{color:#5eead4;font-weight:900;letter-spacing:.1em;text-transform:uppercase}h1{font-size:clamp(2.3rem,6vw,4.8rem);letter-spacing:-.06em;line-height:.9;margin:.2rem 0 1rem}.summary{display:grid;gap:12px;grid-template-columns:repeat(auto-fit,minmax(170px,1fr))}.metric{background:#101814;border:1px solid #2e3d36;border-radius:18px;padding:14px}.metric span{color:#a8b5ad;display:block;font-size:.78rem;text-transform:uppercase}.metric strong{display:block;font-size:1.65rem;margin-top:5px}.status{border:1px solid #64748b;border-radius:999px;display:inline-flex;font-weight:900;padding:7px 11px}.pass{color:#86efac}.warn,.warning{color:#fde68a}.fail,.error{color:#fecaca}table{border-collapse:collapse;width:100%}td,th{border-bottom:1px solid #2e3d36;padding:10px;text-align:left}</style></head><body><main><section class="hero"><p class="eyebrow">Native media pipeline</p><h1>${escapeHtml15(title)}</h1><p class="status ${escapeHtml15(report.status)}">${escapeHtml15(report.status)}</p><p>${escapeHtml15(report.surface)}</p><section class="summary"><div class="metric"><span>Frames</span><strong>${String(report.frames)}</strong></div><div class="metric"><span>Input audio</span><strong>${String(report.calibration.inputAudioFrames)}</strong></div><div class="metric"><span>Assistant audio</span><strong>${String(report.calibration.assistantAudioFrames)}</strong></div><div class="metric"><span>Trace linked</span><strong>${String(report.calibration.traceLinkedFrames)}</strong></div><div class="metric"><span>First audio</span><strong>${escapeHtml15(report.calibration.firstAudioLatencyMs ?? "n/a")}ms</strong></div><div class="metric"><span>VAD segments</span><strong>${String(report.vad.segments.length)}</strong></div><div class="metric"><span>Interruptions</span><strong>${String(report.interruption.interruptionFrames)}</strong></div><div class="metric"><span>Processor graph</span><strong>${String(report.processorGraph?.nodes.length ?? 0)} nodes</strong></div><div class="metric"><span>Graph out/drop</span><strong>${String(report.processorGraph?.emittedFrames ?? 0)}/${String(report.processorGraph?.droppedFrames ?? 0)}</strong></div><div class="metric"><span>Resampling</span><strong>${report.calibration.resamplingRequired ? "required" : "not required"}</strong></div><div class="metric"><span>Transport</span><strong>${escapeHtml15(report.transport?.state ?? "n/a")}</strong></div><div class="metric"><span>Transport in/out</span><strong>${String(report.transport?.inputFrames ?? 0)}/${String(report.transport?.outputFrames ?? 0)}</strong></div><div class="metric"><span>Backpressure</span><strong>${String(report.transport?.backpressureEvents ?? 0)}</strong></div></section></section><section class="card"><h2>Issues</h2><ul>${issues || '<li class="pass">No media pipeline issues.</li>'}</ul></section><section class="card"><h2>VAD Segments</h2><table><thead><tr><th>Segment</th><th>Frames</th><th>Duration ms</th><th>Turn</th></tr></thead><tbody>${segments || '<tr><td colspan="4">No VAD segments.</td></tr>'}</tbody></table></section></main></body></html>`;
11937
12049
  };
@@ -19265,7 +19377,7 @@ var percentile4 = (values, percentileValue) => {
19265
19377
  const index = Math.min(sorted.length - 1, Math.max(0, Math.ceil(percentileValue / 100 * sorted.length) - 1));
19266
19378
  return Math.round(sorted[index] ?? 0);
19267
19379
  };
19268
- var average = (values) => values.length === 0 ? undefined : Math.round(values.reduce((total, value) => total + value, 0) / values.length);
19380
+ var average2 = (values) => values.length === 0 ? undefined : Math.round(values.reduce((total, value) => total + value, 0) / values.length);
19269
19381
  var resolveBudget = (stage, options) => ({
19270
19382
  failAfterMs: options.budgets?.[stage]?.failAfterMs ?? options.failAfterMs ?? DEFAULT_FAIL_AFTER_MS2,
19271
19383
  warnAfterMs: options.budgets?.[stage]?.warnAfterMs ?? options.warnAfterMs ?? DEFAULT_WARN_AFTER_MS2
@@ -19459,7 +19571,7 @@ var summarizeStage = (stage, measurements, options) => {
19459
19571
  const failed = stageMeasurements.filter((measurement) => measurement.status === "fail").length;
19460
19572
  const warnings = stageMeasurements.filter((measurement) => measurement.status === "warn").length;
19461
19573
  return {
19462
- averageMs: average(latencies),
19574
+ averageMs: average2(latencies),
19463
19575
  budget: resolveBudget(stage, options),
19464
19576
  failed,
19465
19577
  label: STAGE_LABELS[stage],
@@ -31283,7 +31395,7 @@ var statusRank6 = {
31283
31395
  warn: 1,
31284
31396
  fail: 2
31285
31397
  };
31286
- var statusExceeds2 = (actual, max) => statusRank6[actual] > statusRank6[max];
31398
+ var statusExceeds2 = (actual, max2) => statusRank6[actual] > statusRank6[max2];
31287
31399
  var buildVoiceProviderContractMatrix = (input) => {
31288
31400
  const rows = input.contracts.map((contract) => {
31289
31401
  const configured = contract.configured !== false;
@@ -1,9 +1,14 @@
1
1
  import { Elysia } from 'elysia';
2
- import { type MediaFrame, type MediaInterruptionReport, type MediaPipelineCalibrationInput, type MediaPipelineCalibrationReport, type MediaPipelineStatus, type MediaProcessorGraphReport, type MediaResamplingPlan, type MediaTransportReport, type MediaVadReport } from '@absolutejs/media';
2
+ import { type MediaFrame, type MediaInterruptionReport, type MediaPipelineCalibrationInput, type MediaPipelineCalibrationReport, type MediaPipelineStatus, type MediaProcessorGraphReport, type MediaQualityReport, type MediaResamplingPlan, type MediaTransportReport, type MediaVadReport } from '@absolutejs/media';
3
3
  export type VoiceMediaPipelineReportOptions = MediaPipelineCalibrationInput & {
4
4
  frames?: readonly MediaFrame[];
5
5
  maxInterruptionLatencyMs?: number;
6
+ maxMediaBackpressureEvents?: number;
7
+ maxMediaGapMs?: number;
8
+ maxMediaJitterMs?: number;
9
+ maxMediaTimestampDriftMs?: number;
6
10
  maxSilenceFrames?: number;
11
+ minMediaSpeechRatio?: number;
7
12
  minSpeechFrames?: number;
8
13
  processorGraph?: MediaProcessorGraphReport;
9
14
  speechEndThreshold?: number;
@@ -16,6 +21,7 @@ export type VoiceMediaPipelineReport = {
16
21
  frames: number;
17
22
  interruption: MediaInterruptionReport;
18
23
  ok: boolean;
24
+ quality: MediaQualityReport;
19
25
  resampling?: MediaResamplingPlan;
20
26
  processorGraph?: MediaProcessorGraphReport;
21
27
  status: MediaPipelineStatus;
@@ -26,8 +32,13 @@ export type VoiceMediaPipelineReport = {
26
32
  export type VoiceMediaPipelineAssertionInput = {
27
33
  maxFirstAudioLatencyMs?: number;
28
34
  maxInterruptionLatencyMs?: number;
35
+ maxMediaBackpressureEvents?: number;
36
+ maxMediaGapMs?: number;
37
+ maxMediaJitterMs?: number;
38
+ maxMediaTimestampDriftMs?: number;
29
39
  minAssistantAudioFrames?: number;
30
40
  minInputAudioFrames?: number;
41
+ minMediaSpeechRatio?: number;
31
42
  minProcessorGraphEmittedFrames?: number;
32
43
  minProcessorGraphNodes?: number;
33
44
  minTransportInputFrames?: number;
@@ -38,6 +49,7 @@ export type VoiceMediaPipelineAssertionInput = {
38
49
  requireInterruptionFrame?: boolean;
39
50
  requirePass?: boolean;
40
51
  requireProcessorGraph?: boolean;
52
+ requireQualityPass?: boolean;
41
53
  requireResamplingReady?: boolean;
42
54
  requireTransportConnected?: boolean;
43
55
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.314",
3
+ "version": "0.0.22-beta.315",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",
@@ -246,7 +246,7 @@
246
246
  }
247
247
  },
248
248
  "dependencies": {
249
- "@absolutejs/media": "0.0.1-beta.1"
249
+ "@absolutejs/media": "0.0.1-beta.2"
250
250
  },
251
251
  "devDependencies": {
252
252
  "@absolutejs/absolute": "0.19.0-beta.646",