@absolutejs/voice 0.0.22-beta.313 → 0.0.22-beta.315
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +0 -2
- package/dist/index.js +132 -237
- package/dist/mediaPipelineRoutes.d.ts +25 -13
- package/package.json +4 -1
- package/dist/mediaPipeline.d.ts +0 -219
package/dist/index.d.ts
CHANGED
|
@@ -15,8 +15,6 @@ export type { VoiceRealtimeChannelAssertionInput, VoiceRealtimeChannelAssertionR
|
|
|
15
15
|
export { assertVoiceRealtimeProviderContractEvidence, buildVoiceRealtimeProviderContractMatrix, createVoiceRealtimeProviderContractMatrixPreset, createVoiceRealtimeProviderContractRoutes, evaluateVoiceRealtimeProviderContractEvidence, renderVoiceRealtimeProviderContractHTML } from './realtimeProviderContracts';
|
|
16
16
|
export type { VoiceRealtimeProviderContractAssertionInput, VoiceRealtimeProviderContractAssertionReport, VoiceRealtimeProviderContractCapability, VoiceRealtimeProviderContractCheck, VoiceRealtimeProviderContractDefinition, VoiceRealtimeProviderContractMatrixPresetOptions, VoiceRealtimeProviderContractMatrixInput, VoiceRealtimeProviderContractMatrixReport, VoiceRealtimeProviderContractRoutesOptions, VoiceRealtimeProviderContractRow, VoiceRealtimeProviderPresetProvider, VoiceRealtimeProviderContractStatus } from './realtimeProviderContracts';
|
|
17
17
|
export { buildVoiceDiagnosticsMarkdown, createVoiceDiagnosticsRoutes, resolveVoiceDiagnosticsTraceFilter } from './diagnosticsRoutes';
|
|
18
|
-
export { buildVoiceMediaTransportReport, buildVoiceMediaInterruptionReport, buildVoiceMediaPipelineCalibrationReport, buildVoiceMediaResamplingPlan, buildVoiceMediaProcessorGraphReport, buildVoiceMediaVadReport, createVoiceMediaFrame, createVoiceMediaFrameTransformPipeline, createVoiceMediaProcessorGraph, createVoiceMediaTransport } from './mediaPipeline';
|
|
19
|
-
export type { VoiceMediaFrame, VoiceMediaFrameKind, VoiceMediaFrameSource, VoiceMediaFrameTransform, VoiceMediaFrameTransformPipeline, VoiceMediaInterruptionInput, VoiceMediaInterruptionReport, VoiceMediaPipelineCalibrationInput, VoiceMediaPipelineCalibrationIssue, VoiceMediaPipelineCalibrationReport, VoiceMediaPipelineStatus, VoiceMediaProcessorGraph, VoiceMediaProcessorGraphReport, VoiceMediaProcessorNode, VoiceMediaProcessorNodeEvent, VoiceMediaProcessorNodeKind, VoiceMediaProcessorNodeReport, VoiceMediaResamplingPlan, VoiceMediaTransport, VoiceMediaTransportAdapter, VoiceMediaTransportEvent, VoiceMediaTransportEventKind, VoiceMediaTransportOptions, VoiceMediaTransportReport, VoiceMediaTransportState, VoiceMediaVadInput, VoiceMediaVadReport, VoiceMediaVadSegment } from './mediaPipeline';
|
|
20
18
|
export { assertVoiceMediaPipelineEvidence, buildVoiceMediaPipelineReport, createVoiceMediaPipelineRoutes, evaluateVoiceMediaPipelineEvidence, renderVoiceMediaPipelineHTML, renderVoiceMediaPipelineMarkdown } from './mediaPipelineRoutes';
|
|
21
19
|
export type { VoiceMediaPipelineAssertionInput, VoiceMediaPipelineAssertionReport, VoiceMediaPipelineReport, VoiceMediaPipelineReportOptions, VoiceMediaPipelineRoutesOptions } from './mediaPipelineRoutes';
|
|
22
20
|
export { buildVoiceDemoReadyReport, createVoiceDemoReadyRoutes, renderVoiceDemoReadyHTML } from './demoReadyRoutes';
|
package/dist/index.js
CHANGED
|
@@ -11609,7 +11609,10 @@ var createVoiceDiagnosticsRoutes = (options) => {
|
|
|
11609
11609
|
});
|
|
11610
11610
|
return routes;
|
|
11611
11611
|
};
|
|
11612
|
-
// src/
|
|
11612
|
+
// src/mediaPipelineRoutes.ts
|
|
11613
|
+
import { Elysia as Elysia12 } from "elysia";
|
|
11614
|
+
|
|
11615
|
+
// node_modules/@absolutejs/media/dist/index.js
|
|
11613
11616
|
var formatLabel2 = (format) => `${format.container}/${format.encoding}/${String(format.sampleRateHz)}hz/${String(format.channels)}ch`;
|
|
11614
11617
|
var formatMatches2 = (actual, expected) => actual.container === expected.container && actual.encoding === expected.encoding && actual.sampleRateHz === expected.sampleRateHz && actual.channels === expected.channels;
|
|
11615
11618
|
var pushIssue = (issues, severity, code, message) => {
|
|
@@ -11619,97 +11622,10 @@ var numericMetadata = (frame, key) => {
|
|
|
11619
11622
|
const value = frame.metadata?.[key];
|
|
11620
11623
|
return typeof value === "number" && Number.isFinite(value) ? value : undefined;
|
|
11621
11624
|
};
|
|
11622
|
-
var
|
|
11623
|
-
var
|
|
11624
|
-
|
|
11625
|
-
|
|
11626
|
-
const backpressureEvents = events.filter((event) => event.kind === "backpressure").length;
|
|
11627
|
-
const failed = state === "failed" || events.some((event) => event.kind === "error");
|
|
11628
|
-
return {
|
|
11629
|
-
backpressureEvents,
|
|
11630
|
-
checkedAt: Date.now(),
|
|
11631
|
-
closed: state === "closed",
|
|
11632
|
-
connected: state === "open",
|
|
11633
|
-
events,
|
|
11634
|
-
failed,
|
|
11635
|
-
inputFrames: events.filter((event) => event.kind === "frame-in").length,
|
|
11636
|
-
name: input.name,
|
|
11637
|
-
outputFrames: events.filter((event) => event.kind === "frame-out").length,
|
|
11638
|
-
state,
|
|
11639
|
-
status: failed ? "fail" : backpressureEvents > 0 ? "warn" : "pass"
|
|
11640
|
-
};
|
|
11641
|
-
};
|
|
11642
|
-
var createVoiceMediaTransport = (options) => {
|
|
11643
|
-
let state = "idle";
|
|
11644
|
-
const events = [];
|
|
11645
|
-
const frameHandlers = new Set;
|
|
11646
|
-
const record = (event) => {
|
|
11647
|
-
events.push({ ...event, at: Date.now(), state });
|
|
11648
|
-
};
|
|
11649
|
-
return {
|
|
11650
|
-
close: async () => {
|
|
11651
|
-
state = "closing";
|
|
11652
|
-
await options.onClose?.();
|
|
11653
|
-
state = "closed";
|
|
11654
|
-
record({ kind: "close" });
|
|
11655
|
-
},
|
|
11656
|
-
connect: async () => {
|
|
11657
|
-
try {
|
|
11658
|
-
await options.onConnect?.();
|
|
11659
|
-
state = "open";
|
|
11660
|
-
record({ kind: "connect" });
|
|
11661
|
-
} catch (error) {
|
|
11662
|
-
state = "failed";
|
|
11663
|
-
record({
|
|
11664
|
-
error: error instanceof Error ? error.message : String(error),
|
|
11665
|
-
kind: "error"
|
|
11666
|
-
});
|
|
11667
|
-
throw error;
|
|
11668
|
-
}
|
|
11669
|
-
},
|
|
11670
|
-
events: () => [...events],
|
|
11671
|
-
inputFormat: options.inputFormat,
|
|
11672
|
-
name: options.name,
|
|
11673
|
-
onFrame: (handler) => {
|
|
11674
|
-
frameHandlers.add(handler);
|
|
11675
|
-
return () => frameHandlers.delete(handler);
|
|
11676
|
-
},
|
|
11677
|
-
outputFormat: options.outputFormat,
|
|
11678
|
-
receive: async (frame) => {
|
|
11679
|
-
record({ frameId: frame.id, kind: "frame-in" });
|
|
11680
|
-
if (options.maxBufferedFrames !== undefined && events.filter((event) => event.kind === "frame-in").length > options.maxBufferedFrames) {
|
|
11681
|
-
record({
|
|
11682
|
-
bufferedFrames: events.filter((event) => event.kind === "frame-in").length,
|
|
11683
|
-
kind: "backpressure"
|
|
11684
|
-
});
|
|
11685
|
-
}
|
|
11686
|
-
for (const handler of frameHandlers) {
|
|
11687
|
-
await handler(frame);
|
|
11688
|
-
}
|
|
11689
|
-
},
|
|
11690
|
-
report: () => buildVoiceMediaTransportReport({
|
|
11691
|
-
events,
|
|
11692
|
-
name: options.name,
|
|
11693
|
-
state
|
|
11694
|
-
}),
|
|
11695
|
-
send: async (frame) => {
|
|
11696
|
-
try {
|
|
11697
|
-
await options.onSend?.(frame);
|
|
11698
|
-
record({ frameId: frame.id, kind: "frame-out" });
|
|
11699
|
-
} catch (error) {
|
|
11700
|
-
state = "failed";
|
|
11701
|
-
record({
|
|
11702
|
-
error: error instanceof Error ? error.message : String(error),
|
|
11703
|
-
frameId: frame.id,
|
|
11704
|
-
kind: "error"
|
|
11705
|
-
});
|
|
11706
|
-
throw error;
|
|
11707
|
-
}
|
|
11708
|
-
},
|
|
11709
|
-
state: () => state
|
|
11710
|
-
};
|
|
11711
|
-
};
|
|
11712
|
-
var buildVoiceMediaResamplingPlan = (input) => {
|
|
11625
|
+
var average = (values) => values.length === 0 ? undefined : values.reduce((total, value) => total + value, 0) / values.length;
|
|
11626
|
+
var max = (values) => values.length === 0 ? undefined : Math.max(...values);
|
|
11627
|
+
var min = (values) => values.length === 0 ? undefined : Math.min(...values);
|
|
11628
|
+
var buildMediaResamplingPlan = (input) => {
|
|
11713
11629
|
const required = !formatMatches2(input.inputFormat, input.outputFormat);
|
|
11714
11630
|
return {
|
|
11715
11631
|
inputFormat: input.inputFormat,
|
|
@@ -11719,126 +11635,6 @@ var buildVoiceMediaResamplingPlan = (input) => {
|
|
|
11719
11635
|
status: input.inputFormat.container === input.outputFormat.container && input.inputFormat.encoding === input.outputFormat.encoding && input.inputFormat.channels === input.outputFormat.channels ? "pass" : "warn"
|
|
11720
11636
|
};
|
|
11721
11637
|
};
|
|
11722
|
-
var createVoiceMediaFrameTransformPipeline = (input = {}) => {
|
|
11723
|
-
const transforms = input.transforms ?? [];
|
|
11724
|
-
const push = async (frame) => {
|
|
11725
|
-
let frames = [frame];
|
|
11726
|
-
for (const transform of transforms) {
|
|
11727
|
-
const nextFrames = [];
|
|
11728
|
-
for (const current of frames) {
|
|
11729
|
-
const transformed = await transform.transform(current);
|
|
11730
|
-
if (transformed === undefined) {
|
|
11731
|
-
continue;
|
|
11732
|
-
}
|
|
11733
|
-
if (Array.isArray(transformed)) {
|
|
11734
|
-
nextFrames.push(...transformed);
|
|
11735
|
-
} else {
|
|
11736
|
-
nextFrames.push(transformed);
|
|
11737
|
-
}
|
|
11738
|
-
}
|
|
11739
|
-
frames = nextFrames;
|
|
11740
|
-
}
|
|
11741
|
-
return frames;
|
|
11742
|
-
};
|
|
11743
|
-
return {
|
|
11744
|
-
push,
|
|
11745
|
-
pushMany: async (frames) => {
|
|
11746
|
-
const output = [];
|
|
11747
|
-
for (const frame of frames) {
|
|
11748
|
-
output.push(...await push(frame));
|
|
11749
|
-
}
|
|
11750
|
-
return output;
|
|
11751
|
-
},
|
|
11752
|
-
transforms
|
|
11753
|
-
};
|
|
11754
|
-
};
|
|
11755
|
-
var normalizeProcessorResult = (frame, result) => {
|
|
11756
|
-
if (result === false || result === undefined) {
|
|
11757
|
-
return [];
|
|
11758
|
-
}
|
|
11759
|
-
if (result === true) {
|
|
11760
|
-
return [frame];
|
|
11761
|
-
}
|
|
11762
|
-
if (Array.isArray(result)) {
|
|
11763
|
-
return result;
|
|
11764
|
-
}
|
|
11765
|
-
return [result];
|
|
11766
|
-
};
|
|
11767
|
-
var buildVoiceMediaProcessorGraphReport = (input) => {
|
|
11768
|
-
const events = input.events ?? [];
|
|
11769
|
-
const nodes = input.nodes.map((node) => {
|
|
11770
|
-
const nodeEvents = events.filter((event) => event.node === node.name);
|
|
11771
|
-
const droppedFrames2 = nodeEvents.reduce((total, event) => total + event.dropped, 0);
|
|
11772
|
-
const emittedFrames2 = nodeEvents.reduce((total, event) => total + event.emitted, 0);
|
|
11773
|
-
const inputFrames2 = nodeEvents.reduce((total, event) => total + event.inputs, 0);
|
|
11774
|
-
return {
|
|
11775
|
-
droppedFrames: droppedFrames2,
|
|
11776
|
-
emittedFrames: emittedFrames2,
|
|
11777
|
-
events: nodeEvents,
|
|
11778
|
-
inputFrames: inputFrames2,
|
|
11779
|
-
kind: node.kind ?? "processor",
|
|
11780
|
-
name: node.name,
|
|
11781
|
-
status: inputFrames2 > 0 && emittedFrames2 === 0 && node.kind !== "sink" ? "warn" : "pass"
|
|
11782
|
-
};
|
|
11783
|
-
});
|
|
11784
|
-
const inputFrames = events.filter((event) => event.node === input.nodes[0]?.name).length;
|
|
11785
|
-
const droppedFrames = events.reduce((total, event) => total + event.dropped, 0);
|
|
11786
|
-
const emittedFrames = input.nodes.at(-1) ? events.filter((event) => event.node === input.nodes.at(-1)?.name).reduce((total, event) => total + event.emitted, 0) : 0;
|
|
11787
|
-
const status = nodes.some((node) => node.status === "warn") ? "warn" : "pass";
|
|
11788
|
-
return {
|
|
11789
|
-
checkedAt: Date.now(),
|
|
11790
|
-
droppedFrames,
|
|
11791
|
-
emittedFrames,
|
|
11792
|
-
events,
|
|
11793
|
-
inputFrames,
|
|
11794
|
-
name: input.name,
|
|
11795
|
-
nodes,
|
|
11796
|
-
status
|
|
11797
|
-
};
|
|
11798
|
-
};
|
|
11799
|
-
var createVoiceMediaProcessorGraph = (input = {}) => {
|
|
11800
|
-
const nodes = input.nodes ?? [];
|
|
11801
|
-
const events = [];
|
|
11802
|
-
const process = async (frame) => {
|
|
11803
|
-
let frames = [frame];
|
|
11804
|
-
for (const node of nodes) {
|
|
11805
|
-
const nextFrames = [];
|
|
11806
|
-
for (const current of frames) {
|
|
11807
|
-
const output = normalizeProcessorResult(current, await node.process(current));
|
|
11808
|
-
events.push({
|
|
11809
|
-
at: Date.now(),
|
|
11810
|
-
dropped: output.length === 0 ? 1 : 0,
|
|
11811
|
-
emitted: output.length,
|
|
11812
|
-
frameId: current.id,
|
|
11813
|
-
inputs: 1,
|
|
11814
|
-
node: node.name
|
|
11815
|
-
});
|
|
11816
|
-
nextFrames.push(...output);
|
|
11817
|
-
}
|
|
11818
|
-
frames = nextFrames;
|
|
11819
|
-
if (frames.length === 0) {
|
|
11820
|
-
break;
|
|
11821
|
-
}
|
|
11822
|
-
}
|
|
11823
|
-
return frames;
|
|
11824
|
-
};
|
|
11825
|
-
return {
|
|
11826
|
-
nodes,
|
|
11827
|
-
process,
|
|
11828
|
-
processMany: async (frames) => {
|
|
11829
|
-
const output = [];
|
|
11830
|
-
for (const frame of frames) {
|
|
11831
|
-
output.push(...await process(frame));
|
|
11832
|
-
}
|
|
11833
|
-
return output;
|
|
11834
|
-
},
|
|
11835
|
-
report: () => buildVoiceMediaProcessorGraphReport({
|
|
11836
|
-
events,
|
|
11837
|
-
name: input.name ?? "voice-media-processor-graph",
|
|
11838
|
-
nodes
|
|
11839
|
-
})
|
|
11840
|
-
};
|
|
11841
|
-
};
|
|
11842
11638
|
var speechProbability = (frame) => {
|
|
11843
11639
|
if (frame.metadata?.isSpeech === true) {
|
|
11844
11640
|
return 1;
|
|
@@ -11854,7 +11650,7 @@ var speechProbability = (frame) => {
|
|
|
11854
11650
|
}
|
|
11855
11651
|
return 0;
|
|
11856
11652
|
};
|
|
11857
|
-
var
|
|
11653
|
+
var buildMediaVadReport = (input = {}) => {
|
|
11858
11654
|
const frames = (input.frames ?? []).filter((frame) => frame.kind === "input-audio");
|
|
11859
11655
|
const speechStartThreshold = input.speechStartThreshold ?? 0.6;
|
|
11860
11656
|
const speechEndThreshold = input.speechEndThreshold ?? 0.35;
|
|
@@ -11912,7 +11708,7 @@ var buildVoiceMediaVadReport = (input = {}) => {
|
|
|
11912
11708
|
status: frames.length === 0 ? "warn" : "pass"
|
|
11913
11709
|
};
|
|
11914
11710
|
};
|
|
11915
|
-
var
|
|
11711
|
+
var buildMediaInterruptionReport = (input = {}) => {
|
|
11916
11712
|
const issues = [];
|
|
11917
11713
|
const interruptionFrames = (input.frames ?? []).filter((frame) => frame.kind === "interruption");
|
|
11918
11714
|
const latenciesMs = interruptionFrames.map((frame) => frame.latencyMs).filter((latency) => typeof latency === "number");
|
|
@@ -11931,7 +11727,77 @@ var buildVoiceMediaInterruptionReport = (input = {}) => {
|
|
|
11931
11727
|
status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass"
|
|
11932
11728
|
};
|
|
11933
11729
|
};
|
|
11934
|
-
var
|
|
11730
|
+
var buildMediaQualityReport = (input = {}) => {
|
|
11731
|
+
const frames = [...input.frames ?? []].sort((a, b) => (a.at ?? 0) - (b.at ?? 0));
|
|
11732
|
+
const audioFrames = frames.filter((frame) => frame.kind === "input-audio" || frame.kind === "assistant-audio");
|
|
11733
|
+
const inputAudioFrames = frames.filter((frame) => frame.kind === "input-audio");
|
|
11734
|
+
const assistantAudioFrames = frames.filter((frame) => frame.kind === "assistant-audio");
|
|
11735
|
+
const issues = [];
|
|
11736
|
+
const gapsMs = [];
|
|
11737
|
+
for (const [index, frame] of audioFrames.entries()) {
|
|
11738
|
+
const previous = audioFrames[index - 1];
|
|
11739
|
+
if (previous?.at === undefined || frame.at === undefined || previous.durationMs === undefined) {
|
|
11740
|
+
continue;
|
|
11741
|
+
}
|
|
11742
|
+
const gap = frame.at - (previous.at + previous.durationMs);
|
|
11743
|
+
if (gap > 0) {
|
|
11744
|
+
gapsMs.push(gap);
|
|
11745
|
+
}
|
|
11746
|
+
}
|
|
11747
|
+
const jitterMs = audioFrames.map((frame) => numericMetadata(frame, "jitterMs")).filter((value) => value !== undefined).at(-1) ?? max(gapsMs);
|
|
11748
|
+
const first = audioFrames.find((frame) => frame.at !== undefined);
|
|
11749
|
+
const last = audioFrames.toReversed().find((frame) => frame.at !== undefined);
|
|
11750
|
+
const durationMs = first?.at !== undefined && last?.at !== undefined ? last.at - first.at + (last.durationMs ?? 0) : undefined;
|
|
11751
|
+
const expectedDurationMs = audioFrames.length > 0 ? audioFrames.reduce((total, frame) => total + (frame.durationMs ?? 0), 0) : undefined;
|
|
11752
|
+
const timestampDriftMs = durationMs !== undefined && expectedDurationMs !== undefined ? Math.max(0, durationMs - expectedDurationMs) : undefined;
|
|
11753
|
+
const speechScores = inputAudioFrames.map(speechProbability);
|
|
11754
|
+
const speechFrames = speechScores.filter((score) => score >= 0.6).length;
|
|
11755
|
+
const silenceFrames = speechScores.filter((score) => score <= 0.35).length;
|
|
11756
|
+
const unknownSpeechFrames = Math.max(0, inputAudioFrames.length - speechFrames - silenceFrames);
|
|
11757
|
+
const speechRatio = inputAudioFrames.length === 0 ? 0 : speechFrames / inputAudioFrames.length;
|
|
11758
|
+
const silenceRatio = inputAudioFrames.length === 0 ? 0 : silenceFrames / inputAudioFrames.length;
|
|
11759
|
+
const levels = audioFrames.map((frame) => numericMetadata(frame, "level") ?? numericMetadata(frame, "rms") ?? numericMetadata(frame, "energy")).filter((value) => value !== undefined);
|
|
11760
|
+
const backpressureEvents = input.transport?.backpressureEvents ?? 0;
|
|
11761
|
+
const maxGapMs = input.maxGapMs;
|
|
11762
|
+
if (maxGapMs !== undefined && gapsMs.some((gap) => gap > maxGapMs)) {
|
|
11763
|
+
pushIssue(issues, "warning", "media.quality_gap", `Observed media gap above ${String(maxGapMs)}ms.`);
|
|
11764
|
+
}
|
|
11765
|
+
if (input.maxJitterMs !== undefined && jitterMs !== undefined && jitterMs > input.maxJitterMs) {
|
|
11766
|
+
pushIssue(issues, "warning", "media.quality_jitter", `Observed jitter ${String(jitterMs)}ms above ${String(input.maxJitterMs)}ms.`);
|
|
11767
|
+
}
|
|
11768
|
+
if (input.maxTimestampDriftMs !== undefined && timestampDriftMs !== undefined && timestampDriftMs > input.maxTimestampDriftMs) {
|
|
11769
|
+
pushIssue(issues, "warning", "media.quality_timestamp_drift", `Observed timestamp drift ${String(timestampDriftMs)}ms above ${String(input.maxTimestampDriftMs)}ms.`);
|
|
11770
|
+
}
|
|
11771
|
+
if (input.minSpeechRatio !== undefined && inputAudioFrames.length > 0 && speechRatio < input.minSpeechRatio) {
|
|
11772
|
+
pushIssue(issues, "warning", "media.quality_speech_ratio", `Observed speech ratio ${String(speechRatio)} below ${String(input.minSpeechRatio)}.`);
|
|
11773
|
+
}
|
|
11774
|
+
if (input.maxBackpressureEvents !== undefined && backpressureEvents > input.maxBackpressureEvents) {
|
|
11775
|
+
pushIssue(issues, "warning", "media.quality_backpressure", `Observed ${String(backpressureEvents)} backpressure event(s), above ${String(input.maxBackpressureEvents)}.`);
|
|
11776
|
+
}
|
|
11777
|
+
return {
|
|
11778
|
+
assistantAudioFrames: assistantAudioFrames.length,
|
|
11779
|
+
backpressureEvents,
|
|
11780
|
+
checkedAt: Date.now(),
|
|
11781
|
+
durationMs,
|
|
11782
|
+
gapCount: gapsMs.length,
|
|
11783
|
+
gapsMs,
|
|
11784
|
+
inputAudioFrames: inputAudioFrames.length,
|
|
11785
|
+
issues,
|
|
11786
|
+
jitterMs,
|
|
11787
|
+
levelAverage: average(levels),
|
|
11788
|
+
levelMax: max(levels),
|
|
11789
|
+
levelMin: min(levels),
|
|
11790
|
+
silenceFrames,
|
|
11791
|
+
silenceRatio,
|
|
11792
|
+
speechFrames,
|
|
11793
|
+
speechRatio,
|
|
11794
|
+
status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass",
|
|
11795
|
+
timestampDriftMs,
|
|
11796
|
+
totalFrames: frames.length,
|
|
11797
|
+
unknownSpeechFrames
|
|
11798
|
+
};
|
|
11799
|
+
};
|
|
11800
|
+
var buildMediaPipelineCalibrationReport = (input = {}) => {
|
|
11935
11801
|
const frames = input.frames ?? [];
|
|
11936
11802
|
const issues = [];
|
|
11937
11803
|
const inputFrames = frames.filter((frame) => frame.kind === "input-audio");
|
|
@@ -11989,13 +11855,13 @@ var buildVoiceMediaPipelineCalibrationReport = (input = {}) => {
|
|
|
11989
11855
|
resamplingRequired,
|
|
11990
11856
|
resamplingTargetHz,
|
|
11991
11857
|
status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass",
|
|
11992
|
-
surface: input.surface ?? "
|
|
11858
|
+
surface: input.surface ?? "media-pipeline",
|
|
11993
11859
|
traceLinkedFrames,
|
|
11994
11860
|
turnCommitFrames: turnCommitFrames.length
|
|
11995
11861
|
};
|
|
11996
11862
|
};
|
|
11863
|
+
|
|
11997
11864
|
// src/mediaPipelineRoutes.ts
|
|
11998
|
-
import { Elysia as Elysia12 } from "elysia";
|
|
11999
11865
|
var escapeHtml15 = (value) => String(value).replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll('"', "&quot;").replaceAll("'", "&#39;");
|
|
12000
11866
|
var statusRank2 = {
|
|
12001
11867
|
pass: 0,
|
|
@@ -12005,19 +11871,28 @@ var statusRank2 = {
|
|
|
12005
11871
|
var worstStatus = (statuses) => statuses.reduce((worst, status) => statusRank2[status] > statusRank2[worst] ? status : worst, "pass");
|
|
12006
11872
|
var buildVoiceMediaPipelineReport = (options = {}) => {
|
|
12007
11873
|
const frames = options.frames ?? [];
|
|
12008
|
-
const calibration =
|
|
12009
|
-
const vad =
|
|
11874
|
+
const calibration = buildMediaPipelineCalibrationReport(options);
|
|
11875
|
+
const vad = buildMediaVadReport({
|
|
12010
11876
|
frames,
|
|
12011
11877
|
maxSilenceFrames: options.maxSilenceFrames,
|
|
12012
11878
|
minSpeechFrames: options.minSpeechFrames,
|
|
12013
11879
|
speechEndThreshold: options.speechEndThreshold,
|
|
12014
11880
|
speechStartThreshold: options.speechStartThreshold
|
|
12015
11881
|
});
|
|
12016
|
-
const interruption =
|
|
11882
|
+
const interruption = buildMediaInterruptionReport({
|
|
12017
11883
|
frames,
|
|
12018
11884
|
maxInterruptionLatencyMs: options.maxInterruptionLatencyMs
|
|
12019
11885
|
});
|
|
12020
|
-
const
|
|
11886
|
+
const quality = buildMediaQualityReport({
|
|
11887
|
+
frames,
|
|
11888
|
+
maxBackpressureEvents: options.maxMediaBackpressureEvents,
|
|
11889
|
+
maxGapMs: options.maxMediaGapMs,
|
|
11890
|
+
maxJitterMs: options.maxMediaJitterMs,
|
|
11891
|
+
maxTimestampDriftMs: options.maxMediaTimestampDriftMs,
|
|
11892
|
+
minSpeechRatio: options.minMediaSpeechRatio,
|
|
11893
|
+
transport: options.transport
|
|
11894
|
+
});
|
|
11895
|
+
const resampling = calibration.inputFormat && calibration.outputFormat ? buildMediaResamplingPlan({
|
|
12021
11896
|
inputFormat: calibration.inputFormat,
|
|
12022
11897
|
outputFormat: calibration.outputFormat
|
|
12023
11898
|
}) : undefined;
|
|
@@ -12025,6 +11900,7 @@ var buildVoiceMediaPipelineReport = (options = {}) => {
|
|
|
12025
11900
|
calibration.status,
|
|
12026
11901
|
vad.status,
|
|
12027
11902
|
interruption.status,
|
|
11903
|
+
quality.status,
|
|
12028
11904
|
resampling?.status ?? "pass",
|
|
12029
11905
|
options.processorGraph?.status ?? "pass",
|
|
12030
11906
|
options.transport?.status ?? "pass"
|
|
@@ -12036,6 +11912,7 @@ var buildVoiceMediaPipelineReport = (options = {}) => {
|
|
|
12036
11912
|
interruption,
|
|
12037
11913
|
ok: status === "pass",
|
|
12038
11914
|
processorGraph: options.processorGraph,
|
|
11915
|
+
quality,
|
|
12039
11916
|
resampling,
|
|
12040
11917
|
status,
|
|
12041
11918
|
surface: options.surface ?? "voice-media-pipeline",
|
|
@@ -12072,6 +11949,25 @@ var evaluateVoiceMediaPipelineEvidence = (report, input = {}) => {
|
|
|
12072
11949
|
if (input.requireResamplingReady && report.calibration.resamplingRequired && !report.resampling) {
|
|
12073
11950
|
issues.push("Expected resampling plan when calibration requires resampling.");
|
|
12074
11951
|
}
|
|
11952
|
+
if (input.requireQualityPass && report.quality.status !== "pass") {
|
|
11953
|
+
issues.push(`Expected media quality proof to pass, found ${report.quality.status}.`);
|
|
11954
|
+
}
|
|
11955
|
+
const maxMediaGapMs = input.maxMediaGapMs;
|
|
11956
|
+
if (maxMediaGapMs !== undefined && report.quality.gapsMs.some((gap) => gap > maxMediaGapMs)) {
|
|
11957
|
+
issues.push(`Expected media gaps at or below ${String(maxMediaGapMs)}ms.`);
|
|
11958
|
+
}
|
|
11959
|
+
if (input.maxMediaJitterMs !== undefined && report.quality.jitterMs !== undefined && report.quality.jitterMs > input.maxMediaJitterMs) {
|
|
11960
|
+
issues.push(`Expected media jitter at or below ${String(input.maxMediaJitterMs)}ms, found ${String(report.quality.jitterMs)}ms.`);
|
|
11961
|
+
}
|
|
11962
|
+
if (input.maxMediaTimestampDriftMs !== undefined && report.quality.timestampDriftMs !== undefined && report.quality.timestampDriftMs > input.maxMediaTimestampDriftMs) {
|
|
11963
|
+
issues.push(`Expected media timestamp drift at or below ${String(input.maxMediaTimestampDriftMs)}ms, found ${String(report.quality.timestampDriftMs)}ms.`);
|
|
11964
|
+
}
|
|
11965
|
+
if (input.minMediaSpeechRatio !== undefined && report.quality.speechRatio < input.minMediaSpeechRatio) {
|
|
11966
|
+
issues.push(`Expected media speech ratio at or above ${String(input.minMediaSpeechRatio)}, found ${String(report.quality.speechRatio)}.`);
|
|
11967
|
+
}
|
|
11968
|
+
if (input.maxMediaBackpressureEvents !== undefined && report.quality.backpressureEvents > input.maxMediaBackpressureEvents) {
|
|
11969
|
+
issues.push(`Expected at most ${String(input.maxMediaBackpressureEvents)} media backpressure event(s), found ${String(report.quality.backpressureEvents)}.`);
|
|
11970
|
+
}
|
|
12075
11971
|
if (input.requireProcessorGraph && !report.processorGraph) {
|
|
12076
11972
|
issues.push("Expected media processor graph evidence.");
|
|
12077
11973
|
}
|
|
@@ -12120,6 +12016,10 @@ var renderVoiceMediaPipelineMarkdown = (report) => [
|
|
|
12120
12016
|
`- Resampling required: ${report.calibration.resamplingRequired ? "yes" : "no"}`,
|
|
12121
12017
|
`- VAD segments: ${String(report.vad.segments.length)}`,
|
|
12122
12018
|
`- Interruption frames: ${String(report.interruption.interruptionFrames)}`,
|
|
12019
|
+
`- Media quality: ${report.quality.status}`,
|
|
12020
|
+
`- Media quality gaps: ${String(report.quality.gapCount)}`,
|
|
12021
|
+
`- Media quality jitter: ${String(report.quality.jitterMs ?? "n/a")}ms`,
|
|
12022
|
+
`- Media quality speech ratio: ${String(report.quality.speechRatio)}`,
|
|
12123
12023
|
`- Processor graph: ${report.processorGraph ? `${report.processorGraph.name} (${String(report.processorGraph.nodes.length)} nodes)` : "n/a"}`,
|
|
12124
12024
|
`- Processor graph emitted frames: ${String(report.processorGraph?.emittedFrames ?? 0)}`,
|
|
12125
12025
|
`- Processor graph dropped frames: ${String(report.processorGraph?.droppedFrames ?? 0)}`,
|
|
@@ -12132,13 +12032,18 @@ var renderVoiceMediaPipelineMarkdown = (report) => [
|
|
|
12132
12032
|
"",
|
|
12133
12033
|
...[
|
|
12134
12034
|
...report.calibration.issues,
|
|
12135
|
-
...report.interruption.issues
|
|
12035
|
+
...report.interruption.issues,
|
|
12036
|
+
...report.quality.issues
|
|
12136
12037
|
].map((issue) => `- ${issue.severity.toUpperCase()} ${issue.code}: ${issue.message}`),
|
|
12137
|
-
...report.calibration.issues.length + report.interruption.issues.length === 0 ? ["- None"] : []
|
|
12038
|
+
...report.calibration.issues.length + report.interruption.issues.length + report.quality.issues.length === 0 ? ["- None"] : []
|
|
12138
12039
|
].join(`
|
|
12139
12040
|
`);
|
|
12140
12041
|
var renderVoiceMediaPipelineHTML = (report, title = "Voice Media Pipeline Proof") => {
|
|
12141
|
-
const issues = [
|
|
12042
|
+
const issues = [
|
|
12043
|
+
...report.calibration.issues,
|
|
12044
|
+
...report.interruption.issues,
|
|
12045
|
+
...report.quality.issues
|
|
12046
|
+
].map((issue) => `<li class="${escapeHtml15(issue.severity)}"><strong>${escapeHtml15(issue.code)}</strong>: ${escapeHtml15(issue.message)}</li>`).join("");
|
|
12142
12047
|
const segments = report.vad.segments.map((segment) => `<tr><td>${escapeHtml15(segment.segmentId)}</td><td>${escapeHtml15(segment.frameCount)}</td><td>${escapeHtml15(segment.durationMs ?? "n/a")}</td><td>${escapeHtml15(segment.turnId ?? "n/a")}</td></tr>`).join("");
|
|
12143
12048
|
return `<!doctype html><html lang="en"><head><meta charset="utf-8" /><meta name="viewport" content="width=device-width,initial-scale=1" /><title>${escapeHtml15(title)}</title><style>body{background:#101418;color:#f7f3e8;font-family:ui-sans-serif,system-ui,sans-serif;margin:0}main{margin:auto;max-width:1100px;padding:32px}.hero,.card{background:#17201d;border:1px solid #2e3d36;border-radius:24px;margin-bottom:16px;padding:22px}.hero{background:linear-gradient(135deg,rgba(20,184,166,.18),rgba(245,158,11,.12))}.eyebrow{color:#5eead4;font-weight:900;letter-spacing:.1em;text-transform:uppercase}h1{font-size:clamp(2.3rem,6vw,4.8rem);letter-spacing:-.06em;line-height:.9;margin:.2rem 0 1rem}.summary{display:grid;gap:12px;grid-template-columns:repeat(auto-fit,minmax(170px,1fr))}.metric{background:#101814;border:1px solid #2e3d36;border-radius:18px;padding:14px}.metric span{color:#a8b5ad;display:block;font-size:.78rem;text-transform:uppercase}.metric strong{display:block;font-size:1.65rem;margin-top:5px}.status{border:1px solid #64748b;border-radius:999px;display:inline-flex;font-weight:900;padding:7px 11px}.pass{color:#86efac}.warn,.warning{color:#fde68a}.fail,.error{color:#fecaca}table{border-collapse:collapse;width:100%}td,th{border-bottom:1px solid #2e3d36;padding:10px;text-align:left}</style></head><body><main><section class="hero"><p class="eyebrow">Native media pipeline</p><h1>${escapeHtml15(title)}</h1><p class="status ${escapeHtml15(report.status)}">${escapeHtml15(report.status)}</p><p>${escapeHtml15(report.surface)}</p><section class="summary"><div class="metric"><span>Frames</span><strong>${String(report.frames)}</strong></div><div class="metric"><span>Input audio</span><strong>${String(report.calibration.inputAudioFrames)}</strong></div><div class="metric"><span>Assistant audio</span><strong>${String(report.calibration.assistantAudioFrames)}</strong></div><div class="metric"><span>Trace 
linked</span><strong>${String(report.calibration.traceLinkedFrames)}</strong></div><div class="metric"><span>First audio</span><strong>${escapeHtml15(report.calibration.firstAudioLatencyMs ?? "n/a")}ms</strong></div><div class="metric"><span>VAD segments</span><strong>${String(report.vad.segments.length)}</strong></div><div class="metric"><span>Interruptions</span><strong>${String(report.interruption.interruptionFrames)}</strong></div><div class="metric"><span>Processor graph</span><strong>${String(report.processorGraph?.nodes.length ?? 0)} nodes</strong></div><div class="metric"><span>Graph out/drop</span><strong>${String(report.processorGraph?.emittedFrames ?? 0)}/${String(report.processorGraph?.droppedFrames ?? 0)}</strong></div><div class="metric"><span>Resampling</span><strong>${report.calibration.resamplingRequired ? "required" : "not required"}</strong></div><div class="metric"><span>Transport</span><strong>${escapeHtml15(report.transport?.state ?? "n/a")}</strong></div><div class="metric"><span>Transport in/out</span><strong>${String(report.transport?.inputFrames ?? 0)}/${String(report.transport?.outputFrames ?? 0)}</strong></div><div class="metric"><span>Backpressure</span><strong>${String(report.transport?.backpressureEvents ?? 0)}</strong></div></section></section><section class="card"><h2>Issues</h2><ul>${issues || '<li class="pass">No media pipeline issues.</li>'}</ul></section><section class="card"><h2>VAD Segments</h2><table><thead><tr><th>Segment</th><th>Frames</th><th>Duration ms</th><th>Turn</th></tr></thead><tbody>${segments || '<tr><td colspan="4">No VAD segments.</td></tr>'}</tbody></table></section></main></body></html>`;
|
|
12144
12049
|
};
|
|
@@ -19472,7 +19377,7 @@ var percentile4 = (values, percentileValue) => {
|
|
|
19472
19377
|
const index = Math.min(sorted.length - 1, Math.max(0, Math.ceil(percentileValue / 100 * sorted.length) - 1));
|
|
19473
19378
|
return Math.round(sorted[index] ?? 0);
|
|
19474
19379
|
};
|
|
19475
|
-
var
|
|
19380
|
+
var average2 = (values) => values.length === 0 ? undefined : Math.round(values.reduce((total, value) => total + value, 0) / values.length);
|
|
19476
19381
|
var resolveBudget = (stage, options) => ({
|
|
19477
19382
|
failAfterMs: options.budgets?.[stage]?.failAfterMs ?? options.failAfterMs ?? DEFAULT_FAIL_AFTER_MS2,
|
|
19478
19383
|
warnAfterMs: options.budgets?.[stage]?.warnAfterMs ?? options.warnAfterMs ?? DEFAULT_WARN_AFTER_MS2
|
|
@@ -19666,7 +19571,7 @@ var summarizeStage = (stage, measurements, options) => {
|
|
|
19666
19571
|
const failed = stageMeasurements.filter((measurement) => measurement.status === "fail").length;
|
|
19667
19572
|
const warnings = stageMeasurements.filter((measurement) => measurement.status === "warn").length;
|
|
19668
19573
|
return {
|
|
19669
|
-
averageMs:
|
|
19574
|
+
averageMs: average2(latencies),
|
|
19670
19575
|
budget: resolveBudget(stage, options),
|
|
19671
19576
|
failed,
|
|
19672
19577
|
label: STAGE_LABELS[stage],
|
|
@@ -31490,7 +31395,7 @@ var statusRank6 = {
|
|
|
31490
31395
|
warn: 1,
|
|
31491
31396
|
fail: 2
|
|
31492
31397
|
};
|
|
31493
|
-
var statusExceeds2 = (actual,
|
|
31398
|
+
var statusExceeds2 = (actual, max2) => statusRank6[actual] > statusRank6[max2];
|
|
31494
31399
|
var buildVoiceProviderContractMatrix = (input) => {
|
|
31495
31400
|
const rows = input.contracts.map((contract) => {
|
|
31496
31401
|
const configured = contract.configured !== false;
|
|
@@ -34971,11 +34876,7 @@ export {
|
|
|
34971
34876
|
createVoiceMemoryAuditSinkDeliveryStore,
|
|
34972
34877
|
createVoiceMemoryAuditEventStore,
|
|
34973
34878
|
createVoiceMemoryAssistantMemoryStore,
|
|
34974
|
-
createVoiceMediaTransport,
|
|
34975
|
-
createVoiceMediaProcessorGraph,
|
|
34976
34879
|
createVoiceMediaPipelineRoutes,
|
|
34977
|
-
createVoiceMediaFrameTransformPipeline,
|
|
34978
|
-
createVoiceMediaFrame,
|
|
34979
34880
|
createVoiceLiveOpsRoutes,
|
|
34980
34881
|
createVoiceLiveOpsController,
|
|
34981
34882
|
createVoiceLiveLatencyRoutes,
|
|
@@ -35124,13 +35025,7 @@ export {
|
|
|
35124
35025
|
buildVoiceObservabilityExport,
|
|
35125
35026
|
buildVoiceObservabilityArtifactIndex,
|
|
35126
35027
|
buildVoiceMonitorRunReport,
|
|
35127
|
-
buildVoiceMediaVadReport,
|
|
35128
|
-
buildVoiceMediaTransportReport,
|
|
35129
|
-
buildVoiceMediaResamplingPlan,
|
|
35130
|
-
buildVoiceMediaProcessorGraphReport,
|
|
35131
35028
|
buildVoiceMediaPipelineReport,
|
|
35132
|
-
buildVoiceMediaPipelineCalibrationReport,
|
|
35133
|
-
buildVoiceMediaInterruptionReport,
|
|
35134
35029
|
buildVoiceLiveOpsControlState,
|
|
35135
35030
|
buildVoiceLatencySLOGate,
|
|
35136
35031
|
buildVoiceIncidentBundle,
|
|
@@ -1,33 +1,44 @@
|
|
|
1
1
|
import { Elysia } from 'elysia';
|
|
2
|
-
import { type
|
|
3
|
-
export type VoiceMediaPipelineReportOptions =
|
|
4
|
-
frames?: readonly
|
|
2
|
+
import { type MediaFrame, type MediaInterruptionReport, type MediaPipelineCalibrationInput, type MediaPipelineCalibrationReport, type MediaPipelineStatus, type MediaProcessorGraphReport, type MediaQualityReport, type MediaResamplingPlan, type MediaTransportReport, type MediaVadReport } from '@absolutejs/media';
|
|
3
|
+
export type VoiceMediaPipelineReportOptions = MediaPipelineCalibrationInput & {
|
|
4
|
+
frames?: readonly MediaFrame[];
|
|
5
5
|
maxInterruptionLatencyMs?: number;
|
|
6
|
+
maxMediaBackpressureEvents?: number;
|
|
7
|
+
maxMediaGapMs?: number;
|
|
8
|
+
maxMediaJitterMs?: number;
|
|
9
|
+
maxMediaTimestampDriftMs?: number;
|
|
6
10
|
maxSilenceFrames?: number;
|
|
11
|
+
minMediaSpeechRatio?: number;
|
|
7
12
|
minSpeechFrames?: number;
|
|
8
|
-
processorGraph?:
|
|
13
|
+
processorGraph?: MediaProcessorGraphReport;
|
|
9
14
|
speechEndThreshold?: number;
|
|
10
15
|
speechStartThreshold?: number;
|
|
11
|
-
transport?:
|
|
16
|
+
transport?: MediaTransportReport;
|
|
12
17
|
};
|
|
13
18
|
export type VoiceMediaPipelineReport = {
|
|
14
|
-
calibration:
|
|
19
|
+
calibration: MediaPipelineCalibrationReport;
|
|
15
20
|
checkedAt: number;
|
|
16
21
|
frames: number;
|
|
17
|
-
interruption:
|
|
22
|
+
interruption: MediaInterruptionReport;
|
|
18
23
|
ok: boolean;
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
quality: MediaQualityReport;
|
|
25
|
+
resampling?: MediaResamplingPlan;
|
|
26
|
+
processorGraph?: MediaProcessorGraphReport;
|
|
27
|
+
status: MediaPipelineStatus;
|
|
22
28
|
surface: string;
|
|
23
|
-
transport?:
|
|
24
|
-
vad:
|
|
29
|
+
transport?: MediaTransportReport;
|
|
30
|
+
vad: MediaVadReport;
|
|
25
31
|
};
|
|
26
32
|
export type VoiceMediaPipelineAssertionInput = {
|
|
27
33
|
maxFirstAudioLatencyMs?: number;
|
|
28
34
|
maxInterruptionLatencyMs?: number;
|
|
35
|
+
maxMediaBackpressureEvents?: number;
|
|
36
|
+
maxMediaGapMs?: number;
|
|
37
|
+
maxMediaJitterMs?: number;
|
|
38
|
+
maxMediaTimestampDriftMs?: number;
|
|
29
39
|
minAssistantAudioFrames?: number;
|
|
30
40
|
minInputAudioFrames?: number;
|
|
41
|
+
minMediaSpeechRatio?: number;
|
|
31
42
|
minProcessorGraphEmittedFrames?: number;
|
|
32
43
|
minProcessorGraphNodes?: number;
|
|
33
44
|
minTransportInputFrames?: number;
|
|
@@ -38,13 +49,14 @@ export type VoiceMediaPipelineAssertionInput = {
|
|
|
38
49
|
requireInterruptionFrame?: boolean;
|
|
39
50
|
requirePass?: boolean;
|
|
40
51
|
requireProcessorGraph?: boolean;
|
|
52
|
+
requireQualityPass?: boolean;
|
|
41
53
|
requireResamplingReady?: boolean;
|
|
42
54
|
requireTransportConnected?: boolean;
|
|
43
55
|
};
|
|
44
56
|
export type VoiceMediaPipelineAssertionReport = {
|
|
45
57
|
issues: string[];
|
|
46
58
|
ok: boolean;
|
|
47
|
-
status:
|
|
59
|
+
status: MediaPipelineStatus;
|
|
48
60
|
surface: string;
|
|
49
61
|
};
|
|
50
62
|
export type VoiceMediaPipelineRoutesOptions = VoiceMediaPipelineReportOptions & {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@absolutejs/voice",
|
|
3
|
-
"version": "0.0.22-beta.
|
|
3
|
+
"version": "0.0.22-beta.315",
|
|
4
4
|
"description": "Voice primitives and Elysia plugin for AbsoluteJS",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -245,6 +245,9 @@
|
|
|
245
245
|
"optional": true
|
|
246
246
|
}
|
|
247
247
|
},
|
|
248
|
+
"dependencies": {
|
|
249
|
+
"@absolutejs/media": "0.0.1-beta.2"
|
|
250
|
+
},
|
|
248
251
|
"devDependencies": {
|
|
249
252
|
"@absolutejs/absolute": "0.19.0-beta.646",
|
|
250
253
|
"@angular/core": "^21.0.0",
|
package/dist/mediaPipeline.d.ts
DELETED
|
@@ -1,219 +0,0 @@
|
|
|
1
|
-
import type { AudioFormat } from './types';
|
|
2
|
-
export type VoiceMediaFrameKind = 'assistant-audio' | 'input-audio' | 'interruption' | 'metadata' | 'transcript' | 'turn-commit';
|
|
3
|
-
export type VoiceMediaFrameSource = 'browser' | 'provider' | 'telephony' | 'voice-runtime';
|
|
4
|
-
export type VoiceMediaPipelineStatus = 'fail' | 'pass' | 'warn';
|
|
5
|
-
export type VoiceMediaResamplingPlan = {
|
|
6
|
-
inputFormat: AudioFormat;
|
|
7
|
-
outputFormat: AudioFormat;
|
|
8
|
-
ratio: number;
|
|
9
|
-
required: boolean;
|
|
10
|
-
status: VoiceMediaPipelineStatus;
|
|
11
|
-
};
|
|
12
|
-
export type VoiceMediaFrame = {
|
|
13
|
-
at?: number;
|
|
14
|
-
audio?: ArrayBuffer | ArrayBufferView;
|
|
15
|
-
durationMs?: number;
|
|
16
|
-
format?: AudioFormat;
|
|
17
|
-
id: string;
|
|
18
|
-
kind: VoiceMediaFrameKind;
|
|
19
|
-
latencyMs?: number;
|
|
20
|
-
metadata?: Record<string, unknown>;
|
|
21
|
-
sessionId?: string;
|
|
22
|
-
source: VoiceMediaFrameSource | (string & {});
|
|
23
|
-
traceEventId?: string;
|
|
24
|
-
turnId?: string;
|
|
25
|
-
};
|
|
26
|
-
export type VoiceMediaFrameTransform = {
|
|
27
|
-
inputFormat?: AudioFormat;
|
|
28
|
-
name: string;
|
|
29
|
-
outputFormat?: AudioFormat;
|
|
30
|
-
transform: (frame: VoiceMediaFrame) => VoiceMediaFrame | readonly VoiceMediaFrame[] | undefined | Promise<VoiceMediaFrame | readonly VoiceMediaFrame[] | undefined>;
|
|
31
|
-
};
|
|
32
|
-
export type VoiceMediaFrameTransformPipeline = {
|
|
33
|
-
push: (frame: VoiceMediaFrame) => Promise<readonly VoiceMediaFrame[]>;
|
|
34
|
-
pushMany: (frames: readonly VoiceMediaFrame[]) => Promise<readonly VoiceMediaFrame[]>;
|
|
35
|
-
transforms: readonly VoiceMediaFrameTransform[];
|
|
36
|
-
};
|
|
37
|
-
export type VoiceMediaProcessorNodeKind = 'branch' | 'filter' | 'processor' | 'sink';
|
|
38
|
-
export type VoiceMediaProcessorNodeEvent = {
|
|
39
|
-
at: number;
|
|
40
|
-
dropped: number;
|
|
41
|
-
emitted: number;
|
|
42
|
-
frameId: string;
|
|
43
|
-
inputs: number;
|
|
44
|
-
node: string;
|
|
45
|
-
};
|
|
46
|
-
export type VoiceMediaProcessorNodeReport = {
|
|
47
|
-
droppedFrames: number;
|
|
48
|
-
emittedFrames: number;
|
|
49
|
-
events: readonly VoiceMediaProcessorNodeEvent[];
|
|
50
|
-
inputFrames: number;
|
|
51
|
-
kind: VoiceMediaProcessorNodeKind;
|
|
52
|
-
name: string;
|
|
53
|
-
status: VoiceMediaPipelineStatus;
|
|
54
|
-
};
|
|
55
|
-
export type VoiceMediaProcessorGraphReport = {
|
|
56
|
-
checkedAt: number;
|
|
57
|
-
droppedFrames: number;
|
|
58
|
-
emittedFrames: number;
|
|
59
|
-
events: readonly VoiceMediaProcessorNodeEvent[];
|
|
60
|
-
inputFrames: number;
|
|
61
|
-
name: string;
|
|
62
|
-
nodes: readonly VoiceMediaProcessorNodeReport[];
|
|
63
|
-
status: VoiceMediaPipelineStatus;
|
|
64
|
-
};
|
|
65
|
-
export type VoiceMediaProcessorNode = {
|
|
66
|
-
inputFormat?: AudioFormat;
|
|
67
|
-
kind?: VoiceMediaProcessorNodeKind;
|
|
68
|
-
name: string;
|
|
69
|
-
outputFormat?: AudioFormat;
|
|
70
|
-
process: (frame: VoiceMediaFrame) => boolean | VoiceMediaFrame | readonly VoiceMediaFrame[] | undefined | Promise<boolean | VoiceMediaFrame | readonly VoiceMediaFrame[] | undefined>;
|
|
71
|
-
};
|
|
72
|
-
export type VoiceMediaProcessorGraph = {
|
|
73
|
-
nodes: readonly VoiceMediaProcessorNode[];
|
|
74
|
-
process: (frame: VoiceMediaFrame) => Promise<readonly VoiceMediaFrame[]>;
|
|
75
|
-
processMany: (frames: readonly VoiceMediaFrame[]) => Promise<readonly VoiceMediaFrame[]>;
|
|
76
|
-
report: () => VoiceMediaProcessorGraphReport;
|
|
77
|
-
};
|
|
78
|
-
export type VoiceMediaTransportAdapter = {
|
|
79
|
-
close?: () => Promise<void> | void;
|
|
80
|
-
connect?: () => Promise<void> | void;
|
|
81
|
-
inputFormat?: AudioFormat;
|
|
82
|
-
name: string;
|
|
83
|
-
onFrame?: (handler: (frame: VoiceMediaFrame) => Promise<void> | void) => () => void;
|
|
84
|
-
outputFormat?: AudioFormat;
|
|
85
|
-
send: (frame: VoiceMediaFrame) => Promise<void> | void;
|
|
86
|
-
};
|
|
87
|
-
export type VoiceMediaTransportState = 'closed' | 'closing' | 'failed' | 'idle' | 'open';
|
|
88
|
-
export type VoiceMediaTransportEventKind = 'backpressure' | 'close' | 'connect' | 'error' | 'frame-in' | 'frame-out';
|
|
89
|
-
export type VoiceMediaTransportEvent = {
|
|
90
|
-
at: number;
|
|
91
|
-
bufferedFrames?: number;
|
|
92
|
-
error?: string;
|
|
93
|
-
frameId?: string;
|
|
94
|
-
kind: VoiceMediaTransportEventKind;
|
|
95
|
-
state: VoiceMediaTransportState;
|
|
96
|
-
};
|
|
97
|
-
export type VoiceMediaTransportReport = {
|
|
98
|
-
backpressureEvents: number;
|
|
99
|
-
checkedAt: number;
|
|
100
|
-
closed: boolean;
|
|
101
|
-
connected: boolean;
|
|
102
|
-
events: readonly VoiceMediaTransportEvent[];
|
|
103
|
-
failed: boolean;
|
|
104
|
-
inputFrames: number;
|
|
105
|
-
name: string;
|
|
106
|
-
outputFrames: number;
|
|
107
|
-
state: VoiceMediaTransportState;
|
|
108
|
-
status: VoiceMediaPipelineStatus;
|
|
109
|
-
};
|
|
110
|
-
export type VoiceMediaTransport = VoiceMediaTransportAdapter & {
|
|
111
|
-
events: () => readonly VoiceMediaTransportEvent[];
|
|
112
|
-
receive: (frame: VoiceMediaFrame) => Promise<void>;
|
|
113
|
-
report: () => VoiceMediaTransportReport;
|
|
114
|
-
state: () => VoiceMediaTransportState;
|
|
115
|
-
};
|
|
116
|
-
export type VoiceMediaTransportOptions = {
|
|
117
|
-
inputFormat?: AudioFormat;
|
|
118
|
-
maxBufferedFrames?: number;
|
|
119
|
-
name: string;
|
|
120
|
-
onClose?: () => Promise<void> | void;
|
|
121
|
-
onConnect?: () => Promise<void> | void;
|
|
122
|
-
onSend?: (frame: VoiceMediaFrame) => Promise<void> | void;
|
|
123
|
-
outputFormat?: AudioFormat;
|
|
124
|
-
};
|
|
125
|
-
export type VoiceMediaPipelineCalibrationInput = {
|
|
126
|
-
expectedInputFormat?: AudioFormat;
|
|
127
|
-
expectedOutputFormat?: AudioFormat;
|
|
128
|
-
frames?: readonly VoiceMediaFrame[];
|
|
129
|
-
inputFormat?: AudioFormat;
|
|
130
|
-
maxBackpressureFrames?: number;
|
|
131
|
-
maxFirstAudioLatencyMs?: number;
|
|
132
|
-
maxJitterMs?: number;
|
|
133
|
-
outputFormat?: AudioFormat;
|
|
134
|
-
requireInterruptionFrame?: boolean;
|
|
135
|
-
requireTraceEvidence?: boolean;
|
|
136
|
-
surface?: string;
|
|
137
|
-
};
|
|
138
|
-
export type VoiceMediaPipelineCalibrationIssue = {
|
|
139
|
-
code: string;
|
|
140
|
-
message: string;
|
|
141
|
-
severity: 'error' | 'warning';
|
|
142
|
-
};
|
|
143
|
-
export type VoiceMediaPipelineCalibrationReport = {
|
|
144
|
-
assistantAudioFrames: number;
|
|
145
|
-
backpressureFrames: number;
|
|
146
|
-
checkedAt: number;
|
|
147
|
-
firstAudioLatencyMs?: number;
|
|
148
|
-
inputAudioFrames: number;
|
|
149
|
-
inputFormat?: AudioFormat;
|
|
150
|
-
interruptionFrames: number;
|
|
151
|
-
issues: VoiceMediaPipelineCalibrationIssue[];
|
|
152
|
-
jitterMs?: number;
|
|
153
|
-
outputFormat?: AudioFormat;
|
|
154
|
-
resamplingRequired: boolean;
|
|
155
|
-
resamplingTargetHz?: number;
|
|
156
|
-
status: VoiceMediaPipelineStatus;
|
|
157
|
-
surface: string;
|
|
158
|
-
traceLinkedFrames: number;
|
|
159
|
-
turnCommitFrames: number;
|
|
160
|
-
};
|
|
161
|
-
export type VoiceMediaVadInput = {
|
|
162
|
-
frames?: readonly VoiceMediaFrame[];
|
|
163
|
-
maxSilenceFrames?: number;
|
|
164
|
-
minSpeechFrames?: number;
|
|
165
|
-
speechEndThreshold?: number;
|
|
166
|
-
speechStartThreshold?: number;
|
|
167
|
-
};
|
|
168
|
-
export type VoiceMediaVadSegment = {
|
|
169
|
-
durationMs?: number;
|
|
170
|
-
endAt?: number;
|
|
171
|
-
frameCount: number;
|
|
172
|
-
segmentId: string;
|
|
173
|
-
sessionId?: string;
|
|
174
|
-
startAt?: number;
|
|
175
|
-
turnId?: string;
|
|
176
|
-
};
|
|
177
|
-
export type VoiceMediaVadReport = {
|
|
178
|
-
checkedAt: number;
|
|
179
|
-
inputAudioFrames: number;
|
|
180
|
-
segments: VoiceMediaVadSegment[];
|
|
181
|
-
status: VoiceMediaPipelineStatus;
|
|
182
|
-
};
|
|
183
|
-
export type VoiceMediaInterruptionInput = {
|
|
184
|
-
frames?: readonly VoiceMediaFrame[];
|
|
185
|
-
maxInterruptionLatencyMs?: number;
|
|
186
|
-
};
|
|
187
|
-
export type VoiceMediaInterruptionReport = {
|
|
188
|
-
checkedAt: number;
|
|
189
|
-
interruptionFrames: number;
|
|
190
|
-
issues: VoiceMediaPipelineCalibrationIssue[];
|
|
191
|
-
latenciesMs: number[];
|
|
192
|
-
status: VoiceMediaPipelineStatus;
|
|
193
|
-
};
|
|
194
|
-
export declare const createVoiceMediaFrame: (frame: VoiceMediaFrame) => VoiceMediaFrame;
|
|
195
|
-
export declare const buildVoiceMediaTransportReport: (input: {
|
|
196
|
-
events?: readonly VoiceMediaTransportEvent[];
|
|
197
|
-
name: string;
|
|
198
|
-
state?: VoiceMediaTransportState;
|
|
199
|
-
}) => VoiceMediaTransportReport;
|
|
200
|
-
export declare const createVoiceMediaTransport: (options: VoiceMediaTransportOptions) => VoiceMediaTransport;
|
|
201
|
-
export declare const buildVoiceMediaResamplingPlan: (input: {
|
|
202
|
-
inputFormat: AudioFormat;
|
|
203
|
-
outputFormat: AudioFormat;
|
|
204
|
-
}) => VoiceMediaResamplingPlan;
|
|
205
|
-
export declare const createVoiceMediaFrameTransformPipeline: (input?: {
|
|
206
|
-
transforms?: readonly VoiceMediaFrameTransform[];
|
|
207
|
-
}) => VoiceMediaFrameTransformPipeline;
|
|
208
|
-
export declare const buildVoiceMediaProcessorGraphReport: (input: {
|
|
209
|
-
events?: readonly VoiceMediaProcessorNodeEvent[];
|
|
210
|
-
name: string;
|
|
211
|
-
nodes: readonly VoiceMediaProcessorNode[];
|
|
212
|
-
}) => VoiceMediaProcessorGraphReport;
|
|
213
|
-
export declare const createVoiceMediaProcessorGraph: (input?: {
|
|
214
|
-
name?: string;
|
|
215
|
-
nodes?: readonly VoiceMediaProcessorNode[];
|
|
216
|
-
}) => VoiceMediaProcessorGraph;
|
|
217
|
-
export declare const buildVoiceMediaVadReport: (input?: VoiceMediaVadInput) => VoiceMediaVadReport;
|
|
218
|
-
export declare const buildVoiceMediaInterruptionReport: (input?: VoiceMediaInterruptionInput) => VoiceMediaInterruptionReport;
|
|
219
|
-
export declare const buildVoiceMediaPipelineCalibrationReport: (input?: VoiceMediaPipelineCalibrationInput) => VoiceMediaPipelineCalibrationReport;
|