@absolutejs/voice 0.0.22-beta.309 → 0.0.22-beta.310

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -15,8 +15,8 @@ export type { VoiceRealtimeChannelAssertionInput, VoiceRealtimeChannelAssertionR
15
15
  export { assertVoiceRealtimeProviderContractEvidence, buildVoiceRealtimeProviderContractMatrix, createVoiceRealtimeProviderContractMatrixPreset, createVoiceRealtimeProviderContractRoutes, evaluateVoiceRealtimeProviderContractEvidence, renderVoiceRealtimeProviderContractHTML } from './realtimeProviderContracts';
16
16
  export type { VoiceRealtimeProviderContractAssertionInput, VoiceRealtimeProviderContractAssertionReport, VoiceRealtimeProviderContractCapability, VoiceRealtimeProviderContractCheck, VoiceRealtimeProviderContractDefinition, VoiceRealtimeProviderContractMatrixPresetOptions, VoiceRealtimeProviderContractMatrixInput, VoiceRealtimeProviderContractMatrixReport, VoiceRealtimeProviderContractRoutesOptions, VoiceRealtimeProviderContractRow, VoiceRealtimeProviderPresetProvider, VoiceRealtimeProviderContractStatus } from './realtimeProviderContracts';
17
17
  export { buildVoiceDiagnosticsMarkdown, createVoiceDiagnosticsRoutes, resolveVoiceDiagnosticsTraceFilter } from './diagnosticsRoutes';
18
- export { buildVoiceMediaPipelineCalibrationReport, createVoiceMediaFrame } from './mediaPipeline';
19
- export type { VoiceMediaFrame, VoiceMediaFrameKind, VoiceMediaFrameSource, VoiceMediaPipelineCalibrationInput, VoiceMediaPipelineCalibrationIssue, VoiceMediaPipelineCalibrationReport, VoiceMediaPipelineStatus } from './mediaPipeline';
18
+ export { buildVoiceMediaInterruptionReport, buildVoiceMediaPipelineCalibrationReport, buildVoiceMediaResamplingPlan, buildVoiceMediaVadReport, createVoiceMediaFrame, createVoiceMediaFrameTransformPipeline } from './mediaPipeline';
19
+ export type { VoiceMediaFrame, VoiceMediaFrameKind, VoiceMediaFrameSource, VoiceMediaFrameTransform, VoiceMediaFrameTransformPipeline, VoiceMediaInterruptionInput, VoiceMediaInterruptionReport, VoiceMediaPipelineCalibrationInput, VoiceMediaPipelineCalibrationIssue, VoiceMediaPipelineCalibrationReport, VoiceMediaPipelineStatus, VoiceMediaResamplingPlan, VoiceMediaTransportAdapter, VoiceMediaVadInput, VoiceMediaVadReport, VoiceMediaVadSegment } from './mediaPipeline';
20
20
  export { buildVoiceDemoReadyReport, createVoiceDemoReadyRoutes, renderVoiceDemoReadyHTML } from './demoReadyRoutes';
21
21
  export { buildVoiceDeliverySinkReport, createVoiceDeliverySinkDescriptor, createVoiceDeliverySinkPair, createVoiceDeliverySinkRoutes, createVoiceFileDeliverySink, createVoicePostgresDeliverySink, createVoiceS3DeliverySink, createVoiceSQLiteDeliverySink, createVoiceWebhookDeliverySink, renderVoiceDeliverySinkHTML } from './deliverySinkRoutes';
22
22
  export { buildVoiceOpsActionHistoryReport, createVoiceOpsActionAuditRoutes, recordVoiceOpsActionAudit, renderVoiceOpsActionHistoryHTML } from './opsActionAuditRoutes';
package/dist/index.js CHANGED
@@ -11620,6 +11620,141 @@ var numericMetadata = (frame, key) => {
11620
11620
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
11621
11621
  };
11622
11622
  var createVoiceMediaFrame = (frame) => frame;
11623
/**
 * Describes the conversion needed to turn audio in `input.inputFormat` into
 * `input.outputFormat`.
 *
 * @param input Object holding the source (`inputFormat`) and target
 *   (`outputFormat`) audio formats.
 * @returns A plan that echoes both formats and adds:
 *   - `ratio`: output sample rate divided by input sample rate;
 *   - `required`: the negation of `formatMatches2(inputFormat, outputFormat)`
 *     (helper defined elsewhere in this file — presumably a full format
 *     equality check; confirm against its definition);
 *   - `status`: `"pass"` when container, encoding and channel count are equal
 *     on both sides, otherwise `"warn"`.
 */
var buildVoiceMediaResamplingPlan = (input) => {
  const { inputFormat, outputFormat } = input;
  // Sample rate is deliberately not part of this comparison: only a change in
  // container, encoding or channel count downgrades the status.
  const sameLayout =
    inputFormat.container === outputFormat.container &&
    inputFormat.encoding === outputFormat.encoding &&
    inputFormat.channels === outputFormat.channels;
  return {
    inputFormat,
    outputFormat,
    ratio: outputFormat.sampleRateHz / inputFormat.sampleRateHz,
    required: !formatMatches2(inputFormat, outputFormat),
    status: sameLayout ? "pass" : "warn"
  };
};
11633
/**
 * Creates a pipeline that runs media frames through an ordered list of
 * transforms.
 *
 * Each transform is awaited one frame at a time, stage by stage. A transform
 * may return a single frame, an array of frames (fan-out), or `undefined` to
 * drop the frame; the frames produced by one stage are the input of the next.
 *
 * @param input Optional settings; `input.transforms` defaults to an empty list,
 *   in which case frames pass through unchanged.
 * @returns An object exposing `push` (one frame in, resulting frames out),
 *   `pushMany` (frames are pushed sequentially and the results concatenated in
 *   order) and the `transforms` list the pipeline was built with.
 */
var createVoiceMediaFrameTransformPipeline = (input = {}) => {
  const transforms = input.transforms ?? [];
  const push = async (frame) => {
    let pending = [frame];
    for (const stage of transforms) {
      const produced = [];
      for (const candidate of pending) {
        const result = await stage.transform(candidate);
        // `undefined` means the stage dropped this frame.
        if (result !== undefined) {
          produced.push(...(Array.isArray(result) ? result : [result]));
        }
      }
      pending = produced;
    }
    return pending;
  };
  const pushMany = async (frames) => {
    const collected = [];
    for (const frame of frames) {
      const emitted = await push(frame);
      collected.push(...emitted);
    }
    return collected;
  };
  return {
    push,
    pushMany,
    transforms
  };
};
11666
/**
 * Derives a speech score for a frame from its metadata.
 *
 * An explicit boolean `metadata.isSpeech` wins (`true` -> 1, `false` -> 0).
 * Otherwise the first numeric value found under `speechProbability`,
 * `voiceProbability`, `rms` or `energy` (looked up through `numericMetadata`)
 * is returned as-is; with none present the score is 0.
 *
 * NOTE(review): `rms` / `energy` are not rescaled here, so they are compared
 * against the same thresholds as probabilities — confirm producers emit them
 * on a comparable scale.
 */
var speechProbability = (frame) => {
  if (frame.metadata?.isSpeech === true) {
    return 1;
  }
  if (frame.metadata?.isSpeech === false) {
    return 0;
  }
  for (const key of ["speechProbability", "voiceProbability", "rms", "energy"]) {
    const value = numericMetadata(frame, key);
    if (value !== undefined) {
      return value;
    }
  }
  return 0;
};
/**
 * Groups `input-audio` frames into voice-activity segments using two
 * thresholds (hysteresis) and reports them.
 *
 * A segment opens on the first frame whose score is at or above
 * `speechStartThreshold` (default 0.6). While a segment is open every frame is
 * appended to it; frames scoring at or below `speechEndThreshold` (default
 * 0.35) count as silence, anything above resets the silence run. The segment
 * closes once more than `maxSilenceFrames` (default 1) consecutive silence
 * frames have been seen, or when the input ends.
 *
 * A closed candidate is only reported when it contains at least
 * `minSpeechFrames` (default 1) speech frames. Silence frames are NOT counted
 * towards that minimum: the trailing silence that closes a segment is always
 * part of the candidate, so counting it would let a single-frame blip satisfy
 * a larger minimum. Reported boundaries (`frameCount`, `endAt`, `durationMs`)
 * still include the silence frames appended while the segment was open.
 *
 * @param input Optional frames and tuning values; frames of other kinds are
 *   ignored.
 * @returns `checkedAt` (current `Date.now()`), the number of `input-audio`
 *   frames examined, the detected segments, and a status that is `"warn"` when
 *   no `input-audio` frame was supplied and `"pass"` otherwise.
 */
var buildVoiceMediaVadReport = (input = {}) => {
  const frames = (input.frames ?? []).filter((frame) => frame.kind === "input-audio");
  const speechStartThreshold = input.speechStartThreshold ?? 0.6;
  const speechEndThreshold = input.speechEndThreshold ?? 0.35;
  const minSpeechFrames = input.minSpeechFrames ?? 1;
  const maxSilenceFrames = input.maxSilenceFrames ?? 1;
  const segments = [];
  let activeFrames = [];
  // Frames of the open candidate that scored above the end threshold
  // (plus the frame that opened it).
  let speechFrames = 0;
  // Length of the current run of consecutive silence frames.
  let silenceFrames = 0;
  const closeSegment = () => {
    const first = activeFrames[0];
    const last = activeFrames.at(-1);
    if (first !== undefined && last !== undefined && speechFrames >= minSpeechFrames) {
      const lastDurationMs = last.durationMs ?? 0;
      segments.push({
        durationMs: first.at !== undefined && last.at !== undefined ? last.at - first.at + lastDurationMs : undefined,
        endAt: last.at !== undefined ? last.at + lastDurationMs : undefined,
        frameCount: activeFrames.length,
        segmentId: `vad:${String(segments.length + 1)}`,
        sessionId: first.sessionId,
        startAt: first.at,
        turnId: first.turnId
      });
    }
    // Always start the next candidate from a clean slate, reported or not.
    activeFrames = [];
    speechFrames = 0;
    silenceFrames = 0;
  };
  for (const frame of frames) {
    const probability = speechProbability(frame);
    if (activeFrames.length === 0) {
      if (probability >= speechStartThreshold) {
        activeFrames.push(frame);
        speechFrames = 1;
      }
      continue;
    }
    activeFrames.push(frame);
    if (probability <= speechEndThreshold) {
      silenceFrames += 1;
    } else {
      silenceFrames = 0;
      speechFrames += 1;
    }
    if (silenceFrames > maxSilenceFrames) {
      closeSegment();
    }
  }
  // Flush a segment that was still open when the input ended.
  closeSegment();
  return {
    checkedAt: Date.now(),
    inputAudioFrames: frames.length,
    segments,
    status: frames.length === 0 ? "warn" : "pass"
  };
};
11739
/**
 * Summarises the interruption frames in a capture and checks their latency.
 *
 * Only frames whose `kind` is `"interruption"` are considered. A warning issue
 * (`media.interruption_missing`) is recorded when there are none, and an error
 * issue (`media.interruption_latency`) when `maxInterruptionLatencyMs` is set
 * and any numeric `latencyMs` exceeds it.
 *
 * @param input Optional frames plus an optional latency budget in ms.
 * @returns `checkedAt` (current `Date.now()`), the interruption frame count,
 *   the recorded issues, the numeric latencies in frame order, and a status:
 *   `"fail"` if any issue has severity `"error"`, `"warn"` if there are only
 *   other issues, `"pass"` if there are none.
 */
var buildVoiceMediaInterruptionReport = (input = {}) => {
  const issues = [];
  const latenciesMs = [];
  let interruptionCount = 0;
  for (const frame of input.frames ?? []) {
    if (frame.kind !== "interruption") {
      continue;
    }
    interruptionCount += 1;
    const latency = frame.latencyMs;
    // Frames without a numeric latency still count as interruptions.
    if (typeof latency === "number") {
      latenciesMs.push(latency);
    }
  }
  const maxInterruptionLatencyMs = input.maxInterruptionLatencyMs;
  if (interruptionCount === 0) {
    pushIssue(issues, "warning", "media.interruption_missing", "No interruption frame was observed.");
  }
  if (maxInterruptionLatencyMs !== undefined) {
    const overBudget = latenciesMs.some((latency) => latency > maxInterruptionLatencyMs);
    if (overBudget) {
      pushIssue(issues, "error", "media.interruption_latency", `Interruption latency exceeded ${String(maxInterruptionLatencyMs)}ms.`);
    }
  }
  let status = "pass";
  if (issues.length > 0) {
    status = issues.some((issue) => issue.severity === "error") ? "fail" : "warn";
  }
  return {
    checkedAt: Date.now(),
    interruptionFrames: interruptionCount,
    issues,
    latenciesMs,
    status
  };
};
11623
11758
  var buildVoiceMediaPipelineCalibrationReport = (input = {}) => {
11624
11759
  const frames = input.frames ?? [];
11625
11760
  const issues = [];
@@ -34456,6 +34591,7 @@ export {
34456
34591
  createVoiceMemoryAuditSinkDeliveryStore,
34457
34592
  createVoiceMemoryAuditEventStore,
34458
34593
  createVoiceMemoryAssistantMemoryStore,
34594
+ createVoiceMediaFrameTransformPipeline,
34459
34595
  createVoiceMediaFrame,
34460
34596
  createVoiceLiveOpsRoutes,
34461
34597
  createVoiceLiveOpsController,
@@ -34605,7 +34741,10 @@ export {
34605
34741
  buildVoiceObservabilityExport,
34606
34742
  buildVoiceObservabilityArtifactIndex,
34607
34743
  buildVoiceMonitorRunReport,
34744
+ buildVoiceMediaVadReport,
34745
+ buildVoiceMediaResamplingPlan,
34608
34746
  buildVoiceMediaPipelineCalibrationReport,
34747
+ buildVoiceMediaInterruptionReport,
34609
34748
  buildVoiceLiveOpsControlState,
34610
34749
  buildVoiceLatencySLOGate,
34611
34750
  buildVoiceIncidentBundle,
@@ -2,6 +2,13 @@ import type { AudioFormat } from './types';
2
2
  export type VoiceMediaFrameKind = 'assistant-audio' | 'input-audio' | 'interruption' | 'metadata' | 'transcript' | 'turn-commit';
3
3
  export type VoiceMediaFrameSource = 'browser' | 'provider' | 'telephony' | 'voice-runtime';
4
4
  export type VoiceMediaPipelineStatus = 'fail' | 'pass' | 'warn';
5
/**
 * Plan produced by `buildVoiceMediaResamplingPlan` for converting audio from
 * one format to another.
 */
export type VoiceMediaResamplingPlan = {
    /** Source format, echoed from the request. */
    inputFormat: AudioFormat;
    /** Target format, echoed from the request. */
    outputFormat: AudioFormat;
    /** Output sample rate divided by input sample rate. */
    ratio: number;
    /** Whether the builder's format comparison found the two formats to differ. */
    required: boolean;
    /**
     * `'pass'` when container, encoding and channel count are equal on both
     * sides; `'warn'` otherwise.
     */
    status: VoiceMediaPipelineStatus;
};
5
12
  export type VoiceMediaFrame = {
6
13
  at?: number;
7
14
  audio?: ArrayBuffer | ArrayBufferView;
@@ -16,6 +23,26 @@ export type VoiceMediaFrame = {
16
23
  traceEventId?: string;
17
24
  turnId?: string;
18
25
  };
26
/**
 * One stage of a frame transform pipeline.
 */
export type VoiceMediaFrameTransform = {
    /** Format the stage expects to receive (descriptive; presumably not enforced by the pipeline — confirm). */
    inputFormat?: AudioFormat;
    /** Label identifying the stage. */
    name: string;
    /** Format the stage emits (descriptive; presumably not enforced by the pipeline — confirm). */
    outputFormat?: AudioFormat;
    /**
     * Processes a single frame, synchronously or asynchronously.
     *
     * Return one frame to pass it on, an array to emit several frames, or
     * `undefined` to drop the frame.
     */
    transform: (
        frame: VoiceMediaFrame
    ) =>
        | VoiceMediaFrame
        | readonly VoiceMediaFrame[]
        | undefined
        | Promise<VoiceMediaFrame | readonly VoiceMediaFrame[] | undefined>;
};
32
/**
 * Handle returned by `createVoiceMediaFrameTransformPipeline`.
 */
export type VoiceMediaFrameTransformPipeline = {
    /** Runs one frame through every transform in order and resolves with the frames that come out. */
    push: (frame: VoiceMediaFrame) => Promise<readonly VoiceMediaFrame[]>;
    /** Pushes the frames one after another and resolves with all resulting frames, concatenated in input order. */
    pushMany: (
        frames: readonly VoiceMediaFrame[]
    ) => Promise<readonly VoiceMediaFrame[]>;
    /** The transforms this pipeline applies, in execution order. */
    transforms: readonly VoiceMediaFrameTransform[];
};
37
/**
 * Contract for an object that moves media frames to and from a transport.
 *
 * NOTE(review): no implementation or consumer is visible alongside this
 * declaration; the member notes below restate the signatures and should be
 * confirmed against the runtime that drives adapters.
 */
export type VoiceMediaTransportAdapter = {
    /** Optional teardown hook; may be asynchronous. */
    close?: () => Promise<void> | void;
    /** Optional setup hook; may be asynchronous. */
    connect?: () => Promise<void> | void;
    /** Optional audio format associated with the adapter's input side. */
    inputFormat?: AudioFormat;
    /** Label identifying the adapter. */
    name: string;
    /**
     * Optional subscription for frames: registers `handler` and returns a
     * function (presumably an unsubscribe callback — confirm).
     */
    onFrame?: (
        handler: (frame: VoiceMediaFrame) => Promise<void> | void
    ) => () => void;
    /** Optional audio format associated with the adapter's output side. */
    outputFormat?: AudioFormat;
    /** Hands one frame to the adapter; may be asynchronous. */
    send: (frame: VoiceMediaFrame) => Promise<void> | void;
};
19
46
  export type VoiceMediaPipelineCalibrationInput = {
20
47
  expectedInputFormat?: AudioFormat;
21
48
  expectedOutputFormat?: AudioFormat;
@@ -52,5 +79,47 @@ export type VoiceMediaPipelineCalibrationReport = {
52
79
  traceLinkedFrames: number;
53
80
  turnCommitFrames: number;
54
81
  };
82
/**
 * Options accepted by `buildVoiceMediaVadReport`. Every field is optional.
 */
export type VoiceMediaVadInput = {
    /** Frames to analyse; only frames of kind `'input-audio'` are examined. */
    frames?: readonly VoiceMediaFrame[];
    /** Consecutive silence frames tolerated inside a segment before it is closed. Defaults to 1. */
    maxSilenceFrames?: number;
    /** Minimum size a candidate segment must reach to be reported. Defaults to 1. */
    minSpeechFrames?: number;
    /** Frames scoring at or below this value count as silence while a segment is open. Defaults to 0.35. */
    speechEndThreshold?: number;
    /** A segment opens on a frame scoring at or above this value. Defaults to 0.6. */
    speechStartThreshold?: number;
};
89
/**
 * One voice-activity segment found by `buildVoiceMediaVadReport`.
 */
export type VoiceMediaVadSegment = {
    /**
     * Span from the first frame's `at` to the end of the last frame
     * (`last.at - first.at + last.durationMs`); absent when either frame has
     * no `at`.
     */
    durationMs?: number;
    /** Last frame's `at` plus its `durationMs` (0 when missing); absent when the last frame has no `at`. */
    endAt?: number;
    /** Number of frames grouped into the segment. */
    frameCount: number;
    /** Identifier of the form `vad:<n>`, numbered from 1 in detection order. */
    segmentId: string;
    /** `sessionId` of the segment's first frame. */
    sessionId?: string;
    /** `at` of the segment's first frame. */
    startAt?: number;
    /** `turnId` of the segment's first frame. */
    turnId?: string;
};
98
/**
 * Result of `buildVoiceMediaVadReport`.
 */
export type VoiceMediaVadReport = {
    /** `Date.now()` at the moment the report was built. */
    checkedAt: number;
    /** Number of `'input-audio'` frames that were examined. */
    inputAudioFrames: number;
    /** Detected segments, in the order they were closed. */
    segments: VoiceMediaVadSegment[];
    /** `'warn'` when no `'input-audio'` frame was supplied, `'pass'` otherwise. */
    status: VoiceMediaPipelineStatus;
};
104
/**
 * Options accepted by `buildVoiceMediaInterruptionReport`.
 */
export type VoiceMediaInterruptionInput = {
    /** Frames to inspect; only frames of kind `'interruption'` are considered. */
    frames?: readonly VoiceMediaFrame[];
    /** Latency budget in milliseconds; when set, any interruption latency above it is reported as an error. */
    maxInterruptionLatencyMs?: number;
};
108
/**
 * Result of `buildVoiceMediaInterruptionReport`.
 */
export type VoiceMediaInterruptionReport = {
    /** `Date.now()` at the moment the report was built. */
    checkedAt: number;
    /** Number of `'interruption'` frames found. */
    interruptionFrames: number;
    /** Problems detected: a warning when no interruption frame exists, an error when the latency budget is exceeded. */
    issues: VoiceMediaPipelineCalibrationIssue[];
    /** Numeric `latencyMs` values of the interruption frames, in frame order. */
    latenciesMs: number[];
    /** `'fail'` if any issue is an error, `'warn'` if there are only other issues, `'pass'` if there are none. */
    status: VoiceMediaPipelineStatus;
};
55
115
  export declare const createVoiceMediaFrame: (frame: VoiceMediaFrame) => VoiceMediaFrame;
116
/**
 * Builds a plan describing how audio in `input.inputFormat` relates to
 * `input.outputFormat`: the sample-rate ratio, whether a conversion is
 * required, and a pass/warn status.
 *
 * @param input Source and target audio formats.
 * @returns The resampling plan.
 */
export declare const buildVoiceMediaResamplingPlan: (input: {
    inputFormat: AudioFormat;
    outputFormat: AudioFormat;
}) => VoiceMediaResamplingPlan;
120
/**
 * Creates a pipeline that passes frames through `input.transforms` in order.
 *
 * @param input Optional settings; with no transforms, frames pass through
 *   unchanged.
 * @returns The pipeline handle.
 */
export declare const createVoiceMediaFrameTransformPipeline: (input?: {
    transforms?: readonly VoiceMediaFrameTransform[];
}) => VoiceMediaFrameTransformPipeline;
123
/**
 * Groups `'input-audio'` frames into voice-activity segments and reports them.
 *
 * @param input Optional frames and detection thresholds.
 * @returns The voice-activity report.
 */
export declare const buildVoiceMediaVadReport: (
    input?: VoiceMediaVadInput
) => VoiceMediaVadReport;
124
/**
 * Summarises `'interruption'` frames and checks their latency against an
 * optional budget.
 *
 * @param input Optional frames and latency budget.
 * @returns The interruption report.
 */
export declare const buildVoiceMediaInterruptionReport: (
    input?: VoiceMediaInterruptionInput
) => VoiceMediaInterruptionReport;
56
125
  export declare const buildVoiceMediaPipelineCalibrationReport: (input?: VoiceMediaPipelineCalibrationInput) => VoiceMediaPipelineCalibrationReport;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.309",
3
+ "version": "0.0.22-beta.310",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",