@absolutejs/voice 0.0.22-beta.309 → 0.0.22-beta.310
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -2
- package/dist/index.js +139 -0
- package/dist/mediaPipeline.d.ts +69 -0
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -15,8 +15,8 @@ export type { VoiceRealtimeChannelAssertionInput, VoiceRealtimeChannelAssertionR
|
|
|
15
15
|
export { assertVoiceRealtimeProviderContractEvidence, buildVoiceRealtimeProviderContractMatrix, createVoiceRealtimeProviderContractMatrixPreset, createVoiceRealtimeProviderContractRoutes, evaluateVoiceRealtimeProviderContractEvidence, renderVoiceRealtimeProviderContractHTML } from './realtimeProviderContracts';
|
|
16
16
|
export type { VoiceRealtimeProviderContractAssertionInput, VoiceRealtimeProviderContractAssertionReport, VoiceRealtimeProviderContractCapability, VoiceRealtimeProviderContractCheck, VoiceRealtimeProviderContractDefinition, VoiceRealtimeProviderContractMatrixPresetOptions, VoiceRealtimeProviderContractMatrixInput, VoiceRealtimeProviderContractMatrixReport, VoiceRealtimeProviderContractRoutesOptions, VoiceRealtimeProviderContractRow, VoiceRealtimeProviderPresetProvider, VoiceRealtimeProviderContractStatus } from './realtimeProviderContracts';
|
|
17
17
|
export { buildVoiceDiagnosticsMarkdown, createVoiceDiagnosticsRoutes, resolveVoiceDiagnosticsTraceFilter } from './diagnosticsRoutes';
|
|
18
|
-
export { buildVoiceMediaPipelineCalibrationReport, createVoiceMediaFrame } from './mediaPipeline';
|
|
19
|
-
export type { VoiceMediaFrame, VoiceMediaFrameKind, VoiceMediaFrameSource, VoiceMediaPipelineCalibrationInput, VoiceMediaPipelineCalibrationIssue, VoiceMediaPipelineCalibrationReport, VoiceMediaPipelineStatus } from './mediaPipeline';
|
|
18
|
+
export { buildVoiceMediaInterruptionReport, buildVoiceMediaPipelineCalibrationReport, buildVoiceMediaResamplingPlan, buildVoiceMediaVadReport, createVoiceMediaFrame, createVoiceMediaFrameTransformPipeline } from './mediaPipeline';
|
|
19
|
+
export type { VoiceMediaFrame, VoiceMediaFrameKind, VoiceMediaFrameSource, VoiceMediaFrameTransform, VoiceMediaFrameTransformPipeline, VoiceMediaInterruptionInput, VoiceMediaInterruptionReport, VoiceMediaPipelineCalibrationInput, VoiceMediaPipelineCalibrationIssue, VoiceMediaPipelineCalibrationReport, VoiceMediaPipelineStatus, VoiceMediaResamplingPlan, VoiceMediaTransportAdapter, VoiceMediaVadInput, VoiceMediaVadReport, VoiceMediaVadSegment } from './mediaPipeline';
|
|
20
20
|
export { buildVoiceDemoReadyReport, createVoiceDemoReadyRoutes, renderVoiceDemoReadyHTML } from './demoReadyRoutes';
|
|
21
21
|
export { buildVoiceDeliverySinkReport, createVoiceDeliverySinkDescriptor, createVoiceDeliverySinkPair, createVoiceDeliverySinkRoutes, createVoiceFileDeliverySink, createVoicePostgresDeliverySink, createVoiceS3DeliverySink, createVoiceSQLiteDeliverySink, createVoiceWebhookDeliverySink, renderVoiceDeliverySinkHTML } from './deliverySinkRoutes';
|
|
22
22
|
export { buildVoiceOpsActionHistoryReport, createVoiceOpsActionAuditRoutes, recordVoiceOpsActionAudit, renderVoiceOpsActionHistoryHTML } from './opsActionAuditRoutes';
|
package/dist/index.js
CHANGED
|
@@ -11620,6 +11620,141 @@ var numericMetadata = (frame, key) => {
|
|
|
11620
11620
|
return typeof value === "number" && Number.isFinite(value) ? value : undefined;
|
|
11621
11621
|
};
|
|
11622
11622
|
var createVoiceMediaFrame = (frame) => frame;
|
|
11623
|
+
var buildVoiceMediaResamplingPlan = (input) => {
|
|
11624
|
+
const required = !formatMatches2(input.inputFormat, input.outputFormat);
|
|
11625
|
+
return {
|
|
11626
|
+
inputFormat: input.inputFormat,
|
|
11627
|
+
outputFormat: input.outputFormat,
|
|
11628
|
+
ratio: input.outputFormat.sampleRateHz / input.inputFormat.sampleRateHz,
|
|
11629
|
+
required,
|
|
11630
|
+
status: input.inputFormat.container === input.outputFormat.container && input.inputFormat.encoding === input.outputFormat.encoding && input.inputFormat.channels === input.outputFormat.channels ? "pass" : "warn"
|
|
11631
|
+
};
|
|
11632
|
+
};
|
|
11633
|
+
var createVoiceMediaFrameTransformPipeline = (input = {}) => {
|
|
11634
|
+
const transforms = input.transforms ?? [];
|
|
11635
|
+
const push = async (frame) => {
|
|
11636
|
+
let frames = [frame];
|
|
11637
|
+
for (const transform of transforms) {
|
|
11638
|
+
const nextFrames = [];
|
|
11639
|
+
for (const current of frames) {
|
|
11640
|
+
const transformed = await transform.transform(current);
|
|
11641
|
+
if (transformed === undefined) {
|
|
11642
|
+
continue;
|
|
11643
|
+
}
|
|
11644
|
+
if (Array.isArray(transformed)) {
|
|
11645
|
+
nextFrames.push(...transformed);
|
|
11646
|
+
} else {
|
|
11647
|
+
nextFrames.push(transformed);
|
|
11648
|
+
}
|
|
11649
|
+
}
|
|
11650
|
+
frames = nextFrames;
|
|
11651
|
+
}
|
|
11652
|
+
return frames;
|
|
11653
|
+
};
|
|
11654
|
+
return {
|
|
11655
|
+
push,
|
|
11656
|
+
pushMany: async (frames) => {
|
|
11657
|
+
const output = [];
|
|
11658
|
+
for (const frame of frames) {
|
|
11659
|
+
output.push(...await push(frame));
|
|
11660
|
+
}
|
|
11661
|
+
return output;
|
|
11662
|
+
},
|
|
11663
|
+
transforms
|
|
11664
|
+
};
|
|
11665
|
+
};
|
|
11666
|
+
var speechProbability = (frame) => {
|
|
11667
|
+
if (frame.metadata?.isSpeech === true) {
|
|
11668
|
+
return 1;
|
|
11669
|
+
}
|
|
11670
|
+
if (frame.metadata?.isSpeech === false) {
|
|
11671
|
+
return 0;
|
|
11672
|
+
}
|
|
11673
|
+
for (const key of ["speechProbability", "voiceProbability", "rms", "energy"]) {
|
|
11674
|
+
const value = numericMetadata(frame, key);
|
|
11675
|
+
if (value !== undefined) {
|
|
11676
|
+
return value;
|
|
11677
|
+
}
|
|
11678
|
+
}
|
|
11679
|
+
return 0;
|
|
11680
|
+
};
|
|
11681
|
+
var buildVoiceMediaVadReport = (input = {}) => {
|
|
11682
|
+
const frames = (input.frames ?? []).filter((frame) => frame.kind === "input-audio");
|
|
11683
|
+
const speechStartThreshold = input.speechStartThreshold ?? 0.6;
|
|
11684
|
+
const speechEndThreshold = input.speechEndThreshold ?? 0.35;
|
|
11685
|
+
const minSpeechFrames = input.minSpeechFrames ?? 1;
|
|
11686
|
+
const maxSilenceFrames = input.maxSilenceFrames ?? 1;
|
|
11687
|
+
const segments = [];
|
|
11688
|
+
let activeFrames = [];
|
|
11689
|
+
let silenceFrames = 0;
|
|
11690
|
+
const closeSegment = () => {
|
|
11691
|
+
if (activeFrames.length < minSpeechFrames) {
|
|
11692
|
+
activeFrames = [];
|
|
11693
|
+
silenceFrames = 0;
|
|
11694
|
+
return;
|
|
11695
|
+
}
|
|
11696
|
+
const first = activeFrames[0];
|
|
11697
|
+
const last = activeFrames.at(-1);
|
|
11698
|
+
if (!first) {
|
|
11699
|
+
return;
|
|
11700
|
+
}
|
|
11701
|
+
segments.push({
|
|
11702
|
+
durationMs: first.at !== undefined && last?.at !== undefined ? last.at - first.at + (last.durationMs ?? 0) : undefined,
|
|
11703
|
+
endAt: last?.at !== undefined ? last.at + (last.durationMs ?? 0) : undefined,
|
|
11704
|
+
frameCount: activeFrames.length,
|
|
11705
|
+
segmentId: `vad:${String(segments.length + 1)}`,
|
|
11706
|
+
sessionId: first.sessionId,
|
|
11707
|
+
startAt: first.at,
|
|
11708
|
+
turnId: first.turnId
|
|
11709
|
+
});
|
|
11710
|
+
activeFrames = [];
|
|
11711
|
+
silenceFrames = 0;
|
|
11712
|
+
};
|
|
11713
|
+
for (const frame of frames) {
|
|
11714
|
+
const probability = speechProbability(frame);
|
|
11715
|
+
if (activeFrames.length === 0) {
|
|
11716
|
+
if (probability >= speechStartThreshold) {
|
|
11717
|
+
activeFrames.push(frame);
|
|
11718
|
+
}
|
|
11719
|
+
continue;
|
|
11720
|
+
}
|
|
11721
|
+
activeFrames.push(frame);
|
|
11722
|
+
if (probability <= speechEndThreshold) {
|
|
11723
|
+
silenceFrames += 1;
|
|
11724
|
+
} else {
|
|
11725
|
+
silenceFrames = 0;
|
|
11726
|
+
}
|
|
11727
|
+
if (silenceFrames > maxSilenceFrames) {
|
|
11728
|
+
closeSegment();
|
|
11729
|
+
}
|
|
11730
|
+
}
|
|
11731
|
+
closeSegment();
|
|
11732
|
+
return {
|
|
11733
|
+
checkedAt: Date.now(),
|
|
11734
|
+
inputAudioFrames: frames.length,
|
|
11735
|
+
segments,
|
|
11736
|
+
status: frames.length === 0 ? "warn" : "pass"
|
|
11737
|
+
};
|
|
11738
|
+
};
|
|
11739
|
+
var buildVoiceMediaInterruptionReport = (input = {}) => {
|
|
11740
|
+
const issues = [];
|
|
11741
|
+
const interruptionFrames = (input.frames ?? []).filter((frame) => frame.kind === "interruption");
|
|
11742
|
+
const latenciesMs = interruptionFrames.map((frame) => frame.latencyMs).filter((latency) => typeof latency === "number");
|
|
11743
|
+
const maxInterruptionLatencyMs = input.maxInterruptionLatencyMs;
|
|
11744
|
+
if (interruptionFrames.length === 0) {
|
|
11745
|
+
pushIssue(issues, "warning", "media.interruption_missing", "No interruption frame was observed.");
|
|
11746
|
+
}
|
|
11747
|
+
if (maxInterruptionLatencyMs !== undefined && latenciesMs.some((latency) => latency > maxInterruptionLatencyMs)) {
|
|
11748
|
+
pushIssue(issues, "error", "media.interruption_latency", `Interruption latency exceeded ${String(maxInterruptionLatencyMs)}ms.`);
|
|
11749
|
+
}
|
|
11750
|
+
return {
|
|
11751
|
+
checkedAt: Date.now(),
|
|
11752
|
+
interruptionFrames: interruptionFrames.length,
|
|
11753
|
+
issues,
|
|
11754
|
+
latenciesMs,
|
|
11755
|
+
status: issues.some((issue) => issue.severity === "error") ? "fail" : issues.length > 0 ? "warn" : "pass"
|
|
11756
|
+
};
|
|
11757
|
+
};
|
|
11623
11758
|
var buildVoiceMediaPipelineCalibrationReport = (input = {}) => {
|
|
11624
11759
|
const frames = input.frames ?? [];
|
|
11625
11760
|
const issues = [];
|
|
@@ -34456,6 +34591,7 @@ export {
|
|
|
34456
34591
|
createVoiceMemoryAuditSinkDeliveryStore,
|
|
34457
34592
|
createVoiceMemoryAuditEventStore,
|
|
34458
34593
|
createVoiceMemoryAssistantMemoryStore,
|
|
34594
|
+
createVoiceMediaFrameTransformPipeline,
|
|
34459
34595
|
createVoiceMediaFrame,
|
|
34460
34596
|
createVoiceLiveOpsRoutes,
|
|
34461
34597
|
createVoiceLiveOpsController,
|
|
@@ -34605,7 +34741,10 @@ export {
|
|
|
34605
34741
|
buildVoiceObservabilityExport,
|
|
34606
34742
|
buildVoiceObservabilityArtifactIndex,
|
|
34607
34743
|
buildVoiceMonitorRunReport,
|
|
34744
|
+
buildVoiceMediaVadReport,
|
|
34745
|
+
buildVoiceMediaResamplingPlan,
|
|
34608
34746
|
buildVoiceMediaPipelineCalibrationReport,
|
|
34747
|
+
buildVoiceMediaInterruptionReport,
|
|
34609
34748
|
buildVoiceLiveOpsControlState,
|
|
34610
34749
|
buildVoiceLatencySLOGate,
|
|
34611
34750
|
buildVoiceIncidentBundle,
|
package/dist/mediaPipeline.d.ts
CHANGED
|
@@ -2,6 +2,13 @@ import type { AudioFormat } from './types';
|
|
|
2
2
|
export type VoiceMediaFrameKind = 'assistant-audio' | 'input-audio' | 'interruption' | 'metadata' | 'transcript' | 'turn-commit';
|
|
3
3
|
export type VoiceMediaFrameSource = 'browser' | 'provider' | 'telephony' | 'voice-runtime';
|
|
4
4
|
export type VoiceMediaPipelineStatus = 'fail' | 'pass' | 'warn';
|
|
5
|
+
export type VoiceMediaResamplingPlan = {
|
|
6
|
+
inputFormat: AudioFormat;
|
|
7
|
+
outputFormat: AudioFormat;
|
|
8
|
+
ratio: number;
|
|
9
|
+
required: boolean;
|
|
10
|
+
status: VoiceMediaPipelineStatus;
|
|
11
|
+
};
|
|
5
12
|
export type VoiceMediaFrame = {
|
|
6
13
|
at?: number;
|
|
7
14
|
audio?: ArrayBuffer | ArrayBufferView;
|
|
@@ -16,6 +23,26 @@ export type VoiceMediaFrame = {
|
|
|
16
23
|
traceEventId?: string;
|
|
17
24
|
turnId?: string;
|
|
18
25
|
};
|
|
26
|
+
export type VoiceMediaFrameTransform = {
|
|
27
|
+
inputFormat?: AudioFormat;
|
|
28
|
+
name: string;
|
|
29
|
+
outputFormat?: AudioFormat;
|
|
30
|
+
transform: (frame: VoiceMediaFrame) => VoiceMediaFrame | readonly VoiceMediaFrame[] | undefined | Promise<VoiceMediaFrame | readonly VoiceMediaFrame[] | undefined>;
|
|
31
|
+
};
|
|
32
|
+
export type VoiceMediaFrameTransformPipeline = {
|
|
33
|
+
push: (frame: VoiceMediaFrame) => Promise<readonly VoiceMediaFrame[]>;
|
|
34
|
+
pushMany: (frames: readonly VoiceMediaFrame[]) => Promise<readonly VoiceMediaFrame[]>;
|
|
35
|
+
transforms: readonly VoiceMediaFrameTransform[];
|
|
36
|
+
};
|
|
37
|
+
export type VoiceMediaTransportAdapter = {
|
|
38
|
+
close?: () => Promise<void> | void;
|
|
39
|
+
connect?: () => Promise<void> | void;
|
|
40
|
+
inputFormat?: AudioFormat;
|
|
41
|
+
name: string;
|
|
42
|
+
onFrame?: (handler: (frame: VoiceMediaFrame) => Promise<void> | void) => () => void;
|
|
43
|
+
outputFormat?: AudioFormat;
|
|
44
|
+
send: (frame: VoiceMediaFrame) => Promise<void> | void;
|
|
45
|
+
};
|
|
19
46
|
export type VoiceMediaPipelineCalibrationInput = {
|
|
20
47
|
expectedInputFormat?: AudioFormat;
|
|
21
48
|
expectedOutputFormat?: AudioFormat;
|
|
@@ -52,5 +79,47 @@ export type VoiceMediaPipelineCalibrationReport = {
|
|
|
52
79
|
traceLinkedFrames: number;
|
|
53
80
|
turnCommitFrames: number;
|
|
54
81
|
};
|
|
82
|
+
export type VoiceMediaVadInput = {
|
|
83
|
+
frames?: readonly VoiceMediaFrame[];
|
|
84
|
+
maxSilenceFrames?: number;
|
|
85
|
+
minSpeechFrames?: number;
|
|
86
|
+
speechEndThreshold?: number;
|
|
87
|
+
speechStartThreshold?: number;
|
|
88
|
+
};
|
|
89
|
+
export type VoiceMediaVadSegment = {
|
|
90
|
+
durationMs?: number;
|
|
91
|
+
endAt?: number;
|
|
92
|
+
frameCount: number;
|
|
93
|
+
segmentId: string;
|
|
94
|
+
sessionId?: string;
|
|
95
|
+
startAt?: number;
|
|
96
|
+
turnId?: string;
|
|
97
|
+
};
|
|
98
|
+
export type VoiceMediaVadReport = {
|
|
99
|
+
checkedAt: number;
|
|
100
|
+
inputAudioFrames: number;
|
|
101
|
+
segments: VoiceMediaVadSegment[];
|
|
102
|
+
status: VoiceMediaPipelineStatus;
|
|
103
|
+
};
|
|
104
|
+
export type VoiceMediaInterruptionInput = {
|
|
105
|
+
frames?: readonly VoiceMediaFrame[];
|
|
106
|
+
maxInterruptionLatencyMs?: number;
|
|
107
|
+
};
|
|
108
|
+
export type VoiceMediaInterruptionReport = {
|
|
109
|
+
checkedAt: number;
|
|
110
|
+
interruptionFrames: number;
|
|
111
|
+
issues: VoiceMediaPipelineCalibrationIssue[];
|
|
112
|
+
latenciesMs: number[];
|
|
113
|
+
status: VoiceMediaPipelineStatus;
|
|
114
|
+
};
|
|
55
115
|
export declare const createVoiceMediaFrame: (frame: VoiceMediaFrame) => VoiceMediaFrame;
|
|
116
|
+
export declare const buildVoiceMediaResamplingPlan: (input: {
|
|
117
|
+
inputFormat: AudioFormat;
|
|
118
|
+
outputFormat: AudioFormat;
|
|
119
|
+
}) => VoiceMediaResamplingPlan;
|
|
120
|
+
export declare const createVoiceMediaFrameTransformPipeline: (input?: {
|
|
121
|
+
transforms?: readonly VoiceMediaFrameTransform[];
|
|
122
|
+
}) => VoiceMediaFrameTransformPipeline;
|
|
123
|
+
export declare const buildVoiceMediaVadReport: (input?: VoiceMediaVadInput) => VoiceMediaVadReport;
|
|
124
|
+
export declare const buildVoiceMediaInterruptionReport: (input?: VoiceMediaInterruptionInput) => VoiceMediaInterruptionReport;
|
|
56
125
|
export declare const buildVoiceMediaPipelineCalibrationReport: (input?: VoiceMediaPipelineCalibrationInput) => VoiceMediaPipelineCalibrationReport;
|