@absolutejs/voice 0.0.20 → 0.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +387 -4
- package/dist/angular/index.d.ts +1 -0
- package/dist/angular/index.js +669 -3
- package/dist/angular/voice-controller.service.d.ts +21 -0
- package/dist/audioConditioning.d.ts +3 -0
- package/dist/client/actions.d.ts +7 -0
- package/dist/client/connection.d.ts +5 -0
- package/dist/client/controller.d.ts +2 -0
- package/dist/client/htmxBootstrap.js +576 -167
- package/dist/client/index.d.ts +1 -0
- package/dist/client/index.js +486 -3
- package/dist/client/microphone.d.ts +4 -2
- package/dist/correction.d.ts +16 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +1314 -283
- package/dist/presets.d.ts +13 -0
- package/dist/react/index.d.ts +1 -0
- package/dist/react/index.js +642 -3
- package/dist/react/useVoiceController.d.ts +20 -0
- package/dist/react/useVoiceStream.d.ts +1 -0
- package/dist/store.d.ts +2 -2
- package/dist/svelte/index.d.ts +1 -0
- package/dist/svelte/index.js +607 -3
- package/dist/testing/benchmark.d.ts +36 -0
- package/dist/testing/index.js +1453 -241
- package/dist/testing/sessionBenchmark.d.ts +67 -2
- package/dist/testing/stt.d.ts +1 -0
- package/dist/turnDetection.d.ts +5 -1
- package/dist/turnProfiles.d.ts +6 -0
- package/dist/types.d.ts +198 -8
- package/dist/vue/index.d.ts +1 -0
- package/dist/vue/index.js +660 -3
- package/dist/vue/useVoiceController.d.ts +19 -0
- package/fixtures/README.md +9 -0
- package/fixtures/manifest.json +59 -1
- package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
- package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
- package/package.json +21 -1
package/dist/testing/sessionBenchmark.d.ts
CHANGED
|
@@ -1,13 +1,23 @@
|
|
|
1
|
-
import type { STTAdapter } from '../types';
|
|
1
|
+
import type { STTAdapter, VoiceAudioConditioningConfig, VoicePhraseHint, VoiceSTTFallbackConfig, VoiceSTTLifecycle, VoiceTurnCorrectionHandler, VoiceTurnProfile, VoiceTranscriptQuality } from '../types';
|
|
2
2
|
import { type VoiceTranscriptAccuracy } from './accuracy';
|
|
3
3
|
import type { VoiceTestFixture } from './fixtures';
|
|
4
4
|
export type VoiceSessionBenchmarkScenario = VoiceTestFixture & {
|
|
5
5
|
expectedTurnTexts: string[];
|
|
6
|
+
phraseHints?: VoicePhraseHint[];
|
|
6
7
|
reconnectAtChunkIndex?: number;
|
|
7
8
|
reconnectPauseMs?: number;
|
|
8
9
|
silenceMs?: number;
|
|
9
10
|
speechThreshold?: number;
|
|
11
|
+
transcriptStabilityMs?: number;
|
|
10
12
|
transcriptThreshold?: number;
|
|
13
|
+
turnProfile?: VoiceTurnProfile;
|
|
14
|
+
audioConditioning?: VoiceAudioConditioningConfig;
|
|
15
|
+
sttLifecycle?: VoiceSTTLifecycle;
|
|
16
|
+
};
|
|
17
|
+
export type VoiceSessionBenchmarkTraceEntry = {
|
|
18
|
+
atMs: number;
|
|
19
|
+
data?: unknown;
|
|
20
|
+
phase: string;
|
|
11
21
|
};
|
|
12
22
|
export type VoiceSessionBenchmarkTurnResult = {
|
|
13
23
|
actualText: string;
|
|
@@ -15,6 +25,7 @@ export type VoiceSessionBenchmarkTurnResult = {
|
|
|
15
25
|
expectedText?: string;
|
|
16
26
|
index: number;
|
|
17
27
|
passes: boolean;
|
|
28
|
+
quality?: VoiceTranscriptQuality;
|
|
18
29
|
};
|
|
19
30
|
export type VoiceSessionBenchmarkScenarioResult = {
|
|
20
31
|
actualTurns: string[];
|
|
@@ -28,6 +39,7 @@ export type VoiceSessionBenchmarkScenarioResult = {
|
|
|
28
39
|
title: string;
|
|
29
40
|
turnCountDelta: number;
|
|
30
41
|
turnResults: VoiceSessionBenchmarkTurnResult[];
|
|
42
|
+
trace?: VoiceSessionBenchmarkTraceEntry[];
|
|
31
43
|
};
|
|
32
44
|
export type VoiceSessionBenchmarkSummary = {
|
|
33
45
|
adapterId: string;
|
|
@@ -47,15 +59,68 @@ export type VoiceSessionBenchmarkReport = {
|
|
|
47
59
|
scenarios: VoiceSessionBenchmarkScenarioResult[];
|
|
48
60
|
summary: VoiceSessionBenchmarkSummary;
|
|
49
61
|
};
|
|
50
|
-
export
|
|
62
|
+
export type VoiceSessionBenchmarkScenarioAggregate = {
|
|
63
|
+
averageElapsedMs: number;
|
|
64
|
+
averageWordErrorRate: number;
|
|
65
|
+
bestWordErrorRate: number;
|
|
66
|
+
fixtureId: string;
|
|
67
|
+
passCount: number;
|
|
68
|
+
passRate: number;
|
|
69
|
+
reconnectSuccessRate: number;
|
|
70
|
+
runCount: number;
|
|
71
|
+
tags: string[];
|
|
72
|
+
title: string;
|
|
73
|
+
worstWordErrorRate: number;
|
|
74
|
+
};
|
|
75
|
+
export type VoiceSessionBenchmarkSeriesSummary = {
|
|
76
|
+
adapterId: string;
|
|
77
|
+
averageElapsedMs: number;
|
|
78
|
+
averagePassRate: number;
|
|
79
|
+
averageWordErrorRate: number;
|
|
80
|
+
flakyScenarioCount: number;
|
|
81
|
+
generatedRunCount: number;
|
|
82
|
+
reconnectSuccessRate: number;
|
|
83
|
+
scenarioCount: number;
|
|
84
|
+
stableScenarioCount: number;
|
|
85
|
+
totalPassCount: number;
|
|
86
|
+
totalRunCount: number;
|
|
87
|
+
};
|
|
88
|
+
export type VoiceSessionBenchmarkSeriesReport = {
|
|
89
|
+
adapterId: string;
|
|
90
|
+
generatedAt: number;
|
|
91
|
+
runCount: number;
|
|
92
|
+
scenarios: VoiceSessionBenchmarkScenarioAggregate[];
|
|
93
|
+
summary: VoiceSessionBenchmarkSeriesSummary;
|
|
94
|
+
};
|
|
95
|
+
export declare const runVoiceSessionBenchmarkScenario: (adapter: STTAdapter, fixture: VoiceSessionBenchmarkScenario, options?: {
|
|
96
|
+
correctTurn?: VoiceTurnCorrectionHandler;
|
|
97
|
+
sttFallback?: VoiceSTTFallbackConfig;
|
|
98
|
+
trace?: boolean;
|
|
99
|
+
}) => Promise<VoiceSessionBenchmarkScenarioResult>;
|
|
51
100
|
export declare const summarizeVoiceSessionBenchmark: (adapterId: string, scenarios: VoiceSessionBenchmarkScenarioResult[]) => VoiceSessionBenchmarkSummary;
|
|
101
|
+
export declare const summarizeVoiceSessionBenchmarkSeries: (input: {
|
|
102
|
+
adapterId: string;
|
|
103
|
+
reports: VoiceSessionBenchmarkReport[];
|
|
104
|
+
}) => VoiceSessionBenchmarkSeriesReport;
|
|
52
105
|
export declare const runVoiceSessionBenchmark: (input: {
|
|
53
106
|
adapter: STTAdapter;
|
|
54
107
|
adapterId: string;
|
|
108
|
+
correctTurn?: VoiceTurnCorrectionHandler;
|
|
55
109
|
scenarios: VoiceSessionBenchmarkScenario[];
|
|
110
|
+
sttFallback?: VoiceSTTFallbackConfig;
|
|
111
|
+
trace?: boolean;
|
|
56
112
|
}) => Promise<{
|
|
57
113
|
adapterId: string;
|
|
58
114
|
generatedAt: number;
|
|
59
115
|
scenarios: VoiceSessionBenchmarkScenarioResult[];
|
|
60
116
|
summary: VoiceSessionBenchmarkSummary;
|
|
61
117
|
}>;
|
|
118
|
+
export declare const runVoiceSessionBenchmarkSeries: (input: {
|
|
119
|
+
adapter: STTAdapter;
|
|
120
|
+
adapterId: string;
|
|
121
|
+
correctTurn?: VoiceTurnCorrectionHandler;
|
|
122
|
+
runs: number;
|
|
123
|
+
scenarios: VoiceSessionBenchmarkScenario[];
|
|
124
|
+
sttFallback?: VoiceSTTFallbackConfig;
|
|
125
|
+
trace?: boolean;
|
|
126
|
+
}) => Promise<VoiceSessionBenchmarkSeriesReport>;
|
package/dist/testing/stt.d.ts
CHANGED
|
@@ -17,6 +17,7 @@ export type VoiceSTTAdapterHarnessResult = {
|
|
|
17
17
|
finalEvents: VoiceFinalEvent[];
|
|
18
18
|
finalText: string;
|
|
19
19
|
partialEvents: VoicePartialEvent[];
|
|
20
|
+
speechEndedAt: number;
|
|
20
21
|
startedAt: number;
|
|
21
22
|
};
|
|
22
23
|
export declare const runSTTAdapterFixture: (adapter: STTAdapter, fixture: VoiceTestFixture, options?: VoiceSTTAdapterHarnessOptions) => Promise<VoiceSTTAdapterHarnessResult>;
|
package/dist/turnDetection.d.ts
CHANGED
|
@@ -2,4 +2,8 @@ import type { AudioChunk, Transcript } from './types';
|
|
|
2
2
|
export declare const DEFAULT_SILENCE_MS = 700;
|
|
3
3
|
export declare const DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
4
4
|
export declare const measureAudioLevel: (audio: AudioChunk) => number;
|
|
5
|
-
export declare const
|
|
5
|
+
export declare const selectPreferredTranscriptText: (currentText: string, nextText: string) => string;
|
|
6
|
+
export declare const buildTurnText: (transcripts: Transcript[], partialText: string, options?: {
|
|
7
|
+
partialEndedAtMs?: number;
|
|
8
|
+
partialStartedAtMs?: number;
|
|
9
|
+
}) => string;
|
package/dist/turnProfiles.d.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { VoiceResolvedTurnDetectionConfig, VoiceTurnDetectionConfig, VoiceTurnQualityProfile, VoiceTurnProfile } from './types';
|
|
2
|
+
export declare const TURN_PROFILE_DEFAULTS: Record<VoiceTurnProfile, Omit<VoiceResolvedTurnDetectionConfig, 'profile'>>;
|
|
3
|
+
export declare const QUALITY_PROFILE_DEFAULTS: Record<VoiceTurnQualityProfile, Partial<VoiceResolvedTurnDetectionConfig>>;
|
|
4
|
+
export declare const DEFAULT_TURN_PROFILE: VoiceTurnProfile;
|
|
5
|
+
export declare const DEFAULT_QUALITY_PROFILE: VoiceTurnQualityProfile;
|
|
6
|
+
export declare const resolveTurnDetectionConfig: (config?: VoiceTurnDetectionConfig) => VoiceResolvedTurnDetectionConfig;
|
package/dist/types.d.ts
CHANGED
|
@@ -6,6 +6,12 @@ export type AudioFormat = {
|
|
|
6
6
|
channels: 1 | 2;
|
|
7
7
|
};
|
|
8
8
|
export type AudioChunk = ArrayBuffer | ArrayBufferView;
|
|
9
|
+
export type VoicePhraseHint = {
|
|
10
|
+
text: string;
|
|
11
|
+
aliases?: string[];
|
|
12
|
+
boost?: number;
|
|
13
|
+
metadata?: Record<string, unknown>;
|
|
14
|
+
};
|
|
9
15
|
export type Transcript = {
|
|
10
16
|
id: string;
|
|
11
17
|
text: string;
|
|
@@ -16,6 +22,39 @@ export type Transcript = {
|
|
|
16
22
|
endedAtMs?: number;
|
|
17
23
|
vendor?: string;
|
|
18
24
|
};
|
|
25
|
+
export type VoiceTranscriptQuality = {
|
|
26
|
+
averageConfidence?: number;
|
|
27
|
+
confidenceSampleCount: number;
|
|
28
|
+
correction?: VoiceTurnCorrectionDiagnostics;
|
|
29
|
+
fallbackUsed: boolean;
|
|
30
|
+
finalTranscriptCount: number;
|
|
31
|
+
fallback?: VoiceFallbackDiagnostics;
|
|
32
|
+
partialTranscriptCount: number;
|
|
33
|
+
selectedTranscriptCount: number;
|
|
34
|
+
source: 'fallback' | 'primary';
|
|
35
|
+
};
|
|
36
|
+
export type VoiceTurnCorrectionDiagnostics = {
|
|
37
|
+
attempted: boolean;
|
|
38
|
+
changed: boolean;
|
|
39
|
+
correctedText: string;
|
|
40
|
+
metadata?: Record<string, unknown>;
|
|
41
|
+
originalText: string;
|
|
42
|
+
provider?: string;
|
|
43
|
+
reason?: string;
|
|
44
|
+
};
|
|
45
|
+
export type VoiceFallbackSelectionReason = 'fallback-empty' | 'primary-empty' | 'word-count-margin' | 'confidence-margin' | 'word-count-tiebreak' | 'kept-primary';
|
|
46
|
+
export type VoiceFallbackDiagnostics = {
|
|
47
|
+
attempted: boolean;
|
|
48
|
+
fallbackConfidence?: number;
|
|
49
|
+
fallbackText?: string;
|
|
50
|
+
fallbackWordCount?: number;
|
|
51
|
+
primaryConfidence: number;
|
|
52
|
+
primaryText: string;
|
|
53
|
+
primaryWordCount: number;
|
|
54
|
+
selected: boolean;
|
|
55
|
+
selectionReason: VoiceFallbackSelectionReason;
|
|
56
|
+
trigger: 'empty-turn' | 'low-confidence' | 'empty-or-low-confidence' | 'always';
|
|
57
|
+
};
|
|
19
58
|
export type VoicePartialEvent = {
|
|
20
59
|
type: 'partial';
|
|
21
60
|
transcript: Transcript;
|
|
@@ -58,6 +97,7 @@ export type STTAdapterSession = {
|
|
|
58
97
|
export type STTAdapterOpenOptions = {
|
|
59
98
|
sessionId: string;
|
|
60
99
|
format: AudioFormat;
|
|
100
|
+
phraseHints?: VoicePhraseHint[];
|
|
61
101
|
signal?: AbortSignal;
|
|
62
102
|
};
|
|
63
103
|
export type STTAdapter<TOptions extends STTAdapterOpenOptions = STTAdapterOpenOptions> = {
|
|
@@ -99,6 +139,7 @@ export type RealtimeAdapterSession = {
|
|
|
99
139
|
export type RealtimeAdapterOpenOptions = {
|
|
100
140
|
sessionId: string;
|
|
101
141
|
format: AudioFormat;
|
|
142
|
+
phraseHints?: VoicePhraseHint[];
|
|
102
143
|
signal?: AbortSignal;
|
|
103
144
|
};
|
|
104
145
|
export type RealtimeAdapter<TOptions extends RealtimeAdapterOpenOptions = RealtimeAdapterOpenOptions> = {
|
|
@@ -109,6 +150,7 @@ export type VoiceSessionStatus = 'active' | 'reconnecting' | 'completed' | 'fail
|
|
|
109
150
|
export type VoiceTurnRecord<TResult = unknown> = {
|
|
110
151
|
id: string;
|
|
111
152
|
text: string;
|
|
153
|
+
quality?: VoiceTranscriptQuality;
|
|
112
154
|
transcripts: Transcript[];
|
|
113
155
|
assistantText?: string;
|
|
114
156
|
committedAt: number;
|
|
@@ -123,8 +165,13 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
|
|
|
123
165
|
currentTurn: {
|
|
124
166
|
transcripts: Transcript[];
|
|
125
167
|
partialText: string;
|
|
168
|
+
partialStartedAt?: number;
|
|
169
|
+
partialEndedAt?: number;
|
|
126
170
|
finalText: string;
|
|
127
171
|
lastAudioAt?: number;
|
|
172
|
+
lastSpeechAt?: number;
|
|
173
|
+
lastTranscriptAt?: number;
|
|
174
|
+
silenceStartedAt?: number;
|
|
128
175
|
};
|
|
129
176
|
turns: VoiceTurnRecord<TResult>[];
|
|
130
177
|
committedTurnIds: string[];
|
|
@@ -132,7 +179,14 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
|
|
|
132
179
|
attempts: number;
|
|
133
180
|
lastDisconnectAt?: number;
|
|
134
181
|
};
|
|
182
|
+
lastCommittedTurn?: {
|
|
183
|
+
signature: string;
|
|
184
|
+
text: string;
|
|
185
|
+
transcriptIds: string[];
|
|
186
|
+
committedAt: number;
|
|
187
|
+
};
|
|
135
188
|
metadata?: TMeta;
|
|
189
|
+
scenarioId?: string;
|
|
136
190
|
};
|
|
137
191
|
export type VoiceSessionSummary = {
|
|
138
192
|
id: string;
|
|
@@ -153,6 +207,59 @@ export type VoiceReconnectConfig = {
|
|
|
153
207
|
timeout?: number;
|
|
154
208
|
maxAttempts?: number;
|
|
155
209
|
};
|
|
210
|
+
export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'reliability';
|
|
211
|
+
export type VoiceSTTLifecycle = 'continuous' | 'turn-scoped';
|
|
212
|
+
export type VoiceTurnProfile = 'fast' | 'balanced' | 'long-form';
|
|
213
|
+
export type VoiceTurnQualityProfile = 'general' | 'accent-heavy' | 'noisy-room' | 'short-command';
|
|
214
|
+
export type VoiceTurnFallbackTrigger = 'empty-turn' | 'low-confidence' | 'empty-or-low-confidence' | 'always';
|
|
215
|
+
export type VoiceSTTFallbackConfig = {
|
|
216
|
+
adapter: STTAdapter;
|
|
217
|
+
trigger?: VoiceTurnFallbackTrigger;
|
|
218
|
+
confidenceThreshold?: number;
|
|
219
|
+
minTextLength?: number;
|
|
220
|
+
replayWindowMs?: number;
|
|
221
|
+
settleMs?: number;
|
|
222
|
+
completionTimeoutMs?: number;
|
|
223
|
+
maxAttemptsPerTurn?: number;
|
|
224
|
+
};
|
|
225
|
+
export type VoiceResolvedSTTFallbackConfig = {
|
|
226
|
+
adapter: STTAdapter;
|
|
227
|
+
trigger: VoiceTurnFallbackTrigger;
|
|
228
|
+
confidenceThreshold: number;
|
|
229
|
+
minTextLength: number;
|
|
230
|
+
replayWindowMs: number;
|
|
231
|
+
settleMs: number;
|
|
232
|
+
completionTimeoutMs: number;
|
|
233
|
+
maxAttemptsPerTurn: number;
|
|
234
|
+
};
|
|
235
|
+
export type VoiceTurnDetectionConfig = {
|
|
236
|
+
profile?: VoiceTurnProfile;
|
|
237
|
+
qualityProfile?: VoiceTurnQualityProfile;
|
|
238
|
+
silenceMs?: number;
|
|
239
|
+
speechThreshold?: number;
|
|
240
|
+
transcriptStabilityMs?: number;
|
|
241
|
+
};
|
|
242
|
+
export type VoiceResolvedTurnDetectionConfig = {
|
|
243
|
+
qualityProfile: VoiceTurnQualityProfile;
|
|
244
|
+
profile: VoiceTurnProfile;
|
|
245
|
+
silenceMs: number;
|
|
246
|
+
speechThreshold: number;
|
|
247
|
+
transcriptStabilityMs: number;
|
|
248
|
+
};
|
|
249
|
+
export type VoiceAudioConditioningConfig = {
|
|
250
|
+
enabled?: boolean;
|
|
251
|
+
targetLevel?: number;
|
|
252
|
+
maxGain?: number;
|
|
253
|
+
noiseGateThreshold?: number;
|
|
254
|
+
noiseGateAttenuation?: number;
|
|
255
|
+
};
|
|
256
|
+
export type VoiceResolvedAudioConditioningConfig = {
|
|
257
|
+
enabled: true;
|
|
258
|
+
targetLevel: number;
|
|
259
|
+
maxGain: number;
|
|
260
|
+
noiseGateThreshold: number;
|
|
261
|
+
noiseGateAttenuation: number;
|
|
262
|
+
};
|
|
156
263
|
export type VoiceSocket = {
|
|
157
264
|
send: (data: string | Uint8Array | ArrayBuffer) => void | Promise<void>;
|
|
158
265
|
close: (code?: number, reason?: string) => void | Promise<void>;
|
|
@@ -173,6 +280,26 @@ export type VoiceRouteResult<TResult = unknown> = {
|
|
|
173
280
|
result?: TResult;
|
|
174
281
|
assistantText?: string;
|
|
175
282
|
};
|
|
283
|
+
export type VoiceTurnCorrectionResult = string | {
|
|
284
|
+
text: string;
|
|
285
|
+
reason?: string;
|
|
286
|
+
provider?: string;
|
|
287
|
+
metadata?: Record<string, unknown>;
|
|
288
|
+
};
|
|
289
|
+
export type VoiceTurnCorrectionHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
|
|
290
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
291
|
+
context: TContext;
|
|
292
|
+
fallback?: VoiceFallbackDiagnostics;
|
|
293
|
+
phraseHints: VoicePhraseHint[];
|
|
294
|
+
session: TSession;
|
|
295
|
+
text: string;
|
|
296
|
+
transcripts: Transcript[];
|
|
297
|
+
}) => Promise<VoiceTurnCorrectionResult | void> | VoiceTurnCorrectionResult | void;
|
|
298
|
+
export type VoicePhraseHintResolver<TContext = unknown> = (input: {
|
|
299
|
+
context: TContext;
|
|
300
|
+
scenarioId?: string;
|
|
301
|
+
sessionId: string;
|
|
302
|
+
}) => Promise<VoicePhraseHint[] | void> | VoicePhraseHint[] | void;
|
|
176
303
|
export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
|
|
177
304
|
context: TContext;
|
|
178
305
|
session: TSession;
|
|
@@ -186,6 +313,7 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
|
|
|
186
313
|
session: TSession;
|
|
187
314
|
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
188
315
|
}) => Promise<void> | void;
|
|
316
|
+
correctTurn?: VoiceTurnCorrectionHandler<TContext, TSession, TResult>;
|
|
189
317
|
onTurn: VoiceOnTurnHandler<TContext, TSession, TResult>;
|
|
190
318
|
onComplete: (input: {
|
|
191
319
|
context: TContext;
|
|
@@ -203,16 +331,24 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
|
|
|
203
331
|
export type VoiceNormalizedRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = Omit<VoiceRouteConfig<TContext, TSession, TResult>, 'onTurn'> & {
|
|
204
332
|
onTurn: VoiceOnTurnObjectHandler<TContext, TSession, TResult>;
|
|
205
333
|
};
|
|
334
|
+
export type VoiceScenario = {
|
|
335
|
+
id: string;
|
|
336
|
+
name?: string;
|
|
337
|
+
description?: string;
|
|
338
|
+
metadata?: Record<string, unknown>;
|
|
339
|
+
};
|
|
206
340
|
export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
207
341
|
path: string;
|
|
342
|
+
phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
|
|
343
|
+
preset?: VoiceRuntimePreset;
|
|
208
344
|
stt: STTAdapter;
|
|
345
|
+
sttFallback?: VoiceSTTFallbackConfig;
|
|
346
|
+
sttLifecycle?: VoiceSTTLifecycle;
|
|
209
347
|
tts?: TTSAdapter;
|
|
210
348
|
session: VoiceSessionStore<NoInfer<TSession>>;
|
|
211
349
|
reconnect?: VoiceReconnectConfig;
|
|
212
|
-
turnDetection?:
|
|
213
|
-
|
|
214
|
-
speechThreshold?: number;
|
|
215
|
-
};
|
|
350
|
+
turnDetection?: VoiceTurnDetectionConfig;
|
|
351
|
+
audioConditioning?: VoiceAudioConditioningConfig;
|
|
216
352
|
logger?: VoiceLogger;
|
|
217
353
|
htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
|
|
218
354
|
} & VoiceRouteConfig<TContext, TSession, TResult>;
|
|
@@ -221,12 +357,14 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
|
|
|
221
357
|
context: TContext;
|
|
222
358
|
socket: VoiceSocket;
|
|
223
359
|
stt: STTAdapter;
|
|
360
|
+
sttFallback?: VoiceResolvedSTTFallbackConfig;
|
|
224
361
|
store: VoiceSessionStore<TSession>;
|
|
225
362
|
reconnect: Required<VoiceReconnectConfig>;
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
363
|
+
phraseHints?: VoicePhraseHint[];
|
|
364
|
+
scenarioId?: string;
|
|
365
|
+
sttLifecycle: VoiceSTTLifecycle;
|
|
366
|
+
turnDetection: VoiceResolvedTurnDetectionConfig;
|
|
367
|
+
audioConditioning?: VoiceResolvedAudioConditioningConfig;
|
|
230
368
|
route: VoiceNormalizedRouteConfig<TContext, TSession, TResult>;
|
|
231
369
|
logger?: VoiceLogger;
|
|
232
370
|
};
|
|
@@ -234,6 +372,7 @@ export type CreateVoiceSession = <TContext = unknown, TSession extends VoiceSess
|
|
|
234
372
|
export type VoiceClientStartMessage = {
|
|
235
373
|
type: 'start';
|
|
236
374
|
sessionId?: string;
|
|
375
|
+
scenarioId?: string;
|
|
237
376
|
};
|
|
238
377
|
export type VoiceClientEndTurnMessage = {
|
|
239
378
|
type: 'end_turn';
|
|
@@ -250,6 +389,7 @@ export type VoiceServerSessionMessage = {
|
|
|
250
389
|
type: 'session';
|
|
251
390
|
sessionId: string;
|
|
252
391
|
status: VoiceSessionStatus;
|
|
392
|
+
scenarioId?: string;
|
|
253
393
|
};
|
|
254
394
|
export type VoiceServerPartialMessage = {
|
|
255
395
|
type: 'partial';
|
|
@@ -283,14 +423,27 @@ export type VoiceServerPongMessage = {
|
|
|
283
423
|
export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
|
|
284
424
|
export type VoiceConnectionOptions = {
|
|
285
425
|
protocols?: string[];
|
|
426
|
+
scenarioId?: string;
|
|
286
427
|
reconnect?: boolean;
|
|
287
428
|
maxReconnectAttempts?: number;
|
|
288
429
|
pingInterval?: number;
|
|
289
430
|
sessionId?: string;
|
|
290
431
|
};
|
|
432
|
+
export type VoiceCaptureOptions = {
|
|
433
|
+
channelCount?: 1 | 2;
|
|
434
|
+
onLevel?: (level: number) => void;
|
|
435
|
+
sampleRateHz?: number;
|
|
436
|
+
};
|
|
437
|
+
export type VoiceControllerOptions = {
|
|
438
|
+
preset?: VoiceRuntimePreset;
|
|
439
|
+
connection?: VoiceConnectionOptions;
|
|
440
|
+
capture?: VoiceCaptureOptions;
|
|
441
|
+
autoStopOnComplete?: boolean;
|
|
442
|
+
};
|
|
291
443
|
export type VoiceHTMXRenderInput<TResult = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord> = {
|
|
292
444
|
assistantTexts: string[];
|
|
293
445
|
partial: string;
|
|
446
|
+
scenarioId?: string;
|
|
294
447
|
result?: TResult;
|
|
295
448
|
session?: TSession;
|
|
296
449
|
sessionId?: string;
|
|
@@ -322,6 +475,7 @@ export type VoiceHTMXOptions<TSession extends VoiceSessionRecord = VoiceSessionR
|
|
|
322
475
|
export type VoiceHTMXConfig<TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceHTMXRenderer<TSession, TResult> | VoiceHTMXOptions<TSession, TResult>;
|
|
323
476
|
export type VoiceStreamState<TResult = unknown> = {
|
|
324
477
|
sessionId: string | null;
|
|
478
|
+
scenarioId: string | null;
|
|
325
479
|
status: VoiceSessionStatus | 'idle';
|
|
326
480
|
partial: string;
|
|
327
481
|
turns: VoiceTurnRecord<TResult>[];
|
|
@@ -331,6 +485,10 @@ export type VoiceStreamState<TResult = unknown> = {
|
|
|
331
485
|
};
|
|
332
486
|
export type VoiceStream<TResult = unknown> = {
|
|
333
487
|
close: () => void;
|
|
488
|
+
start: (input?: {
|
|
489
|
+
scenarioId?: string;
|
|
490
|
+
sessionId?: string;
|
|
491
|
+
}) => Promise<void>;
|
|
334
492
|
endTurn: () => void;
|
|
335
493
|
error: string | null;
|
|
336
494
|
getServerSnapshot: () => VoiceStreamState<TResult>;
|
|
@@ -339,8 +497,39 @@ export type VoiceStream<TResult = unknown> = {
|
|
|
339
497
|
partial: string;
|
|
340
498
|
sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
|
|
341
499
|
sessionId: string | null;
|
|
500
|
+
scenarioId: string | null;
|
|
501
|
+
status: VoiceSessionStatus | 'idle';
|
|
502
|
+
subscribe: (subscriber: () => void) => () => void;
|
|
503
|
+
turns: VoiceTurnRecord<TResult>[];
|
|
504
|
+
assistantTexts: string[];
|
|
505
|
+
};
|
|
506
|
+
export type VoiceControllerState<TResult = unknown> = VoiceStreamState<TResult> & {
|
|
507
|
+
isRecording: boolean;
|
|
508
|
+
recordingError: string | null;
|
|
509
|
+
};
|
|
510
|
+
export type VoiceController<TResult = unknown> = {
|
|
511
|
+
bindHTMX: (options: VoiceHTMXBindingOptions) => () => void;
|
|
512
|
+
close: () => void;
|
|
513
|
+
endTurn: () => void;
|
|
514
|
+
start: (input?: {
|
|
515
|
+
scenarioId?: string;
|
|
516
|
+
sessionId?: string;
|
|
517
|
+
}) => Promise<void>;
|
|
518
|
+
error: string | null;
|
|
519
|
+
getServerSnapshot: () => VoiceControllerState<TResult>;
|
|
520
|
+
getSnapshot: () => VoiceControllerState<TResult>;
|
|
521
|
+
isConnected: boolean;
|
|
522
|
+
isRecording: boolean;
|
|
523
|
+
partial: string;
|
|
524
|
+
recordingError: string | null;
|
|
525
|
+
sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
|
|
526
|
+
sessionId: string | null;
|
|
527
|
+
scenarioId: string | null;
|
|
528
|
+
startRecording: () => Promise<void>;
|
|
342
529
|
status: VoiceSessionStatus | 'idle';
|
|
530
|
+
stopRecording: () => void;
|
|
343
531
|
subscribe: (subscriber: () => void) => () => void;
|
|
532
|
+
toggleRecording: () => Promise<void>;
|
|
344
533
|
turns: VoiceTurnRecord<TResult>[];
|
|
345
534
|
assistantTexts: string[];
|
|
346
535
|
};
|
|
@@ -353,6 +542,7 @@ export type VoiceHTMXBindingOptions = {
|
|
|
353
542
|
export type VoiceStoreAction<TResult = unknown> = {
|
|
354
543
|
type: 'session';
|
|
355
544
|
sessionId: string;
|
|
545
|
+
scenarioId?: string;
|
|
356
546
|
status: VoiceSessionStatus;
|
|
357
547
|
} | {
|
|
358
548
|
type: 'partial';
|
package/dist/vue/index.d.ts
CHANGED