@volley/recognition-client-sdk 0.1.200

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,461 @@
1
+ import { A as ASRRequestConfig, G as GameContextV1, R as RecognitionCallbackUrl, T as TranscriptionResultV1, M as MetadataResultV1, E as ErrorResultV1, a as RealTimeTwoWayWebSocketRecognitionClientConfig, I as IRecognitionClient, C as ClientState, b as IRecognitionClientConfig } from './browser-CDQ_TzeH.js';
2
+ export { k as ASRRequestV1, f as AudioEncoding, h as ControlSignal, h as ControlSignalTypeV1, D as DeepgramModel, F as FunctionCallResultV1, m as GoogleModel, e as IRecognitionClientStats, L as Language, c as RealTimeTwoWayWebSocketRecognitionClient, g as RecognitionContextTypeV1, l as RecognitionProvider, j as RecognitionResultTypeV1, S as SampleRate, d as TranscriptionResult, i as isNormalDisconnection } from './browser-CDQ_TzeH.js';
3
+ import { z } from 'zod';
4
+
5
+ /**
6
+ * Gemini model identifiers (each member's value is the model-name string).
7
+ * Based on available models as of January 2025
8
+ *
9
+ * API Version Notes:
10
+ * - Gemini 2.5+ models: Use v1beta API (early access features)
11
+ * - Gemini 2.0 models: Use v1beta API (early access features)
12
+ * - Gemini 1.5 models: Use v1 API (stable, production-ready)
13
+ *
14
+ * @see https://ai.google.dev/gemini-api/docs/models
15
+ * @see https://ai.google.dev/gemini-api/docs/api-versions
16
+ */
17
+ declare enum GeminiModel {
18
+ GEMINI_2_5_PRO = "gemini-2.5-pro",// State-of-the-art thinking model
19
+ GEMINI_2_5_FLASH = "gemini-2.5-flash",// Best price-performance balance
20
+ GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",// Fastest, most cost-efficient
21
+ GEMINI_2_0_FLASH_LATEST = "gemini-2.0-flash-latest",// Auto-updated to latest 2.0 flash
22
+ GEMINI_2_0_FLASH = "gemini-2.0-flash-002",// Specific stable version (value is pinned to the -002 revision)
23
+ GEMINI_2_0_FLASH_EXP = "gemini-2.0-flash-exp",// Experimental version
24
+ GEMINI_1_5_FLASH = "gemini-1.5-flash",// 1.5 generation, flash tier (v1 API per the notes above)
25
+ GEMINI_1_5_PRO = "gemini-1.5-pro"// 1.5 generation, pro tier (v1 API per the notes above)
26
+ }
27
+
28
+ /**
29
+ * OpenAI model identifiers; `whisper-1` is the only model declared here.
30
+ */
31
+ declare enum OpenAIModel {
32
+ WHISPER_1 = "whisper-1"
33
+ }
34
+
35
+ /**
36
+ * Standard stage/environment constants used across all services; the `Stage` type below is the union of these string values.
37
+ */
38
+ declare const STAGES: {
39
+ readonly LOCAL: "local";
40
+ readonly DEV: "dev";
41
+ readonly STAGING: "staging";
42
+ readonly PRODUCTION: "production";
43
+ };
44
+ type Stage = typeof STAGES[keyof typeof STAGES]; // resolves to "local" | "dev" | "staging" | "production"
45
+
46
+ /**
47
+ * Configuration Builder for Recognition Client
48
+ *
49
+ * Simple builder pattern for RealTimeTwoWayWebSocketRecognitionClientConfig
50
+ */
51
+
52
+ /**
53
+ * Builder for RealTimeTwoWayWebSocketRecognitionClientConfig
54
+ *
55
+ * Provides a fluent API for building client configurations: every setter returns `this` so calls can be chained, and build() returns the finished config.
56
+ *
57
+ * Example:
58
+ * ```typescript
59
+ * const config = new ConfigBuilder()
60
+ * .url('ws://localhost:3101/ws/v1/recognize')
61
+ * .asrRequestConfig({
62
+ * provider: RecognitionProvider.DEEPGRAM,
63
+ * model: 'nova-2-general'
64
+ * })
65
+ * .onTranscript((result) => console.log(result))
66
+ * .build();
67
+ * ```
68
+ */
69
+ declare class ConfigBuilder {
70
+ private config; // presumably the config being assembled by the setters; its type is not exposed in this declaration
71
+ /**
72
+ * Set the WebSocket URL (e.g. 'ws://localhost:3101/ws/v1/recognize', as in the class example)
73
+ */
74
+ url(url: string): this;
75
+ /**
76
+ * Set ASR request configuration (e.g. provider and model, as in the class example)
77
+ */
78
+ asrRequestConfig(config: ASRRequestConfig): this;
79
+ /**
80
+ * Set game context
81
+ */
82
+ gameContext(context: GameContextV1): this;
83
+ /**
84
+ * Set audio utterance ID
85
+ */
86
+ audioUtteranceId(id: string): this;
87
+ /**
88
+ * Set callback URLs (an array of RecognitionCallbackUrl)
89
+ */
90
+ callbackUrls(urls: RecognitionCallbackUrl[]): this;
91
+ /**
92
+ * Set user ID
93
+ */
94
+ userId(id: string): this;
95
+ /**
96
+ * Set game session ID
97
+ */
98
+ gameSessionId(id: string): this;
99
+ /**
100
+ * Set device ID
101
+ */
102
+ deviceId(id: string): this;
103
+ /**
104
+ * Set account ID
105
+ */
106
+ accountId(id: string): this;
107
+ /**
108
+ * Set question answer ID
109
+ */
110
+ questionAnswerId(id: string): this;
111
+ /**
112
+ * Set platform
113
+ */
114
+ platform(platform: string): this;
115
+ /**
116
+ * Set transcript callback; it is called with a TranscriptionResultV1
117
+ */
118
+ onTranscript(callback: (result: TranscriptionResultV1) => void): this;
119
+ /**
120
+ * Set metadata callback; it is called with a MetadataResultV1
121
+ */
122
+ onMetadata(callback: (metadata: MetadataResultV1) => void): this;
123
+ /**
124
+ * Set error callback; it is called with an ErrorResultV1
125
+ */
126
+ onError(callback: (error: ErrorResultV1) => void): this;
127
+ /**
128
+ * Set connected callback (called with no arguments)
129
+ */
130
+ onConnected(callback: () => void): this;
131
+ /**
132
+ * Set disconnected callback; it receives a numeric code and a reason string (presumably the WebSocket close code/reason; confirm)
133
+ */
134
+ onDisconnected(callback: (code: number, reason: string) => void): this;
135
+ /**
136
+ * Set high water mark, in bytes
137
+ */
138
+ highWaterMark(bytes: number): this;
139
+ /**
140
+ * Set low water mark, in bytes
141
+ */
142
+ lowWaterMark(bytes: number): this;
143
+ /**
144
+ * Set max buffer duration in seconds
145
+ */
146
+ maxBufferDurationSec(seconds: number): this;
147
+ /**
148
+ * Set chunks per second
149
+ */
150
+ chunksPerSecond(chunks: number): this;
151
+ /**
152
+ * Set logger function; it receives a level ('debug' | 'info' | 'warn' | 'error'), a message, and optional data
153
+ */
154
+ logger(logger: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void): this;
155
+ /**
156
+ * Build the configuration and return it as a RealTimeTwoWayWebSocketRecognitionClientConfig
157
+ */
158
+ build(): RealTimeTwoWayWebSocketRecognitionClientConfig;
159
+ }
160
+
161
+ /**
162
+ * Factory function for creating Recognition Client instances
163
+ */
164
+
165
+ /**
166
+ * Create a recognition client (typed as IRecognitionClient) from a RealTimeTwoWayWebSocketRecognitionClientConfig object
167
+ *
168
+ * Example:
169
+ * ```typescript
170
+ * const client = createClient({
171
+ * url: 'ws://localhost:3101/ws/v1/recognize',
172
+ * onTranscript: (result) => console.log(result)
173
+ * });
174
+ * ```
175
+ */
176
+ declare function createClient(config: RealTimeTwoWayWebSocketRecognitionClientConfig): IRecognitionClient;
177
+ /**
178
+ * Create a recognition client using the builder pattern: `configure` receives a ConfigBuilder and must return the configured builder.
179
+ *
180
+ * Example (note the callback returns the builder without calling build(); presumably build() is invoked internally, confirm):
181
+ * ```typescript
182
+ * const client = createClientWithBuilder((builder) =>
183
+ * builder
184
+ * .url('ws://localhost:3101/ws/v1/recognize')
185
+ * .onTranscript((result) => console.log(result))
186
+ * .onError((error) => console.error(error))
187
+ * );
188
+ * ```
189
+ */
190
+ declare function createClientWithBuilder(configure: (builder: ConfigBuilder) => ConfigBuilder): IRecognitionClient;
191
+
192
+ /**
193
+ * VGF-style state schema for game-side recognition state/results management.
194
+ *
195
+ * This schema provides a standardized way for game developers to manage
196
+ * voice recognition state and results in their applications. It supports:
197
+ *
198
+ * STEP 1: Basic transcription flow
199
+ * STEP 2: Mic auto-stop upon correct answer (using partial transcripts)
200
+ * STEP 3: Semantic/function-call outcomes for game actions
201
+ *
202
+ * TODO: ideally this would live in a centralized shared type library (VGF? Platform SDK?)
203
+ * that also provides helper functions, so game developers do not have to maintain it themselves.
204
+ */
205
+ declare const RecognitionVGFStateSchema: z.ZodObject<{
206
+ audioUtteranceId: z.ZodString; // the only field that is required on input
207
+ startRecordingStatus: z.ZodOptional<z.ZodString>;
208
+ transcriptionStatus: z.ZodOptional<z.ZodString>;
209
+ finalTranscript: z.ZodOptional<z.ZodString>;
210
+ finalConfidence: z.ZodOptional<z.ZodNumber>;
211
+ asrConfig: z.ZodOptional<z.ZodString>;
212
+ startRecordingTimestamp: z.ZodOptional<z.ZodString>;
213
+ finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
214
+ finalTranscriptionTimestamp: z.ZodOptional<z.ZodString>;
215
+ pendingTranscript: z.ZodDefault<z.ZodOptional<z.ZodString>>; // has a schema default, so it is always a string after parsing
216
+ pendingConfidence: z.ZodOptional<z.ZodNumber>;
217
+ functionCallMetadata: z.ZodOptional<z.ZodString>;
218
+ functionCallConfidence: z.ZodOptional<z.ZodNumber>;
219
+ finalFunctionCallTimestamp: z.ZodOptional<z.ZodString>;
220
+ promptSlotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
221
+ }, "strip", z.ZodTypeAny, { // output (parsed) shape follows
222
+ audioUtteranceId: string;
223
+ pendingTranscript: string;
224
+ startRecordingStatus?: string | undefined;
225
+ transcriptionStatus?: string | undefined;
226
+ finalTranscript?: string | undefined;
227
+ finalConfidence?: number | undefined;
228
+ asrConfig?: string | undefined;
229
+ startRecordingTimestamp?: string | undefined;
230
+ finalRecordingTimestamp?: string | undefined;
231
+ finalTranscriptionTimestamp?: string | undefined;
232
+ pendingConfidence?: number | undefined;
233
+ functionCallMetadata?: string | undefined;
234
+ functionCallConfidence?: number | undefined;
235
+ finalFunctionCallTimestamp?: string | undefined;
236
+ promptSlotMap?: Record<string, string[]> | undefined;
237
+ }, { // input (pre-parse) shape follows; pendingTranscript is optional here
238
+ audioUtteranceId: string;
239
+ startRecordingStatus?: string | undefined;
240
+ transcriptionStatus?: string | undefined;
241
+ finalTranscript?: string | undefined;
242
+ finalConfidence?: number | undefined;
243
+ asrConfig?: string | undefined;
244
+ startRecordingTimestamp?: string | undefined;
245
+ finalRecordingTimestamp?: string | undefined;
246
+ finalTranscriptionTimestamp?: string | undefined;
247
+ pendingTranscript?: string | undefined;
248
+ pendingConfidence?: number | undefined;
249
+ functionCallMetadata?: string | undefined;
250
+ functionCallConfidence?: number | undefined;
251
+ finalFunctionCallTimestamp?: string | undefined;
252
+ promptSlotMap?: Record<string, string[]> | undefined;
253
+ }>;
254
+ type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>; // z.infer yields the parsed (output) shape, so pendingTranscript is a required string on this type
255
+ declare const RecordingStatus: { // recording status constants; presumably the values stored in the schema's startRecordingStatus field (which is typed as plain string), confirm
256
+ readonly NOT_READY: "NOT_READY";
257
+ readonly READY: "READY";
258
+ readonly RECORDING: "RECORDING";
259
+ readonly FINISHED: "FINISHED";
260
+ };
261
+ type RecordingStatusType = typeof RecordingStatus[keyof typeof RecordingStatus]; // resolves to "NOT_READY" | "READY" | "RECORDING" | "FINISHED"
262
+ declare const TranscriptionStatus: { // transcription status constants; presumably the values stored in the schema's transcriptionStatus field (which is typed as plain string), confirm
263
+ readonly NOT_STARTED: "NOT_STARTED";
264
+ readonly IN_PROGRESS: "IN_PROGRESS";
265
+ readonly FINALIZED: "FINALIZED";
266
+ readonly ERROR: "ERROR";
267
+ };
268
+ type TranscriptionStatusType = typeof TranscriptionStatus[keyof typeof TranscriptionStatus]; // resolves to "NOT_STARTED" | "IN_PROGRESS" | "FINALIZED" | "ERROR"
269
+ declare function createInitialRecognitionState(audioUtteranceId: string): RecognitionState; // returns a RecognitionState, presumably a fresh one carrying the given audioUtteranceId; the initial field values are not visible from this declaration
270
+ declare function isValidRecordingStatusTransition(from: string | undefined, to: string): boolean; // presumably true when moving from status `from` (may be undefined) to `to` is allowed; the transition rules are not visible from this declaration
271
+
272
+ /**
273
+ * Simplified VGF Recognition Client
274
+ *
275
+ * A thin wrapper around RealTimeTwoWayWebSocketRecognitionClient that maintains
276
+ * a VGF RecognitionState as a pure sink/output of recognition events.
277
+ *
278
+ * The VGF state is updated based on events but never influences client behavior.
279
+ * All functionality is delegated to the underlying client.
280
+ */
281
+
282
+ /**
283
+ * Configuration for SimplifiedVGFRecognitionClient: everything in IRecognitionClientConfig plus two optional VGF-specific fields
284
+ */
285
+ interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
286
+ /**
287
+ * Callback invoked whenever the VGF state changes; it receives the RecognitionState
288
+ * Use this to update your UI or React state
289
+ */
290
+ onStateChange?: (state: RecognitionState) => void;
291
+ /**
292
+ * Optional initial state to restore from a previous session
293
+ * If provided, audioUtteranceId will be extracted and used
294
+ */
295
+ initialState?: RecognitionState;
296
+ }
297
+ /**
298
+ * Interface for SimplifiedVGFRecognitionClient
299
+ *
300
+ * A simplified client that maintains VGF state for game developers.
301
+ * Exposes the connection, audio and status methods listed below, plus VGF state access via getVGFState().
302
+ */
303
+ interface ISimplifiedVGFRecognitionClient {
304
+ /**
305
+ * Connect to the recognition service WebSocket
306
+ * @returns Promise that resolves when connected and ready
307
+ */
308
+ connect(): Promise<void>;
309
+ /**
310
+ * Send audio data for transcription
311
+ * @param audioData - PCM audio data as ArrayBuffer or typed array
312
+ */
313
+ sendAudio(audioData: ArrayBuffer | ArrayBufferView): void;
314
+ /**
315
+ * Stop recording and wait for final transcription
316
+ * @returns Promise that resolves when transcription is complete
317
+ */
318
+ stopRecording(): Promise<void>;
319
+ /**
320
+ * Get the current VGF recognition state
321
+ * @returns Current RecognitionState with all transcription data
322
+ */
323
+ getVGFState(): RecognitionState;
324
+ /**
325
+ * Check if connected to the WebSocket
326
+ */
327
+ isConnected(): boolean;
328
+ /**
329
+ * Check if currently connecting
330
+ */
331
+ isConnecting(): boolean;
332
+ /**
333
+ * Check if currently stopping
334
+ */
335
+ isStopping(): boolean;
336
+ /**
337
+ * Check if transcription has finished
338
+ */
339
+ isTranscriptionFinished(): boolean;
340
+ /**
341
+ * Check if the audio buffer has overflowed
342
+ */
343
+ isBufferOverflowing(): boolean;
344
+ /**
345
+ * Get the audio utterance ID for this session
346
+ */
347
+ getAudioUtteranceId(): string;
348
+ /**
349
+ * Get the underlying client state as a ClientState (for advanced usage)
350
+ */
351
+ getState(): ClientState;
352
+ }
353
+ /**
354
+ * Concrete implementation of ISimplifiedVGFRecognitionClient. This wrapper ONLY maintains VGF state as a sink.
355
+ * All actual functionality is delegated to the underlying client.
356
+ */
357
+ declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitionClient {
358
+ private client; // presumably the underlying client that calls are delegated to; type not exposed here
359
+ private state; // presumably the RecognitionState returned by getVGFState(), confirm
360
+ private isRecordingAudio;
361
+ private stateChangeCallback; // presumably the config's onStateChange callback, confirm
362
+ constructor(config: SimplifiedVGFClientConfig);
363
+ connect(): Promise<void>;
364
+ sendAudio(audioData: ArrayBuffer | ArrayBufferView): void;
365
+ stopRecording(): Promise<void>;
366
+ getAudioUtteranceId(): string;
367
+ getState(): ClientState;
368
+ isConnected(): boolean;
369
+ isConnecting(): boolean;
370
+ isStopping(): boolean;
371
+ isTranscriptionFinished(): boolean;
372
+ isBufferOverflowing(): boolean;
373
+ getVGFState(): RecognitionState;
374
+ private notifyStateChange;
375
+ }
376
+ /**
377
+ * Factory function for creating a simplified VGF client (returned as ISimplifiedVGFRecognitionClient)
378
+ * Usage examples:
379
+ *
380
+ * // Basic usage
381
+ * const client = createSimplifiedVGFClient({
382
+ * asrRequestConfig: { provider: 'deepgram', language: 'en' },
383
+ * onStateChange: (state) => {
384
+ * console.log('VGF State updated:', state);
385
+ * // Update React state, game UI, etc.
386
+ * }
387
+ * });
388
+ *
389
+ * // With initial state (e.g., restoring from previous session)
390
+ * const client = createSimplifiedVGFClient({
391
+ * asrRequestConfig: { provider: 'deepgram', language: 'en' },
392
+ * initialState: previousState, // Will use audioUtteranceId from state
393
+ * onStateChange: (state) => setVGFState(state)
394
+ * });
395
+ *
396
+ * // With initial state containing promptSlotMap for enhanced recognition
397
+ * const stateWithSlots: RecognitionState = {
398
+ * audioUtteranceId: 'session-123', pendingTranscript: '', // pendingTranscript is a required string on RecognitionState
399
+ * promptSlotMap: {
400
+ * 'song_title': ['one time', 'baby'],
401
+ * 'artists': ['justin bieber']
402
+ * }
403
+ * };
404
+ * const client = createSimplifiedVGFClient({
405
+ * asrRequestConfig: { provider: 'deepgram', language: 'en' },
406
+ * gameContext: {
407
+ * type: RecognitionContextTypeV1.GAME_CONTEXT,
408
+ * gameId: 'music-quiz', // Your game's ID
409
+ * gamePhase: 'song-guessing' // Current game phase
410
+ * },
411
+ * initialState: stateWithSlots, // promptSlotMap will be added to gameContext
412
+ * onStateChange: (state) => setVGFState(state)
413
+ * });
414
+ *
415
+ * await client.connect();
416
+ * client.sendAudio(audioData);
417
+ * // VGF state automatically updates based on transcription results
418
+ */
419
+ declare function createSimplifiedVGFClient(config: SimplifiedVGFClientConfig): ISimplifiedVGFRecognitionClient;
420
+
421
+ /**
422
+ * Base URL schema shared across service endpoint helpers: one HTTP base and one WebSocket base per service.
423
+ */
424
+ type ServiceBaseUrls = {
425
+ httpBase: string; // HTTP base URL
426
+ wsBase: string; // WebSocket base URL
427
+ };
428
+ /**
429
+ * Base URL mappings keyed by stage, one table per service (recognition-service and recognition-conductor); every Stage value has an entry.
430
+ */
431
+ declare const RECOGNITION_SERVICE_BASES: Record<Stage, ServiceBaseUrls>;
432
+ declare const RECOGNITION_CONDUCTOR_BASES: Record<Stage, ServiceBaseUrls>;
433
+ /**
434
+ * Normalize arbitrary stage input into a known `Stage`, defaulting to `local`. Accepts any string as well as null/undefined.
435
+ */
436
+ declare function normalizeStage(input?: Stage | string | null | undefined): Stage;
437
+ /**
438
+ * Resolve the recognition-service base URLs (both httpBase and wsBase) for a given stage; the stage may be any string, null or undefined.
439
+ */
440
+ declare function getRecognitionServiceBase(stage?: Stage | string | null | undefined): ServiceBaseUrls;
441
+ /**
442
+ * Convenience helper for retrieving the recognition-service HTTP base URL as a string.
443
+ */
444
+ declare function getRecognitionServiceHttpBase(stage?: Stage | string | null | undefined): string;
445
+ /**
446
+ * Convenience helper for retrieving the recognition-service WebSocket base URL as a string.
447
+ */
448
+ declare function getRecognitionServiceWsBase(stage?: Stage | string | null | undefined): string;
449
+ /**
450
+ * Expose the recognition-service hostname lookup separately for callers that need raw host strings.
451
+ */
452
+ declare function getRecognitionServiceHost(stage?: Stage | string | null | undefined): string;
453
+ /**
454
+ * Resolve the recognition-conductor base URLs (both httpBase and wsBase) for a given stage; the stage may be any string, null or undefined.
455
+ */
456
+ declare function getRecognitionConductorBase(stage?: Stage | string | null | undefined): ServiceBaseUrls;
457
+ declare function getRecognitionConductorHttpBase(stage?: Stage | string | null | undefined): string; // presumably the recognition-conductor counterpart of getRecognitionServiceHttpBase (HTTP base URL), confirm
458
+ declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string; // presumably the recognition-conductor counterpart of getRecognitionServiceWsBase (WebSocket base URL), confirm
459
+ declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string; // presumably the recognition-conductor counterpart of getRecognitionServiceHost (raw host string), confirm
460
+
461
+ export { ASRRequestConfig, ClientState, ConfigBuilder, ErrorResultV1, GameContextV1, GeminiModel, IRecognitionClient, IRecognitionClientConfig, type ISimplifiedVGFRecognitionClient, MetadataResultV1, OpenAIModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, type RecordingStatusType, type SimplifiedVGFClientConfig, SimplifiedVGFRecognitionClient, TranscriptionResultV1, TranscriptionStatus, type TranscriptionStatusType, createClient, createClientWithBuilder, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, isValidRecordingStatusTransition, normalizeStage };