@volley/recognition-client-sdk 0.1.200
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +168 -0
- package/dist/browser-CDQ_TzeH.d.ts +1039 -0
- package/dist/index.d.ts +461 -0
- package/dist/index.js +2332 -0
- package/dist/index.js.map +1 -0
- package/dist/recog-client-sdk.browser.d.ts +2 -0
- package/dist/recog-client-sdk.browser.js +1843 -0
- package/dist/recog-client-sdk.browser.js.map +1 -0
- package/package.json +73 -0
- package/src/browser.ts +24 -0
- package/src/config-builder.ts +213 -0
- package/src/factory.ts +43 -0
- package/src/index.ts +86 -0
- package/src/recognition-client.spec.ts +551 -0
- package/src/recognition-client.ts +595 -0
- package/src/recognition-client.types.ts +260 -0
- package/src/simplified-vgf-recognition-client.spec.ts +671 -0
- package/src/simplified-vgf-recognition-client.ts +339 -0
- package/src/utils/audio-ring-buffer.ts +170 -0
- package/src/utils/message-handler.ts +131 -0
- package/src/utils/url-builder.ts +70 -0
- package/src/vgf-recognition-mapper.ts +225 -0
- package/src/vgf-recognition-state.ts +89 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
/**
 * Recognition Client Types
 *
 * Type definitions and interfaces for the recognition client SDK.
 * These interfaces enable dependency injection, testing, and alternative implementations.
 */
|
|
7
|
+
|
|
8
|
+
import {
|
|
9
|
+
TranscriptionResultV1,
|
|
10
|
+
FunctionCallResultV1,
|
|
11
|
+
MetadataResultV1,
|
|
12
|
+
ErrorResultV1,
|
|
13
|
+
ASRRequestConfig,
|
|
14
|
+
GameContextV1
|
|
15
|
+
} from '@recog/shared-types';
|
|
16
|
+
|
|
17
|
+
/**
 * Client connection state enum.
 *
 * Represents the various states a recognition client can be in during its
 * lifecycle. Members are string-valued, so the runtime value of each member
 * is the lowercase literal shown below.
 */
export enum ClientState {
  /** Initial state, no connection established */
  INITIAL = 'initial',

  /** Actively establishing WebSocket connection */
  CONNECTING = 'connecting',

  /** WebSocket connected but waiting for server ready signal */
  CONNECTED = 'connected',

  /** Server ready, can send audio */
  READY = 'ready',

  /** Sent stop signal, waiting for final transcript */
  STOPPING = 'stopping',

  /** Connection closed normally after stop */
  STOPPED = 'stopped',

  /** Connection failed or lost unexpectedly */
  FAILED = 'failed'
}
|
|
43
|
+
|
|
44
|
+
/**
 * Callback URL configuration with message type filtering.
 */
export interface RecognitionCallbackUrl {
  /** The callback URL endpoint */
  url: string;

  /** Array of message types to send to this URL. If empty/undefined, all types are sent */
  messageTypes?: Array<string | number>;
}

/**
 * Legacy alias for backward compatibility.
 * Structurally identical to {@link RecognitionCallbackUrl}.
 */
export type IRecognitionCallbackUrl = RecognitionCallbackUrl;
|
|
57
|
+
|
|
58
|
+
/**
 * Configuration accepted by recognition clients.
 *
 * Every field is optional: connection target, ASR/game context, tracking
 * identifiers, event callbacks, and buffering/backpressure tuning.
 */
export interface IRecognitionClientConfig {
  /**
   * WebSocket endpoint URL (optional - defaults to production)
   *
   * For different stages, use the helper function:
   * ```typescript
   * import { getRecognitionServiceBase } from '@recog/client-sdk-ts';
   * const base = getRecognitionServiceBase('staging'); // or 'dev', 'production'
   * const url = `${base.wsBase}/ws/v1/recognize`;
   * ```
   *
   * NOTE(review): the example imports from '@recog/client-sdk-ts', while this
   * package is published as '@volley/recognition-client-sdk' - confirm which
   * specifier external consumers should use.
   */
  url?: string;

  /** ASR configuration (provider, model, language, etc.) - optional */
  asrRequestConfig?: ASRRequestConfig;

  /** Game context for improved recognition accuracy */
  gameContext?: GameContextV1;

  /** Audio utterance ID (optional) - if not provided, a UUID v4 will be generated */
  audioUtteranceId?: string;

  /**
   * Callback URLs for server-side notifications with optional message type filtering (optional).
   * The game side only needs to set this if another service needs to be notified
   * about the transcription results.
   */
  callbackUrls?: RecognitionCallbackUrl[];

  /** User identification (optional) */
  userId?: string;

  /** Game session identification (optional). Called 'sessionId' in Platform and most games. */
  gameSessionId?: string;

  /** Device identification (optional) */
  deviceId?: string;

  /** Account identification (optional) */
  accountId?: string;

  /** Question answer identifier for tracking Q&A sessions (optional, for tracking purposes only) */
  questionAnswerId?: string;

  /** Platform for audio recording device (optional, e.g., 'ios', 'android', 'web', 'unity') */
  platform?: string;

  /** Callback when transcript is received */
  onTranscript?: (result: TranscriptionResultV1) => void;

  /**
   * Callback when function call is received
   * Note: Not supported in 2025. P2 feature for future speech-to-function-call capability.
   */
  onFunctionCall?: (result: FunctionCallResultV1) => void;

  /** Callback when metadata is received. Only once after transcription is complete. */
  onMetadata?: (metadata: MetadataResultV1) => void;

  /** Callback when error occurs */
  onError?: (error: ErrorResultV1) => void;

  /** Callback when connected to WebSocket */
  onConnected?: () => void;

  /**
   * Callback when WebSocket disconnects
   * @param code - WebSocket close code (1000 = normal, 1006 = abnormal, etc.)
   * @param reason - Close reason string
   */
  onDisconnected?: (code: number, reason: string) => void;

  /** High water mark for backpressure control (bytes) */
  highWaterMark?: number;

  /** Low water mark for backpressure control (bytes) */
  lowWaterMark?: number;

  /** Maximum buffer duration in seconds (default: 60s) */
  maxBufferDurationSec?: number;

  /** Expected chunks per second for ring buffer sizing (default: 100) */
  chunksPerSecond?: number;

  /**
   * Optional logger function for debugging
   * If not provided, no logging will occur
   * @param level - Log level: 'debug', 'info', 'warn', 'error'
   * @param message - Log message
   * @param data - Optional additional data
   *
   * NOTE(review): `data` stays `any` so existing logger implementations with a
   * narrower `data` parameter keep type-checking; tightening it to `unknown`
   * would be a breaking change for those callers.
   */
  logger?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Recognition Client Interface
 *
 * Main interface for real-time speech recognition clients.
 * Provides methods for connection management, audio streaming, and session control.
 */
export interface IRecognitionClient {
  /**
   * Connect to the WebSocket endpoint
   * @returns Promise that resolves when connected
   * @throws Error if connection fails or times out
   */
  connect(): Promise<void>;

  /**
   * Send audio data to the recognition service
   * Audio is buffered locally and sent when connection is ready.
   * @param audioData - PCM audio data as ArrayBuffer or typed array view
   */
  sendAudio(audioData: ArrayBuffer | ArrayBufferView): void;

  /**
   * Stop recording and wait for final transcript
   * The server will close the connection after sending the final transcript.
   * @returns Promise that resolves when final transcript is received
   */
  stopRecording(): Promise<void>;

  /**
   * Get the audio utterance ID for this session
   * Available immediately after client construction.
   * @returns String identifying this recognition session. Per the
   *   `audioUtteranceId` config documentation this is the caller-supplied ID
   *   when one was configured, otherwise a generated UUID v4.
   */
  getAudioUtteranceId(): string;

  /**
   * Get the current state of the client
   * @returns Current ClientState value
   */
  getState(): ClientState;

  /**
   * Check if WebSocket connection is open
   * @returns true if connected and ready to communicate
   */
  isConnected(): boolean;

  /**
   * Check if client is currently connecting
   * @returns true if connection is in progress
   */
  isConnecting(): boolean;

  /**
   * Check if client is currently stopping
   * @returns true if stopRecording() is in progress
   */
  isStopping(): boolean;

  /**
   * Check if transcription has finished
   * @returns true if the transcription is complete
   */
  isTranscriptionFinished(): boolean;

  /**
   * Check if the audio buffer has overflowed
   * @returns true if the ring buffer has wrapped around
   */
  isBufferOverflowing(): boolean;

  /**
   * Get client statistics
   * @returns Statistics about audio transmission and buffering
   */
  getStats(): IRecognitionClientStats;
}
|
|
227
|
+
|
|
228
|
+
/**
 * Client statistics interface.
 *
 * Snapshot of audio transmission and buffering counters, as returned by
 * `IRecognitionClient.getStats()`.
 */
export interface IRecognitionClientStats {
  /** Total audio bytes sent to server */
  audioBytesSent: number;

  /** Total number of audio chunks sent */
  audioChunksSent: number;

  /** Total number of audio chunks buffered */
  audioChunksBuffered: number;

  /** Number of times the ring buffer overflowed */
  bufferOverflowCount: number;

  /** Current number of chunks in buffer */
  currentBufferedChunks: number;

  /** Whether the ring buffer has wrapped (overwritten old data) */
  hasWrapped: boolean;
}
|
|
250
|
+
|
|
251
|
+
/**
 * Configuration for RealTimeTwoWayWebSocketRecognitionClient
 * This extends IRecognitionClientConfig and is the main configuration interface
 * for creating a new RealTimeTwoWayWebSocketRecognitionClient instance.
 */
export interface RealTimeTwoWayWebSocketRecognitionClientConfig extends IRecognitionClientConfig {
  // All fields are inherited from IRecognitionClientConfig.
  // This interface adds no members; it exists for backward compatibility and clarity.
}
|
|
260
|
+
|