@volley/recognition-client-sdk 0.1.200

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,260 @@
1
+ /**
2
+ * Recognition Client Types
3
+ *
4
+ * Type definitions and interfaces for the recognition client SDK.
5
+ * These interfaces enable dependency injection, testing, and alternative implementations.
6
+ */
7
+
8
+ import {
9
+ TranscriptionResultV1,
10
+ FunctionCallResultV1,
11
+ MetadataResultV1,
12
+ ErrorResultV1,
13
+ ASRRequestConfig,
14
+ GameContextV1
15
+ } from '@recog/shared-types';
16
+
17
+ /**
18
+ * Connection lifecycle states of a recognition client.
19
+ * Each member is a string enum value, so a state is a readable lowercase string (for example 'ready').
20
+ */
21
+ export enum ClientState {
22
+ /** Initial state; no connection has been established */
23
+ INITIAL = 'initial',
24
+
25
+ /** The WebSocket connection is actively being established */
26
+ CONNECTING = 'connecting',
27
+
28
+ /** WebSocket is connected, but still waiting for the server ready signal */
29
+ CONNECTED = 'connected',
30
+
31
+ /** Server is ready; audio can be sent */
32
+ READY = 'ready',
33
+
34
+ /** Stop signal has been sent; waiting for the final transcript */
35
+ STOPPING = 'stopping',
36
+
37
+ /** Connection closed normally after a stop */
38
+ STOPPED = 'stopped',
39
+
40
+ /** Connection failed or was lost unexpectedly */
41
+ FAILED = 'failed'
42
+ }
43
+
44
+ /**
45
+ * A callback URL together with an optional filter on the message types delivered to it
46
+ */
47
+ export interface RecognitionCallbackUrl {
48
+ /** The callback URL endpoint */
49
+ url: string;
50
+
51
+ /** Message types to send to this URL (each entry a string or a number). If empty or undefined, all types are sent. */
52
+ messageTypes?: Array<string | number>;
53
+ }
54
+
55
+ // Legacy alias of RecognitionCallbackUrl, kept for backward compatibility
56
+ export type IRecognitionCallbackUrl = RecognitionCallbackUrl;
57
+
58
+ /** Configuration options for a recognition client. Every field is optional. */ export interface IRecognitionClientConfig {
59
+ /**
60
+ * WebSocket endpoint URL (optional - defaults to production)
61
+ *
62
+ * For different stages, use the helper function:
63
+ * ```typescript
64
+ * import { getRecognitionServiceBase } from '@recog/client-sdk-ts';
65
+ * const base = getRecognitionServiceBase('staging'); // or 'dev', 'production'
66
+ * const url = `${base.wsBase}/ws/v1/recognize`;
67
+ * ```
68
+ */
69
+ url?: string;
70
+
71
+ /** ASR configuration (provider, model, language, etc.) - optional */
72
+ asrRequestConfig?: ASRRequestConfig;
73
+
74
+ /** Game context for improved recognition accuracy (optional) */
75
+ gameContext?: GameContextV1;
76
+
77
+ /** Audio utterance ID (optional) - if not provided, a UUID v4 will be generated */
78
+ audioUtteranceId?: string;
79
+
80
+ /** Callback URLs for server-side notifications, with optional message type filtering (optional).
81
+ * The game side only needs to set this if another service needs to be notified about the transcription results.
82
+ */
83
+ callbackUrls?: RecognitionCallbackUrl[];
84
+
85
+ /** User identification (optional) */
86
+ userId?: string;
87
+
88
+ /** Game session identification (optional). Called 'sessionId' in Platform and most games. */
89
+ gameSessionId?: string;
90
+
91
+ /** Device identification (optional) */
92
+ deviceId?: string;
93
+
94
+ /** Account identification (optional) */
95
+ accountId?: string;
96
+
97
+ /** Question answer identifier for tracking Q&A sessions (optional; used for tracking purposes only) */
98
+ questionAnswerId?: string;
99
+
100
+ /** Platform of the audio recording device (optional, e.g., 'ios', 'android', 'web', 'unity') */
101
+ platform?: string;
102
+
103
+ /** Callback invoked when a transcript is received */
104
+ onTranscript?: (result: TranscriptionResultV1) => void;
105
+
106
+ /**
107
+ * Callback invoked when a function call is received
108
+ * Note: Not supported in 2025. P2 feature for future speech-to-function-call capability.
109
+ */
110
+ onFunctionCall?: (result: FunctionCallResultV1) => void;
111
+
112
+ /** Callback invoked when metadata is received. Called only once, after transcription is complete. */
113
+ onMetadata?: (metadata: MetadataResultV1) => void;
114
+
115
+ /** Callback invoked when an error occurs */
116
+ onError?: (error: ErrorResultV1) => void;
117
+
118
+ /** Callback invoked when connected to the WebSocket */
119
+ onConnected?: () => void;
120
+
121
+ /**
122
+ * Callback invoked when the WebSocket disconnects
123
+ * @param code - WebSocket close code (1000 = normal, 1006 = abnormal, etc.)
124
+ * @param reason - Close reason string
125
+ */
126
+ onDisconnected?: (code: number, reason: string) => void;
127
+
128
+ /** High water mark for backpressure control (bytes) */
129
+ highWaterMark?: number;
130
+
131
+ /** Low water mark for backpressure control (bytes) */
132
+ lowWaterMark?: number;
133
+
134
+ /** Maximum buffer duration in seconds (default: 60s) */
135
+ maxBufferDurationSec?: number;
136
+
137
+ /** Expected chunks per second, used for ring buffer sizing (default: 100) */
138
+ chunksPerSecond?: number;
139
+
140
+ /**
141
+ * Optional logger function for debugging
142
+ * If not provided, no logging will occur
143
+ * @param level - Log level: 'debug', 'info', 'warn', 'error'
144
+ * @param message - Log message
145
+ * @param data - Optional additional data (typed as any, so its shape is not checked)
146
+ */
147
+ logger?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void;
148
+ }
149
+
150
+ /**
151
+ * Recognition Client Interface
152
+ *
153
+ * Main interface for real-time speech recognition clients.
154
+ * Declares methods for connection management, audio streaming, session control, and status queries.
155
+ */
156
+ export interface IRecognitionClient {
157
+ /**
158
+ * Connect to the WebSocket endpoint
159
+ * @returns Promise that resolves when connected
160
+ * @throws Error if connection fails or times out
161
+ */
162
+ connect(): Promise<void>;
163
+
164
+ /**
165
+ * Send audio data to the recognition service
166
+ * Audio is buffered locally and sent once the connection is ready.
167
+ * @param audioData - PCM audio data as an ArrayBuffer or a typed array view
168
+ */
169
+ sendAudio(audioData: ArrayBuffer | ArrayBufferView): void;
170
+
171
+ /**
172
+ * Stop recording and wait for the final transcript
173
+ * The server will close the connection after sending the final transcript.
174
+ * @returns Promise that resolves when the final transcript is received
175
+ */
176
+ stopRecording(): Promise<void>;
177
+
178
+ /**
179
+ * Get the audio utterance ID for this session
180
+ * Available immediately after client construction.
181
+ * @returns ID string for this recognition session. NOTE(review): presumably a caller-supplied ID when one was configured, otherwise a generated UUID v4 - confirm against the implementation.
182
+ */
183
+ getAudioUtteranceId(): string;
184
+
185
+ /**
186
+ * Get the current state of the client
187
+ * @returns Current ClientState value
188
+ */
189
+ getState(): ClientState;
190
+
191
+ /**
192
+ * Check if the WebSocket connection is open
193
+ * @returns true if connected and ready to communicate
194
+ */
195
+ isConnected(): boolean;
196
+
197
+ /**
198
+ * Check if the client is currently connecting
199
+ * @returns true if a connection attempt is in progress
200
+ */
201
+ isConnecting(): boolean;
202
+
203
+ /**
204
+ * Check if the client is currently stopping
205
+ * @returns true if stopRecording() is in progress
206
+ */
207
+ isStopping(): boolean;
208
+
209
+ /**
210
+ * Check if transcription has finished
211
+ * @returns true if the transcription is complete
212
+ */
213
+ isTranscriptionFinished(): boolean;
214
+
215
+ /**
216
+ * Check if the audio buffer has overflowed
217
+ * @returns true if the ring buffer has wrapped around
218
+ */
219
+ isBufferOverflowing(): boolean;
220
+
221
+ /**
222
+ * Get client statistics
223
+ * @returns Statistics about audio transmission and buffering
224
+ */
225
+ getStats(): IRecognitionClientStats;
226
+ }
227
+
228
+ /**
229
+ * Client statistics: counters and flags describing audio transmission and buffering
230
+ */
231
+ export interface IRecognitionClientStats {
232
+ /** Total audio bytes sent to the server */
233
+ audioBytesSent: number;
234
+
235
+ /** Total number of audio chunks sent */
236
+ audioChunksSent: number;
237
+
238
+ /** Total number of audio chunks buffered */
239
+ audioChunksBuffered: number;
240
+
241
+ /** Number of times the ring buffer overflowed */
242
+ bufferOverflowCount: number;
243
+
244
+ /** Number of chunks currently in the buffer */
245
+ currentBufferedChunks: number;
246
+
247
+ /** Whether the ring buffer has wrapped (overwritten old data) */
248
+ hasWrapped: boolean;
249
+ }
250
+
251
+ /**
252
+ * Configuration for RealTimeTwoWayWebSocketRecognitionClient
253
+ * Extends IRecognitionClientConfig without adding members; it is the main configuration
254
+ * interface for creating a new RealTimeTwoWayWebSocketRecognitionClient instance.
255
+ */
256
+ export interface RealTimeTwoWayWebSocketRecognitionClientConfig extends IRecognitionClientConfig {
257
+ // All fields are inherited from IRecognitionClientConfig; nothing is declared here
258
+ // This interface exists for backward compatibility and clarity
259
+ }
260
+