@volley/recognition-client-sdk-node22 0.1.424

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +344 -0
  2. package/dist/browser.bundled.d.ts +1280 -0
  3. package/dist/browser.d.ts +10 -0
  4. package/dist/browser.d.ts.map +1 -0
  5. package/dist/config-builder.d.ts +134 -0
  6. package/dist/config-builder.d.ts.map +1 -0
  7. package/dist/errors.d.ts +41 -0
  8. package/dist/errors.d.ts.map +1 -0
  9. package/dist/factory.d.ts +36 -0
  10. package/dist/factory.d.ts.map +1 -0
  11. package/dist/index.bundled.d.ts +2572 -0
  12. package/dist/index.d.ts +16 -0
  13. package/dist/index.d.ts.map +1 -0
  14. package/dist/index.js +10199 -0
  15. package/dist/index.js.map +7 -0
  16. package/dist/recog-client-sdk.browser.d.ts +10 -0
  17. package/dist/recog-client-sdk.browser.d.ts.map +1 -0
  18. package/dist/recog-client-sdk.browser.js +5746 -0
  19. package/dist/recog-client-sdk.browser.js.map +7 -0
  20. package/dist/recognition-client.d.ts +128 -0
  21. package/dist/recognition-client.d.ts.map +1 -0
  22. package/dist/recognition-client.types.d.ts +271 -0
  23. package/dist/recognition-client.types.d.ts.map +1 -0
  24. package/dist/simplified-vgf-recognition-client.d.ts +178 -0
  25. package/dist/simplified-vgf-recognition-client.d.ts.map +1 -0
  26. package/dist/utils/audio-ring-buffer.d.ts +69 -0
  27. package/dist/utils/audio-ring-buffer.d.ts.map +1 -0
  28. package/dist/utils/message-handler.d.ts +45 -0
  29. package/dist/utils/message-handler.d.ts.map +1 -0
  30. package/dist/utils/url-builder.d.ts +28 -0
  31. package/dist/utils/url-builder.d.ts.map +1 -0
  32. package/dist/vgf-recognition-mapper.d.ts +66 -0
  33. package/dist/vgf-recognition-mapper.d.ts.map +1 -0
  34. package/dist/vgf-recognition-state.d.ts +91 -0
  35. package/dist/vgf-recognition-state.d.ts.map +1 -0
  36. package/package.json +74 -0
  37. package/src/browser.ts +24 -0
  38. package/src/config-builder.spec.ts +265 -0
  39. package/src/config-builder.ts +240 -0
  40. package/src/errors.ts +84 -0
  41. package/src/factory.spec.ts +215 -0
  42. package/src/factory.ts +47 -0
  43. package/src/index.ts +127 -0
  44. package/src/recognition-client.spec.ts +889 -0
  45. package/src/recognition-client.ts +844 -0
  46. package/src/recognition-client.types.ts +338 -0
  47. package/src/simplified-vgf-recognition-client.integration.spec.ts +718 -0
  48. package/src/simplified-vgf-recognition-client.spec.ts +1525 -0
  49. package/src/simplified-vgf-recognition-client.ts +524 -0
  50. package/src/utils/audio-ring-buffer.spec.ts +335 -0
  51. package/src/utils/audio-ring-buffer.ts +170 -0
  52. package/src/utils/message-handler.spec.ts +311 -0
  53. package/src/utils/message-handler.ts +131 -0
  54. package/src/utils/url-builder.spec.ts +252 -0
  55. package/src/utils/url-builder.ts +92 -0
  56. package/src/vgf-recognition-mapper.spec.ts +78 -0
  57. package/src/vgf-recognition-mapper.ts +232 -0
  58. package/src/vgf-recognition-state.ts +102 -0
package/README.md ADDED
@@ -0,0 +1,344 @@
1
+ # @volley/recognition-client-sdk
2
+
3
+ TypeScript SDK for real-time speech recognition via WebSocket.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @volley/recognition-client-sdk
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```typescript
14
+ import {
15
+ createClientWithBuilder,
16
+ RecognitionProvider,
17
+ DeepgramModel,
18
+ STAGES
19
+ } from '@volley/recognition-client-sdk';
20
+
21
+ // Create client with builder pattern (recommended)
22
+ const client = createClientWithBuilder(builder =>
23
+ builder
24
+ .stage(STAGES.STAGING) // ✨ Simple environment selection using enum
25
+ .provider(RecognitionProvider.DEEPGRAM)
26
+ .model(DeepgramModel.NOVA_2)
27
+ .onTranscript(result => {
28
+ console.log('Final:', result.finalTranscript);
29
+ console.log('Interim:', result.pendingTranscript);
30
+ })
31
+ .onError(error => console.error(error))
32
+ );
33
+
34
+ // Stream audio
35
+ await client.connect();
36
+ client.sendAudio(pcm16AudioChunk); // Call repeatedly with audio chunks
37
+ await client.stopRecording(); // Wait for final transcript
38
+
39
+ // Check the actual URL being used
40
+ console.log('Connected to:', client.getUrl());
41
+ ```
42
+
43
+ ### Alternative: Direct Client Creation
44
+
45
+ ```typescript
46
+ import {
47
+ RealTimeTwoWayWebSocketRecognitionClient,
48
+ RecognitionProvider,
49
+ DeepgramModel,
50
+ Language,
51
+ STAGES
52
+ } from '@volley/recognition-client-sdk';
53
+
54
+ const client = new RealTimeTwoWayWebSocketRecognitionClient({
55
+ stage: STAGES.STAGING, // ✨ Recommended: Use STAGES enum for type safety
56
+ asrRequestConfig: {
57
+ provider: RecognitionProvider.DEEPGRAM,
58
+ model: DeepgramModel.NOVA_2,
59
+ language: Language.ENGLISH_US
60
+ },
61
+ onTranscript: (result) => console.log(result),
62
+ onError: (error) => console.error(error)
63
+ });
64
+
65
+ // Check the actual URL being used
66
+ console.log('Connected to:', client.getUrl());
67
+ ```
68
+
69
+ ## Configuration
70
+
71
+ ### Environment Selection
72
+
73
+ **Recommended: Use `stage` parameter with STAGES enum** for automatic environment configuration:
74
+
75
+ ```typescript
76
+ import {
77
+ RecognitionProvider,
78
+ DeepgramModel,
79
+ Language,
80
+ STAGES
81
+ } from '@volley/recognition-client-sdk';
82
+
83
+ builder
84
+ .stage(STAGES.STAGING) // STAGES.LOCAL | STAGES.DEV | STAGES.STAGING | STAGES.PRODUCTION
85
+ .provider(RecognitionProvider.DEEPGRAM) // DEEPGRAM, GOOGLE
86
+ .model(DeepgramModel.NOVA_2) // Provider-specific model enum
87
+ .language(Language.ENGLISH_US) // Language enum
88
+ .interimResults(true) // Enable partial transcripts
89
+ ```
90
+
91
+ **Available Stages and URLs:**
92
+
93
+ | Stage | Enum | WebSocket URL |
94
+ |-------|------|---------------|
95
+ | **Local** | `STAGES.LOCAL` | `ws://localhost:3101/ws/v1/recognize` |
96
+ | **Development** | `STAGES.DEV` | `wss://recognition-service-dev.volley-services.net/ws/v1/recognize` |
97
+ | **Staging** | `STAGES.STAGING` | `wss://recognition-service-staging.volley-services.net/ws/v1/recognize` |
98
+ | **Production** | `STAGES.PRODUCTION` | `wss://recognition-service.volley-services.net/ws/v1/recognize` |
99
+
100
+ > 💡 Using the `stage` parameter automatically constructs the correct URL for each environment.
101
+
102
+ **Automatic Connection Retry:**
103
+
104
+ The SDK **automatically retries failed connections** with sensible defaults - no configuration needed!
105
+
106
+ **Default behavior (works out of the box):**
107
+ - 4 connection attempts (try once, retry 3 times if failed)
108
+ - 200ms delay between retries
109
+ - Handles temporary service unavailability (503)
110
+ - Fast failure (~600ms of total retry delay when every attempt fails)
111
+ - Timing: `Attempt 1 → FAIL → wait 200ms → Attempt 2 → FAIL → wait 200ms → Attempt 3 → FAIL → wait 200ms → Attempt 4`
112
+
113
+ ```typescript
114
+ import { RealTimeTwoWayWebSocketRecognitionClient, STAGES } from '@volley/recognition-client-sdk';
115
+
116
+ // ✅ Automatic retry - no config needed!
117
+ const client = new RealTimeTwoWayWebSocketRecognitionClient({
118
+ stage: STAGES.STAGING,
119
+ // connectionRetry works automatically with defaults
120
+ });
121
+ ```
122
+
123
+ **Optional: Customize retry behavior** (only if needed):
124
+ ```typescript
125
+ const client = new RealTimeTwoWayWebSocketRecognitionClient({
126
+ stage: STAGES.STAGING,
127
+ connectionRetry: {
128
+ maxAttempts: 2, // Fewer attempts (min: 1, max: 5)
129
+ delayMs: 500 // Longer delay between attempts
130
+ }
131
+ });
132
+ ```
133
+
134
+ > ⚠️ **Note**: Retry only applies to **initial connection establishment**. If the connection drops during audio streaming, the SDK will not auto-retry (caller must handle this).
135
+
136
+ **Advanced: Custom URL** for non-standard endpoints:
137
+
138
+ ```typescript
139
+ builder
140
+ .url('wss://custom-endpoint.example.com/ws/v1/recognize') // Custom WebSocket URL
141
+ .provider(RecognitionProvider.DEEPGRAM)
142
+ // ... rest of config
143
+ ```
144
+
145
+ > 💡 **Note**: If both `stage` and `url` are provided, `url` takes precedence.
146
+
147
+ ### Event Handlers
148
+
149
+ ```typescript
150
+ builder
151
+ .onTranscript(result => {}) // Handle transcription results
152
+ .onError(error => {}) // Handle errors
153
+ .onConnected(() => {}) // Connection established
154
+ .onDisconnected((code) => {}) // Connection closed
155
+ .onMetadata(meta => {}) // Timing information
156
+ ```
157
+
158
+ ### Optional Parameters
159
+
160
+ ```typescript
161
+ builder
162
+ .gameContext({ // Context for better recognition
163
+ gameId: 'session-123',
164
+ prompt: 'Expected responses: yes, no, maybe'
165
+ })
166
+ .userId('user-123') // User identification
167
+ .platform('web') // Platform identifier
168
+ .logger((level, msg, data) => {}) // Custom logging
169
+ ```
170
+
171
+ ## API Reference
172
+
173
+ ### Client Methods
174
+
175
+ ```typescript
176
+ await client.connect(); // Establish connection
177
+ client.sendAudio(chunk); // Send PCM16 audio
178
+ await client.stopRecording(); // End and get final transcript
179
+ client.getAudioUtteranceId(); // Get session UUID
180
+ client.getUrl(); // Get actual WebSocket URL being used
181
+ client.getState(); // Get current state
182
+ client.isConnected(); // Check connection status
183
+ ```
184
+
185
+ ### TranscriptionResult
186
+
187
+ ```typescript
188
+ {
189
+ type: 'Transcription'; // Message type discriminator
190
+ audioUtteranceId: string; // Session UUID
191
+ finalTranscript: string; // Confirmed text (won't change)
192
+ finalTranscriptConfidence?: number; // Confidence 0-1 for final transcript
193
+ pendingTranscript?: string; // In-progress text (may change)
194
+ pendingTranscriptConfidence?: number; // Confidence 0-1 for pending transcript
195
+ is_finished: boolean; // Transcription complete (last message)
196
+ voiceStart?: number; // Voice activity start time (ms from stream start)
197
+ voiceDuration?: number; // Voice duration (ms)
198
+ voiceEnd?: number; // Voice activity end time (ms from stream start)
199
+ startTimestamp?: number; // Transcription start timestamp (ms)
200
+ endTimestamp?: number; // Transcription end timestamp (ms)
201
+ receivedAtMs?: number; // Server receive timestamp (ms since epoch)
202
+ accumulatedAudioTimeMs?: number; // Total audio duration sent (ms)
203
+ }
204
+ ```
205
+
206
+ ## Providers
207
+
208
+ ### Deepgram
209
+
210
+ ```typescript
211
+ import { RecognitionProvider, DeepgramModel } from '@volley/recognition-client-sdk';
212
+
213
+ builder
214
+ .provider(RecognitionProvider.DEEPGRAM)
215
+ .model(DeepgramModel.NOVA_2); // NOVA_2, NOVA_3, FLUX_GENERAL_EN
216
+ ```
217
+
218
+ ### Google Cloud Speech-to-Text
219
+
220
+ ```typescript
221
+ import { RecognitionProvider, GoogleModel } from '@volley/recognition-client-sdk';
222
+
223
+ builder
224
+ .provider(RecognitionProvider.GOOGLE)
225
+ .model(GoogleModel.LATEST_SHORT); // LATEST_SHORT, LATEST_LONG, TELEPHONY, etc.
226
+ ```
227
+
228
+ Available Google models:
229
+ - `LATEST_SHORT` - Optimized for short audio (< 1 minute)
230
+ - `LATEST_LONG` - Optimized for long audio (> 1 minute)
231
+ - `TELEPHONY` - Optimized for phone audio
232
+ - `TELEPHONY_SHORT` - Short telephony audio
233
+ - `MEDICAL_DICTATION` - Medical dictation (premium)
234
+ - `MEDICAL_CONVERSATION` - Medical conversations (premium)
235
+
236
+
237
+ ## Audio Format
238
+
239
+ The SDK expects PCM16 audio:
240
+ - Format: Linear PCM (16-bit signed integers)
241
+ - Sample Rate: 16kHz recommended
242
+ - Channels: Mono
243
+ Please reach out to the AI team if there are essential reasons to support other formats.
244
+
245
+ ## Error Handling
246
+
247
+ ```typescript
248
+ builder.onError(error => {
249
+ console.error(`Error ${error.code}: ${error.message}`);
250
+ });
251
+
252
+ // Check disconnection type
253
+ import { isNormalDisconnection } from '@volley/recognition-client-sdk';
254
+
255
+ builder.onDisconnected((code, reason) => {
256
+ if (!isNormalDisconnection(code)) {
257
+ console.error('Unexpected disconnect:', code);
258
+ }
259
+ });
260
+ ```
261
+
262
+ ## Troubleshooting
263
+
264
+ ### Connection Issues
265
+
266
+ **WebSocket fails to connect**
267
+ - Verify the recognition service is running
268
+ - Check that the WebSocket URL starts with `ws://` or `wss://`
269
+ - Ensure network allows WebSocket connections
270
+
271
+ **Authentication errors**
272
+ - Verify `audioUtteranceId` is provided
273
+ - Check if service requires additional auth headers
274
+
275
+ ### Audio Issues
276
+
277
+ **No transcription results**
278
+ - Confirm audio format is PCM16, 16kHz, mono
279
+ - Check if audio chunks are being sent (use `onAudioSent` callback)
280
+ - Verify audio data is not empty or corrupted
281
+
282
+ **Poor transcription quality**
283
+ - Try different models (e.g., `NOVA_2` vs `NOVA_2_GENERAL`)
284
+ - Adjust language setting to match audio
285
+ - Ensure audio sample rate matches configuration
286
+
287
+ ### Performance Issues
288
+
289
+ **High latency**
290
+ - Use smaller audio chunks (e.g., 100ms instead of 500ms)
291
+ - Choose a model optimized for real-time (e.g., Deepgram Nova 2)
292
+ - Check network latency to service
293
+
294
+ **Memory issues**
295
+ - Call `disconnect()` when done to clean up resources
296
+ - Avoid keeping multiple client instances active
297
+
298
+ ## Publishing
299
+
300
+ This package uses automated publishing via semantic-release with npm Trusted Publishers (OIDC).
301
+
302
+ ### First-Time Setup (One-time)
303
+
304
+ After the first manual publish, configure npm Trusted Publishers:
305
+
306
+ 1. Go to https://www.npmjs.com/package/@volley/recognition-client-sdk/access
307
+ 2. Click "Add publisher" → Select "GitHub Actions"
308
+ 3. Configure:
309
+ - **Organization**: `Volley-Inc`
310
+ - **Repository**: `recognition-service`
311
+ - **Workflow**: `sdk-release.yml`
312
+ - **Environment**: Leave empty (not required)
313
+
314
+ ### How It Works
315
+
316
+ - **Automated releases**: Push to `dev` branch triggers semantic-release
317
+ - **Version bumping**: Based on conventional commits (feat/fix/BREAKING CHANGE)
318
+ - **No tokens needed**: Uses OIDC authentication with npm
319
+ - **Provenance**: Automatic supply chain attestation
320
+ - **Path filtering**: Only releases when SDK or libs change
321
+
322
+ ### Manual Publishing (Not Recommended)
323
+
324
+ If needed for testing:
325
+
326
+ ```bash
327
+ cd packages/client-sdk-ts
328
+ npm login --scope=@volley
329
+ pnpm build
330
+ npm publish --provenance --access public
331
+ ```
332
+
333
+ ## Contributing
334
+
335
+ This SDK is part of the Recognition Service monorepo. To contribute:
336
+
337
+ 1. Make changes to SDK or libs
338
+ 2. Test locally with `pnpm test`
339
+ 3. Create PR to `dev` branch with conventional commit messages (`feat:`, `fix:`, etc.)
340
+ 4. After merge, the automated workflow will publish a new version to npm
341
+
342
+ ## License
343
+
344
+ Proprietary