@volley/recognition-client-sdk 0.1.211 → 0.1.254
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -7
- package/dist/{browser-C4ZssGoU.d.ts → browser-BZs4BL_w.d.ts} +84 -6
- package/dist/index.d.ts +630 -16
- package/dist/index.js +276 -44
- package/dist/index.js.map +1 -1
- package/dist/recog-client-sdk.browser.d.ts +1 -1
- package/dist/recog-client-sdk.browser.js +195 -42
- package/dist/recog-client-sdk.browser.js.map +1 -1
- package/package.json +2 -2
- package/src/config-builder.ts +21 -3
- package/src/errors.ts +84 -0
- package/src/index.ts +34 -1
- package/src/recognition-client.spec.ts +39 -0
- package/src/recognition-client.ts +153 -40
- package/src/recognition-client.types.ts +58 -6
- package/src/simplified-vgf-recognition-client.ts +9 -0
- package/src/utils/url-builder.spec.ts +53 -6
- package/src/utils/url-builder.ts +19 -4
package/README.md
CHANGED
|
@@ -11,12 +11,17 @@ npm install @volley/recognition-client-sdk
|
|
|
11
11
|
## Quick Start
|
|
12
12
|
|
|
13
13
|
```typescript
|
|
14
|
-
import {
|
|
14
|
+
import {
|
|
15
|
+
createClientWithBuilder,
|
|
16
|
+
RecognitionProvider,
|
|
17
|
+
DeepgramModel,
|
|
18
|
+
STAGES
|
|
19
|
+
} from '@volley/recognition-client-sdk';
|
|
15
20
|
|
|
16
21
|
// Create client with builder pattern (recommended)
|
|
17
22
|
const client = createClientWithBuilder(builder =>
|
|
18
23
|
builder
|
|
19
|
-
.
|
|
24
|
+
.stage(STAGES.STAGING) // ✨ Simple environment selection using enum
|
|
20
25
|
.provider(RecognitionProvider.DEEPGRAM)
|
|
21
26
|
.model(DeepgramModel.NOVA_2)
|
|
22
27
|
.onTranscript(result => {
|
|
@@ -30,6 +35,9 @@ const client = createClientWithBuilder(builder =>
|
|
|
30
35
|
await client.connect();
|
|
31
36
|
client.sendAudio(pcm16AudioChunk); // Call repeatedly with audio chunks
|
|
32
37
|
await client.stopRecording(); // Wait for final transcript
|
|
38
|
+
|
|
39
|
+
// Check the actual URL being used
|
|
40
|
+
console.log('Connected to:', client.getUrl());
|
|
33
41
|
```
|
|
34
42
|
|
|
35
43
|
### Alternative: Direct Client Creation
|
|
@@ -39,11 +47,12 @@ import {
|
|
|
39
47
|
RealTimeTwoWayWebSocketRecognitionClient,
|
|
40
48
|
RecognitionProvider,
|
|
41
49
|
DeepgramModel,
|
|
42
|
-
Language
|
|
50
|
+
Language,
|
|
51
|
+
STAGES
|
|
43
52
|
} from '@volley/recognition-client-sdk';
|
|
44
53
|
|
|
45
54
|
const client = new RealTimeTwoWayWebSocketRecognitionClient({
|
|
46
|
-
|
|
55
|
+
stage: STAGES.STAGING, // ✨ Recommended: Use STAGES enum for type safety
|
|
47
56
|
asrRequestConfig: {
|
|
48
57
|
provider: RecognitionProvider.DEEPGRAM,
|
|
49
58
|
model: DeepgramModel.NOVA_2,
|
|
@@ -52,23 +61,89 @@ const client = new RealTimeTwoWayWebSocketRecognitionClient({
|
|
|
52
61
|
onTranscript: (result) => console.log(result),
|
|
53
62
|
onError: (error) => console.error(error)
|
|
54
63
|
});
|
|
64
|
+
|
|
65
|
+
// Check the actual URL being used
|
|
66
|
+
console.log('Connected to:', client.getUrl());
|
|
55
67
|
```
|
|
56
68
|
|
|
57
69
|
## Configuration
|
|
58
70
|
|
|
59
|
-
###
|
|
71
|
+
### Environment Selection
|
|
72
|
+
|
|
73
|
+
**Recommended: Use the `stage` parameter with the `STAGES` constants** for automatic environment configuration:
|
|
60
74
|
|
|
61
75
|
```typescript
|
|
62
|
-
import {
|
|
76
|
+
import {
|
|
77
|
+
RecognitionProvider,
|
|
78
|
+
DeepgramModel,
|
|
79
|
+
Language,
|
|
80
|
+
STAGES
|
|
81
|
+
} from '@volley/recognition-client-sdk';
|
|
63
82
|
|
|
64
83
|
builder
|
|
65
|
-
.
|
|
84
|
+
.stage(STAGES.STAGING) // STAGES.LOCAL | STAGES.DEV | STAGES.STAGING | STAGES.PRODUCTION
|
|
66
85
|
.provider(RecognitionProvider.DEEPGRAM) // DEEPGRAM, GOOGLE
|
|
67
86
|
.model(DeepgramModel.NOVA_2) // Provider-specific model enum
|
|
68
87
|
.language(Language.ENGLISH_US) // Language enum
|
|
69
88
|
.interimResults(true) // Enable partial transcripts
|
|
70
89
|
```
|
|
71
90
|
|
|
91
|
+
**Available Stages and URLs:**
|
|
92
|
+
|
|
93
|
+
| Stage | Enum | WebSocket URL |
|
|
94
|
+
|-------|------|---------------|
|
|
95
|
+
| **Local** | `STAGES.LOCAL` | `ws://localhost:3101/ws/v1/recognize` |
|
|
96
|
+
| **Development** | `STAGES.DEV` | `wss://recognition-service-dev.volley-services.net/ws/v1/recognize` |
|
|
97
|
+
| **Staging** | `STAGES.STAGING` | `wss://recognition-service-staging.volley-services.net/ws/v1/recognize` |
|
|
98
|
+
| **Production** | `STAGES.PRODUCTION` | `wss://recognition-service.volley-services.net/ws/v1/recognize` |
|
|
99
|
+
|
|
100
|
+
> 💡 Using the `stage` parameter automatically constructs the correct URL for each environment.
|
|
101
|
+
|
|
102
|
+
**Automatic Connection Retry:**
|
|
103
|
+
|
|
104
|
+
The SDK **automatically retries failed connections** with sensible defaults - no configuration needed!
|
|
105
|
+
|
|
106
|
+
**Default behavior (works out of the box):**
|
|
107
|
+
- 4 connection attempts (try once, retry 3 times if failed)
|
|
108
|
+
- 200ms delay between retries
|
|
109
|
+
- Handles temporary service unavailability (503)
|
|
110
|
+
- Fast failure (~600ms of total retry delay on complete failure, plus the time taken by each attempt)
|
|
111
|
+
- Timing: `Attempt 1 → FAIL → wait 200ms → Attempt 2 → FAIL → wait 200ms → Attempt 3 → FAIL → wait 200ms → Attempt 4`
|
|
112
|
+
|
|
113
|
+
```typescript
|
|
114
|
+
import { STAGES } from '@volley/recognition-client-sdk';
|
|
115
|
+
|
|
116
|
+
// ✅ Automatic retry - no config needed!
|
|
117
|
+
const client = new RealTimeTwoWayWebSocketRecognitionClient({
|
|
118
|
+
stage: STAGES.STAGING,
|
|
119
|
+
// connectionRetry works automatically with defaults
|
|
120
|
+
});
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
**Optional: Customize retry behavior** (only if needed):
|
|
124
|
+
```typescript
|
|
125
|
+
const client = new RealTimeTwoWayWebSocketRecognitionClient({
|
|
126
|
+
stage: STAGES.STAGING,
|
|
127
|
+
connectionRetry: {
|
|
128
|
+
maxAttempts: 2, // Fewer attempts (min: 1, max: 5)
|
|
129
|
+
delayMs: 500 // Longer delay between attempts
|
|
130
|
+
}
|
|
131
|
+
});
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
> ⚠️ **Note**: Retry only applies to **initial connection establishment**. If the connection drops during audio streaming, the SDK will not auto-retry (caller must handle this).
|
|
135
|
+
|
|
136
|
+
**Advanced: Custom URL** for non-standard endpoints:
|
|
137
|
+
|
|
138
|
+
```typescript
|
|
139
|
+
builder
|
|
140
|
+
.url('wss://custom-endpoint.example.com/ws/v1/recognize') // Custom WebSocket URL
|
|
141
|
+
.provider(RecognitionProvider.DEEPGRAM)
|
|
142
|
+
// ... rest of config
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
> 💡 **Note**: If both `stage` and `url` are provided, `url` takes precedence.
|
|
146
|
+
|
|
72
147
|
### Event Handlers
|
|
73
148
|
|
|
74
149
|
```typescript
|
|
@@ -102,6 +177,7 @@ await client.connect(); // Establish connection
|
|
|
102
177
|
client.sendAudio(chunk); // Send PCM16 audio
|
|
103
178
|
await client.stopRecording(); // End and get final transcript
|
|
104
179
|
client.getAudioUtteranceId(); // Get session UUID
|
|
180
|
+
client.getUrl(); // Get actual WebSocket URL being used
|
|
105
181
|
client.getState(); // Get current state
|
|
106
182
|
client.isConnected(); // Check connection status
|
|
107
183
|
```
|
|
@@ -14,6 +14,15 @@ declare enum RecognitionProvider {
|
|
|
14
14
|
GEMINI_BATCH = "gemini-batch",
|
|
15
15
|
OPENAI_BATCH = "openai-batch"
|
|
16
16
|
}
|
|
17
|
+
/**
|
|
18
|
+
* ASR API type - distinguishes between streaming and file-based transcription APIs
|
|
19
|
+
* - STREAMING: Real-time streaming APIs (Deepgram, AssemblyAI, Google)
|
|
20
|
+
* - FILE_BASED: File upload/batch APIs (OpenAI Batch, Gemini Batch)
|
|
21
|
+
*/
|
|
22
|
+
declare enum ASRApiType {
|
|
23
|
+
STREAMING = "streaming",
|
|
24
|
+
FILE_BASED = "file-based"
|
|
25
|
+
}
|
|
17
26
|
/**
|
|
18
27
|
* Deepgram model names
|
|
19
28
|
*/
|
|
@@ -266,6 +275,7 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
266
275
|
volume: z.ZodOptional<z.ZodNumber>;
|
|
267
276
|
accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
|
|
268
277
|
costInUSD: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
|
|
278
|
+
apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
|
|
269
279
|
asrConfig: z.ZodOptional<z.ZodString>;
|
|
270
280
|
rawAsrMetadata: z.ZodOptional<z.ZodString>;
|
|
271
281
|
}, "strip", z.ZodTypeAny, {
|
|
@@ -279,6 +289,7 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
279
289
|
duration?: number | undefined;
|
|
280
290
|
volume?: number | undefined;
|
|
281
291
|
costInUSD?: number | undefined;
|
|
292
|
+
apiType?: ASRApiType | undefined;
|
|
282
293
|
asrConfig?: string | undefined;
|
|
283
294
|
rawAsrMetadata?: string | undefined;
|
|
284
295
|
}, {
|
|
@@ -292,6 +303,7 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
|
|
|
292
303
|
duration?: number | undefined;
|
|
293
304
|
volume?: number | undefined;
|
|
294
305
|
costInUSD?: number | undefined;
|
|
306
|
+
apiType?: ASRApiType | undefined;
|
|
295
307
|
asrConfig?: string | undefined;
|
|
296
308
|
rawAsrMetadata?: string | undefined;
|
|
297
309
|
}>;
|
|
@@ -305,6 +317,7 @@ declare enum ErrorTypeV1 {
|
|
|
305
317
|
PROVIDER_ERROR = "provider_error",// Error from ASR provider (Deepgram, Google, etc.) Unlikely to happen with fallbacks
|
|
306
318
|
TIMEOUT_ERROR = "timeout_error",// Request or operation timeout. Likely business logic did not handle timeout.
|
|
307
319
|
QUOTA_EXCEEDED = "quota_exceeded",// Quota or rate limit exceeded. Unlikely to happen with fallbacks
|
|
320
|
+
CONNECTION_ERROR = "connection_error",// Connection establishment or network error
|
|
308
321
|
UNKNOWN_ERROR = "unknown_error"
|
|
309
322
|
}
|
|
310
323
|
/**
|
|
@@ -584,6 +597,17 @@ interface ASRRequestConfig {
|
|
|
584
597
|
fallbackModels?: ASRRequestConfig[];
|
|
585
598
|
}
|
|
586
599
|
|
|
600
|
+
/**
|
|
601
|
+
* Standard stage/environment constants used across all services
|
|
602
|
+
*/
|
|
603
|
+
declare const STAGES: {
|
|
604
|
+
readonly LOCAL: "local";
|
|
605
|
+
readonly DEV: "dev";
|
|
606
|
+
readonly STAGING: "staging";
|
|
607
|
+
readonly PRODUCTION: "production";
|
|
608
|
+
};
|
|
609
|
+
type Stage = typeof STAGES[keyof typeof STAGES];
|
|
610
|
+
|
|
587
611
|
/**
|
|
588
612
|
* Generic WebSocket protocol types and utilities
|
|
589
613
|
* Supports flexible versioning and message types
|
|
@@ -767,16 +791,34 @@ interface RecognitionCallbackUrl {
|
|
|
767
791
|
}
|
|
768
792
|
interface IRecognitionClientConfig {
|
|
769
793
|
/**
|
|
770
|
-
* WebSocket endpoint URL (optional
|
|
794
|
+
* WebSocket endpoint URL (optional)
|
|
795
|
+
* Provide either `url` or `stage` to select the endpoint; both fields are optional.
|
|
796
|
+
* If both are provided, `url` takes precedence.
|
|
771
797
|
*
|
|
772
|
-
*
|
|
798
|
+
* Example with explicit URL:
|
|
773
799
|
* ```typescript
|
|
774
|
-
*
|
|
775
|
-
* const base = getRecognitionServiceBase('staging'); // or 'dev', 'production'
|
|
776
|
-
* const url = `${base.wsBase}/ws/v1/recognize`;
|
|
800
|
+
* { url: 'wss://custom-endpoint.example.com/ws/v1/recognize' }
|
|
777
801
|
* ```
|
|
778
802
|
*/
|
|
779
803
|
url?: string;
|
|
804
|
+
/**
|
|
805
|
+
* Stage for recognition service (recommended)
|
|
806
|
+
* Provide either `url` or `stage` to select the endpoint; both fields are optional.
|
|
807
|
+
* If both are provided, `url` takes precedence.
|
|
808
|
+
* Defaults to production if neither is provided.
|
|
809
|
+
*
|
|
810
|
+
* Example with STAGES enum (recommended):
|
|
811
|
+
* ```typescript
|
|
812
|
+
* import { STAGES } from '@volley/recognition-client-sdk';
|
|
813
|
+
* { stage: STAGES.STAGING }
|
|
814
|
+
* ```
|
|
815
|
+
*
|
|
816
|
+
* String values also accepted:
|
|
817
|
+
* ```typescript
|
|
818
|
+
* { stage: 'staging' } // 'local' | 'dev' | 'staging' | 'production'
|
|
819
|
+
* ```
|
|
820
|
+
*/
|
|
821
|
+
stage?: Stage | string;
|
|
780
822
|
/** ASR configuration (provider, model, language, etc.) - optional */
|
|
781
823
|
asrRequestConfig?: ASRRequestConfig;
|
|
782
824
|
/** Game context for improved recognition accuracy */
|
|
@@ -826,6 +868,30 @@ interface IRecognitionClientConfig {
|
|
|
826
868
|
maxBufferDurationSec?: number;
|
|
827
869
|
/** Expected chunks per second for ring buffer sizing (default: 100) */
|
|
828
870
|
chunksPerSecond?: number;
|
|
871
|
+
/**
|
|
872
|
+
* Connection retry configuration (optional)
|
|
873
|
+
* Only applies to initial connection establishment, not mid-stream interruptions.
|
|
874
|
+
*
|
|
875
|
+
* Default: { maxAttempts: 4, delayMs: 200 } (try once, retry 3 times = 4 total attempts)
|
|
876
|
+
*
|
|
877
|
+
* Timing: Attempt 1 → FAIL → wait 200ms → Attempt 2 → FAIL → wait 200ms → Attempt 3 → FAIL → wait 200ms → Attempt 4
|
|
878
|
+
*
|
|
879
|
+
* Example:
|
|
880
|
+
* ```typescript
|
|
881
|
+
* {
|
|
882
|
+
* connectionRetry: {
|
|
883
|
+
* maxAttempts: 2, // Try connecting up to 2 times (1 retry)
|
|
884
|
+
* delayMs: 500 // Wait 500ms between attempts
|
|
885
|
+
* }
|
|
886
|
+
* }
|
|
887
|
+
* ```
|
|
888
|
+
*/
|
|
889
|
+
connectionRetry?: {
|
|
890
|
+
/** Maximum number of connection attempts (default: 4, min: 1, max: 5) */
|
|
891
|
+
maxAttempts?: number;
|
|
892
|
+
/** Delay in milliseconds between retry attempts (default: 200ms) */
|
|
893
|
+
delayMs?: number;
|
|
894
|
+
};
|
|
829
895
|
/**
|
|
830
896
|
* Optional logger function for debugging
|
|
831
897
|
* If not provided, no logging will occur
|
|
@@ -901,6 +967,12 @@ interface IRecognitionClient {
|
|
|
901
967
|
* @returns Statistics about audio transmission and buffering
|
|
902
968
|
*/
|
|
903
969
|
getStats(): IRecognitionClientStats;
|
|
970
|
+
/**
|
|
971
|
+
* Get the WebSocket URL being used by this client
|
|
972
|
+
* Available immediately after client construction.
|
|
973
|
+
* @returns WebSocket URL string
|
|
974
|
+
*/
|
|
975
|
+
getUrl(): string;
|
|
904
976
|
}
|
|
905
977
|
/**
|
|
906
978
|
* Client statistics interface
|
|
@@ -1006,10 +1078,16 @@ declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioCli
|
|
|
1006
1078
|
*/
|
|
1007
1079
|
private cleanup;
|
|
1008
1080
|
connect(): Promise<void>;
|
|
1081
|
+
/**
|
|
1082
|
+
* Attempt to connect with retry logic
|
|
1083
|
+
* Only retries on initial connection establishment, not mid-stream interruptions
|
|
1084
|
+
*/
|
|
1085
|
+
private connectWithRetry;
|
|
1009
1086
|
sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
1010
1087
|
private sendAudioInternal;
|
|
1011
1088
|
stopRecording(): Promise<void>;
|
|
1012
1089
|
getAudioUtteranceId(): string;
|
|
1090
|
+
getUrl(): string;
|
|
1013
1091
|
getState(): ClientState;
|
|
1014
1092
|
isConnected(): boolean;
|
|
1015
1093
|
isConnecting(): boolean;
|
|
@@ -1037,4 +1115,4 @@ declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioCli
|
|
|
1037
1115
|
private sendAudioNow;
|
|
1038
1116
|
}
|
|
1039
1117
|
|
|
1040
|
-
export { type ASRRequestConfig as A, ClientState as C, DeepgramModel as D,
|
|
1118
|
+
export { type ASRRequestConfig as A, ClientState as C, DeepgramModel as D, ErrorTypeV1 as E, type FunctionCallResultV1 as F, type GameContextV1 as G, type IRecognitionClient as I, Language as L, type MetadataResultV1 as M, RecognitionProvider as R, type Stage as S, type TranscriptionResultV1 as T, type RecognitionCallbackUrl as a, type ErrorResultV1 as b, type RealTimeTwoWayWebSocketRecognitionClientConfig as c, type IRecognitionClientConfig as d, RealTimeTwoWayWebSocketRecognitionClient as e, type TranscriptionResult as f, type IRecognitionClientStats as g, AudioEncoding as h, isNormalDisconnection as i, RecognitionContextTypeV1 as j, ControlSignalTypeV1 as k, RecognitionResultTypeV1 as l, type ASRRequestV1 as m, GoogleModel as n, SampleRate as o, STAGES as p };
|