@volley/recognition-client-sdk 0.1.211 → 0.1.255

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,12 +11,17 @@ npm install @volley/recognition-client-sdk
11
11
  ## Quick Start
12
12
 
13
13
  ```typescript
14
- import { createClientWithBuilder, RecognitionProvider, DeepgramModel } from '@volley/recognition-client-sdk';
14
+ import {
15
+ createClientWithBuilder,
16
+ RecognitionProvider,
17
+ DeepgramModel,
18
+ STAGES
19
+ } from '@volley/recognition-client-sdk';
15
20
 
16
21
  // Create client with builder pattern (recommended)
17
22
  const client = createClientWithBuilder(builder =>
18
23
  builder
19
- .url('ws://localhost:3101/ws/v1/recognize')
24
+ .stage(STAGES.STAGING) // ✨ Simple environment selection using enum
20
25
  .provider(RecognitionProvider.DEEPGRAM)
21
26
  .model(DeepgramModel.NOVA_2)
22
27
  .onTranscript(result => {
@@ -30,6 +35,9 @@ const client = createClientWithBuilder(builder =>
30
35
  await client.connect();
31
36
  client.sendAudio(pcm16AudioChunk); // Call repeatedly with audio chunks
32
37
  await client.stopRecording(); // Wait for final transcript
38
+
39
+ // Check the actual URL being used
40
+ console.log('Connected to:', client.getUrl());
33
41
  ```
34
42
 
35
43
  ### Alternative: Direct Client Creation
@@ -39,11 +47,12 @@ import {
39
47
  RealTimeTwoWayWebSocketRecognitionClient,
40
48
  RecognitionProvider,
41
49
  DeepgramModel,
42
- Language
50
+ Language,
51
+ STAGES
43
52
  } from '@volley/recognition-client-sdk';
44
53
 
45
54
  const client = new RealTimeTwoWayWebSocketRecognitionClient({
46
- url: 'ws://localhost:3101/ws/v1/recognize',
55
+ stage: STAGES.STAGING, // ✨ Recommended: Use STAGES enum for type safety
47
56
  asrRequestConfig: {
48
57
  provider: RecognitionProvider.DEEPGRAM,
49
58
  model: DeepgramModel.NOVA_2,
@@ -52,23 +61,89 @@ const client = new RealTimeTwoWayWebSocketRecognitionClient({
52
61
  onTranscript: (result) => console.log(result),
53
62
  onError: (error) => console.error(error)
54
63
  });
64
+
65
+ // Check the actual URL being used
66
+ console.log('Connected to:', client.getUrl());
55
67
  ```
56
68
 
57
69
  ## Configuration
58
70
 
59
- ### Basic Setup
71
+ ### Environment Selection
72
+
73
+ **Recommended: Use `stage` parameter with STAGES enum** for automatic environment configuration:
60
74
 
61
75
  ```typescript
62
- import { RecognitionProvider, DeepgramModel, Language } from '@volley/recognition-client-sdk';
76
+ import {
77
+ RecognitionProvider,
78
+ DeepgramModel,
79
+ Language,
80
+ STAGES
81
+ } from '@volley/recognition-client-sdk';
63
82
 
64
83
  builder
65
- .url('ws://localhost:3101/ws/v1/recognize')
84
+ .stage(STAGES.STAGING) // STAGES.LOCAL | STAGES.DEV | STAGES.STAGING | STAGES.PRODUCTION
66
85
  .provider(RecognitionProvider.DEEPGRAM) // DEEPGRAM, GOOGLE
67
86
  .model(DeepgramModel.NOVA_2) // Provider-specific model enum
68
87
  .language(Language.ENGLISH_US) // Language enum
69
88
  .interimResults(true) // Enable partial transcripts
70
89
  ```
71
90
 
91
+ **Available Stages and URLs:**
92
+
93
+ | Stage | Enum | WebSocket URL |
94
+ |-------|------|---------------|
95
+ | **Local** | `STAGES.LOCAL` | `ws://localhost:3101/ws/v1/recognize` |
96
+ | **Development** | `STAGES.DEV` | `wss://recognition-service-dev.volley-services.net/ws/v1/recognize` |
97
+ | **Staging** | `STAGES.STAGING` | `wss://recognition-service-staging.volley-services.net/ws/v1/recognize` |
98
+ | **Production** | `STAGES.PRODUCTION` | `wss://recognition-service.volley-services.net/ws/v1/recognize` |
99
+
100
+ > 💡 Using the `stage` parameter automatically constructs the correct URL for each environment.
101
+
102
+ **Automatic Connection Retry:**
103
+
104
+ The SDK **automatically retries failed connections** with sensible defaults - no configuration needed!
105
+
106
+ **Default behavior (works out of the box):**
107
+ - 4 connection attempts (try once, retry 3 times if failed)
108
+ - 200ms delay between retries
109
+ - Handles temporary service unavailability (503)
110
+ - Fast failure (~600ms total on complete failure)
111
+ - Timing: `Attempt 1 → FAIL → wait 200ms → Attempt 2 → FAIL → wait 200ms → Attempt 3 → FAIL → wait 200ms → Attempt 4`
112
+
113
+ ```typescript
114
+ import { STAGES } from '@volley/recognition-client-sdk';
115
+
116
+ // ✅ Automatic retry - no config needed!
117
+ const client = new RealTimeTwoWayWebSocketRecognitionClient({
118
+ stage: STAGES.STAGING,
119
+ // connectionRetry works automatically with defaults
120
+ });
121
+ ```
122
+
123
+ **Optional: Customize retry behavior** (only if needed):
124
+ ```typescript
125
+ const client = new RealTimeTwoWayWebSocketRecognitionClient({
126
+ stage: STAGES.STAGING,
127
+ connectionRetry: {
128
+ maxAttempts: 2, // Fewer attempts (min: 1, max: 5)
129
+ delayMs: 500 // Longer delay between attempts
130
+ }
131
+ });
132
+ ```
133
+
134
+ > ⚠️ **Note**: Retry only applies to **initial connection establishment**. If the connection drops during audio streaming, the SDK will not auto-retry (caller must handle this).
135
+
136
+ **Advanced: Custom URL** for non-standard endpoints:
137
+
138
+ ```typescript
139
+ builder
140
+ .url('wss://custom-endpoint.example.com/ws/v1/recognize') // Custom WebSocket URL
141
+ .provider(RecognitionProvider.DEEPGRAM)
142
+ // ... rest of config
143
+ ```
144
+
145
+ > 💡 **Note**: If both `stage` and `url` are provided, `url` takes precedence.
146
+
72
147
  ### Event Handlers
73
148
 
74
149
  ```typescript
@@ -102,6 +177,7 @@ await client.connect(); // Establish connection
102
177
  client.sendAudio(chunk); // Send PCM16 audio
103
178
  await client.stopRecording(); // End and get final transcript
104
179
  client.getAudioUtteranceId(); // Get session UUID
180
+ client.getUrl(); // Get actual WebSocket URL being used
105
181
  client.getState(); // Get current state
106
182
  client.isConnected(); // Check connection status
107
183
  ```
@@ -14,6 +14,15 @@ declare enum RecognitionProvider {
14
14
  GEMINI_BATCH = "gemini-batch",
15
15
  OPENAI_BATCH = "openai-batch"
16
16
  }
17
+ /**
18
+ * ASR API type - distinguishes between streaming and file-based transcription APIs
19
+ * - STREAMING: Real-time streaming APIs (Deepgram, AssemblyAI, Google)
20
+ * - FILE_BASED: File upload/batch APIs (OpenAI Batch, Gemini Batch)
21
+ */
22
+ declare enum ASRApiType {
23
+ STREAMING = "streaming",
24
+ FILE_BASED = "file-based"
25
+ }
17
26
  /**
18
27
  * Deepgram model names
19
28
  */
@@ -266,6 +275,7 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
266
275
  volume: z.ZodOptional<z.ZodNumber>;
267
276
  accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
268
277
  costInUSD: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
278
+ apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
269
279
  asrConfig: z.ZodOptional<z.ZodString>;
270
280
  rawAsrMetadata: z.ZodOptional<z.ZodString>;
271
281
  }, "strip", z.ZodTypeAny, {
@@ -279,6 +289,7 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
279
289
  duration?: number | undefined;
280
290
  volume?: number | undefined;
281
291
  costInUSD?: number | undefined;
292
+ apiType?: ASRApiType | undefined;
282
293
  asrConfig?: string | undefined;
283
294
  rawAsrMetadata?: string | undefined;
284
295
  }, {
@@ -292,6 +303,7 @@ declare const MetadataResultSchemaV1: z.ZodObject<{
292
303
  duration?: number | undefined;
293
304
  volume?: number | undefined;
294
305
  costInUSD?: number | undefined;
306
+ apiType?: ASRApiType | undefined;
295
307
  asrConfig?: string | undefined;
296
308
  rawAsrMetadata?: string | undefined;
297
309
  }>;
@@ -305,6 +317,7 @@ declare enum ErrorTypeV1 {
305
317
  PROVIDER_ERROR = "provider_error",// Error from ASR provider (Deepgram, Google, etc.) Unlikely to happen with fallbacks
306
318
  TIMEOUT_ERROR = "timeout_error",// Request or operation timeout. Likely business logic did not handle timeout.
307
319
  QUOTA_EXCEEDED = "quota_exceeded",// Quota or rate limit exceeded. Unlikely to happen with fallbacks
320
+ CONNECTION_ERROR = "connection_error",// Connection establishment or network error
308
321
  UNKNOWN_ERROR = "unknown_error"
309
322
  }
310
323
  /**
@@ -584,6 +597,17 @@ interface ASRRequestConfig {
584
597
  fallbackModels?: ASRRequestConfig[];
585
598
  }
586
599
 
600
+ /**
601
+ * Standard stage/environment constants used across all services
602
+ */
603
+ declare const STAGES: {
604
+ readonly LOCAL: "local";
605
+ readonly DEV: "dev";
606
+ readonly STAGING: "staging";
607
+ readonly PRODUCTION: "production";
608
+ };
609
+ type Stage = typeof STAGES[keyof typeof STAGES];
610
+
587
611
  /**
588
612
  * Generic WebSocket protocol types and utilities
589
613
  * Supports flexible versioning and message types
@@ -767,16 +791,34 @@ interface RecognitionCallbackUrl {
767
791
  }
768
792
  interface IRecognitionClientConfig {
769
793
  /**
770
- * WebSocket endpoint URL (optional - defaults to production)
794
+ * WebSocket endpoint URL (optional)
795
 + * Either `url` or `stage` may be provided.
796
+ * If both are provided, `url` takes precedence.
771
797
  *
772
- * For different stages, use the helper function:
798
+ * Example with explicit URL:
773
799
  * ```typescript
774
- * import { getRecognitionServiceBase } from '@recog/client-sdk-ts';
775
- * const base = getRecognitionServiceBase('staging'); // or 'dev', 'production'
776
- * const url = `${base.wsBase}/ws/v1/recognize`;
800
+ * { url: 'wss://custom-endpoint.example.com/ws/v1/recognize' }
777
801
  * ```
778
802
  */
779
803
  url?: string;
804
+ /**
805
+ * Stage for recognition service (recommended)
806
 + * Either `url` or `stage` may be provided.
807
+ * If both are provided, `url` takes precedence.
808
+ * Defaults to production if neither is provided.
809
+ *
810
+ * Example with STAGES enum (recommended):
811
+ * ```typescript
812
 + * import { STAGES } from '@volley/recognition-client-sdk';
813
+ * { stage: STAGES.STAGING }
814
+ * ```
815
+ *
816
+ * String values also accepted:
817
+ * ```typescript
818
 + * { stage: 'staging' } // 'local' | 'dev' | 'staging' | 'production'
819
+ * ```
820
+ */
821
+ stage?: Stage | string;
780
822
  /** ASR configuration (provider, model, language, etc.) - optional */
781
823
  asrRequestConfig?: ASRRequestConfig;
782
824
  /** Game context for improved recognition accuracy */
@@ -826,6 +868,30 @@ interface IRecognitionClientConfig {
826
868
  maxBufferDurationSec?: number;
827
869
  /** Expected chunks per second for ring buffer sizing (default: 100) */
828
870
  chunksPerSecond?: number;
871
+ /**
872
+ * Connection retry configuration (optional)
873
+ * Only applies to initial connection establishment, not mid-stream interruptions.
874
+ *
875
+ * Default: { maxAttempts: 4, delayMs: 200 } (try once, retry 3 times = 4 total attempts)
876
+ *
877
+ * Timing: Attempt 1 → FAIL → wait 200ms → Attempt 2 → FAIL → wait 200ms → Attempt 3 → FAIL → wait 200ms → Attempt 4
878
+ *
879
+ * Example:
880
+ * ```typescript
881
+ * {
882
+ * connectionRetry: {
883
+ * maxAttempts: 2, // Try connecting up to 2 times (1 retry)
884
+ * delayMs: 500 // Wait 500ms between attempts
885
+ * }
886
+ * }
887
+ * ```
888
+ */
889
+ connectionRetry?: {
890
+ /** Maximum number of connection attempts (default: 4, min: 1, max: 5) */
891
+ maxAttempts?: number;
892
+ /** Delay in milliseconds between retry attempts (default: 200ms) */
893
+ delayMs?: number;
894
+ };
829
895
  /**
830
896
  * Optional logger function for debugging
831
897
  * If not provided, no logging will occur
@@ -901,6 +967,12 @@ interface IRecognitionClient {
901
967
  * @returns Statistics about audio transmission and buffering
902
968
  */
903
969
  getStats(): IRecognitionClientStats;
970
+ /**
971
+ * Get the WebSocket URL being used by this client
972
+ * Available immediately after client construction.
973
+ * @returns WebSocket URL string
974
+ */
975
+ getUrl(): string;
904
976
  }
905
977
  /**
906
978
  * Client statistics interface
@@ -1006,10 +1078,16 @@ declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioCli
1006
1078
  */
1007
1079
  private cleanup;
1008
1080
  connect(): Promise<void>;
1081
+ /**
1082
+ * Attempt to connect with retry logic
1083
+ * Only retries on initial connection establishment, not mid-stream interruptions
1084
+ */
1085
+ private connectWithRetry;
1009
1086
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
1010
1087
  private sendAudioInternal;
1011
1088
  stopRecording(): Promise<void>;
1012
1089
  getAudioUtteranceId(): string;
1090
+ getUrl(): string;
1013
1091
  getState(): ClientState;
1014
1092
  isConnected(): boolean;
1015
1093
  isConnecting(): boolean;
@@ -1037,4 +1115,4 @@ declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioCli
1037
1115
  private sendAudioNow;
1038
1116
  }
1039
1117
 
1040
- export { type ASRRequestConfig as A, ClientState as C, DeepgramModel as D, type ErrorResultV1 as E, type FunctionCallResultV1 as F, type GameContextV1 as G, type IRecognitionClient as I, Language as L, type MetadataResultV1 as M, type RecognitionCallbackUrl as R, SampleRate as S, type TranscriptionResultV1 as T, type RealTimeTwoWayWebSocketRecognitionClientConfig as a, type IRecognitionClientConfig as b, RealTimeTwoWayWebSocketRecognitionClient as c, type TranscriptionResult as d, type IRecognitionClientStats as e, AudioEncoding as f, RecognitionContextTypeV1 as g, ControlSignalTypeV1 as h, isNormalDisconnection as i, RecognitionResultTypeV1 as j, type ASRRequestV1 as k, RecognitionProvider as l, GoogleModel as m };
1118
+ export { type ASRRequestConfig as A, ClientState as C, DeepgramModel as D, ErrorTypeV1 as E, type FunctionCallResultV1 as F, type GameContextV1 as G, type IRecognitionClient as I, Language as L, type MetadataResultV1 as M, RecognitionProvider as R, type Stage as S, type TranscriptionResultV1 as T, type RecognitionCallbackUrl as a, type ErrorResultV1 as b, type RealTimeTwoWayWebSocketRecognitionClientConfig as c, type IRecognitionClientConfig as d, RealTimeTwoWayWebSocketRecognitionClient as e, type TranscriptionResult as f, type IRecognitionClientStats as g, AudioEncoding as h, isNormalDisconnection as i, RecognitionContextTypeV1 as j, ControlSignalTypeV1 as k, RecognitionResultTypeV1 as l, type ASRRequestV1 as m, GoogleModel as n, SampleRate as o, STAGES as p };