@volley/recognition-client-sdk 0.1.767 → 0.1.799

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,7 @@ declare enum RecognitionProvider {
24
24
  BEDROCK = "bedrock",
25
25
  INWORLD_STT = "inworld-stt",
26
26
  AWS_TRANSCRIBE = "aws-transcribe",
27
+ AMAZON_NOVA_SONIC = "amazon-nova-sonic",
27
28
  TEST_ASR_PROVIDER_QUOTA = "test-asr-provider-quota",
28
29
  TEST_ASR_STREAMING = "test-asr-streaming"
29
30
  }
@@ -96,6 +97,7 @@ declare enum ElevenLabsModel {
96
97
  * @see https://platform.openai.com/docs/models/gpt-4o-transcribe
97
98
  */
98
99
  declare enum OpenAIRealtimeModel {
100
+ GPT_REALTIME_WHISPER = "gpt-realtime-whisper",
99
101
  GPT_4O_TRANSCRIBE = "gpt-4o-transcribe",
100
102
  GPT_4O_MINI_TRANSCRIBE = "gpt-4o-mini-transcribe"
101
103
  }
@@ -146,135 +148,28 @@ declare enum InworldSttModel {
146
148
  declare enum AwsTranscribeModel {
147
149
  DEFAULT = "default"
148
150
  }
151
+ /**
152
+ * Amazon Nova Sonic bidirectional streaming model (Bedrock).
153
+ * Speech-to-speech model; we consume the USER FINAL transcript and discard the assistant text/audio output.
154
+ * @see https://docs.aws.amazon.com/nova/latest/userguide/speech-bidirection.html
155
+ */
156
+ declare enum AmazonNovaSonicModel {
157
+ AMAZON_NOVA_SONIC_V1 = "amazon.nova-sonic-v1:0",
158
+ AMAZON_NOVA_2_SONIC = "amazon.nova-2-sonic-v1:0"
159
+ }
149
160
  /**
150
161
  * Self-serve vLLM batch transcription models
151
- * Backed by recognition-inference / RunPod `/transcribe`
162
+ * Backed by recognition-inference / RunPod `/ws/transcribe`
152
163
  */
153
164
  declare enum SelfServeVllmModel {
165
+ QWEN3_ASR_0_6B = "qwen3-asr-0.6b",
166
+ QWEN3_ASR_0_6B_WOF_LETTER = "qwen3-asr-0.6b-wof-letter",
154
167
  QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
155
168
  }
156
169
  /**
157
170
  * Type alias for any model from any provider
158
171
  */
159
- type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | InworldSttModel | SelfServeVllmModel | BedrockModel | AwsTranscribeModel | string;
160
-
161
- /**
162
- * Audio encoding types
163
- */
164
- declare enum AudioEncoding {
165
- ENCODING_UNSPECIFIED = 0,
166
- LINEAR16 = 1,
167
- OGG_OPUS = 2,
168
- FLAC = 3,
169
- MULAW = 4,
170
- ALAW = 5
171
- }
172
- declare namespace AudioEncoding {
173
- /**
174
- * Convert numeric ID to AudioEncoding enum
175
- * @param id - Numeric encoding identifier (0-5)
176
- * @returns AudioEncoding enum value or undefined if invalid
177
- */
178
- function fromId(id: number): AudioEncoding | undefined;
179
- /**
180
- * Convert string name to AudioEncoding enum
181
- * @param nameStr - String name like "linear16", "LINEAR16", "ogg_opus", "OGG_OPUS", etc. (case insensitive)
182
- * @returns AudioEncoding enum value or undefined if invalid
183
- */
184
- function fromName(nameStr: string): AudioEncoding | undefined;
185
- /**
186
- * Convert AudioEncoding enum to numeric ID
187
- * @param encoding - AudioEncoding enum value
188
- * @returns Numeric ID (0-5)
189
- */
190
- function toId(encoding: AudioEncoding): number;
191
- /**
192
- * Convert AudioEncoding enum to string name
193
- * @param encoding - AudioEncoding enum value
194
- * @returns String name like "LINEAR16", "MULAW", etc.
195
- */
196
- function toName(encoding: AudioEncoding): string;
197
- /**
198
- * Check if a numeric ID is a valid encoding
199
- * @param id - Numeric identifier to validate
200
- * @returns true if valid encoding ID
201
- */
202
- function isIdValid(id: number): boolean;
203
- /**
204
- * Check if a string name is a valid encoding
205
- * @param nameStr - String name to validate
206
- * @returns true if valid encoding name
207
- */
208
- function isNameValid(nameStr: string): boolean;
209
- }
210
- /**
211
- * Common sample rates (in Hz)
212
- */
213
- declare enum SampleRate {
214
- RATE_8000 = 8000,
215
- RATE_16000 = 16000,
216
- RATE_22050 = 22050,
217
- RATE_24000 = 24000,
218
- RATE_32000 = 32000,
219
- RATE_44100 = 44100,
220
- RATE_48000 = 48000
221
- }
222
- declare namespace SampleRate {
223
- /**
224
- * Convert Hz value to SampleRate enum
225
- * @param hz - Sample rate in Hz (8000, 16000, etc.)
226
- * @returns SampleRate enum value or undefined if invalid
227
- */
228
- function fromHz(hz: number): SampleRate | undefined;
229
- /**
230
- * Convert string name to SampleRate enum
231
- * @param nameStr - String name like "rate_8000", "RATE_16000", etc. (case insensitive)
232
- * @returns SampleRate enum value or undefined if invalid
233
- */
234
- function fromName(nameStr: string): SampleRate | undefined;
235
- /**
236
- * Convert SampleRate enum to Hz value
237
- * @param rate - SampleRate enum value
238
- * @returns Hz value (8000, 16000, etc.)
239
- */
240
- function toHz(rate: SampleRate): number;
241
- /**
242
- * Convert SampleRate enum to string name
243
- * @param rate - SampleRate enum value
244
- * @returns String name like "RATE_8000", "RATE_16000", etc.
245
- */
246
- function toName(rate: SampleRate): string;
247
- /**
248
- * Check if a numeric Hz value is a valid sample rate
249
- * @param hz - Hz value to validate
250
- * @returns true if valid sample rate
251
- */
252
- function isHzValid(hz: number): boolean;
253
- /**
254
- * Check if a string name is a valid sample rate
255
- * @param nameStr - String name to validate
256
- * @returns true if valid sample rate name
257
- */
258
- function isNameValid(nameStr: string): boolean;
259
- }
260
- /**
261
- * Supported languages for recognition
262
- * Using BCP-47 language tags
263
- */
264
- declare enum Language {
265
- ENGLISH_US = "en-US",
266
- ENGLISH_GB = "en-GB",
267
- SPANISH_ES = "es-ES",
268
- SPANISH_MX = "es-MX",
269
- FRENCH_FR = "fr-FR",
270
- GERMAN_DE = "de-DE",
271
- ITALIAN_IT = "it-IT",
272
- PORTUGUESE_BR = "pt-BR",
273
- JAPANESE_JP = "ja-JP",
274
- KOREAN_KR = "ko-KR",
275
- CHINESE_CN = "zh-CN",
276
- CHINESE_TW = "zh-TW"
277
- }
172
+ type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | GladiaModel | ElevenLabsModel | OpenAIRealtimeModel | MistralVoxtralModel | CartesiaModel | DashScopeModel | InworldSttModel | SelfServeVllmModel | BedrockModel | AwsTranscribeModel | AmazonNovaSonicModel | string;
278
173
 
279
174
  /**
280
175
  * Recognition Result Types V1
@@ -294,6 +189,16 @@ declare enum RecognitionResultTypeV1 {
294
189
  AUDIO_METRICS = "AudioMetrics",
295
190
  SESSION_CONFIGURED = "SessionConfigured"
296
191
  }
192
+ /**
193
+ * Source of a phrase detection — what kind of provider feature produced
194
+ * the hit. Currently only Deepgram's `search` parameter is wired up, so
195
+ * this enum has one value. New entries (e.g. KEYWORDS, KEYTERMS,
196
+ * SPEECH_CONTEXTS) get added when other providers join.
197
+ */
198
+ declare enum DetectionTypeV1 {
199
+ /** Deepgram phonetic phrase match via the `search=…` request parameter */
200
+ SEARCH = "search"
201
+ }
297
202
  /**
298
203
  * Transcription result V1 - contains transcript message
299
204
  * In the long run the game side should not need to know it. In the short run it is sent back to the client.
@@ -318,6 +223,25 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
318
223
  receivedAtMs: z.ZodOptional<z.ZodNumber>;
319
224
  accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
320
225
  rawAudioTimeMs: z.ZodOptional<z.ZodNumber>;
226
+ detections: z.ZodOptional<z.ZodArray<z.ZodObject<{
227
+ type: z.ZodNativeEnum<typeof DetectionTypeV1>;
228
+ query: z.ZodString;
229
+ score: z.ZodNumber;
230
+ startMs: z.ZodOptional<z.ZodNumber>;
231
+ endMs: z.ZodOptional<z.ZodNumber>;
232
+ }, "strip", z.ZodTypeAny, {
233
+ type: DetectionTypeV1;
234
+ query: string;
235
+ score: number;
236
+ startMs?: number | undefined;
237
+ endMs?: number | undefined;
238
+ }, {
239
+ type: DetectionTypeV1;
240
+ query: string;
241
+ score: number;
242
+ startMs?: number | undefined;
243
+ endMs?: number | undefined;
244
+ }>, "many">>;
321
245
  }, "strip", z.ZodTypeAny, {
322
246
  type: RecognitionResultTypeV1.TRANSCRIPTION;
323
247
  audioUtteranceId: string;
@@ -337,6 +261,13 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
337
261
  receivedAtMs?: number | undefined;
338
262
  accumulatedAudioTimeMs?: number | undefined;
339
263
  rawAudioTimeMs?: number | undefined;
264
+ detections?: {
265
+ type: DetectionTypeV1;
266
+ query: string;
267
+ score: number;
268
+ startMs?: number | undefined;
269
+ endMs?: number | undefined;
270
+ }[] | undefined;
340
271
  }, {
341
272
  type: RecognitionResultTypeV1.TRANSCRIPTION;
342
273
  audioUtteranceId: string;
@@ -356,6 +287,13 @@ declare const TranscriptionResultSchemaV1: z.ZodObject<{
356
287
  receivedAtMs?: number | undefined;
357
288
  accumulatedAudioTimeMs?: number | undefined;
358
289
  rawAudioTimeMs?: number | undefined;
290
+ detections?: {
291
+ type: DetectionTypeV1;
292
+ query: string;
293
+ score: number;
294
+ startMs?: number | undefined;
295
+ endMs?: number | undefined;
296
+ }[] | undefined;
359
297
  }>;
360
298
  type TranscriptionResultV1 = z.infer<typeof TranscriptionResultSchemaV1>;
361
299
  /**
@@ -676,6 +614,138 @@ declare const AudioMetricsResultSchemaV1: z.ZodObject<{
676
614
  }>;
677
615
  type AudioMetricsResultV1 = z.infer<typeof AudioMetricsResultSchemaV1>;
678
616
 
617
+ /**
618
+ * Audio encoding types
619
+ */
620
+ declare enum AudioEncoding {
621
+ ENCODING_UNSPECIFIED = 0,
622
+ LINEAR16 = 1,
623
+ OGG_OPUS = 2,
624
+ FLAC = 3,
625
+ MULAW = 4,
626
+ ALAW = 5
627
+ }
628
+ declare namespace AudioEncoding {
629
+ /**
630
+ * Convert numeric ID to AudioEncoding enum
631
+ * @param id - Numeric encoding identifier (0-5)
632
+ * @returns AudioEncoding enum value or undefined if invalid
633
+ */
634
+ function fromId(id: number): AudioEncoding | undefined;
635
+ /**
636
+ * Convert string name to AudioEncoding enum
637
+ * @param nameStr - String name like "linear16", "LINEAR16", "ogg_opus", "OGG_OPUS", etc. (case insensitive)
638
+ * @returns AudioEncoding enum value or undefined if invalid
639
+ */
640
+ function fromName(nameStr: string): AudioEncoding | undefined;
641
+ /**
642
+ * Convert AudioEncoding enum to numeric ID
643
+ * @param encoding - AudioEncoding enum value
644
+ * @returns Numeric ID (0-5)
645
+ */
646
+ function toId(encoding: AudioEncoding): number;
647
+ /**
648
+ * Convert AudioEncoding enum to string name
649
+ * @param encoding - AudioEncoding enum value
650
+ * @returns String name like "LINEAR16", "MULAW", etc.
651
+ */
652
+ function toName(encoding: AudioEncoding): string;
653
+ /**
654
+ * Check if a numeric ID is a valid encoding
655
+ * @param id - Numeric identifier to validate
656
+ * @returns true if valid encoding ID
657
+ */
658
+ function isIdValid(id: number): boolean;
659
+ /**
660
+ * Check if a string name is a valid encoding
661
+ * @param nameStr - String name to validate
662
+ * @returns true if valid encoding name
663
+ */
664
+ function isNameValid(nameStr: string): boolean;
665
+ /**
666
+ * Coerce a possibly-stringly-typed encoding value into the AudioEncoding enum.
667
+ *
668
+ * - enum / number → returned as-is (already AudioEncoding-shaped)
669
+ * - string (case-insensitive, e.g. 'linear16', 'LINEAR16') → converted via {@link fromName}.
670
+ * Invokes `onStringInput` with a warning message so callers can route it
671
+ * to their preferred logger.
672
+ * - invalid string → throws (preferred over silent fallback so typos surface)
673
+ * - undefined → defaults to {@link AudioEncoding.LINEAR16}
674
+ *
675
+ * Always normalize at the SDK / server boundary so downstream code can rely
676
+ * on a numeric AudioEncoding (the wire-level binary frame header is uint32).
677
+ */
678
+ function coerce(value: AudioEncoding | string | number | undefined, onStringInput?: (warning: string) => void): AudioEncoding;
679
+ }
680
+ /**
681
+ * Common sample rates (in Hz)
682
+ */
683
+ declare enum SampleRate {
684
+ RATE_8000 = 8000,
685
+ RATE_16000 = 16000,
686
+ RATE_22050 = 22050,
687
+ RATE_24000 = 24000,
688
+ RATE_32000 = 32000,
689
+ RATE_44100 = 44100,
690
+ RATE_48000 = 48000
691
+ }
692
+ declare namespace SampleRate {
693
+ /**
694
+ * Convert Hz value to SampleRate enum
695
+ * @param hz - Sample rate in Hz (8000, 16000, etc.)
696
+ * @returns SampleRate enum value or undefined if invalid
697
+ */
698
+ function fromHz(hz: number): SampleRate | undefined;
699
+ /**
700
+ * Convert string name to SampleRate enum
701
+ * @param nameStr - String name like "rate_8000", "RATE_16000", etc. (case insensitive)
702
+ * @returns SampleRate enum value or undefined if invalid
703
+ */
704
+ function fromName(nameStr: string): SampleRate | undefined;
705
+ /**
706
+ * Convert SampleRate enum to Hz value
707
+ * @param rate - SampleRate enum value
708
+ * @returns Hz value (8000, 16000, etc.)
709
+ */
710
+ function toHz(rate: SampleRate): number;
711
+ /**
712
+ * Convert SampleRate enum to string name
713
+ * @param rate - SampleRate enum value
714
+ * @returns String name like "RATE_8000", "RATE_16000", etc.
715
+ */
716
+ function toName(rate: SampleRate): string;
717
+ /**
718
+ * Check if a numeric Hz value is a valid sample rate
719
+ * @param hz - Hz value to validate
720
+ * @returns true if valid sample rate
721
+ */
722
+ function isHzValid(hz: number): boolean;
723
+ /**
724
+ * Check if a string name is a valid sample rate
725
+ * @param nameStr - String name to validate
726
+ * @returns true if valid sample rate name
727
+ */
728
+ function isNameValid(nameStr: string): boolean;
729
+ }
730
+ /**
731
+ * Supported languages for recognition
732
+ * Using BCP-47 language tags
733
+ */
734
+ declare enum Language {
735
+ ENGLISH_US = "en-US",
736
+ ENGLISH_GB = "en-GB",
737
+ SPANISH_ES = "es-ES",
738
+ SPANISH_MX = "es-MX",
739
+ FRENCH_FR = "fr-FR",
740
+ GERMAN_DE = "de-DE",
741
+ ITALIAN_IT = "it-IT",
742
+ PORTUGUESE_BR = "pt-BR",
743
+ JAPANESE_JP = "ja-JP",
744
+ KOREAN_KR = "ko-KR",
745
+ CHINESE_CN = "zh-CN",
746
+ CHINESE_TW = "zh-TW"
747
+ }
748
+
679
749
  /**
680
750
  * Error Exception Types
681
751
  *
@@ -1371,6 +1441,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
1371
1441
  prefixId: z.ZodOptional<z.ZodString>;
1372
1442
  prefixTextToRemove: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
1373
1443
  audioMetricsIntervalMs: z.ZodOptional<z.ZodNumber>;
1444
+ appendSearch: z.ZodOptional<z.ZodBoolean>;
1374
1445
  debugCommand: z.ZodOptional<z.ZodObject<{
1375
1446
  enableDebugLog: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1376
1447
  enableAudioStorage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
@@ -1413,6 +1484,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
1413
1484
  prefixId?: string | undefined;
1414
1485
  prefixTextToRemove?: string[] | undefined;
1415
1486
  audioMetricsIntervalMs?: number | undefined;
1487
+ appendSearch?: boolean | undefined;
1416
1488
  debugCommand?: {
1417
1489
  enableDebugLog: boolean;
1418
1490
  enableAudioStorage: boolean;
@@ -1445,6 +1517,7 @@ declare const ASRRequestSchemaV1: z.ZodObject<{
1445
1517
  prefixId?: string | undefined;
1446
1518
  prefixTextToRemove?: string[] | undefined;
1447
1519
  audioMetricsIntervalMs?: number | undefined;
1520
+ appendSearch?: boolean | undefined;
1448
1521
  debugCommand?: {
1449
1522
  enableDebugLog?: boolean | undefined;
1450
1523
  enableAudioStorage?: boolean | undefined;
@@ -1579,10 +1652,10 @@ interface ASRRequestConfig {
1579
1652
  * doesn't respond with is_final=true after stopRecording().
1580
1653
  *
1581
1654
  * - aggressive: 100ms - fast response, may cut off slow providers
1582
- * - balanced: 500ms - current default, good for most cases
1583
- * - conservative: 1000ms - wait longer for complex utterances
1655
+ * - balanced: 500ms - good for most cases
1656
+ * - conservative: 1000ms - current default, wait longer for complex utterances
1584
1657
  *
1585
- * @default 'balanced'
1658
+ * @default 'conservative'
1586
1659
  * @see FinalTranscriptStability enum for detailed descriptions
1587
1660
  */
1588
1661
  finalTranscriptStability?: FinalTranscriptStability | string;
@@ -1676,6 +1749,29 @@ interface ASRRequestConfig {
1676
1749
  * @example 500
1677
1750
  */
1678
1751
  audioMetricsIntervalMs?: number;
1752
+ /**
1753
+ * Opt-in: round-trip Deepgram `search` phrase hits into the transcript.
1754
+ *
1755
+ * When `true` AND the resolved provider/model is **deepgram nova-2** AND the
1756
+ * GameContext `gamePhase` is `'Solve Puzzle'`, every Deepgram Results event
1757
+ * with a `channel.search` hit at confidence ≥ 0.6 has the original query
1758
+ * prepended to the transcript text delivered to the client. This restores
1759
+ * parity with the legacy Roku→Deepgram WoF Puzzle-Solve path where the
1760
+ * phrase round-trip lets downstream NLU match multi-word puzzle solutions
1761
+ * even when nova-2's primary transcription drifts.
1762
+ *
1763
+ * Default: `false` (no prepend; transcript is whatever nova-2 produces).
1764
+ *
1765
+ * Scope guard rationale:
1766
+ * - nova-2 only: nova-3 / flux do not need this (they handle phrase
1767
+ * spotting differently and the prepend would only add noise).
1768
+ * - Solve-Puzzle scene only: other WoF scenes (Letter-Guess,
1769
+ * Bonus-Round, etc.) do NOT want the slotMap phrase prepended — only
1770
+ * Puzzle-Solve depends on the phrase round-trip.
1771
+ *
1772
+ * @default false
1773
+ */
1774
+ appendSearch?: boolean;
1679
1775
  /**
1680
1776
  * Optional fallback ASR configurations
1681
1777
  *
@@ -1736,7 +1832,10 @@ declare enum GeminiModel {
1736
1832
  }
1737
1833
 
1738
1834
  /**
1739
- * OpenAI Model Types
1835
+ * OpenAI Batch API Model Types (HTTP API)
1836
+ * @see https://platform.openai.com/docs/guides/speech-to-text
1837
+ *
1838
+ * Note: For openai-realtime provider models, see OpenAIRealtimeModel in provider.types.ts
1740
1839
  */
1741
1840
  declare enum OpenAIModel {
1742
1841
  WHISPER_1 = "whisper-1"
@@ -2090,6 +2189,23 @@ interface IRecognitionClient {
2090
2189
  * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
2091
2190
  */
2092
2191
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2192
+ /**
2193
+ * Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
2194
+ * downsamples to the session's target rate (currently 16 kHz, set by the
2195
+ * server validator) before transmitting.
2196
+ *
2197
+ * Use this when your capture pipeline produces audio at the system's
2198
+ * native rate (browser `AudioContext` is typically 44.1 kHz or 48 kHz).
2199
+ * If your audio is already at the target rate, prefer `sendAudio()` to
2200
+ * skip the resample step.
2201
+ *
2202
+ * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
2203
+ * mixed to mono by the caller.
2204
+ *
2205
+ * @param audioData - PCM16 mono audio at `sourceSampleRate`.
2206
+ * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
2207
+ */
2208
+ sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2093
2209
  /**
2094
2210
  * Stop recording and wait for final transcript
2095
2211
  * The server will close the connection after sending the final transcript.
@@ -2295,6 +2411,29 @@ declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioCli
2295
2411
  */
2296
2412
  private connectWithRetry;
2297
2413
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2414
+ /**
2415
+ * Send PCM16 mono audio captured at any sample rate. The SDK downsamples
2416
+ * to the session's target rate (currently 16 kHz per server validator)
2417
+ * before sending.
2418
+ *
2419
+ * Use this when your capture pipeline produces audio at the system's
2420
+ * native rate — `AudioContext` defaults to 44.1 kHz or 48 kHz on most
2421
+ * desktop/mobile hardware — and you don't want to bring your own
2422
+ * resampler. If your audio is already at the target rate, prefer
2423
+ * `sendAudio()` to skip the resample step.
2424
+ *
2425
+ * Algorithm: box-filter averaging (see audio-resampler.ts). Cheap, no
2426
+ * dependencies, has a built-in low-pass effect so aliasing stays out of
2427
+ * the speech band. Suitable for ASR; not a substitute for a high-quality
2428
+ * resampler if you're doing music or full-fidelity processing.
2429
+ *
2430
+ * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
2431
+ * mixed to mono by the caller.
2432
+ *
2433
+ * @param audioData - PCM16 mono audio at `sourceSampleRate`.
2434
+ * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
2435
+ */
2436
+ sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2298
2437
  private sendAudioInternal;
2299
2438
  /**
2300
2439
  * Only active when the client is in READY state; otherwise it will return immediately.
@@ -2718,6 +2857,14 @@ interface ISimplifiedVGFRecognitionClient {
2718
2857
  * @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
2719
2858
  */
2720
2859
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2860
+ /**
2861
+ * Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
2862
+ * downsamples to the session's target rate before transmitting. Use
2863
+ * when capture is at the system's native rate (browser AudioContext is
2864
+ * typically 44.1 kHz or 48 kHz). Audio must be signed 16-bit
2865
+ * little-endian PCM, mono.
2866
+ */
2867
+ sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2721
2868
  /**
2722
2869
  * Stop recording and wait for final transcription
2723
2870
  * @returns Promise that resolves when transcription is complete
@@ -2808,6 +2955,13 @@ declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitio
2808
2955
  constructor(config: SimplifiedVGFClientConfig);
2809
2956
  connect(): Promise<void>;
2810
2957
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2958
+ sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2959
+ /**
2960
+ * Set VGF recording status to RECORDING on the first audio chunk.
2961
+ * Idempotent — subsequent calls are no-ops until disconnect/stop resets
2962
+ * `isRecordingAudio`.
2963
+ */
2964
+ private markRecordingStarted;
2811
2965
  stopRecording(): Promise<void>;
2812
2966
  stopAbnormally(): void;
2813
2967
  getAudioUtteranceId(): string;
@@ -2934,5 +3088,5 @@ declare function getRecognitionConductorHttpBase(stage?: Stage | string | null |
2934
3088
  declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
2935
3089
  declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
2936
3090
 
2937
- export { AudioEncoding, AwsTranscribeModel, BedrockModel, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
3091
+ export { AmazonNovaSonicModel, AudioEncoding, AwsTranscribeModel, BedrockModel, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
2938
3092
  export type { ASRRequestConfig, ASRRequestV1, AudioMetricsResultV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
package/dist/index.d.ts CHANGED
@@ -11,6 +11,6 @@ export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, Tran
11
11
  export { resetRecognitionVGFState } from './vgf-recognition-mapper.js';
12
12
  export { AudioEncoding } from '@recog/websocket';
13
13
  export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
14
- type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type AudioMetricsResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, BedrockModel, AwsTranscribeModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
14
+ type TranscriptionResultV1, type FunctionCallResultV1, type MetadataResultV1, type AudioMetricsResultV1, type ErrorResultV1, RecognitionResultTypeV1, ClientControlActionV1, type ASRRequestConfig, type ASRRequestV1, FinalTranscriptStability, createDefaultASRConfig, RecognitionProvider, DeepgramModel, ElevenLabsModel, FireworksModel, GladiaModel, GoogleModel, GeminiModel, OpenAIModel, SelfServeVllmModel, OpenAIRealtimeModel, MistralVoxtralModel, CartesiaModel, DashScopeModel, BedrockModel, AwsTranscribeModel, AmazonNovaSonicModel, Language, SampleRate, STAGES, type Stage } from '@recog/shared-types';
15
15
  export { getRecognitionServiceBase, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getRecognitionServiceHost, getRecognitionConductorBase, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionConductorHost, normalizeStage, RECOGNITION_SERVICE_BASES, RECOGNITION_CONDUCTOR_BASES } from '@recog/shared-config';
16
16
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,oBAAoB,EACpB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}