@volley/recognition-client-sdk 0.1.799 → 0.1.800

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1406,6 +1406,11 @@ interface IRecognitionClientConfig {
1406
1406
  *
1407
1407
  * Main interface for real-time speech recognition clients.
1408
1408
  * Provides methods for connection management, audio streaming, and session control.
1409
+ *
1410
+ * NOTE for maintainers: `ISimplifiedVGFRecognitionClient` extends this interface,
1411
+ * so any method added here must also be implemented (typically as a delegate) by
1412
+ * `SimplifiedVGFRecognitionClient`. TypeScript will flag missing delegates at
1413
+ * compile time — do not work around the error, add the delegate.
1409
1414
  */
1410
1415
  interface IRecognitionClient {
1411
1416
  /**
@@ -1437,6 +1442,16 @@ interface IRecognitionClient {
1437
1442
  * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
1438
1443
  */
1439
1444
  sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
1445
+ /**
1446
+ * Send prefix audio (e.g. a TTS prompt) that primes the provider's language
1447
+ * model before user audio is streamed. Chunks accepted — the server buffers
1448
+ * until the session is READY and flushes. Must be sent BEFORE the first
1449
+ * `sendAudio()` to take effect. Only meaningful when
1450
+ * `asrRequestConfig.prefixMode === PrefixMode.CLIENT`.
1451
+ *
1452
+ * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
1453
+ */
1454
+ sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
1440
1455
  /**
1441
1456
  * Stop recording and wait for final transcript
1442
1457
  * The server will close the connection after sending the final transcript.
@@ -2175,6 +2175,11 @@ interface IRecognitionClientConfig {
2175
2175
  *
2176
2176
  * Main interface for real-time speech recognition clients.
2177
2177
  * Provides methods for connection management, audio streaming, and session control.
2178
+ *
2179
+ * NOTE for maintainers: `ISimplifiedVGFRecognitionClient` extends this interface,
2180
+ * so any method added here must also be implemented (typically as a delegate) by
2181
+ * `SimplifiedVGFRecognitionClient`. TypeScript will flag missing delegates at
2182
+ * compile time — do not work around the error, add the delegate.
2178
2183
  */
2179
2184
  interface IRecognitionClient {
2180
2185
  /**
@@ -2206,6 +2211,16 @@ interface IRecognitionClient {
2206
2211
  * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
2207
2212
  */
2208
2213
  sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2214
+ /**
2215
+ * Send prefix audio (e.g. a TTS prompt) that primes the provider's language
2216
+ * model before user audio is streamed. Chunks accepted — the server buffers
2217
+ * until the session is READY and flushes. Must be sent BEFORE the first
2218
+ * `sendAudio()` to take effect. Only meaningful when
2219
+ * `asrRequestConfig.prefixMode === PrefixMode.CLIENT`.
2220
+ *
2221
+ * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
2222
+ */
2223
+ sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2209
2224
  /**
2210
2225
  * Stop recording and wait for final transcript
2211
2226
  * The server will close the connection after sending the final transcript.
@@ -2746,7 +2761,42 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2746
2761
  finalConfidence: z.ZodOptional<z.ZodNumber>;
2747
2762
  voiceEnd: z.ZodOptional<z.ZodNumber>;
2748
2763
  lastNonSilence: z.ZodOptional<z.ZodNumber>;
2764
+ accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
2749
2765
  asrConfig: z.ZodOptional<z.ZodString>;
2766
+ sessionConfigured: z.ZodOptional<z.ZodObject<{
2767
+ type: z.ZodLiteral<RecognitionResultTypeV1.SESSION_CONFIGURED>;
2768
+ audioUtteranceId: z.ZodString;
2769
+ provider: z.ZodOptional<z.ZodString>;
2770
+ model: z.ZodOptional<z.ZodString>;
2771
+ sampleRate: z.ZodOptional<z.ZodNumber>;
2772
+ encoding: z.ZodOptional<z.ZodString>;
2773
+ apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
2774
+ isFallback: z.ZodOptional<z.ZodBoolean>;
2775
+ asrRequest: z.ZodOptional<z.ZodString>;
2776
+ providerConfig: z.ZodOptional<z.ZodString>;
2777
+ }, "strip", z.ZodTypeAny, {
2778
+ type: RecognitionResultTypeV1.SESSION_CONFIGURED;
2779
+ audioUtteranceId: string;
2780
+ provider?: string | undefined;
2781
+ model?: string | undefined;
2782
+ sampleRate?: number | undefined;
2783
+ encoding?: string | undefined;
2784
+ apiType?: ASRApiType | undefined;
2785
+ isFallback?: boolean | undefined;
2786
+ asrRequest?: string | undefined;
2787
+ providerConfig?: string | undefined;
2788
+ }, {
2789
+ type: RecognitionResultTypeV1.SESSION_CONFIGURED;
2790
+ audioUtteranceId: string;
2791
+ provider?: string | undefined;
2792
+ model?: string | undefined;
2793
+ sampleRate?: number | undefined;
2794
+ encoding?: string | undefined;
2795
+ apiType?: ASRApiType | undefined;
2796
+ isFallback?: boolean | undefined;
2797
+ asrRequest?: string | undefined;
2798
+ providerConfig?: string | undefined;
2799
+ }>>;
2750
2800
  startRecordingTimestamp: z.ZodOptional<z.ZodString>;
2751
2801
  finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
2752
2802
  finalTranscriptionTimestamp: z.ZodOptional<z.ZodString>;
@@ -2756,6 +2806,28 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2756
2806
  functionCallConfidence: z.ZodOptional<z.ZodNumber>;
2757
2807
  finalFunctionCallTimestamp: z.ZodOptional<z.ZodString>;
2758
2808
  promptSlotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
2809
+ promptSTT: z.ZodOptional<z.ZodString>;
2810
+ promptSTF: z.ZodOptional<z.ZodString>;
2811
+ promptTTF: z.ZodOptional<z.ZodString>;
2812
+ detections: z.ZodOptional<z.ZodArray<z.ZodObject<{
2813
+ type: z.ZodNativeEnum<typeof DetectionTypeV1>;
2814
+ query: z.ZodString;
2815
+ score: z.ZodNumber;
2816
+ startMs: z.ZodOptional<z.ZodNumber>;
2817
+ endMs: z.ZodOptional<z.ZodNumber>;
2818
+ }, "strip", z.ZodTypeAny, {
2819
+ type: DetectionTypeV1;
2820
+ query: string;
2821
+ score: number;
2822
+ startMs?: number | undefined;
2823
+ endMs?: number | undefined;
2824
+ }, {
2825
+ type: DetectionTypeV1;
2826
+ query: string;
2827
+ score: number;
2828
+ startMs?: number | undefined;
2829
+ endMs?: number | undefined;
2830
+ }>, "many">>;
2759
2831
  recognitionActionProcessingState: z.ZodOptional<z.ZodString>;
2760
2832
  }, "strip", z.ZodTypeAny, {
2761
2833
  audioUtteranceId: string;
@@ -2766,7 +2838,20 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2766
2838
  finalConfidence?: number | undefined;
2767
2839
  voiceEnd?: number | undefined;
2768
2840
  lastNonSilence?: number | undefined;
2841
+ accumulatedAudioTimeMs?: number | undefined;
2769
2842
  asrConfig?: string | undefined;
2843
+ sessionConfigured?: {
2844
+ type: RecognitionResultTypeV1.SESSION_CONFIGURED;
2845
+ audioUtteranceId: string;
2846
+ provider?: string | undefined;
2847
+ model?: string | undefined;
2848
+ sampleRate?: number | undefined;
2849
+ encoding?: string | undefined;
2850
+ apiType?: ASRApiType | undefined;
2851
+ isFallback?: boolean | undefined;
2852
+ asrRequest?: string | undefined;
2853
+ providerConfig?: string | undefined;
2854
+ } | undefined;
2770
2855
  startRecordingTimestamp?: string | undefined;
2771
2856
  finalRecordingTimestamp?: string | undefined;
2772
2857
  finalTranscriptionTimestamp?: string | undefined;
@@ -2775,6 +2860,16 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2775
2860
  functionCallConfidence?: number | undefined;
2776
2861
  finalFunctionCallTimestamp?: string | undefined;
2777
2862
  promptSlotMap?: Record<string, string[]> | undefined;
2863
+ promptSTT?: string | undefined;
2864
+ promptSTF?: string | undefined;
2865
+ promptTTF?: string | undefined;
2866
+ detections?: {
2867
+ type: DetectionTypeV1;
2868
+ query: string;
2869
+ score: number;
2870
+ startMs?: number | undefined;
2871
+ endMs?: number | undefined;
2872
+ }[] | undefined;
2778
2873
  recognitionActionProcessingState?: string | undefined;
2779
2874
  }, {
2780
2875
  audioUtteranceId: string;
@@ -2784,7 +2879,20 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2784
2879
  finalConfidence?: number | undefined;
2785
2880
  voiceEnd?: number | undefined;
2786
2881
  lastNonSilence?: number | undefined;
2882
+ accumulatedAudioTimeMs?: number | undefined;
2787
2883
  asrConfig?: string | undefined;
2884
+ sessionConfigured?: {
2885
+ type: RecognitionResultTypeV1.SESSION_CONFIGURED;
2886
+ audioUtteranceId: string;
2887
+ provider?: string | undefined;
2888
+ model?: string | undefined;
2889
+ sampleRate?: number | undefined;
2890
+ encoding?: string | undefined;
2891
+ apiType?: ASRApiType | undefined;
2892
+ isFallback?: boolean | undefined;
2893
+ asrRequest?: string | undefined;
2894
+ providerConfig?: string | undefined;
2895
+ } | undefined;
2788
2896
  startRecordingTimestamp?: string | undefined;
2789
2897
  finalRecordingTimestamp?: string | undefined;
2790
2898
  finalTranscriptionTimestamp?: string | undefined;
@@ -2794,6 +2902,16 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2794
2902
  functionCallConfidence?: number | undefined;
2795
2903
  finalFunctionCallTimestamp?: string | undefined;
2796
2904
  promptSlotMap?: Record<string, string[]> | undefined;
2905
+ promptSTT?: string | undefined;
2906
+ promptSTF?: string | undefined;
2907
+ promptTTF?: string | undefined;
2908
+ detections?: {
2909
+ type: DetectionTypeV1;
2910
+ query: string;
2911
+ score: number;
2912
+ startMs?: number | undefined;
2913
+ endMs?: number | undefined;
2914
+ }[] | undefined;
2797
2915
  recognitionActionProcessingState?: string | undefined;
2798
2916
  }>;
2799
2917
  type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>;
@@ -2843,102 +2961,22 @@ interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
2843
2961
  /**
2844
2962
  * Interface for SimplifiedVGFRecognitionClient
2845
2963
  *
2846
- * A simplified client that maintains VGF state for game developers.
2847
- * All methods from the underlying client are available, plus VGF state management.
2964
+ * Inherits the full IRecognitionClient surface (connect, sendAudio,
2965
+ * sendAudioWithSampleRate, sendPrefixAudio, stopRecording, stopAbnormally,
2966
+ * status checks, sendGameContext, getStats, getUrl, getState, getAudioUtteranceId)
2967
+ * — see recognition-client.types.ts for those. Adds VGF-specific state access.
2968
+ *
2969
+ * Extending IRecognitionClient (rather than redeclaring methods) means
2970
+ * TypeScript catches any base-client method that's not delegated by the
2971
+ * VGF wrapper at compile time — keeps the two surfaces in sync.
2848
2972
  */
2849
- interface ISimplifiedVGFRecognitionClient {
2973
+ interface ISimplifiedVGFRecognitionClient extends IRecognitionClient {
2850
2974
  /**
2851
- * Connect to the recognition service WebSocket
2852
- * @returns Promise that resolves when connected and ready
2853
- */
2854
- connect(): Promise<void>;
2855
- /**
2856
- * Send audio data for transcription
2857
- * @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
2858
- */
2859
- sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2860
- /**
2861
- * Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
2862
- * downsamples to the session's target rate before transmitting. Use
2863
- * when capture is at the system's native rate (browser AudioContext is
2864
- * typically 44.1 kHz or 48 kHz). Audio must be signed 16-bit
2865
- * little-endian PCM, mono.
2866
- */
2867
- sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2868
- /**
2869
- * Stop recording and wait for final transcription
2870
- * @returns Promise that resolves when transcription is complete
2871
- */
2872
- stopRecording(): Promise<void>;
2873
- /**
2874
- * Force stop and immediately close connection without waiting for server
2875
- *
2876
- * WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
2877
- * - Does NOT wait for server to process remaining audio
2878
- * - Does NOT receive final transcript from server (VGF state set to empty)
2879
- * - Immediately closes WebSocket connection
2880
- * - Cleans up resources (buffers, listeners)
2881
- *
2882
- * Use Cases:
2883
- * - User explicitly cancels/abandons the session
2884
- * - Timeout scenarios where waiting is not acceptable
2885
- * - Need immediate cleanup and can't wait for server
2886
- *
2887
- * RECOMMENDED: Use stopRecording() for normal shutdown.
2888
- * Only use this when immediate disconnection is required.
2889
- */
2890
- stopAbnormally(): void;
2891
- /**
2892
- * Get the current VGF recognition state
2975
+ * Get the current VGF recognition state — the single shared store
2976
+ * of inputs and outputs for this utterance.
2893
2977
  * @returns Current RecognitionState with all transcription data
2894
2978
  */
2895
2979
  getVGFState(): RecognitionState;
2896
- /**
2897
- * Check if connected to the WebSocket
2898
- */
2899
- isConnected(): boolean;
2900
- /**
2901
- * Check if currently connecting
2902
- */
2903
- isConnecting(): boolean;
2904
- /**
2905
- * Check if currently stopping
2906
- */
2907
- isStopping(): boolean;
2908
- /**
2909
- * Check if transcription has finished
2910
- */
2911
- isTranscriptionFinished(): boolean;
2912
- /**
2913
- * Check if the audio buffer has overflowed
2914
- */
2915
- isBufferOverflowing(): boolean;
2916
- /**
2917
- * Send game context after connection is established (for preconnect flow).
2918
- *
2919
- * Preconnect flow: Create client with asrRequestConfig (useContext: true) but
2920
- * WITHOUT gameContext → call connect() → later call sendGameContext() with slotMap.
2921
- *
2922
- * @param context - Game context including slotMap for keyword boosting
2923
- */
2924
- sendGameContext(context: GameContextV1): void;
2925
- /**
2926
- * Check if server has sent READY signal (provider connected, ready for audio).
2927
- * In preconnect flow, this becomes true after sendGameContext() triggers provider attachment.
2928
- */
2929
- isServerReady(): boolean;
2930
- /**
2931
- * Get the audio utterance ID for this session
2932
- */
2933
- getAudioUtteranceId(): string;
2934
- /**
2935
- * Get the WebSocket URL being used
2936
- */
2937
- getUrl(): string;
2938
- /**
2939
- * Get the underlying client state (for advanced usage)
2940
- */
2941
- getState(): ClientState;
2942
2980
  }
2943
2981
  /**
2944
2982
  * This wrapper ONLY maintains VGF state as a sink.
@@ -2956,6 +2994,8 @@ declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitio
2956
2994
  connect(): Promise<void>;
2957
2995
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2958
2996
  sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2997
+ sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2998
+ getStats(): IRecognitionClientStats;
2959
2999
  /**
2960
3000
  * Set VGF recording status to RECORDING on the first audio chunk.
2961
3001
  * Idempotent — subsequent calls are no-ops until disconnect/stop resets
package/dist/index.js CHANGED
@@ -6435,9 +6435,15 @@ var RecognitionVGFStateSchema = z.object({
6435
6435
  // voice end time identified by ASR
6436
6436
  lastNonSilence: z.number().optional(),
6437
6437
  // last non-silence sample time from PCM analysis
6438
+ accumulatedAudioTimeMs: z.number().optional(),
6439
+ // total user audio time watermark (ms) — mirrors TranscriptionResultV1.accumulatedAudioTimeMs
6438
6440
  // Tracking-only metadata
6439
6441
  asrConfig: z.string().optional(),
6440
- // Json format of the ASR config
6442
+ // Json format of the *requested* ASR config (set once at construction).
6443
+ // For the *resolved* truth — actual provider/model/sampleRate/encoding/apiType/isFallback chosen by the
6444
+ // server after circuit-breaker/fallback — see `sessionConfigured` below.
6445
+ sessionConfigured: SessionConfiguredSchemaV1.optional(),
6446
+ // Mirrors the SessionConfiguredV1 message; populated when the server emits it (before audio streams).
6441
6447
  startRecordingTimestamp: z.string().optional(),
6442
6448
  // Start of recording. Immutable after set.
6443
6449
  finalRecordingTimestamp: z.string().optional(),
@@ -6458,6 +6464,17 @@ var RecognitionVGFStateSchema = z.object({
6458
6464
  // Support for prompt slot mapping - passed to recognition context when present
6459
6465
  promptSlotMap: z.record(z.string(), z.array(z.string())).optional(),
6460
6466
  // Optional map of slot names to prompt values for recognition context
6467
+ // Optional prompt inputs - when set, forwarded into GameContext at client creation.
6468
+ // Mirror the GameContextV1 fields: STT (ASR keywords/keyterms), STF (speech->function), TTF (text->function).
6469
+ promptSTT: z.string().optional(),
6470
+ promptSTF: z.string().optional(),
6471
+ promptTTF: z.string().optional(),
6472
+ // Provider-reported phrase detections from the last transcript message.
6473
+ // Mirrors TranscriptionResultV1.detections — a heterogeneous list keyed by DetectionTypeV1
6474
+ // (today only 'search' from Deepgram; future entries may include keywords/keyterms/speech_contexts).
6475
+ // Sorted by `score` descending by the server (see deepgram/message-handlers/v1/transform-transcript.ts
6476
+ // and provider-to-recognition-transformer.ts), so [0] is the top hit — no client-side re-rank needed.
6477
+ detections: z.array(DetectionV1Schema).optional(),
6461
6478
  // Recognition action processing state - managed externally, SDK preserves but never modifies
6462
6479
  recognitionActionProcessingState: z.string().optional()
6463
6480
  // "NOT_STARTED", "IN_PROGRESS", "COMPLETED"
@@ -6529,6 +6546,9 @@ function mapTranscriptionResultToState(currentState, result, isRecording) {
6529
6546
  if (result.lastNonSilence !== void 0) {
6530
6547
  newState.lastNonSilence = result.lastNonSilence;
6531
6548
  }
6549
+ if (result.accumulatedAudioTimeMs !== void 0) {
6550
+ newState.accumulatedAudioTimeMs = result.accumulatedAudioTimeMs;
6551
+ }
6532
6552
  } else {
6533
6553
  newState.transcriptionStatus = TranscriptionStatus.FINALIZED;
6534
6554
  newState.finalTranscript = result.finalTranscript || "";
@@ -6542,12 +6562,24 @@ function mapTranscriptionResultToState(currentState, result, isRecording) {
6542
6562
  if (result.lastNonSilence !== void 0) {
6543
6563
  newState.lastNonSilence = result.lastNonSilence;
6544
6564
  }
6565
+ if (result.accumulatedAudioTimeMs !== void 0) {
6566
+ newState.accumulatedAudioTimeMs = result.accumulatedAudioTimeMs;
6567
+ }
6545
6568
  newState.pendingTranscript = "";
6546
6569
  newState.pendingConfidence = void 0;
6547
6570
  }
6571
+ if (result.detections !== void 0) {
6572
+ newState.detections = result.detections;
6573
+ }
6548
6574
  return newState;
6549
6575
  }
6550
- function mapErrorToState(currentState, error) {
6576
+ function mapSessionConfiguredToState(currentState, sessionConfigured) {
6577
+ return {
6578
+ ...currentState,
6579
+ sessionConfigured
6580
+ };
6581
+ }
6582
+ function mapErrorToState(currentState) {
6551
6583
  return {
6552
6584
  ...currentState,
6553
6585
  transcriptionStatus: TranscriptionStatus.ERROR,
@@ -6579,7 +6611,10 @@ function resetRecognitionVGFState(currentState) {
6579
6611
  recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
6580
6612
  finalTranscript: void 0,
6581
6613
  voiceEnd: void 0,
6582
- lastNonSilence: void 0
6614
+ lastNonSilence: void 0,
6615
+ accumulatedAudioTimeMs: void 0,
6616
+ detections: void 0,
6617
+ sessionConfigured: void 0
6583
6618
  };
6584
6619
  }
6585
6620
  function generateUUID() {
@@ -6624,16 +6659,28 @@ var SimplifiedVGFRecognitionClient = class {
6624
6659
  }
6625
6660
  this.state = { ...this.state, startRecordingStatus: "READY" };
6626
6661
  this.expectedUuid = this.state.audioUtteranceId;
6627
- if (this.state.promptSlotMap) {
6662
+ const hasPromptInputs = this.state.promptSlotMap !== void 0 || this.state.promptSTT !== void 0 || this.state.promptSTF !== void 0 || this.state.promptTTF !== void 0;
6663
+ if (hasPromptInputs) {
6628
6664
  if (clientConfig.asrRequestConfig) {
6629
6665
  clientConfig.asrRequestConfig.useContext = true;
6630
6666
  }
6631
6667
  if (!clientConfig.gameContext) {
6632
6668
  if (clientConfig.logger) {
6633
- clientConfig.logger("warn", "[VGF] promptSlotMap found but no gameContext provided. SlotMap will not be sent.");
6669
+ clientConfig.logger("warn", "[VGF] prompt inputs found but no gameContext provided. They will not be sent.");
6634
6670
  }
6635
6671
  } else {
6636
- clientConfig.gameContext.slotMap = this.state.promptSlotMap;
6672
+ if (this.state.promptSlotMap !== void 0) {
6673
+ clientConfig.gameContext.slotMap = this.state.promptSlotMap;
6674
+ }
6675
+ if (this.state.promptSTT !== void 0) {
6676
+ clientConfig.gameContext.promptSTT = this.state.promptSTT;
6677
+ }
6678
+ if (this.state.promptSTF !== void 0) {
6679
+ clientConfig.gameContext.promptSTF = this.state.promptSTF;
6680
+ }
6681
+ if (this.state.promptTTF !== void 0) {
6682
+ clientConfig.gameContext.promptTTF = this.state.promptTTF;
6683
+ }
6637
6684
  }
6638
6685
  }
6639
6686
  this.client = new RealTimeTwoWayWebSocketRecognitionClient({
@@ -6669,6 +6716,22 @@ var SimplifiedVGFRecognitionClient = class {
6669
6716
  clientConfig.onMetadata(metadata);
6670
6717
  }
6671
6718
  },
6719
+ onSessionConfigured: (sessionConfigured) => {
6720
+ if (sessionConfigured.audioUtteranceId && sessionConfigured.audioUtteranceId !== this.expectedUuid) {
6721
+ if (this.logger) {
6722
+ this.logger(
6723
+ "warn",
6724
+ `[RecogSDK:VGF] Skipping sessionConfigured update: UUID mismatch (expected: ${this.expectedUuid}, got: ${sessionConfigured.audioUtteranceId})`
6725
+ );
6726
+ }
6727
+ return;
6728
+ }
6729
+ this.state = mapSessionConfiguredToState(this.state, sessionConfigured);
6730
+ this.notifyStateChange();
6731
+ if (clientConfig.onSessionConfigured) {
6732
+ clientConfig.onSessionConfigured(sessionConfigured);
6733
+ }
6734
+ },
6672
6735
  onFunctionCall: (result) => {
6673
6736
  if (clientConfig.onFunctionCall) {
6674
6737
  clientConfig.onFunctionCall(result);
@@ -6685,7 +6748,7 @@ var SimplifiedVGFRecognitionClient = class {
6685
6748
  return;
6686
6749
  }
6687
6750
  this.isRecordingAudio = false;
6688
- this.state = mapErrorToState(this.state, error);
6751
+ this.state = mapErrorToState(this.state);
6689
6752
  this.notifyStateChange();
6690
6753
  if (clientConfig.onError) {
6691
6754
  clientConfig.onError(error);
@@ -6717,6 +6780,12 @@ var SimplifiedVGFRecognitionClient = class {
6717
6780
  this.markRecordingStarted();
6718
6781
  this.client.sendAudioWithSampleRate(audioData, sourceSampleRate);
6719
6782
  }
6783
+ sendPrefixAudio(audioData) {
6784
+ this.client.sendPrefixAudio(audioData);
6785
+ }
6786
+ getStats() {
6787
+ return this.client.getStats();
6788
+ }
6720
6789
  /**
6721
6790
  * Set VGF recording status to RECORDING on the first audio chunk.
6722
6791
  * Idempotent — subsequent calls are no-ops until disconnect/stop resets