@volley/recognition-client-sdk 0.1.799 → 0.1.803

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1406,6 +1406,11 @@ interface IRecognitionClientConfig {
1406
1406
  *
1407
1407
  * Main interface for real-time speech recognition clients.
1408
1408
  * Provides methods for connection management, audio streaming, and session control.
1409
+ *
1410
+ * NOTE for maintainers: `ISimplifiedVGFRecognitionClient` extends this interface,
1411
+ * so any method added here must also be implemented (typically as a delegate) by
1412
+ * `SimplifiedVGFRecognitionClient`. TypeScript will flag missing delegates at
1413
+ * compile time — do not work around the error, add the delegate.
1409
1414
  */
1410
1415
  interface IRecognitionClient {
1411
1416
  /**
@@ -1437,6 +1442,16 @@ interface IRecognitionClient {
1437
1442
  * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
1438
1443
  */
1439
1444
  sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
1445
+ /**
1446
+ * Send prefix audio (e.g. a TTS prompt) that primes the provider's language
1447
+ * model before user audio is streamed. Chunks accepted — the server buffers
1448
+ * until the session is READY and flushes. Must be sent BEFORE the first
1449
+ * `sendAudio()` to take effect. Only meaningful when
1450
+ * `asrRequestConfig.prefixMode === PrefixMode.CLIENT`.
1451
+ *
1452
+ * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
1453
+ */
1454
+ sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
1440
1455
  /**
1441
1456
  * Stop recording and wait for final transcript
1442
1457
  * The server will close the connection after sending the final transcript.
@@ -2175,6 +2175,11 @@ interface IRecognitionClientConfig {
2175
2175
  *
2176
2176
  * Main interface for real-time speech recognition clients.
2177
2177
  * Provides methods for connection management, audio streaming, and session control.
2178
+ *
2179
+ * NOTE for maintainers: `ISimplifiedVGFRecognitionClient` extends this interface,
2180
+ * so any method added here must also be implemented (typically as a delegate) by
2181
+ * `SimplifiedVGFRecognitionClient`. TypeScript will flag missing delegates at
2182
+ * compile time — do not work around the error, add the delegate.
2178
2183
  */
2179
2184
  interface IRecognitionClient {
2180
2185
  /**
@@ -2206,6 +2211,16 @@ interface IRecognitionClient {
2206
2211
  * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
2207
2212
  */
2208
2213
  sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2214
+ /**
2215
+ * Send prefix audio (e.g. a TTS prompt) that primes the provider's language
2216
+ * model before user audio is streamed. Chunks accepted — the server buffers
2217
+ * until the session is READY and flushes. Must be sent BEFORE the first
2218
+ * `sendAudio()` to take effect. Only meaningful when
2219
+ * `asrRequestConfig.prefixMode === PrefixMode.CLIENT`.
2220
+ *
2221
+ * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
2222
+ */
2223
+ sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2209
2224
  /**
2210
2225
  * Stop recording and wait for final transcript
2211
2226
  * The server will close the connection after sending the final transcript.
@@ -2746,7 +2761,42 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2746
2761
  finalConfidence: z.ZodOptional<z.ZodNumber>;
2747
2762
  voiceEnd: z.ZodOptional<z.ZodNumber>;
2748
2763
  lastNonSilence: z.ZodOptional<z.ZodNumber>;
2764
+ accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
2749
2765
  asrConfig: z.ZodOptional<z.ZodString>;
2766
+ sessionConfigured: z.ZodOptional<z.ZodObject<{
2767
+ type: z.ZodLiteral<RecognitionResultTypeV1.SESSION_CONFIGURED>;
2768
+ audioUtteranceId: z.ZodString;
2769
+ provider: z.ZodOptional<z.ZodString>;
2770
+ model: z.ZodOptional<z.ZodString>;
2771
+ sampleRate: z.ZodOptional<z.ZodNumber>;
2772
+ encoding: z.ZodOptional<z.ZodString>;
2773
+ apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
2774
+ isFallback: z.ZodOptional<z.ZodBoolean>;
2775
+ asrRequest: z.ZodOptional<z.ZodString>;
2776
+ providerConfig: z.ZodOptional<z.ZodString>;
2777
+ }, "strip", z.ZodTypeAny, {
2778
+ type: RecognitionResultTypeV1.SESSION_CONFIGURED;
2779
+ audioUtteranceId: string;
2780
+ provider?: string | undefined;
2781
+ model?: string | undefined;
2782
+ sampleRate?: number | undefined;
2783
+ encoding?: string | undefined;
2784
+ apiType?: ASRApiType | undefined;
2785
+ isFallback?: boolean | undefined;
2786
+ asrRequest?: string | undefined;
2787
+ providerConfig?: string | undefined;
2788
+ }, {
2789
+ type: RecognitionResultTypeV1.SESSION_CONFIGURED;
2790
+ audioUtteranceId: string;
2791
+ provider?: string | undefined;
2792
+ model?: string | undefined;
2793
+ sampleRate?: number | undefined;
2794
+ encoding?: string | undefined;
2795
+ apiType?: ASRApiType | undefined;
2796
+ isFallback?: boolean | undefined;
2797
+ asrRequest?: string | undefined;
2798
+ providerConfig?: string | undefined;
2799
+ }>>;
2750
2800
  startRecordingTimestamp: z.ZodOptional<z.ZodString>;
2751
2801
  finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
2752
2802
  finalTranscriptionTimestamp: z.ZodOptional<z.ZodString>;
@@ -2755,7 +2805,31 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2755
2805
  functionCallMetadata: z.ZodOptional<z.ZodString>;
2756
2806
  functionCallConfidence: z.ZodOptional<z.ZodNumber>;
2757
2807
  finalFunctionCallTimestamp: z.ZodOptional<z.ZodString>;
2808
+ gameId: z.ZodOptional<z.ZodString>;
2809
+ gamePhase: z.ZodOptional<z.ZodString>;
2758
2810
  promptSlotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
2811
+ promptSTT: z.ZodOptional<z.ZodString>;
2812
+ promptSTF: z.ZodOptional<z.ZodString>;
2813
+ promptTTF: z.ZodOptional<z.ZodString>;
2814
+ detections: z.ZodOptional<z.ZodArray<z.ZodObject<{
2815
+ type: z.ZodNativeEnum<typeof DetectionTypeV1>;
2816
+ query: z.ZodString;
2817
+ score: z.ZodNumber;
2818
+ startMs: z.ZodOptional<z.ZodNumber>;
2819
+ endMs: z.ZodOptional<z.ZodNumber>;
2820
+ }, "strip", z.ZodTypeAny, {
2821
+ type: DetectionTypeV1;
2822
+ query: string;
2823
+ score: number;
2824
+ startMs?: number | undefined;
2825
+ endMs?: number | undefined;
2826
+ }, {
2827
+ type: DetectionTypeV1;
2828
+ query: string;
2829
+ score: number;
2830
+ startMs?: number | undefined;
2831
+ endMs?: number | undefined;
2832
+ }>, "many">>;
2759
2833
  recognitionActionProcessingState: z.ZodOptional<z.ZodString>;
2760
2834
  }, "strip", z.ZodTypeAny, {
2761
2835
  audioUtteranceId: string;
@@ -2766,7 +2840,20 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2766
2840
  finalConfidence?: number | undefined;
2767
2841
  voiceEnd?: number | undefined;
2768
2842
  lastNonSilence?: number | undefined;
2843
+ accumulatedAudioTimeMs?: number | undefined;
2769
2844
  asrConfig?: string | undefined;
2845
+ sessionConfigured?: {
2846
+ type: RecognitionResultTypeV1.SESSION_CONFIGURED;
2847
+ audioUtteranceId: string;
2848
+ provider?: string | undefined;
2849
+ model?: string | undefined;
2850
+ sampleRate?: number | undefined;
2851
+ encoding?: string | undefined;
2852
+ apiType?: ASRApiType | undefined;
2853
+ isFallback?: boolean | undefined;
2854
+ asrRequest?: string | undefined;
2855
+ providerConfig?: string | undefined;
2856
+ } | undefined;
2770
2857
  startRecordingTimestamp?: string | undefined;
2771
2858
  finalRecordingTimestamp?: string | undefined;
2772
2859
  finalTranscriptionTimestamp?: string | undefined;
@@ -2774,7 +2861,19 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2774
2861
  functionCallMetadata?: string | undefined;
2775
2862
  functionCallConfidence?: number | undefined;
2776
2863
  finalFunctionCallTimestamp?: string | undefined;
2864
+ gameId?: string | undefined;
2865
+ gamePhase?: string | undefined;
2777
2866
  promptSlotMap?: Record<string, string[]> | undefined;
2867
+ promptSTT?: string | undefined;
2868
+ promptSTF?: string | undefined;
2869
+ promptTTF?: string | undefined;
2870
+ detections?: {
2871
+ type: DetectionTypeV1;
2872
+ query: string;
2873
+ score: number;
2874
+ startMs?: number | undefined;
2875
+ endMs?: number | undefined;
2876
+ }[] | undefined;
2778
2877
  recognitionActionProcessingState?: string | undefined;
2779
2878
  }, {
2780
2879
  audioUtteranceId: string;
@@ -2784,7 +2883,20 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2784
2883
  finalConfidence?: number | undefined;
2785
2884
  voiceEnd?: number | undefined;
2786
2885
  lastNonSilence?: number | undefined;
2886
+ accumulatedAudioTimeMs?: number | undefined;
2787
2887
  asrConfig?: string | undefined;
2888
+ sessionConfigured?: {
2889
+ type: RecognitionResultTypeV1.SESSION_CONFIGURED;
2890
+ audioUtteranceId: string;
2891
+ provider?: string | undefined;
2892
+ model?: string | undefined;
2893
+ sampleRate?: number | undefined;
2894
+ encoding?: string | undefined;
2895
+ apiType?: ASRApiType | undefined;
2896
+ isFallback?: boolean | undefined;
2897
+ asrRequest?: string | undefined;
2898
+ providerConfig?: string | undefined;
2899
+ } | undefined;
2788
2900
  startRecordingTimestamp?: string | undefined;
2789
2901
  finalRecordingTimestamp?: string | undefined;
2790
2902
  finalTranscriptionTimestamp?: string | undefined;
@@ -2793,7 +2905,19 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
2793
2905
  functionCallMetadata?: string | undefined;
2794
2906
  functionCallConfidence?: number | undefined;
2795
2907
  finalFunctionCallTimestamp?: string | undefined;
2908
+ gameId?: string | undefined;
2909
+ gamePhase?: string | undefined;
2796
2910
  promptSlotMap?: Record<string, string[]> | undefined;
2911
+ promptSTT?: string | undefined;
2912
+ promptSTF?: string | undefined;
2913
+ promptTTF?: string | undefined;
2914
+ detections?: {
2915
+ type: DetectionTypeV1;
2916
+ query: string;
2917
+ score: number;
2918
+ startMs?: number | undefined;
2919
+ endMs?: number | undefined;
2920
+ }[] | undefined;
2797
2921
  recognitionActionProcessingState?: string | undefined;
2798
2922
  }>;
2799
2923
  type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>;
@@ -2813,6 +2937,7 @@ declare const TranscriptionStatus: {
2813
2937
  };
2814
2938
  type TranscriptionStatusType = typeof TranscriptionStatus[keyof typeof TranscriptionStatus];
2815
2939
  declare function createInitialRecognitionState(audioUtteranceId: string): RecognitionState;
2940
+ declare function isTerminal(state: Pick<RecognitionState, "transcriptionStatus">): boolean;
2816
2941
  declare function isValidRecordingStatusTransition(from: string | undefined, to: string): boolean;
2817
2942
 
2818
2943
  /**
@@ -2843,102 +2968,22 @@ interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
2843
2968
  /**
2844
2969
  * Interface for SimplifiedVGFRecognitionClient
2845
2970
  *
2846
- * A simplified client that maintains VGF state for game developers.
2847
- * All methods from the underlying client are available, plus VGF state management.
2971
+ * Inherits the full IRecognitionClient surface (connect, sendAudio,
2972
+ * sendAudioWithSampleRate, sendPrefixAudio, stopRecording, stopAbnormally,
2973
+ * status checks, sendGameContext, getStats, getUrl, getState, getAudioUtteranceId)
2974
+ * — see recognition-client.types.ts for those. Adds VGF-specific state access.
2975
+ *
2976
+ * Extending IRecognitionClient (rather than redeclaring methods) means
2977
+ * TypeScript catches any base-client method that's not delegated by the
2978
+ * VGF wrapper at compile time — keeps the two surfaces in sync.
2848
2979
  */
2849
- interface ISimplifiedVGFRecognitionClient {
2980
+ interface ISimplifiedVGFRecognitionClient extends IRecognitionClient {
2850
2981
  /**
2851
- * Connect to the recognition service WebSocket
2852
- * @returns Promise that resolves when connected and ready
2853
- */
2854
- connect(): Promise<void>;
2855
- /**
2856
- * Send audio data for transcription
2857
- * @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
2858
- */
2859
- sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2860
- /**
2861
- * Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
2862
- * downsamples to the session's target rate before transmitting. Use
2863
- * when capture is at the system's native rate (browser AudioContext is
2864
- * typically 44.1 kHz or 48 kHz). Audio must be signed 16-bit
2865
- * little-endian PCM, mono.
2866
- */
2867
- sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
2868
- /**
2869
- * Stop recording and wait for final transcription
2870
- * @returns Promise that resolves when transcription is complete
2871
- */
2872
- stopRecording(): Promise<void>;
2873
- /**
2874
- * Force stop and immediately close connection without waiting for server
2875
- *
2876
- * WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
2877
- * - Does NOT wait for server to process remaining audio
2878
- * - Does NOT receive final transcript from server (VGF state set to empty)
2879
- * - Immediately closes WebSocket connection
2880
- * - Cleans up resources (buffers, listeners)
2881
- *
2882
- * Use Cases:
2883
- * - User explicitly cancels/abandons the session
2884
- * - Timeout scenarios where waiting is not acceptable
2885
- * - Need immediate cleanup and can't wait for server
2886
- *
2887
- * RECOMMENDED: Use stopRecording() for normal shutdown.
2888
- * Only use this when immediate disconnection is required.
2889
- */
2890
- stopAbnormally(): void;
2891
- /**
2892
- * Get the current VGF recognition state
2982
+ * Get the current VGF recognition state — the single shared store
2983
+ * of inputs and outputs for this utterance.
2893
2984
  * @returns Current RecognitionState with all transcription data
2894
2985
  */
2895
2986
  getVGFState(): RecognitionState;
2896
- /**
2897
- * Check if connected to the WebSocket
2898
- */
2899
- isConnected(): boolean;
2900
- /**
2901
- * Check if currently connecting
2902
- */
2903
- isConnecting(): boolean;
2904
- /**
2905
- * Check if currently stopping
2906
- */
2907
- isStopping(): boolean;
2908
- /**
2909
- * Check if transcription has finished
2910
- */
2911
- isTranscriptionFinished(): boolean;
2912
- /**
2913
- * Check if the audio buffer has overflowed
2914
- */
2915
- isBufferOverflowing(): boolean;
2916
- /**
2917
- * Send game context after connection is established (for preconnect flow).
2918
- *
2919
- * Preconnect flow: Create client with asrRequestConfig (useContext: true) but
2920
- * WITHOUT gameContext → call connect() → later call sendGameContext() with slotMap.
2921
- *
2922
- * @param context - Game context including slotMap for keyword boosting
2923
- */
2924
- sendGameContext(context: GameContextV1): void;
2925
- /**
2926
- * Check if server has sent READY signal (provider connected, ready for audio).
2927
- * In preconnect flow, this becomes true after sendGameContext() triggers provider attachment.
2928
- */
2929
- isServerReady(): boolean;
2930
- /**
2931
- * Get the audio utterance ID for this session
2932
- */
2933
- getAudioUtteranceId(): string;
2934
- /**
2935
- * Get the WebSocket URL being used
2936
- */
2937
- getUrl(): string;
2938
- /**
2939
- * Get the underlying client state (for advanced usage)
2940
- */
2941
- getState(): ClientState;
2942
2987
  }
2943
2988
  /**
2944
2989
  * This wrapper ONLY maintains VGF state as a sink.
@@ -2956,6 +3001,8 @@ declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitio
2956
3001
  connect(): Promise<void>;
2957
3002
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
2958
3003
  sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
3004
+ sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
3005
+ getStats(): IRecognitionClientStats;
2959
3006
  /**
2960
3007
  * Set VGF recording status to RECORDING on the first audio chunk.
2961
3008
  * Idempotent — subsequent calls are no-ops until disconnect/stop resets
@@ -2975,7 +3022,6 @@ declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitio
2975
3022
  sendGameContext(context: GameContextV1): void;
2976
3023
  isServerReady(): boolean;
2977
3024
  getVGFState(): RecognitionState;
2978
- private isTerminalStatus;
2979
3025
  private notifyStateChange;
2980
3026
  }
2981
3027
  /**
@@ -3088,5 +3134,5 @@ declare function getRecognitionConductorHttpBase(stage?: Stage | string | null |
3088
3134
  declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
3089
3135
  declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
3090
3136
 
3091
- export { AmazonNovaSonicModel, AudioEncoding, AwsTranscribeModel, BedrockModel, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
3137
+ export { AmazonNovaSonicModel, AudioEncoding, AwsTranscribeModel, BedrockModel, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isTerminal, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
3092
3138
  export type { ASRRequestConfig, ASRRequestV1, AudioMetricsResultV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
package/dist/index.d.ts CHANGED
@@ -7,7 +7,7 @@ export { ErrorTypeV1 } from '@recog/shared-types';
7
7
  export type { RecognitionException, ConnectionException, TimeoutException, ValidationException, AuthenticationException, ProviderException, QuotaExceededException, UnknownException } from '@recog/shared-types';
8
8
  export { isExceptionImmediatelyAvailable, getUserFriendlyMessage } from '@recog/shared-types';
9
9
  export { SimplifiedVGFRecognitionClient, createSimplifiedVGFClient, type ISimplifiedVGFRecognitionClient, type SimplifiedVGFClientConfig } from './simplified-vgf-recognition-client.js';
10
- export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, TranscriptionStatus, type RecordingStatusType, type TranscriptionStatusType, createInitialRecognitionState, isValidRecordingStatusTransition } from './vgf-recognition-state.js';
10
+ export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, TranscriptionStatus, type RecordingStatusType, type TranscriptionStatusType, createInitialRecognitionState, isTerminal, isValidRecordingStatusTransition } from './vgf-recognition-state.js';
11
11
  export { resetRecognitionVGFState } from './vgf-recognition-mapper.js';
12
12
  export { AudioEncoding } from '@recog/websocket';
13
13
  export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,oBAAoB,EACpB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,UAAU,EACV,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,oBAAoB,EACpB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
package/dist/index.js CHANGED
@@ -6435,9 +6435,15 @@ var RecognitionVGFStateSchema = z.object({
6435
6435
  // voice end time identified by ASR
6436
6436
  lastNonSilence: z.number().optional(),
6437
6437
  // last non-silence sample time from PCM analysis
6438
+ accumulatedAudioTimeMs: z.number().optional(),
6439
+ // total user audio time watermark (ms) — mirrors TranscriptionResultV1.accumulatedAudioTimeMs
6438
6440
  // Tracking-only metadata
6439
6441
  asrConfig: z.string().optional(),
6440
- // Json format of the ASR config
6442
+ // Json format of the *requested* ASR config (set once at construction).
6443
+ // For the *resolved* truth — actual provider/model/sampleRate/encoding/apiType/isFallback chosen by the
6444
+ // server after circuit-breaker/fallback — see `sessionConfigured` below.
6445
+ sessionConfigured: SessionConfiguredSchemaV1.optional(),
6446
+ // Mirrors the SessionConfiguredV1 message; populated when the server emits it (before audio streams).
6441
6447
  startRecordingTimestamp: z.string().optional(),
6442
6448
  // Start of recording. Immutable after set.
6443
6449
  finalRecordingTimestamp: z.string().optional(),
@@ -6455,9 +6461,28 @@ var RecognitionVGFStateSchema = z.object({
6455
6461
  // Confidence score for the function call.
6456
6462
  finalFunctionCallTimestamp: z.string().optional(),
6457
6463
  // When the final action after interpreting the transcript was taken. Immutable.
6464
+ // Session identity — when set, the VGF client backfills these into
6465
+ // GameContextV1 if the caller didn't pass a `gameContext` in config.
6466
+ // Lets RecognitionState be the single source of truth: server seeds
6467
+ // `gameId` + `gamePhase` + `promptSlotMap` per player, controller passes
6468
+ // the whole state as `initialState`, no separate `gameContext` needed.
6469
+ // Backward-compatible: if `gameContext` is also passed in config, it wins.
6470
+ gameId: z.string().optional(),
6471
+ gamePhase: z.string().optional(),
6458
6472
  // Support for prompt slot mapping - passed to recognition context when present
6459
6473
  promptSlotMap: z.record(z.string(), z.array(z.string())).optional(),
6460
6474
  // Optional map of slot names to prompt values for recognition context
6475
+ // Optional prompt inputs - when set, forwarded into GameContext at client creation.
6476
+ // Mirror the GameContextV1 fields: STT (ASR keywords/keyterms), STF (speech->function), TTF (text->function).
6477
+ promptSTT: z.string().optional(),
6478
+ promptSTF: z.string().optional(),
6479
+ promptTTF: z.string().optional(),
6480
+ // Provider-reported phrase detections from the last transcript message.
6481
+ // Mirrors TranscriptionResultV1.detections — a heterogeneous list keyed by DetectionTypeV1
6482
+ // (today only 'search' from Deepgram; future entries may include keywords/keyterms/speech_contexts).
6483
+ // Sorted by `score` descending by the server (see deepgram/message-handlers/v1/transform-transcript.ts
6484
+ // and provider-to-recognition-transformer.ts), so [0] is the top hit — no client-side re-rank needed.
6485
+ detections: z.array(DetectionV1Schema).optional(),
6461
6486
  // Recognition action processing state - managed externally, SDK preserves but never modifies
6462
6487
  recognitionActionProcessingState: z.string().optional()
6463
6488
  // "NOT_STARTED", "IN_PROGRESS", "COMPLETED"
@@ -6490,6 +6515,9 @@ function createInitialRecognitionState(audioUtteranceId) {
6490
6515
  recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED
6491
6516
  };
6492
6517
  }
6518
+ function isTerminal(state) {
6519
+ return state.transcriptionStatus === TranscriptionStatus.FINALIZED || state.transcriptionStatus === TranscriptionStatus.ABORTED || state.transcriptionStatus === TranscriptionStatus.ERROR;
6520
+ }
6493
6521
  function isValidRecordingStatusTransition(from, to) {
6494
6522
  const statusOrder = [
6495
6523
  RecordingStatus.NOT_READY,
@@ -6529,6 +6557,9 @@ function mapTranscriptionResultToState(currentState, result, isRecording) {
6529
6557
  if (result.lastNonSilence !== void 0) {
6530
6558
  newState.lastNonSilence = result.lastNonSilence;
6531
6559
  }
6560
+ if (result.accumulatedAudioTimeMs !== void 0) {
6561
+ newState.accumulatedAudioTimeMs = result.accumulatedAudioTimeMs;
6562
+ }
6532
6563
  } else {
6533
6564
  newState.transcriptionStatus = TranscriptionStatus.FINALIZED;
6534
6565
  newState.finalTranscript = result.finalTranscript || "";
@@ -6542,12 +6573,24 @@ function mapTranscriptionResultToState(currentState, result, isRecording) {
6542
6573
  if (result.lastNonSilence !== void 0) {
6543
6574
  newState.lastNonSilence = result.lastNonSilence;
6544
6575
  }
6576
+ if (result.accumulatedAudioTimeMs !== void 0) {
6577
+ newState.accumulatedAudioTimeMs = result.accumulatedAudioTimeMs;
6578
+ }
6545
6579
  newState.pendingTranscript = "";
6546
6580
  newState.pendingConfidence = void 0;
6547
6581
  }
6582
+ if (result.detections !== void 0) {
6583
+ newState.detections = result.detections;
6584
+ }
6548
6585
  return newState;
6549
6586
  }
6550
- function mapErrorToState(currentState, error) {
6587
+ function mapSessionConfiguredToState(currentState, sessionConfigured) {
6588
+ return {
6589
+ ...currentState,
6590
+ sessionConfigured
6591
+ };
6592
+ }
6593
+ function mapErrorToState(currentState) {
6551
6594
  return {
6552
6595
  ...currentState,
6553
6596
  transcriptionStatus: TranscriptionStatus.ERROR,
@@ -6579,7 +6622,10 @@ function resetRecognitionVGFState(currentState) {
6579
6622
  recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
6580
6623
  finalTranscript: void 0,
6581
6624
  voiceEnd: void 0,
6582
- lastNonSilence: void 0
6625
+ lastNonSilence: void 0,
6626
+ accumulatedAudioTimeMs: void 0,
6627
+ detections: void 0,
6628
+ sessionConfigured: void 0
6583
6629
  };
6584
6630
  }
6585
6631
  function generateUUID() {
@@ -6624,16 +6670,35 @@ var SimplifiedVGFRecognitionClient = class {
6624
6670
  }
6625
6671
  this.state = { ...this.state, startRecordingStatus: "READY" };
6626
6672
  this.expectedUuid = this.state.audioUtteranceId;
6627
- if (this.state.promptSlotMap) {
6673
+ if (!clientConfig.gameContext && this.state.gameId && this.state.gamePhase) {
6674
+ clientConfig.gameContext = {
6675
+ type: RecognitionContextTypeV1.GAME_CONTEXT,
6676
+ gameId: this.state.gameId,
6677
+ gamePhase: this.state.gamePhase
6678
+ };
6679
+ }
6680
+ const hasPromptInputs = this.state.promptSlotMap !== void 0 || this.state.promptSTT !== void 0 || this.state.promptSTF !== void 0 || this.state.promptTTF !== void 0;
6681
+ if (hasPromptInputs) {
6628
6682
  if (clientConfig.asrRequestConfig) {
6629
6683
  clientConfig.asrRequestConfig.useContext = true;
6630
6684
  }
6631
6685
  if (!clientConfig.gameContext) {
6632
6686
  if (clientConfig.logger) {
6633
- clientConfig.logger("warn", "[VGF] promptSlotMap found but no gameContext provided. SlotMap will not be sent.");
6687
+ clientConfig.logger("warn", "[VGF] prompt inputs found but no gameContext provided and state has no gameId/gamePhase. They will not be sent.");
6634
6688
  }
6635
6689
  } else {
6636
- clientConfig.gameContext.slotMap = this.state.promptSlotMap;
6690
+ if (this.state.promptSlotMap !== void 0) {
6691
+ clientConfig.gameContext.slotMap = this.state.promptSlotMap;
6692
+ }
6693
+ if (this.state.promptSTT !== void 0) {
6694
+ clientConfig.gameContext.promptSTT = this.state.promptSTT;
6695
+ }
6696
+ if (this.state.promptSTF !== void 0) {
6697
+ clientConfig.gameContext.promptSTF = this.state.promptSTF;
6698
+ }
6699
+ if (this.state.promptTTF !== void 0) {
6700
+ clientConfig.gameContext.promptTTF = this.state.promptTTF;
6701
+ }
6637
6702
  }
6638
6703
  }
6639
6704
  this.client = new RealTimeTwoWayWebSocketRecognitionClient({
@@ -6669,6 +6734,22 @@ var SimplifiedVGFRecognitionClient = class {
6669
6734
  clientConfig.onMetadata(metadata);
6670
6735
  }
6671
6736
  },
6737
+ onSessionConfigured: (sessionConfigured) => {
6738
+ if (sessionConfigured.audioUtteranceId && sessionConfigured.audioUtteranceId !== this.expectedUuid) {
6739
+ if (this.logger) {
6740
+ this.logger(
6741
+ "warn",
6742
+ `[RecogSDK:VGF] Skipping sessionConfigured update: UUID mismatch (expected: ${this.expectedUuid}, got: ${sessionConfigured.audioUtteranceId})`
6743
+ );
6744
+ }
6745
+ return;
6746
+ }
6747
+ this.state = mapSessionConfiguredToState(this.state, sessionConfigured);
6748
+ this.notifyStateChange();
6749
+ if (clientConfig.onSessionConfigured) {
6750
+ clientConfig.onSessionConfigured(sessionConfigured);
6751
+ }
6752
+ },
6672
6753
  onFunctionCall: (result) => {
6673
6754
  if (clientConfig.onFunctionCall) {
6674
6755
  clientConfig.onFunctionCall(result);
@@ -6685,7 +6766,7 @@ var SimplifiedVGFRecognitionClient = class {
6685
6766
  return;
6686
6767
  }
6687
6768
  this.isRecordingAudio = false;
6688
- this.state = mapErrorToState(this.state, error);
6769
+ this.state = mapErrorToState(this.state);
6689
6770
  this.notifyStateChange();
6690
6771
  if (clientConfig.onError) {
6691
6772
  clientConfig.onError(error);
@@ -6717,6 +6798,12 @@ var SimplifiedVGFRecognitionClient = class {
6717
6798
  this.markRecordingStarted();
6718
6799
  this.client.sendAudioWithSampleRate(audioData, sourceSampleRate);
6719
6800
  }
6801
+ sendPrefixAudio(audioData) {
6802
+ this.client.sendPrefixAudio(audioData);
6803
+ }
6804
+ getStats() {
6805
+ return this.client.getStats();
6806
+ }
6720
6807
  /**
6721
6808
  * Set VGF recording status to RECORDING on the first audio chunk.
6722
6809
  * Idempotent — subsequent calls are no-ops until disconnect/stop resets
@@ -6809,11 +6896,8 @@ var SimplifiedVGFRecognitionClient = class {
6809
6896
  getVGFState() {
6810
6897
  return { ...this.state };
6811
6898
  }
6812
- isTerminalStatus(status) {
6813
- return status === TranscriptionStatus.FINALIZED || status === TranscriptionStatus.ABORTED || status === TranscriptionStatus.ERROR;
6814
- }
6815
6899
  notifyStateChange() {
6816
- if (this.isTerminalStatus(this.state.transcriptionStatus)) {
6900
+ if (isTerminal(this.state)) {
6817
6901
  if (this.lastSentTerminalUuid === this.expectedUuid) {
6818
6902
  if (this.logger) {
6819
6903
  this.logger(
@@ -6899,6 +6983,7 @@ export {
6899
6983
  getUserFriendlyMessage,
6900
6984
  isExceptionImmediatelyAvailable,
6901
6985
  isNormalDisconnection,
6986
+ isTerminal,
6902
6987
  isValidRecordingStatusTransition,
6903
6988
  normalizeStage,
6904
6989
  resetRecognitionVGFState