@volley/recognition-client-sdk 0.1.799 → 0.1.803
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.bundled.d.ts +15 -0
- package/dist/index.bundled.d.ts +139 -93
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +96 -11
- package/dist/index.js.map +2 -2
- package/dist/recognition-client.types.d.ts +15 -0
- package/dist/recognition-client.types.d.ts.map +1 -1
- package/dist/simplified-vgf-recognition-client.d.ts +15 -94
- package/dist/simplified-vgf-recognition-client.d.ts.map +1 -1
- package/dist/vgf-recognition-mapper.d.ts +9 -17
- package/dist/vgf-recognition-mapper.d.ts.map +1 -1
- package/dist/vgf-recognition-state.d.ts +110 -0
- package/dist/vgf-recognition-state.d.ts.map +1 -1
- package/package.json +4 -4
- package/src/index.ts +1 -0
- package/src/recognition-client.types.ts +16 -0
- package/src/simplified-vgf-recognition-client.spec.ts +0 -27
- package/src/simplified-vgf-recognition-client.ts +84 -133
- package/src/vgf-recognition-mapper.spec.ts +143 -0
- package/src/vgf-recognition-mapper.ts +35 -45
- package/src/vgf-recognition-state.ts +44 -1
|
@@ -1406,6 +1406,11 @@ interface IRecognitionClientConfig {
|
|
|
1406
1406
|
*
|
|
1407
1407
|
* Main interface for real-time speech recognition clients.
|
|
1408
1408
|
* Provides methods for connection management, audio streaming, and session control.
|
|
1409
|
+
*
|
|
1410
|
+
* NOTE for maintainers: `ISimplifiedVGFRecognitionClient` extends this interface,
|
|
1411
|
+
* so any method added here must also be implemented (typically as a delegate) by
|
|
1412
|
+
* `SimplifiedVGFRecognitionClient`. TypeScript will flag missing delegates at
|
|
1413
|
+
* compile time — do not work around the error, add the delegate.
|
|
1409
1414
|
*/
|
|
1410
1415
|
interface IRecognitionClient {
|
|
1411
1416
|
/**
|
|
@@ -1437,6 +1442,16 @@ interface IRecognitionClient {
|
|
|
1437
1442
|
* @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
|
|
1438
1443
|
*/
|
|
1439
1444
|
sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
|
|
1445
|
+
/**
|
|
1446
|
+
* Send prefix audio (e.g. a TTS prompt) that primes the provider's language
|
|
1447
|
+
* model before user audio is streamed. Chunks accepted — the server buffers
|
|
1448
|
+
* until the session is READY and flushes. Must be sent BEFORE the first
|
|
1449
|
+
* `sendAudio()` to take effect. Only meaningful when
|
|
1450
|
+
* `asrRequestConfig.prefixMode === PrefixMode.CLIENT`.
|
|
1451
|
+
*
|
|
1452
|
+
* @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
|
|
1453
|
+
*/
|
|
1454
|
+
sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
1440
1455
|
/**
|
|
1441
1456
|
* Stop recording and wait for final transcript
|
|
1442
1457
|
* The server will close the connection after sending the final transcript.
|
package/dist/index.bundled.d.ts
CHANGED
|
@@ -2175,6 +2175,11 @@ interface IRecognitionClientConfig {
|
|
|
2175
2175
|
*
|
|
2176
2176
|
* Main interface for real-time speech recognition clients.
|
|
2177
2177
|
* Provides methods for connection management, audio streaming, and session control.
|
|
2178
|
+
*
|
|
2179
|
+
* NOTE for maintainers: `ISimplifiedVGFRecognitionClient` extends this interface,
|
|
2180
|
+
* so any method added here must also be implemented (typically as a delegate) by
|
|
2181
|
+
* `SimplifiedVGFRecognitionClient`. TypeScript will flag missing delegates at
|
|
2182
|
+
* compile time — do not work around the error, add the delegate.
|
|
2178
2183
|
*/
|
|
2179
2184
|
interface IRecognitionClient {
|
|
2180
2185
|
/**
|
|
@@ -2206,6 +2211,16 @@ interface IRecognitionClient {
|
|
|
2206
2211
|
* @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
|
|
2207
2212
|
*/
|
|
2208
2213
|
sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
|
|
2214
|
+
/**
|
|
2215
|
+
* Send prefix audio (e.g. a TTS prompt) that primes the provider's language
|
|
2216
|
+
* model before user audio is streamed. Chunks accepted — the server buffers
|
|
2217
|
+
* until the session is READY and flushes. Must be sent BEFORE the first
|
|
2218
|
+
* `sendAudio()` to take effect. Only meaningful when
|
|
2219
|
+
* `asrRequestConfig.prefixMode === PrefixMode.CLIENT`.
|
|
2220
|
+
*
|
|
2221
|
+
* @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
|
|
2222
|
+
*/
|
|
2223
|
+
sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
2209
2224
|
/**
|
|
2210
2225
|
* Stop recording and wait for final transcript
|
|
2211
2226
|
* The server will close the connection after sending the final transcript.
|
|
@@ -2746,7 +2761,42 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
2746
2761
|
finalConfidence: z.ZodOptional<z.ZodNumber>;
|
|
2747
2762
|
voiceEnd: z.ZodOptional<z.ZodNumber>;
|
|
2748
2763
|
lastNonSilence: z.ZodOptional<z.ZodNumber>;
|
|
2764
|
+
accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
|
|
2749
2765
|
asrConfig: z.ZodOptional<z.ZodString>;
|
|
2766
|
+
sessionConfigured: z.ZodOptional<z.ZodObject<{
|
|
2767
|
+
type: z.ZodLiteral<RecognitionResultTypeV1.SESSION_CONFIGURED>;
|
|
2768
|
+
audioUtteranceId: z.ZodString;
|
|
2769
|
+
provider: z.ZodOptional<z.ZodString>;
|
|
2770
|
+
model: z.ZodOptional<z.ZodString>;
|
|
2771
|
+
sampleRate: z.ZodOptional<z.ZodNumber>;
|
|
2772
|
+
encoding: z.ZodOptional<z.ZodString>;
|
|
2773
|
+
apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
|
|
2774
|
+
isFallback: z.ZodOptional<z.ZodBoolean>;
|
|
2775
|
+
asrRequest: z.ZodOptional<z.ZodString>;
|
|
2776
|
+
providerConfig: z.ZodOptional<z.ZodString>;
|
|
2777
|
+
}, "strip", z.ZodTypeAny, {
|
|
2778
|
+
type: RecognitionResultTypeV1.SESSION_CONFIGURED;
|
|
2779
|
+
audioUtteranceId: string;
|
|
2780
|
+
provider?: string | undefined;
|
|
2781
|
+
model?: string | undefined;
|
|
2782
|
+
sampleRate?: number | undefined;
|
|
2783
|
+
encoding?: string | undefined;
|
|
2784
|
+
apiType?: ASRApiType | undefined;
|
|
2785
|
+
isFallback?: boolean | undefined;
|
|
2786
|
+
asrRequest?: string | undefined;
|
|
2787
|
+
providerConfig?: string | undefined;
|
|
2788
|
+
}, {
|
|
2789
|
+
type: RecognitionResultTypeV1.SESSION_CONFIGURED;
|
|
2790
|
+
audioUtteranceId: string;
|
|
2791
|
+
provider?: string | undefined;
|
|
2792
|
+
model?: string | undefined;
|
|
2793
|
+
sampleRate?: number | undefined;
|
|
2794
|
+
encoding?: string | undefined;
|
|
2795
|
+
apiType?: ASRApiType | undefined;
|
|
2796
|
+
isFallback?: boolean | undefined;
|
|
2797
|
+
asrRequest?: string | undefined;
|
|
2798
|
+
providerConfig?: string | undefined;
|
|
2799
|
+
}>>;
|
|
2750
2800
|
startRecordingTimestamp: z.ZodOptional<z.ZodString>;
|
|
2751
2801
|
finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
|
|
2752
2802
|
finalTranscriptionTimestamp: z.ZodOptional<z.ZodString>;
|
|
@@ -2755,7 +2805,31 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
2755
2805
|
functionCallMetadata: z.ZodOptional<z.ZodString>;
|
|
2756
2806
|
functionCallConfidence: z.ZodOptional<z.ZodNumber>;
|
|
2757
2807
|
finalFunctionCallTimestamp: z.ZodOptional<z.ZodString>;
|
|
2808
|
+
gameId: z.ZodOptional<z.ZodString>;
|
|
2809
|
+
gamePhase: z.ZodOptional<z.ZodString>;
|
|
2758
2810
|
promptSlotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
|
|
2811
|
+
promptSTT: z.ZodOptional<z.ZodString>;
|
|
2812
|
+
promptSTF: z.ZodOptional<z.ZodString>;
|
|
2813
|
+
promptTTF: z.ZodOptional<z.ZodString>;
|
|
2814
|
+
detections: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
2815
|
+
type: z.ZodNativeEnum<typeof DetectionTypeV1>;
|
|
2816
|
+
query: z.ZodString;
|
|
2817
|
+
score: z.ZodNumber;
|
|
2818
|
+
startMs: z.ZodOptional<z.ZodNumber>;
|
|
2819
|
+
endMs: z.ZodOptional<z.ZodNumber>;
|
|
2820
|
+
}, "strip", z.ZodTypeAny, {
|
|
2821
|
+
type: DetectionTypeV1;
|
|
2822
|
+
query: string;
|
|
2823
|
+
score: number;
|
|
2824
|
+
startMs?: number | undefined;
|
|
2825
|
+
endMs?: number | undefined;
|
|
2826
|
+
}, {
|
|
2827
|
+
type: DetectionTypeV1;
|
|
2828
|
+
query: string;
|
|
2829
|
+
score: number;
|
|
2830
|
+
startMs?: number | undefined;
|
|
2831
|
+
endMs?: number | undefined;
|
|
2832
|
+
}>, "many">>;
|
|
2759
2833
|
recognitionActionProcessingState: z.ZodOptional<z.ZodString>;
|
|
2760
2834
|
}, "strip", z.ZodTypeAny, {
|
|
2761
2835
|
audioUtteranceId: string;
|
|
@@ -2766,7 +2840,20 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
2766
2840
|
finalConfidence?: number | undefined;
|
|
2767
2841
|
voiceEnd?: number | undefined;
|
|
2768
2842
|
lastNonSilence?: number | undefined;
|
|
2843
|
+
accumulatedAudioTimeMs?: number | undefined;
|
|
2769
2844
|
asrConfig?: string | undefined;
|
|
2845
|
+
sessionConfigured?: {
|
|
2846
|
+
type: RecognitionResultTypeV1.SESSION_CONFIGURED;
|
|
2847
|
+
audioUtteranceId: string;
|
|
2848
|
+
provider?: string | undefined;
|
|
2849
|
+
model?: string | undefined;
|
|
2850
|
+
sampleRate?: number | undefined;
|
|
2851
|
+
encoding?: string | undefined;
|
|
2852
|
+
apiType?: ASRApiType | undefined;
|
|
2853
|
+
isFallback?: boolean | undefined;
|
|
2854
|
+
asrRequest?: string | undefined;
|
|
2855
|
+
providerConfig?: string | undefined;
|
|
2856
|
+
} | undefined;
|
|
2770
2857
|
startRecordingTimestamp?: string | undefined;
|
|
2771
2858
|
finalRecordingTimestamp?: string | undefined;
|
|
2772
2859
|
finalTranscriptionTimestamp?: string | undefined;
|
|
@@ -2774,7 +2861,19 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
2774
2861
|
functionCallMetadata?: string | undefined;
|
|
2775
2862
|
functionCallConfidence?: number | undefined;
|
|
2776
2863
|
finalFunctionCallTimestamp?: string | undefined;
|
|
2864
|
+
gameId?: string | undefined;
|
|
2865
|
+
gamePhase?: string | undefined;
|
|
2777
2866
|
promptSlotMap?: Record<string, string[]> | undefined;
|
|
2867
|
+
promptSTT?: string | undefined;
|
|
2868
|
+
promptSTF?: string | undefined;
|
|
2869
|
+
promptTTF?: string | undefined;
|
|
2870
|
+
detections?: {
|
|
2871
|
+
type: DetectionTypeV1;
|
|
2872
|
+
query: string;
|
|
2873
|
+
score: number;
|
|
2874
|
+
startMs?: number | undefined;
|
|
2875
|
+
endMs?: number | undefined;
|
|
2876
|
+
}[] | undefined;
|
|
2778
2877
|
recognitionActionProcessingState?: string | undefined;
|
|
2779
2878
|
}, {
|
|
2780
2879
|
audioUtteranceId: string;
|
|
@@ -2784,7 +2883,20 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
2784
2883
|
finalConfidence?: number | undefined;
|
|
2785
2884
|
voiceEnd?: number | undefined;
|
|
2786
2885
|
lastNonSilence?: number | undefined;
|
|
2886
|
+
accumulatedAudioTimeMs?: number | undefined;
|
|
2787
2887
|
asrConfig?: string | undefined;
|
|
2888
|
+
sessionConfigured?: {
|
|
2889
|
+
type: RecognitionResultTypeV1.SESSION_CONFIGURED;
|
|
2890
|
+
audioUtteranceId: string;
|
|
2891
|
+
provider?: string | undefined;
|
|
2892
|
+
model?: string | undefined;
|
|
2893
|
+
sampleRate?: number | undefined;
|
|
2894
|
+
encoding?: string | undefined;
|
|
2895
|
+
apiType?: ASRApiType | undefined;
|
|
2896
|
+
isFallback?: boolean | undefined;
|
|
2897
|
+
asrRequest?: string | undefined;
|
|
2898
|
+
providerConfig?: string | undefined;
|
|
2899
|
+
} | undefined;
|
|
2788
2900
|
startRecordingTimestamp?: string | undefined;
|
|
2789
2901
|
finalRecordingTimestamp?: string | undefined;
|
|
2790
2902
|
finalTranscriptionTimestamp?: string | undefined;
|
|
@@ -2793,7 +2905,19 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
|
|
|
2793
2905
|
functionCallMetadata?: string | undefined;
|
|
2794
2906
|
functionCallConfidence?: number | undefined;
|
|
2795
2907
|
finalFunctionCallTimestamp?: string | undefined;
|
|
2908
|
+
gameId?: string | undefined;
|
|
2909
|
+
gamePhase?: string | undefined;
|
|
2796
2910
|
promptSlotMap?: Record<string, string[]> | undefined;
|
|
2911
|
+
promptSTT?: string | undefined;
|
|
2912
|
+
promptSTF?: string | undefined;
|
|
2913
|
+
promptTTF?: string | undefined;
|
|
2914
|
+
detections?: {
|
|
2915
|
+
type: DetectionTypeV1;
|
|
2916
|
+
query: string;
|
|
2917
|
+
score: number;
|
|
2918
|
+
startMs?: number | undefined;
|
|
2919
|
+
endMs?: number | undefined;
|
|
2920
|
+
}[] | undefined;
|
|
2797
2921
|
recognitionActionProcessingState?: string | undefined;
|
|
2798
2922
|
}>;
|
|
2799
2923
|
type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>;
|
|
@@ -2813,6 +2937,7 @@ declare const TranscriptionStatus: {
|
|
|
2813
2937
|
};
|
|
2814
2938
|
type TranscriptionStatusType = typeof TranscriptionStatus[keyof typeof TranscriptionStatus];
|
|
2815
2939
|
declare function createInitialRecognitionState(audioUtteranceId: string): RecognitionState;
|
|
2940
|
+
declare function isTerminal(state: Pick<RecognitionState, "transcriptionStatus">): boolean;
|
|
2816
2941
|
declare function isValidRecordingStatusTransition(from: string | undefined, to: string): boolean;
|
|
2817
2942
|
|
|
2818
2943
|
/**
|
|
@@ -2843,102 +2968,22 @@ interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
|
|
|
2843
2968
|
/**
|
|
2844
2969
|
* Interface for SimplifiedVGFRecognitionClient
|
|
2845
2970
|
*
|
|
2846
|
-
*
|
|
2847
|
-
*
|
|
2971
|
+
* Inherits the full IRecognitionClient surface (connect, sendAudio,
|
|
2972
|
+
* sendAudioWithSampleRate, sendPrefixAudio, stopRecording, stopAbnormally,
|
|
2973
|
+
* status checks, sendGameContext, getStats, getUrl, getState, getAudioUtteranceId)
|
|
2974
|
+
* — see recognition-client.types.ts for those. Adds VGF-specific state access.
|
|
2975
|
+
*
|
|
2976
|
+
* Extending IRecognitionClient (rather than redeclaring methods) means
|
|
2977
|
+
* TypeScript catches any base-client method that's not delegated by the
|
|
2978
|
+
* VGF wrapper at compile time — keeps the two surfaces in sync.
|
|
2848
2979
|
*/
|
|
2849
|
-
interface ISimplifiedVGFRecognitionClient {
|
|
2980
|
+
interface ISimplifiedVGFRecognitionClient extends IRecognitionClient {
|
|
2850
2981
|
/**
|
|
2851
|
-
*
|
|
2852
|
-
*
|
|
2853
|
-
*/
|
|
2854
|
-
connect(): Promise<void>;
|
|
2855
|
-
/**
|
|
2856
|
-
* Send audio data for transcription
|
|
2857
|
-
* @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
|
|
2858
|
-
*/
|
|
2859
|
-
sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
2860
|
-
/**
|
|
2861
|
-
* Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
|
|
2862
|
-
* downsamples to the session's target rate before transmitting. Use
|
|
2863
|
-
* when capture is at the system's native rate (browser AudioContext is
|
|
2864
|
-
* typically 44.1 kHz or 48 kHz). Audio must be signed 16-bit
|
|
2865
|
-
* little-endian PCM, mono.
|
|
2866
|
-
*/
|
|
2867
|
-
sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
|
|
2868
|
-
/**
|
|
2869
|
-
* Stop recording and wait for final transcription
|
|
2870
|
-
* @returns Promise that resolves when transcription is complete
|
|
2871
|
-
*/
|
|
2872
|
-
stopRecording(): Promise<void>;
|
|
2873
|
-
/**
|
|
2874
|
-
* Force stop and immediately close connection without waiting for server
|
|
2875
|
-
*
|
|
2876
|
-
* WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
|
|
2877
|
-
* - Does NOT wait for server to process remaining audio
|
|
2878
|
-
* - Does NOT receive final transcript from server (VGF state set to empty)
|
|
2879
|
-
* - Immediately closes WebSocket connection
|
|
2880
|
-
* - Cleans up resources (buffers, listeners)
|
|
2881
|
-
*
|
|
2882
|
-
* Use Cases:
|
|
2883
|
-
* - User explicitly cancels/abandons the session
|
|
2884
|
-
* - Timeout scenarios where waiting is not acceptable
|
|
2885
|
-
* - Need immediate cleanup and can't wait for server
|
|
2886
|
-
*
|
|
2887
|
-
* RECOMMENDED: Use stopRecording() for normal shutdown.
|
|
2888
|
-
* Only use this when immediate disconnection is required.
|
|
2889
|
-
*/
|
|
2890
|
-
stopAbnormally(): void;
|
|
2891
|
-
/**
|
|
2892
|
-
* Get the current VGF recognition state
|
|
2982
|
+
* Get the current VGF recognition state — the single shared store
|
|
2983
|
+
* of inputs and outputs for this utterance.
|
|
2893
2984
|
* @returns Current RecognitionState with all transcription data
|
|
2894
2985
|
*/
|
|
2895
2986
|
getVGFState(): RecognitionState;
|
|
2896
|
-
/**
|
|
2897
|
-
* Check if connected to the WebSocket
|
|
2898
|
-
*/
|
|
2899
|
-
isConnected(): boolean;
|
|
2900
|
-
/**
|
|
2901
|
-
* Check if currently connecting
|
|
2902
|
-
*/
|
|
2903
|
-
isConnecting(): boolean;
|
|
2904
|
-
/**
|
|
2905
|
-
* Check if currently stopping
|
|
2906
|
-
*/
|
|
2907
|
-
isStopping(): boolean;
|
|
2908
|
-
/**
|
|
2909
|
-
* Check if transcription has finished
|
|
2910
|
-
*/
|
|
2911
|
-
isTranscriptionFinished(): boolean;
|
|
2912
|
-
/**
|
|
2913
|
-
* Check if the audio buffer has overflowed
|
|
2914
|
-
*/
|
|
2915
|
-
isBufferOverflowing(): boolean;
|
|
2916
|
-
/**
|
|
2917
|
-
* Send game context after connection is established (for preconnect flow).
|
|
2918
|
-
*
|
|
2919
|
-
* Preconnect flow: Create client with asrRequestConfig (useContext: true) but
|
|
2920
|
-
* WITHOUT gameContext → call connect() → later call sendGameContext() with slotMap.
|
|
2921
|
-
*
|
|
2922
|
-
* @param context - Game context including slotMap for keyword boosting
|
|
2923
|
-
*/
|
|
2924
|
-
sendGameContext(context: GameContextV1): void;
|
|
2925
|
-
/**
|
|
2926
|
-
* Check if server has sent READY signal (provider connected, ready for audio).
|
|
2927
|
-
* In preconnect flow, this becomes true after sendGameContext() triggers provider attachment.
|
|
2928
|
-
*/
|
|
2929
|
-
isServerReady(): boolean;
|
|
2930
|
-
/**
|
|
2931
|
-
* Get the audio utterance ID for this session
|
|
2932
|
-
*/
|
|
2933
|
-
getAudioUtteranceId(): string;
|
|
2934
|
-
/**
|
|
2935
|
-
* Get the WebSocket URL being used
|
|
2936
|
-
*/
|
|
2937
|
-
getUrl(): string;
|
|
2938
|
-
/**
|
|
2939
|
-
* Get the underlying client state (for advanced usage)
|
|
2940
|
-
*/
|
|
2941
|
-
getState(): ClientState;
|
|
2942
2987
|
}
|
|
2943
2988
|
/**
|
|
2944
2989
|
* This wrapper ONLY maintains VGF state as a sink.
|
|
@@ -2956,6 +3001,8 @@ declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitio
|
|
|
2956
3001
|
connect(): Promise<void>;
|
|
2957
3002
|
sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
2958
3003
|
sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
|
|
3004
|
+
sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
|
|
3005
|
+
getStats(): IRecognitionClientStats;
|
|
2959
3006
|
/**
|
|
2960
3007
|
* Set VGF recording status to RECORDING on the first audio chunk.
|
|
2961
3008
|
* Idempotent — subsequent calls are no-ops until disconnect/stop resets
|
|
@@ -2975,7 +3022,6 @@ declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitio
|
|
|
2975
3022
|
sendGameContext(context: GameContextV1): void;
|
|
2976
3023
|
isServerReady(): boolean;
|
|
2977
3024
|
getVGFState(): RecognitionState;
|
|
2978
|
-
private isTerminalStatus;
|
|
2979
3025
|
private notifyStateChange;
|
|
2980
3026
|
}
|
|
2981
3027
|
/**
|
|
@@ -3088,5 +3134,5 @@ declare function getRecognitionConductorHttpBase(stage?: Stage | string | null |
|
|
|
3088
3134
|
declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
|
|
3089
3135
|
declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
|
|
3090
3136
|
|
|
3091
|
-
export { AmazonNovaSonicModel, AudioEncoding, AwsTranscribeModel, BedrockModel, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
|
|
3137
|
+
export { AmazonNovaSonicModel, AudioEncoding, AwsTranscribeModel, BedrockModel, CartesiaModel, ClientControlActionV1, ClientState, ConfigBuilder, ConnectionError, ControlSignalTypeV1 as ControlSignal, ControlSignalTypeV1, DashScopeModel, DeepgramModel, ElevenLabsModel, ErrorTypeV1, FinalTranscriptStability, FireworksModel, GeminiModel, GladiaModel, GoogleModel, Language, MistralVoxtralModel, OpenAIModel, OpenAIRealtimeModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClient, RecognitionContextTypeV1, RecognitionError, RecognitionProvider, RecognitionResultTypeV1, RecognitionVGFStateSchema, RecordingStatus, STAGES, SampleRate, SelfServeVllmModel, SimplifiedVGFRecognitionClient, TimeoutError, TranscriptionStatus, ValidationError, createClient, createClientWithBuilder, createDefaultASRConfig, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, getUserFriendlyMessage, isExceptionImmediatelyAvailable, isNormalDisconnection, isTerminal, isValidRecordingStatusTransition, normalizeStage, resetRecognitionVGFState };
|
|
3092
3138
|
export type { ASRRequestConfig, ASRRequestV1, AudioMetricsResultV1, AuthenticationException, ConnectionException, ErrorResultV1, FunctionCallResultV1, GameContextV1, IRecognitionClient, IRecognitionClientConfig, IRecognitionClientStats, ISimplifiedVGFRecognitionClient, MetadataResultV1, ProviderException, QuotaExceededException, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, RecognitionException, RecognitionState, RecordingStatusType, SimplifiedVGFClientConfig, SlotMap, Stage, TimeoutException, TranscriptionResult, TranscriptionResultV1, TranscriptionStatusType, UnknownException, ValidationException };
|
package/dist/index.d.ts
CHANGED
|
@@ -7,7 +7,7 @@ export { ErrorTypeV1 } from '@recog/shared-types';
|
|
|
7
7
|
export type { RecognitionException, ConnectionException, TimeoutException, ValidationException, AuthenticationException, ProviderException, QuotaExceededException, UnknownException } from '@recog/shared-types';
|
|
8
8
|
export { isExceptionImmediatelyAvailable, getUserFriendlyMessage } from '@recog/shared-types';
|
|
9
9
|
export { SimplifiedVGFRecognitionClient, createSimplifiedVGFClient, type ISimplifiedVGFRecognitionClient, type SimplifiedVGFClientConfig } from './simplified-vgf-recognition-client.js';
|
|
10
|
-
export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, TranscriptionStatus, type RecordingStatusType, type TranscriptionStatusType, createInitialRecognitionState, isValidRecordingStatusTransition } from './vgf-recognition-state.js';
|
|
10
|
+
export { type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, TranscriptionStatus, type RecordingStatusType, type TranscriptionStatusType, createInitialRecognitionState, isTerminal, isValidRecordingStatusTransition } from './vgf-recognition-state.js';
|
|
11
11
|
export { resetRecognitionVGFState } from './vgf-recognition-mapper.js';
|
|
12
12
|
export { AudioEncoding } from '@recog/websocket';
|
|
13
13
|
export { type GameContextV1, type SlotMap, RecognitionContextTypeV1, ControlSignalTypeV1, ControlSignalTypeV1 as ControlSignal, // Alias for backward compatibility
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,oBAAoB,EACpB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,wCAAwC,EACxC,KAAK,8CAA8C,EACnD,KAAK,mBAAmB,EACxB,qBAAqB,EACtB,MAAM,yBAAyB,CAAC;AAGjC,OAAO,EACL,KAAK,kBAAkB,EACvB,KAAK,wBAAwB,EAC7B,KAAK,uBAAuB,EAC5B,KAAK,sBAAsB,EAC3B,WAAW,EACZ,MAAM,+BAA+B,CAAC;AAGvC,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,YAAY,EAAE,uBAAuB,EAAE,MAAM,cAAc,CAAC;AAGrE,OAAO,EACL,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EAChB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAGlD,YAAY,EACV,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,uBAAuB,EACvB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,+BAA+B,EAC/B,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,8BAA8B,EAC9B,yBAAyB,EACzB,KAAK,+BAA+B,EACpC,KAAK,yBAAyB,EAC/B,MAAM,wCAAwC,CAAC;AAEhD,OAAO,EACL,KAAK,gBAAgB,EACrB,yBAAyB,EACzB,eAAe,EACf,mBAAmB,EACnB,KAAK,mBAAmB,EACxB,KAAK,uBAAuB,EAC5B,6BAA6B,EAC7B,UAAU,EACV,gCAAgC,EACjC,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EAAE,wBAAwB,EAAE,MAAM,6BAA6B,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAGjD,OAAO,EAEL,KAAK,aAAa,EAClB,KAAK,OAAO,EACZ,wBAAwB,EACxB,mBAAmB,EACnB,mBAAmB,IAAI,aAAa,EAAG,mCAAmC;AAG1E,KAAK,qBAAqB,EAC1B,KAAK,oBAAoB,EACzB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,EACzB,KAAK,aAAa,EAClB,uBAAuB,EACvB,qBAAqB,EAGrB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,wBAAwB,EACxB,sBAAsB,EACtB,mBAAmB,EACnB,aAAa,EACb,eAAe,EACf,cAAc,EACd,WAAW,EACX,WAAW,EACX,WAAW,EACX,WAAW,EACX,kBAAkB,EAClB,mBAAmB,EACnB,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,YAAY,EACZ,kBAAkB,EAClB,oBAAoB,EACpB,QAAQ,EACR,UAAU,EAGV,MAAM,EACN,KAAK,KAAK,EACX,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,yBAAyB,EACzB,6BAA6B,EAC7B,2BAA2B,EAC3B,yBAAyB,EACzB,2BAA2B,EAC3B,+BAA+B,EAC/B,6BAA6B,EAC7B,2BAA2B,EAC3B,cAAc,EACd,yBAAyB,EACzB,2BAA2B,EAC5B,MAAM,sBAAsB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -6435,9 +6435,15 @@ var RecognitionVGFStateSchema = z.object({
|
|
|
6435
6435
|
// voice end time identified by ASR
|
|
6436
6436
|
lastNonSilence: z.number().optional(),
|
|
6437
6437
|
// last non-silence sample time from PCM analysis
|
|
6438
|
+
accumulatedAudioTimeMs: z.number().optional(),
|
|
6439
|
+
// total user audio time watermark (ms) — mirrors TranscriptionResultV1.accumulatedAudioTimeMs
|
|
6438
6440
|
// Tracking-only metadata
|
|
6439
6441
|
asrConfig: z.string().optional(),
|
|
6440
|
-
// Json format of the ASR config
|
|
6442
|
+
// Json format of the *requested* ASR config (set once at construction).
|
|
6443
|
+
// For the *resolved* truth — actual provider/model/sampleRate/encoding/apiType/isFallback chosen by the
|
|
6444
|
+
// server after circuit-breaker/fallback — see `sessionConfigured` below.
|
|
6445
|
+
sessionConfigured: SessionConfiguredSchemaV1.optional(),
|
|
6446
|
+
// Mirrors the SessionConfiguredV1 message; populated when the server emits it (before audio streams).
|
|
6441
6447
|
startRecordingTimestamp: z.string().optional(),
|
|
6442
6448
|
// Start of recording. Immutable after set.
|
|
6443
6449
|
finalRecordingTimestamp: z.string().optional(),
|
|
@@ -6455,9 +6461,28 @@ var RecognitionVGFStateSchema = z.object({
|
|
|
6455
6461
|
// Confidence score for the function call.
|
|
6456
6462
|
finalFunctionCallTimestamp: z.string().optional(),
|
|
6457
6463
|
// When the final action after interpreting the transcript was taken. Immutable.
|
|
6464
|
+
// Session identity — when set, the VGF client backfills these into
|
|
6465
|
+
// GameContextV1 if the caller didn't pass a `gameContext` in config.
|
|
6466
|
+
// Lets RecognitionState be the single source of truth: server seeds
|
|
6467
|
+
// `gameId` + `gamePhase` + `promptSlotMap` per player, controller passes
|
|
6468
|
+
// the whole state as `initialState`, no separate `gameContext` needed.
|
|
6469
|
+
// Backward-compatible: if `gameContext` is also passed in config, it wins.
|
|
6470
|
+
gameId: z.string().optional(),
|
|
6471
|
+
gamePhase: z.string().optional(),
|
|
6458
6472
|
// Support for prompt slot mapping - passed to recognition context when present
|
|
6459
6473
|
promptSlotMap: z.record(z.string(), z.array(z.string())).optional(),
|
|
6460
6474
|
// Optional map of slot names to prompt values for recognition context
|
|
6475
|
+
// Optional prompt inputs - when set, forwarded into GameContext at client creation.
|
|
6476
|
+
// Mirror the GameContextV1 fields: STT (ASR keywords/keyterms), STF (speech->function), TTF (text->function).
|
|
6477
|
+
promptSTT: z.string().optional(),
|
|
6478
|
+
promptSTF: z.string().optional(),
|
|
6479
|
+
promptTTF: z.string().optional(),
|
|
6480
|
+
// Provider-reported phrase detections from the last transcript message.
|
|
6481
|
+
// Mirrors TranscriptionResultV1.detections — a heterogeneous list keyed by DetectionTypeV1
|
|
6482
|
+
// (today only 'search' from Deepgram; future entries may include keywords/keyterms/speech_contexts).
|
|
6483
|
+
// Sorted by `score` descending by the server (see deepgram/message-handlers/v1/transform-transcript.ts
|
|
6484
|
+
// and provider-to-recognition-transformer.ts), so [0] is the top hit — no client-side re-rank needed.
|
|
6485
|
+
detections: z.array(DetectionV1Schema).optional(),
|
|
6461
6486
|
// Recognition action processing state - managed externally, SDK preserves but never modifies
|
|
6462
6487
|
recognitionActionProcessingState: z.string().optional()
|
|
6463
6488
|
// "NOT_STARTED", "IN_PROGRESS", "COMPLETED"
|
|
@@ -6490,6 +6515,9 @@ function createInitialRecognitionState(audioUtteranceId) {
|
|
|
6490
6515
|
recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED
|
|
6491
6516
|
};
|
|
6492
6517
|
}
|
|
6518
|
+
function isTerminal(state) {
|
|
6519
|
+
return state.transcriptionStatus === TranscriptionStatus.FINALIZED || state.transcriptionStatus === TranscriptionStatus.ABORTED || state.transcriptionStatus === TranscriptionStatus.ERROR;
|
|
6520
|
+
}
|
|
6493
6521
|
function isValidRecordingStatusTransition(from, to) {
|
|
6494
6522
|
const statusOrder = [
|
|
6495
6523
|
RecordingStatus.NOT_READY,
|
|
@@ -6529,6 +6557,9 @@ function mapTranscriptionResultToState(currentState, result, isRecording) {
|
|
|
6529
6557
|
if (result.lastNonSilence !== void 0) {
|
|
6530
6558
|
newState.lastNonSilence = result.lastNonSilence;
|
|
6531
6559
|
}
|
|
6560
|
+
if (result.accumulatedAudioTimeMs !== void 0) {
|
|
6561
|
+
newState.accumulatedAudioTimeMs = result.accumulatedAudioTimeMs;
|
|
6562
|
+
}
|
|
6532
6563
|
} else {
|
|
6533
6564
|
newState.transcriptionStatus = TranscriptionStatus.FINALIZED;
|
|
6534
6565
|
newState.finalTranscript = result.finalTranscript || "";
|
|
@@ -6542,12 +6573,24 @@ function mapTranscriptionResultToState(currentState, result, isRecording) {
|
|
|
6542
6573
|
if (result.lastNonSilence !== void 0) {
|
|
6543
6574
|
newState.lastNonSilence = result.lastNonSilence;
|
|
6544
6575
|
}
|
|
6576
|
+
if (result.accumulatedAudioTimeMs !== void 0) {
|
|
6577
|
+
newState.accumulatedAudioTimeMs = result.accumulatedAudioTimeMs;
|
|
6578
|
+
}
|
|
6545
6579
|
newState.pendingTranscript = "";
|
|
6546
6580
|
newState.pendingConfidence = void 0;
|
|
6547
6581
|
}
|
|
6582
|
+
if (result.detections !== void 0) {
|
|
6583
|
+
newState.detections = result.detections;
|
|
6584
|
+
}
|
|
6548
6585
|
return newState;
|
|
6549
6586
|
}
|
|
6550
|
-
function
|
|
6587
|
+
function mapSessionConfiguredToState(currentState, sessionConfigured) {
|
|
6588
|
+
return {
|
|
6589
|
+
...currentState,
|
|
6590
|
+
sessionConfigured
|
|
6591
|
+
};
|
|
6592
|
+
}
|
|
6593
|
+
function mapErrorToState(currentState) {
|
|
6551
6594
|
return {
|
|
6552
6595
|
...currentState,
|
|
6553
6596
|
transcriptionStatus: TranscriptionStatus.ERROR,
|
|
@@ -6579,7 +6622,10 @@ function resetRecognitionVGFState(currentState) {
|
|
|
6579
6622
|
recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
|
|
6580
6623
|
finalTranscript: void 0,
|
|
6581
6624
|
voiceEnd: void 0,
|
|
6582
|
-
lastNonSilence: void 0
|
|
6625
|
+
lastNonSilence: void 0,
|
|
6626
|
+
accumulatedAudioTimeMs: void 0,
|
|
6627
|
+
detections: void 0,
|
|
6628
|
+
sessionConfigured: void 0
|
|
6583
6629
|
};
|
|
6584
6630
|
}
|
|
6585
6631
|
function generateUUID() {
|
|
@@ -6624,16 +6670,35 @@ var SimplifiedVGFRecognitionClient = class {
|
|
|
6624
6670
|
}
|
|
6625
6671
|
this.state = { ...this.state, startRecordingStatus: "READY" };
|
|
6626
6672
|
this.expectedUuid = this.state.audioUtteranceId;
|
|
6627
|
-
if (this.state.
|
|
6673
|
+
if (!clientConfig.gameContext && this.state.gameId && this.state.gamePhase) {
|
|
6674
|
+
clientConfig.gameContext = {
|
|
6675
|
+
type: RecognitionContextTypeV1.GAME_CONTEXT,
|
|
6676
|
+
gameId: this.state.gameId,
|
|
6677
|
+
gamePhase: this.state.gamePhase
|
|
6678
|
+
};
|
|
6679
|
+
}
|
|
6680
|
+
const hasPromptInputs = this.state.promptSlotMap !== void 0 || this.state.promptSTT !== void 0 || this.state.promptSTF !== void 0 || this.state.promptTTF !== void 0;
|
|
6681
|
+
if (hasPromptInputs) {
|
|
6628
6682
|
if (clientConfig.asrRequestConfig) {
|
|
6629
6683
|
clientConfig.asrRequestConfig.useContext = true;
|
|
6630
6684
|
}
|
|
6631
6685
|
if (!clientConfig.gameContext) {
|
|
6632
6686
|
if (clientConfig.logger) {
|
|
6633
|
-
clientConfig.logger("warn", "[VGF]
|
|
6687
|
+
clientConfig.logger("warn", "[VGF] prompt inputs found but no gameContext provided and state has no gameId/gamePhase. They will not be sent.");
|
|
6634
6688
|
}
|
|
6635
6689
|
} else {
|
|
6636
|
-
|
|
6690
|
+
if (this.state.promptSlotMap !== void 0) {
|
|
6691
|
+
clientConfig.gameContext.slotMap = this.state.promptSlotMap;
|
|
6692
|
+
}
|
|
6693
|
+
if (this.state.promptSTT !== void 0) {
|
|
6694
|
+
clientConfig.gameContext.promptSTT = this.state.promptSTT;
|
|
6695
|
+
}
|
|
6696
|
+
if (this.state.promptSTF !== void 0) {
|
|
6697
|
+
clientConfig.gameContext.promptSTF = this.state.promptSTF;
|
|
6698
|
+
}
|
|
6699
|
+
if (this.state.promptTTF !== void 0) {
|
|
6700
|
+
clientConfig.gameContext.promptTTF = this.state.promptTTF;
|
|
6701
|
+
}
|
|
6637
6702
|
}
|
|
6638
6703
|
}
|
|
6639
6704
|
this.client = new RealTimeTwoWayWebSocketRecognitionClient({
|
|
@@ -6669,6 +6734,22 @@ var SimplifiedVGFRecognitionClient = class {
|
|
|
6669
6734
|
clientConfig.onMetadata(metadata);
|
|
6670
6735
|
}
|
|
6671
6736
|
},
|
|
6737
|
+
onSessionConfigured: (sessionConfigured) => {
|
|
6738
|
+
if (sessionConfigured.audioUtteranceId && sessionConfigured.audioUtteranceId !== this.expectedUuid) {
|
|
6739
|
+
if (this.logger) {
|
|
6740
|
+
this.logger(
|
|
6741
|
+
"warn",
|
|
6742
|
+
`[RecogSDK:VGF] Skipping sessionConfigured update: UUID mismatch (expected: ${this.expectedUuid}, got: ${sessionConfigured.audioUtteranceId})`
|
|
6743
|
+
);
|
|
6744
|
+
}
|
|
6745
|
+
return;
|
|
6746
|
+
}
|
|
6747
|
+
this.state = mapSessionConfiguredToState(this.state, sessionConfigured);
|
|
6748
|
+
this.notifyStateChange();
|
|
6749
|
+
if (clientConfig.onSessionConfigured) {
|
|
6750
|
+
clientConfig.onSessionConfigured(sessionConfigured);
|
|
6751
|
+
}
|
|
6752
|
+
},
|
|
6672
6753
|
onFunctionCall: (result) => {
|
|
6673
6754
|
if (clientConfig.onFunctionCall) {
|
|
6674
6755
|
clientConfig.onFunctionCall(result);
|
|
@@ -6685,7 +6766,7 @@ var SimplifiedVGFRecognitionClient = class {
|
|
|
6685
6766
|
return;
|
|
6686
6767
|
}
|
|
6687
6768
|
this.isRecordingAudio = false;
|
|
6688
|
-
this.state = mapErrorToState(this.state
|
|
6769
|
+
this.state = mapErrorToState(this.state);
|
|
6689
6770
|
this.notifyStateChange();
|
|
6690
6771
|
if (clientConfig.onError) {
|
|
6691
6772
|
clientConfig.onError(error);
|
|
@@ -6717,6 +6798,12 @@ var SimplifiedVGFRecognitionClient = class {
|
|
|
6717
6798
|
this.markRecordingStarted();
|
|
6718
6799
|
this.client.sendAudioWithSampleRate(audioData, sourceSampleRate);
|
|
6719
6800
|
}
|
|
6801
|
+
sendPrefixAudio(audioData) {
|
|
6802
|
+
this.client.sendPrefixAudio(audioData);
|
|
6803
|
+
}
|
|
6804
|
+
getStats() {
|
|
6805
|
+
return this.client.getStats();
|
|
6806
|
+
}
|
|
6720
6807
|
/**
|
|
6721
6808
|
* Set VGF recording status to RECORDING on the first audio chunk.
|
|
6722
6809
|
* Idempotent — subsequent calls are no-ops until disconnect/stop resets
|
|
@@ -6809,11 +6896,8 @@ var SimplifiedVGFRecognitionClient = class {
|
|
|
6809
6896
|
getVGFState() {
|
|
6810
6897
|
return { ...this.state };
|
|
6811
6898
|
}
|
|
6812
|
-
isTerminalStatus(status) {
|
|
6813
|
-
return status === TranscriptionStatus.FINALIZED || status === TranscriptionStatus.ABORTED || status === TranscriptionStatus.ERROR;
|
|
6814
|
-
}
|
|
6815
6899
|
notifyStateChange() {
|
|
6816
|
-
if (
|
|
6900
|
+
if (isTerminal(this.state)) {
|
|
6817
6901
|
if (this.lastSentTerminalUuid === this.expectedUuid) {
|
|
6818
6902
|
if (this.logger) {
|
|
6819
6903
|
this.logger(
|
|
@@ -6899,6 +6983,7 @@ export {
|
|
|
6899
6983
|
getUserFriendlyMessage,
|
|
6900
6984
|
isExceptionImmediatelyAvailable,
|
|
6901
6985
|
isNormalDisconnection,
|
|
6986
|
+
isTerminal,
|
|
6902
6987
|
isValidRecordingStatusTransition,
|
|
6903
6988
|
normalizeStage,
|
|
6904
6989
|
resetRecognitionVGFState
|