npm - @volley/recognition-client-sdk - Versions diffs - 0.1.782 → 0.1.800 - Mend

@volley/recognition-client-sdk 0.1.782 → 0.1.800

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/browser.bundled.d.ts +75 -4
package/dist/index.bundled.d.ts +198 -87
package/dist/index.js +191 -20
package/dist/index.js.map +4 -4
package/dist/recog-client-sdk.browser.js +95 -4
package/dist/recog-client-sdk.browser.js.map +4 -4
package/dist/recognition-client.d.ts +23 -0
package/dist/recognition-client.d.ts.map +1 -1
package/dist/recognition-client.types.d.ts +32 -0
package/dist/recognition-client.types.d.ts.map +1 -1
package/dist/simplified-vgf-recognition-client.d.ts +22 -85
package/dist/simplified-vgf-recognition-client.d.ts.map +1 -1
package/dist/utils/audio-resampler.d.ts +32 -0
package/dist/utils/audio-resampler.d.ts.map +1 -0
package/dist/vgf-recognition-mapper.d.ts +9 -17
package/dist/vgf-recognition-mapper.d.ts.map +1 -1
package/dist/vgf-recognition-state.d.ts +103 -0
package/dist/vgf-recognition-state.d.ts.map +1 -1
package/package.json +1 -1
package/src/index.spec.ts +2 -0
package/src/recognition-client.ts +65 -7
package/src/recognition-client.types.ts +37 -0
package/src/simplified-vgf-recognition-client.spec.ts +0 -27
package/src/simplified-vgf-recognition-client.ts +97 -127
package/src/utils/audio-resampler.spec.ts +69 -0
package/src/utils/audio-resampler.ts +79 -0
package/src/vgf-recognition-mapper.spec.ts +143 -0
package/src/vgf-recognition-mapper.ts +35 -45
package/src/vgf-recognition-state.ts +19 -1

package/dist/browser.bundled.d.ts CHANGED Viewed

@@ -159,9 +159,11 @@ declare enum AmazonNovaSonicModel {
 }
 /**
  * Self-serve vLLM batch transcription models
- * Backed by recognition-inference / RunPod `/transcribe`
+ * Backed by recognition-inference / RunPod `/ws/transcribe`
  */
 declare enum SelfServeVllmModel {
+    QWEN3_ASR_0_6B = "qwen3-asr-0.6b",
+    QWEN3_ASR_0_6B_WOF_LETTER = "qwen3-asr-0.6b-wof-letter",
     QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
 }
 /**
@@ -651,6 +653,20 @@ declare namespace AudioEncoding {
      * @returns true if valid encoding name
      */
     function isNameValid(nameStr: string): boolean;
+    /**
+     * Coerce a possibly-stringly-typed encoding value into the AudioEncoding enum.
+     *
+     * - enum / number → returned as-is (already AudioEncoding-shaped)
+     * - string (case-insensitive, e.g. 'linear16', 'LINEAR16') → converted via {@link fromName}.
+     *   Invokes `onStringInput` with a warning message so callers can route it
+     *   to their preferred logger.
+     * - invalid string → throws (preferred over silent fallback so typos surface)
+     * - undefined → defaults to {@link AudioEncoding.LINEAR16}
+     *
+     * Always normalize at the SDK / server boundary so downstream code can rely
+     * on a numeric AudioEncoding (the wire-level binary frame header is uint32).
+     */
+    function coerce(value: AudioEncoding | string | number | undefined, onStringInput?: (warning: string) => void): AudioEncoding;
 }
 /**
  * Common sample rates (in Hz)
@@ -906,10 +922,10 @@ interface ASRRequestConfig {
      * doesn't respond with is_final=true after stopRecording().
      *
      * - aggressive: 100ms - fast response, may cut off slow providers
-     * - balanced: 500ms - current default, good for most cases
-     * - conservative: 1000ms - wait longer for complex utterances
+     * - balanced: 500ms - good for most cases
+     * - conservative: 1000ms - current default, wait longer for complex utterances
      *
-     * @default 'balanced'
+     * @default 'conservative'
      * @see FinalTranscriptStability enum for detailed descriptions
      */
     finalTranscriptStability?: FinalTranscriptStability | string;
@@ -1390,6 +1406,11 @@ interface IRecognitionClientConfig {
  *
  * Main interface for real-time speech recognition clients.
  * Provides methods for connection management, audio streaming, and session control.
+ *
+ * NOTE for maintainers: `ISimplifiedVGFRecognitionClient` extends this interface,
+ * so any method added here must also be implemented (typically as a delegate) by
+ * `SimplifiedVGFRecognitionClient`. TypeScript will flag missing delegates at
+ * compile time — do not work around the error, add the delegate.
  */
 interface IRecognitionClient {
     /**
@@ -1404,6 +1425,33 @@ interface IRecognitionClient {
      * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
      */
     sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
+    /**
+     * Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
+     * downsamples to the session's target rate (currently 16 kHz, set by the
+     * server validator) before transmitting.
+     *
+     * Use this when your capture pipeline produces audio at the system's
+     * native rate (browser `AudioContext` is typically 44.1 kHz or 48 kHz).
+     * If your audio is already at the target rate, prefer `sendAudio()` to
+     * skip the resample step.
+     *
+     * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
+     * mixed to mono by the caller.
+     *
+     * @param audioData - PCM16 mono audio at `sourceSampleRate`.
+     * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
+     */
+    sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
+    /**
+     * Send prefix audio (e.g. a TTS prompt) that primes the provider's language
+     * model before user audio is streamed. Chunks accepted — the server buffers
+     * until the session is READY and flushes. Must be sent BEFORE the first
+     * `sendAudio()` to take effect. Only meaningful when
+     * `asrRequestConfig.prefixMode === PrefixMode.CLIENT`.
+     *
+     * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
+     */
+    sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
     /**
      * Stop recording and wait for final transcript
      * The server will close the connection after sending the final transcript.
@@ -1603,6 +1651,29 @@ declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioCli
      */
     private connectWithRetry;
     sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
+    /**
+     * Send PCM16 mono audio captured at any sample rate. The SDK downsamples
+     * to the session's target rate (currently 16 kHz per server validator)
+     * before sending.
+     *
+     * Use this when your capture pipeline produces audio at the system's
+     * native rate — `AudioContext` defaults to 44.1 kHz or 48 kHz on most
+     * desktop/mobile hardware — and you don't want to bring your own
+     * resampler. If your audio is already at the target rate, prefer
+     * `sendAudio()` to skip the resample step.
+     *
+     * Algorithm: box-filter averaging (see audio-resampler.ts). Cheap, no
+     * dependencies, has a built-in low-pass effect so aliasing stays out of
+     * the speech band. Suitable for ASR; not a substitute for a high-quality
+     * resampler if you're doing music or full-fidelity processing.
+     *
+     * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
+     * mixed to mono by the caller.
+     *
+     * @param audioData - PCM16 mono audio at `sourceSampleRate`.
+     * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
+     */
+    sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
     private sendAudioInternal;
     /**
      * Only active when the client is in READY state; otherwise it will return immediately.

package/dist/index.bundled.d.ts CHANGED Viewed

@@ -159,9 +159,11 @@ declare enum AmazonNovaSonicModel {
 }
 /**
  * Self-serve vLLM batch transcription models
- * Backed by recognition-inference / RunPod `/transcribe`
+ * Backed by recognition-inference / RunPod `/ws/transcribe`
  */
 declare enum SelfServeVllmModel {
+    QWEN3_ASR_0_6B = "qwen3-asr-0.6b",
+    QWEN3_ASR_0_6B_WOF_LETTER = "qwen3-asr-0.6b-wof-letter",
     QWEN3_ASR_1_7B = "qwen3-asr-1.7b"
 }
 /**
@@ -660,6 +662,20 @@ declare namespace AudioEncoding {
      * @returns true if valid encoding name
      */
     function isNameValid(nameStr: string): boolean;
+    /**
+     * Coerce a possibly-stringly-typed encoding value into the AudioEncoding enum.
+     *
+     * - enum / number → returned as-is (already AudioEncoding-shaped)
+     * - string (case-insensitive, e.g. 'linear16', 'LINEAR16') → converted via {@link fromName}.
+     *   Invokes `onStringInput` with a warning message so callers can route it
+     *   to their preferred logger.
+     * - invalid string → throws (preferred over silent fallback so typos surface)
+     * - undefined → defaults to {@link AudioEncoding.LINEAR16}
+     *
+     * Always normalize at the SDK / server boundary so downstream code can rely
+     * on a numeric AudioEncoding (the wire-level binary frame header is uint32).
+     */
+    function coerce(value: AudioEncoding | string | number | undefined, onStringInput?: (warning: string) => void): AudioEncoding;
 }
 /**
  * Common sample rates (in Hz)
@@ -1636,10 +1652,10 @@ interface ASRRequestConfig {
      * doesn't respond with is_final=true after stopRecording().
      *
      * - aggressive: 100ms - fast response, may cut off slow providers
-     * - balanced: 500ms - current default, good for most cases
-     * - conservative: 1000ms - wait longer for complex utterances
+     * - balanced: 500ms - good for most cases
+     * - conservative: 1000ms - current default, wait longer for complex utterances
      *
-     * @default 'balanced'
+     * @default 'conservative'
      * @see FinalTranscriptStability enum for detailed descriptions
      */
     finalTranscriptStability?: FinalTranscriptStability | string;
@@ -2159,6 +2175,11 @@ interface IRecognitionClientConfig {
  *
  * Main interface for real-time speech recognition clients.
  * Provides methods for connection management, audio streaming, and session control.
+ *
+ * NOTE for maintainers: `ISimplifiedVGFRecognitionClient` extends this interface,
+ * so any method added here must also be implemented (typically as a delegate) by
+ * `SimplifiedVGFRecognitionClient`. TypeScript will flag missing delegates at
+ * compile time — do not work around the error, add the delegate.
  */
 interface IRecognitionClient {
     /**
@@ -2173,6 +2194,33 @@ interface IRecognitionClient {
      * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
      */
     sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
+    /**
+     * Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
+     * downsamples to the session's target rate (currently 16 kHz, set by the
+     * server validator) before transmitting.
+     *
+     * Use this when your capture pipeline produces audio at the system's
+     * native rate (browser `AudioContext` is typically 44.1 kHz or 48 kHz).
+     * If your audio is already at the target rate, prefer `sendAudio()` to
+     * skip the resample step.
+     *
+     * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
+     * mixed to mono by the caller.
+     *
+     * @param audioData - PCM16 mono audio at `sourceSampleRate`.
+     * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
+     */
+    sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
+    /**
+     * Send prefix audio (e.g. a TTS prompt) that primes the provider's language
+     * model before user audio is streamed. Chunks accepted — the server buffers
+     * until the session is READY and flushes. Must be sent BEFORE the first
+     * `sendAudio()` to take effect. Only meaningful when
+     * `asrRequestConfig.prefixMode === PrefixMode.CLIENT`.
+     *
+     * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
+     */
+    sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
     /**
      * Stop recording and wait for final transcript
      * The server will close the connection after sending the final transcript.
@@ -2378,6 +2426,29 @@ declare class RealTimeTwoWayWebSocketRecognitionClient extends WebSocketAudioCli
      */
     private connectWithRetry;
     sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
+    /**
+     * Send PCM16 mono audio captured at any sample rate. The SDK downsamples
+     * to the session's target rate (currently 16 kHz per server validator)
+     * before sending.
+     *
+     * Use this when your capture pipeline produces audio at the system's
+     * native rate — `AudioContext` defaults to 44.1 kHz or 48 kHz on most
+     * desktop/mobile hardware — and you don't want to bring your own
+     * resampler. If your audio is already at the target rate, prefer
+     * `sendAudio()` to skip the resample step.
+     *
+     * Algorithm: box-filter averaging (see audio-resampler.ts). Cheap, no
+     * dependencies, has a built-in low-pass effect so aliasing stays out of
+     * the speech band. Suitable for ASR; not a substitute for a high-quality
+     * resampler if you're doing music or full-fidelity processing.
+     *
+     * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
+     * mixed to mono by the caller.
+     *
+     * @param audioData - PCM16 mono audio at `sourceSampleRate`.
+     * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
+     */
+    sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
     private sendAudioInternal;
     /**
      * Only active when the client is in READY state; otherwise it will return immediately.
@@ -2690,7 +2761,42 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
     finalConfidence: z.ZodOptional<z.ZodNumber>;
     voiceEnd: z.ZodOptional<z.ZodNumber>;
     lastNonSilence: z.ZodOptional<z.ZodNumber>;
+    accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
     asrConfig: z.ZodOptional<z.ZodString>;
+    sessionConfigured: z.ZodOptional<z.ZodObject<{
+        type: z.ZodLiteral<RecognitionResultTypeV1.SESSION_CONFIGURED>;
+        audioUtteranceId: z.ZodString;
+        provider: z.ZodOptional<z.ZodString>;
+        model: z.ZodOptional<z.ZodString>;
+        sampleRate: z.ZodOptional<z.ZodNumber>;
+        encoding: z.ZodOptional<z.ZodString>;
+        apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
+        isFallback: z.ZodOptional<z.ZodBoolean>;
+        asrRequest: z.ZodOptional<z.ZodString>;
+        providerConfig: z.ZodOptional<z.ZodString>;
+    }, "strip", z.ZodTypeAny, {
+        type: RecognitionResultTypeV1.SESSION_CONFIGURED;
+        audioUtteranceId: string;
+        provider?: string | undefined;
+        model?: string | undefined;
+        sampleRate?: number | undefined;
+        encoding?: string | undefined;
+        apiType?: ASRApiType | undefined;
+        isFallback?: boolean | undefined;
+        asrRequest?: string | undefined;
+        providerConfig?: string | undefined;
+    }, {
+        type: RecognitionResultTypeV1.SESSION_CONFIGURED;
+        audioUtteranceId: string;
+        provider?: string | undefined;
+        model?: string | undefined;
+        sampleRate?: number | undefined;
+        encoding?: string | undefined;
+        apiType?: ASRApiType | undefined;
+        isFallback?: boolean | undefined;
+        asrRequest?: string | undefined;
+        providerConfig?: string | undefined;
+    }>>;
     startRecordingTimestamp: z.ZodOptional<z.ZodString>;
     finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
     finalTranscriptionTimestamp: z.ZodOptional<z.ZodString>;
@@ -2700,6 +2806,28 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
     functionCallConfidence: z.ZodOptional<z.ZodNumber>;
     finalFunctionCallTimestamp: z.ZodOptional<z.ZodString>;
     promptSlotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
+    promptSTT: z.ZodOptional<z.ZodString>;
+    promptSTF: z.ZodOptional<z.ZodString>;
+    promptTTF: z.ZodOptional<z.ZodString>;
+    detections: z.ZodOptional<z.ZodArray<z.ZodObject<{
+        type: z.ZodNativeEnum<typeof DetectionTypeV1>;
+        query: z.ZodString;
+        score: z.ZodNumber;
+        startMs: z.ZodOptional<z.ZodNumber>;
+        endMs: z.ZodOptional<z.ZodNumber>;
+    }, "strip", z.ZodTypeAny, {
+        type: DetectionTypeV1;
+        query: string;
+        score: number;
+        startMs?: number | undefined;
+        endMs?: number | undefined;
+    }, {
+        type: DetectionTypeV1;
+        query: string;
+        score: number;
+        startMs?: number | undefined;
+        endMs?: number | undefined;
+    }>, "many">>;
     recognitionActionProcessingState: z.ZodOptional<z.ZodString>;
 }, "strip", z.ZodTypeAny, {
     audioUtteranceId: string;
@@ -2710,7 +2838,20 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
     finalConfidence?: number | undefined;
     voiceEnd?: number | undefined;
     lastNonSilence?: number | undefined;
+    accumulatedAudioTimeMs?: number | undefined;
     asrConfig?: string | undefined;
+    sessionConfigured?: {
+        type: RecognitionResultTypeV1.SESSION_CONFIGURED;
+        audioUtteranceId: string;
+        provider?: string | undefined;
+        model?: string | undefined;
+        sampleRate?: number | undefined;
+        encoding?: string | undefined;
+        apiType?: ASRApiType | undefined;
+        isFallback?: boolean | undefined;
+        asrRequest?: string | undefined;
+        providerConfig?: string | undefined;
+    } | undefined;
     startRecordingTimestamp?: string | undefined;
     finalRecordingTimestamp?: string | undefined;
     finalTranscriptionTimestamp?: string | undefined;
@@ -2719,6 +2860,16 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
     functionCallConfidence?: number | undefined;
     finalFunctionCallTimestamp?: string | undefined;
     promptSlotMap?: Record<string, string[]> | undefined;
+    promptSTT?: string | undefined;
+    promptSTF?: string | undefined;
+    promptTTF?: string | undefined;
+    detections?: {
+        type: DetectionTypeV1;
+        query: string;
+        score: number;
+        startMs?: number | undefined;
+        endMs?: number | undefined;
+    }[] | undefined;
     recognitionActionProcessingState?: string | undefined;
 }, {
     audioUtteranceId: string;
@@ -2728,7 +2879,20 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
     finalConfidence?: number | undefined;
     voiceEnd?: number | undefined;
     lastNonSilence?: number | undefined;
+    accumulatedAudioTimeMs?: number | undefined;
     asrConfig?: string | undefined;
+    sessionConfigured?: {
+        type: RecognitionResultTypeV1.SESSION_CONFIGURED;
+        audioUtteranceId: string;
+        provider?: string | undefined;
+        model?: string | undefined;
+        sampleRate?: number | undefined;
+        encoding?: string | undefined;
+        apiType?: ASRApiType | undefined;
+        isFallback?: boolean | undefined;
+        asrRequest?: string | undefined;
+        providerConfig?: string | undefined;
+    } | undefined;
     startRecordingTimestamp?: string | undefined;
     finalRecordingTimestamp?: string | undefined;
     finalTranscriptionTimestamp?: string | undefined;
@@ -2738,6 +2902,16 @@ declare const RecognitionVGFStateSchema: z.ZodObject<{
     functionCallConfidence?: number | undefined;
     finalFunctionCallTimestamp?: string | undefined;
     promptSlotMap?: Record<string, string[]> | undefined;
+    promptSTT?: string | undefined;
+    promptSTF?: string | undefined;
+    promptTTF?: string | undefined;
+    detections?: {
+        type: DetectionTypeV1;
+        query: string;
+        score: number;
+        startMs?: number | undefined;
+        endMs?: number | undefined;
+    }[] | undefined;
     recognitionActionProcessingState?: string | undefined;
 }>;
 type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>;
@@ -2787,94 +2961,22 @@ interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
 /**
  * Interface for SimplifiedVGFRecognitionClient
  *
- * A simplified client that maintains VGF state for game developers.
- * All methods from the underlying client are available, plus VGF state management.
+ * Inherits the full IRecognitionClient surface (connect, sendAudio,
+ * sendAudioWithSampleRate, sendPrefixAudio, stopRecording, stopAbnormally,
+ * status checks, sendGameContext, getStats, getUrl, getState, getAudioUtteranceId)
+ * — see recognition-client.types.ts for those. Adds VGF-specific state access.
+ *
+ * Extending IRecognitionClient (rather than redeclaring methods) means
+ * TypeScript catches any base-client method that's not delegated by the
+ * VGF wrapper at compile time — keeps the two surfaces in sync.
  */
-interface ISimplifiedVGFRecognitionClient {
-    /**
-     * Connect to the recognition service WebSocket
-     * @returns Promise that resolves when connected and ready
-     */
-    connect(): Promise<void>;
-    /**
-     * Send audio data for transcription
-     * @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
-     */
-    sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
-    /**
-     * Stop recording and wait for final transcription
-     * @returns Promise that resolves when transcription is complete
-     */
-    stopRecording(): Promise<void>;
-    /**
-     * Force stop and immediately close connection without waiting for server
-     *
-     * WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
-     * - Does NOT wait for server to process remaining audio
-     * - Does NOT receive final transcript from server (VGF state set to empty)
-     * - Immediately closes WebSocket connection
-     * - Cleans up resources (buffers, listeners)
-     *
-     * Use Cases:
-     * - User explicitly cancels/abandons the session
-     * - Timeout scenarios where waiting is not acceptable
-     * - Need immediate cleanup and can't wait for server
-     *
-     * RECOMMENDED: Use stopRecording() for normal shutdown.
-     * Only use this when immediate disconnection is required.
-     */
-    stopAbnormally(): void;
+interface ISimplifiedVGFRecognitionClient extends IRecognitionClient {
     /**
-     * Get the current VGF recognition state
+     * Get the current VGF recognition state — the single shared store
+     * of inputs and outputs for this utterance.
      * @returns Current RecognitionState with all transcription data
      */
     getVGFState(): RecognitionState;
-    /**
-     * Check if connected to the WebSocket
-     */
-    isConnected(): boolean;
-    /**
-     * Check if currently connecting
-     */
-    isConnecting(): boolean;
-    /**
-     * Check if currently stopping
-     */
-    isStopping(): boolean;
-    /**
-     * Check if transcription has finished
-     */
-    isTranscriptionFinished(): boolean;
-    /**
-     * Check if the audio buffer has overflowed
-     */
-    isBufferOverflowing(): boolean;
-    /**
-     * Send game context after connection is established (for preconnect flow).
-     *
-     * Preconnect flow: Create client with asrRequestConfig (useContext: true) but
-     * WITHOUT gameContext → call connect() → later call sendGameContext() with slotMap.
-     *
-     * @param context - Game context including slotMap for keyword boosting
-     */
-    sendGameContext(context: GameContextV1): void;
-    /**
-     * Check if server has sent READY signal (provider connected, ready for audio).
-     * In preconnect flow, this becomes true after sendGameContext() triggers provider attachment.
-     */
-    isServerReady(): boolean;
-    /**
-     * Get the audio utterance ID for this session
-     */
-    getAudioUtteranceId(): string;
-    /**
-     * Get the WebSocket URL being used
-     */
-    getUrl(): string;
-    /**
-     * Get the underlying client state (for advanced usage)
-     */
-    getState(): ClientState;
 }
 /**
  * This wrapper ONLY maintains VGF state as a sink.
@@ -2891,6 +2993,15 @@ declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitio
     constructor(config: SimplifiedVGFClientConfig);
     connect(): Promise<void>;
     sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
+    sendAudioWithSampleRate(audioData: ArrayBuffer | ArrayBufferView | Blob, sourceSampleRate: number): void;
+    sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
+    getStats(): IRecognitionClientStats;
+    /**
+     * Set VGF recording status to RECORDING on the first audio chunk.
+     * Idempotent — subsequent calls are no-ops until disconnect/stop resets
+     * `isRecordingAudio`.
+     */
+    private markRecordingStarted;
     stopRecording(): Promise<void>;
     stopAbnormally(): void;
     getAudioUtteranceId(): string;