npm - @volley/recognition-client-sdk - Versions diffs - 0.1.782 → 0.1.800 - Mend

@volley/recognition-client-sdk 0.1.782 → 0.1.800

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/browser.bundled.d.ts +75 -4
package/dist/index.bundled.d.ts +198 -87
package/dist/index.js +191 -20
package/dist/index.js.map +4 -4
package/dist/recog-client-sdk.browser.js +95 -4
package/dist/recog-client-sdk.browser.js.map +4 -4
package/dist/recognition-client.d.ts +23 -0
package/dist/recognition-client.d.ts.map +1 -1
package/dist/recognition-client.types.d.ts +32 -0
package/dist/recognition-client.types.d.ts.map +1 -1
package/dist/simplified-vgf-recognition-client.d.ts +22 -85
package/dist/simplified-vgf-recognition-client.d.ts.map +1 -1
package/dist/utils/audio-resampler.d.ts +32 -0
package/dist/utils/audio-resampler.d.ts.map +1 -0
package/dist/vgf-recognition-mapper.d.ts +9 -17
package/dist/vgf-recognition-mapper.d.ts.map +1 -1
package/dist/vgf-recognition-state.d.ts +103 -0
package/dist/vgf-recognition-state.d.ts.map +1 -1
package/package.json +1 -1
package/src/index.spec.ts +2 -0
package/src/recognition-client.ts +65 -7
package/src/recognition-client.types.ts +37 -0
package/src/simplified-vgf-recognition-client.spec.ts +0 -27
package/src/simplified-vgf-recognition-client.ts +97 -127
package/src/utils/audio-resampler.spec.ts +69 -0
package/src/utils/audio-resampler.ts +79 -0
package/src/vgf-recognition-mapper.spec.ts +143 -0
package/src/vgf-recognition-mapper.ts +35 -45
package/src/vgf-recognition-state.ts +19 -1

package/src/recognition-client.types.ts CHANGED Viewed

@@ -223,6 +223,11 @@ export interface IRecognitionClientConfig {
  *
  * Main interface for real-time speech recognition clients.
  * Provides methods for connection management, audio streaming, and session control.
+ *
+ * NOTE for maintainers: `ISimplifiedVGFRecognitionClient` extends this interface,
+ * so any method added here must also be implemented (typically as a delegate) by
+ * `SimplifiedVGFRecognitionClient`. TypeScript will flag missing delegates at
+ * compile time — do not work around the error, add the delegate.
  */
 export interface IRecognitionClient {
   /**
@@ -239,6 +244,38 @@ export interface IRecognitionClient {
    */
   sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
+  /**
+   * Send PCM16 mono audio captured at `sourceSampleRate`; the SDK
+   * downsamples to the session's target rate (currently 16 kHz, set by the
+   * server validator) before transmitting.
+   *
+   * Use this when your capture pipeline produces audio at the system's
+   * native rate (browser `AudioContext` is typically 44.1 kHz or 48 kHz).
+   * If your audio is already at the target rate, prefer `sendAudio()` to
+   * skip the resample step.
+   *
+   * Audio must be signed 16-bit little-endian PCM, mono. Stereo must be
+   * mixed to mono by the caller.
+   *
+   * @param audioData - PCM16 mono audio at `sourceSampleRate`.
+   * @param sourceSampleRate - Source sample rate in Hz (e.g. 44100, 48000).
+   */
+  sendAudioWithSampleRate(
+    audioData: ArrayBuffer | ArrayBufferView | Blob,
+    sourceSampleRate: number
+  ): void;
+  /**
+   * Send prefix audio (e.g. a TTS prompt) that primes the provider's language
+   * model before user audio is streamed. Chunks accepted — the server buffers
+   * until the session is READY and flushes. Must be sent BEFORE the first
+   * `sendAudio()` to take effect. Only meaningful when
+   * `asrRequestConfig.prefixMode === PrefixMode.CLIENT`.
+   *
+   * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
+   */
+  sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
   /**
    * Stop recording and wait for final transcript
    * The server will close the connection after sending the final transcript.

package/src/simplified-vgf-recognition-client.spec.ts CHANGED Viewed

@@ -766,33 +766,6 @@ describe('SimplifiedVGFRecognitionClient', () => {
       });
     });
-    it('should warn if promptSlotMap exists but no gameContext provided', () => {
-      const logger = jest.fn();
-      const initialState: RecognitionState = {
-        audioUtteranceId: 'test-123',
-        pendingTranscript: '', // Required field
-        promptSlotMap: {
-          'entity1': ['value1']
-        }
-      };
-      simplifiedClient = new SimplifiedVGFRecognitionClient({
-        asrRequestConfig: {
-          provider: 'deepgram',
-          language: 'en',
-          sampleRate: 16000,
-          encoding: AudioEncoding.LINEAR16
-        },
-        initialState,
-        logger
-      });
-      expect(logger).toHaveBeenCalledWith(
-        'warn',
-        '[VGF] promptSlotMap found but no gameContext provided. SlotMap will not be sent.'
-      );
-    });
     it('should preserve promptSlotMap throughout state changes', () => {
       const initialState: RecognitionState = {
         audioUtteranceId: 'test-123',

package/src/simplified-vgf-recognition-client.ts CHANGED Viewed

@@ -17,17 +17,19 @@ import {
 import {
     IRecognitionClient,
     IRecognitionClientConfig,
+    IRecognitionClientStats,
     ClientState
 } from './recognition-client.types.js';
 import { RealTimeTwoWayWebSocketRecognitionClient } from './recognition-client.js';
 import {
     createVGFStateFromConfig,
     mapTranscriptionResultToState,
+    mapSessionConfiguredToState,
     mapErrorToState,
     updateStateOnStop,
     resetRecognitionVGFState
 } from './vgf-recognition-mapper.js';
-import { RecognitionContextTypeV1, type GameContextV1 } from '@recog/shared-types';
+import type { GameContextV1 } from '@recog/shared-types';
 /**
  * Configuration for SimplifiedVGFRecognitionClient
@@ -49,114 +51,22 @@ export interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
 /**
  * Interface for SimplifiedVGFRecognitionClient
  *
- * A simplified client that maintains VGF state for game developers.
- * All methods from the underlying client are available, plus VGF state management.
+ * Inherits the full IRecognitionClient surface (connect, sendAudio,
+ * sendAudioWithSampleRate, sendPrefixAudio, stopRecording, stopAbnormally,
+ * status checks, sendGameContext, getStats, getUrl, getState, getAudioUtteranceId)
+ * — see recognition-client.types.ts for those. Adds VGF-specific state access.
+ *
+ * Extending IRecognitionClient (rather than redeclaring methods) means
+ * TypeScript catches any base-client method that's not delegated by the
+ * VGF wrapper at compile time — keeps the two surfaces in sync.
  */
-export interface ISimplifiedVGFRecognitionClient {
-    // ============= Core Connection Methods =============
-    /**
-     * Connect to the recognition service WebSocket
-     * @returns Promise that resolves when connected and ready
-     */
-    connect(): Promise<void>;
-    /**
-     * Send audio data for transcription
-     * @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
-     */
-    sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
-    /**
-     * Stop recording and wait for final transcription
-     * @returns Promise that resolves when transcription is complete
-     */
-    stopRecording(): Promise<void>;
+export interface ISimplifiedVGFRecognitionClient extends IRecognitionClient {
     /**
-     * Force stop and immediately close connection without waiting for server
-     *
-     * WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
-     * - Does NOT wait for server to process remaining audio
-     * - Does NOT receive final transcript from server (VGF state set to empty)
-     * - Immediately closes WebSocket connection
-     * - Cleans up resources (buffers, listeners)
-     *
-     * Use Cases:
-     * - User explicitly cancels/abandons the session
-     * - Timeout scenarios where waiting is not acceptable
-     * - Need immediate cleanup and can't wait for server
-     *
-     * RECOMMENDED: Use stopRecording() for normal shutdown.
-     * Only use this when immediate disconnection is required.
-     */
-    stopAbnormally(): void;
-    // ============= VGF State Methods =============
-    /**
-     * Get the current VGF recognition state
+     * Get the current VGF recognition state — the single shared store
+     * of inputs and outputs for this utterance.
      * @returns Current RecognitionState with all transcription data
      */
     getVGFState(): RecognitionState;
-    // ============= Status Check Methods =============
-    /**
-     * Check if connected to the WebSocket
-     */
-    isConnected(): boolean;
-    /**
-     * Check if currently connecting
-     */
-    isConnecting(): boolean;
-    /**
-     * Check if currently stopping
-     */
-    isStopping(): boolean;
-    /**
-     * Check if transcription has finished
-     */
-    isTranscriptionFinished(): boolean;
-    /**
-     * Check if the audio buffer has overflowed
-     */
-    isBufferOverflowing(): boolean;
-    // ============= Preconnect Methods =============
-    /**
-     * Send game context after connection is established (for preconnect flow).
-     *
-     * Preconnect flow: Create client with asrRequestConfig (useContext: true) but
-     * WITHOUT gameContext → call connect() → later call sendGameContext() with slotMap.
-     *
-     * @param context - Game context including slotMap for keyword boosting
-     */
-    sendGameContext(context: GameContextV1): void;
-    /**
-     * Check if server has sent READY signal (provider connected, ready for audio).
-     * In preconnect flow, this becomes true after sendGameContext() triggers provider attachment.
-     */
-    isServerReady(): boolean;
-    // ============= Utility Methods =============
-    /**
-     * Get the audio utterance ID for this session
-     */
-    getAudioUtteranceId(): string;
-    /**
-     * Get the WebSocket URL being used
-     */
-    getUrl(): string;
-    /**
-     * Get the underlying client state (for advanced usage)
-     */
-    getState(): ClientState;
 }
 /**
@@ -229,23 +139,38 @@ export class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognition
         // Track the expected UUID for this session
         this.expectedUuid = this.state.audioUtteranceId;
-        // If VGF state has promptSlotMap, configure gameContext to use it
-        if (this.state.promptSlotMap) {
+        // Forward optional VGF inputs (promptSlotMap, promptSTT/STF/TTF) into the GameContext.
+        const hasPromptInputs =
+            this.state.promptSlotMap !== undefined ||
+            this.state.promptSTT !== undefined ||
+            this.state.promptSTF !== undefined ||
+            this.state.promptTTF !== undefined;
+        if (hasPromptInputs) {
             // Set useContext=true in ASR config to enable context processing
             if (clientConfig.asrRequestConfig) {
                 clientConfig.asrRequestConfig.useContext = true;
             }
-            // Add promptSlotMap to gameContext
             if (!clientConfig.gameContext) {
                 // Only create gameContext if we have gameId and gamePhase
                 // These should come from the game's configuration
                 if (clientConfig.logger) {
-                    clientConfig.logger('warn', '[VGF] promptSlotMap found but no gameContext provided. SlotMap will not be sent.');
+                    clientConfig.logger('warn', '[VGF] prompt inputs found but no gameContext provided. They will not be sent.');
                 }
             } else {
-                // Merge promptSlotMap into existing gameContext
-                clientConfig.gameContext.slotMap = this.state.promptSlotMap;
+                if (this.state.promptSlotMap !== undefined) {
+                    clientConfig.gameContext.slotMap = this.state.promptSlotMap;
+                }
+                if (this.state.promptSTT !== undefined) {
+                    clientConfig.gameContext.promptSTT = this.state.promptSTT;
+                }
+                if (this.state.promptSTF !== undefined) {
+                    clientConfig.gameContext.promptSTF = this.state.promptSTF;
+                }
+                if (this.state.promptTTF !== undefined) {
+                    clientConfig.gameContext.promptTTF = this.state.promptTTF;
+                }
             }
         }
@@ -254,7 +179,7 @@ export class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognition
             ...clientConfig,
             // These callbacks ONLY update the VGF state sink
-            onTranscript: (result) => {
+            onTranscript: (result): void => {
                 // Skip update if UUID doesn't match (stale callback from previous session)
                 if (result.audioUtteranceId && result.audioUtteranceId !== this.expectedUuid) {
                     if (this.logger) {
@@ -275,7 +200,7 @@ export class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognition
                 }
             },
-            onMetadata: (metadata) => {
+            onMetadata: (metadata): void => {
                 // Skip update if UUID doesn't match (stale callback from previous session)
                 if (metadata.audioUtteranceId && metadata.audioUtteranceId !== this.expectedUuid) {
                     if (this.logger) {
@@ -291,14 +216,33 @@ export class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognition
                 }
             },
-            onFunctionCall: (result) => {
+            onSessionConfigured: (sessionConfigured): void => {
+                // Skip update if UUID doesn't match (stale callback from previous session)
+                if (sessionConfigured.audioUtteranceId && sessionConfigured.audioUtteranceId !== this.expectedUuid) {
+                    if (this.logger) {
+                        this.logger('warn',
+                            `[RecogSDK:VGF] Skipping sessionConfigured update: UUID mismatch (expected: ${this.expectedUuid}, got: ${sessionConfigured.audioUtteranceId})`
+                        );
+                    }
+                    return;
+                }
+                this.state = mapSessionConfiguredToState(this.state, sessionConfigured);
+                this.notifyStateChange();
+                if (clientConfig.onSessionConfigured) {
+                    clientConfig.onSessionConfigured(sessionConfigured);
+                }
+            },
+            onFunctionCall: (result): void => {
                 // Pass through function call - no VGF state changes needed for P2 feature
                 if (clientConfig.onFunctionCall) {
                     clientConfig.onFunctionCall(result);
                 }
             },
-            onError: (error) => {
+            onError: (error): void => {
                 // Skip update if UUID doesn't match (stale callback from previous session)
                 if (error.audioUtteranceId && error.audioUtteranceId !== this.expectedUuid) {
                     if (this.logger) {
@@ -310,7 +254,7 @@ export class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognition
                 }
                 this.isRecordingAudio = false; // Reset on error
-                this.state = mapErrorToState(this.state, error);
+                this.state = mapErrorToState(this.state);
                 this.notifyStateChange();
                 if (clientConfig.onError) {
@@ -318,14 +262,14 @@ export class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognition
                 }
             },
-            onConnected: () => {
+            onConnected: (): void => {
                 // Don't update READY here - client can accept audio before connection
                 if (clientConfig.onConnected) {
                     clientConfig.onConnected();
                 }
             },
-            onDisconnected: (code, reason) => {
+            onDisconnected: (code, reason): void => {
                 this.isRecordingAudio = false; // Reset on disconnect
                 if (clientConfig.onDisconnected) {
                     clientConfig.onDisconnected(code, reason);
@@ -343,19 +287,45 @@ export class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognition
     }
     sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void {
-        // Track recording for state updates
-        if (!this.isRecordingAudio) {
-            this.isRecordingAudio = true;
-            this.state = {
-                ...this.state,
-                startRecordingStatus: 'RECORDING',
-                startRecordingTimestamp: new Date().toISOString()
-            };
-            this.notifyStateChange();
-        }
+        this.markRecordingStarted();
         this.client.sendAudio(audioData);
     }
+    sendAudioWithSampleRate(
+        audioData: ArrayBuffer | ArrayBufferView | Blob,
+        sourceSampleRate: number
+    ): void {
+        this.markRecordingStarted();
+        this.client.sendAudioWithSampleRate(audioData, sourceSampleRate);
+    }
+    sendPrefixAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void {
+        // Pure pass-through. Prefix audio is NOT user audio, so do not flip
+        // startRecordingStatus to RECORDING — that transition belongs to the
+        // first sendAudio() call.
+        this.client.sendPrefixAudio(audioData);
+    }
+    getStats(): IRecognitionClientStats {
+        return this.client.getStats();
+    }
+    /**
+     * Set VGF recording status to RECORDING on the first audio chunk.
+     * Idempotent — subsequent calls are no-ops until disconnect/stop resets
+     * `isRecordingAudio`.
+     */
+    private markRecordingStarted(): void {
+        if (this.isRecordingAudio) return;
+        this.isRecordingAudio = true;
+        this.state = {
+            ...this.state,
+            startRecordingStatus: 'RECORDING',
+            startRecordingTimestamp: new Date().toISOString()
+        };
+        this.notifyStateChange();
+    }
     async stopRecording(): Promise<void> {
         this.isRecordingAudio = false;
         this.state = updateStateOnStop(this.state);

package/src/utils/audio-resampler.spec.ts ADDED Viewed

@@ -0,0 +1,69 @@
+import { downsamplePcm16 } from './audio-resampler.js';
+// Helper: build an Int16Array from numbers and return its backing buffer.
+const buf = (samples: number[]): ArrayBuffer => new Int16Array(samples).slice().buffer;
+const samples = (b: ArrayBuffer): number[] => Array.from(new Int16Array(b));
+describe('downsamplePcm16', () => {
+  it('returns a defensive copy when srcRate === targetRate', () => {
+    const input = new Int16Array([100, 200, 300, 400]);
+    const out = downsamplePcm16(input, 16000, 16000);
+    expect(samples(out)).toEqual([100, 200, 300, 400]);
+    // Mutating the input must not affect the returned buffer.
+    input[0] = 9999;
+    expect(samples(out)[0]).toBe(100);
+  });
+  it('integer ratio 48000 → 16000 averages every 3 source samples', () => {
+    // 9 source samples → 3 output samples (ratio = 3)
+    const input = buf([0, 6, 12, 30, 60, 90, -3, -6, -9]);
+    const out = downsamplePcm16(input, 48000, 16000);
+    expect(samples(out)).toEqual([
+      Math.round((0 + 6 + 12) / 3), // 6
+      Math.round((30 + 60 + 90) / 3), // 60
+      Math.round((-3 + -6 + -9) / 3), // -6
+    ]);
+  });
+  it('fractional ratio 44100 → 16000 produces ~44100/16000 output samples', () => {
+    // 441 source samples at 44.1kHz ≈ 10ms; expect ~160 output samples at 16kHz.
+    const input = new Int16Array(441).fill(1000);
+    const out = downsamplePcm16(input, 44100, 16000);
+    const outArr = new Int16Array(out);
+    expect(outArr.length).toBe(Math.floor(441 / (44100 / 16000))); // = 160
+    // Constant input should produce constant output (within rounding).
+    for (let i = 0; i < outArr.length; i++) {
+      expect(outArr[i]).toBe(1000);
+    }
+  });
+  it('accepts ArrayBuffer input as well as typed-array view', () => {
+    const view = new Int16Array([10, 20, 30, 40, 50, 60]);
+    const fromView = samples(downsamplePcm16(view, 48000, 16000));
+    const fromBuf = samples(downsamplePcm16(view.buffer, 48000, 16000));
+    expect(fromBuf).toEqual(fromView);
+  });
+  it('handles a typed-array view that shares a larger backing buffer', () => {
+    // Simulate a slice from a bigger capture buffer — only the view's bytes
+    // should be considered, not the rest of the underlying ArrayBuffer.
+    const big = new Int16Array([99, 99, 0, 6, 12, 30, 60, 90, 99, 99]);
+    const slice = new Int16Array(big.buffer, 2 * Int16Array.BYTES_PER_ELEMENT, 6);
+    const out = downsamplePcm16(slice, 48000, 16000);
+    expect(samples(out)).toEqual([
+      Math.round((0 + 6 + 12) / 3),
+      Math.round((30 + 60 + 90) / 3),
+    ]);
+  });
+  it('returns an empty buffer for empty input', () => {
+    const out = downsamplePcm16(new Int16Array(0), 48000, 16000);
+    expect(new Int16Array(out).length).toBe(0);
+  });
+  it('throws when asked to upsample', () => {
+    expect(() => downsamplePcm16(new Int16Array([1, 2]), 8000, 16000)).toThrow(
+      /cannot upsample/i
+    );
+  });
+});

package/src/utils/audio-resampler.ts ADDED Viewed

@@ -0,0 +1,79 @@
+/**
+ * Downsample PCM16 mono audio to a target sample rate.
+ *
+ * Used by `sendAudioWithSampleRate()` so integrators whose capture pipeline
+ * produces audio at the system's native rate (AudioContext defaults to
+ * 44.1 kHz or 48 kHz on most desktop/mobile hardware) can hand raw bytes
+ * to the SDK without having to bring in their own resampler. The
+ * recognition-service `SampleRateValidator` accepts only 16 kHz, so the SDK
+ * resamples on the client side before sending.
+ *
+ * Algorithm: box-filter averaging. For each output sample we average the
+ * source samples that fall into its time window. This is the cheapest
+ * correct approach for speech ASR — it has a built-in low-pass effect that
+ * suppresses aliasing far better than naive decimation or linear
+ * interpolation, while staying O(n) with no FFT and no dependencies.
+ * For integer ratios (e.g. 48000 → 16000, ratio = 3) it degenerates to a
+ * plain 3-sample average; for fractional ratios (e.g. 44100 → 16000) the
+ * window count varies by ±1 across output samples.
+ *
+ * Assumes the input is signed 16-bit little-endian PCM (the SDK's
+ * documented `AudioEncoding.LINEAR16` input format). Mono only. Stereo
+ * audio must be mixed to mono by the caller.
+ *
+ * @param input - Source PCM16 audio (ArrayBuffer or any ArrayBufferView).
+ * @param srcRate - Source sample rate in Hz (e.g. 44100, 48000).
+ * @param targetRate - Target sample rate in Hz. Must be ≤ srcRate.
+ * @returns A new ArrayBuffer of PCM16 samples at `targetRate`.
+ * @throws Error if `targetRate > srcRate` (upsampling is not supported —
+ *   capture at ≥ targetRate instead).
+ */
+export function downsamplePcm16(
+  input: ArrayBuffer | ArrayBufferView,
+  srcRate: number,
+  targetRate: number
+): ArrayBuffer {
+  if (targetRate > srcRate) {
+    throw new Error(
+      `downsamplePcm16: cannot upsample from ${srcRate}Hz to ${targetRate}Hz; ` +
+        `capture audio at ≥ ${targetRate}Hz instead.`
+    );
+  }
+  // Wrap input as Int16Array regardless of how it was passed in. Slice the
+  // backing ArrayBuffer to the exact byte range so a typed-array view that
+  // shares a larger buffer doesn't pull in neighbouring bytes.
+  const buffer = ArrayBuffer.isView(input)
+    ? input.buffer.slice(input.byteOffset, input.byteOffset + input.byteLength)
+    : input;
+  const src = new Int16Array(buffer);
+  if (srcRate === targetRate || src.length === 0) {
+    // Return a defensive copy so the caller's typed-array view can't be
+    // mutated through the returned buffer.
+    return src.slice().buffer;
+  }
+  const ratio = srcRate / targetRate;
+  const dstLen = Math.floor(src.length / ratio);
+  const dst = new Int16Array(dstLen);
+  for (let i = 0; i < dstLen; i++) {
+    const startPos = i * ratio;
+    const endPos = (i + 1) * ratio;
+    const startIdx = Math.floor(startPos);
+    const endIdx = Math.min(Math.ceil(endPos), src.length);
+    let sum = 0;
+    let count = 0;
+    for (let j = startIdx; j < endIdx; j++) {
+      // Bounds are guaranteed by the surrounding floor/ceil/min math, but
+      // noUncheckedIndexedAccess still narrows to `number | undefined`.
+      sum += src[j] ?? 0;
+      count++;
+    }
+    dst[i] = count > 0 ? Math.round(sum / count) : 0;
+  }
+  return dst.buffer;
+}