npm - @volley/recognition-client-sdk - Versions diffs - 0.1.200 - Mend

@volley/recognition-client-sdk 0.1.200

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/README.md +168 -0
package/dist/browser-CDQ_TzeH.d.ts +1039 -0
package/dist/index.d.ts +461 -0
package/dist/index.js +2332 -0
package/dist/index.js.map +1 -0
package/dist/recog-client-sdk.browser.d.ts +2 -0
package/dist/recog-client-sdk.browser.js +1843 -0
package/dist/recog-client-sdk.browser.js.map +1 -0
package/package.json +73 -0
package/src/browser.ts +24 -0
package/src/config-builder.ts +213 -0
package/src/factory.ts +43 -0
package/src/index.ts +86 -0
package/src/recognition-client.spec.ts +551 -0
package/src/recognition-client.ts +595 -0
package/src/recognition-client.types.ts +260 -0
package/src/simplified-vgf-recognition-client.spec.ts +671 -0
package/src/simplified-vgf-recognition-client.ts +339 -0
package/src/utils/audio-ring-buffer.ts +170 -0
package/src/utils/message-handler.ts +131 -0
package/src/utils/url-builder.ts +70 -0
package/src/vgf-recognition-mapper.ts +225 -0
package/src/vgf-recognition-state.ts +89 -0

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,461 @@
+import { A as ASRRequestConfig, G as GameContextV1, R as RecognitionCallbackUrl, T as TranscriptionResultV1, M as MetadataResultV1, E as ErrorResultV1, a as RealTimeTwoWayWebSocketRecognitionClientConfig, I as IRecognitionClient, C as ClientState, b as IRecognitionClientConfig } from './browser-CDQ_TzeH.js';
+export { k as ASRRequestV1, f as AudioEncoding, h as ControlSignal, h as ControlSignalTypeV1, D as DeepgramModel, F as FunctionCallResultV1, m as GoogleModel, e as IRecognitionClientStats, L as Language, c as RealTimeTwoWayWebSocketRecognitionClient, g as RecognitionContextTypeV1, l as RecognitionProvider, j as RecognitionResultTypeV1, S as SampleRate, d as TranscriptionResult, i as isNormalDisconnection } from './browser-CDQ_TzeH.js';
+import { z } from 'zod';
+/**
+ * Gemini Model Types
+ * Based on available models as of January 2025
+ *
+ * String enum: each member's value is the Gemini model identifier string
+ * (for example "gemini-2.5-pro"). Note that GEMINI_2_0_FLASH is pinned to the
+ * "gemini-2.0-flash-002" revision, whereas GEMINI_2_0_FLASH_LATEST uses the
+ * "-latest" alias.
+ *
+ * API Version Notes:
+ * - Gemini 2.5+ models: Use v1beta API (early access features)
+ * - Gemini 2.0 models: Use v1beta API (early access features)
+ * - Gemini 1.5 models: Use v1 API (stable, production-ready)
+ *
+ * @see https://ai.google.dev/gemini-api/docs/models
+ * @see https://ai.google.dev/gemini-api/docs/api-versions
+ */
+declare enum GeminiModel {
+    GEMINI_2_5_PRO = "gemini-2.5-pro",// State-of-the-art thinking model
+    GEMINI_2_5_FLASH = "gemini-2.5-flash",// Best price-performance balance
+    GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",// Fastest, most cost-efficient
+    GEMINI_2_0_FLASH_LATEST = "gemini-2.0-flash-latest",// Auto-updated to latest 2.0 flash
+    GEMINI_2_0_FLASH = "gemini-2.0-flash-002",// Specific stable version
+    GEMINI_2_0_FLASH_EXP = "gemini-2.0-flash-exp",// Experimental version
+    GEMINI_1_5_FLASH = "gemini-1.5-flash",// 1.5 generation (v1 API per the notes above)
+    GEMINI_1_5_PRO = "gemini-1.5-pro"// 1.5 generation (v1 API per the notes above)
+}
+/**
+ * OpenAI Model Types
+ *
+ * String enum of OpenAI model identifiers. "whisper-1" is the only member
+ * declared in this version.
+ */
+declare enum OpenAIModel {
+    WHISPER_1 = "whisper-1"
+}
+/**
+ * Standard stage/environment constants used across all services
+ *
+ * Every property is a readonly string literal. The `Stage` type declared
+ * directly after the constant is derived from it and is therefore the union
+ * "local" | "dev" | "staging" | "production".
+ */
+declare const STAGES: {
+    readonly LOCAL: "local";
+    readonly DEV: "dev";
+    readonly STAGING: "staging";
+    readonly PRODUCTION: "production";
+};
+type Stage = typeof STAGES[keyof typeof STAGES];// union of the STAGES values
+/**
+ * Configuration Builder for Recognition Client
+ *
+ * Simple builder pattern for RealTimeTwoWayWebSocketRecognitionClientConfig
+ */
+/**
+ * Builder for RealTimeTwoWayWebSocketRecognitionClientConfig
+ *
+ * Provides a fluent API for building client configurations. Every setter is
+ * declared to return `this`, so calls can be chained; `build()` ends the chain
+ * and returns a RealTimeTwoWayWebSocketRecognitionClientConfig.
+ *
+ * Example:
+ * ```typescript
+ * const config = new ConfigBuilder()
+ *   .url('ws://localhost:3101/ws/v1/recognize')
+ *   .asrRequestConfig({
+ *     provider: RecognitionProvider.DEEPGRAM,
+ *     model: 'nova-2-general'
+ *   })
+ *   .onTranscript((result) => console.log(result))
+ *   .build();
+ * ```
+ */
+declare class ConfigBuilder {
+    // Private state; its type is not exposed in the declaration file
+    // (presumably the configuration being accumulated — confirm in config-builder.ts).
+    private config;
+    /**
+     * Set the WebSocket URL
+     * @param url - Endpoint URL, e.g. 'ws://localhost:3101/ws/v1/recognize' as in the class example
+     */
+    url(url: string): this;
+    /**
+     * Set ASR request configuration
+     * @param config - ASR request settings (the class example passes `provider` and `model`)
+     */
+    asrRequestConfig(config: ASRRequestConfig): this;
+    /**
+     * Set game context
+     * @param context - Game context payload (GameContextV1)
+     */
+    gameContext(context: GameContextV1): this;
+    /**
+     * Set audio utterance ID
+     * @param id - Identifier for the audio utterance
+     */
+    audioUtteranceId(id: string): this;
+    /**
+     * Set callback URLs
+     * @param urls - List of RecognitionCallbackUrl entries
+     */
+    callbackUrls(urls: RecognitionCallbackUrl[]): this;
+    /**
+     * Set user ID
+     * @param id - User identifier
+     */
+    userId(id: string): this;
+    /**
+     * Set game session ID
+     * @param id - Game session identifier
+     */
+    gameSessionId(id: string): this;
+    /**
+     * Set device ID
+     * @param id - Device identifier
+     */
+    deviceId(id: string): this;
+    /**
+     * Set account ID
+     * @param id - Account identifier
+     */
+    accountId(id: string): this;
+    /**
+     * Set question answer ID
+     * @param id - Question/answer identifier
+     */
+    questionAnswerId(id: string): this;
+    /**
+     * Set platform
+     * @param platform - Free-form platform string (accepted values are not visible in this declaration)
+     */
+    platform(platform: string): this;
+    /**
+     * Set transcript callback
+     * @param callback - Receives a TranscriptionResultV1; its return value is ignored
+     */
+    onTranscript(callback: (result: TranscriptionResultV1) => void): this;
+    /**
+     * Set metadata callback
+     * @param callback - Receives a MetadataResultV1; its return value is ignored
+     */
+    onMetadata(callback: (metadata: MetadataResultV1) => void): this;
+    /**
+     * Set error callback
+     * @param callback - Receives an ErrorResultV1; its return value is ignored
+     */
+    onError(callback: (error: ErrorResultV1) => void): this;
+    /**
+     * Set connected callback
+     * @param callback - Invoked with no arguments
+     */
+    onConnected(callback: () => void): this;
+    /**
+     * Set disconnected callback
+     * @param callback - Receives a numeric code and a reason string
+     *   (presumably the WebSocket close code/reason — TODO confirm)
+     */
+    onDisconnected(callback: (code: number, reason: string) => void): this;
+    /**
+     * Set high water mark
+     * @param bytes - Threshold in bytes. NOTE(review): what the client does when the
+     *   mark is reached is not visible in this declaration file.
+     */
+    highWaterMark(bytes: number): this;
+    /**
+     * Set low water mark
+     * @param bytes - Threshold in bytes. NOTE(review): what the client does when the
+     *   mark is reached is not visible in this declaration file.
+     */
+    lowWaterMark(bytes: number): this;
+    /**
+     * Set max buffer duration in seconds
+     * @param seconds - Maximum buffer duration, in seconds
+     */
+    maxBufferDurationSec(seconds: number): this;
+    /**
+     * Set chunks per second
+     * @param chunks - Number of chunks per second
+     */
+    chunksPerSecond(chunks: number): this;
+    /**
+     * Set logger function
+     * @param logger - Receives a level ('debug' | 'info' | 'warn' | 'error'),
+     *   a message, and an optional untyped data payload
+     */
+    logger(logger: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void): this;
+    /**
+     * Build the configuration
+     * @returns The resulting RealTimeTwoWayWebSocketRecognitionClientConfig
+     */
+    build(): RealTimeTwoWayWebSocketRecognitionClientConfig;
+}
+/**
+ * Factory function for creating Recognition Client instances
+ */
+/**
+ * Create a recognition client from a configuration object
+ *
+ * Example:
+ * ```typescript
+ * const client = createClient({
+ *   url: 'ws://localhost:3101/ws/v1/recognize',
+ *   onTranscript: (result) => console.log(result)
+ * });
+ * ```
+ *
+ * NOTE(review): whether the client connects on creation is not visible in this
+ * declaration — confirm against the implementation.
+ *
+ * @param config - Client configuration (RealTimeTwoWayWebSocketRecognitionClientConfig)
+ * @returns The new client, typed as IRecognitionClient
+ */
+declare function createClient(config: RealTimeTwoWayWebSocketRecognitionClientConfig): IRecognitionClient;
+/**
+ * Create a recognition client using the builder pattern
+ *
+ * Example:
+ * ```typescript
+ * const client = createClientWithBuilder((builder) =>
+ *   builder
+ *     .url('ws://localhost:3101/ws/v1/recognize')
+ *     .onTranscript((result) => console.log(result))
+ *     .onError((error) => console.error(error))
+ * );
+ * ```
+ *
+ * As the example shows, the callback does not call `build()` itself; it only
+ * returns the configured builder.
+ *
+ * @param configure - Callback that receives a ConfigBuilder and returns the configured ConfigBuilder
+ * @returns The new client, typed as IRecognitionClient
+ */
+declare function createClientWithBuilder(configure: (builder: ConfigBuilder) => ConfigBuilder): IRecognitionClient;
+/**
+ * VGF-style state schema for game-side recognition state/results management.
+ *
+ * This schema provides a standardized way for game developers to manage
+ * voice recognition state and results in their applications. It supports:
+ *
+ * STEP 1: Basic transcription flow
+ * STEP 2: Mic auto-stop upon correct answer (using partial transcripts)
+ * STEP 3: Semantic/function-call outcomes for game actions
+ *
+ * Shape notes (read from the type parameters below):
+ * - `audioUtteranceId` is the only field required on input.
+ * - `pendingTranscript` carries a default, so it is optional on input but
+ *   always a `string` on the parsed output.
+ * - The object uses Zod's "strip" mode for unknown keys.
+ * - The first object type after `z.ZodTypeAny` is the parsed (output) shape,
+ *   the second is the accepted input shape.
+ *
+ * Ideally this would live in a more centralized shared type library
+ * (VGF? Platform SDK?) that also provides helper functions, so that game
+ * developers do not have to carry it themselves.
+ */
+declare const RecognitionVGFStateSchema: z.ZodObject<{
+    audioUtteranceId: z.ZodString;
+    startRecordingStatus: z.ZodOptional<z.ZodString>;
+    transcriptionStatus: z.ZodOptional<z.ZodString>;
+    finalTranscript: z.ZodOptional<z.ZodString>;
+    finalConfidence: z.ZodOptional<z.ZodNumber>;
+    asrConfig: z.ZodOptional<z.ZodString>;
+    startRecordingTimestamp: z.ZodOptional<z.ZodString>;
+    finalRecordingTimestamp: z.ZodOptional<z.ZodString>;
+    finalTranscriptionTimestamp: z.ZodOptional<z.ZodString>;
+    pendingTranscript: z.ZodDefault<z.ZodOptional<z.ZodString>>;
+    pendingConfidence: z.ZodOptional<z.ZodNumber>;
+    functionCallMetadata: z.ZodOptional<z.ZodString>;
+    functionCallConfidence: z.ZodOptional<z.ZodNumber>;
+    finalFunctionCallTimestamp: z.ZodOptional<z.ZodString>;
+    promptSlotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
+}, "strip", z.ZodTypeAny, {
+    audioUtteranceId: string;
+    pendingTranscript: string;
+    startRecordingStatus?: string | undefined;
+    transcriptionStatus?: string | undefined;
+    finalTranscript?: string | undefined;
+    finalConfidence?: number | undefined;
+    asrConfig?: string | undefined;
+    startRecordingTimestamp?: string | undefined;
+    finalRecordingTimestamp?: string | undefined;
+    finalTranscriptionTimestamp?: string | undefined;
+    pendingConfidence?: number | undefined;
+    functionCallMetadata?: string | undefined;
+    functionCallConfidence?: number | undefined;
+    finalFunctionCallTimestamp?: string | undefined;
+    promptSlotMap?: Record<string, string[]> | undefined;
+}, {
+    audioUtteranceId: string;
+    startRecordingStatus?: string | undefined;
+    transcriptionStatus?: string | undefined;
+    finalTranscript?: string | undefined;
+    finalConfidence?: number | undefined;
+    asrConfig?: string | undefined;
+    startRecordingTimestamp?: string | undefined;
+    finalRecordingTimestamp?: string | undefined;
+    finalTranscriptionTimestamp?: string | undefined;
+    pendingTranscript?: string | undefined;
+    pendingConfidence?: number | undefined;
+    functionCallMetadata?: string | undefined;
+    functionCallConfidence?: number | undefined;
+    finalFunctionCallTimestamp?: string | undefined;
+    promptSlotMap?: Record<string, string[]> | undefined;
+}>;
+type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>;// parsed (output) shape: pendingTranscript is required here
+/**
+ * String constants NOT_READY, READY, RECORDING and FINISHED; each property's
+ * value equals its own name.
+ *
+ * NOTE(review): presumably these are the values stored in
+ * RecognitionState.startRecordingStatus (typed there as a plain string) — confirm.
+ */
+declare const RecordingStatus: {
+    readonly NOT_READY: "NOT_READY";
+    readonly READY: "READY";
+    readonly RECORDING: "RECORDING";
+    readonly FINISHED: "FINISHED";
+};
+/** Union of the RecordingStatus values: "NOT_READY" | "READY" | "RECORDING" | "FINISHED". */
+type RecordingStatusType = typeof RecordingStatus[keyof typeof RecordingStatus];
+/**
+ * String constants NOT_STARTED, IN_PROGRESS, FINALIZED and ERROR; each
+ * property's value equals its own name.
+ *
+ * NOTE(review): presumably these are the values stored in
+ * RecognitionState.transcriptionStatus (typed there as a plain string) — confirm.
+ */
+declare const TranscriptionStatus: {
+    readonly NOT_STARTED: "NOT_STARTED";
+    readonly IN_PROGRESS: "IN_PROGRESS";
+    readonly FINALIZED: "FINALIZED";
+    readonly ERROR: "ERROR";
+};
+/** Union of the TranscriptionStatus values: "NOT_STARTED" | "IN_PROGRESS" | "FINALIZED" | "ERROR". */
+type TranscriptionStatusType = typeof TranscriptionStatus[keyof typeof TranscriptionStatus];
+/**
+ * Create a RecognitionState for the given utterance.
+ *
+ * NOTE(review): which optional fields are pre-populated is decided by the
+ * implementation and is not visible in this declaration file.
+ *
+ * @param audioUtteranceId - Identifier of the audio utterance
+ * @returns A RecognitionState value
+ */
+declare function createInitialRecognitionState(audioUtteranceId: string): RecognitionState;
+/**
+ * Report whether a recording-status change from `from` to `to` is valid.
+ *
+ * The set of allowed transitions lives in the implementation and is not
+ * visible here.
+ *
+ * @param from - Current status, or `undefined` (presumably when none has been set yet — confirm)
+ * @param to - Target status
+ * @returns `true` when the transition is considered valid
+ */
+declare function isValidRecordingStatusTransition(from: string | undefined, to: string): boolean;
+/**
+ * Simplified VGF Recognition Client
+ *
+ * A thin wrapper around RealTimeTwoWayWebSocketRecognitionClient that maintains
+ * a VGF RecognitionState as a pure sink/output of recognition events.
+ *
+ * The VGF state is updated based on events but never influences client behavior.
+ * All functionality is delegated to the underlying client.
+ */
+/**
+ * Configuration for SimplifiedVGFRecognitionClient
+ *
+ * Extends IRecognitionClientConfig (imported from the shared browser chunk)
+ * with two optional, VGF-specific members.
+ */
+interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
+    /**
+     * Callback invoked whenever the VGF state changes
+     * Use this to update your UI or React state
+     * Receives the RecognitionState; its return value is ignored.
+     */
+    onStateChange?: (state: RecognitionState) => void;
+    /**
+     * Optional initial state to restore from a previous session
+     * If provided, audioUtteranceId will be extracted and used
+     * Typed as RecognitionState, i.e. the schema's parsed shape, in which
+     * `pendingTranscript` is a required string.
+     */
+    initialState?: RecognitionState;
+}
+/**
+ * Interface for SimplifiedVGFRecognitionClient
+ *
+ * A simplified client that maintains VGF state for game developers.
+ * All methods from the underlying client are available, plus VGF state management.
+ *
+ * SimplifiedVGFRecognitionClient implements this interface, and
+ * createSimplifiedVGFClient returns it.
+ */
+interface ISimplifiedVGFRecognitionClient {
+    /**
+     * Connect to the recognition service WebSocket
+     * @returns Promise that resolves when connected and ready
+     */
+    connect(): Promise<void>;
+    /**
+     * Send audio data for transcription
+     * @param audioData - PCM audio data as ArrayBuffer or typed array
+     */
+    sendAudio(audioData: ArrayBuffer | ArrayBufferView): void;
+    /**
+     * Stop recording and wait for final transcription
+     * @returns Promise that resolves when transcription is complete
+     */
+    stopRecording(): Promise<void>;
+    /**
+     * Get the current VGF recognition state
+     * @returns Current RecognitionState with all transcription data
+     */
+    getVGFState(): RecognitionState;
+    /**
+     * Check if connected to the WebSocket
+     * @returns `true` when connected
+     */
+    isConnected(): boolean;
+    /**
+     * Check if currently connecting
+     * @returns `true` while a connection attempt is in progress
+     */
+    isConnecting(): boolean;
+    /**
+     * Check if currently stopping
+     * @returns `true` while the client is stopping
+     */
+    isStopping(): boolean;
+    /**
+     * Check if transcription has finished
+     * @returns `true` once transcription has finished
+     */
+    isTranscriptionFinished(): boolean;
+    /**
+     * Check if the audio buffer has overflowed
+     * @returns `true` when the audio buffer has overflowed
+     */
+    isBufferOverflowing(): boolean;
+    /**
+     * Get the audio utterance ID for this session
+     * @returns The audio utterance ID string
+     */
+    getAudioUtteranceId(): string;
+    /**
+     * Get the underlying client state (for advanced usage)
+     * @returns The ClientState of the underlying client
+     */
+    getState(): ClientState;
+}
+/**
+ * This wrapper ONLY maintains VGF state as a sink.
+ * All actual functionality is delegated to the underlying client.
+ *
+ * Implements ISimplifiedVGFRecognitionClient; the per-method contracts are
+ * documented on that interface. The private members are listed without types
+ * because the declaration file does not expose them.
+ */
+declare class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitionClient {
+    // Private members (types hidden in the .d.ts): presumably the wrapped client,
+    // the VGF state, a recording flag and the onStateChange callback — confirm
+    // against simplified-vgf-recognition-client.ts.
+    private client;
+    private state;
+    private isRecordingAudio;
+    private stateChangeCallback;
+    /**
+     * @param config - Client configuration, including the optional
+     *   `onStateChange` and `initialState` members
+     */
+    constructor(config: SimplifiedVGFClientConfig);
+    connect(): Promise<void>;
+    sendAudio(audioData: ArrayBuffer | ArrayBufferView): void;
+    stopRecording(): Promise<void>;
+    getAudioUtteranceId(): string;
+    getState(): ClientState;
+    isConnected(): boolean;
+    isConnecting(): boolean;
+    isStopping(): boolean;
+    isTranscriptionFinished(): boolean;
+    isBufferOverflowing(): boolean;
+    getVGFState(): RecognitionState;
+    // Private helper; presumably invokes the state-change callback — confirm.
+    private notifyStateChange;
+}
+/**
+ * Factory function for creating simplified client
+ * Usage examples:
+ *
+ * NOTE(review): the examples pass `provider: 'deepgram'` as a string literal,
+ * while the ConfigBuilder example uses `RecognitionProvider.DEEPGRAM` — confirm
+ * the literal is accepted by ASRRequestConfig.
+ *
+ * ```typescript
+ * // Basic usage
+ * const client = createSimplifiedVGFClient({
+ *   asrRequestConfig: { provider: 'deepgram', language: 'en' },
+ *   onStateChange: (state) => {
+ *     console.log('VGF State updated:', state);
+ *     // Update React state, game UI, etc.
+ *   }
+ * });
+ *
+ * // With initial state (e.g., restoring from previous session)
+ * const client = createSimplifiedVGFClient({
+ *   asrRequestConfig: { provider: 'deepgram', language: 'en' },
+ *   initialState: previousState, // Will use audioUtteranceId from state
+ *   onStateChange: (state) => setVGFState(state)
+ * });
+ *
+ * // With initial state containing promptSlotMap for enhanced recognition
+ * const stateWithSlots: RecognitionState = {
+ *   audioUtteranceId: 'session-123',
+ *   pendingTranscript: '', // required: RecognitionState is the schema's parsed (output) type
+ *   promptSlotMap: {
+ *     'song_title': ['one time', 'baby'],
+ *     'artists': ['justin bieber']
+ *   }
+ * };
+ * const client = createSimplifiedVGFClient({
+ *   asrRequestConfig: { provider: 'deepgram', language: 'en' },
+ *   gameContext: {
+ *     type: RecognitionContextTypeV1.GAME_CONTEXT,
+ *     gameId: 'music-quiz',  // Your game's ID
+ *     gamePhase: 'song-guessing'  // Current game phase
+ *   },
+ *   initialState: stateWithSlots, // promptSlotMap will be added to gameContext
+ *   onStateChange: (state) => setVGFState(state)
+ * });
+ *
+ * await client.connect();
+ * client.sendAudio(audioData);
+ * // VGF state automatically updates based on transcription results
+ * ```
+ *
+ * @param config - Client configuration; `onStateChange` and `initialState` are optional
+ * @returns The new client, typed as ISimplifiedVGFRecognitionClient
+ */
+declare function createSimplifiedVGFClient(config: SimplifiedVGFClientConfig): ISimplifiedVGFRecognitionClient;
+/**
+ * Base URL schema shared across service endpoint helpers.
+ *
+ * Both fields are required strings. It is the value type of the per-stage
+ * base maps and the return type of the `get…Base` helpers.
+ */
+type ServiceBaseUrls = {
+    /** Base for HTTP endpoints (presumably an http:// or https:// URL — TODO confirm). */
+    httpBase: string;
+    /** Base for WebSocket endpoints (presumably a ws:// or wss:// URL — TODO confirm). */
+    wsBase: string;
+};
+/**
+ * Base URL mappings keyed by stage.
+ *
+ * Both constants are typed Record<Stage, ServiceBaseUrls>, so each is declared
+ * to hold an entry for every Stage value. Going by their names,
+ * RECOGNITION_SERVICE_BASES covers the recognition-service and
+ * RECOGNITION_CONDUCTOR_BASES the recognition-conductor; the concrete URLs
+ * live in the implementation, not in this declaration file.
+ */
+declare const RECOGNITION_SERVICE_BASES: Record<Stage, ServiceBaseUrls>;
+declare const RECOGNITION_CONDUCTOR_BASES: Record<Stage, ServiceBaseUrls>;
+/**
+ * Normalize arbitrary stage input into a known `Stage`, defaulting to `local`.
+ *
+ * @param input - A Stage, any other string, `null`, or `undefined` (the parameter is optional)
+ * @returns A value of the `Stage` union
+ */
+declare function normalizeStage(input?: Stage | string | null | undefined): Stage;
+/**
+ * Resolve the recognition-service base URLs for a given stage.
+ *
+ * @param stage - Optional stage; accepts the same inputs as `normalizeStage`
+ * @returns The HTTP and WebSocket bases as a ServiceBaseUrls object
+ */
+declare function getRecognitionServiceBase(stage?: Stage | string | null | undefined): ServiceBaseUrls;
+/**
+ * Convenience helper for retrieving the HTTP base URL.
+ *
+ * @param stage - Optional stage; accepts the same inputs as `normalizeStage`
+ * @returns The HTTP base URL string
+ */
+declare function getRecognitionServiceHttpBase(stage?: Stage | string | null | undefined): string;
+/**
+ * Convenience helper for retrieving the WebSocket base URL.
+ *
+ * @param stage - Optional stage; accepts the same inputs as `normalizeStage`
+ * @returns The WebSocket base URL string
+ */
+declare function getRecognitionServiceWsBase(stage?: Stage | string | null | undefined): string;
+/**
+ * Expose hostname lookup separately for callers that need raw host strings.
+ *
+ * @param stage - Optional stage; accepts the same inputs as `normalizeStage`
+ * @returns The host string
+ */
+declare function getRecognitionServiceHost(stage?: Stage | string | null | undefined): string;
+/**
+ * Resolve the recognition-conductor base URLs for a given stage.
+ *
+ * @param stage - Optional stage; a Stage, any other string, `null`, or `undefined`
+ * @returns The HTTP and WebSocket bases as a ServiceBaseUrls object
+ */
+declare function getRecognitionConductorBase(stage?: Stage | string | null | undefined): ServiceBaseUrls;
+/**
+ * Presumably the recognition-conductor counterpart of
+ * getRecognitionServiceHttpBase (HTTP base URL) — confirm against the implementation.
+ *
+ * @param stage - Optional stage; a Stage, any other string, `null`, or `undefined`
+ * @returns A string
+ */
+declare function getRecognitionConductorHttpBase(stage?: Stage | string | null | undefined): string;
+/**
+ * Presumably the recognition-conductor counterpart of
+ * getRecognitionServiceWsBase (WebSocket base URL) — confirm against the implementation.
+ *
+ * @param stage - Optional stage; a Stage, any other string, `null`, or `undefined`
+ * @returns A string
+ */
+declare function getRecognitionConductorWsBase(stage?: Stage | string | null | undefined): string;
+/**
+ * Presumably the recognition-conductor counterpart of
+ * getRecognitionServiceHost (raw host string) — confirm against the implementation.
+ *
+ * @param stage - Optional stage; a Stage, any other string, `null`, or `undefined`
+ * @returns A string
+ */
+declare function getRecognitionConductorHost(stage?: Stage | string | null | undefined): string;
+export { ASRRequestConfig, ClientState, ConfigBuilder, ErrorResultV1, GameContextV1, GeminiModel, IRecognitionClient, IRecognitionClientConfig, type ISimplifiedVGFRecognitionClient, MetadataResultV1, OpenAIModel, RECOGNITION_CONDUCTOR_BASES, RECOGNITION_SERVICE_BASES, RealTimeTwoWayWebSocketRecognitionClientConfig, RecognitionCallbackUrl, type RecognitionState, RecognitionVGFStateSchema, RecordingStatus, type RecordingStatusType, type SimplifiedVGFClientConfig, SimplifiedVGFRecognitionClient, TranscriptionResultV1, TranscriptionStatus, type TranscriptionStatusType, createClient, createClientWithBuilder, createInitialRecognitionState, createSimplifiedVGFClient, getRecognitionConductorBase, getRecognitionConductorHost, getRecognitionConductorHttpBase, getRecognitionConductorWsBase, getRecognitionServiceBase, getRecognitionServiceHost, getRecognitionServiceHttpBase, getRecognitionServiceWsBase, isValidRecordingStatusTransition, normalizeStage };