@speechos/core 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,130 @@
+ /**
+ * Audio capture module for SpeechOS WebSocket integration.
+ *
+ * Provides MediaRecorder-based audio capture with:
+ * - Format detection for cross-browser compatibility
+ * - Buffering for instant start (audio captured before connection is ready)
+ * - Atomic buffer swap pattern to prevent chunk reordering
+ */
+ /**
+ * Supported audio formats with their MIME types and whether
+ * Deepgram needs explicit encoding parameters.
+ */
+ export interface AudioFormat {
+ /** MIME type for MediaRecorder */
+ mimeType: string;
+ /** Short identifier for the format */
+ format: 'webm' | 'mp4' | 'pcm';
+ /** Whether Deepgram needs encoding/sample_rate params */
+ needsEncodingParams: boolean;
+ }
+ /**
+ * Detect the best supported audio format for the current browser.
+ *
+ * IMPORTANT: Safari must use MP4/AAC. Its WebM/Opus implementation is buggy
+ * and produces truncated/incomplete audio.
+ */
+ export declare function getSupportedAudioFormat(): AudioFormat;
+ /**
+ * Callback for receiving audio chunks.
+ */
+ export type AudioChunkCallback = (chunk: Blob) => void;
+ /**
+ * Audio capture manager with buffering support.
+ *
+ * Usage:
+ * 1. Create instance with onChunk callback
+ * 2. Call start() - immediately begins capturing
+ * 3. Call setReady() when connection is established - flushes buffer
+ * 4. Call stop() when done
+ */
+ export declare class AudioCapture {
+ private mediaStream;
+ private recorder;
+ private buffer;
+ private isReady;
+ private isRecording;
+ private onChunk;
+ private audioFormat;
+ private deviceId;
+ /**
+ * Time slice for MediaRecorder in milliseconds.
+ *
+ * Safari requires a larger timeslice (1000ms) to properly flush its internal
+ * audio buffers. Smaller values cause Safari to drop or truncate audio data.
+ * See: https://community.openai.com/t/whisper-problem-with-audio-mp4-blobs-from-safari/
+ *
+ * Other browsers (Chrome, Firefox, Edge) work well with smaller timeslices
+ * which provide lower latency for real-time transcription.
+ */
+ private static readonly TIME_SLICE_MS;
+ private static readonly SAFARI_TIME_SLICE_MS;
+ /**
+ * @param onChunk - Callback for receiving audio chunks
+ * @param deviceId - Optional audio device ID (empty string or undefined for system default)
+ */
+ constructor(onChunk: AudioChunkCallback, deviceId?: string);
+ /**
+ * Get the appropriate timeslice for the current browser.
+ * Safari needs a larger timeslice to avoid dropping audio data.
+ */
+ private getTimeSlice;
+ /**
+ * Get the timeslice being used (in milliseconds).
+ * Useful for callers that need to wait for audio processing.
+ */
+ getTimeSliceMs(): number;
+ /**
+ * Get the audio format being used.
+ */
+ getFormat(): AudioFormat;
+ /**
+ * Start capturing audio immediately.
+ *
+ * Audio chunks will be buffered until setReady() is called.
+ */
+ start(): Promise<void>;
+ /**
+ * Handle an audio chunk with atomic buffer swap pattern.
+ *
+ * If not ready: buffer the chunk.
+ * If ready: send directly via callback.
+ */
+ private handleChunk;
+ /**
+ * Mark the capture as ready (connection established).
+ *
+ * This flushes any buffered chunks and switches to direct mode.
+ * Uses atomic swap to prevent chunk reordering.
+ */
+ setReady(): void;
+ /**
+ * Stop capturing audio and wait for final chunk.
+ *
+ * Uses requestData() before stop() to force the MediaRecorder to flush
+ * any buffered audio immediately. This is critical for Safari which
+ * may hold audio data in internal buffers.
+ *
+ * Safari requires an additional delay after stopping to ensure all audio
+ * from its internal encoding pipeline has been fully processed and emitted.
+ */
+ stop(): Promise<void>;
+ /**
+ * Check if currently recording.
+ */
+ get recording(): boolean;
+ /**
+ * Check if ready (connection established, direct mode active).
+ */
+ get ready(): boolean;
+ /**
+ * Get the number of buffered chunks waiting to be sent.
+ */
+ get bufferedChunks(): number;
+ }
+ /**
+ * Factory function to create an AudioCapture instance.
+ * @param onChunk - Callback for receiving audio chunks
+ * @param deviceId - Optional audio device ID (empty string or undefined for system default)
+ */
+ export declare function createAudioCapture(onChunk: AudioChunkCallback, deviceId?: string): AudioCapture;
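The AudioCapture declarations above spell out the intended call order: start(), then setReady() once the connection is up, then stop(). A minimal usage sketch follows; it assumes createAudioCapture is re-exported from the package entry point, and the transport pieces are placeholders rather than part of the package.

// Usage sketch (not from the package): `connectSocket` and `sendAudio` are
// placeholders for your own transport; the AudioCapture calls follow the
// declarations above. The root import path is an assumption.
import { createAudioCapture } from '@speechos/core';

async function dictate(
  connectSocket: () => Promise<void>,
  sendAudio: (chunk: Blob) => void,
): Promise<void> {
  // Chunks emitted before setReady() are buffered internally, so capture can
  // start before the transcription connection is ready ("instant start").
  const capture = createAudioCapture((chunk) => sendAudio(chunk));

  await capture.start();   // begins recording immediately, buffering chunks
  await connectSocket();   // e.g. open the SpeechOS/Deepgram WebSocket
  capture.setReady();      // flush buffered chunks, then stream directly

  // ... later, when the user finishes speaking:
  await capture.stop();    // requests the final chunk and waits for it
}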
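The declarations mention an "atomic buffer swap" in handleChunk()/setReady() but do not show it. A rough sketch of that pattern, not the package's actual source, just to illustrate why swapping the buffer out before flushing keeps chunks in arrival order:

// Illustrative sketch only: the buffer is swapped out in one step so the set
// of chunks being flushed is fixed, flushed in arrival order, and only then
// is direct mode enabled.
type ChunkCallback = (chunk: Blob) => void;

class ChunkRelay {
  private buffer: Blob[] = [];
  private isReady = false;

  constructor(private readonly onChunk: ChunkCallback) {}

  handleChunk(chunk: Blob): void {
    if (this.isReady) {
      this.onChunk(chunk);     // direct mode: forward immediately
    } else {
      this.buffer.push(chunk); // connection not ready: keep in arrival order
    }
  }

  setReady(): void {
    const pending = this.buffer; // atomic swap: take the old buffer...
    this.buffer = [];            // ...and replace it in the same tick
    for (const chunk of pending) {
      this.onChunk(chunk);       // flush everything captured so far, in order
    }
    this.isReady = true;         // from now on chunks bypass the buffer
  }
}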
@@ -0,0 +1,41 @@
+ /**
+ * Backend abstraction for voice sessions.
+ *
+ * Provides a unified interface for voice backends.
+ * Currently always uses WebSocket backend.
+ */
+ import type { CommandDefinition, CommandResult, VoiceSessionOptions } from './types.js';
+ /**
+ * Voice backend interface - common methods between backends
+ */
+ export interface VoiceBackend {
+ startVoiceSession(options?: VoiceSessionOptions): Promise<void>;
+ stopVoiceSession(): Promise<string>;
+ requestEditText(originalText: string): Promise<string>;
+ requestCommand(commands: CommandDefinition[]): Promise<CommandResult | null>;
+ disconnect(): Promise<void>;
+ isConnected(): boolean;
+ /** Get the last input text (transcript) from a command result */
+ getLastInputText?(): string | undefined;
+ prefetchToken?(): Promise<unknown>;
+ startAutoRefresh?(): void;
+ stopAutoRefresh?(): void;
+ invalidateTokenCache?(): void;
+ }
+ /**
+ * Get the active voice backend.
+ * Always returns WebSocket backend (LiveKit is legacy).
+ *
+ * @returns The websocket backend
+ */
+ export declare function getBackend(): VoiceBackend;
+ /**
+ * Check if the current backend is LiveKit.
+ * @deprecated Always returns false - LiveKit is legacy
+ */
+ export declare function isLiveKitBackend(): boolean;
+ /**
+ * Check if the current backend is WebSocket.
+ * @deprecated Always returns true - WebSocket is the only backend
+ */
+ export declare function isWebSocketBackend(): boolean;
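For the backend module, a short dictation round trip against the VoiceBackend interface above; only declared members are used, and the root import path is an assumption rather than something confirmed by this diff.

// Sketch only: assumes getBackend is re-exported from the package entry point.
import { getBackend } from '@speechos/core';

async function dictateOnce(): Promise<string> {
  const backend = getBackend();      // always the WebSocket backend per the docs

  // Optional token warm-up, only if this backend implements it.
  await backend.prefetchToken?.();

  await backend.startVoiceSession(); // begin streaming microphone audio
  // ... user speaks ...
  const transcript = await backend.stopVoiceSession(); // resolves with the text

  await backend.disconnect();
  return transcript;
}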
package/dist/config.d.cts CHANGED
@@ -1,30 +1,35 @@
  /**
- * Configuration management for SpeechOS SDK
+ * Configuration management for SpeechOS Core SDK
  */
- import type { SpeechOSConfig } from "./types.js";
+ import type { SpeechOSCoreConfig } from "./types.js";
  /**
  * Default host - can be overridden by SPEECHOS_HOST env var at build time
  */
  export declare const DEFAULT_HOST: string;
  /**
- * Default configuration values
+ * Configuration with defaults applied (all fields required internally)
  */
- export declare const defaultConfig: Required<SpeechOSConfig>;
+ interface ResolvedConfig {
+ apiKey: string;
+ userId: string;
+ host: string;
+ debug: boolean;
+ }
  /**
  * Validates and merges user config with defaults
  * @param userConfig - User-provided configuration
  * @returns Validated and merged configuration
  */
- export declare function validateConfig(userConfig?: SpeechOSConfig): Required<SpeechOSConfig>;
+ export declare function validateConfig(userConfig: SpeechOSCoreConfig): ResolvedConfig;
  /**
  * Get the current configuration
  */
- export declare function getConfig(): Required<SpeechOSConfig>;
+ export declare function getConfig(): ResolvedConfig;
  /**
  * Set the current configuration
  * @param config - Configuration to set
  */
- export declare function setConfig(config: SpeechOSConfig): void;
+ export declare function setConfig(config: SpeechOSCoreConfig): void;
  /**
  * Reset configuration to defaults
  */
@@ -34,3 +39,4 @@ export declare function resetConfig(): void;
  * @param userId - The user identifier to set
  */
  export declare function updateUserId(userId: string): void;
+ export {};
package/dist/config.d.ts CHANGED
@@ -1,30 +1,35 @@
  /**
- * Configuration management for SpeechOS SDK
+ * Configuration management for SpeechOS Core SDK
  */
- import type { SpeechOSConfig } from "./types.js";
+ import type { SpeechOSCoreConfig } from "./types.js";
  /**
  * Default host - can be overridden by SPEECHOS_HOST env var at build time
  */
  export declare const DEFAULT_HOST: string;
  /**
- * Default configuration values
+ * Configuration with defaults applied (all fields required internally)
  */
- export declare const defaultConfig: Required<SpeechOSConfig>;
+ interface ResolvedConfig {
+ apiKey: string;
+ userId: string;
+ host: string;
+ debug: boolean;
+ }
  /**
  * Validates and merges user config with defaults
  * @param userConfig - User-provided configuration
  * @returns Validated and merged configuration
  */
- export declare function validateConfig(userConfig?: SpeechOSConfig): Required<SpeechOSConfig>;
+ export declare function validateConfig(userConfig: SpeechOSCoreConfig): ResolvedConfig;
  /**
  * Get the current configuration
  */
- export declare function getConfig(): Required<SpeechOSConfig>;
+ export declare function getConfig(): ResolvedConfig;
  /**
  * Set the current configuration
  * @param config - Configuration to set
  */
- export declare function setConfig(config: SpeechOSConfig): void;
+ export declare function setConfig(config: SpeechOSCoreConfig): void;
  /**
  * Reset configuration to defaults
  */
@@ -34,3 +39,4 @@ export declare function resetConfig(): void;
  * @param userId - The user identifier to set
  */
  export declare function updateUserId(userId: string): void;
+ export {};
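The config change narrows validateConfig/getConfig/setConfig from Required<SpeechOSConfig> to an internal ResolvedConfig with a SpeechOSCoreConfig input. A hedged sketch of how the new shape is likely used; the optional fields of SpeechOSCoreConfig are inferred from ResolvedConfig, and the root import path is an assumption, neither confirmed by this diff.

// Sketch only: assumes setConfig/getConfig/updateUserId are re-exported from
// the package entry point, and that host/debug are optional on
// SpeechOSCoreConfig (guessed from ResolvedConfig above).
import { setConfig, getConfig, updateUserId } from '@speechos/core';

setConfig({
  apiKey: 'sk_live_example',   // placeholder key
  userId: 'user-123',
  debug: true,                 // host presumably falls back to DEFAULT_HOST when omitted
});

updateUserId('user-456');      // switch the active user on the existing config

const config = getConfig();    // ResolvedConfig: every field present after defaults
console.log(config.host, config.debug);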