npm - @speechos/core - Versions diffs - 0.2.0 → 0.2.3 - Mend

@speechos/core 0.2.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/dist/audio-capture.d.cts +130 -0
package/dist/audio-capture.d.ts +130 -0
package/dist/backend.d.cts +41 -0
package/dist/backend.d.ts +41 -0
package/dist/config.d.cts +23 -7
package/dist/config.d.ts +23 -7
package/dist/index.cjs +1263 -158
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +6 -5
package/dist/index.d.ts +6 -5
package/dist/index.js +1262 -157
package/dist/index.js.map +1 -1
package/dist/livekit.d.cts +81 -14
package/dist/livekit.d.ts +81 -14
package/dist/speechos.d.cts +19 -3
package/dist/speechos.d.ts +19 -3
package/dist/state.d.cts +4 -1
package/dist/state.d.ts +4 -1
package/dist/types.d.cts +105 -9
package/dist/types.d.ts +105 -9
package/dist/websocket.d.cts +133 -0
package/dist/websocket.d.ts +133 -0
package/package.json +5 -4
package/dist/transcript-store.d.cts +0 -35
package/dist/transcript-store.d.ts +0 -35

package/dist/types.d.ts CHANGED Viewed

@@ -18,22 +18,60 @@ export interface ServerErrorMessage {
  */
 export type ErrorSource = "init" | "connection" | "timeout" | "server";
 /**
- * Configuration options for initializing SpeechOS
+ * Backend type for voice sessions
+ * - 'websocket': Direct WebSocket connection (lower latency, recommended)
+ * - 'livekit': LiveKit WebRTC connection (legacy)
  */
-export interface SpeechOSConfig {
-    /** API key for authentication with SpeechOS backend */
-    apiKey?: string;
+export type VoiceBackend = "websocket" | "livekit";
+/**
+ * Configuration options for initializing SpeechOS Core
+ */
+export interface SpeechOSCoreConfig {
+    /** API key for authentication with SpeechOS backend (required) */
+    apiKey: string;
     /** Optional user identifier for tracking which end user is using the SDK */
     userId?: string;
     /** Backend host URL for API calls (default: https://app.speechos.ai) */
     host?: string;
-    /** Position of the widget on screen (used by client package) */
-    position?: "bottom-center" | "bottom-right" | "bottom-left";
-    /** Custom z-index for widget overlay (used by client package) */
-    zIndex?: number;
     /** Enable debug logging */
     debug?: boolean;
 }
+/**
+ * Session settings passed when starting a voice session
+ * Contains user preferences for transcription and processing
+ */
+export interface SessionSettings {
+    /** Input language code for speech recognition (e.g., "en-US", "es", "fr") */
+    inputLanguageCode?: string;
+    /** Output language code for transcription formatting */
+    outputLanguageCode?: string;
+    /** Whether to apply AI formatting (removes filler words, adds punctuation) */
+    smartFormat?: boolean;
+    /** Custom vocabulary terms to improve transcription accuracy */
+    vocabulary?: string[];
+    /** Text snippets with trigger phrases that expand to full text */
+    snippets?: Array<{
+        trigger: string;
+        expansion: string;
+    }>;
+    /** Audio input device ID (empty string for system default) */
+    audioDeviceId?: string;
+}
+/**
+ * Options for starting a voice session
+ */
+export interface VoiceSessionOptions {
+    /** Callback when microphone is ready and capturing */
+    onMicReady?: () => void;
+    /** Action type for this session */
+    action?: SpeechOSAction;
+    /** Text to edit (for edit action) */
+    inputText?: string;
+    /** Command definitions (for command action) */
+    commands?: CommandDefinition[];
+    /** User settings for this session */
+    settings?: SessionSettings;
+}
 /**
  * LiveKit token response from the backend
  */
@@ -43,10 +81,59 @@ export interface LiveKitTokenResponse {
     room: string;
     identity: string;
 }
+/**
+ * User vocabulary data sent with transcription/edit requests
+ * Includes custom vocabulary terms for improved transcription accuracy
+ * and text snippets that can be expanded from trigger phrases
+ */
+export interface UserVocabularyData {
+    /** Custom vocabulary terms to improve transcription of domain-specific words */
+    vocabulary: string[];
+    /** Text snippets with trigger phrases that expand to full text */
+    snippets: Array<{
+        /** Short trigger phrase the user speaks */
+        trigger: string;
+        /** Full text to expand the trigger into */
+        expansion: string;
+    }>;
+}
 /**
  * Available actions that can be triggered from the widget
  */
-export type SpeechOSAction = "dictate" | "edit";
+export type SpeechOSAction = "dictate" | "edit" | "command";
+/**
+ * Definition of a command argument
+ */
+export interface CommandArgument {
+    /** Name of the argument (used as key in the result) */
+    name: string;
+    /** Description of what this argument represents */
+    description: string;
+    /** Type of the argument value */
+    type?: "string" | "number" | "integer" | "boolean";
+    /** Whether this argument is required (default: true) */
+    required?: boolean;
+}
+/**
+ * Definition of a command that can be matched
+ */
+export interface CommandDefinition {
+    /** Unique name/identifier for the command */
+    name: string;
+    /** Description of what this command does (helps LLM match intent) */
+    description: string;
+    /** Arguments that can be extracted from the user's speech */
+    arguments?: CommandArgument[];
+}
+/**
+ * Result of a successful command match
+ */
+export interface CommandResult {
+    /** Name of the matched command */
+    name: string;
+    /** Extracted argument values */
+    arguments: Record<string, unknown>;
+}
 /**
  * Recording/dictation states
  */
@@ -109,6 +196,10 @@ export interface SpeechOSEventMap {
         text: string;
         originalText: string;
     };
+    /** Emitted when command matching completes (null if no command matched) */
+    "command:complete": {
+        command: CommandResult | null;
+    };
     /** Emitted when transcribed text is inserted into a form field */
     "transcription:inserted": {
         text: string;
@@ -120,6 +211,11 @@ export interface SpeechOSEventMap {
         editedContent: string;
         element: HTMLElement;
     };
+    /** Emitted when user settings change (language, snippets, vocabulary, smartFormat) */
+    "settings:changed": {
+        /** Type of setting that changed */
+        setting: "language" | "snippets" | "vocabulary" | "smartFormat";
+    };
     /** Emitted when an error occurs */
     error: {
         code: string;

package/dist/websocket.d.cts ADDED Viewed

@@ -0,0 +1,133 @@
+/**
+ * WebSocket integration for SpeechOS SDK.
+ *
+ * Provides a direct WebSocket connection to the backend for voice sessions,
+ * bypassing LiveKit for lower latency. Uses audio buffering to capture
+ * audio immediately while the connection is being established.
+ */
+import type { CommandDefinition, CommandResult, ErrorSource, VoiceSessionOptions } from './types.js';
+/**
+ * A deferred promise with timeout support.
+ */
+export declare class Deferred<T> {
+    readonly promise: Promise<T>;
+    private _resolve;
+    private _reject;
+    private _timeoutId;
+    private _settled;
+    constructor();
+    setTimeout(ms: number, errorMessage: string, errorCode: string, errorSource: ErrorSource): void;
+    resolve(value: T): void;
+    reject(error: Error): void;
+    private clearTimeout;
+    get isSettled(): boolean;
+}
+/**
+ * WebSocket connection manager for voice sessions.
+ */
+declare class WebSocketManager {
+    private ws;
+    private audioCapture;
+    private sessionId;
+    private pendingAuth;
+    private pendingTranscript;
+    private pendingEditText;
+    private pendingCommand;
+    private pendingAudioSends;
+    private editOriginalText;
+    private lastInputText;
+    private sessionAction;
+    private sessionInputText;
+    private sessionCommands;
+    private sessionSettings;
+    /**
+     * Get the WebSocket URL for voice sessions.
+     */
+    private getWebSocketUrl;
+    /**
+     * Start a voice session with the WebSocket backend.
+     *
+     * This method:
+     * 1. Starts audio capture immediately (buffering)
+     * 2. Opens WebSocket connection
+     * 3. Authenticates with API key and action parameters
+     * 4. Flushes buffered audio and continues streaming
+     *
+     * @param options - Session options including action type and parameters
+     */
+    startVoiceSession(options?: VoiceSessionOptions): Promise<void>;
+    /**
+     * Send authentication message with action parameters.
+     * All session parameters are now sent upfront in the auth message.
+     */
+    private authenticate;
+    /**
+     * Send an audio chunk over the WebSocket.
+     * Tracks the promise so we can wait for all sends to complete.
+     */
+    private sendAudioChunk;
+    /**
+     * Actually send the audio chunk (async operation).
+     */
+    private doSendAudioChunk;
+    /**
+     * Handle incoming WebSocket messages.
+     */
+    private handleMessage;
+    private handleReady;
+    private handleIntermediateTranscription;
+    private handleFinalTranscript;
+    private handleEditedText;
+    private handleCommandResult;
+    private handleError;
+    /**
+     * Stop the voice session and request the transcript.
+     */
+    stopVoiceSession(): Promise<string>;
+    /**
+     * Request text editing using the transcript as instructions.
+     * Note: The input text was already sent in the auth message via startVoiceSession.
+     */
+    requestEditText(_originalText: string): Promise<string>;
+    /**
+     * Request command matching using the transcript as input.
+     * Note: The command definitions were already sent in the auth message via startVoiceSession.
+     */
+    requestCommand(_commands: CommandDefinition[]): Promise<CommandResult | null>;
+    /**
+     * Stop audio capture and wait for all data to be sent.
+     *
+     * Waits for:
+     * 1. All pending sendAudioChunk calls to complete (arrayBuffer conversion)
+     * 2. WebSocket buffer to drain (all data transmitted)
+     *
+     * WebSocket message ordering ensures server receives all audio before transcript request.
+     */
+    private stopAudioCapture;
+    /**
+     * Wait for the WebSocket send buffer to drain.
+     *
+     * This ensures all audio data has been transmitted before we request
+     * the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
+     */
+    private waitForBufferDrain;
+    /**
+     * Send a JSON message over the WebSocket.
+     */
+    private sendMessage;
+    /**
+     * Disconnect from the WebSocket.
+     */
+    disconnect(): Promise<void>;
+    /**
+     * Check if connected to WebSocket.
+     */
+    isConnected(): boolean;
+    /**
+     * Get the last input text from a command result.
+     * This is the raw transcript of what the user said.
+     */
+    getLastInputText(): string | undefined;
+}
+export declare const websocket: WebSocketManager;
+export {};

package/dist/websocket.d.ts ADDED Viewed

@@ -0,0 +1,133 @@
+/**
+ * WebSocket integration for SpeechOS SDK.
+ *
+ * Provides a direct WebSocket connection to the backend for voice sessions,
+ * bypassing LiveKit for lower latency. Uses audio buffering to capture
+ * audio immediately while the connection is being established.
+ */
+import type { CommandDefinition, CommandResult, ErrorSource, VoiceSessionOptions } from './types.js';
+/**
+ * A deferred promise with timeout support.
+ */
+export declare class Deferred<T> {
+    readonly promise: Promise<T>;
+    private _resolve;
+    private _reject;
+    private _timeoutId;
+    private _settled;
+    constructor();
+    setTimeout(ms: number, errorMessage: string, errorCode: string, errorSource: ErrorSource): void;
+    resolve(value: T): void;
+    reject(error: Error): void;
+    private clearTimeout;
+    get isSettled(): boolean;
+}
+/**
+ * WebSocket connection manager for voice sessions.
+ */
+declare class WebSocketManager {
+    private ws;
+    private audioCapture;
+    private sessionId;
+    private pendingAuth;
+    private pendingTranscript;
+    private pendingEditText;
+    private pendingCommand;
+    private pendingAudioSends;
+    private editOriginalText;
+    private lastInputText;
+    private sessionAction;
+    private sessionInputText;
+    private sessionCommands;
+    private sessionSettings;
+    /**
+     * Get the WebSocket URL for voice sessions.
+     */
+    private getWebSocketUrl;
+    /**
+     * Start a voice session with the WebSocket backend.
+     *
+     * This method:
+     * 1. Starts audio capture immediately (buffering)
+     * 2. Opens WebSocket connection
+     * 3. Authenticates with API key and action parameters
+     * 4. Flushes buffered audio and continues streaming
+     *
+     * @param options - Session options including action type and parameters
+     */
+    startVoiceSession(options?: VoiceSessionOptions): Promise<void>;
+    /**
+     * Send authentication message with action parameters.
+     * All session parameters are now sent upfront in the auth message.
+     */
+    private authenticate;
+    /**
+     * Send an audio chunk over the WebSocket.
+     * Tracks the promise so we can wait for all sends to complete.
+     */
+    private sendAudioChunk;
+    /**
+     * Actually send the audio chunk (async operation).
+     */
+    private doSendAudioChunk;
+    /**
+     * Handle incoming WebSocket messages.
+     */
+    private handleMessage;
+    private handleReady;
+    private handleIntermediateTranscription;
+    private handleFinalTranscript;
+    private handleEditedText;
+    private handleCommandResult;
+    private handleError;
+    /**
+     * Stop the voice session and request the transcript.
+     */
+    stopVoiceSession(): Promise<string>;
+    /**
+     * Request text editing using the transcript as instructions.
+     * Note: The input text was already sent in the auth message via startVoiceSession.
+     */
+    requestEditText(_originalText: string): Promise<string>;
+    /**
+     * Request command matching using the transcript as input.
+     * Note: The command definitions were already sent in the auth message via startVoiceSession.
+     */
+    requestCommand(_commands: CommandDefinition[]): Promise<CommandResult | null>;
+    /**
+     * Stop audio capture and wait for all data to be sent.
+     *
+     * Waits for:
+     * 1. All pending sendAudioChunk calls to complete (arrayBuffer conversion)
+     * 2. WebSocket buffer to drain (all data transmitted)
+     *
+     * WebSocket message ordering ensures server receives all audio before transcript request.
+     */
+    private stopAudioCapture;
+    /**
+     * Wait for the WebSocket send buffer to drain.
+     *
+     * This ensures all audio data has been transmitted before we request
+     * the transcript. Uses the same pattern as LiveKit's ReadableStream approach.
+     */
+    private waitForBufferDrain;
+    /**
+     * Send a JSON message over the WebSocket.
+     */
+    private sendMessage;
+    /**
+     * Disconnect from the WebSocket.
+     */
+    disconnect(): Promise<void>;
+    /**
+     * Check if connected to WebSocket.
+     */
+    isConnected(): boolean;
+    /**
+     * Get the last input text from a command result.
+     * This is the raw transcript of what the user said.
+     */
+    getLastInputText(): string | undefined;
+}
+export declare const websocket: WebSocketManager;
+export {};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@speechos/core",
-  "version": "0.2.0",
+  "version": "0.2.3",
   "description": "Headless core SDK for SpeechOS - state, events, LiveKit integration",
   "type": "module",
   "main": "./dist/index.cjs",
@@ -22,14 +22,15 @@
   },
   "repository": {
     "type": "git",
-    "url": "https://github.com/speechos-org/speechos.git",
-    "directory": "speechos-client/packages/core"
+    "url": "git+ssh://git@github.com/speechos-org/speechos-client.git",
+    "directory": "packages/core"
   },
   "homepage": "https://speechos.ai",
   "bugs": {
-    "url": "https://github.com/speechos-org/speechos/issues"
+    "url": "https://github.com/speechos-org/speechos-client/issues"
   },
   "scripts": {
+    "prepare": "npm run build",
     "build": "tsdown",
     "dev": "tsdown --watch",
     "type-check": "tsc --noEmit",

package/dist/transcript-store.d.cts DELETED Viewed

@@ -1,35 +0,0 @@
-/**
- * Transcript history store
- * Persists transcripts to localStorage for viewing in the settings modal
- */
-export type TranscriptAction = "dictate" | "edit";
-export interface TranscriptEntry {
-    id: string;
-    text: string;
-    timestamp: number;
-    action: TranscriptAction;
-    /** Original text before edit (only for edit actions) */
-    originalText?: string;
-}
-/**
- * Get all transcripts from localStorage
- */
-export declare function getTranscripts(): TranscriptEntry[];
-/**
- * Save a new transcript entry
- */
-export declare function saveTranscript(text: string, action: TranscriptAction, originalText?: string): TranscriptEntry;
-/**
- * Clear all transcript history
- */
-export declare function clearTranscripts(): void;
-/**
- * Delete a single transcript by ID
- */
-export declare function deleteTranscript(id: string): void;
-export declare const transcriptStore: {
-    getTranscripts: typeof getTranscripts;
-    saveTranscript: typeof saveTranscript;
-    clearTranscripts: typeof clearTranscripts;
-    deleteTranscript: typeof deleteTranscript;
-};

package/dist/transcript-store.d.ts DELETED Viewed

@@ -1,35 +0,0 @@
-/**
- * Transcript history store
- * Persists transcripts to localStorage for viewing in the settings modal
- */
-export type TranscriptAction = "dictate" | "edit";
-export interface TranscriptEntry {
-    id: string;
-    text: string;
-    timestamp: number;
-    action: TranscriptAction;
-    /** Original text before edit (only for edit actions) */
-    originalText?: string;
-}
-/**
- * Get all transcripts from localStorage
- */
-export declare function getTranscripts(): TranscriptEntry[];
-/**
- * Save a new transcript entry
- */
-export declare function saveTranscript(text: string, action: TranscriptAction, originalText?: string): TranscriptEntry;
-/**
- * Clear all transcript history
- */
-export declare function clearTranscripts(): void;
-/**
- * Delete a single transcript by ID
- */
-export declare function deleteTranscript(id: string): void;
-export declare const transcriptStore: {
-    getTranscripts: typeof getTranscripts;
-    saveTranscript: typeof saveTranscript;
-    clearTranscripts: typeof clearTranscripts;
-    deleteTranscript: typeof deleteTranscript;
-};