@tryhamster/gerbil 1.0.0-rc.6 → 1.0.0-rc.7
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/index.js +6 -6
- package/dist/cli.mjs +7 -7
- package/dist/cli.mjs.map +1 -1
- package/dist/frameworks/express.d.mts +1 -1
- package/dist/frameworks/express.mjs +1 -1
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.mjs +1 -1
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.mjs +1 -1
- package/dist/frameworks/next.d.mts +2 -2
- package/dist/frameworks/next.mjs +1 -1
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.mjs +1 -1
- package/dist/gerbil-CJ3ifloF.mjs +4 -0
- package/dist/{gerbil-CgLjZy0K.mjs → gerbil-Dw4Qj77e.mjs} +3 -3
- package/dist/{gerbil-CgLjZy0K.mjs.map → gerbil-Dw4Qj77e.mjs.map} +1 -1
- package/dist/{gerbil-CnncBh38.d.mts → gerbil-qOTe1nl2.d.mts} +2 -2
- package/dist/{gerbil-CnncBh38.d.mts.map → gerbil-qOTe1nl2.d.mts.map} +1 -1
- package/dist/index.d.mts +2 -2
- package/dist/index.mjs +3 -3
- package/dist/integrations/ai-sdk.d.mts +1 -1
- package/dist/integrations/ai-sdk.mjs +2 -2
- package/dist/integrations/langchain.d.mts +1 -1
- package/dist/integrations/langchain.mjs +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.mjs +1 -1
- package/dist/integrations/mcp.d.mts +2 -2
- package/dist/integrations/mcp.mjs +4 -4
- package/dist/kokoro-BNTb6egA.mjs +20210 -0
- package/dist/kokoro-BNTb6egA.mjs.map +1 -0
- package/dist/{kokoro-DFRQ1OeM.js → kokoro-CMOGDSgT.js} +2 -2
- package/dist/{kokoro-DFRQ1OeM.js.map → kokoro-CMOGDSgT.js.map} +1 -1
- package/dist/{mcp-DGK69gbU.mjs → mcp-BvbriaBy.mjs} +3 -3
- package/dist/{mcp-DGK69gbU.mjs.map → mcp-BvbriaBy.mjs.map} +1 -1
- package/dist/{one-liner-BJ7hDVlY.mjs → one-liner-s-lD8rCC.mjs} +2 -2
- package/dist/{one-liner-BJ7hDVlY.mjs.map → one-liner-s-lD8rCC.mjs.map} +1 -1
- package/dist/{repl-BTq1JyH7.mjs → repl-DveXw36T.mjs} +3 -3
- package/dist/skills/index.d.mts +6 -6
- package/dist/skills/index.d.mts.map +1 -1
- package/dist/skills/index.mjs +3 -3
- package/dist/{skills-Hf3iEkq4.mjs → skills-CD3Orlex.mjs} +12 -12
- package/dist/skills-CD3Orlex.mjs.map +1 -0
- package/dist/{stt-Te8Qz-Ay.js → stt-Bu-E23Sc.js} +2 -2
- package/dist/{stt-Te8Qz-Ay.js.map → stt-Bu-E23Sc.js.map} +1 -1
- package/dist/{transformers.web-M6mCnEYJ.js → transformers.web-DiD1gTwk.js} +14483 -170
- package/dist/transformers.web-DiD1gTwk.js.map +1 -0
- package/dist/{transformers.web-DokyH3rP.js → transformers.web-u34VxRFM.js} +1 -1
- package/dist/{tts-C0xx3CtE.js → tts-CqroPaSK.js} +3 -3
- package/dist/{tts-C0xx3CtE.js.map → tts-CqroPaSK.js.map} +1 -1
- package/dist/{tts-DzJDBsFR.mjs → tts-DXgsKGCe.mjs} +1 -1
- package/dist/{tts-D5hrNcGD.mjs → tts-DeGANMNV.mjs} +2 -2
- package/dist/{tts-D5hrNcGD.mjs.map → tts-DeGANMNV.mjs.map} +1 -1
- package/dist/{types-DJhOZ6Ct.d.mts → types-CiTc7ez3.d.mts} +1 -1
- package/dist/{types-DJhOZ6Ct.d.mts.map → types-CiTc7ez3.d.mts.map} +1 -1
- package/package.json +4 -4
- package/dist/gerbil-IwhB_Sip.mjs +0 -4
- package/dist/skills-Hf3iEkq4.mjs.map +0 -1
- package/dist/transformers.web-M6mCnEYJ.js.map +0 -1
package/dist/{stt-Te8Qz-Ay.js.map → stt-Bu-E23Sc.js.map}

@@ -1 +1 @@
- {"version":3,"file":"stt-Te8Qz-Ay.js","names":["WHISPER_MODELS: STTModelConfig[]","tfDevice: \"webgpu\" | \"cpu\" | \"wasm\"","audioData: Float32Array","pipelineOptions: any","transcribeResult: TranscribeResult","audioBuffer: Float32Array[]","intervalId: ReturnType<typeof setInterval> | null","e: any"],"sources":["../src/core/stt.ts"],"sourcesContent":[…],"mappings":"…"}
+ {"version":3,"file":"stt-Bu-E23Sc.js","names":["WHISPER_MODELS: STTModelConfig[]","tfDevice: \"webgpu\" | \"cpu\" | \"wasm\"","audioData: Float32Array","pipelineOptions: any","transcribeResult: TranscribeResult","audioBuffer: Float32Array[]","intervalId: ReturnType<typeof setInterval> | null","e: any"],"sources":["../src/core/stt.ts"],"sourcesContent":[…],"mappings":"…"}

The two source maps are identical apart from the "file" field, which is updated to match the renamed chunk; the "names", "sources", "sourcesContent" (the inlined src/core/stt.ts source), and "mappings" entries are unchanged.