@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/README.md +79 -14
  2. package/dist/auto-update-S9s5-g0C.mjs +3 -0
  3. package/dist/browser/index.d.ts +1009 -0
  4. package/dist/browser/index.d.ts.map +1 -0
  5. package/dist/browser/index.js +2492 -0
  6. package/dist/browser/index.js.map +1 -0
  7. package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-CORwaIyC.mjs} +514 -73
  8. package/dist/chrome-backend-CORwaIyC.mjs.map +1 -0
  9. package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-DIKYoWj-.mjs} +1 -1
  10. package/dist/cli.mjs +3359 -647
  11. package/dist/cli.mjs.map +1 -1
  12. package/dist/frameworks/express.d.mts +1 -1
  13. package/dist/frameworks/express.mjs +3 -4
  14. package/dist/frameworks/express.mjs.map +1 -1
  15. package/dist/frameworks/fastify.d.mts +1 -1
  16. package/dist/frameworks/fastify.mjs +2 -3
  17. package/dist/frameworks/fastify.mjs.map +1 -1
  18. package/dist/frameworks/hono.d.mts +1 -1
  19. package/dist/frameworks/hono.mjs +2 -3
  20. package/dist/frameworks/hono.mjs.map +1 -1
  21. package/dist/frameworks/next.d.mts +2 -2
  22. package/dist/frameworks/next.mjs +2 -3
  23. package/dist/frameworks/next.mjs.map +1 -1
  24. package/dist/frameworks/react.d.mts +1 -1
  25. package/dist/frameworks/trpc.d.mts +1 -1
  26. package/dist/frameworks/trpc.mjs +2 -3
  27. package/dist/frameworks/trpc.mjs.map +1 -1
  28. package/dist/gerbil-DJGqq7BX.mjs +4 -0
  29. package/dist/gerbil-DoDGHe6Z.mjs +1631 -0
  30. package/dist/gerbil-DoDGHe6Z.mjs.map +1 -0
  31. package/dist/gerbil-qOTe1nl2.d.mts +431 -0
  32. package/dist/gerbil-qOTe1nl2.d.mts.map +1 -0
  33. package/dist/index.d.mts +411 -9
  34. package/dist/index.d.mts.map +1 -1
  35. package/dist/index.mjs +7 -6
  36. package/dist/index.mjs.map +1 -1
  37. package/dist/integrations/ai-sdk.d.mts +122 -4
  38. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  39. package/dist/integrations/ai-sdk.mjs +238 -11
  40. package/dist/integrations/ai-sdk.mjs.map +1 -1
  41. package/dist/integrations/langchain.d.mts +132 -2
  42. package/dist/integrations/langchain.d.mts.map +1 -1
  43. package/dist/integrations/langchain.mjs +175 -8
  44. package/dist/integrations/langchain.mjs.map +1 -1
  45. package/dist/integrations/llamaindex.d.mts +1 -1
  46. package/dist/integrations/llamaindex.mjs +2 -3
  47. package/dist/integrations/llamaindex.mjs.map +1 -1
  48. package/dist/integrations/mcp-client.mjs +4 -4
  49. package/dist/integrations/mcp-client.mjs.map +1 -1
  50. package/dist/integrations/mcp.d.mts +2 -2
  51. package/dist/integrations/mcp.d.mts.map +1 -1
  52. package/dist/integrations/mcp.mjs +5 -6
  53. package/dist/kokoro-BNTb6egA.mjs +20210 -0
  54. package/dist/kokoro-BNTb6egA.mjs.map +1 -0
  55. package/dist/kokoro-CMOGDSgT.js +20212 -0
  56. package/dist/kokoro-CMOGDSgT.js.map +1 -0
  57. package/dist/{mcp-R8kRLIKb.mjs → mcp-kzDDWIoS.mjs} +10 -37
  58. package/dist/mcp-kzDDWIoS.mjs.map +1 -0
  59. package/dist/microphone-DaMZFRuR.mjs +3 -0
  60. package/dist/{one-liner-BUQR0nqq.mjs → one-liner-DxnNs_JK.mjs} +2 -2
  61. package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-DxnNs_JK.mjs.map} +1 -1
  62. package/dist/repl-DGUw4fCc.mjs +9 -0
  63. package/dist/skills/index.d.mts +305 -14
  64. package/dist/skills/index.d.mts.map +1 -1
  65. package/dist/skills/index.mjs +5 -6
  66. package/dist/skills-DulrOPeP.mjs +1435 -0
  67. package/dist/skills-DulrOPeP.mjs.map +1 -0
  68. package/dist/stt-1WIefHwc.mjs +3 -0
  69. package/dist/stt-CG_7KB_0.mjs +434 -0
  70. package/dist/stt-CG_7KB_0.mjs.map +1 -0
  71. package/dist/stt-Dne6SENv.js +434 -0
  72. package/dist/stt-Dne6SENv.js.map +1 -0
  73. package/dist/{tools-BsiEE6f2.mjs → tools-Bi1P7Xoy.mjs} +6 -7
  74. package/dist/{tools-BsiEE6f2.mjs.map → tools-Bi1P7Xoy.mjs.map} +1 -1
  75. package/dist/transformers.web-DiD1gTwk.js +44695 -0
  76. package/dist/transformers.web-DiD1gTwk.js.map +1 -0
  77. package/dist/transformers.web-u34VxRFM.js +3 -0
  78. package/dist/tts-B1pZMlDv.mjs +3 -0
  79. package/dist/tts-C2FzKuSx.js +725 -0
  80. package/dist/tts-C2FzKuSx.js.map +1 -0
  81. package/dist/tts-CyHhcLtN.mjs +731 -0
  82. package/dist/tts-CyHhcLtN.mjs.map +1 -0
  83. package/dist/types-CiTc7ez3.d.mts +353 -0
  84. package/dist/types-CiTc7ez3.d.mts.map +1 -0
  85. package/dist/{utils-7vXqtq2Q.mjs → utils-CZBZ8dgR.mjs} +1 -1
  86. package/dist/{utils-7vXqtq2Q.mjs.map → utils-CZBZ8dgR.mjs.map} +1 -1
  87. package/docs/ai-sdk.md +137 -21
  88. package/docs/browser.md +241 -2
  89. package/docs/memory.md +72 -0
  90. package/docs/stt.md +494 -0
  91. package/docs/tts.md +569 -0
  92. package/docs/vision.md +396 -0
  93. package/package.json +21 -22
  94. package/dist/auto-update-BbNHbSU1.mjs +0 -3
  95. package/dist/browser/index.d.mts +0 -262
  96. package/dist/browser/index.d.mts.map +0 -1
  97. package/dist/browser/index.mjs +0 -755
  98. package/dist/browser/index.mjs.map +0 -1
  99. package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
  100. package/dist/gerbil-BfnsFWRE.mjs +0 -644
  101. package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
  102. package/dist/gerbil-BjW-z7Fq.mjs +0 -5
  103. package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
  104. package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
  105. package/dist/mcp-R8kRLIKb.mjs.map +0 -1
  106. package/dist/models-DKULvhOr.mjs +0 -136
  107. package/dist/models-DKULvhOr.mjs.map +0 -1
  108. package/dist/models-De2-_GmQ.d.mts +0 -22
  109. package/dist/models-De2-_GmQ.d.mts.map +0 -1
  110. package/dist/skills-D3CEpgDc.mjs +0 -630
  111. package/dist/skills-D3CEpgDc.mjs.map +0 -1
  112. package/dist/types-BS1N92Jt.d.mts +0 -183
  113. package/dist/types-BS1N92Jt.d.mts.map +0 -1
  114. /package/dist/{chunk-Ct1HF2bE.mjs → chunk-CkXuGtQK.mjs} +0 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stt-CG_7KB_0.mjs","names":["WHISPER_MODELS: STTModelConfig[]","tfDevice: \"webgpu\" | \"cpu\" | \"wasm\"","audioData: Float32Array","pipelineOptions: any","transcribeResult: TranscribeResult","audioBuffer: Float32Array[]","intervalId: ReturnType<typeof setInterval> | null","e: any"],"sources":["../src/core/stt.ts"],"sourcesContent":["/**\n * Speech-to-Text with Whisper\n *\n * Provides local speech recognition using Whisper ONNX models via transformers.js.\n * Supports multiple model sizes and languages.\n *\n * @example\n * ```ts\n * const stt = new WhisperSTT();\n * await stt.load({ onProgress: (p) => console.log(p.status) });\n *\n * // Transcribe audio (Float32Array at 16kHz)\n * const result = await stt.transcribe(audioData);\n * console.log(result.text);\n *\n * // With timestamps\n * const result = await stt.transcribe(audioData, { timestamps: true });\n * for (const seg of result.segments) {\n * console.log(`[${seg.start.toFixed(1)}s] ${seg.text}`);\n * }\n * ```\n */\n\nimport type {\n LoadSTTOptions,\n ProgressInfo,\n STTModelConfig,\n StreamingTranscriptionOptions,\n StreamingTranscriptionSession,\n TranscribeOptions,\n TranscribeResult,\n TranscribeSegment,\n} from \"./types.js\";\n\n// ============================================\n// Model Registry\n// ============================================\n\n/**\n * Available Whisper models\n * Ordered by size (smallest first)\n */\nexport const WHISPER_MODELS: STTModelConfig[] = [\n {\n id: \"whisper-tiny.en\",\n repo: \"onnx-community/whisper-tiny.en\",\n description: \"Tiny English-only model, fastest\",\n size: \"39M\",\n multilingual: false,\n languages: [\"en\"],\n sampleRate: 16000,\n },\n {\n id: \"whisper-tiny\",\n repo: \"onnx-community/whisper-tiny\",\n description: \"Tiny multilingual model\",\n size: \"39M\",\n multilingual: true,\n languages: [\"en\", \"es\", \"fr\", \"de\", \"it\", \"pt\", \"nl\", \"ru\", \"zh\", \"ja\", \"ko\"],\n sampleRate: 16000,\n },\n {\n id: 
\"whisper-base.en\",\n repo: \"onnx-community/whisper-base.en\",\n description: \"Base English-only model, good balance\",\n size: \"74M\",\n multilingual: false,\n languages: [\"en\"],\n sampleRate: 16000,\n },\n {\n id: \"whisper-base\",\n repo: \"onnx-community/whisper-base\",\n description: \"Base multilingual model\",\n size: \"74M\",\n multilingual: true,\n languages: [\"en\", \"es\", \"fr\", \"de\", \"it\", \"pt\", \"nl\", \"ru\", \"zh\", \"ja\", \"ko\"],\n sampleRate: 16000,\n },\n {\n id: \"whisper-small.en\",\n repo: \"onnx-community/whisper-small.en\",\n description: \"Small English-only model, high quality\",\n size: \"244M\",\n multilingual: false,\n languages: [\"en\"],\n sampleRate: 16000,\n },\n {\n id: \"whisper-small\",\n repo: \"onnx-community/whisper-small\",\n description: \"Small multilingual model\",\n size: \"244M\",\n multilingual: true,\n languages: [\"en\", \"es\", \"fr\", \"de\", \"it\", \"pt\", \"nl\", \"ru\", \"zh\", \"ja\", \"ko\"],\n sampleRate: 16000,\n },\n {\n id: \"whisper-large-v3-turbo\",\n repo: \"onnx-community/whisper-large-v3-turbo\",\n description: \"Large Turbo model, 5.4x faster, 80+ languages\",\n size: \"809M\",\n multilingual: true,\n languages: [\n \"en\",\n \"es\",\n \"fr\",\n \"de\",\n \"it\",\n \"pt\",\n \"nl\",\n \"ru\",\n \"zh\",\n \"ja\",\n \"ko\",\n \"ar\",\n \"hi\",\n \"vi\",\n \"th\",\n ],\n sampleRate: 16000,\n },\n];\n\n// Default model\nconst DEFAULT_MODEL = \"whisper-tiny.en\";\n\n// ============================================\n// Audio Utilities\n// ============================================\n\n/**\n * Decode WAV file to Float32Array\n * Handles stereo to mono conversion\n */\nexport function decodeWav(buffer: Uint8Array): { audio: Float32Array; sampleRate: number } {\n const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);\n\n // Validate RIFF header\n const riff = String.fromCharCode(buffer[0], buffer[1], buffer[2], buffer[3]);\n if (riff !== \"RIFF\") {\n throw new 
Error(\"Invalid WAV file: missing RIFF header\");\n }\n\n // Get format details\n const numChannels = view.getUint16(22, true);\n const sampleRate = view.getUint32(24, true);\n const bitsPerSample = view.getUint16(34, true);\n\n if (bitsPerSample !== 16) {\n throw new Error(`Unsupported bit depth: ${bitsPerSample}. Only 16-bit WAV is supported.`);\n }\n\n // Find data chunk\n let dataOffset = 12;\n while (dataOffset < buffer.length - 8) {\n const chunkId = String.fromCharCode(\n buffer[dataOffset],\n buffer[dataOffset + 1],\n buffer[dataOffset + 2],\n buffer[dataOffset + 3],\n );\n const chunkSize = view.getUint32(dataOffset + 4, true);\n if (chunkId === \"data\") {\n dataOffset += 8;\n break;\n }\n dataOffset += 8 + chunkSize;\n }\n\n const dataSize = buffer.length - dataOffset;\n const bytesPerSample = bitsPerSample / 8;\n const totalSamples = Math.floor(dataSize / bytesPerSample);\n const samplesPerChannel = Math.floor(totalSamples / numChannels);\n\n // Convert to mono Float32Array\n const audio = new Float32Array(samplesPerChannel);\n\n for (let i = 0; i < samplesPerChannel; i++) {\n if (numChannels === 2) {\n const left = view.getInt16(dataOffset + i * 4, true);\n const right = view.getInt16(dataOffset + i * 4 + 2, true);\n audio[i] = (left + right) / 2 / 32768;\n } else {\n const sample = view.getInt16(dataOffset + i * 2, true);\n audio[i] = sample / 32768;\n }\n }\n\n return { audio, sampleRate };\n}\n\n/**\n * Resample audio to target sample rate using linear interpolation\n */\nexport function resampleAudio(audio: Float32Array, fromRate: number, toRate: number): Float32Array {\n if (fromRate === toRate) return audio;\n\n const ratio = toRate / fromRate;\n const newLength = Math.round(audio.length * ratio);\n const result = new Float32Array(newLength);\n\n for (let i = 0; i < newLength; i++) {\n const srcIndex = i / ratio;\n const floor = Math.floor(srcIndex);\n const ceil = Math.min(floor + 1, audio.length - 1);\n const t = srcIndex - floor;\n result[i] = 
audio[floor] * (1 - t) + audio[ceil] * t;\n }\n\n return result;\n}\n\n// ============================================\n// WhisperSTT Class\n// ============================================\n\n/**\n * Speech-to-Text using Whisper ONNX models\n */\nexport class WhisperSTT {\n private modelConfig: STTModelConfig;\n private pipeline: any = null;\n private loadPromise: Promise<void> | null = null;\n private _isLoaded = false;\n private _deviceMode: \"webgpu\" | \"cpu\" = \"cpu\";\n\n constructor(modelId: string = DEFAULT_MODEL) {\n const config = WHISPER_MODELS.find((m) => m.id === modelId);\n if (!config) {\n const available = WHISPER_MODELS.map((m) => m.id).join(\", \");\n throw new Error(`Unknown STT model: ${modelId}. Available: ${available}`);\n }\n this.modelConfig = config;\n }\n\n /**\n * Check if model is loaded\n */\n isLoaded(): boolean {\n return this._isLoaded;\n }\n\n /**\n * Get model configuration\n */\n getModelConfig(): STTModelConfig {\n return this.modelConfig;\n }\n\n /**\n * Get model info (alias for getModelConfig)\n */\n getModelInfo(): STTModelConfig {\n return this.modelConfig;\n }\n\n /**\n * Get current device mode\n */\n getDeviceMode(): \"webgpu\" | \"cpu\" {\n return this._deviceMode;\n }\n\n /**\n * List available models\n */\n static listModels(): STTModelConfig[] {\n return [...WHISPER_MODELS];\n }\n\n /**\n * Load the STT model\n */\n async load(options: LoadSTTOptions = {}): Promise<void> {\n if (this._isLoaded) return;\n if (this.loadPromise) {\n await this.loadPromise;\n return;\n }\n\n this.loadPromise = this._load(options);\n await this.loadPromise;\n }\n\n private async _load(options: LoadSTTOptions = {}): Promise<void> {\n const { onProgress, device = \"auto\" } = options;\n\n onProgress?.({ status: \"Loading transformers.js...\" });\n\n // Check if we're in Node.js or browser\n const isNode = typeof process !== \"undefined\" && process.versions?.node;\n\n // Import transformers.js dynamically\n // tsdown handles resolution: 
Node.js = external, Browser = bundled\n const { pipeline, env } = await import(\"@huggingface/transformers\");\n\n // Configure environment based on runtime\n if (isNode) {\n // Node.js: allow local models (for CLI/server use)\n env.allowLocalModels = true;\n env.allowRemoteModels = true;\n } else {\n // Browser: use IndexedDB cache, fetch from HuggingFace CDN\n env.useBrowserCache = true;\n env.allowLocalModels = false;\n // Load WASM files from CDN (not bundled with gerbil)\n if (env.backends?.onnx?.wasm) {\n env.backends.onnx.wasm.wasmPaths =\n \"https://cdn.jsdelivr.net/npm/onnxruntime-web@1.21.0/dist/\";\n }\n }\n\n // Determine device\n // Note: Whisper ONNX models work best with fp32 on CPU/WASM\n // WebGPU support for ASR is limited, so we use CPU for reliability\n let tfDevice: \"webgpu\" | \"cpu\" | \"wasm\" = \"cpu\";\n\n // In browser, use WASM for better compatibility\n if (!isNode) {\n tfDevice = \"wasm\";\n }\n\n // Store device mode\n this._deviceMode = \"cpu\"; // STT always reports as CPU since WASM is CPU-based\n\n onProgress?.({ status: `Loading ${this.modelConfig.id}...` });\n\n // Load the ASR pipeline\n // Always use fp32 for Whisper models (fp16 not available for ONNX ASR)\n this.pipeline = await pipeline(\"automatic-speech-recognition\", this.modelConfig.repo, {\n dtype: \"fp32\",\n device: tfDevice,\n progress_callback: (progress: any) => {\n if (progress.status === \"progress\" && progress.file) {\n onProgress?.({\n status: `Downloading ${progress.file}`,\n progress: Math.round(progress.progress || 0),\n file: progress.file,\n });\n }\n },\n });\n\n this._isLoaded = true;\n onProgress?.({ status: `Ready (${tfDevice.toUpperCase()})!` });\n }\n\n /**\n * Transcribe audio to text\n *\n * @param audio - Audio data as Float32Array (mono, 16kHz preferred) or Uint8Array (WAV file)\n * @param options - Transcription options\n * @returns Transcription result with text and optional timestamps\n */\n async transcribe(\n audio: Float32Array | 
Uint8Array,\n options: TranscribeOptions = {},\n ): Promise<TranscribeResult> {\n if (!this._isLoaded) {\n throw new Error(\"STT model not loaded. Call load() first.\");\n }\n\n const { language, timestamps = false, onProgress } = options;\n const startTime = performance.now();\n\n // Convert Uint8Array (WAV) to Float32Array\n let audioData: Float32Array;\n let inputSampleRate = 16000;\n\n if (audio instanceof Uint8Array) {\n onProgress?.({ status: \"Decoding audio...\" });\n const decoded = decodeWav(audio);\n audioData = decoded.audio;\n inputSampleRate = decoded.sampleRate;\n } else {\n audioData = audio;\n }\n\n // Resample to 16kHz if needed\n if (inputSampleRate !== 16000) {\n onProgress?.({ status: \"Resampling to 16kHz...\" });\n audioData = resampleAudio(audioData, inputSampleRate, 16000);\n }\n\n const audioDuration = audioData.length / 16000;\n onProgress?.({ status: `Transcribing ${audioDuration.toFixed(1)}s of audio...` });\n\n // Build pipeline options\n const pipelineOptions: any = {};\n\n // Only set language for multilingual models\n if (language && this.modelConfig.multilingual) {\n pipelineOptions.language = language;\n pipelineOptions.task = \"transcribe\";\n }\n\n // Enable timestamps if requested\n if (timestamps) {\n pipelineOptions.return_timestamps = true;\n }\n\n // Run transcription\n const result = await this.pipeline(audioData, pipelineOptions);\n\n const totalTime = performance.now() - startTime;\n\n // Build result\n let text = result.text?.trim() || \"\";\n\n // Filter out Whisper artifacts\n if (text === \"[BLANK_AUDIO]\" || text === \"(blank audio)\" || text === \"[BLANK AUDIO]\") {\n text = \"\";\n }\n\n const transcribeResult: TranscribeResult = {\n text,\n language: language || (this.modelConfig.multilingual ? 
\"auto\" : \"en\"),\n duration: audioDuration,\n totalTime,\n };\n\n // Add segments if timestamps were requested\n if (timestamps && result.chunks) {\n transcribeResult.segments = result.chunks.map(\n (chunk: any): TranscribeSegment => ({\n text: chunk.text?.trim() || \"\",\n start: chunk.timestamp?.[0] || 0,\n end: chunk.timestamp?.[1] || 0,\n }),\n );\n }\n\n onProgress?.({ status: \"Done!\" });\n\n return transcribeResult;\n }\n\n /**\n * Create a streaming transcription session\n *\n * Transcribes audio in real-time by processing chunks at regular intervals.\n * Perfect for live captioning, call transcription, or real-time subtitles.\n *\n * @param options - Streaming options\n * @returns Streaming session controller\n *\n * @example\n * ```ts\n * const session = stt.createStreamingSession({\n * chunkDuration: 3000, // Transcribe every 3 seconds\n * onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),\n * onTranscript: (fullText) => console.log(\"Full:\", fullText),\n * });\n *\n * // Feed audio data as it comes in (Float32Array at 16kHz)\n * session.feedAudio(audioChunk);\n *\n * // Or manually trigger transcription\n * await session.flush();\n *\n * // Stop and get final transcript\n * const finalText = await session.stop();\n * ```\n */\n createStreamingSession(\n options: StreamingTranscriptionOptions = {},\n ): StreamingTranscriptionSession {\n const {\n chunkDuration = 3000,\n minChunkSize = 8000, // ~0.5 seconds at 16kHz\n onChunk,\n onTranscript,\n onError,\n language,\n } = options;\n\n let audioBuffer: Float32Array[] = [];\n let fullTranscript = \"\";\n let chunkIndex = 0;\n let intervalId: ReturnType<typeof setInterval> | null = null;\n let isRunning = false;\n\n const getBufferSize = (): number => {\n return audioBuffer.reduce((sum, chunk) => sum + chunk.length, 0);\n };\n\n const mergeBuffer = (): Float32Array => {\n const totalLength = getBufferSize();\n const merged = new Float32Array(totalLength);\n let offset = 0;\n for (const chunk 
of audioBuffer) {\n merged.set(chunk, offset);\n offset += chunk.length;\n }\n return merged;\n };\n\n const transcribeBuffer = async (): Promise<string> => {\n if (!this._isLoaded || getBufferSize() < minChunkSize) {\n return \"\";\n }\n\n const audio = mergeBuffer();\n audioBuffer = []; // Clear buffer\n\n try {\n const result = await this.transcribe(audio, { language });\n const text = result.text.trim();\n\n if (text) {\n chunkIndex++;\n onChunk?.(text, chunkIndex);\n\n // Append to full transcript\n fullTranscript = fullTranscript + (fullTranscript ? \" \" : \"\") + text;\n onTranscript?.(fullTranscript);\n }\n\n return text;\n } catch (e: any) {\n onError?.(e.message || \"Transcription failed\");\n return \"\";\n }\n };\n\n let aborted = false;\n\n const session: StreamingTranscriptionSession = {\n feedAudio: (audio: Float32Array) => {\n if (!aborted) {\n audioBuffer.push(audio);\n }\n },\n\n flush: async () => {\n if (aborted) return \"\";\n return transcribeBuffer();\n },\n\n start: () => {\n if (isRunning || aborted) return;\n isRunning = true;\n\n intervalId = setInterval(async () => {\n if (isRunning && !aborted) {\n await transcribeBuffer();\n }\n }, chunkDuration);\n },\n\n stop: async () => {\n isRunning = false;\n\n if (intervalId) {\n clearInterval(intervalId);\n intervalId = null;\n }\n\n // Transcribe any remaining audio (unless aborted)\n if (!aborted && getBufferSize() >= minChunkSize) {\n await transcribeBuffer();\n }\n\n return fullTranscript;\n },\n\n abort: () => {\n // Immediately stop without final transcription\n aborted = true;\n isRunning = false;\n\n if (intervalId) {\n clearInterval(intervalId);\n intervalId = null;\n }\n\n audioBuffer = [];\n },\n\n isRunning: () => isRunning,\n\n getTranscript: () => fullTranscript,\n\n getChunkCount: () => chunkIndex,\n\n reset: () => {\n audioBuffer = [];\n fullTranscript = \"\";\n chunkIndex = 0;\n },\n };\n\n return session;\n }\n\n /**\n * Dispose of resources\n */\n dispose(): void {\n 
this.pipeline = null;\n this._isLoaded = false;\n this.loadPromise = null;\n }\n}\n"],"mappings":";;;;;AA0CA,MAAaA,iBAAmC;CAC9C;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW,CAAC,KAAK;EACjB,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW;GAAC;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAK;EAC7E,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW,CAAC,KAAK;EACjB,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW;GAAC;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAK;EAC7E,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW,CAAC,KAAK;EACjB,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW;GAAC;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAK;EAC7E,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW;GACT;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACD;EACD,YAAY;EACb;CACF;AAGD,MAAM,gBAAgB;;;;;AAUtB,SAAgB,UAAU,QAAiE;CACzF,MAAM,OAAO,IAAI,SAAS,OAAO,QAAQ,OAAO,YAAY,OAAO,WAAW;AAI9E,KADa,OAAO,aAAa,OAAO,IAAI,OAAO,IAAI,OAAO,IAAI,OAAO,GAAG,KAC/D,OACX,OAAM,IAAI,MAAM,wCAAwC;CAI1D,MAAM,cAAc,KAAK,UAAU,IAAI,KAAK;CAC5C,MAAM,aAAa,KAAK,UAAU,IAAI,KAAK;CAC3C,MAAM,gBAAgB,KAAK,UAAU,IAAI,KAAK;AAE9C,KAAI,kBAAkB,GACpB,OAAM,IAAI,MAAM,0BAA0B,cAAc,iCAAiC;CAI3F,IAAI,aAAa;AACjB,QAAO,aAAa,OAAO,SAAS,GAAG;EACrC,MAAM,UAAU,OAAO,aACrB,OAAO,aACP,OAAO,aAAa,IACpB,OAAO,aAAa,IACpB,OAAO,aAAa,GACrB;EACD,MAAM,YAAY,KAAK,UAAU,aAAa,GAAG,KAAK;AACtD,MAAI,YAAY,QAAQ;AACtB,iBAAc;AACd;;AAEF,gBAAc,IAAI;;CAGpB,MAAM,WAAW,OAAO,SAAS;CACjC,MAAM,iBAAiB,gBAAgB;CACvC,MAAM,eAAe,KAAK,MAAM,WAAW,eAAe;CAC1D,MAAM,oBAAoB,KAAK,MAAM,eAAe,YAAY;CAGhE,MAAM,QAAQ,IAAI,aAAa,kBAAkB;AAEjD,MAAK,IAAI,IAAI,GAAG,IAAI,mBAAmB,IACrC,KAAI,gBAAgB,EAGlB,OAAM,MAFO,KAAK,SAAS,aAAa,IAAI,GAAG,KAAK,GACtC,KAAK,SAAS,aAAa,IAAI,IAAI,GAAG,KAAK,IAC7B,IAAI;KAGhC,OAAM,KADS,KAAK,SAAS,aAAa,IAAI,GAAG,KAAK,GAClC;AAIxB,QAAO;EAAE;EAAO;EAAY;;;;;AAM9B,SAAgB,cAAc,OAAqB,UAAkB,QAA
8B;AACjG,KAAI,aAAa,OAAQ,QAAO;CAEhC,MAAM,QAAQ,SAAS;CACvB,MAAM,YAAY,KAAK,MAAM,MAAM,SAAS,MAAM;CAClD,MAAM,SAAS,IAAI,aAAa,UAAU;AAE1C,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,KAAK;EAClC,MAAM,WAAW,IAAI;EACrB,MAAM,QAAQ,KAAK,MAAM,SAAS;EAClC,MAAM,OAAO,KAAK,IAAI,QAAQ,GAAG,MAAM,SAAS,EAAE;EAClD,MAAM,IAAI,WAAW;AACrB,SAAO,KAAK,MAAM,UAAU,IAAI,KAAK,MAAM,QAAQ;;AAGrD,QAAO;;;;;AAUT,IAAa,aAAb,MAAwB;CACtB,AAAQ;CACR,AAAQ,WAAgB;CACxB,AAAQ,cAAoC;CAC5C,AAAQ,YAAY;CACpB,AAAQ,cAAgC;CAExC,YAAY,UAAkB,eAAe;EAC3C,MAAM,SAAS,eAAe,MAAM,MAAM,EAAE,OAAO,QAAQ;AAC3D,MAAI,CAAC,QAAQ;GACX,MAAM,YAAY,eAAe,KAAK,MAAM,EAAE,GAAG,CAAC,KAAK,KAAK;AAC5D,SAAM,IAAI,MAAM,sBAAsB,QAAQ,eAAe,YAAY;;AAE3E,OAAK,cAAc;;;;;CAMrB,WAAoB;AAClB,SAAO,KAAK;;;;;CAMd,iBAAiC;AAC/B,SAAO,KAAK;;;;;CAMd,eAA+B;AAC7B,SAAO,KAAK;;;;;CAMd,gBAAkC;AAChC,SAAO,KAAK;;;;;CAMd,OAAO,aAA+B;AACpC,SAAO,CAAC,GAAG,eAAe;;;;;CAM5B,MAAM,KAAK,UAA0B,EAAE,EAAiB;AACtD,MAAI,KAAK,UAAW;AACpB,MAAI,KAAK,aAAa;AACpB,SAAM,KAAK;AACX;;AAGF,OAAK,cAAc,KAAK,MAAM,QAAQ;AACtC,QAAM,KAAK;;CAGb,MAAc,MAAM,UAA0B,EAAE,EAAiB;EAC/D,MAAM,EAAE,YAAY,SAAS,WAAW;AAExC,eAAa,EAAE,QAAQ,8BAA8B,CAAC;EAGtD,MAAM,SAAS,OAAO,YAAY,eAAe,QAAQ,UAAU;EAInE,MAAM,EAAE,UAAU,QAAQ,MAAM,OAAO;AAGvC,MAAI,QAAQ;AAEV,OAAI,mBAAmB;AACvB,OAAI,oBAAoB;SACnB;AAEL,OAAI,kBAAkB;AACtB,OAAI,mBAAmB;AAEvB,OAAI,IAAI,UAAU,MAAM,KACtB,KAAI,SAAS,KAAK,KAAK,YACrB;;EAON,IAAIC,WAAsC;AAG1C,MAAI,CAAC,OACH,YAAW;AAIb,OAAK,cAAc;AAEnB,eAAa,EAAE,QAAQ,WAAW,KAAK,YAAY,GAAG,MAAM,CAAC;AAI7D,OAAK,WAAW,MAAM,SAAS,gCAAgC,KAAK,YAAY,MAAM;GACpF,OAAO;GACP,QAAQ;GACR,oBAAoB,aAAkB;AACpC,QAAI,SAAS,WAAW,cAAc,SAAS,KAC7C,cAAa;KACX,QAAQ,eAAe,SAAS;KAChC,UAAU,KAAK,MAAM,SAAS,YAAY,EAAE;KAC5C,MAAM,SAAS;KAChB,CAAC;;GAGP,CAAC;AAEF,OAAK,YAAY;AACjB,eAAa,EAAE,QAAQ,UAAU,SAAS,aAAa,CAAC,KAAK,CAAC;;;;;;;;;CAUhE,MAAM,WACJ,OACA,UAA6B,EAAE,EACJ;AAC3B,MAAI,CAAC,KAAK,UACR,OAAM,IAAI,MAAM,2CAA2C;EAG7D,MAAM,EAAE,UAAU,aAAa,OAAO,eAAe;EACrD,MAAM,YAAY,YAAY,KAAK;EAGnC,IAAIC;EACJ,IAAI,kBAAkB;AAEtB,MAAI,iBAAiB,YAAY;AAC/B,gBAAa,EAAE,QAAQ,qBAAqB,CAAC;GAC7C,MAAM,UAAU,UAAU,MAAM;AAChC,eAAY,QAAQ;AACpB,qBAAkB,QAAQ;QAE1B
,aAAY;AAId,MAAI,oBAAoB,MAAO;AAC7B,gBAAa,EAAE,QAAQ,0BAA0B,CAAC;AAClD,eAAY,cAAc,WAAW,iBAAiB,KAAM;;EAG9D,MAAM,gBAAgB,UAAU,SAAS;AACzC,eAAa,EAAE,QAAQ,gBAAgB,cAAc,QAAQ,EAAE,CAAC,gBAAgB,CAAC;EAGjF,MAAMC,kBAAuB,EAAE;AAG/B,MAAI,YAAY,KAAK,YAAY,cAAc;AAC7C,mBAAgB,WAAW;AAC3B,mBAAgB,OAAO;;AAIzB,MAAI,WACF,iBAAgB,oBAAoB;EAItC,MAAM,SAAS,MAAM,KAAK,SAAS,WAAW,gBAAgB;EAE9D,MAAM,YAAY,YAAY,KAAK,GAAG;EAGtC,IAAI,OAAO,OAAO,MAAM,MAAM,IAAI;AAGlC,MAAI,SAAS,mBAAmB,SAAS,mBAAmB,SAAS,gBACnE,QAAO;EAGT,MAAMC,mBAAqC;GACzC;GACA,UAAU,aAAa,KAAK,YAAY,eAAe,SAAS;GAChE,UAAU;GACV;GACD;AAGD,MAAI,cAAc,OAAO,OACvB,kBAAiB,WAAW,OAAO,OAAO,KACvC,WAAmC;GAClC,MAAM,MAAM,MAAM,MAAM,IAAI;GAC5B,OAAO,MAAM,YAAY,MAAM;GAC/B,KAAK,MAAM,YAAY,MAAM;GAC9B,EACF;AAGH,eAAa,EAAE,QAAQ,SAAS,CAAC;AAEjC,SAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA8BT,uBACE,UAAyC,EAAE,EACZ;EAC/B,MAAM,EACJ,gBAAgB,KAChB,eAAe,KACf,SACA,cACA,SACA,aACE;EAEJ,IAAIC,cAA8B,EAAE;EACpC,IAAI,iBAAiB;EACrB,IAAI,aAAa;EACjB,IAAIC,aAAoD;EACxD,IAAI,YAAY;EAEhB,MAAM,sBAA8B;AAClC,UAAO,YAAY,QAAQ,KAAK,UAAU,MAAM,MAAM,QAAQ,EAAE;;EAGlE,MAAM,oBAAkC;GACtC,MAAM,cAAc,eAAe;GACnC,MAAM,SAAS,IAAI,aAAa,YAAY;GAC5C,IAAI,SAAS;AACb,QAAK,MAAM,SAAS,aAAa;AAC/B,WAAO,IAAI,OAAO,OAAO;AACzB,cAAU,MAAM;;AAElB,UAAO;;EAGT,MAAM,mBAAmB,YAA6B;AACpD,OAAI,CAAC,KAAK,aAAa,eAAe,GAAG,aACvC,QAAO;GAGT,MAAM,QAAQ,aAAa;AAC3B,iBAAc,EAAE;AAEhB,OAAI;IAEF,MAAM,QADS,MAAM,KAAK,WAAW,OAAO,EAAE,UAAU,CAAC,EACrC,KAAK,MAAM;AAE/B,QAAI,MAAM;AACR;AACA,eAAU,MAAM,WAAW;AAG3B,sBAAiB,kBAAkB,iBAAiB,MAAM,MAAM;AAChE,oBAAe,eAAe;;AAGhC,WAAO;YACAC,GAAQ;AACf,cAAU,EAAE,WAAW,uBAAuB;AAC9C,WAAO;;;EAIX,IAAI,UAAU;AAmEd,SAjE+C;GAC7C,YAAY,UAAwB;AAClC,QAAI,CAAC,QACH,aAAY,KAAK,MAAM;;GAI3B,OAAO,YAAY;AACjB,QAAI,QAAS,QAAO;AACpB,WAAO,kBAAkB;;GAG3B,aAAa;AACX,QAAI,aAAa,QAAS;AAC1B,gBAAY;AAEZ,iBAAa,YAAY,YAAY;AACnC,SAAI,aAAa,CAAC,QAChB,OAAM,kBAAkB;OAEzB,cAAc;;GAGnB,MAAM,YAAY;AAChB,gBAAY;AAEZ,QAAI,YAAY;AACd,mBAAc,WAAW;AACzB,kBAAa;;AAIf,QAAI,CAAC,WAAW,eAAe,IAAI,aACjC,OAAM,kBAAkB;AAG1B,WAAO;;GAGT,aAAa;AAEX,cAAU;AACV,gBAAY;AAEZ,QAAI,YAAY;AACd,mBAAc,WAAW;AACzB,kBAAa;;AAGf,kBAAc,EAA
E;;GAGlB,iBAAiB;GAEjB,qBAAqB;GAErB,qBAAqB;GAErB,aAAa;AACX,kBAAc,EAAE;AAChB,qBAAiB;AACjB,iBAAa;;GAEhB;;;;;CAQH,UAAgB;AACd,OAAK,WAAW;AAChB,OAAK,YAAY;AACjB,OAAK,cAAc"}
@@ -0,0 +1,434 @@
1
+ //#region src/core/stt.ts
2
/**
 * Registry of the Whisper checkpoints that can be selected by id,
 * listed from the smallest model to the largest.
 */
const WHISPER_MODELS = (() => {
	// Language list shared by the tiny/base/small multilingual checkpoints.
	const coreLanguages = [
		"en",
		"es",
		"fr",
		"de",
		"it",
		"pt",
		"nl",
		"ru",
		"zh",
		"ja",
		"ko"
	];
	// Every entry lives under the same repo namespace and uses a 16 kHz
	// sample rate; English-only checkpoints are the ones whose id ends in ".en".
	const entry = (id, description, size, languages) => ({
		id,
		repo: `onnx-community/${id}`,
		description,
		size,
		multilingual: !id.endsWith(".en"),
		languages,
		sampleRate: 16e3
	});
	return [
		entry("whisper-tiny.en", "Tiny English-only model, fastest", "39M", ["en"]),
		entry("whisper-tiny", "Tiny multilingual model", "39M", [...coreLanguages]),
		entry("whisper-base.en", "Base English-only model, good balance", "74M", ["en"]),
		entry("whisper-base", "Base multilingual model", "74M", [...coreLanguages]),
		entry("whisper-small.en", "Small English-only model, high quality", "244M", ["en"]),
		entry("whisper-small", "Small multilingual model", "244M", [...coreLanguages]),
		entry("whisper-large-v3-turbo", "Large Turbo model, 5.4x faster, 80+ languages", "809M", [
			...coreLanguages,
			"ar",
			"hi",
			"vi",
			"th"
		])
	];
})();
/** Model id used when the caller does not pick one explicitly. */
const DEFAULT_MODEL = "whisper-tiny.en";
124
/**
 * Decode a 16-bit PCM WAV file into mono Float32Array samples.
 *
 * The RIFF chunk list is walked to find the "fmt " and "data" chunks rather
 * than assuming a fixed header layout. The size declared by the "data" chunk
 * is honoured, so chunks that follow the audio are not decoded as samples.
 * Interleaved channels are down-mixed to mono by averaging each frame.
 *
 * @param {Uint8Array} buffer - Raw bytes of the WAV file.
 * @returns {{ audio: Float32Array, sampleRate: number }} Mono samples scaled
 *   by 1/32768, plus the sample rate read from the "fmt " chunk.
 * @throws {Error} If the RIFF/WAVE header, the "fmt " chunk or the "data"
 *   chunk is missing or truncated, if the bit depth is not 16, or if the
 *   channel count is zero.
 */
function decodeWav(buffer) {
	const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
	const fourCC = (at) => String.fromCharCode(buffer[at], buffer[at + 1], buffer[at + 2], buffer[at + 3]);
	if (fourCC(0) !== "RIFF") throw new Error("Invalid WAV file: missing RIFF header");
	if (buffer.length < 12 || fourCC(8) !== "WAVE") throw new Error("Invalid WAV file: missing WAVE format identifier");
	let format = null;
	let dataStart = -1;
	let dataSize = 0;
	let offset = 12;
	while (offset + 8 <= buffer.length) {
		const chunkId = fourCC(offset);
		const chunkSize = view.getUint32(offset + 4, true);
		const bodyStart = offset + 8;
		if (chunkId === "fmt ") {
			if (chunkSize < 16 || bodyStart + 16 > buffer.length) throw new Error("Invalid WAV file: truncated fmt chunk");
			format = {
				numChannels: view.getUint16(bodyStart + 2, true),
				sampleRate: view.getUint32(bodyStart + 4, true),
				bitsPerSample: view.getUint16(bodyStart + 14, true)
			};
		} else if (chunkId === "data") {
			dataStart = bodyStart;
			// A declared size larger than the bytes actually present (truncated
			// file or an unknown-length placeholder) falls back to the remainder.
			dataSize = Math.min(chunkSize, buffer.length - bodyStart);
			break;
		}
		// RIFF chunks are word-aligned: an odd-sized body is followed by a pad byte.
		offset = bodyStart + chunkSize + chunkSize % 2;
	}
	if (format === null) throw new Error("Invalid WAV file: missing fmt chunk");
	const { numChannels, sampleRate, bitsPerSample } = format;
	if (bitsPerSample !== 16) throw new Error(`Unsupported bit depth: ${bitsPerSample}. Only 16-bit WAV is supported.`);
	if (numChannels < 1) throw new Error("Invalid WAV file: channel count must be at least 1");
	if (dataStart < 0) throw new Error("Invalid WAV file: missing data chunk");
	const bytesPerFrame = numChannels * 2;
	const frameCount = Math.floor(dataSize / bytesPerFrame);
	const audio = new Float32Array(frameCount);
	for (let frame = 0; frame < frameCount; frame++) {
		const frameStart = dataStart + frame * bytesPerFrame;
		let sum = 0;
		for (let channel = 0; channel < numChannels; channel++) sum += view.getInt16(frameStart + channel * 2, true);
		// Average the channels, then scale the signed 16-bit range by 1/32768.
		audio[frame] = sum / numChannels / 32768;
	}
	return {
		audio,
		sampleRate
	};
}
157
/**
 * Resample audio to a target sample rate using linear interpolation.
 *
 * Each output sample is interpolated between the two nearest input samples;
 * the last input sample is repeated when the interpolation window would run
 * past the end. No low-pass filtering is applied.
 *
 * @param {Float32Array} audio - Input samples.
 * @param {number} fromRate - Sample rate of `audio` in Hz.
 * @param {number} toRate - Desired sample rate in Hz.
 * @returns {Float32Array} The input array itself when the rates are equal,
 *   otherwise a new array of `Math.round(audio.length * toRate / fromRate)` samples.
 * @throws {RangeError} If the rates differ and either is not a positive finite number.
 */
function resampleAudio(audio, fromRate, toRate) {
	if (fromRate === toRate) return audio;
	// Reject rates that would otherwise produce an infinite/NaN output length
	// or silently collapse the audio to nothing.
	const isValidRate = (rate) => Number.isFinite(rate) && rate > 0;
	if (!isValidRate(fromRate) || !isValidRate(toRate)) {
		throw new RangeError(`Invalid sample rate: cannot resample from ${fromRate} Hz to ${toRate} Hz`);
	}
	const ratio = toRate / fromRate;
	const newLength = Math.round(audio.length * ratio);
	const result = new Float32Array(newLength);
	const lastIndex = audio.length - 1;
	for (let i = 0; i < newLength; i++) {
		const srcIndex = i / ratio;
		const lower = Math.floor(srcIndex);
		const upper = Math.min(lower + 1, lastIndex);
		const t = srcIndex - lower;
		result[i] = audio[lower] * (1 - t) + audio[upper] * t;
	}
	return result;
}
174
/**
 * Speech-to-Text using Whisper ONNX models
 *
 * Wraps a transformers.js "automatic-speech-recognition" pipeline: construct
 * with a model id from WHISPER_MODELS, call load() once, then transcribe()
 * or createStreamingSession().
 */
var WhisperSTT = class {
	// Registry entry selected by the constructor.
	modelConfig;
	// ASR pipeline callable; null until load() succeeds and after dispose().
	pipeline = null;
	// Promise of the load in progress (or completed); shared by concurrent load() calls.
	loadPromise = null;
	_isLoaded = false;
	_deviceMode = "cpu";
	/**
	 * @param {string} [modelId] - Id of an entry in WHISPER_MODELS.
	 * @throws {Error} If no registry entry has that id.
	 */
	constructor(modelId = DEFAULT_MODEL) {
		const config = WHISPER_MODELS.find((m) => m.id === modelId);
		if (!config) {
			const available = WHISPER_MODELS.map((m) => m.id).join(", ");
			throw new Error(`Unknown STT model: ${modelId}. Available: ${available}`);
		}
		this.modelConfig = config;
	}
	/**
	 * Check if model is loaded
	 */
	isLoaded() {
		return this._isLoaded;
	}
	/**
	 * Get model configuration
	 */
	getModelConfig() {
		return this.modelConfig;
	}
	/**
	 * Get model info (alias for getModelConfig)
	 */
	getModelInfo() {
		return this.modelConfig;
	}
	/**
	 * Get current device mode
	 */
	getDeviceMode() {
		return this._deviceMode;
	}
	/**
	 * List available models
	 *
	 * @returns A shallow copy of the model registry.
	 */
	static listModels() {
		return [...WHISPER_MODELS];
	}
	/**
	 * Load the STT model
	 *
	 * Concurrent calls share one load (the options of the first call win).
	 * A failed load is forgotten so that a later call can retry.
	 *
	 * @param options - Load options (`onProgress` callback).
	 */
	async load(options = {}) {
		if (this._isLoaded) return;
		if (!this.loadPromise) {
			const attempt = this._load(options).catch((err) => {
				// Do not keep a rejected promise cached, or every later load() would fail too.
				if (this.loadPromise === attempt) this.loadPromise = null;
				throw err;
			});
			this.loadPromise = attempt;
		}
		await this.loadPromise;
	}
	/**
	 * Import transformers.js, configure it for the current runtime and create
	 * the ASR pipeline. `options.device` is accepted but not used: the pipeline
	 * always runs on "cpu" in Node.js and "wasm" elsewhere.
	 */
	async _load(options = {}) {
		const { onProgress } = options;
		onProgress?.({ status: "Loading transformers.js..." });
		const isNode = typeof process !== "undefined" && process.versions?.node;
		const { pipeline, env } = await import("./transformers.web-u34VxRFM.js");
		if (isNode) {
			env.allowLocalModels = true;
			env.allowRemoteModels = true;
		} else {
			env.useBrowserCache = true;
			env.allowLocalModels = false;
			if (env.backends?.onnx?.wasm) env.backends.onnx.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.21.0/dist/";
		}
		const tfDevice = isNode ? "cpu" : "wasm";
		// Reported as "cpu" in both cases.
		this._deviceMode = "cpu";
		onProgress?.({ status: `Loading ${this.modelConfig.id}...` });
		this.pipeline = await pipeline("automatic-speech-recognition", this.modelConfig.repo, {
			dtype: "fp32",
			device: tfDevice,
			progress_callback: (progress) => {
				if (progress.status === "progress" && progress.file) onProgress?.({
					status: `Downloading ${progress.file}`,
					progress: Math.round(progress.progress || 0),
					file: progress.file
				});
			}
		});
		this._isLoaded = true;
		onProgress?.({ status: `Ready (${tfDevice.toUpperCase()})!` });
	}
	/**
	 * Transcribe audio to text
	 *
	 * @param audio - Audio data as Float32Array (mono, treated as 16kHz) or Uint8Array (WAV file, resampled to 16kHz if needed)
	 * @param options - Transcription options (`language`, `timestamps`, `onProgress`)
	 * @returns Transcription result with text and optional timestamps
	 * @throws {Error} If the model has not been loaded.
	 */
	async transcribe(audio, options = {}) {
		if (!this._isLoaded) throw new Error("STT model not loaded. Call load() first.");
		const { language, timestamps = false, onProgress } = options;
		const startTime = performance.now();
		let audioData;
		let inputSampleRate = 16e3;
		if (audio instanceof Uint8Array) {
			onProgress?.({ status: "Decoding audio..." });
			const decoded = decodeWav(audio);
			audioData = decoded.audio;
			inputSampleRate = decoded.sampleRate;
		} else audioData = audio;
		if (inputSampleRate !== 16e3) {
			onProgress?.({ status: "Resampling to 16kHz..." });
			audioData = resampleAudio(audioData, inputSampleRate, 16e3);
		}
		const audioDuration = audioData.length / 16e3;
		onProgress?.({ status: `Transcribing ${audioDuration.toFixed(1)}s of audio...` });
		const isMultilingual = this.modelConfig.multilingual;
		const pipelineOptions = {};
		// The language hint is only forwarded to multilingual models.
		if (language && isMultilingual) {
			pipelineOptions.language = language;
			pipelineOptions.task = "transcribe";
		}
		if (timestamps) pipelineOptions.return_timestamps = true;
		const result = await this.pipeline(audioData, pipelineOptions);
		const totalTime = performance.now() - startTime;
		let text = result.text?.trim() || "";
		// Markers the model emits for silence are reported as empty text.
		if (text === "[BLANK_AUDIO]" || text === "(blank audio)" || text === "[BLANK AUDIO]") text = "";
		const transcribeResult = {
			text,
			// English-only models ignore the hint above, so report "en" for them
			// instead of echoing back a language that was never applied.
			language: isMultilingual ? language || "auto" : "en",
			duration: audioDuration,
			totalTime
		};
		if (timestamps && result.chunks) transcribeResult.segments = result.chunks.map((chunk) => ({
			text: chunk.text?.trim() || "",
			start: chunk.timestamp?.[0] || 0,
			end: chunk.timestamp?.[1] || 0
		}));
		onProgress?.({ status: "Done!" });
		return transcribeResult;
	}
	/**
	 * Create a streaming transcription session
	 *
	 * Transcribes audio in real-time by processing chunks at regular intervals.
	 * Perfect for live captioning, call transcription, or real-time subtitles.
	 *
	 * Buffered audio is captured synchronously when a transcription is
	 * requested, but the transcriptions themselves run one at a time and in
	 * order, so chunks are never appended out of order. `stop()` waits for
	 * work already in progress; `abort()` and `reset()` discard the results
	 * of work already in progress.
	 *
	 * @param options - Streaming options
	 * @returns Streaming session controller
	 *
	 * @example
	 * ```ts
	 * const session = stt.createStreamingSession({
	 *   chunkDuration: 3000, // Transcribe every 3 seconds
	 *   onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
	 *   onTranscript: (fullText) => console.log("Full:", fullText),
	 * });
	 *
	 * // Feed audio data as it comes in (Float32Array at 16kHz)
	 * session.feedAudio(audioChunk);
	 *
	 * // Or manually trigger transcription
	 * await session.flush();
	 *
	 * // Stop and get final transcript
	 * const finalText = await session.stop();
	 * ```
	 */
	createStreamingSession(options = {}) {
		const { chunkDuration = 3e3, minChunkSize = 8e3, onChunk, onTranscript, onError, language } = options;
		let audioBuffer = [];
		let fullTranscript = "";
		let chunkIndex = 0;
		let intervalId = null;
		let isRunning = false;
		let aborted = false;
		// Bumped by reset() and abort(); a job whose epoch is stale drops its result.
		let epoch = 0;
		// Jobs queued or running; the interval timer skips a tick while this is non-zero
		// so audio accumulates instead of piling up small jobs behind a slow one.
		let inFlight = 0;
		// Tail of the job chain. Never rejects, so chaining on it is always safe.
		let queue = Promise.resolve("");
		const getBufferSize = () => audioBuffer.reduce((sum, chunk) => sum + chunk.length, 0);
		const mergeBuffer = () => {
			const merged = new Float32Array(getBufferSize());
			let offset = 0;
			for (const chunk of audioBuffer) {
				merged.set(chunk, offset);
				offset += chunk.length;
			}
			return merged;
		};
		const clearTimer = () => {
			if (intervalId) {
				clearInterval(intervalId);
				intervalId = null;
			}
		};
		// Captures the buffered audio now and transcribes it after all earlier jobs.
		const transcribeBuffer = () => {
			if (!this._isLoaded || getBufferSize() < minChunkSize) return Promise.resolve("");
			const audio = mergeBuffer();
			audioBuffer = [];
			const jobEpoch = epoch;
			inFlight++;
			const job = queue.then(async () => {
				try {
					if (jobEpoch !== epoch) return "";
					const text = (await this.transcribe(audio, { language })).text.trim();
					// reset()/abort() happened while transcribing: discard the result.
					if (jobEpoch !== epoch) return "";
					if (text) {
						chunkIndex++;
						onChunk?.(text, chunkIndex);
						fullTranscript = fullTranscript + (fullTranscript ? " " : "") + text;
						onTranscript?.(fullTranscript);
					}
					return text;
				} catch (e) {
					if (jobEpoch === epoch) onError?.(e?.message || "Transcription failed");
					return "";
				} finally {
					inFlight--;
				}
			});
			// A job only rejects if onError itself throws; keep the chain usable regardless.
			queue = job.catch(() => "");
			return job;
		};
		return {
			feedAudio: (audio) => {
				if (!aborted) audioBuffer.push(audio);
			},
			flush: async () => {
				if (aborted) return "";
				return transcribeBuffer();
			},
			start: () => {
				if (isRunning || aborted) return;
				isRunning = true;
				intervalId = setInterval(async () => {
					if (isRunning && !aborted && inFlight === 0) await transcribeBuffer();
				}, chunkDuration);
			},
			stop: async () => {
				isRunning = false;
				clearTimer();
				if (!aborted) {
					// Transcribe what is left (if it reaches minChunkSize), then wait for
					// every job still in progress so the returned transcript is complete.
					await transcribeBuffer();
					await queue;
				}
				return fullTranscript;
			},
			abort: () => {
				aborted = true;
				isRunning = false;
				clearTimer();
				audioBuffer = [];
				epoch++;
			},
			isRunning: () => isRunning,
			getTranscript: () => fullTranscript,
			getChunkCount: () => chunkIndex,
			reset: () => {
				audioBuffer = [];
				fullTranscript = "";
				chunkIndex = 0;
				epoch++;
			}
		};
	}
	/**
	 * Dispose of resources
	 *
	 * Clears the loaded state and, when the pipeline object exposes a
	 * `dispose()` method, calls it (best-effort) so the underlying model
	 * sessions are released rather than merely dereferenced.
	 */
	dispose() {
		const pipeline = this.pipeline;
		this.pipeline = null;
		this._isLoaded = false;
		this.loadPromise = null;
		if (typeof pipeline?.dispose === "function") {
			try {
				// dispose() may be async; a failure while tearing down is not actionable here.
				Promise.resolve(pipeline.dispose()).catch(() => {});
			} catch (_err) {
				// Same best-effort policy for a synchronous failure.
			}
		}
	}
};
431
+
432
+ //#endregion
433
+ export { WhisperSTT };
434
+ //# sourceMappingURL=stt-Dne6SENv.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stt-Dne6SENv.js","names":["WHISPER_MODELS: STTModelConfig[]","tfDevice: \"webgpu\" | \"cpu\" | \"wasm\"","audioData: Float32Array","pipelineOptions: any","transcribeResult: TranscribeResult","audioBuffer: Float32Array[]","intervalId: ReturnType<typeof setInterval> | null","e: any"],"sources":["../src/core/stt.ts"],"sourcesContent":["/**\n * Speech-to-Text with Whisper\n *\n * Provides local speech recognition using Whisper ONNX models via transformers.js.\n * Supports multiple model sizes and languages.\n *\n * @example\n * ```ts\n * const stt = new WhisperSTT();\n * await stt.load({ onProgress: (p) => console.log(p.status) });\n *\n * // Transcribe audio (Float32Array at 16kHz)\n * const result = await stt.transcribe(audioData);\n * console.log(result.text);\n *\n * // With timestamps\n * const result = await stt.transcribe(audioData, { timestamps: true });\n * for (const seg of result.segments) {\n * console.log(`[${seg.start.toFixed(1)}s] ${seg.text}`);\n * }\n * ```\n */\n\nimport type {\n LoadSTTOptions,\n ProgressInfo,\n STTModelConfig,\n StreamingTranscriptionOptions,\n StreamingTranscriptionSession,\n TranscribeOptions,\n TranscribeResult,\n TranscribeSegment,\n} from \"./types.js\";\n\n// ============================================\n// Model Registry\n// ============================================\n\n/**\n * Available Whisper models\n * Ordered by size (smallest first)\n */\nexport const WHISPER_MODELS: STTModelConfig[] = [\n {\n id: \"whisper-tiny.en\",\n repo: \"onnx-community/whisper-tiny.en\",\n description: \"Tiny English-only model, fastest\",\n size: \"39M\",\n multilingual: false,\n languages: [\"en\"],\n sampleRate: 16000,\n },\n {\n id: \"whisper-tiny\",\n repo: \"onnx-community/whisper-tiny\",\n description: \"Tiny multilingual model\",\n size: \"39M\",\n multilingual: true,\n languages: [\"en\", \"es\", \"fr\", \"de\", \"it\", \"pt\", \"nl\", \"ru\", \"zh\", \"ja\", \"ko\"],\n sampleRate: 16000,\n },\n {\n id: 
\"whisper-base.en\",\n repo: \"onnx-community/whisper-base.en\",\n description: \"Base English-only model, good balance\",\n size: \"74M\",\n multilingual: false,\n languages: [\"en\"],\n sampleRate: 16000,\n },\n {\n id: \"whisper-base\",\n repo: \"onnx-community/whisper-base\",\n description: \"Base multilingual model\",\n size: \"74M\",\n multilingual: true,\n languages: [\"en\", \"es\", \"fr\", \"de\", \"it\", \"pt\", \"nl\", \"ru\", \"zh\", \"ja\", \"ko\"],\n sampleRate: 16000,\n },\n {\n id: \"whisper-small.en\",\n repo: \"onnx-community/whisper-small.en\",\n description: \"Small English-only model, high quality\",\n size: \"244M\",\n multilingual: false,\n languages: [\"en\"],\n sampleRate: 16000,\n },\n {\n id: \"whisper-small\",\n repo: \"onnx-community/whisper-small\",\n description: \"Small multilingual model\",\n size: \"244M\",\n multilingual: true,\n languages: [\"en\", \"es\", \"fr\", \"de\", \"it\", \"pt\", \"nl\", \"ru\", \"zh\", \"ja\", \"ko\"],\n sampleRate: 16000,\n },\n {\n id: \"whisper-large-v3-turbo\",\n repo: \"onnx-community/whisper-large-v3-turbo\",\n description: \"Large Turbo model, 5.4x faster, 80+ languages\",\n size: \"809M\",\n multilingual: true,\n languages: [\n \"en\",\n \"es\",\n \"fr\",\n \"de\",\n \"it\",\n \"pt\",\n \"nl\",\n \"ru\",\n \"zh\",\n \"ja\",\n \"ko\",\n \"ar\",\n \"hi\",\n \"vi\",\n \"th\",\n ],\n sampleRate: 16000,\n },\n];\n\n// Default model\nconst DEFAULT_MODEL = \"whisper-tiny.en\";\n\n// ============================================\n// Audio Utilities\n// ============================================\n\n/**\n * Decode WAV file to Float32Array\n * Handles stereo to mono conversion\n */\nexport function decodeWav(buffer: Uint8Array): { audio: Float32Array; sampleRate: number } {\n const view = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);\n\n // Validate RIFF header\n const riff = String.fromCharCode(buffer[0], buffer[1], buffer[2], buffer[3]);\n if (riff !== \"RIFF\") {\n throw new 
Error(\"Invalid WAV file: missing RIFF header\");\n }\n\n // Get format details\n const numChannels = view.getUint16(22, true);\n const sampleRate = view.getUint32(24, true);\n const bitsPerSample = view.getUint16(34, true);\n\n if (bitsPerSample !== 16) {\n throw new Error(`Unsupported bit depth: ${bitsPerSample}. Only 16-bit WAV is supported.`);\n }\n\n // Find data chunk\n let dataOffset = 12;\n while (dataOffset < buffer.length - 8) {\n const chunkId = String.fromCharCode(\n buffer[dataOffset],\n buffer[dataOffset + 1],\n buffer[dataOffset + 2],\n buffer[dataOffset + 3],\n );\n const chunkSize = view.getUint32(dataOffset + 4, true);\n if (chunkId === \"data\") {\n dataOffset += 8;\n break;\n }\n dataOffset += 8 + chunkSize;\n }\n\n const dataSize = buffer.length - dataOffset;\n const bytesPerSample = bitsPerSample / 8;\n const totalSamples = Math.floor(dataSize / bytesPerSample);\n const samplesPerChannel = Math.floor(totalSamples / numChannels);\n\n // Convert to mono Float32Array\n const audio = new Float32Array(samplesPerChannel);\n\n for (let i = 0; i < samplesPerChannel; i++) {\n if (numChannels === 2) {\n const left = view.getInt16(dataOffset + i * 4, true);\n const right = view.getInt16(dataOffset + i * 4 + 2, true);\n audio[i] = (left + right) / 2 / 32768;\n } else {\n const sample = view.getInt16(dataOffset + i * 2, true);\n audio[i] = sample / 32768;\n }\n }\n\n return { audio, sampleRate };\n}\n\n/**\n * Resample audio to target sample rate using linear interpolation\n */\nexport function resampleAudio(audio: Float32Array, fromRate: number, toRate: number): Float32Array {\n if (fromRate === toRate) return audio;\n\n const ratio = toRate / fromRate;\n const newLength = Math.round(audio.length * ratio);\n const result = new Float32Array(newLength);\n\n for (let i = 0; i < newLength; i++) {\n const srcIndex = i / ratio;\n const floor = Math.floor(srcIndex);\n const ceil = Math.min(floor + 1, audio.length - 1);\n const t = srcIndex - floor;\n result[i] = 
audio[floor] * (1 - t) + audio[ceil] * t;\n }\n\n return result;\n}\n\n// ============================================\n// WhisperSTT Class\n// ============================================\n\n/**\n * Speech-to-Text using Whisper ONNX models\n */\nexport class WhisperSTT {\n private modelConfig: STTModelConfig;\n private pipeline: any = null;\n private loadPromise: Promise<void> | null = null;\n private _isLoaded = false;\n private _deviceMode: \"webgpu\" | \"cpu\" = \"cpu\";\n\n constructor(modelId: string = DEFAULT_MODEL) {\n const config = WHISPER_MODELS.find((m) => m.id === modelId);\n if (!config) {\n const available = WHISPER_MODELS.map((m) => m.id).join(\", \");\n throw new Error(`Unknown STT model: ${modelId}. Available: ${available}`);\n }\n this.modelConfig = config;\n }\n\n /**\n * Check if model is loaded\n */\n isLoaded(): boolean {\n return this._isLoaded;\n }\n\n /**\n * Get model configuration\n */\n getModelConfig(): STTModelConfig {\n return this.modelConfig;\n }\n\n /**\n * Get model info (alias for getModelConfig)\n */\n getModelInfo(): STTModelConfig {\n return this.modelConfig;\n }\n\n /**\n * Get current device mode\n */\n getDeviceMode(): \"webgpu\" | \"cpu\" {\n return this._deviceMode;\n }\n\n /**\n * List available models\n */\n static listModels(): STTModelConfig[] {\n return [...WHISPER_MODELS];\n }\n\n /**\n * Load the STT model\n */\n async load(options: LoadSTTOptions = {}): Promise<void> {\n if (this._isLoaded) return;\n if (this.loadPromise) {\n await this.loadPromise;\n return;\n }\n\n this.loadPromise = this._load(options);\n await this.loadPromise;\n }\n\n private async _load(options: LoadSTTOptions = {}): Promise<void> {\n const { onProgress, device = \"auto\" } = options;\n\n onProgress?.({ status: \"Loading transformers.js...\" });\n\n // Check if we're in Node.js or browser\n const isNode = typeof process !== \"undefined\" && process.versions?.node;\n\n // Import transformers.js dynamically\n // tsdown handles resolution: 
Node.js = external, Browser = bundled\n const { pipeline, env } = await import(\"@huggingface/transformers\");\n\n // Configure environment based on runtime\n if (isNode) {\n // Node.js: allow local models (for CLI/server use)\n env.allowLocalModels = true;\n env.allowRemoteModels = true;\n } else {\n // Browser: use IndexedDB cache, fetch from HuggingFace CDN\n env.useBrowserCache = true;\n env.allowLocalModels = false;\n // Load WASM files from CDN (not bundled with gerbil)\n if (env.backends?.onnx?.wasm) {\n env.backends.onnx.wasm.wasmPaths =\n \"https://cdn.jsdelivr.net/npm/onnxruntime-web@1.21.0/dist/\";\n }\n }\n\n // Determine device\n // Note: Whisper ONNX models work best with fp32 on CPU/WASM\n // WebGPU support for ASR is limited, so we use CPU for reliability\n let tfDevice: \"webgpu\" | \"cpu\" | \"wasm\" = \"cpu\";\n\n // In browser, use WASM for better compatibility\n if (!isNode) {\n tfDevice = \"wasm\";\n }\n\n // Store device mode\n this._deviceMode = \"cpu\"; // STT always reports as CPU since WASM is CPU-based\n\n onProgress?.({ status: `Loading ${this.modelConfig.id}...` });\n\n // Load the ASR pipeline\n // Always use fp32 for Whisper models (fp16 not available for ONNX ASR)\n this.pipeline = await pipeline(\"automatic-speech-recognition\", this.modelConfig.repo, {\n dtype: \"fp32\",\n device: tfDevice,\n progress_callback: (progress: any) => {\n if (progress.status === \"progress\" && progress.file) {\n onProgress?.({\n status: `Downloading ${progress.file}`,\n progress: Math.round(progress.progress || 0),\n file: progress.file,\n });\n }\n },\n });\n\n this._isLoaded = true;\n onProgress?.({ status: `Ready (${tfDevice.toUpperCase()})!` });\n }\n\n /**\n * Transcribe audio to text\n *\n * @param audio - Audio data as Float32Array (mono, 16kHz preferred) or Uint8Array (WAV file)\n * @param options - Transcription options\n * @returns Transcription result with text and optional timestamps\n */\n async transcribe(\n audio: Float32Array | 
Uint8Array,\n options: TranscribeOptions = {},\n ): Promise<TranscribeResult> {\n if (!this._isLoaded) {\n throw new Error(\"STT model not loaded. Call load() first.\");\n }\n\n const { language, timestamps = false, onProgress } = options;\n const startTime = performance.now();\n\n // Convert Uint8Array (WAV) to Float32Array\n let audioData: Float32Array;\n let inputSampleRate = 16000;\n\n if (audio instanceof Uint8Array) {\n onProgress?.({ status: \"Decoding audio...\" });\n const decoded = decodeWav(audio);\n audioData = decoded.audio;\n inputSampleRate = decoded.sampleRate;\n } else {\n audioData = audio;\n }\n\n // Resample to 16kHz if needed\n if (inputSampleRate !== 16000) {\n onProgress?.({ status: \"Resampling to 16kHz...\" });\n audioData = resampleAudio(audioData, inputSampleRate, 16000);\n }\n\n const audioDuration = audioData.length / 16000;\n onProgress?.({ status: `Transcribing ${audioDuration.toFixed(1)}s of audio...` });\n\n // Build pipeline options\n const pipelineOptions: any = {};\n\n // Only set language for multilingual models\n if (language && this.modelConfig.multilingual) {\n pipelineOptions.language = language;\n pipelineOptions.task = \"transcribe\";\n }\n\n // Enable timestamps if requested\n if (timestamps) {\n pipelineOptions.return_timestamps = true;\n }\n\n // Run transcription\n const result = await this.pipeline(audioData, pipelineOptions);\n\n const totalTime = performance.now() - startTime;\n\n // Build result\n let text = result.text?.trim() || \"\";\n\n // Filter out Whisper artifacts\n if (text === \"[BLANK_AUDIO]\" || text === \"(blank audio)\" || text === \"[BLANK AUDIO]\") {\n text = \"\";\n }\n\n const transcribeResult: TranscribeResult = {\n text,\n language: language || (this.modelConfig.multilingual ? 
\"auto\" : \"en\"),\n duration: audioDuration,\n totalTime,\n };\n\n // Add segments if timestamps were requested\n if (timestamps && result.chunks) {\n transcribeResult.segments = result.chunks.map(\n (chunk: any): TranscribeSegment => ({\n text: chunk.text?.trim() || \"\",\n start: chunk.timestamp?.[0] || 0,\n end: chunk.timestamp?.[1] || 0,\n }),\n );\n }\n\n onProgress?.({ status: \"Done!\" });\n\n return transcribeResult;\n }\n\n /**\n * Create a streaming transcription session\n *\n * Transcribes audio in real-time by processing chunks at regular intervals.\n * Perfect for live captioning, call transcription, or real-time subtitles.\n *\n * @param options - Streaming options\n * @returns Streaming session controller\n *\n * @example\n * ```ts\n * const session = stt.createStreamingSession({\n * chunkDuration: 3000, // Transcribe every 3 seconds\n * onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),\n * onTranscript: (fullText) => console.log(\"Full:\", fullText),\n * });\n *\n * // Feed audio data as it comes in (Float32Array at 16kHz)\n * session.feedAudio(audioChunk);\n *\n * // Or manually trigger transcription\n * await session.flush();\n *\n * // Stop and get final transcript\n * const finalText = await session.stop();\n * ```\n */\n createStreamingSession(\n options: StreamingTranscriptionOptions = {},\n ): StreamingTranscriptionSession {\n const {\n chunkDuration = 3000,\n minChunkSize = 8000, // ~0.5 seconds at 16kHz\n onChunk,\n onTranscript,\n onError,\n language,\n } = options;\n\n let audioBuffer: Float32Array[] = [];\n let fullTranscript = \"\";\n let chunkIndex = 0;\n let intervalId: ReturnType<typeof setInterval> | null = null;\n let isRunning = false;\n\n const getBufferSize = (): number => {\n return audioBuffer.reduce((sum, chunk) => sum + chunk.length, 0);\n };\n\n const mergeBuffer = (): Float32Array => {\n const totalLength = getBufferSize();\n const merged = new Float32Array(totalLength);\n let offset = 0;\n for (const chunk 
of audioBuffer) {\n merged.set(chunk, offset);\n offset += chunk.length;\n }\n return merged;\n };\n\n const transcribeBuffer = async (): Promise<string> => {\n if (!this._isLoaded || getBufferSize() < minChunkSize) {\n return \"\";\n }\n\n const audio = mergeBuffer();\n audioBuffer = []; // Clear buffer\n\n try {\n const result = await this.transcribe(audio, { language });\n const text = result.text.trim();\n\n if (text) {\n chunkIndex++;\n onChunk?.(text, chunkIndex);\n\n // Append to full transcript\n fullTranscript = fullTranscript + (fullTranscript ? \" \" : \"\") + text;\n onTranscript?.(fullTranscript);\n }\n\n return text;\n } catch (e: any) {\n onError?.(e.message || \"Transcription failed\");\n return \"\";\n }\n };\n\n let aborted = false;\n\n const session: StreamingTranscriptionSession = {\n feedAudio: (audio: Float32Array) => {\n if (!aborted) {\n audioBuffer.push(audio);\n }\n },\n\n flush: async () => {\n if (aborted) return \"\";\n return transcribeBuffer();\n },\n\n start: () => {\n if (isRunning || aborted) return;\n isRunning = true;\n\n intervalId = setInterval(async () => {\n if (isRunning && !aborted) {\n await transcribeBuffer();\n }\n }, chunkDuration);\n },\n\n stop: async () => {\n isRunning = false;\n\n if (intervalId) {\n clearInterval(intervalId);\n intervalId = null;\n }\n\n // Transcribe any remaining audio (unless aborted)\n if (!aborted && getBufferSize() >= minChunkSize) {\n await transcribeBuffer();\n }\n\n return fullTranscript;\n },\n\n abort: () => {\n // Immediately stop without final transcription\n aborted = true;\n isRunning = false;\n\n if (intervalId) {\n clearInterval(intervalId);\n intervalId = null;\n }\n\n audioBuffer = [];\n },\n\n isRunning: () => isRunning,\n\n getTranscript: () => fullTranscript,\n\n getChunkCount: () => chunkIndex,\n\n reset: () => {\n audioBuffer = [];\n fullTranscript = \"\";\n chunkIndex = 0;\n },\n };\n\n return session;\n }\n\n /**\n * Dispose of resources\n */\n dispose(): void {\n 
this.pipeline = null;\n this._isLoaded = false;\n this.loadPromise = null;\n }\n}\n"],"mappings":";;;;;AA0CA,MAAaA,iBAAmC;CAC9C;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW,CAAC,KAAK;EACjB,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW;GAAC;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAK;EAC7E,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW,CAAC,KAAK;EACjB,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW;GAAC;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAK;EAC7E,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW,CAAC,KAAK;EACjB,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW;GAAC;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAM;GAAK;EAC7E,YAAY;EACb;CACD;EACE,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,cAAc;EACd,WAAW;GACT;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACA;GACD;EACD,YAAY;EACb;CACF;AAGD,MAAM,gBAAgB;;;;;AAUtB,SAAgB,UAAU,QAAiE;CACzF,MAAM,OAAO,IAAI,SAAS,OAAO,QAAQ,OAAO,YAAY,OAAO,WAAW;AAI9E,KADa,OAAO,aAAa,OAAO,IAAI,OAAO,IAAI,OAAO,IAAI,OAAO,GAAG,KAC/D,OACX,OAAM,IAAI,MAAM,wCAAwC;CAI1D,MAAM,cAAc,KAAK,UAAU,IAAI,KAAK;CAC5C,MAAM,aAAa,KAAK,UAAU,IAAI,KAAK;CAC3C,MAAM,gBAAgB,KAAK,UAAU,IAAI,KAAK;AAE9C,KAAI,kBAAkB,GACpB,OAAM,IAAI,MAAM,0BAA0B,cAAc,iCAAiC;CAI3F,IAAI,aAAa;AACjB,QAAO,aAAa,OAAO,SAAS,GAAG;EACrC,MAAM,UAAU,OAAO,aACrB,OAAO,aACP,OAAO,aAAa,IACpB,OAAO,aAAa,IACpB,OAAO,aAAa,GACrB;EACD,MAAM,YAAY,KAAK,UAAU,aAAa,GAAG,KAAK;AACtD,MAAI,YAAY,QAAQ;AACtB,iBAAc;AACd;;AAEF,gBAAc,IAAI;;CAGpB,MAAM,WAAW,OAAO,SAAS;CACjC,MAAM,iBAAiB,gBAAgB;CACvC,MAAM,eAAe,KAAK,MAAM,WAAW,eAAe;CAC1D,MAAM,oBAAoB,KAAK,MAAM,eAAe,YAAY;CAGhE,MAAM,QAAQ,IAAI,aAAa,kBAAkB;AAEjD,MAAK,IAAI,IAAI,GAAG,IAAI,mBAAmB,IACrC,KAAI,gBAAgB,EAGlB,OAAM,MAFO,KAAK,SAAS,aAAa,IAAI,GAAG,KAAK,GACtC,KAAK,SAAS,aAAa,IAAI,IAAI,GAAG,KAAK,IAC7B,IAAI;KAGhC,OAAM,KADS,KAAK,SAAS,aAAa,IAAI,GAAG,KAAK,GAClC;AAIxB,QAAO;EAAE;EAAO;EAAY;;;;;AAM9B,SAAgB,cAAc,OAAqB,UAAkB,QAA
8B;AACjG,KAAI,aAAa,OAAQ,QAAO;CAEhC,MAAM,QAAQ,SAAS;CACvB,MAAM,YAAY,KAAK,MAAM,MAAM,SAAS,MAAM;CAClD,MAAM,SAAS,IAAI,aAAa,UAAU;AAE1C,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,KAAK;EAClC,MAAM,WAAW,IAAI;EACrB,MAAM,QAAQ,KAAK,MAAM,SAAS;EAClC,MAAM,OAAO,KAAK,IAAI,QAAQ,GAAG,MAAM,SAAS,EAAE;EAClD,MAAM,IAAI,WAAW;AACrB,SAAO,KAAK,MAAM,UAAU,IAAI,KAAK,MAAM,QAAQ;;AAGrD,QAAO;;;;;AAUT,IAAa,aAAb,MAAwB;CACtB,AAAQ;CACR,AAAQ,WAAgB;CACxB,AAAQ,cAAoC;CAC5C,AAAQ,YAAY;CACpB,AAAQ,cAAgC;CAExC,YAAY,UAAkB,eAAe;EAC3C,MAAM,SAAS,eAAe,MAAM,MAAM,EAAE,OAAO,QAAQ;AAC3D,MAAI,CAAC,QAAQ;GACX,MAAM,YAAY,eAAe,KAAK,MAAM,EAAE,GAAG,CAAC,KAAK,KAAK;AAC5D,SAAM,IAAI,MAAM,sBAAsB,QAAQ,eAAe,YAAY;;AAE3E,OAAK,cAAc;;;;;CAMrB,WAAoB;AAClB,SAAO,KAAK;;;;;CAMd,iBAAiC;AAC/B,SAAO,KAAK;;;;;CAMd,eAA+B;AAC7B,SAAO,KAAK;;;;;CAMd,gBAAkC;AAChC,SAAO,KAAK;;;;;CAMd,OAAO,aAA+B;AACpC,SAAO,CAAC,GAAG,eAAe;;;;;CAM5B,MAAM,KAAK,UAA0B,EAAE,EAAiB;AACtD,MAAI,KAAK,UAAW;AACpB,MAAI,KAAK,aAAa;AACpB,SAAM,KAAK;AACX;;AAGF,OAAK,cAAc,KAAK,MAAM,QAAQ;AACtC,QAAM,KAAK;;CAGb,MAAc,MAAM,UAA0B,EAAE,EAAiB;EAC/D,MAAM,EAAE,YAAY,SAAS,WAAW;AAExC,eAAa,EAAE,QAAQ,8BAA8B,CAAC;EAGtD,MAAM,SAAS,OAAO,YAAY,eAAe,QAAQ,UAAU;EAInE,MAAM,EAAE,UAAU,QAAQ,MAAM,OAAO;AAGvC,MAAI,QAAQ;AAEV,OAAI,mBAAmB;AACvB,OAAI,oBAAoB;SACnB;AAEL,OAAI,kBAAkB;AACtB,OAAI,mBAAmB;AAEvB,OAAI,IAAI,UAAU,MAAM,KACtB,KAAI,SAAS,KAAK,KAAK,YACrB;;EAON,IAAIC,WAAsC;AAG1C,MAAI,CAAC,OACH,YAAW;AAIb,OAAK,cAAc;AAEnB,eAAa,EAAE,QAAQ,WAAW,KAAK,YAAY,GAAG,MAAM,CAAC;AAI7D,OAAK,WAAW,MAAM,SAAS,gCAAgC,KAAK,YAAY,MAAM;GACpF,OAAO;GACP,QAAQ;GACR,oBAAoB,aAAkB;AACpC,QAAI,SAAS,WAAW,cAAc,SAAS,KAC7C,cAAa;KACX,QAAQ,eAAe,SAAS;KAChC,UAAU,KAAK,MAAM,SAAS,YAAY,EAAE;KAC5C,MAAM,SAAS;KAChB,CAAC;;GAGP,CAAC;AAEF,OAAK,YAAY;AACjB,eAAa,EAAE,QAAQ,UAAU,SAAS,aAAa,CAAC,KAAK,CAAC;;;;;;;;;CAUhE,MAAM,WACJ,OACA,UAA6B,EAAE,EACJ;AAC3B,MAAI,CAAC,KAAK,UACR,OAAM,IAAI,MAAM,2CAA2C;EAG7D,MAAM,EAAE,UAAU,aAAa,OAAO,eAAe;EACrD,MAAM,YAAY,YAAY,KAAK;EAGnC,IAAIC;EACJ,IAAI,kBAAkB;AAEtB,MAAI,iBAAiB,YAAY;AAC/B,gBAAa,EAAE,QAAQ,qBAAqB,CAAC;GAC7C,MAAM,UAAU,UAAU,MAAM;AAChC,eAAY,QAAQ;AACpB,qBAAkB,QAAQ;QAE1B
,aAAY;AAId,MAAI,oBAAoB,MAAO;AAC7B,gBAAa,EAAE,QAAQ,0BAA0B,CAAC;AAClD,eAAY,cAAc,WAAW,iBAAiB,KAAM;;EAG9D,MAAM,gBAAgB,UAAU,SAAS;AACzC,eAAa,EAAE,QAAQ,gBAAgB,cAAc,QAAQ,EAAE,CAAC,gBAAgB,CAAC;EAGjF,MAAMC,kBAAuB,EAAE;AAG/B,MAAI,YAAY,KAAK,YAAY,cAAc;AAC7C,mBAAgB,WAAW;AAC3B,mBAAgB,OAAO;;AAIzB,MAAI,WACF,iBAAgB,oBAAoB;EAItC,MAAM,SAAS,MAAM,KAAK,SAAS,WAAW,gBAAgB;EAE9D,MAAM,YAAY,YAAY,KAAK,GAAG;EAGtC,IAAI,OAAO,OAAO,MAAM,MAAM,IAAI;AAGlC,MAAI,SAAS,mBAAmB,SAAS,mBAAmB,SAAS,gBACnE,QAAO;EAGT,MAAMC,mBAAqC;GACzC;GACA,UAAU,aAAa,KAAK,YAAY,eAAe,SAAS;GAChE,UAAU;GACV;GACD;AAGD,MAAI,cAAc,OAAO,OACvB,kBAAiB,WAAW,OAAO,OAAO,KACvC,WAAmC;GAClC,MAAM,MAAM,MAAM,MAAM,IAAI;GAC5B,OAAO,MAAM,YAAY,MAAM;GAC/B,KAAK,MAAM,YAAY,MAAM;GAC9B,EACF;AAGH,eAAa,EAAE,QAAQ,SAAS,CAAC;AAEjC,SAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA8BT,uBACE,UAAyC,EAAE,EACZ;EAC/B,MAAM,EACJ,gBAAgB,KAChB,eAAe,KACf,SACA,cACA,SACA,aACE;EAEJ,IAAIC,cAA8B,EAAE;EACpC,IAAI,iBAAiB;EACrB,IAAI,aAAa;EACjB,IAAIC,aAAoD;EACxD,IAAI,YAAY;EAEhB,MAAM,sBAA8B;AAClC,UAAO,YAAY,QAAQ,KAAK,UAAU,MAAM,MAAM,QAAQ,EAAE;;EAGlE,MAAM,oBAAkC;GACtC,MAAM,cAAc,eAAe;GACnC,MAAM,SAAS,IAAI,aAAa,YAAY;GAC5C,IAAI,SAAS;AACb,QAAK,MAAM,SAAS,aAAa;AAC/B,WAAO,IAAI,OAAO,OAAO;AACzB,cAAU,MAAM;;AAElB,UAAO;;EAGT,MAAM,mBAAmB,YAA6B;AACpD,OAAI,CAAC,KAAK,aAAa,eAAe,GAAG,aACvC,QAAO;GAGT,MAAM,QAAQ,aAAa;AAC3B,iBAAc,EAAE;AAEhB,OAAI;IAEF,MAAM,QADS,MAAM,KAAK,WAAW,OAAO,EAAE,UAAU,CAAC,EACrC,KAAK,MAAM;AAE/B,QAAI,MAAM;AACR;AACA,eAAU,MAAM,WAAW;AAG3B,sBAAiB,kBAAkB,iBAAiB,MAAM,MAAM;AAChE,oBAAe,eAAe;;AAGhC,WAAO;YACAC,GAAQ;AACf,cAAU,EAAE,WAAW,uBAAuB;AAC9C,WAAO;;;EAIX,IAAI,UAAU;AAmEd,SAjE+C;GAC7C,YAAY,UAAwB;AAClC,QAAI,CAAC,QACH,aAAY,KAAK,MAAM;;GAI3B,OAAO,YAAY;AACjB,QAAI,QAAS,QAAO;AACpB,WAAO,kBAAkB;;GAG3B,aAAa;AACX,QAAI,aAAa,QAAS;AAC1B,gBAAY;AAEZ,iBAAa,YAAY,YAAY;AACnC,SAAI,aAAa,CAAC,QAChB,OAAM,kBAAkB;OAEzB,cAAc;;GAGnB,MAAM,YAAY;AAChB,gBAAY;AAEZ,QAAI,YAAY;AACd,mBAAc,WAAW;AACzB,kBAAa;;AAIf,QAAI,CAAC,WAAW,eAAe,IAAI,aACjC,OAAM,kBAAkB;AAG1B,WAAO;;GAGT,aAAa;AAEX,cAAU;AACV,gBAAY;AAEZ,QAAI,YAAY;AACd,mBAAc,WAAW;AACzB,kBAAa;;AAGf,kBAAc,EAA
E;;GAGlB,iBAAiB;GAEjB,qBAAqB;GAErB,qBAAqB;GAErB,aAAa;AACX,kBAAc,EAAE;AAChB,qBAAiB;AACjB,iBAAa;;GAEhB;;;;;CAQH,UAAgB;AACd,OAAK,WAAW;AAChB,OAAK,YAAY;AACjB,OAAK,cAAc"}
@@ -1,4 +1,4 @@
1
- import { n as zodToJsonSchema } from "./utils-7vXqtq2Q.mjs";
1
+ import { n as zodToJsonSchema } from "./utils-CZBZ8dgR.mjs";
2
2
  import { z } from "zod";
3
3
 
4
4
  //#region src/core/docs.ts
@@ -397,8 +397,8 @@ function getToolDefinitions() {
397
397
  * No module imports - we eval the execute function directly
398
398
  */
399
399
  async function loadTools(dir) {
400
- const fs = await import("fs");
401
- const pathModule = await import("path");
400
+ const fs = await import("node:fs");
401
+ const pathModule = await import("node:path");
402
402
  if (!fs.existsSync(dir)) return [];
403
403
  const files = fs.readdirSync(dir).filter((f) => f.endsWith(".tool.ts") || f.endsWith(".tool.js"));
404
404
  const results = [];
@@ -462,8 +462,7 @@ function parseToolFile(content, fallbackName) {
462
462
  execute: config.execute
463
463
  };
464
464
  return null;
465
- } catch (e) {
466
- console.error("Failed to parse tool file:", e);
465
+ } catch (_e) {
467
466
  return null;
468
467
  }
469
468
  }
@@ -471,7 +470,7 @@ function parseToolFile(content, fallbackName) {
471
470
  * Load tools from project .gerbil/tools directory
472
471
  */
473
472
  async function loadProjectTools() {
474
- return loadTools((await import("path")).join(process.cwd(), ".gerbil", "tools"));
473
+ return loadTools((await import("node:path")).join(process.cwd(), ".gerbil", "tools"));
475
474
  }
476
475
  /**
477
476
  * Format tools for Qwen3 prompt
@@ -564,4 +563,4 @@ const docsTool = defineTool({
564
563
 
565
564
  //#endregion
566
565
  export { getToolDefinitions as a, setToolContext as c, getTool as i, executeToolCall as n, loadProjectTools as o, formatToolsForPrompt as r, parseToolCall as s, defineTool as t };
567
- //# sourceMappingURL=tools-BsiEE6f2.mjs.map
566
+ //# sourceMappingURL=tools-Bi1P7Xoy.mjs.map