@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/README.md +79 -14
  2. package/dist/auto-update-DsWBBnEk.mjs +3 -0
  3. package/dist/browser/index.d.mts +401 -5
  4. package/dist/browser/index.d.mts.map +1 -1
  5. package/dist/browser/index.mjs +1772 -146
  6. package/dist/browser/index.mjs.map +1 -1
  7. package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-JEPeM2YE.mjs} +1 -1
  8. package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-Y9F7W5VQ.mjs} +514 -73
  9. package/dist/chrome-backend-Y9F7W5VQ.mjs.map +1 -0
  10. package/dist/cli.mjs +3359 -646
  11. package/dist/cli.mjs.map +1 -1
  12. package/dist/frameworks/express.d.mts +1 -1
  13. package/dist/frameworks/express.mjs +3 -3
  14. package/dist/frameworks/fastify.d.mts +1 -1
  15. package/dist/frameworks/fastify.mjs +3 -3
  16. package/dist/frameworks/hono.d.mts +1 -1
  17. package/dist/frameworks/hono.mjs +3 -3
  18. package/dist/frameworks/next.d.mts +2 -2
  19. package/dist/frameworks/next.mjs +3 -3
  20. package/dist/frameworks/react.d.mts +1 -1
  21. package/dist/frameworks/trpc.d.mts +1 -1
  22. package/dist/frameworks/trpc.mjs +3 -3
  23. package/dist/gerbil-DeQlX_Mt.mjs +5 -0
  24. package/dist/gerbil-POAz8peb.d.mts +431 -0
  25. package/dist/gerbil-POAz8peb.d.mts.map +1 -0
  26. package/dist/gerbil-yoSpRHgv.mjs +1463 -0
  27. package/dist/gerbil-yoSpRHgv.mjs.map +1 -0
  28. package/dist/index.d.mts +395 -9
  29. package/dist/index.d.mts.map +1 -1
  30. package/dist/index.mjs +8 -6
  31. package/dist/index.mjs.map +1 -1
  32. package/dist/integrations/ai-sdk.d.mts +122 -4
  33. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  34. package/dist/integrations/ai-sdk.mjs +239 -11
  35. package/dist/integrations/ai-sdk.mjs.map +1 -1
  36. package/dist/integrations/langchain.d.mts +132 -2
  37. package/dist/integrations/langchain.d.mts.map +1 -1
  38. package/dist/integrations/langchain.mjs +176 -8
  39. package/dist/integrations/langchain.mjs.map +1 -1
  40. package/dist/integrations/llamaindex.d.mts +1 -1
  41. package/dist/integrations/llamaindex.mjs +3 -3
  42. package/dist/integrations/mcp-client.mjs +4 -4
  43. package/dist/integrations/mcp-client.mjs.map +1 -1
  44. package/dist/integrations/mcp.d.mts +2 -2
  45. package/dist/integrations/mcp.d.mts.map +1 -1
  46. package/dist/integrations/mcp.mjs +6 -6
  47. package/dist/{mcp-R8kRLIKb.mjs → mcp-Bitg4sjX.mjs} +10 -37
  48. package/dist/mcp-Bitg4sjX.mjs.map +1 -0
  49. package/dist/microphone-D-6y9aiE.mjs +3 -0
  50. package/dist/{models-DKULvhOr.mjs → models-BAtL8qsA.mjs} +42 -7
  51. package/dist/models-BAtL8qsA.mjs.map +1 -0
  52. package/dist/{models-De2-_GmQ.d.mts → models-CE0fBq0U.d.mts} +2 -2
  53. package/dist/models-CE0fBq0U.d.mts.map +1 -0
  54. package/dist/{one-liner-BUQR0nqq.mjs → one-liner-B1rmFto6.mjs} +2 -2
  55. package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-B1rmFto6.mjs.map} +1 -1
  56. package/dist/repl-D20JO260.mjs +10 -0
  57. package/dist/skills/index.d.mts +303 -12
  58. package/dist/skills/index.d.mts.map +1 -1
  59. package/dist/skills/index.mjs +6 -6
  60. package/dist/skills-5DxAV-rn.mjs +1435 -0
  61. package/dist/skills-5DxAV-rn.mjs.map +1 -0
  62. package/dist/stt-Bv_dum-R.mjs +433 -0
  63. package/dist/stt-Bv_dum-R.mjs.map +1 -0
  64. package/dist/stt-KzSoNvwI.mjs +3 -0
  65. package/dist/{tools-BsiEE6f2.mjs → tools-IYPrqoek.mjs} +6 -7
  66. package/dist/{tools-BsiEE6f2.mjs.map → tools-IYPrqoek.mjs.map} +1 -1
  67. package/dist/tts-5yWeP_I0.mjs +3 -0
  68. package/dist/tts-DG6denWG.mjs +729 -0
  69. package/dist/tts-DG6denWG.mjs.map +1 -0
  70. package/dist/types-s6Py2_DL.d.mts +353 -0
  71. package/dist/types-s6Py2_DL.d.mts.map +1 -0
  72. package/dist/{utils-7vXqtq2Q.mjs → utils-CkB4Roi6.mjs} +1 -1
  73. package/dist/{utils-7vXqtq2Q.mjs.map → utils-CkB4Roi6.mjs.map} +1 -1
  74. package/docs/ai-sdk.md +137 -21
  75. package/docs/browser.md +241 -2
  76. package/docs/memory.md +72 -0
  77. package/docs/stt.md +494 -0
  78. package/docs/tts.md +569 -0
  79. package/docs/vision.md +396 -0
  80. package/package.json +17 -18
  81. package/dist/auto-update-BbNHbSU1.mjs +0 -3
  82. package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
  83. package/dist/gerbil-BfnsFWRE.mjs +0 -644
  84. package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
  85. package/dist/gerbil-BjW-z7Fq.mjs +0 -5
  86. package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
  87. package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
  88. package/dist/mcp-R8kRLIKb.mjs.map +0 -1
  89. package/dist/models-DKULvhOr.mjs.map +0 -1
  90. package/dist/models-De2-_GmQ.d.mts.map +0 -1
  91. package/dist/skills-D3CEpgDc.mjs +0 -630
  92. package/dist/skills-D3CEpgDc.mjs.map +0 -1
  93. package/dist/types-BS1N92Jt.d.mts +0 -183
  94. package/dist/types-BS1N92Jt.d.mts.map +0 -1
package/dist/gerbil-BfnsFWRE.mjs.map
@@ -1 +0,0 @@
- {"version":3,"file":"gerbil-BfnsFWRE.mjs","names":["pipeline","rawPipeline","isBrowser","tfDevice: \"webgpu\" | \"wasm\" | \"cpu\"","chromeErr: any","tokenQueue: string[]","resolveNext: ((value: string | null) => void) | null","results: EmbedResult[]","messages: Array<{ role: string; content: string }>"],"sources":["../src/core/gerbil.ts"],"sourcesContent":["/**\n * Gerbil - Local GPU-accelerated LLM inference\n */\n\nimport {\n AutoModelForCausalLM,\n AutoTokenizer,\n env,\n type FeatureExtractionPipeline,\n type PreTrainedTokenizer,\n pipeline as rawPipeline,\n type TextGenerationPipeline,\n} from \"@huggingface/transformers\";\n\n// Wrapper to avoid TypeScript complexity issues with transformers.js types\nconst pipeline = rawPipeline as (task: string, model: string, options?: any) => Promise<any>;\n\n// Suppress noisy transformers.js warnings during model loading\nfunction suppressNoisyWarnings<T>(fn: () => Promise<T>): Promise<T> {\n const originalWarn = console.warn;\n console.warn = (...args: any[]) => {\n const msg = args[0]?.toString?.() || \"\";\n // Suppress \"Unable to determine content-length\" warnings from transformers.js\n if (msg.includes(\"content-length\") || msg.includes(\"Unable to determine\")) {\n return;\n }\n originalWarn.apply(console, args);\n };\n\n return fn().finally(() => {\n console.warn = originalWarn;\n });\n}\n\nimport {\n BUILTIN_MODELS,\n createExternalModelConfig,\n getModelConfig,\n resolveModel,\n} from \"./models.js\";\nimport type {\n EmbedOptions,\n EmbedResult,\n GenerateOptions,\n GenerateResult,\n GerbilConfig,\n JsonOptions,\n LoadOptions,\n ModelConfig,\n SessionStats,\n SystemInfo,\n} from \"./types.js\";\n\nimport { extractJson, zodToJsonSchema } from \"./utils.js\";\n\n// Configure transformers.js based on environment\nconst isBrowser = typeof window !== \"undefined\";\nenv.allowLocalModels = !isBrowser; // false in browser (fetch from HuggingFace)\nenv.useBrowserCache = isBrowser; // true in browser (cache in IndexedDB)\n\n// ============================================\n// Gerbil Class\n// ============================================\n\n// WebGPU initialization state for Node.js\nlet webgpuInitialized = false;\nlet webgpuAvailable = false;\n\n/**\n * Initialize WebGPU for Node.js environments\n * Called automatically before model loading\n */\nasync function initNodeWebGPU(): Promise<boolean> {\n if (webgpuInitialized) {\n return webgpuAvailable;\n }\n webgpuInitialized = true;\n\n // Skip if in browser (already has WebGPU)\n if (typeof window !== \"undefined\") {\n webgpuAvailable = \"gpu\" in navigator;\n return webgpuAvailable;\n }\n\n // Try to initialize WebGPU in Node.js via Dawn\n // Use Function constructor to hide import from bundlers\n try {\n const dynamicImport = new Function(\"specifier\", \"return import(specifier)\");\n const webgpuModule = await dynamicImport(\"webgpu\");\n const { create, globals } = webgpuModule;\n\n // Extend globalThis with WebGPU globals\n Object.assign(globalThis, globals);\n\n // Create navigator.gpu\n if (!(globalThis as any).navigator) {\n (globalThis as any).navigator = {};\n }\n (globalThis as any).navigator.gpu = create([]);\n\n webgpuAvailable = true;\n } catch {\n // WebGPU not available, will fall back to CPU\n webgpuAvailable = false;\n }\n\n return webgpuAvailable;\n}\n\n// ChromeGPUBackend is dynamically imported only in Node.js to avoid bundling puppeteer in browser\ntype ChromeGPUBackendType = import(\"./chrome-backend.js\").ChromeGPUBackend;\n\nexport class Gerbil {\n private 
generator: TextGenerationPipeline | null = null;\n private tokenizer: PreTrainedTokenizer | null = null;\n private model: any = null; // AutoModelForCausalLM instance\n private embedder: FeatureExtractionPipeline | null = null;\n private currentModel: string | null = null;\n private modelConfig: ModelConfig | null = null;\n private readonly config: GerbilConfig;\n private stats: SessionStats;\n private useDirect = false; // Use direct model loading (for WebGPU)\n private chromeBackend: ChromeGPUBackendType | null = null; // Chrome backend for Node.js WebGPU\n private _deviceMode: \"webgpu\" | \"cpu\" | \"wasm\" = \"cpu\"; // Track which backend is active\n\n constructor(config: GerbilConfig = {}) {\n this.config = config;\n this.stats = {\n prompts: 0,\n tokensIn: 0,\n tokensOut: 0,\n avgSpeed: 0,\n totalTime: 0,\n cacheHits: 0,\n cacheMisses: 0,\n };\n }\n\n // ============================================\n // Static Methods\n // ============================================\n\n static listModels(): ModelConfig[] {\n return Object.values(BUILTIN_MODELS);\n }\n\n static getModel(modelId: string): ModelConfig | undefined {\n return BUILTIN_MODELS[modelId];\n }\n\n // ============================================\n // Model Loading\n // ============================================\n\n /**\n * Load a model\n *\n * @example\n * ```ts\n * // Built-in model\n * await g.loadModel(\"qwen3-0.6b\");\n *\n * // HuggingFace model\n * await g.loadModel(\"hf:microsoft/Phi-3-mini\");\n *\n * // Local model\n * await g.loadModel(\"file:./models/my-model\");\n * ```\n */\n async loadModel(modelId = \"qwen3-0.6b\", options: LoadOptions = {}): Promise<void> {\n // Initialize WebGPU for Node.js if needed\n await initNodeWebGPU();\n\n const source = resolveModel(modelId);\n const { onProgress, device = \"auto\", dtype: userDtype } = options;\n\n // Get or create model config\n let config = getModelConfig(modelId);\n if (!config) {\n config = createExternalModelConfig(modelId, source.path);\n }\n\n onProgress?.({ status: `Loading ${modelId}...` });\n\n // Map device to transformers.js device\n // Browser supports: webgpu, wasm (no cpu)\n // Node supports: webgpu, cpu\n const isBrowser = typeof window !== \"undefined\";\n const fallbackDevice = isBrowser ? \"wasm\" : \"cpu\";\n let tfDevice: \"webgpu\" | \"wasm\" | \"cpu\" = fallbackDevice;\n if (device === \"webgpu\" || device === \"gpu\" || device === \"auto\") {\n tfDevice = \"webgpu\";\n }\n\n // Use q4f16 for WebGPU (required for Qwen3), q4 for CPU/WASM\n const dtype = userDtype ?? (tfDevice === \"webgpu\" ? 
\"q4f16\" : \"q4\");\n\n // Track if we're still in loading phase (to suppress progress during inference)\n let isLoading = true;\n let lastFile = \"\";\n let lastPct = -1;\n\n const progressCallback = (progress: any) => {\n if (!isLoading) {\n return; // Suppress progress after initial load\n }\n\n if (progress.status === \"progress\" && progress.file) {\n const pct = Math.round(progress.progress || 0);\n // Only report if file changed or progress increased significantly\n if (progress.file !== lastFile || pct >= lastPct + 5) {\n lastFile = progress.file;\n lastPct = pct;\n onProgress?.({\n status: `Downloading ${progress.file}`,\n progress: pct,\n file: progress.file,\n });\n }\n }\n };\n\n try {\n // Use direct model loading for browser WebGPU (like qwen-web does)\n // This bypasses pipeline() which may have different ONNX session config\n if (isBrowser && tfDevice === \"webgpu\") {\n onProgress?.({ status: \"Loading tokenizer...\" });\n this.tokenizer = (await suppressNoisyWarnings(() =>\n AutoTokenizer.from_pretrained(source.path, {\n progress_callback: progressCallback,\n }),\n )) as PreTrainedTokenizer;\n\n onProgress?.({ status: \"Loading model...\" });\n this.model = await suppressNoisyWarnings(() =>\n AutoModelForCausalLM.from_pretrained(source.path, {\n dtype,\n device: tfDevice,\n progress_callback: progressCallback,\n }),\n );\n\n this.useDirect = true;\n this._deviceMode = \"webgpu\";\n isLoading = false;\n this.currentModel = modelId;\n this.modelConfig = config;\n onProgress?.({ status: \"Ready (WebGPU)!\" });\n } else if (!isBrowser && tfDevice === \"webgpu\") {\n // Node.js + WebGPU: Use Chrome backend for real GPU acceleration\n onProgress?.({ status: \"Starting Chrome WebGPU backend...\" });\n\n // Dynamic import to avoid bundling puppeteer in browser builds\n const { ChromeGPUBackend } = await import(\"./chrome-backend.js\");\n this.chromeBackend = await ChromeGPUBackend.create({\n modelId: source.path,\n onProgress,\n });\n\n this.useDirect = false;\n this._deviceMode = \"webgpu\";\n isLoading = false;\n this.currentModel = modelId;\n this.modelConfig = config;\n // Ready status is set by ChromeGPUBackend\n } else {\n // Use pipeline for CPU / WASM\n const pipelineOptions = {\n dtype,\n device: tfDevice,\n progress_callback: progressCallback,\n };\n this.generator = (await suppressNoisyWarnings(() =>\n pipeline(\"text-generation\", source.path, pipelineOptions as any),\n )) as TextGenerationPipeline;\n\n this.useDirect = false;\n this._deviceMode = tfDevice as \"cpu\" | \"wasm\";\n isLoading = false;\n this.currentModel = modelId;\n this.modelConfig = config;\n onProgress?.({ status: `Ready (${tfDevice.toUpperCase()})!` });\n }\n } catch (err) {\n // Fallback to CPU/WASM if GPU fails (silently)\n if (tfDevice !== fallbackDevice) {\n onProgress?.({ status: `Using ${fallbackDevice.toUpperCase()}...` });\n\n // Clean up Chrome backend if it was partially initialized\n if (this.chromeBackend) {\n await this.chromeBackend.dispose();\n this.chromeBackend = null;\n }\n\n // Fallback always uses pipeline (WASM/CPU don't need direct loading)\n this.generator = (await suppressNoisyWarnings(() =>\n pipeline(\"text-generation\", source.path, {\n dtype: \"q4\",\n device: fallbackDevice,\n progress_callback: progressCallback,\n } as any),\n )) as TextGenerationPipeline;\n\n this.useDirect = false;\n this._deviceMode = fallbackDevice as \"cpu\" | \"wasm\";\n isLoading = false;\n this.currentModel = modelId;\n this.modelConfig = config;\n onProgress?.({ status: `Ready 
(${fallbackDevice.toUpperCase()})!` });\n } else {\n throw err;\n }\n }\n }\n\n /**\n * Check if a model is loaded\n */\n isLoaded(): boolean {\n return (\n this.generator !== null ||\n (this.useDirect && this.model !== null) ||\n this.chromeBackend !== null\n );\n }\n\n /**\n * Get current model info\n */\n getModelInfo(): ModelConfig | null {\n return this.modelConfig;\n }\n\n /**\n * Get current device mode (webgpu, cpu, or wasm)\n */\n getDeviceMode(): \"webgpu\" | \"cpu\" | \"wasm\" {\n return this._deviceMode;\n }\n\n /**\n * Get dtype used for current model\n */\n getDtype(): string {\n // WebGPU uses q4f16, CPU/WASM use q4\n return this._deviceMode === \"webgpu\" ? \"q4f16\" : \"q4\";\n }\n\n /**\n * Get Chrome backend status (if using WebGPU via Chrome)\n */\n getChromeStatus(): {\n pid: number | null;\n port: number;\n modelId: string;\n startedAt: Date | null;\n } | null {\n if (!this.chromeBackend) {\n return null;\n }\n return this.chromeBackend.getStatus();\n }\n\n /**\n * Get Chrome memory usage (if using WebGPU via Chrome)\n * Returns JS heap memory in bytes\n */\n async getChromeMemory(): Promise<{ jsHeapUsed: number; jsHeapTotal: number } | null> {\n if (!this.chromeBackend) {\n return null;\n }\n return this.chromeBackend.getMemoryUsage();\n }\n\n /**\n * Get memory usage in GB (if using WebGPU via Chrome)\n */\n async getMemoryUsage(): Promise<{ usedGB: number; totalGB: number; usedPercent: number } | null> {\n if (!this.chromeBackend) {\n return null;\n }\n return this.chromeBackend.getMemoryStats();\n }\n\n /**\n * Clear KV cache to free memory\n * This will reset the conversation context but free up memory\n */\n async clearCache(): Promise<void> {\n if (this.chromeBackend) {\n await this.chromeBackend.reset();\n }\n }\n\n /**\n * Check memory usage and cleanup if needed\n * @param thresholdGB Memory threshold in GB (default: 8)\n * @returns true if cleanup was performed\n */\n async checkMemoryAndCleanup(thresholdGB = 8): Promise<boolean> {\n if (!this.chromeBackend) {\n return false;\n }\n return this.chromeBackend.checkMemoryAndCleanup(thresholdGB);\n }\n\n // ============================================\n // Text Generation\n // ============================================\n\n /**\n * Generate text\n */\n async generate(prompt: string, options: GenerateOptions = {}): Promise<GenerateResult> {\n if (!this.isLoaded()) {\n // Auto-load default model\n await this.loadModel(this.config.model || \"qwen3-0.6b\");\n }\n\n const {\n maxTokens = 256,\n temperature = 0.7,\n topP = 0.9,\n topK = 50,\n thinking = false,\n system,\n } = options;\n\n const startTime = performance.now();\n\n try {\n let rawText = \"\";\n\n if (this.chromeBackend) {\n // Chrome backend approach (for Node.js WebGPU via Chrome)\n try {\n rawText = await this.chromeBackend.generate(prompt, {\n maxTokens,\n temperature,\n topP,\n topK,\n thinking,\n system,\n // Wrap onToken to match Gerbil's simpler signature\n onToken: options.onToken ? 
(t) => options.onToken!(t.text) : undefined,\n });\n } catch (chromeErr: any) {\n // If Chrome died (OOM, crash), fall back to CPU silently\n if (chromeErr?.message === \"CHROME_BACKEND_DEAD\" || !this.chromeBackend?.isAlive()) {\n await this.chromeBackend?.dispose().catch(() => {});\n this.chromeBackend = null;\n this._deviceMode = \"cpu\";\n // Load CPU fallback and retry\n const modelPath = this.currentModel || \"qwen3-0.6b\";\n this.generator = (await pipeline(\"text-generation\", modelPath, {\n dtype: \"q4\",\n device: \"cpu\",\n } as any)) as TextGenerationPipeline;\n // Retry with CPU\n return this.generate(prompt, options);\n }\n throw chromeErr;\n }\n } else if (this.useDirect && this.model && this.tokenizer) {\n // Direct model approach (for browser WebGPU)\n const messages = this.buildMessages(prompt, { ...options, thinking });\n\n const inputs = (this.tokenizer as any).apply_chat_template(messages, {\n add_generation_prompt: true,\n return_dict: true,\n enable_thinking: thinking, // Qwen3 thinking mode\n });\n\n const output = await this.model.generate({\n ...inputs,\n max_new_tokens: maxTokens,\n temperature: temperature > 0 ? temperature : undefined,\n top_p: topP,\n top_k: topK,\n do_sample: temperature > 0,\n });\n\n // Get input length to extract only generated tokens\n const inputLength = inputs.input_ids.dims?.[1] || inputs.input_ids.data?.length || 0;\n\n // Slice output tensor to get only new tokens (skip prompt)\n const outputTokens = output.slice(null, [inputLength, null]);\n const decoded = this.tokenizer.batch_decode(outputTokens, {\n skip_special_tokens: true,\n });\n\n rawText = decoded[0] || \"\";\n\n // If we still have prompt artifacts, extract assistant response\n if (rawText.toLowerCase().includes(\"assistant\")) {\n const match = rawText.match(/assistant[:\\s]*([\\s\\S]*)/i);\n if (match) {\n rawText = match[1].trim();\n }\n }\n } else if (this.generator) {\n // Pipeline approach (for Node.js / CPU / WASM)\n const formattedPrompt = this.formatPrompt(prompt, { ...options, thinking });\n\n const output = await this.generator(formattedPrompt, {\n max_new_tokens: maxTokens,\n temperature,\n top_p: topP,\n top_k: topK,\n do_sample: temperature > 0,\n return_full_text: false,\n });\n\n // Extract text from pipeline output\n if (Array.isArray(output) && output[0]) {\n const result = output[0] as any;\n if (Array.isArray(result.generated_text)) {\n const last = result.generated_text.at(-1);\n rawText = last?.content || \"\";\n } else {\n rawText = result.generated_text || \"\";\n }\n }\n } else {\n throw new Error(\"No model loaded\");\n }\n\n const endTime = performance.now();\n const totalTime = endTime - startTime;\n\n rawText = this.cleanOutput(rawText);\n\n // Always parse thinking to strip <think> tags from output\n // (model may generate them even without thinking mode enabled)\n const { thinking: thinkingText, response } = this.parseThinking(rawText);\n\n // Only include thinking in result if mode was enabled\n const finalThinking = thinking ? 
thinkingText : undefined;\n\n const tokensGenerated = Math.ceil(response.length / 4);\n\n // Update stats\n this.stats.prompts++;\n this.stats.tokensOut += tokensGenerated;\n this.stats.totalTime += totalTime;\n this.stats.avgSpeed = (this.stats.tokensOut / this.stats.totalTime) * 1000;\n\n return {\n text: response,\n thinking: finalThinking,\n tokensGenerated,\n tokensPerSecond: (tokensGenerated / totalTime) * 1000,\n totalTime,\n finishReason: \"stop\",\n provider: \"local\",\n cached: false,\n };\n } catch (_error) {\n return {\n text: \"\",\n tokensGenerated: 0,\n tokensPerSecond: 0,\n totalTime: performance.now() - startTime,\n finishReason: \"error\",\n provider: \"local\",\n cached: false,\n };\n }\n }\n\n /**\n * Stream text generation (simulated token-by-token)\n *\n * Note: Yields the raw output including <think> tags if thinking mode is enabled.\n * The final result has parsed thinking separated out.\n */\n async *stream(\n prompt: string,\n options: GenerateOptions = {},\n ): AsyncGenerator<string, GenerateResult, unknown> {\n if (!this.isLoaded()) {\n await this.loadModel(this.config.model || \"qwen3-0.6b\");\n }\n\n const startTime = performance.now();\n\n // For Chrome backend, use real streaming via onToken callback\n if (this.chromeBackend) {\n let fullText = \"\";\n const tokenQueue: string[] = [];\n let resolveNext: ((value: string | null) => void) | null = null;\n let done = false;\n\n // Start generation with streaming callback\n const generatePromise = this.chromeBackend\n .generate(prompt, {\n ...options,\n onToken: (token) => {\n fullText += token.text;\n if (resolveNext) {\n resolveNext(token.text);\n resolveNext = null;\n } else {\n tokenQueue.push(token.text);\n }\n },\n })\n .then(() => {\n done = true;\n if (resolveNext) {\n resolveNext(null);\n }\n })\n .catch((err) => {\n done = true;\n if (resolveNext) {\n resolveNext(null);\n }\n throw err;\n });\n\n // Yield tokens as they arrive\n while (!done || tokenQueue.length > 0) {\n if (tokenQueue.length > 0) {\n const token = tokenQueue.shift()!;\n yield token;\n options.onToken?.(token);\n } else if (!done) {\n const token = await new Promise<string | null>((resolve) => {\n resolveNext = resolve;\n });\n if (token) {\n yield token;\n options.onToken?.(token);\n }\n }\n }\n\n await generatePromise;\n\n const { thinking: thinkingText, response } = this.parseThinking(fullText);\n const tokensGenerated = Math.ceil(response.length / 4);\n const totalTime = performance.now() - startTime;\n\n return {\n text: response,\n thinking: options.thinking ? 
thinkingText : undefined,\n tokensGenerated,\n totalTime,\n tokensPerSecond: (tokensGenerated / totalTime) * 1000,\n finishReason: \"stop\" as const,\n };\n }\n\n // For pipeline/direct model, use fake streaming (generate then yield)\n const result = await this.generateRaw(prompt, options);\n\n // Yield word by word for more accurate token simulation\n // (actual tokens average ~4 chars, words are a reasonable approximation)\n const words = result.rawText.split(/(\\s+)/);\n for (const word of words) {\n if (word) {\n yield word;\n options.onToken?.(word);\n }\n }\n\n return result.result;\n }\n\n /**\n * Internal: Generate with raw text access for streaming\n */\n private async generateRaw(\n prompt: string,\n options: GenerateOptions = {},\n ): Promise<{ rawText: string; result: GenerateResult }> {\n const { maxTokens = 256, temperature = 0.7, topP = 0.9, topK = 50, thinking = false } = options;\n\n const startTime = performance.now();\n const formattedPrompt = this.formatPrompt(prompt, { ...options, thinking });\n\n try {\n const output = await this.generator!(formattedPrompt, {\n max_new_tokens: maxTokens,\n temperature,\n top_p: topP,\n top_k: topK,\n do_sample: temperature > 0,\n return_full_text: false,\n });\n\n const endTime = performance.now();\n const totalTime = endTime - startTime;\n\n // Extract text from output\n let rawText = \"\";\n if (Array.isArray(output) && output[0]) {\n const result = output[0] as any;\n if (Array.isArray(result.generated_text)) {\n const last = result.generated_text.at(-1);\n rawText = last?.content || \"\";\n } else {\n rawText = result.generated_text || \"\";\n }\n }\n\n rawText = this.cleanOutput(rawText);\n const { thinking: thinkingText, response } = this.parseThinking(rawText);\n const finalThinking = thinking ? thinkingText : undefined;\n const tokensGenerated = Math.ceil(response.length / 4);\n\n // Update stats\n this.stats.prompts++;\n this.stats.tokensOut += tokensGenerated;\n this.stats.totalTime += totalTime;\n this.stats.avgSpeed = (this.stats.tokensOut / this.stats.totalTime) * 1000;\n\n return {\n rawText,\n result: {\n text: response,\n thinking: finalThinking,\n tokensGenerated,\n tokensPerSecond: (tokensGenerated / totalTime) * 1000,\n totalTime,\n finishReason: \"stop\",\n provider: \"local\",\n cached: false,\n },\n };\n } catch (_error) {\n return {\n rawText: \"\",\n result: {\n text: \"\",\n tokensGenerated: 0,\n tokensPerSecond: 0,\n totalTime: performance.now() - startTime,\n finishReason: \"error\",\n provider: \"local\",\n cached: false,\n },\n };\n }\n }\n\n // ============================================\n // Structured Output (JSON)\n // ============================================\n\n /**\n * Generate structured JSON output\n */\n async json<T>(prompt: string, options: JsonOptions<T>): Promise<T> {\n const { schema, retries = 3, temperature = 0.3 } = options;\n\n const systemPrompt = `You are a JSON generator. You MUST respond with valid JSON only.\nNo explanations, no markdown, no code blocks. 
Just pure JSON.\nThe JSON must conform to this schema: ${JSON.stringify(zodToJsonSchema(schema))}`;\n\n for (let attempt = 0; attempt < retries; attempt++) {\n const result = await this.generate(prompt, {\n system: options.system || systemPrompt,\n temperature,\n maxTokens: 1000,\n });\n\n try {\n // Try to extract JSON from response\n const jsonStr = extractJson(result.text);\n const parsed = JSON.parse(jsonStr);\n const validated = schema.parse(parsed);\n return validated;\n } catch (error) {\n if (attempt === retries - 1) {\n throw new Error(`Failed to generate valid JSON after ${retries} attempts: ${error}`);\n }\n }\n }\n\n throw new Error(\"Failed to generate valid JSON\");\n }\n\n // ============================================\n // Embeddings\n // ============================================\n\n /**\n * Generate embeddings\n */\n async embed(text: string, options: EmbedOptions = {}): Promise<EmbedResult> {\n if (!this.embedder) {\n // Load embedding model\n const model = options.model || \"Xenova/all-MiniLM-L6-v2\";\n this.embedder = (await pipeline(\"feature-extraction\", model)) as FeatureExtractionPipeline;\n }\n\n const startTime = performance.now();\n const output = await this.embedder(text, {\n pooling: \"mean\",\n normalize: options.normalize !== false,\n });\n\n const vector = Array.from(output.data as Float32Array);\n\n return {\n vector,\n text,\n totalTime: performance.now() - startTime,\n };\n }\n\n /**\n * Generate embeddings for multiple texts\n */\n async embedBatch(texts: string[], options: EmbedOptions = {}): Promise<EmbedResult[]> {\n const results: EmbedResult[] = [];\n for (const text of texts) {\n results.push(await this.embed(text, options));\n }\n return results;\n }\n\n // ============================================\n // Stats & Info\n // ============================================\n\n /**\n * Get session stats\n */\n getStats(): SessionStats {\n return { ...this.stats };\n }\n\n /**\n * Get system info\n */\n getInfo(): SystemInfo {\n return {\n version: \"1.0.0\",\n model: this.modelConfig,\n device: {\n backend: \"transformers.js\",\n gpu: null, // TODO: detect GPU\n vram: null,\n status: this.isLoaded() ? 
\"ready\" : \"loading\",\n },\n context: {\n max: this.modelConfig?.contextLength || 0,\n used: 0,\n available: this.modelConfig?.contextLength || 0,\n },\n cache: {\n location: \"~/.gerbil/models\",\n size: \"0 MB\",\n modelCount: 0,\n },\n };\n }\n\n /**\n * Reset stats\n */\n resetStats(): void {\n this.stats = {\n prompts: 0,\n tokensIn: 0,\n tokensOut: 0,\n avgSpeed: 0,\n totalTime: 0,\n cacheHits: 0,\n cacheMisses: 0,\n };\n }\n\n // ============================================\n // Cleanup\n // ============================================\n\n /**\n * Dispose of resources\n */\n async dispose(): Promise<void> {\n // Clean up Chrome backend first (most important to release resources)\n if (this.chromeBackend) {\n try {\n await this.chromeBackend.dispose();\n } catch {\n // Ignore errors during cleanup\n }\n this.chromeBackend = null;\n }\n\n if (this.generator) {\n if (typeof (this.generator as any).dispose === \"function\") {\n try {\n await (this.generator as any).dispose();\n } catch {\n // Ignore errors during cleanup\n }\n }\n this.generator = null;\n }\n if (this.embedder) {\n if (typeof (this.embedder as any).dispose === \"function\") {\n try {\n await (this.embedder as any).dispose();\n } catch {\n // Ignore errors during cleanup\n }\n }\n this.embedder = null;\n }\n this.currentModel = null;\n this.modelConfig = null;\n }\n\n // ============================================\n // Private Methods\n // ============================================\n\n private formatPrompt(prompt: string, options: GenerateOptions): string {\n const system = options.system || \"You are a helpful assistant.\";\n const isQwen = this.currentModel?.includes(\"qwen\");\n\n if (options.thinking && this.modelConfig?.supportsThinking) {\n const thinkSystem = `${system}\\n\\nThink step-by-step before answering. 
Wrap your reasoning in <think></think> tags, then provide your answer.`;\n return `<|im_start|>system\\n${thinkSystem}<|im_end|>\\n<|im_start|>user\\n${prompt}<|im_end|>\\n<|im_start|>assistant\\n`;\n }\n\n if (isQwen) {\n return `<|im_start|>system\\n${system}<|im_end|>\\n<|im_start|>user\\n${prompt} /no_think<|im_end|>\\n<|im_start|>assistant\\n`;\n }\n\n return `<|im_start|>system\\n${system}<|im_end|>\\n<|im_start|>user\\n${prompt}<|im_end|>\\n<|im_start|>assistant\\n`;\n }\n\n private buildMessages(\n prompt: string,\n options: GenerateOptions,\n ): Array<{ role: string; content: string }> {\n const system = options.system || \"You are a helpful assistant.\";\n const messages: Array<{ role: string; content: string }> = [];\n\n // For direct model (WebGPU), enable_thinking is passed to apply_chat_template\n // so we don't need to add /no_think or modify the system prompt\n messages.push({ role: \"system\", content: system });\n messages.push({ role: \"user\", content: prompt });\n\n return messages;\n }\n\n private parseThinking(text: string): {\n thinking?: string;\n response: string;\n } {\n // Handle complete <think>...</think> blocks\n const match = text.match(/<think>([\\s\\S]*?)<\\/think>/);\n if (match) {\n const thinking = match[1].trim();\n const response = text.replace(/<think>[\\s\\S]*?<\\/think>/, \"\").trim();\n return { thinking, response };\n }\n\n // Handle unclosed <think> tags (model stopped mid-thought)\n const unclosedMatch = text.match(/<think>([\\s\\S]*)$/);\n if (unclosedMatch) {\n const thinking = unclosedMatch[1].trim();\n const response = text.replace(/<think>[\\s\\S]*$/, \"\").trim();\n return { thinking: thinking || undefined, response };\n }\n\n // Handle any remaining think tags\n const response = text.replace(/<\\/?think>/g, \"\").trim();\n return { response };\n }\n\n private cleanOutput(text: string): string {\n return (\n text\n .replace(/<\\|im_end\\|>/g, \"\")\n .replace(/<\\|im_start\\|>/g, \"\")\n .replace(/<\\|endoftext\\|>/g, \"\")\n .replace(/<\\/s>/g, \"\")\n // Clean up artifacts from direct model output\n .replace(/^\\/no_think\\s*/i, \"\")\n .replace(/^assistant\\s*/i, \"\")\n .replace(/^\\s*\\/no_think\\s*/gim, \"\")\n .replace(/^\\s*assistant\\s*/gim, \"\")\n // Clean up role markers that might appear\n .replace(/^(system|user|assistant):\\s*/gim, \"\")\n .trim()\n );\n }\n}\n\nexport default 
Gerbil;\n"],"mappings":";;;;;;;;AAeA,MAAMA,aAAWC;AAGjB,SAAS,sBAAyB,IAAkC;CAClE,MAAM,eAAe,QAAQ;AAC7B,SAAQ,QAAQ,GAAG,SAAgB;EACjC,MAAM,MAAM,KAAK,IAAI,YAAY,IAAI;AAErC,MAAI,IAAI,SAAS,iBAAiB,IAAI,IAAI,SAAS,sBAAsB,CACvE;AAEF,eAAa,MAAM,SAAS,KAAK;;AAGnC,QAAO,IAAI,CAAC,cAAc;AACxB,UAAQ,OAAO;GACf;;AAyBJ,MAAM,YAAY,OAAO,WAAW;AACpC,IAAI,mBAAmB,CAAC;AACxB,IAAI,kBAAkB;AAOtB,IAAI,oBAAoB;AACxB,IAAI,kBAAkB;;;;;AAMtB,eAAe,iBAAmC;AAChD,KAAI,kBACF,QAAO;AAET,qBAAoB;AAGpB,KAAI,OAAO,WAAW,aAAa;AACjC,oBAAkB,SAAS;AAC3B,SAAO;;AAKT,KAAI;EAGF,MAAM,EAAE,QAAQ,YADK,MADC,IAAI,SAAS,aAAa,2BAA2B,CAClC,SAAS;AAIlD,SAAO,OAAO,YAAY,QAAQ;AAGlC,MAAI,CAAE,WAAmB,UACvB,CAAC,WAAmB,YAAY,EAAE;AAEpC,EAAC,WAAmB,UAAU,MAAM,OAAO,EAAE,CAAC;AAE9C,oBAAkB;SACZ;AAEN,oBAAkB;;AAGpB,QAAO;;AAMT,IAAa,SAAb,MAAoB;CAClB,AAAQ,YAA2C;CACnD,AAAQ,YAAwC;CAChD,AAAQ,QAAa;CACrB,AAAQ,WAA6C;CACrD,AAAQ,eAA8B;CACtC,AAAQ,cAAkC;CAC1C,AAAiB;CACjB,AAAQ;CACR,AAAQ,YAAY;CACpB,AAAQ,gBAA6C;CACrD,AAAQ,cAAyC;CAEjD,YAAY,SAAuB,EAAE,EAAE;AACrC,OAAK,SAAS;AACd,OAAK,QAAQ;GACX,SAAS;GACT,UAAU;GACV,WAAW;GACX,UAAU;GACV,WAAW;GACX,WAAW;GACX,aAAa;GACd;;CAOH,OAAO,aAA4B;AACjC,SAAO,OAAO,OAAO,eAAe;;CAGtC,OAAO,SAAS,SAA0C;AACxD,SAAO,eAAe;;;;;;;;;;;;;;;;;CAsBxB,MAAM,UAAU,UAAU,cAAc,UAAuB,EAAE,EAAiB;AAEhF,QAAM,gBAAgB;EAEtB,MAAM,SAAS,aAAa,QAAQ;EACpC,MAAM,EAAE,YAAY,SAAS,QAAQ,OAAO,cAAc;EAG1D,IAAI,SAAS,eAAe,QAAQ;AACpC,MAAI,CAAC,OACH,UAAS,0BAA0B,SAAS,OAAO,KAAK;AAG1D,eAAa,EAAE,QAAQ,WAAW,QAAQ,MAAM,CAAC;EAKjD,MAAMC,cAAY,OAAO,WAAW;EACpC,MAAM,iBAAiBA,cAAY,SAAS;EAC5C,IAAIC,WAAsC;AAC1C,MAAI,WAAW,YAAY,WAAW,SAAS,WAAW,OACxD,YAAW;EAIb,MAAM,QAAQ,cAAc,aAAa,WAAW,UAAU;EAG9D,IAAI,YAAY;EAChB,IAAI,WAAW;EACf,IAAI,UAAU;EAEd,MAAM,oBAAoB,aAAkB;AAC1C,OAAI,CAAC,UACH;AAGF,OAAI,SAAS,WAAW,cAAc,SAAS,MAAM;IACnD,MAAM,MAAM,KAAK,MAAM,SAAS,YAAY,EAAE;AAE9C,QAAI,SAAS,SAAS,YAAY,OAAO,UAAU,GAAG;AACpD,gBAAW,SAAS;AACpB,eAAU;AACV,kBAAa;MACX,QAAQ,eAAe,SAAS;MAChC,UAAU;MACV,MAAM,SAAS;MAChB,CAAC;;;;AAKR,MAAI;AAGF,OAAID,eAAa,aAAa,UAAU;AACtC,iBAAa,EAAE,QAAQ,wBAAwB,CAAC;AAChD,SAAK,YAAa,MAAM,4BACtB,cAAc,gBAAgB,OAAO,MAAM,EACzC,mBAAmB,kBACpB,CAAC,CACH;AAED,iBAAa,EAAE,QAAQ,oBAAoB,CAAC;AAC5C,SAAK,QAAQ,MAAM,4BACjB,qBAAqB,gBAAgB,OAAO,MAAM;KAChD;KACA,QAAQ;KACR,mBAAmB;KACpB,CAAC,CACH;AAED,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,gBAAY;AACZ,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,iBAAa,EAAE,QAAQ,mBAAmB,CAAC;cAClC,CAACA,eAAa,aAAa,UAAU;AAE9C,iBAAa,EAAE,QAAQ,qCAAqC,CAAC;IAG7D,MAAM,EAAE,qBAAqB,MAAM,OAAO;AAC1C,SAAK,gBAAgB,MAAM,iBAAiB,OAAO;KACjD,SAAS,OAAO;KAChB;KACD,CAAC;AAEF,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,gBAAY;AACZ,SAAK,eAAe;AACpB,SAAK,cAAc;UAEd;IAEL,MAAM,kBAAkB;KACtB;KACA,QAAQ;KACR,mBAAmB;KACpB;AACD,SAAK,YAAa,MAAM,4BACtBF,WAAS,mBAAmB,OAAO,MAAM,gBAAuB,CACjE;AAED,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,gBAAY;AACZ,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,iBAAa,EAAE,QAAQ,UAAU,SAAS,aAAa,CAAC,KAAK,CAAC;;WAEzD,KAAK;AAEZ,OAAI,aAAa,gBAAgB;AAC/B,iBAAa,EAAE,QAAQ,SAAS,eAAe,aAAa,CAAC,MAAM,CAAC;AAGpE,QAAI,KAAK,eAAe;AACtB,WAAM,KAAK,cAAc,SAAS;AAClC,UAAK,gBAAgB;;AAIvB,SAAK,YAAa,MAAM,4BACtBA,WAAS,mBAAmB,OAAO,MAAM;KACvC,OAAO;KACP,QAAQ;KACR,mBAAmB;KACpB,CAAQ,CACV;AAED,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,gBAAY;AACZ,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,iBAAa,EAAE,QAAQ,UAAU,eAAe,aAAa,CAAC,KAAK,CAAC;SAEpE,OAAM;;;;;;CAQZ,WAAoB;AAClB,SACE,KAAK,cAAc,QAClB,KAAK,aAAa,KAAK,UAAU,QAClC,KAAK,kBAAkB;;;;;CAO3B,eAAmC;AACjC,SAAO,KAAK;;;;;CAMd,gBAA2C;AACzC,SAAO,KAAK;;;;;CAMd,WAAmB;AAEjB,SAAO,KAAK,gBAAgB,WAAW,UAAU;;;;;CAMnD,kBAKS;AACP,MAAI,CAAC,KAAK,cACR,QAAO;AAET,SAAO,KAAK,cAAc,WAAW;;;;;;CAOvC,MAAM,kBAA+E;AACnF,MAAI,CAAC,KAAK,cACR,QAAO;AAET,SAAO,KAAK,cAAc,gBAAgB;;;;;CAM5C,MAAM,iBAA2F;AAC/F,MAAI,CAAC,KAAK,cACR,QAAO;AAET,SAAO,KAAK,cAAc,gBAAgB;;;;;;C
AO5C,MAAM,aAA4B;AAChC,MAAI,KAAK,cACP,OAAM,KAAK,cAAc,OAAO;;;;;;;CASpC,MAAM,sBAAsB,cAAc,GAAqB;AAC7D,MAAI,CAAC,KAAK,cACR,QAAO;AAET,SAAO,KAAK,cAAc,sBAAsB,YAAY;;;;;CAU9D,MAAM,SAAS,QAAgB,UAA2B,EAAE,EAA2B;AACrF,MAAI,CAAC,KAAK,UAAU,CAElB,OAAM,KAAK,UAAU,KAAK,OAAO,SAAS,aAAa;EAGzD,MAAM,EACJ,YAAY,KACZ,cAAc,IACd,OAAO,IACP,OAAO,IACP,WAAW,OACX,WACE;EAEJ,MAAM,YAAY,YAAY,KAAK;AAEnC,MAAI;GACF,IAAI,UAAU;AAEd,OAAI,KAAK,cAEP,KAAI;AACF,cAAU,MAAM,KAAK,cAAc,SAAS,QAAQ;KAClD;KACA;KACA;KACA;KACA;KACA;KAEA,SAAS,QAAQ,WAAW,MAAM,QAAQ,QAAS,EAAE,KAAK,GAAG;KAC9D,CAAC;YACKI,WAAgB;AAEvB,QAAI,WAAW,YAAY,yBAAyB,CAAC,KAAK,eAAe,SAAS,EAAE;AAClF,WAAM,KAAK,eAAe,SAAS,CAAC,YAAY,GAAG;AACnD,UAAK,gBAAgB;AACrB,UAAK,cAAc;AAGnB,UAAK,YAAa,MAAMJ,WAAS,mBADf,KAAK,gBAAgB,cACwB;MAC7D,OAAO;MACP,QAAQ;MACT,CAAQ;AAET,YAAO,KAAK,SAAS,QAAQ,QAAQ;;AAEvC,UAAM;;YAEC,KAAK,aAAa,KAAK,SAAS,KAAK,WAAW;IAEzD,MAAM,WAAW,KAAK,cAAc,QAAQ;KAAE,GAAG;KAAS;KAAU,CAAC;IAErE,MAAM,SAAU,KAAK,UAAkB,oBAAoB,UAAU;KACnE,uBAAuB;KACvB,aAAa;KACb,iBAAiB;KAClB,CAAC;IAEF,MAAM,SAAS,MAAM,KAAK,MAAM,SAAS;KACvC,GAAG;KACH,gBAAgB;KAChB,aAAa,cAAc,IAAI,cAAc;KAC7C,OAAO;KACP,OAAO;KACP,WAAW,cAAc;KAC1B,CAAC;IAGF,MAAM,cAAc,OAAO,UAAU,OAAO,MAAM,OAAO,UAAU,MAAM,UAAU;IAGnF,MAAM,eAAe,OAAO,MAAM,MAAM,CAAC,aAAa,KAAK,CAAC;AAK5D,cAJgB,KAAK,UAAU,aAAa,cAAc,EACxD,qBAAqB,MACtB,CAAC,CAEgB,MAAM;AAGxB,QAAI,QAAQ,aAAa,CAAC,SAAS,YAAY,EAAE;KAC/C,MAAM,QAAQ,QAAQ,MAAM,4BAA4B;AACxD,SAAI,MACF,WAAU,MAAM,GAAG,MAAM;;cAGpB,KAAK,WAAW;IAEzB,MAAM,kBAAkB,KAAK,aAAa,QAAQ;KAAE,GAAG;KAAS;KAAU,CAAC;IAE3E,MAAM,SAAS,MAAM,KAAK,UAAU,iBAAiB;KACnD,gBAAgB;KAChB;KACA,OAAO;KACP,OAAO;KACP,WAAW,cAAc;KACzB,kBAAkB;KACnB,CAAC;AAGF,QAAI,MAAM,QAAQ,OAAO,IAAI,OAAO,IAAI;KACtC,MAAM,SAAS,OAAO;AACtB,SAAI,MAAM,QAAQ,OAAO,eAAe,CAEtC,WADa,OAAO,eAAe,GAAG,GAAG,EACzB,WAAW;SAE3B,WAAU,OAAO,kBAAkB;;SAIvC,OAAM,IAAI,MAAM,kBAAkB;GAIpC,MAAM,YADU,YAAY,KAAK,GACL;AAE5B,aAAU,KAAK,YAAY,QAAQ;GAInC,MAAM,EAAE,UAAU,cAAc,aAAa,KAAK,cAAc,QAAQ;GAGxE,MAAM,gBAAgB,WAAW,eAAe;GAEhD,MAAM,kBAAkB,KAAK,KAAK,SAAS,SAAS,EAAE;AAGtD,QAAK,MAAM;AACX,QAAK,MAAM,aAAa;AACxB,QAAK,MAAM,aAAa;AACxB,QAAK,MAAM,WAAY,KAAK,MAAM,YAAY,KAAK,MAAM,YAAa;AAEtE,UAAO;IACL,MAAM;IACN,UAAU;IACV;IACA,iBAAkB,kBAAkB,YAAa;IACjD;IACA,cAAc;IACd,UAAU;IACV,QAAQ;IACT;WACM,QAAQ;AACf,UAAO;IACL,MAAM;IACN,iBAAiB;IACjB,iBAAiB;IACjB,WAAW,YAAY,KAAK,GAAG;IAC/B,cAAc;IACd,UAAU;IACV,QAAQ;IACT;;;;;;;;;CAUL,OAAO,OACL,QACA,UAA2B,EAAE,EACoB;AACjD,MAAI,CAAC,KAAK,UAAU,CAClB,OAAM,KAAK,UAAU,KAAK,OAAO,SAAS,aAAa;EAGzD,MAAM,YAAY,YAAY,KAAK;AAGnC,MAAI,KAAK,eAAe;GACtB,IAAI,WAAW;GACf,MAAMK,aAAuB,EAAE;GAC/B,IAAIC,cAAuD;GAC3D,IAAI,OAAO;GAGX,MAAM,kBAAkB,KAAK,cAC1B,SAAS,QAAQ;IAChB,GAAG;IACH,UAAU,UAAU;AAClB,iBAAY,MAAM;AAClB,SAAI,aAAa;AACf,kBAAY,MAAM,KAAK;AACvB,oBAAc;WAEd,YAAW,KAAK,MAAM,KAAK;;IAGhC,CAAC,CACD,WAAW;AACV,WAAO;AACP,QAAI,YACF,aAAY,KAAK;KAEnB,CACD,OAAO,QAAQ;AACd,WAAO;AACP,QAAI,YACF,aAAY,KAAK;AAEnB,UAAM;KACN;AAGJ,UAAO,CAAC,QAAQ,WAAW,SAAS,EAClC,KAAI,WAAW,SAAS,GAAG;IACzB,MAAM,QAAQ,WAAW,OAAO;AAChC,UAAM;AACN,YAAQ,UAAU,MAAM;cACf,CAAC,MAAM;IAChB,MAAM,QAAQ,MAAM,IAAI,SAAwB,YAAY;AAC1D,mBAAc;MACd;AACF,QAAI,OAAO;AACT,WAAM;AACN,aAAQ,UAAU,MAAM;;;AAK9B,SAAM;GAEN,MAAM,EAAE,UAAU,cAAc,aAAa,KAAK,cAAc,SAAS;GACzE,MAAM,kBAAkB,KAAK,KAAK,SAAS,SAAS,EAAE;GACtD,MAAM,YAAY,YAAY,KAAK,GAAG;AAEtC,UAAO;IACL,MAAM;IACN,UAAU,QAAQ,WAAW,eAAe;IAC5C;IACA;IACA,iBAAkB,kBAAkB,YAAa;IACjD,cAAc;IACf;;EAIH,MAAM,SAAS,MAAM,KAAK,YAAY,QAAQ,QAAQ;EAItD,MAAM,QAAQ,OAAO,QAAQ,MAAM,QAAQ;AAC3C,OAAK,MAAM,QAAQ,MACjB,KAAI,MAAM;AACR,SAAM;AACN,WAAQ,UAAU,KAAK;;AAI3B,SAAO,OAAO;;;;;CAMhB,MAAc,YACZ,QACA,UAA2B,EAAE,EACyB;EACtD,MAAM,EAAE,YAAY,KAAK,cAAc,IAAK,OAAO,IAAK,OAAO,IAAI,WAAW,UAAU;EAExF,MAAM,YAAY,YAAY,KAAK;EACnC,MAAM,kBAAkB
,KAAK,aAAa,QAAQ;GAAE,GAAG;GAAS;GAAU,CAAC;AAE3E,MAAI;GACF,MAAM,SAAS,MAAM,KAAK,UAAW,iBAAiB;IACpD,gBAAgB;IAChB;IACA,OAAO;IACP,OAAO;IACP,WAAW,cAAc;IACzB,kBAAkB;IACnB,CAAC;GAGF,MAAM,YADU,YAAY,KAAK,GACL;GAG5B,IAAI,UAAU;AACd,OAAI,MAAM,QAAQ,OAAO,IAAI,OAAO,IAAI;IACtC,MAAM,SAAS,OAAO;AACtB,QAAI,MAAM,QAAQ,OAAO,eAAe,CAEtC,WADa,OAAO,eAAe,GAAG,GAAG,EACzB,WAAW;QAE3B,WAAU,OAAO,kBAAkB;;AAIvC,aAAU,KAAK,YAAY,QAAQ;GACnC,MAAM,EAAE,UAAU,cAAc,aAAa,KAAK,cAAc,QAAQ;GACxE,MAAM,gBAAgB,WAAW,eAAe;GAChD,MAAM,kBAAkB,KAAK,KAAK,SAAS,SAAS,EAAE;AAGtD,QAAK,MAAM;AACX,QAAK,MAAM,aAAa;AACxB,QAAK,MAAM,aAAa;AACxB,QAAK,MAAM,WAAY,KAAK,MAAM,YAAY,KAAK,MAAM,YAAa;AAEtE,UAAO;IACL;IACA,QAAQ;KACN,MAAM;KACN,UAAU;KACV;KACA,iBAAkB,kBAAkB,YAAa;KACjD;KACA,cAAc;KACd,UAAU;KACV,QAAQ;KACT;IACF;WACM,QAAQ;AACf,UAAO;IACL,SAAS;IACT,QAAQ;KACN,MAAM;KACN,iBAAiB;KACjB,iBAAiB;KACjB,WAAW,YAAY,KAAK,GAAG;KAC/B,cAAc;KACd,UAAU;KACV,QAAQ;KACT;IACF;;;;;;CAWL,MAAM,KAAQ,QAAgB,SAAqC;EACjE,MAAM,EAAE,QAAQ,UAAU,GAAG,cAAc,OAAQ;EAEnD,MAAM,eAAe;;wCAEe,KAAK,UAAU,gBAAgB,OAAO,CAAC;AAE3E,OAAK,IAAI,UAAU,GAAG,UAAU,SAAS,WAAW;GAClD,MAAM,SAAS,MAAM,KAAK,SAAS,QAAQ;IACzC,QAAQ,QAAQ,UAAU;IAC1B;IACA,WAAW;IACZ,CAAC;AAEF,OAAI;IAEF,MAAM,UAAU,YAAY,OAAO,KAAK;IACxC,MAAM,SAAS,KAAK,MAAM,QAAQ;AAElC,WADkB,OAAO,MAAM,OAAO;YAE/B,OAAO;AACd,QAAI,YAAY,UAAU,EACxB,OAAM,IAAI,MAAM,uCAAuC,QAAQ,aAAa,QAAQ;;;AAK1F,QAAM,IAAI,MAAM,gCAAgC;;;;;CAUlD,MAAM,MAAM,MAAc,UAAwB,EAAE,EAAwB;AAC1E,MAAI,CAAC,KAAK,SAGR,MAAK,WAAY,MAAMN,WAAS,sBADlB,QAAQ,SAAS,0BAC6B;EAG9D,MAAM,YAAY,YAAY,KAAK;EACnC,MAAM,SAAS,MAAM,KAAK,SAAS,MAAM;GACvC,SAAS;GACT,WAAW,QAAQ,cAAc;GAClC,CAAC;AAIF,SAAO;GACL,QAHa,MAAM,KAAK,OAAO,KAAqB;GAIpD;GACA,WAAW,YAAY,KAAK,GAAG;GAChC;;;;;CAMH,MAAM,WAAW,OAAiB,UAAwB,EAAE,EAA0B;EACpF,MAAMO,UAAyB,EAAE;AACjC,OAAK,MAAM,QAAQ,MACjB,SAAQ,KAAK,MAAM,KAAK,MAAM,MAAM,QAAQ,CAAC;AAE/C,SAAO;;;;;CAUT,WAAyB;AACvB,SAAO,EAAE,GAAG,KAAK,OAAO;;;;;CAM1B,UAAsB;AACpB,SAAO;GACL,SAAS;GACT,OAAO,KAAK;GACZ,QAAQ;IACN,SAAS;IACT,KAAK;IACL,MAAM;IACN,QAAQ,KAAK,UAAU,GAAG,UAAU;IACrC;GACD,SAAS;IACP,KAAK,KAAK,aAAa,iBAAiB;IACxC,MAAM;IACN,WAAW,KAAK,aAAa,iBAAiB;IAC/C;GACD,OAAO;IACL,UAAU;IACV,MAAM;IACN,YAAY;IACb;GACF;;;;;CAMH,aAAmB;AACjB,OAAK,QAAQ;GACX,SAAS;GACT,UAAU;GACV,WAAW;GACX,UAAU;GACV,WAAW;GACX,WAAW;GACX,aAAa;GACd;;;;;CAUH,MAAM,UAAyB;AAE7B,MAAI,KAAK,eAAe;AACtB,OAAI;AACF,UAAM,KAAK,cAAc,SAAS;WAC5B;AAGR,QAAK,gBAAgB;;AAGvB,MAAI,KAAK,WAAW;AAClB,OAAI,OAAQ,KAAK,UAAkB,YAAY,WAC7C,KAAI;AACF,UAAO,KAAK,UAAkB,SAAS;WACjC;AAIV,QAAK,YAAY;;AAEnB,MAAI,KAAK,UAAU;AACjB,OAAI,OAAQ,KAAK,SAAiB,YAAY,WAC5C,KAAI;AACF,UAAO,KAAK,SAAiB,SAAS;WAChC;AAIV,QAAK,WAAW;;AAElB,OAAK,eAAe;AACpB,OAAK,cAAc;;CAOrB,AAAQ,aAAa,QAAgB,SAAkC;EACrE,MAAM,SAAS,QAAQ,UAAU;EACjC,MAAM,SAAS,KAAK,cAAc,SAAS,OAAO;AAElD,MAAI,QAAQ,YAAY,KAAK,aAAa,iBAExC,QAAO,uBADa,GAAG,OAAO,iHACY,gCAAgC,OAAO;AAGnF,MAAI,OACF,QAAO,uBAAuB,OAAO,gCAAgC,OAAO;AAG9E,SAAO,uBAAuB,OAAO,gCAAgC,OAAO;;CAG9E,AAAQ,cACN,QACA,SAC0C;EAC1C,MAAM,SAAS,QAAQ,UAAU;EACjC,MAAMC,WAAqD,EAAE;AAI7D,WAAS,KAAK;GAAE,MAAM;GAAU,SAAS;GAAQ,CAAC;AAClD,WAAS,KAAK;GAAE,MAAM;GAAQ,SAAS;GAAQ,CAAC;AAEhD,SAAO;;CAGT,AAAQ,cAAc,MAGpB;EAEA,MAAM,QAAQ,KAAK,MAAM,6BAA6B;AACtD,MAAI,MAGF,QAAO;GAAE,UAFQ,MAAM,GAAG,MAAM;GAEb,UADF,KAAK,QAAQ,4BAA4B,GAAG,CAAC,MAAM;GACvC;EAI/B,MAAM,gBAAgB,KAAK,MAAM,oBAAoB;AACrD,MAAI,eAAe;GACjB,MAAM,WAAW,cAAc,GAAG,MAAM;GACxC,MAAM,WAAW,KAAK,QAAQ,mBAAmB,GAAG,CAAC,MAAM;AAC3D,UAAO;IAAE,UAAU,YAAY;IAAW;IAAU;;AAKtD,SAAO,EAAE,UADQ,KAAK,QAAQ,eAAe,GAAG,CAAC,MAAM,EACpC;;CAGrB,AAAQ,YAAY,MAAsB;AACxC,SACE,KACG,QAAQ,iBAAiB,GAAG,CAC5B,QAAQ,mBAAmB,GAAG,CAC9B,QAAQ,oBAAoB,GAAG,CAC/B,QAAQ,UAAU,GAAG,CAErB,QAAQ,mBAAmB,GAAG,CAC9B,QAAQ,kBAAkB,GAAG,CAC7B,QAAQ,wBAAwB,GAAG,CACnC,QAAQ,uBAAuB,GAAG,
CAElC,QAAQ,mCAAmC,GAAG,CAC9C,MAAM"}
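The inlined `sourcesContent` above is the full `src/core/gerbil.ts` that shipped in rc.0. Based on the signatures and JSDoc it shows, here is a minimal usage sketch; the root `@tryhamster/gerbil` import path is assumed from the package name, not confirmed by this diff:

```ts
// Sketch of the Gerbil API as declared in the deleted chunk; import path assumed.
import { Gerbil } from "@tryhamster/gerbil";

const g = new Gerbil();

// Built-in ids, HuggingFace ("hf:org/model"), and local ("file:./path") are all accepted.
await g.loadModel("qwen3-0.6b", {
  device: "auto", // tries WebGPU first, falls back to WASM (browser) or CPU (Node)
  onProgress: (p) => console.log(p.status),
});

const result = await g.generate("Why is the sky blue?", { maxTokens: 128 });
console.log(result.text, `${result.tokensPerSecond.toFixed(1)} tok/s`);

// Real token streaming on the Chrome/WebGPU backend; word-by-word simulation otherwise.
for await (const token of g.stream("Tell me a joke")) {
  process.stdout.write(token);
}

await g.dispose();
```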
package/dist/gerbil-BjW-z7Fq.mjs
@@ -1,5 +0,0 @@
- import "./models-DKULvhOr.mjs";
- import "./utils-7vXqtq2Q.mjs";
- import { t as Gerbil } from "./gerbil-BfnsFWRE.mjs";
-
- export { Gerbil };
package/dist/gerbil-DZ1k3ChC.d.mts
@@ -1,138 +0,0 @@
- import { _ as SystemInfo, a as GenerateOptions, d as LoadOptions, f as ModelConfig, g as SessionStats, n as EmbedOptions, o as GenerateResult, r as EmbedResult, s as GerbilConfig, u as JsonOptions } from "./types-BS1N92Jt.mjs";
-
- //#region src/core/gerbil.d.ts
-
- declare class Gerbil {
- private generator;
- private tokenizer;
- private model;
- private embedder;
- private currentModel;
- private modelConfig;
- private readonly config;
- private stats;
- private useDirect;
- private chromeBackend;
- private _deviceMode;
- constructor(config?: GerbilConfig);
- static listModels(): ModelConfig[];
- static getModel(modelId: string): ModelConfig | undefined;
- /**
- * Load a model
- *
- * @example
- * ```ts
- * // Built-in model
- * await g.loadModel("qwen3-0.6b");
- *
- * // HuggingFace model
- * await g.loadModel("hf:microsoft/Phi-3-mini");
- *
- * // Local model
- * await g.loadModel("file:./models/my-model");
- * ```
- */
- loadModel(modelId?: string, options?: LoadOptions): Promise<void>;
- /**
- * Check if a model is loaded
- */
- isLoaded(): boolean;
- /**
- * Get current model info
- */
- getModelInfo(): ModelConfig | null;
- /**
- * Get current device mode (webgpu, cpu, or wasm)
- */
- getDeviceMode(): "webgpu" | "cpu" | "wasm";
- /**
- * Get dtype used for current model
- */
- getDtype(): string;
- /**
- * Get Chrome backend status (if using WebGPU via Chrome)
- */
- getChromeStatus(): {
- pid: number | null;
- port: number;
- modelId: string;
- startedAt: Date | null;
- } | null;
- /**
- * Get Chrome memory usage (if using WebGPU via Chrome)
- * Returns JS heap memory in bytes
- */
- getChromeMemory(): Promise<{
- jsHeapUsed: number;
- jsHeapTotal: number;
- } | null>;
- /**
- * Get memory usage in GB (if using WebGPU via Chrome)
- */
- getMemoryUsage(): Promise<{
- usedGB: number;
- totalGB: number;
- usedPercent: number;
- } | null>;
- /**
- * Clear KV cache to free memory
- * This will reset the conversation context but free up memory
- */
- clearCache(): Promise<void>;
- /**
- * Check memory usage and cleanup if needed
- * @param thresholdGB Memory threshold in GB (default: 8)
- * @returns true if cleanup was performed
- */
- checkMemoryAndCleanup(thresholdGB?: number): Promise<boolean>;
- /**
- * Generate text
- */
- generate(prompt: string, options?: GenerateOptions): Promise<GenerateResult>;
- /**
- * Stream text generation (simulated token-by-token)
- *
- * Note: Yields the raw output including <think> tags if thinking mode is enabled.
- * The final result has parsed thinking separated out.
- */
- stream(prompt: string, options?: GenerateOptions): AsyncGenerator<string, GenerateResult, unknown>;
- /**
- * Internal: Generate with raw text access for streaming
- */
- private generateRaw;
- /**
- * Generate structured JSON output
- */
- json<T>(prompt: string, options: JsonOptions<T>): Promise<T>;
- /**
- * Generate embeddings
- */
- embed(text: string, options?: EmbedOptions): Promise<EmbedResult>;
- /**
- * Generate embeddings for multiple texts
- */
- embedBatch(texts: string[], options?: EmbedOptions): Promise<EmbedResult[]>;
- /**
- * Get session stats
- */
- getStats(): SessionStats;
- /**
- * Get system info
- */
- getInfo(): SystemInfo;
- /**
- * Reset stats
- */
- resetStats(): void;
- /**
- * Dispose of resources
- */
- dispose(): Promise<void>;
- private formatPrompt;
- private buildMessages;
- private parseThinking;
- private cleanOutput;
- }
- //#endregion
- export { Gerbil as t };
- //# sourceMappingURL=gerbil-DZ1k3ChC.d.mts.map
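Those declarations also cover the structured-output and embedding paths. A sketch of both follows; the schema library is assumed to be Zod (the inlined source calls `schema.parse` and `zodToJsonSchema`, which implies Zod or something compatible, but the diff never names it):

```ts
import { z } from "zod"; // assumed; the source only requires .parse() and zodToJsonSchema() compatibility
import { Gerbil } from "@tryhamster/gerbil";

const g = new Gerbil();

// json() retries (default 3) until the model output parses and validates against the schema.
const Person = z.object({ name: z.string(), age: z.number() });
const person = await g.json("Invent a fictional person.", { schema: Person });

// embed() lazily loads Xenova/all-MiniLM-L6-v2 unless options.model overrides it.
const { vector } = await g.embed("hello world");
console.log(person.name, vector.length);
```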
package/dist/gerbil-DZ1k3ChC.d.mts.map
@@ -1 +0,0 @@
- {"version":3,"file":"gerbil-DZ1k3ChC.d.mts","names":[],"sources":["../src/core/gerbil.ts"],"sourcesContent":[],"mappings":";;;;AA6HsB,cAbT,MAAA,CAaS;EAiBC,QAAA,SAAA;EAIa,QAAA,SAAA;EAuBe,QAAA,KAAA;EAAmB,QAAA,QAAA;EAgKpD,QAAA,YAAA;EA0BH,QAAA,WAAA;EAYY,iBAAA,MAAA;EAUD,QAAA,KAAA;EAWJ,QAAA,SAAA;EAW0B,QAAA,aAAA;EAcN,QAAA,WAAA;EAA+B,WAAA,CAAA,MAAA,CAAA,EAhSnD,YAgSmD;EAAR,OAAA,UAAA,CAAA,CAAA,EA/Q1C,WA+Q0C,EAAA;EAqKpD,OAAA,QAAA,CAAA,OAAA,EAAA,MAAA,CAAA,EAhbuB,WAgbvB,GAAA,SAAA;EACe;;;;;;;;;;;;;;;EAgTF,SAAA,CAAA,OAAA,CAAA,EAAA,MAAA,EAAA,OAAA,CAAA,EA1sByB,WA0sBzB,CAAA,EA1sB4C,OA0sB5C,CAAA,IAAA,CAAA;;;;;;;;kBA1iBR;;;;;;;;;;;;;;;;eA0BH;;;;;;qBAYY;;;;;;;oBAUD;;;;;;;;;gBAWJ;;;;;;+CAW0B;;;;qCAcN,kBAAuB,QAAQ;;;;;;;mCAqK5D,kBACR,uBAAuB;;;;;;;;mCA+Ka,YAAY,KAAK,QAAQ;;;;gCAqC7B,eAAoB,QAAQ;;;;wCAyBpB,eAAoB,QAAQ;;;;cAe3D;;;;aAOD;;;;;;;;aA6CM"}
package/dist/mcp-R8kRLIKb.mjs.map
@@ -1 +0,0 @@
- {"version":3,"file":"mcp-R8kRLIKb.mjs","names":["handlers: Record<string, (args: any) => Promise<any>>","skills.summarize","skills.explain","skills.review","skills.commit","skills.translate","response: any"],"sources":["../src/integrations/mcp.ts"],"sourcesContent":["/**\n * Gerbil MCP Server\n *\n * Model Context Protocol server for Claude Desktop, Cursor, etc.\n *\n * @example\n * ```bash\n * gerbil serve --mcp\n * ```\n *\n * @example Claude Desktop config\n * ```json\n * {\n * \"mcpServers\": {\n * \"gerbil\": {\n * \"command\": \"npx\",\n * \"args\": [\"-y\", \"gerbil\", \"serve\", \"--mcp\"]\n * }\n * }\n * }\n * ```\n */\n\nimport { Gerbil } from \"../core/gerbil.js\";\nimport type { GerbilConfig } from \"../core/types.js\";\nimport * as skills from \"../skills/index.js\";\n\nexport interface MCPServerOptions extends GerbilConfig {\n /** Port for HTTP transport (default: stdio) */\n port?: number;\n\n /** Tools to expose */\n tools?: string[];\n}\n\n// Tool definitions\nconst TOOL_DEFINITIONS = {\n generate: {\n name: \"gerbil_generate\",\n description: \"Generate text using a local LLM\",\n inputSchema: {\n type: \"object\",\n properties: {\n prompt: { type: \"string\", description: \"The prompt to generate from\" },\n maxTokens: { type: \"number\", description: \"Maximum tokens to generate\" },\n temperature: { type: \"number\", description: \"Sampling temperature (0-2)\" },\n system: { type: \"string\", description: \"System prompt\" },\n thinking: { type: \"boolean\", description: \"Enable thinking mode\" },\n },\n required: [\"prompt\"],\n },\n },\n summarize: {\n name: \"gerbil_summarize\",\n description: \"Summarize content\",\n inputSchema: {\n type: \"object\",\n properties: {\n content: { type: \"string\", description: \"Content to summarize\" },\n length: { type: \"string\", enum: [\"short\", \"medium\", \"long\"] },\n format: { type: \"string\", enum: [\"paragraph\", \"bullets\"] },\n },\n required: [\"content\"],\n },\n },\n explain: {\n name: \"gerbil_explain\",\n description: \"Explain code or concepts\",\n inputSchema: {\n type: \"object\",\n properties: {\n content: { type: \"string\", description: \"Code or concept to explain\" },\n level: { type: \"string\", enum: [\"beginner\", \"intermediate\", \"expert\"] },\n language: { type: \"string\", description: \"Programming language\" },\n },\n required: [\"content\"],\n },\n },\n review: {\n name: \"gerbil_review\",\n description: \"Review code for issues\",\n inputSchema: {\n type: \"object\",\n properties: {\n code: { type: \"string\", description: \"Code to review\" },\n focus: {\n type: \"array\",\n items: { type: \"string\", enum: [\"security\", \"performance\", \"style\", \"bugs\"] },\n },\n },\n required: [\"code\"],\n },\n },\n commit: {\n name: \"gerbil_commit\",\n description: \"Generate a commit message from a diff\",\n inputSchema: {\n type: \"object\",\n properties: {\n diff: { type: \"string\", description: \"Git diff\" },\n type: { type: \"string\", enum: [\"conventional\", \"simple\", \"detailed\"] },\n },\n required: [\"diff\"],\n },\n },\n translate: {\n name: \"gerbil_translate\",\n description: \"Translate text\",\n inputSchema: {\n type: \"object\",\n properties: {\n text: { type: \"string\", description: \"Text to translate\" },\n to: { type: \"string\", description: \"Target language\" },\n from: { type: \"string\", description: \"Source language (optional)\" },\n },\n required: [\"text\", \"to\"],\n },\n },\n embed: {\n name: \"gerbil_embed\",\n description: \"Generate embeddings for 
text\",\n inputSchema: {\n type: \"object\",\n properties: {\n text: { type: \"string\", description: \"Text to embed\" },\n },\n required: [\"text\"],\n },\n },\n};\n\n/**\n * Create MCP server\n */\nexport async function createMCPServer(options: MCPServerOptions = {}) {\n const g = new Gerbil(options);\n\n // Load model\n await g.loadModel(options.model || \"qwen3-0.6b\");\n\n // Determine which tools to expose\n const enabledTools = options.tools || Object.keys(TOOL_DEFINITIONS);\n\n // Tool handlers\n const handlers: Record<string, (args: any) => Promise<any>> = {\n gerbil_generate: async (args) => {\n const result = await g.generate(args.prompt, {\n maxTokens: args.maxTokens,\n temperature: args.temperature,\n system: args.system,\n thinking: args.thinking,\n });\n return {\n content: [\n {\n type: \"text\",\n text: result.thinking\n ? `Thinking: ${result.thinking}\\n\\nAnswer: ${result.text}`\n : result.text,\n },\n ],\n };\n },\n\n gerbil_summarize: async (args) => {\n const summary = await skills.summarize({\n content: args.content,\n length: args.length,\n format: args.format,\n });\n return { content: [{ type: \"text\", text: summary }] };\n },\n\n gerbil_explain: async (args) => {\n const explanation = await skills.explain({\n content: args.content,\n level: args.level,\n language: args.language,\n });\n return { content: [{ type: \"text\", text: explanation }] };\n },\n\n gerbil_review: async (args) => {\n const reviewResult = await skills.review({\n code: args.code,\n focus: args.focus,\n });\n return { content: [{ type: \"text\", text: reviewResult }] };\n },\n\n gerbil_commit: async (args) => {\n const message = await skills.commit({\n diff: args.diff,\n type: args.type,\n });\n return { content: [{ type: \"text\", text: message }] };\n },\n\n gerbil_translate: async (args) => {\n const translated = await skills.translate({\n text: args.text,\n to: args.to,\n from: args.from,\n });\n return { content: [{ type: \"text\", text: translated }] };\n },\n\n gerbil_embed: async (args) => {\n const result = await g.embed(args.text);\n return {\n content: [\n {\n type: \"text\",\n text: JSON.stringify({\n vector: result.vector.slice(0, 10),\n dimensions: result.vector.length,\n }),\n },\n ],\n };\n },\n };\n\n return {\n // Server info\n info: {\n name: \"gerbil\",\n version: \"1.0.0\",\n description: \"Local LLM inference via Gerbil\",\n },\n\n // List available tools\n listTools: () =>\n enabledTools\n .filter((t) => TOOL_DEFINITIONS[t as keyof typeof TOOL_DEFINITIONS])\n .map((t) => TOOL_DEFINITIONS[t as keyof typeof TOOL_DEFINITIONS]),\n\n // Call a tool\n callTool: async (name: string, args: any) => {\n const handler = handlers[name];\n if (!handler) {\n throw new Error(`Unknown tool: ${name}`);\n }\n return handler(args);\n },\n\n // Get Gerbil instance\n gerbil: g,\n };\n}\n\n/**\n * Start MCP server with stdio transport\n */\nexport async function startMCPServer(options: MCPServerOptions = {}) {\n const server = await createMCPServer(options);\n\n // Simple stdio protocol handler\n const readline = await import(\"readline\");\n const rl = readline.createInterface({\n input: process.stdin,\n output: process.stdout,\n terminal: false,\n });\n\n console.error(\"🐹 Gerbil MCP Server\");\n console.error(` Model: ${options.model || \"qwen3-0.6b\"}`);\n console.error(\n ` Tools: ${server\n .listTools()\n .map((t) => t.name)\n .join(\", \")}`,\n );\n console.error(\" Ready for connections...\");\n\n rl.on(\"line\", async (line) => {\n try {\n const request = JSON.parse(line);\n\n let 
response: any;\n\n switch (request.method) {\n case \"initialize\":\n response = {\n protocolVersion: \"2024-11-05\",\n serverInfo: server.info,\n capabilities: {\n tools: {},\n },\n };\n break;\n\n case \"tools/list\":\n response = { tools: server.listTools() };\n break;\n\n case \"tools/call\":\n response = await server.callTool(request.params.name, request.params.arguments);\n break;\n\n default:\n response = { error: { code: -32_601, message: \"Method not found\" } };\n }\n\n console.log(JSON.stringify({ jsonrpc: \"2.0\", id: request.id, result: response }));\n } catch (error) {\n console.log(\n JSON.stringify({\n jsonrpc: \"2.0\",\n id: null,\n error: { code: -32_700, message: String(error) },\n }),\n );\n }\n });\n}\n\nexport default { createMCPServer, startMCPServer };\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAoCA,MAAM,mBAAmB;CACvB,UAAU;EACR,MAAM;EACN,aAAa;EACb,aAAa;GACX,MAAM;GACN,YAAY;IACV,QAAQ;KAAE,MAAM;KAAU,aAAa;KAA+B;IACtE,WAAW;KAAE,MAAM;KAAU,aAAa;KAA8B;IACxE,aAAa;KAAE,MAAM;KAAU,aAAa;KAA8B;IAC1E,QAAQ;KAAE,MAAM;KAAU,aAAa;KAAiB;IACxD,UAAU;KAAE,MAAM;KAAW,aAAa;KAAwB;IACnE;GACD,UAAU,CAAC,SAAS;GACrB;EACF;CACD,WAAW;EACT,MAAM;EACN,aAAa;EACb,aAAa;GACX,MAAM;GACN,YAAY;IACV,SAAS;KAAE,MAAM;KAAU,aAAa;KAAwB;IAChE,QAAQ;KAAE,MAAM;KAAU,MAAM;MAAC;MAAS;MAAU;MAAO;KAAE;IAC7D,QAAQ;KAAE,MAAM;KAAU,MAAM,CAAC,aAAa,UAAU;KAAE;IAC3D;GACD,UAAU,CAAC,UAAU;GACtB;EACF;CACD,SAAS;EACP,MAAM;EACN,aAAa;EACb,aAAa;GACX,MAAM;GACN,YAAY;IACV,SAAS;KAAE,MAAM;KAAU,aAAa;KAA8B;IACtE,OAAO;KAAE,MAAM;KAAU,MAAM;MAAC;MAAY;MAAgB;MAAS;KAAE;IACvE,UAAU;KAAE,MAAM;KAAU,aAAa;KAAwB;IAClE;GACD,UAAU,CAAC,UAAU;GACtB;EACF;CACD,QAAQ;EACN,MAAM;EACN,aAAa;EACb,aAAa;GACX,MAAM;GACN,YAAY;IACV,MAAM;KAAE,MAAM;KAAU,aAAa;KAAkB;IACvD,OAAO;KACL,MAAM;KACN,OAAO;MAAE,MAAM;MAAU,MAAM;OAAC;OAAY;OAAe;OAAS;OAAO;MAAE;KAC9E;IACF;GACD,UAAU,CAAC,OAAO;GACnB;EACF;CACD,QAAQ;EACN,MAAM;EACN,aAAa;EACb,aAAa;GACX,MAAM;GACN,YAAY;IACV,MAAM;KAAE,MAAM;KAAU,aAAa;KAAY;IACjD,MAAM;KAAE,MAAM;KAAU,MAAM;MAAC;MAAgB;MAAU;MAAW;KAAE;IACvE;GACD,UAAU,CAAC,OAAO;GACnB;EACF;CACD,WAAW;EACT,MAAM;EACN,aAAa;EACb,aAAa;GACX,MAAM;GACN,YAAY;IACV,MAAM;KAAE,MAAM;KAAU,aAAa;KAAqB;IAC1D,IAAI;KAAE,MAAM;KAAU,aAAa;KAAmB;IACtD,MAAM;KAAE,MAAM;KAAU,aAAa;KAA8B;IACpE;GACD,UAAU,CAAC,QAAQ,KAAK;GACzB;EACF;CACD,OAAO;EACL,MAAM;EACN,aAAa;EACb,aAAa;GACX,MAAM;GACN,YAAY,EACV,MAAM;IAAE,MAAM;IAAU,aAAa;IAAiB,EACvD;GACD,UAAU,CAAC,OAAO;GACnB;EACF;CACF;;;;AAKD,eAAsB,gBAAgB,UAA4B,EAAE,EAAE;CACpE,MAAM,IAAI,IAAI,OAAO,QAAQ;AAG7B,OAAM,EAAE,UAAU,QAAQ,SAAS,aAAa;CAGhD,MAAM,eAAe,QAAQ,SAAS,OAAO,KAAK,iBAAiB;CAGnE,MAAMA,WAAwD;EAC5D,iBAAiB,OAAO,SAAS;GAC/B,MAAM,SAAS,MAAM,EAAE,SAAS,KAAK,QAAQ;IAC3C,WAAW,KAAK;IAChB,aAAa,KAAK;IAClB,QAAQ,KAAK;IACb,UAAU,KAAK;IAChB,CAAC;AACF,UAAO,EACL,SAAS,CACP;IACE,MAAM;IACN,MAAM,OAAO,WACT,aAAa,OAAO,SAAS,cAAc,OAAO,SAClD,OAAO;IACZ,CACF,EACF;;EAGH,kBAAkB,OAAO,SAAS;AAMhC,UAAO,EAAE,SAAS,CAAC;IAAE,MAAM;IAAQ,MALnB,MAAMC,UAAiB;KACrC,SAAS,KAAK;KACd,QAAQ,KAAK;KACb,QAAQ,KAAK;KACd,CAAC;IACgD,CAAC,EAAE;;EAGvD,gBAAgB,OAAO,SAAS;AAM9B,UAAO,EAAE,SAAS,CAAC;IAAE,MAAM;IAAQ,MALf,MAAMC,QAAe;KACvC,SAAS,KAAK;KACd,OAAO,KAAK;KACZ,UAAU,KAAK;KAChB,CAAC;IACoD,CAAC,EAAE;;EAG3D,eAAe,OAAO,SAAS;AAK7B,UAAO,EAAE,SAAS,CAAC;IAAE,MAAM;IAAQ,MAJd,MAAMC,OAAc;KACvC,MAAM,KAAK;KACX,OAAO,KAAK;KACb,CAAC;IACqD,CAAC,EAAE;;EAG5D,eAAe,OAAO,SAAS;AAK7B,UAAO,EAAE,SAAS,CAAC;IAAE,MAAM;IAAQ,MAJnB,MAAMC,OAAc;KAClC,MAAM,KAAK;KACX,MAAM,KAAK;KACZ,CAAC;IACgD,CAAC,EAAE;;EAGvD,kBAAkB,OAAO,SAAS;AAMhC,UAAO,EAAE,SAAS,CAAC;IAAE,MAAM;IAAQ,MALhB,MAAMC,UAAiB;KACxC,MAAM,KAAK;KACX,IAAI,KAAK;KACT,MAAM,KAAK;KACZ,CAAC;IACmD,CAAC,EAAE;;EAG1D,cAAc,OAAO,SAAS;GAC5B,MAAM,SAAS,MAAM,EAAE,MAAM,KAA
K,KAAK;AACvC,UAAO,EACL,SAAS,CACP;IACE,MAAM;IACN,MAAM,KAAK,UAAU;KACnB,QAAQ,OAAO,OAAO,MAAM,GAAG,GAAG;KAClC,YAAY,OAAO,OAAO;KAC3B,CAAC;IACH,CACF,EACF;;EAEJ;AAED,QAAO;EAEL,MAAM;GACJ,MAAM;GACN,SAAS;GACT,aAAa;GACd;EAGD,iBACE,aACG,QAAQ,MAAM,iBAAiB,GAAoC,CACnE,KAAK,MAAM,iBAAiB,GAAoC;EAGrE,UAAU,OAAO,MAAc,SAAc;GAC3C,MAAM,UAAU,SAAS;AACzB,OAAI,CAAC,QACH,OAAM,IAAI,MAAM,iBAAiB,OAAO;AAE1C,UAAO,QAAQ,KAAK;;EAItB,QAAQ;EACT;;;;;AAMH,eAAsB,eAAe,UAA4B,EAAE,EAAE;CACnE,MAAM,SAAS,MAAM,gBAAgB,QAAQ;CAI7C,MAAM,MADW,MAAM,OAAO,aACV,gBAAgB;EAClC,OAAO,QAAQ;EACf,QAAQ,QAAQ;EAChB,UAAU;EACX,CAAC;AAEF,SAAQ,MAAM,uBAAuB;AACrC,SAAQ,MAAM,aAAa,QAAQ,SAAS,eAAe;AAC3D,SAAQ,MACN,aAAa,OACV,WAAW,CACX,KAAK,MAAM,EAAE,KAAK,CAClB,KAAK,KAAK,GACd;AACD,SAAQ,MAAM,8BAA8B;AAE5C,IAAG,GAAG,QAAQ,OAAO,SAAS;AAC5B,MAAI;GACF,MAAM,UAAU,KAAK,MAAM,KAAK;GAEhC,IAAIC;AAEJ,WAAQ,QAAQ,QAAhB;IACE,KAAK;AACH,gBAAW;MACT,iBAAiB;MACjB,YAAY,OAAO;MACnB,cAAc,EACZ,OAAO,EAAE,EACV;MACF;AACD;IAEF,KAAK;AACH,gBAAW,EAAE,OAAO,OAAO,WAAW,EAAE;AACxC;IAEF,KAAK;AACH,gBAAW,MAAM,OAAO,SAAS,QAAQ,OAAO,MAAM,QAAQ,OAAO,UAAU;AAC/E;IAEF,QACE,YAAW,EAAE,OAAO;KAAE,MAAM;KAAS,SAAS;KAAoB,EAAE;;AAGxE,WAAQ,IAAI,KAAK,UAAU;IAAE,SAAS;IAAO,IAAI,QAAQ;IAAI,QAAQ;IAAU,CAAC,CAAC;WAC1E,OAAO;AACd,WAAQ,IACN,KAAK,UAAU;IACb,SAAS;IACT,IAAI;IACJ,OAAO;KAAE,MAAM;KAAS,SAAS,OAAO,MAAM;KAAE;IACjD,CAAC,CACH;;GAEH;;AAGJ,kBAAe;CAAE;CAAiB;CAAgB"}
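For context on the stdio transport visible in the embedded MCP source above: startMCPServer reads one JSON-RPC message per line from stdin and writes replies to stdout, dispatching on "initialize", "tools/list", and "tools/call". A minimal sketch of the message shapes that switch statement handles follows; the tool name and arguments are hypothetical placeholders, not confirmed tool ids from the package.

// JSON-RPC shapes accepted by the stdio loop in startMCPServer.
// "example-tool" and its empty arguments object are hypothetical.
const initialize = { jsonrpc: "2.0", id: 1, method: "initialize" };
// reply: { jsonrpc: "2.0", id: 1, result: { protocolVersion: "2024-11-05",
//          serverInfo: ..., capabilities: { tools: {} } } }

const list = { jsonrpc: "2.0", id: 2, method: "tools/list" };
// reply: { jsonrpc: "2.0", id: 2, result: { tools: [...] } }

const call = {
  jsonrpc: "2.0",
  id: 3,
  method: "tools/call",
  params: { name: "example-tool", arguments: {} },
};
// reply: { jsonrpc: "2.0", id: 3, result: <server.callTool(...) output> }
// Unknown methods reply with { error: { code: -32601, message: "Method not found" } };
// unparsable input replies with id: null and error code -32700.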
@@ -1 +0,0 @@
- {"version":3,"file":"models-DKULvhOr.mjs","names":["BUILTIN_MODELS: Record<string, ModelConfig>","family: ModelConfig[\"family\"]"],"sources":["../src/core/models.ts"],"sourcesContent":["/**\n * Model Registry\n *\n * Supports built-in models and any HuggingFace model via hf:org/model syntax\n */\n\nimport type { ModelConfig, ModelSource } from \"./types.js\";\n\n// ============================================\n// Built-in Models (curated & tested)\n// ============================================\n\nexport const BUILTIN_MODELS: Record<string, ModelConfig> = {\n \"qwen3-0.6b\": {\n id: \"qwen3-0.6b\",\n repo: \"onnx-community/Qwen3-0.6B-ONNX\",\n description: \"Qwen3 0.6B - Best balance of speed and quality, supports thinking\",\n size: \"~400MB\",\n contextLength: 32_768,\n supportsThinking: true,\n supportsJson: true,\n family: \"qwen\",\n },\n \"qwen2.5-0.5b\": {\n id: \"qwen2.5-0.5b\",\n repo: \"onnx-community/Qwen2.5-0.5B-Instruct\",\n description: \"Qwen2.5 0.5B - Fast and capable\",\n size: \"~350MB\",\n contextLength: 32_768,\n supportsThinking: false,\n supportsJson: true,\n family: \"qwen\",\n },\n \"qwen2.5-coder-0.5b\": {\n id: \"qwen2.5-coder-0.5b\",\n repo: \"onnx-community/Qwen2.5-Coder-0.5B-Instruct\",\n description: \"Qwen2.5 Coder 0.5B - Optimized for code\",\n size: \"~400MB\",\n contextLength: 32_768,\n supportsThinking: false,\n supportsJson: true,\n family: \"qwen\",\n },\n \"smollm2-360m\": {\n id: \"smollm2-360m\",\n repo: \"HuggingFaceTB/SmolLM2-360M-Instruct\",\n description: \"SmolLM2 360M - Fast, good for simple tasks\",\n size: \"~250MB\",\n contextLength: 8192,\n supportsThinking: false,\n supportsJson: false,\n family: \"smollm\",\n },\n \"smollm2-135m\": {\n id: \"smollm2-135m\",\n repo: \"HuggingFaceTB/SmolLM2-135M-Instruct\",\n description: \"SmolLM2 135M - Fastest, basic generation\",\n size: \"~100MB\",\n contextLength: 8192,\n supportsThinking: false,\n supportsJson: false,\n family: \"smollm\",\n },\n \"phi-3-mini\": {\n id: \"phi-3-mini\",\n repo: \"microsoft/Phi-3-mini-4k-instruct-onnx\",\n description: \"Phi-3 Mini - High quality, larger model\",\n size: \"~2.1GB\",\n contextLength: 4096,\n supportsThinking: false,\n supportsJson: true,\n family: \"phi\",\n },\n};\n\n// ============================================\n// Model Resolution\n// ============================================\n\n/**\n * Parse model identifier and resolve to source\n *\n * Supported formats:\n * - \"qwen3-0.6b\" (built-in)\n * - \"hf:org/model\" (HuggingFace shorthand)\n * - \"https://huggingface.co/org/model\" (full URL)\n * - \"file:./path/to/model\" (local path)\n */\nexport function resolveModel(modelId: string): ModelSource {\n // Built-in model\n if (BUILTIN_MODELS[modelId]) {\n return {\n type: \"builtin\",\n path: BUILTIN_MODELS[modelId].repo,\n };\n }\n\n // HuggingFace shorthand: hf:org/model\n if (modelId.startsWith(\"hf:\")) {\n const repo = modelId.slice(3);\n return {\n type: \"huggingface\",\n path: repo,\n };\n }\n\n // HuggingFace URL\n if (modelId.startsWith(\"https://huggingface.co/\")) {\n const repo = modelId.replace(\"https://huggingface.co/\", \"\");\n return {\n type: \"huggingface\",\n path: repo,\n };\n }\n\n // Local file\n if (modelId.startsWith(\"file:\")) {\n const path = modelId.slice(5);\n return {\n type: \"local\",\n path,\n };\n }\n\n // Assume it's a HuggingFace repo if it contains a slash\n if (modelId.includes(\"/\")) {\n return {\n type: \"huggingface\",\n path: modelId,\n };\n }\n\n // Unknown - treat as HuggingFace\n return {\n type: 
\"huggingface\",\n path: modelId,\n };\n}\n\n/**\n * Get model config (built-in only)\n */\nexport function getModelConfig(modelId: string): ModelConfig | null {\n return BUILTIN_MODELS[modelId] || null;\n}\n\n/**\n * Create model config for external model\n */\nexport function createExternalModelConfig(modelId: string, repo: string): ModelConfig {\n // Try to infer family from repo name\n let family: ModelConfig[\"family\"] = \"other\";\n const repoLower = repo.toLowerCase();\n\n if (repoLower.includes(\"qwen\")) {\n family = \"qwen\";\n } else if (repoLower.includes(\"smollm\")) {\n family = \"smollm\";\n } else if (repoLower.includes(\"phi\")) {\n family = \"phi\";\n } else if (repoLower.includes(\"mistral\")) {\n family = \"mistral\";\n } else if (repoLower.includes(\"llama\")) {\n family = \"llama\";\n }\n\n return {\n id: modelId,\n repo,\n description: `External model: ${repo}`,\n size: \"Unknown\",\n contextLength: 4096, // Conservative default\n supportsThinking: family === \"qwen\",\n supportsJson: family === \"qwen\" || family === \"phi\",\n family,\n };\n}\n\n/**\n * List all built-in models\n */\nexport function listBuiltinModels(): ModelConfig[] {\n return Object.values(BUILTIN_MODELS);\n}\n\n/**\n * Search HuggingFace models (placeholder - would need HF API)\n */\nexport async function searchModels(query: string): Promise<ModelConfig[]> {\n // TODO: Implement HuggingFace API search\n // For now, filter built-in models\n const q = query.toLowerCase();\n return listBuiltinModels().filter(\n (m) =>\n m.id.toLowerCase().includes(q) ||\n m.description.toLowerCase().includes(q) ||\n m.family.toLowerCase().includes(q),\n );\n}\n"],"mappings":";AAYA,MAAaA,iBAA8C;CACzD,cAAc;EACZ,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,eAAe;EACf,kBAAkB;EAClB,cAAc;EACd,QAAQ;EACT;CACD,gBAAgB;EACd,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,eAAe;EACf,kBAAkB;EAClB,cAAc;EACd,QAAQ;EACT;CACD,sBAAsB;EACpB,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,eAAe;EACf,kBAAkB;EAClB,cAAc;EACd,QAAQ;EACT;CACD,gBAAgB;EACd,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,eAAe;EACf,kBAAkB;EAClB,cAAc;EACd,QAAQ;EACT;CACD,gBAAgB;EACd,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,eAAe;EACf,kBAAkB;EAClB,cAAc;EACd,QAAQ;EACT;CACD,cAAc;EACZ,IAAI;EACJ,MAAM;EACN,aAAa;EACb,MAAM;EACN,eAAe;EACf,kBAAkB;EAClB,cAAc;EACd,QAAQ;EACT;CACF;;;;;;;;;;AAeD,SAAgB,aAAa,SAA8B;AAEzD,KAAI,eAAe,SACjB,QAAO;EACL,MAAM;EACN,MAAM,eAAe,SAAS;EAC/B;AAIH,KAAI,QAAQ,WAAW,MAAM,CAE3B,QAAO;EACL,MAAM;EACN,MAHW,QAAQ,MAAM,EAAE;EAI5B;AAIH,KAAI,QAAQ,WAAW,0BAA0B,CAE/C,QAAO;EACL,MAAM;EACN,MAHW,QAAQ,QAAQ,2BAA2B,GAAG;EAI1D;AAIH,KAAI,QAAQ,WAAW,QAAQ,CAE7B,QAAO;EACL,MAAM;EACN,MAHW,QAAQ,MAAM,EAAE;EAI5B;AAIH,KAAI,QAAQ,SAAS,IAAI,CACvB,QAAO;EACL,MAAM;EACN,MAAM;EACP;AAIH,QAAO;EACL,MAAM;EACN,MAAM;EACP;;;;;AAMH,SAAgB,eAAe,SAAqC;AAClE,QAAO,eAAe,YAAY;;;;;AAMpC,SAAgB,0BAA0B,SAAiB,MAA2B;CAEpF,IAAIC,SAAgC;CACpC,MAAM,YAAY,KAAK,aAAa;AAEpC,KAAI,UAAU,SAAS,OAAO,CAC5B,UAAS;UACA,UAAU,SAAS,SAAS,CACrC,UAAS;UACA,UAAU,SAAS,MAAM,CAClC,UAAS;UACA,UAAU,SAAS,UAAU,CACtC,UAAS;UACA,UAAU,SAAS,QAAQ,CACpC,UAAS;AAGX,QAAO;EACL,IAAI;EACJ;EACA,aAAa,mBAAmB;EAChC,MAAM;EACN,eAAe;EACf,kBAAkB,WAAW;EAC7B,cAAc,WAAW,UAAU,WAAW;EAC9C;EACD;;;;;AAMH,SAAgB,oBAAmC;AACjD,QAAO,OAAO,OAAO,eAAe"}
@@ -1 +0,0 @@
- {"version":3,"file":"models-De2-_GmQ.d.mts","names":[],"sources":["../src/core/models.ts"],"sourcesContent":[],"mappings":";;;;cAYa,gBAAgB,eAAe;;;;;;;;;;iBA4E5B,YAAA,mBAA+B;;;;iBA6F/B,iBAAA,CAAA,GAAqB"}