@tryhamster/gerbil 1.0.0-rc.0

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/LICENSE +23 -0
  2. package/README.md +253 -0
  3. package/bin/cli.js +2 -0
  4. package/dist/auto-update-BbNHbSU1.mjs +3 -0
  5. package/dist/browser/index.d.mts +262 -0
  6. package/dist/browser/index.d.mts.map +1 -0
  7. package/dist/browser/index.mjs +755 -0
  8. package/dist/browser/index.mjs.map +1 -0
  9. package/dist/chrome-backend-C5Un08O4.mjs +771 -0
  10. package/dist/chrome-backend-C5Un08O4.mjs.map +1 -0
  11. package/dist/chrome-backend-CtwPENIW.mjs +3 -0
  12. package/dist/chunk-Ct1HF2bE.mjs +7 -0
  13. package/dist/cli.d.mts +1 -0
  14. package/dist/cli.mjs +7078 -0
  15. package/dist/cli.mjs.map +1 -0
  16. package/dist/frameworks/express.d.mts +22 -0
  17. package/dist/frameworks/express.d.mts.map +1 -0
  18. package/dist/frameworks/express.mjs +123 -0
  19. package/dist/frameworks/express.mjs.map +1 -0
  20. package/dist/frameworks/fastify.d.mts +11 -0
  21. package/dist/frameworks/fastify.d.mts.map +1 -0
  22. package/dist/frameworks/fastify.mjs +73 -0
  23. package/dist/frameworks/fastify.mjs.map +1 -0
  24. package/dist/frameworks/hono.d.mts +14 -0
  25. package/dist/frameworks/hono.d.mts.map +1 -0
  26. package/dist/frameworks/hono.mjs +82 -0
  27. package/dist/frameworks/hono.mjs.map +1 -0
  28. package/dist/frameworks/next.d.mts +31 -0
  29. package/dist/frameworks/next.d.mts.map +1 -0
  30. package/dist/frameworks/next.mjs +116 -0
  31. package/dist/frameworks/next.mjs.map +1 -0
  32. package/dist/frameworks/react.d.mts +56 -0
  33. package/dist/frameworks/react.d.mts.map +1 -0
  34. package/dist/frameworks/react.mjs +172 -0
  35. package/dist/frameworks/react.mjs.map +1 -0
  36. package/dist/frameworks/trpc.d.mts +12 -0
  37. package/dist/frameworks/trpc.d.mts.map +1 -0
  38. package/dist/frameworks/trpc.mjs +80 -0
  39. package/dist/frameworks/trpc.mjs.map +1 -0
  40. package/dist/gerbil-BfnsFWRE.mjs +644 -0
  41. package/dist/gerbil-BfnsFWRE.mjs.map +1 -0
  42. package/dist/gerbil-BjW-z7Fq.mjs +5 -0
  43. package/dist/gerbil-DZ1k3ChC.d.mts +138 -0
  44. package/dist/gerbil-DZ1k3ChC.d.mts.map +1 -0
  45. package/dist/index.d.mts +223 -0
  46. package/dist/index.d.mts.map +1 -0
  47. package/dist/index.mjs +13 -0
  48. package/dist/index.mjs.map +1 -0
  49. package/dist/integrations/ai-sdk.d.mts +78 -0
  50. package/dist/integrations/ai-sdk.d.mts.map +1 -0
  51. package/dist/integrations/ai-sdk.mjs +199 -0
  52. package/dist/integrations/ai-sdk.mjs.map +1 -0
  53. package/dist/integrations/langchain.d.mts +41 -0
  54. package/dist/integrations/langchain.d.mts.map +1 -0
  55. package/dist/integrations/langchain.mjs +93 -0
  56. package/dist/integrations/langchain.mjs.map +1 -0
  57. package/dist/integrations/llamaindex.d.mts +45 -0
  58. package/dist/integrations/llamaindex.d.mts.map +1 -0
  59. package/dist/integrations/llamaindex.mjs +86 -0
  60. package/dist/integrations/llamaindex.mjs.map +1 -0
  61. package/dist/integrations/mcp-client.d.mts +206 -0
  62. package/dist/integrations/mcp-client.d.mts.map +1 -0
  63. package/dist/integrations/mcp-client.mjs +507 -0
  64. package/dist/integrations/mcp-client.mjs.map +1 -0
  65. package/dist/integrations/mcp.d.mts +177 -0
  66. package/dist/integrations/mcp.d.mts.map +1 -0
  67. package/dist/integrations/mcp.mjs +8 -0
  68. package/dist/mcp-R8kRLIKb.mjs +348 -0
  69. package/dist/mcp-R8kRLIKb.mjs.map +1 -0
  70. package/dist/models-DKULvhOr.mjs +136 -0
  71. package/dist/models-DKULvhOr.mjs.map +1 -0
  72. package/dist/models-De2-_GmQ.d.mts +22 -0
  73. package/dist/models-De2-_GmQ.d.mts.map +1 -0
  74. package/dist/one-liner-BUQR0nqq.mjs +98 -0
  75. package/dist/one-liner-BUQR0nqq.mjs.map +1 -0
  76. package/dist/skills/index.d.mts +390 -0
  77. package/dist/skills/index.d.mts.map +1 -0
  78. package/dist/skills/index.mjs +7 -0
  79. package/dist/skills-D3CEpgDc.mjs +630 -0
  80. package/dist/skills-D3CEpgDc.mjs.map +1 -0
  81. package/dist/tools-BsiEE6f2.mjs +567 -0
  82. package/dist/tools-BsiEE6f2.mjs.map +1 -0
  83. package/dist/types-BS1N92Jt.d.mts +183 -0
  84. package/dist/types-BS1N92Jt.d.mts.map +1 -0
  85. package/dist/utils-7vXqtq2Q.mjs +63 -0
  86. package/dist/utils-7vXqtq2Q.mjs.map +1 -0
  87. package/docs/ai-sdk.md +80 -0
  88. package/docs/architecture/README.md +84 -0
  89. package/docs/architecture/caching.md +227 -0
  90. package/docs/architecture/inference.md +176 -0
  91. package/docs/architecture/overview.md +179 -0
  92. package/docs/architecture/streaming.md +261 -0
  93. package/docs/architecture/webgpu.md +213 -0
  94. package/docs/browser.md +328 -0
  95. package/docs/cli.md +155 -0
  96. package/docs/frameworks.md +90 -0
  97. package/docs/mcp-client.md +224 -0
  98. package/docs/mcp.md +109 -0
  99. package/docs/memory.md +229 -0
  100. package/docs/repl.md +473 -0
  101. package/docs/skills.md +261 -0
  102. package/docs/tools.md +304 -0
  103. package/package.json +207 -0
package/dist/gerbil-BfnsFWRE.mjs.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"gerbil-BfnsFWRE.mjs","names":["pipeline","rawPipeline","isBrowser","tfDevice: \"webgpu\" | \"wasm\" | \"cpu\"","chromeErr: any","tokenQueue: string[]","resolveNext: ((value: string | null) => void) | null","results: EmbedResult[]","messages: Array<{ role: string; content: string }>"],"sources":["../src/core/gerbil.ts"],"sourcesContent":["/**\n * Gerbil - Local GPU-accelerated LLM inference\n */\n\nimport {\n AutoModelForCausalLM,\n AutoTokenizer,\n env,\n type FeatureExtractionPipeline,\n type PreTrainedTokenizer,\n pipeline as rawPipeline,\n type TextGenerationPipeline,\n} from \"@huggingface/transformers\";\n\n// Wrapper to avoid TypeScript complexity issues with transformers.js types\nconst pipeline = rawPipeline as (task: string, model: string, options?: any) => Promise<any>;\n\n// Suppress noisy transformers.js warnings during model loading\nfunction suppressNoisyWarnings<T>(fn: () => Promise<T>): Promise<T> {\n const originalWarn = console.warn;\n console.warn = (...args: any[]) => {\n const msg = args[0]?.toString?.() || \"\";\n // Suppress \"Unable to determine content-length\" warnings from transformers.js\n if (msg.includes(\"content-length\") || msg.includes(\"Unable to determine\")) {\n return;\n }\n originalWarn.apply(console, args);\n };\n\n return fn().finally(() => {\n console.warn = originalWarn;\n });\n}\n\nimport {\n BUILTIN_MODELS,\n createExternalModelConfig,\n getModelConfig,\n resolveModel,\n} from \"./models.js\";\nimport type {\n EmbedOptions,\n EmbedResult,\n GenerateOptions,\n GenerateResult,\n GerbilConfig,\n JsonOptions,\n LoadOptions,\n ModelConfig,\n SessionStats,\n SystemInfo,\n} from \"./types.js\";\n\nimport { extractJson, zodToJsonSchema } from \"./utils.js\";\n\n// Configure transformers.js based on environment\nconst isBrowser = typeof window !== \"undefined\";\nenv.allowLocalModels = !isBrowser; // false in browser (fetch from HuggingFace)\nenv.useBrowserCache = isBrowser; // true in browser (cache in IndexedDB)\n\n// ============================================\n// Gerbil Class\n// ============================================\n\n// WebGPU initialization state for Node.js\nlet webgpuInitialized = false;\nlet webgpuAvailable = false;\n\n/**\n * Initialize WebGPU for Node.js environments\n * Called automatically before model loading\n */\nasync function initNodeWebGPU(): Promise<boolean> {\n if (webgpuInitialized) {\n return webgpuAvailable;\n }\n webgpuInitialized = true;\n\n // Skip if in browser (already has WebGPU)\n if (typeof window !== \"undefined\") {\n webgpuAvailable = \"gpu\" in navigator;\n return webgpuAvailable;\n }\n\n // Try to initialize WebGPU in Node.js via Dawn\n // Use Function constructor to hide import from bundlers\n try {\n const dynamicImport = new Function(\"specifier\", \"return import(specifier)\");\n const webgpuModule = await dynamicImport(\"webgpu\");\n const { create, globals } = webgpuModule;\n\n // Extend globalThis with WebGPU globals\n Object.assign(globalThis, globals);\n\n // Create navigator.gpu\n if (!(globalThis as any).navigator) {\n (globalThis as any).navigator = {};\n }\n (globalThis as any).navigator.gpu = create([]);\n\n webgpuAvailable = true;\n } catch {\n // WebGPU not available, will fall back to CPU\n webgpuAvailable = false;\n }\n\n return webgpuAvailable;\n}\n\n// ChromeGPUBackend is dynamically imported only in Node.js to avoid bundling puppeteer in browser\ntype ChromeGPUBackendType = import(\"./chrome-backend.js\").ChromeGPUBackend;\n\nexport class Gerbil {\n private 
generator: TextGenerationPipeline | null = null;\n private tokenizer: PreTrainedTokenizer | null = null;\n private model: any = null; // AutoModelForCausalLM instance\n private embedder: FeatureExtractionPipeline | null = null;\n private currentModel: string | null = null;\n private modelConfig: ModelConfig | null = null;\n private readonly config: GerbilConfig;\n private stats: SessionStats;\n private useDirect = false; // Use direct model loading (for WebGPU)\n private chromeBackend: ChromeGPUBackendType | null = null; // Chrome backend for Node.js WebGPU\n private _deviceMode: \"webgpu\" | \"cpu\" | \"wasm\" = \"cpu\"; // Track which backend is active\n\n constructor(config: GerbilConfig = {}) {\n this.config = config;\n this.stats = {\n prompts: 0,\n tokensIn: 0,\n tokensOut: 0,\n avgSpeed: 0,\n totalTime: 0,\n cacheHits: 0,\n cacheMisses: 0,\n };\n }\n\n // ============================================\n // Static Methods\n // ============================================\n\n static listModels(): ModelConfig[] {\n return Object.values(BUILTIN_MODELS);\n }\n\n static getModel(modelId: string): ModelConfig | undefined {\n return BUILTIN_MODELS[modelId];\n }\n\n // ============================================\n // Model Loading\n // ============================================\n\n /**\n * Load a model\n *\n * @example\n * ```ts\n * // Built-in model\n * await g.loadModel(\"qwen3-0.6b\");\n *\n * // HuggingFace model\n * await g.loadModel(\"hf:microsoft/Phi-3-mini\");\n *\n * // Local model\n * await g.loadModel(\"file:./models/my-model\");\n * ```\n */\n async loadModel(modelId = \"qwen3-0.6b\", options: LoadOptions = {}): Promise<void> {\n // Initialize WebGPU for Node.js if needed\n await initNodeWebGPU();\n\n const source = resolveModel(modelId);\n const { onProgress, device = \"auto\", dtype: userDtype } = options;\n\n // Get or create model config\n let config = getModelConfig(modelId);\n if (!config) {\n config = createExternalModelConfig(modelId, source.path);\n }\n\n onProgress?.({ status: `Loading ${modelId}...` });\n\n // Map device to transformers.js device\n // Browser supports: webgpu, wasm (no cpu)\n // Node supports: webgpu, cpu\n const isBrowser = typeof window !== \"undefined\";\n const fallbackDevice = isBrowser ? \"wasm\" : \"cpu\";\n let tfDevice: \"webgpu\" | \"wasm\" | \"cpu\" = fallbackDevice;\n if (device === \"webgpu\" || device === \"gpu\" || device === \"auto\") {\n tfDevice = \"webgpu\";\n }\n\n // Use q4f16 for WebGPU (required for Qwen3), q4 for CPU/WASM\n const dtype = userDtype ?? (tfDevice === \"webgpu\" ? 
\"q4f16\" : \"q4\");\n\n // Track if we're still in loading phase (to suppress progress during inference)\n let isLoading = true;\n let lastFile = \"\";\n let lastPct = -1;\n\n const progressCallback = (progress: any) => {\n if (!isLoading) {\n return; // Suppress progress after initial load\n }\n\n if (progress.status === \"progress\" && progress.file) {\n const pct = Math.round(progress.progress || 0);\n // Only report if file changed or progress increased significantly\n if (progress.file !== lastFile || pct >= lastPct + 5) {\n lastFile = progress.file;\n lastPct = pct;\n onProgress?.({\n status: `Downloading ${progress.file}`,\n progress: pct,\n file: progress.file,\n });\n }\n }\n };\n\n try {\n // Use direct model loading for browser WebGPU (like qwen-web does)\n // This bypasses pipeline() which may have different ONNX session config\n if (isBrowser && tfDevice === \"webgpu\") {\n onProgress?.({ status: \"Loading tokenizer...\" });\n this.tokenizer = (await suppressNoisyWarnings(() =>\n AutoTokenizer.from_pretrained(source.path, {\n progress_callback: progressCallback,\n }),\n )) as PreTrainedTokenizer;\n\n onProgress?.({ status: \"Loading model...\" });\n this.model = await suppressNoisyWarnings(() =>\n AutoModelForCausalLM.from_pretrained(source.path, {\n dtype,\n device: tfDevice,\n progress_callback: progressCallback,\n }),\n );\n\n this.useDirect = true;\n this._deviceMode = \"webgpu\";\n isLoading = false;\n this.currentModel = modelId;\n this.modelConfig = config;\n onProgress?.({ status: \"Ready (WebGPU)!\" });\n } else if (!isBrowser && tfDevice === \"webgpu\") {\n // Node.js + WebGPU: Use Chrome backend for real GPU acceleration\n onProgress?.({ status: \"Starting Chrome WebGPU backend...\" });\n\n // Dynamic import to avoid bundling puppeteer in browser builds\n const { ChromeGPUBackend } = await import(\"./chrome-backend.js\");\n this.chromeBackend = await ChromeGPUBackend.create({\n modelId: source.path,\n onProgress,\n });\n\n this.useDirect = false;\n this._deviceMode = \"webgpu\";\n isLoading = false;\n this.currentModel = modelId;\n this.modelConfig = config;\n // Ready status is set by ChromeGPUBackend\n } else {\n // Use pipeline for CPU / WASM\n const pipelineOptions = {\n dtype,\n device: tfDevice,\n progress_callback: progressCallback,\n };\n this.generator = (await suppressNoisyWarnings(() =>\n pipeline(\"text-generation\", source.path, pipelineOptions as any),\n )) as TextGenerationPipeline;\n\n this.useDirect = false;\n this._deviceMode = tfDevice as \"cpu\" | \"wasm\";\n isLoading = false;\n this.currentModel = modelId;\n this.modelConfig = config;\n onProgress?.({ status: `Ready (${tfDevice.toUpperCase()})!` });\n }\n } catch (err) {\n // Fallback to CPU/WASM if GPU fails (silently)\n if (tfDevice !== fallbackDevice) {\n onProgress?.({ status: `Using ${fallbackDevice.toUpperCase()}...` });\n\n // Clean up Chrome backend if it was partially initialized\n if (this.chromeBackend) {\n await this.chromeBackend.dispose();\n this.chromeBackend = null;\n }\n\n // Fallback always uses pipeline (WASM/CPU don't need direct loading)\n this.generator = (await suppressNoisyWarnings(() =>\n pipeline(\"text-generation\", source.path, {\n dtype: \"q4\",\n device: fallbackDevice,\n progress_callback: progressCallback,\n } as any),\n )) as TextGenerationPipeline;\n\n this.useDirect = false;\n this._deviceMode = fallbackDevice as \"cpu\" | \"wasm\";\n isLoading = false;\n this.currentModel = modelId;\n this.modelConfig = config;\n onProgress?.({ status: `Ready 
(${fallbackDevice.toUpperCase()})!` });\n } else {\n throw err;\n }\n }\n }\n\n /**\n * Check if a model is loaded\n */\n isLoaded(): boolean {\n return (\n this.generator !== null ||\n (this.useDirect && this.model !== null) ||\n this.chromeBackend !== null\n );\n }\n\n /**\n * Get current model info\n */\n getModelInfo(): ModelConfig | null {\n return this.modelConfig;\n }\n\n /**\n * Get current device mode (webgpu, cpu, or wasm)\n */\n getDeviceMode(): \"webgpu\" | \"cpu\" | \"wasm\" {\n return this._deviceMode;\n }\n\n /**\n * Get dtype used for current model\n */\n getDtype(): string {\n // WebGPU uses q4f16, CPU/WASM use q4\n return this._deviceMode === \"webgpu\" ? \"q4f16\" : \"q4\";\n }\n\n /**\n * Get Chrome backend status (if using WebGPU via Chrome)\n */\n getChromeStatus(): {\n pid: number | null;\n port: number;\n modelId: string;\n startedAt: Date | null;\n } | null {\n if (!this.chromeBackend) {\n return null;\n }\n return this.chromeBackend.getStatus();\n }\n\n /**\n * Get Chrome memory usage (if using WebGPU via Chrome)\n * Returns JS heap memory in bytes\n */\n async getChromeMemory(): Promise<{ jsHeapUsed: number; jsHeapTotal: number } | null> {\n if (!this.chromeBackend) {\n return null;\n }\n return this.chromeBackend.getMemoryUsage();\n }\n\n /**\n * Get memory usage in GB (if using WebGPU via Chrome)\n */\n async getMemoryUsage(): Promise<{ usedGB: number; totalGB: number; usedPercent: number } | null> {\n if (!this.chromeBackend) {\n return null;\n }\n return this.chromeBackend.getMemoryStats();\n }\n\n /**\n * Clear KV cache to free memory\n * This will reset the conversation context but free up memory\n */\n async clearCache(): Promise<void> {\n if (this.chromeBackend) {\n await this.chromeBackend.reset();\n }\n }\n\n /**\n * Check memory usage and cleanup if needed\n * @param thresholdGB Memory threshold in GB (default: 8)\n * @returns true if cleanup was performed\n */\n async checkMemoryAndCleanup(thresholdGB = 8): Promise<boolean> {\n if (!this.chromeBackend) {\n return false;\n }\n return this.chromeBackend.checkMemoryAndCleanup(thresholdGB);\n }\n\n // ============================================\n // Text Generation\n // ============================================\n\n /**\n * Generate text\n */\n async generate(prompt: string, options: GenerateOptions = {}): Promise<GenerateResult> {\n if (!this.isLoaded()) {\n // Auto-load default model\n await this.loadModel(this.config.model || \"qwen3-0.6b\");\n }\n\n const {\n maxTokens = 256,\n temperature = 0.7,\n topP = 0.9,\n topK = 50,\n thinking = false,\n system,\n } = options;\n\n const startTime = performance.now();\n\n try {\n let rawText = \"\";\n\n if (this.chromeBackend) {\n // Chrome backend approach (for Node.js WebGPU via Chrome)\n try {\n rawText = await this.chromeBackend.generate(prompt, {\n maxTokens,\n temperature,\n topP,\n topK,\n thinking,\n system,\n // Wrap onToken to match Gerbil's simpler signature\n onToken: options.onToken ? 
(t) => options.onToken!(t.text) : undefined,\n });\n } catch (chromeErr: any) {\n // If Chrome died (OOM, crash), fall back to CPU silently\n if (chromeErr?.message === \"CHROME_BACKEND_DEAD\" || !this.chromeBackend?.isAlive()) {\n await this.chromeBackend?.dispose().catch(() => {});\n this.chromeBackend = null;\n this._deviceMode = \"cpu\";\n // Load CPU fallback and retry\n const modelPath = this.currentModel || \"qwen3-0.6b\";\n this.generator = (await pipeline(\"text-generation\", modelPath, {\n dtype: \"q4\",\n device: \"cpu\",\n } as any)) as TextGenerationPipeline;\n // Retry with CPU\n return this.generate(prompt, options);\n }\n throw chromeErr;\n }\n } else if (this.useDirect && this.model && this.tokenizer) {\n // Direct model approach (for browser WebGPU)\n const messages = this.buildMessages(prompt, { ...options, thinking });\n\n const inputs = (this.tokenizer as any).apply_chat_template(messages, {\n add_generation_prompt: true,\n return_dict: true,\n enable_thinking: thinking, // Qwen3 thinking mode\n });\n\n const output = await this.model.generate({\n ...inputs,\n max_new_tokens: maxTokens,\n temperature: temperature > 0 ? temperature : undefined,\n top_p: topP,\n top_k: topK,\n do_sample: temperature > 0,\n });\n\n // Get input length to extract only generated tokens\n const inputLength = inputs.input_ids.dims?.[1] || inputs.input_ids.data?.length || 0;\n\n // Slice output tensor to get only new tokens (skip prompt)\n const outputTokens = output.slice(null, [inputLength, null]);\n const decoded = this.tokenizer.batch_decode(outputTokens, {\n skip_special_tokens: true,\n });\n\n rawText = decoded[0] || \"\";\n\n // If we still have prompt artifacts, extract assistant response\n if (rawText.toLowerCase().includes(\"assistant\")) {\n const match = rawText.match(/assistant[:\\s]*([\\s\\S]*)/i);\n if (match) {\n rawText = match[1].trim();\n }\n }\n } else if (this.generator) {\n // Pipeline approach (for Node.js / CPU / WASM)\n const formattedPrompt = this.formatPrompt(prompt, { ...options, thinking });\n\n const output = await this.generator(formattedPrompt, {\n max_new_tokens: maxTokens,\n temperature,\n top_p: topP,\n top_k: topK,\n do_sample: temperature > 0,\n return_full_text: false,\n });\n\n // Extract text from pipeline output\n if (Array.isArray(output) && output[0]) {\n const result = output[0] as any;\n if (Array.isArray(result.generated_text)) {\n const last = result.generated_text.at(-1);\n rawText = last?.content || \"\";\n } else {\n rawText = result.generated_text || \"\";\n }\n }\n } else {\n throw new Error(\"No model loaded\");\n }\n\n const endTime = performance.now();\n const totalTime = endTime - startTime;\n\n rawText = this.cleanOutput(rawText);\n\n // Always parse thinking to strip <think> tags from output\n // (model may generate them even without thinking mode enabled)\n const { thinking: thinkingText, response } = this.parseThinking(rawText);\n\n // Only include thinking in result if mode was enabled\n const finalThinking = thinking ? 
thinkingText : undefined;\n\n const tokensGenerated = Math.ceil(response.length / 4);\n\n // Update stats\n this.stats.prompts++;\n this.stats.tokensOut += tokensGenerated;\n this.stats.totalTime += totalTime;\n this.stats.avgSpeed = (this.stats.tokensOut / this.stats.totalTime) * 1000;\n\n return {\n text: response,\n thinking: finalThinking,\n tokensGenerated,\n tokensPerSecond: (tokensGenerated / totalTime) * 1000,\n totalTime,\n finishReason: \"stop\",\n provider: \"local\",\n cached: false,\n };\n } catch (_error) {\n return {\n text: \"\",\n tokensGenerated: 0,\n tokensPerSecond: 0,\n totalTime: performance.now() - startTime,\n finishReason: \"error\",\n provider: \"local\",\n cached: false,\n };\n }\n }\n\n /**\n * Stream text generation (simulated token-by-token)\n *\n * Note: Yields the raw output including <think> tags if thinking mode is enabled.\n * The final result has parsed thinking separated out.\n */\n async *stream(\n prompt: string,\n options: GenerateOptions = {},\n ): AsyncGenerator<string, GenerateResult, unknown> {\n if (!this.isLoaded()) {\n await this.loadModel(this.config.model || \"qwen3-0.6b\");\n }\n\n const startTime = performance.now();\n\n // For Chrome backend, use real streaming via onToken callback\n if (this.chromeBackend) {\n let fullText = \"\";\n const tokenQueue: string[] = [];\n let resolveNext: ((value: string | null) => void) | null = null;\n let done = false;\n\n // Start generation with streaming callback\n const generatePromise = this.chromeBackend\n .generate(prompt, {\n ...options,\n onToken: (token) => {\n fullText += token.text;\n if (resolveNext) {\n resolveNext(token.text);\n resolveNext = null;\n } else {\n tokenQueue.push(token.text);\n }\n },\n })\n .then(() => {\n done = true;\n if (resolveNext) {\n resolveNext(null);\n }\n })\n .catch((err) => {\n done = true;\n if (resolveNext) {\n resolveNext(null);\n }\n throw err;\n });\n\n // Yield tokens as they arrive\n while (!done || tokenQueue.length > 0) {\n if (tokenQueue.length > 0) {\n const token = tokenQueue.shift()!;\n yield token;\n options.onToken?.(token);\n } else if (!done) {\n const token = await new Promise<string | null>((resolve) => {\n resolveNext = resolve;\n });\n if (token) {\n yield token;\n options.onToken?.(token);\n }\n }\n }\n\n await generatePromise;\n\n const { thinking: thinkingText, response } = this.parseThinking(fullText);\n const tokensGenerated = Math.ceil(response.length / 4);\n const totalTime = performance.now() - startTime;\n\n return {\n text: response,\n thinking: options.thinking ? 
thinkingText : undefined,\n tokensGenerated,\n totalTime,\n tokensPerSecond: (tokensGenerated / totalTime) * 1000,\n finishReason: \"stop\" as const,\n };\n }\n\n // For pipeline/direct model, use fake streaming (generate then yield)\n const result = await this.generateRaw(prompt, options);\n\n // Yield word by word for more accurate token simulation\n // (actual tokens average ~4 chars, words are a reasonable approximation)\n const words = result.rawText.split(/(\\s+)/);\n for (const word of words) {\n if (word) {\n yield word;\n options.onToken?.(word);\n }\n }\n\n return result.result;\n }\n\n /**\n * Internal: Generate with raw text access for streaming\n */\n private async generateRaw(\n prompt: string,\n options: GenerateOptions = {},\n ): Promise<{ rawText: string; result: GenerateResult }> {\n const { maxTokens = 256, temperature = 0.7, topP = 0.9, topK = 50, thinking = false } = options;\n\n const startTime = performance.now();\n const formattedPrompt = this.formatPrompt(prompt, { ...options, thinking });\n\n try {\n const output = await this.generator!(formattedPrompt, {\n max_new_tokens: maxTokens,\n temperature,\n top_p: topP,\n top_k: topK,\n do_sample: temperature > 0,\n return_full_text: false,\n });\n\n const endTime = performance.now();\n const totalTime = endTime - startTime;\n\n // Extract text from output\n let rawText = \"\";\n if (Array.isArray(output) && output[0]) {\n const result = output[0] as any;\n if (Array.isArray(result.generated_text)) {\n const last = result.generated_text.at(-1);\n rawText = last?.content || \"\";\n } else {\n rawText = result.generated_text || \"\";\n }\n }\n\n rawText = this.cleanOutput(rawText);\n const { thinking: thinkingText, response } = this.parseThinking(rawText);\n const finalThinking = thinking ? thinkingText : undefined;\n const tokensGenerated = Math.ceil(response.length / 4);\n\n // Update stats\n this.stats.prompts++;\n this.stats.tokensOut += tokensGenerated;\n this.stats.totalTime += totalTime;\n this.stats.avgSpeed = (this.stats.tokensOut / this.stats.totalTime) * 1000;\n\n return {\n rawText,\n result: {\n text: response,\n thinking: finalThinking,\n tokensGenerated,\n tokensPerSecond: (tokensGenerated / totalTime) * 1000,\n totalTime,\n finishReason: \"stop\",\n provider: \"local\",\n cached: false,\n },\n };\n } catch (_error) {\n return {\n rawText: \"\",\n result: {\n text: \"\",\n tokensGenerated: 0,\n tokensPerSecond: 0,\n totalTime: performance.now() - startTime,\n finishReason: \"error\",\n provider: \"local\",\n cached: false,\n },\n };\n }\n }\n\n // ============================================\n // Structured Output (JSON)\n // ============================================\n\n /**\n * Generate structured JSON output\n */\n async json<T>(prompt: string, options: JsonOptions<T>): Promise<T> {\n const { schema, retries = 3, temperature = 0.3 } = options;\n\n const systemPrompt = `You are a JSON generator. You MUST respond with valid JSON only.\nNo explanations, no markdown, no code blocks. 
Just pure JSON.\nThe JSON must conform to this schema: ${JSON.stringify(zodToJsonSchema(schema))}`;\n\n for (let attempt = 0; attempt < retries; attempt++) {\n const result = await this.generate(prompt, {\n system: options.system || systemPrompt,\n temperature,\n maxTokens: 1000,\n });\n\n try {\n // Try to extract JSON from response\n const jsonStr = extractJson(result.text);\n const parsed = JSON.parse(jsonStr);\n const validated = schema.parse(parsed);\n return validated;\n } catch (error) {\n if (attempt === retries - 1) {\n throw new Error(`Failed to generate valid JSON after ${retries} attempts: ${error}`);\n }\n }\n }\n\n throw new Error(\"Failed to generate valid JSON\");\n }\n\n // ============================================\n // Embeddings\n // ============================================\n\n /**\n * Generate embeddings\n */\n async embed(text: string, options: EmbedOptions = {}): Promise<EmbedResult> {\n if (!this.embedder) {\n // Load embedding model\n const model = options.model || \"Xenova/all-MiniLM-L6-v2\";\n this.embedder = (await pipeline(\"feature-extraction\", model)) as FeatureExtractionPipeline;\n }\n\n const startTime = performance.now();\n const output = await this.embedder(text, {\n pooling: \"mean\",\n normalize: options.normalize !== false,\n });\n\n const vector = Array.from(output.data as Float32Array);\n\n return {\n vector,\n text,\n totalTime: performance.now() - startTime,\n };\n }\n\n /**\n * Generate embeddings for multiple texts\n */\n async embedBatch(texts: string[], options: EmbedOptions = {}): Promise<EmbedResult[]> {\n const results: EmbedResult[] = [];\n for (const text of texts) {\n results.push(await this.embed(text, options));\n }\n return results;\n }\n\n // ============================================\n // Stats & Info\n // ============================================\n\n /**\n * Get session stats\n */\n getStats(): SessionStats {\n return { ...this.stats };\n }\n\n /**\n * Get system info\n */\n getInfo(): SystemInfo {\n return {\n version: \"1.0.0\",\n model: this.modelConfig,\n device: {\n backend: \"transformers.js\",\n gpu: null, // TODO: detect GPU\n vram: null,\n status: this.isLoaded() ? 
\"ready\" : \"loading\",\n },\n context: {\n max: this.modelConfig?.contextLength || 0,\n used: 0,\n available: this.modelConfig?.contextLength || 0,\n },\n cache: {\n location: \"~/.gerbil/models\",\n size: \"0 MB\",\n modelCount: 0,\n },\n };\n }\n\n /**\n * Reset stats\n */\n resetStats(): void {\n this.stats = {\n prompts: 0,\n tokensIn: 0,\n tokensOut: 0,\n avgSpeed: 0,\n totalTime: 0,\n cacheHits: 0,\n cacheMisses: 0,\n };\n }\n\n // ============================================\n // Cleanup\n // ============================================\n\n /**\n * Dispose of resources\n */\n async dispose(): Promise<void> {\n // Clean up Chrome backend first (most important to release resources)\n if (this.chromeBackend) {\n try {\n await this.chromeBackend.dispose();\n } catch {\n // Ignore errors during cleanup\n }\n this.chromeBackend = null;\n }\n\n if (this.generator) {\n if (typeof (this.generator as any).dispose === \"function\") {\n try {\n await (this.generator as any).dispose();\n } catch {\n // Ignore errors during cleanup\n }\n }\n this.generator = null;\n }\n if (this.embedder) {\n if (typeof (this.embedder as any).dispose === \"function\") {\n try {\n await (this.embedder as any).dispose();\n } catch {\n // Ignore errors during cleanup\n }\n }\n this.embedder = null;\n }\n this.currentModel = null;\n this.modelConfig = null;\n }\n\n // ============================================\n // Private Methods\n // ============================================\n\n private formatPrompt(prompt: string, options: GenerateOptions): string {\n const system = options.system || \"You are a helpful assistant.\";\n const isQwen = this.currentModel?.includes(\"qwen\");\n\n if (options.thinking && this.modelConfig?.supportsThinking) {\n const thinkSystem = `${system}\\n\\nThink step-by-step before answering. 
Wrap your reasoning in <think></think> tags, then provide your answer.`;\n return `<|im_start|>system\\n${thinkSystem}<|im_end|>\\n<|im_start|>user\\n${prompt}<|im_end|>\\n<|im_start|>assistant\\n`;\n }\n\n if (isQwen) {\n return `<|im_start|>system\\n${system}<|im_end|>\\n<|im_start|>user\\n${prompt} /no_think<|im_end|>\\n<|im_start|>assistant\\n`;\n }\n\n return `<|im_start|>system\\n${system}<|im_end|>\\n<|im_start|>user\\n${prompt}<|im_end|>\\n<|im_start|>assistant\\n`;\n }\n\n private buildMessages(\n prompt: string,\n options: GenerateOptions,\n ): Array<{ role: string; content: string }> {\n const system = options.system || \"You are a helpful assistant.\";\n const messages: Array<{ role: string; content: string }> = [];\n\n // For direct model (WebGPU), enable_thinking is passed to apply_chat_template\n // so we don't need to add /no_think or modify the system prompt\n messages.push({ role: \"system\", content: system });\n messages.push({ role: \"user\", content: prompt });\n\n return messages;\n }\n\n private parseThinking(text: string): {\n thinking?: string;\n response: string;\n } {\n // Handle complete <think>...</think> blocks\n const match = text.match(/<think>([\\s\\S]*?)<\\/think>/);\n if (match) {\n const thinking = match[1].trim();\n const response = text.replace(/<think>[\\s\\S]*?<\\/think>/, \"\").trim();\n return { thinking, response };\n }\n\n // Handle unclosed <think> tags (model stopped mid-thought)\n const unclosedMatch = text.match(/<think>([\\s\\S]*)$/);\n if (unclosedMatch) {\n const thinking = unclosedMatch[1].trim();\n const response = text.replace(/<think>[\\s\\S]*$/, \"\").trim();\n return { thinking: thinking || undefined, response };\n }\n\n // Handle any remaining think tags\n const response = text.replace(/<\\/?think>/g, \"\").trim();\n return { response };\n }\n\n private cleanOutput(text: string): string {\n return (\n text\n .replace(/<\\|im_end\\|>/g, \"\")\n .replace(/<\\|im_start\\|>/g, \"\")\n .replace(/<\\|endoftext\\|>/g, \"\")\n .replace(/<\\/s>/g, \"\")\n // Clean up artifacts from direct model output\n .replace(/^\\/no_think\\s*/i, \"\")\n .replace(/^assistant\\s*/i, \"\")\n .replace(/^\\s*\\/no_think\\s*/gim, \"\")\n .replace(/^\\s*assistant\\s*/gim, \"\")\n // Clean up role markers that might appear\n .replace(/^(system|user|assistant):\\s*/gim, \"\")\n .trim()\n );\n }\n}\n\nexport default 
Gerbil;\n"],"mappings":";;;;;;;;AAeA,MAAMA,aAAWC;AAGjB,SAAS,sBAAyB,IAAkC;CAClE,MAAM,eAAe,QAAQ;AAC7B,SAAQ,QAAQ,GAAG,SAAgB;EACjC,MAAM,MAAM,KAAK,IAAI,YAAY,IAAI;AAErC,MAAI,IAAI,SAAS,iBAAiB,IAAI,IAAI,SAAS,sBAAsB,CACvE;AAEF,eAAa,MAAM,SAAS,KAAK;;AAGnC,QAAO,IAAI,CAAC,cAAc;AACxB,UAAQ,OAAO;GACf;;AAyBJ,MAAM,YAAY,OAAO,WAAW;AACpC,IAAI,mBAAmB,CAAC;AACxB,IAAI,kBAAkB;AAOtB,IAAI,oBAAoB;AACxB,IAAI,kBAAkB;;;;;AAMtB,eAAe,iBAAmC;AAChD,KAAI,kBACF,QAAO;AAET,qBAAoB;AAGpB,KAAI,OAAO,WAAW,aAAa;AACjC,oBAAkB,SAAS;AAC3B,SAAO;;AAKT,KAAI;EAGF,MAAM,EAAE,QAAQ,YADK,MADC,IAAI,SAAS,aAAa,2BAA2B,CAClC,SAAS;AAIlD,SAAO,OAAO,YAAY,QAAQ;AAGlC,MAAI,CAAE,WAAmB,UACvB,CAAC,WAAmB,YAAY,EAAE;AAEpC,EAAC,WAAmB,UAAU,MAAM,OAAO,EAAE,CAAC;AAE9C,oBAAkB;SACZ;AAEN,oBAAkB;;AAGpB,QAAO;;AAMT,IAAa,SAAb,MAAoB;CAClB,AAAQ,YAA2C;CACnD,AAAQ,YAAwC;CAChD,AAAQ,QAAa;CACrB,AAAQ,WAA6C;CACrD,AAAQ,eAA8B;CACtC,AAAQ,cAAkC;CAC1C,AAAiB;CACjB,AAAQ;CACR,AAAQ,YAAY;CACpB,AAAQ,gBAA6C;CACrD,AAAQ,cAAyC;CAEjD,YAAY,SAAuB,EAAE,EAAE;AACrC,OAAK,SAAS;AACd,OAAK,QAAQ;GACX,SAAS;GACT,UAAU;GACV,WAAW;GACX,UAAU;GACV,WAAW;GACX,WAAW;GACX,aAAa;GACd;;CAOH,OAAO,aAA4B;AACjC,SAAO,OAAO,OAAO,eAAe;;CAGtC,OAAO,SAAS,SAA0C;AACxD,SAAO,eAAe;;;;;;;;;;;;;;;;;CAsBxB,MAAM,UAAU,UAAU,cAAc,UAAuB,EAAE,EAAiB;AAEhF,QAAM,gBAAgB;EAEtB,MAAM,SAAS,aAAa,QAAQ;EACpC,MAAM,EAAE,YAAY,SAAS,QAAQ,OAAO,cAAc;EAG1D,IAAI,SAAS,eAAe,QAAQ;AACpC,MAAI,CAAC,OACH,UAAS,0BAA0B,SAAS,OAAO,KAAK;AAG1D,eAAa,EAAE,QAAQ,WAAW,QAAQ,MAAM,CAAC;EAKjD,MAAMC,cAAY,OAAO,WAAW;EACpC,MAAM,iBAAiBA,cAAY,SAAS;EAC5C,IAAIC,WAAsC;AAC1C,MAAI,WAAW,YAAY,WAAW,SAAS,WAAW,OACxD,YAAW;EAIb,MAAM,QAAQ,cAAc,aAAa,WAAW,UAAU;EAG9D,IAAI,YAAY;EAChB,IAAI,WAAW;EACf,IAAI,UAAU;EAEd,MAAM,oBAAoB,aAAkB;AAC1C,OAAI,CAAC,UACH;AAGF,OAAI,SAAS,WAAW,cAAc,SAAS,MAAM;IACnD,MAAM,MAAM,KAAK,MAAM,SAAS,YAAY,EAAE;AAE9C,QAAI,SAAS,SAAS,YAAY,OAAO,UAAU,GAAG;AACpD,gBAAW,SAAS;AACpB,eAAU;AACV,kBAAa;MACX,QAAQ,eAAe,SAAS;MAChC,UAAU;MACV,MAAM,SAAS;MAChB,CAAC;;;;AAKR,MAAI;AAGF,OAAID,eAAa,aAAa,UAAU;AACtC,iBAAa,EAAE,QAAQ,wBAAwB,CAAC;AAChD,SAAK,YAAa,MAAM,4BACtB,cAAc,gBAAgB,OAAO,MAAM,EACzC,mBAAmB,kBACpB,CAAC,CACH;AAED,iBAAa,EAAE,QAAQ,oBAAoB,CAAC;AAC5C,SAAK,QAAQ,MAAM,4BACjB,qBAAqB,gBAAgB,OAAO,MAAM;KAChD;KACA,QAAQ;KACR,mBAAmB;KACpB,CAAC,CACH;AAED,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,gBAAY;AACZ,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,iBAAa,EAAE,QAAQ,mBAAmB,CAAC;cAClC,CAACA,eAAa,aAAa,UAAU;AAE9C,iBAAa,EAAE,QAAQ,qCAAqC,CAAC;IAG7D,MAAM,EAAE,qBAAqB,MAAM,OAAO;AAC1C,SAAK,gBAAgB,MAAM,iBAAiB,OAAO;KACjD,SAAS,OAAO;KAChB;KACD,CAAC;AAEF,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,gBAAY;AACZ,SAAK,eAAe;AACpB,SAAK,cAAc;UAEd;IAEL,MAAM,kBAAkB;KACtB;KACA,QAAQ;KACR,mBAAmB;KACpB;AACD,SAAK,YAAa,MAAM,4BACtBF,WAAS,mBAAmB,OAAO,MAAM,gBAAuB,CACjE;AAED,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,gBAAY;AACZ,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,iBAAa,EAAE,QAAQ,UAAU,SAAS,aAAa,CAAC,KAAK,CAAC;;WAEzD,KAAK;AAEZ,OAAI,aAAa,gBAAgB;AAC/B,iBAAa,EAAE,QAAQ,SAAS,eAAe,aAAa,CAAC,MAAM,CAAC;AAGpE,QAAI,KAAK,eAAe;AACtB,WAAM,KAAK,cAAc,SAAS;AAClC,UAAK,gBAAgB;;AAIvB,SAAK,YAAa,MAAM,4BACtBA,WAAS,mBAAmB,OAAO,MAAM;KACvC,OAAO;KACP,QAAQ;KACR,mBAAmB;KACpB,CAAQ,CACV;AAED,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,gBAAY;AACZ,SAAK,eAAe;AACpB,SAAK,cAAc;AACnB,iBAAa,EAAE,QAAQ,UAAU,eAAe,aAAa,CAAC,KAAK,CAAC;SAEpE,OAAM;;;;;;CAQZ,WAAoB;AAClB,SACE,KAAK,cAAc,QAClB,KAAK,aAAa,KAAK,UAAU,QAClC,KAAK,kBAAkB;;;;;CAO3B,eAAmC;AACjC,SAAO,KAAK;;;;;CAMd,gBAA2C;AACzC,SAAO,KAAK;;;;;CAMd,WAAmB;AAEjB,SAAO,KAAK,gBAAgB,WAAW,UAAU;;;;;CAMnD,kBAKS;AACP,MAAI,CAAC,KAAK,cACR,QAAO;AAET,SAAO,KAAK,cAAc,WAAW;;;;;;CAOvC,MAAM,kBAA+E;AACnF,MAAI,CAAC,KAAK,cACR,QAAO;AAET,SAAO,KAAK,cAAc,gBAAgB;;;;;CAM5C,MAAM,iBAA2F;AAC/F,MAAI,CAAC,KAAK,cACR,QAAO;AAET,SAAO,KAAK,cAAc,gBAAgB;;;;;;C
AO5C,MAAM,aAA4B;AAChC,MAAI,KAAK,cACP,OAAM,KAAK,cAAc,OAAO;;;;;;;CASpC,MAAM,sBAAsB,cAAc,GAAqB;AAC7D,MAAI,CAAC,KAAK,cACR,QAAO;AAET,SAAO,KAAK,cAAc,sBAAsB,YAAY;;;;;CAU9D,MAAM,SAAS,QAAgB,UAA2B,EAAE,EAA2B;AACrF,MAAI,CAAC,KAAK,UAAU,CAElB,OAAM,KAAK,UAAU,KAAK,OAAO,SAAS,aAAa;EAGzD,MAAM,EACJ,YAAY,KACZ,cAAc,IACd,OAAO,IACP,OAAO,IACP,WAAW,OACX,WACE;EAEJ,MAAM,YAAY,YAAY,KAAK;AAEnC,MAAI;GACF,IAAI,UAAU;AAEd,OAAI,KAAK,cAEP,KAAI;AACF,cAAU,MAAM,KAAK,cAAc,SAAS,QAAQ;KAClD;KACA;KACA;KACA;KACA;KACA;KAEA,SAAS,QAAQ,WAAW,MAAM,QAAQ,QAAS,EAAE,KAAK,GAAG;KAC9D,CAAC;YACKI,WAAgB;AAEvB,QAAI,WAAW,YAAY,yBAAyB,CAAC,KAAK,eAAe,SAAS,EAAE;AAClF,WAAM,KAAK,eAAe,SAAS,CAAC,YAAY,GAAG;AACnD,UAAK,gBAAgB;AACrB,UAAK,cAAc;AAGnB,UAAK,YAAa,MAAMJ,WAAS,mBADf,KAAK,gBAAgB,cACwB;MAC7D,OAAO;MACP,QAAQ;MACT,CAAQ;AAET,YAAO,KAAK,SAAS,QAAQ,QAAQ;;AAEvC,UAAM;;YAEC,KAAK,aAAa,KAAK,SAAS,KAAK,WAAW;IAEzD,MAAM,WAAW,KAAK,cAAc,QAAQ;KAAE,GAAG;KAAS;KAAU,CAAC;IAErE,MAAM,SAAU,KAAK,UAAkB,oBAAoB,UAAU;KACnE,uBAAuB;KACvB,aAAa;KACb,iBAAiB;KAClB,CAAC;IAEF,MAAM,SAAS,MAAM,KAAK,MAAM,SAAS;KACvC,GAAG;KACH,gBAAgB;KAChB,aAAa,cAAc,IAAI,cAAc;KAC7C,OAAO;KACP,OAAO;KACP,WAAW,cAAc;KAC1B,CAAC;IAGF,MAAM,cAAc,OAAO,UAAU,OAAO,MAAM,OAAO,UAAU,MAAM,UAAU;IAGnF,MAAM,eAAe,OAAO,MAAM,MAAM,CAAC,aAAa,KAAK,CAAC;AAK5D,cAJgB,KAAK,UAAU,aAAa,cAAc,EACxD,qBAAqB,MACtB,CAAC,CAEgB,MAAM;AAGxB,QAAI,QAAQ,aAAa,CAAC,SAAS,YAAY,EAAE;KAC/C,MAAM,QAAQ,QAAQ,MAAM,4BAA4B;AACxD,SAAI,MACF,WAAU,MAAM,GAAG,MAAM;;cAGpB,KAAK,WAAW;IAEzB,MAAM,kBAAkB,KAAK,aAAa,QAAQ;KAAE,GAAG;KAAS;KAAU,CAAC;IAE3E,MAAM,SAAS,MAAM,KAAK,UAAU,iBAAiB;KACnD,gBAAgB;KAChB;KACA,OAAO;KACP,OAAO;KACP,WAAW,cAAc;KACzB,kBAAkB;KACnB,CAAC;AAGF,QAAI,MAAM,QAAQ,OAAO,IAAI,OAAO,IAAI;KACtC,MAAM,SAAS,OAAO;AACtB,SAAI,MAAM,QAAQ,OAAO,eAAe,CAEtC,WADa,OAAO,eAAe,GAAG,GAAG,EACzB,WAAW;SAE3B,WAAU,OAAO,kBAAkB;;SAIvC,OAAM,IAAI,MAAM,kBAAkB;GAIpC,MAAM,YADU,YAAY,KAAK,GACL;AAE5B,aAAU,KAAK,YAAY,QAAQ;GAInC,MAAM,EAAE,UAAU,cAAc,aAAa,KAAK,cAAc,QAAQ;GAGxE,MAAM,gBAAgB,WAAW,eAAe;GAEhD,MAAM,kBAAkB,KAAK,KAAK,SAAS,SAAS,EAAE;AAGtD,QAAK,MAAM;AACX,QAAK,MAAM,aAAa;AACxB,QAAK,MAAM,aAAa;AACxB,QAAK,MAAM,WAAY,KAAK,MAAM,YAAY,KAAK,MAAM,YAAa;AAEtE,UAAO;IACL,MAAM;IACN,UAAU;IACV;IACA,iBAAkB,kBAAkB,YAAa;IACjD;IACA,cAAc;IACd,UAAU;IACV,QAAQ;IACT;WACM,QAAQ;AACf,UAAO;IACL,MAAM;IACN,iBAAiB;IACjB,iBAAiB;IACjB,WAAW,YAAY,KAAK,GAAG;IAC/B,cAAc;IACd,UAAU;IACV,QAAQ;IACT;;;;;;;;;CAUL,OAAO,OACL,QACA,UAA2B,EAAE,EACoB;AACjD,MAAI,CAAC,KAAK,UAAU,CAClB,OAAM,KAAK,UAAU,KAAK,OAAO,SAAS,aAAa;EAGzD,MAAM,YAAY,YAAY,KAAK;AAGnC,MAAI,KAAK,eAAe;GACtB,IAAI,WAAW;GACf,MAAMK,aAAuB,EAAE;GAC/B,IAAIC,cAAuD;GAC3D,IAAI,OAAO;GAGX,MAAM,kBAAkB,KAAK,cAC1B,SAAS,QAAQ;IAChB,GAAG;IACH,UAAU,UAAU;AAClB,iBAAY,MAAM;AAClB,SAAI,aAAa;AACf,kBAAY,MAAM,KAAK;AACvB,oBAAc;WAEd,YAAW,KAAK,MAAM,KAAK;;IAGhC,CAAC,CACD,WAAW;AACV,WAAO;AACP,QAAI,YACF,aAAY,KAAK;KAEnB,CACD,OAAO,QAAQ;AACd,WAAO;AACP,QAAI,YACF,aAAY,KAAK;AAEnB,UAAM;KACN;AAGJ,UAAO,CAAC,QAAQ,WAAW,SAAS,EAClC,KAAI,WAAW,SAAS,GAAG;IACzB,MAAM,QAAQ,WAAW,OAAO;AAChC,UAAM;AACN,YAAQ,UAAU,MAAM;cACf,CAAC,MAAM;IAChB,MAAM,QAAQ,MAAM,IAAI,SAAwB,YAAY;AAC1D,mBAAc;MACd;AACF,QAAI,OAAO;AACT,WAAM;AACN,aAAQ,UAAU,MAAM;;;AAK9B,SAAM;GAEN,MAAM,EAAE,UAAU,cAAc,aAAa,KAAK,cAAc,SAAS;GACzE,MAAM,kBAAkB,KAAK,KAAK,SAAS,SAAS,EAAE;GACtD,MAAM,YAAY,YAAY,KAAK,GAAG;AAEtC,UAAO;IACL,MAAM;IACN,UAAU,QAAQ,WAAW,eAAe;IAC5C;IACA;IACA,iBAAkB,kBAAkB,YAAa;IACjD,cAAc;IACf;;EAIH,MAAM,SAAS,MAAM,KAAK,YAAY,QAAQ,QAAQ;EAItD,MAAM,QAAQ,OAAO,QAAQ,MAAM,QAAQ;AAC3C,OAAK,MAAM,QAAQ,MACjB,KAAI,MAAM;AACR,SAAM;AACN,WAAQ,UAAU,KAAK;;AAI3B,SAAO,OAAO;;;;;CAMhB,MAAc,YACZ,QACA,UAA2B,EAAE,EACyB;EACtD,MAAM,EAAE,YAAY,KAAK,cAAc,IAAK,OAAO,IAAK,OAAO,IAAI,WAAW,UAAU;EAExF,MAAM,YAAY,YAAY,KAAK;EACnC,MAAM,kBAAkB
,KAAK,aAAa,QAAQ;GAAE,GAAG;GAAS;GAAU,CAAC;AAE3E,MAAI;GACF,MAAM,SAAS,MAAM,KAAK,UAAW,iBAAiB;IACpD,gBAAgB;IAChB;IACA,OAAO;IACP,OAAO;IACP,WAAW,cAAc;IACzB,kBAAkB;IACnB,CAAC;GAGF,MAAM,YADU,YAAY,KAAK,GACL;GAG5B,IAAI,UAAU;AACd,OAAI,MAAM,QAAQ,OAAO,IAAI,OAAO,IAAI;IACtC,MAAM,SAAS,OAAO;AACtB,QAAI,MAAM,QAAQ,OAAO,eAAe,CAEtC,WADa,OAAO,eAAe,GAAG,GAAG,EACzB,WAAW;QAE3B,WAAU,OAAO,kBAAkB;;AAIvC,aAAU,KAAK,YAAY,QAAQ;GACnC,MAAM,EAAE,UAAU,cAAc,aAAa,KAAK,cAAc,QAAQ;GACxE,MAAM,gBAAgB,WAAW,eAAe;GAChD,MAAM,kBAAkB,KAAK,KAAK,SAAS,SAAS,EAAE;AAGtD,QAAK,MAAM;AACX,QAAK,MAAM,aAAa;AACxB,QAAK,MAAM,aAAa;AACxB,QAAK,MAAM,WAAY,KAAK,MAAM,YAAY,KAAK,MAAM,YAAa;AAEtE,UAAO;IACL;IACA,QAAQ;KACN,MAAM;KACN,UAAU;KACV;KACA,iBAAkB,kBAAkB,YAAa;KACjD;KACA,cAAc;KACd,UAAU;KACV,QAAQ;KACT;IACF;WACM,QAAQ;AACf,UAAO;IACL,SAAS;IACT,QAAQ;KACN,MAAM;KACN,iBAAiB;KACjB,iBAAiB;KACjB,WAAW,YAAY,KAAK,GAAG;KAC/B,cAAc;KACd,UAAU;KACV,QAAQ;KACT;IACF;;;;;;CAWL,MAAM,KAAQ,QAAgB,SAAqC;EACjE,MAAM,EAAE,QAAQ,UAAU,GAAG,cAAc,OAAQ;EAEnD,MAAM,eAAe;;wCAEe,KAAK,UAAU,gBAAgB,OAAO,CAAC;AAE3E,OAAK,IAAI,UAAU,GAAG,UAAU,SAAS,WAAW;GAClD,MAAM,SAAS,MAAM,KAAK,SAAS,QAAQ;IACzC,QAAQ,QAAQ,UAAU;IAC1B;IACA,WAAW;IACZ,CAAC;AAEF,OAAI;IAEF,MAAM,UAAU,YAAY,OAAO,KAAK;IACxC,MAAM,SAAS,KAAK,MAAM,QAAQ;AAElC,WADkB,OAAO,MAAM,OAAO;YAE/B,OAAO;AACd,QAAI,YAAY,UAAU,EACxB,OAAM,IAAI,MAAM,uCAAuC,QAAQ,aAAa,QAAQ;;;AAK1F,QAAM,IAAI,MAAM,gCAAgC;;;;;CAUlD,MAAM,MAAM,MAAc,UAAwB,EAAE,EAAwB;AAC1E,MAAI,CAAC,KAAK,SAGR,MAAK,WAAY,MAAMN,WAAS,sBADlB,QAAQ,SAAS,0BAC6B;EAG9D,MAAM,YAAY,YAAY,KAAK;EACnC,MAAM,SAAS,MAAM,KAAK,SAAS,MAAM;GACvC,SAAS;GACT,WAAW,QAAQ,cAAc;GAClC,CAAC;AAIF,SAAO;GACL,QAHa,MAAM,KAAK,OAAO,KAAqB;GAIpD;GACA,WAAW,YAAY,KAAK,GAAG;GAChC;;;;;CAMH,MAAM,WAAW,OAAiB,UAAwB,EAAE,EAA0B;EACpF,MAAMO,UAAyB,EAAE;AACjC,OAAK,MAAM,QAAQ,MACjB,SAAQ,KAAK,MAAM,KAAK,MAAM,MAAM,QAAQ,CAAC;AAE/C,SAAO;;;;;CAUT,WAAyB;AACvB,SAAO,EAAE,GAAG,KAAK,OAAO;;;;;CAM1B,UAAsB;AACpB,SAAO;GACL,SAAS;GACT,OAAO,KAAK;GACZ,QAAQ;IACN,SAAS;IACT,KAAK;IACL,MAAM;IACN,QAAQ,KAAK,UAAU,GAAG,UAAU;IACrC;GACD,SAAS;IACP,KAAK,KAAK,aAAa,iBAAiB;IACxC,MAAM;IACN,WAAW,KAAK,aAAa,iBAAiB;IAC/C;GACD,OAAO;IACL,UAAU;IACV,MAAM;IACN,YAAY;IACb;GACF;;;;;CAMH,aAAmB;AACjB,OAAK,QAAQ;GACX,SAAS;GACT,UAAU;GACV,WAAW;GACX,UAAU;GACV,WAAW;GACX,WAAW;GACX,aAAa;GACd;;;;;CAUH,MAAM,UAAyB;AAE7B,MAAI,KAAK,eAAe;AACtB,OAAI;AACF,UAAM,KAAK,cAAc,SAAS;WAC5B;AAGR,QAAK,gBAAgB;;AAGvB,MAAI,KAAK,WAAW;AAClB,OAAI,OAAQ,KAAK,UAAkB,YAAY,WAC7C,KAAI;AACF,UAAO,KAAK,UAAkB,SAAS;WACjC;AAIV,QAAK,YAAY;;AAEnB,MAAI,KAAK,UAAU;AACjB,OAAI,OAAQ,KAAK,SAAiB,YAAY,WAC5C,KAAI;AACF,UAAO,KAAK,SAAiB,SAAS;WAChC;AAIV,QAAK,WAAW;;AAElB,OAAK,eAAe;AACpB,OAAK,cAAc;;CAOrB,AAAQ,aAAa,QAAgB,SAAkC;EACrE,MAAM,SAAS,QAAQ,UAAU;EACjC,MAAM,SAAS,KAAK,cAAc,SAAS,OAAO;AAElD,MAAI,QAAQ,YAAY,KAAK,aAAa,iBAExC,QAAO,uBADa,GAAG,OAAO,iHACY,gCAAgC,OAAO;AAGnF,MAAI,OACF,QAAO,uBAAuB,OAAO,gCAAgC,OAAO;AAG9E,SAAO,uBAAuB,OAAO,gCAAgC,OAAO;;CAG9E,AAAQ,cACN,QACA,SAC0C;EAC1C,MAAM,SAAS,QAAQ,UAAU;EACjC,MAAMC,WAAqD,EAAE;AAI7D,WAAS,KAAK;GAAE,MAAM;GAAU,SAAS;GAAQ,CAAC;AAClD,WAAS,KAAK;GAAE,MAAM;GAAQ,SAAS;GAAQ,CAAC;AAEhD,SAAO;;CAGT,AAAQ,cAAc,MAGpB;EAEA,MAAM,QAAQ,KAAK,MAAM,6BAA6B;AACtD,MAAI,MAGF,QAAO;GAAE,UAFQ,MAAM,GAAG,MAAM;GAEb,UADF,KAAK,QAAQ,4BAA4B,GAAG,CAAC,MAAM;GACvC;EAI/B,MAAM,gBAAgB,KAAK,MAAM,oBAAoB;AACrD,MAAI,eAAe;GACjB,MAAM,WAAW,cAAc,GAAG,MAAM;GACxC,MAAM,WAAW,KAAK,QAAQ,mBAAmB,GAAG,CAAC,MAAM;AAC3D,UAAO;IAAE,UAAU,YAAY;IAAW;IAAU;;AAKtD,SAAO,EAAE,UADQ,KAAK,QAAQ,eAAe,GAAG,CAAC,MAAM,EACpC;;CAGrB,AAAQ,YAAY,MAAsB;AACxC,SACE,KACG,QAAQ,iBAAiB,GAAG,CAC5B,QAAQ,mBAAmB,GAAG,CAC9B,QAAQ,oBAAoB,GAAG,CAC/B,QAAQ,UAAU,GAAG,CAErB,QAAQ,mBAAmB,GAAG,CAC9B,QAAQ,kBAAkB,GAAG,CAC7B,QAAQ,wBAAwB,GAAG,CACnC,QAAQ,uBAAuB,GAAG,
CAElC,QAAQ,mCAAmC,GAAG,CAC9C,MAAM"}
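The `sourcesContent` above embeds the full `src/core/gerbil.ts`. One detail worth pulling out is the Node.js WebGPU bootstrap, which hides the optional `webgpu` (Dawn) dependency from bundlers behind a `Function`-constructed dynamic import. A condensed sketch of that logic, taken from the embedded source (the module-level memoization flags are omitted here):

```ts
// Condensed from src/core/gerbil.ts as embedded in the sourcemap above.
// The Function constructor keeps bundlers from statically resolving "webgpu".
async function initNodeWebGPU(): Promise<boolean> {
  if (typeof window !== "undefined") {
    return "gpu" in navigator; // browser: WebGPU is either present or not
  }
  try {
    const dynamicImport = new Function("specifier", "return import(specifier)");
    const { create, globals } = await dynamicImport("webgpu"); // Dawn bindings
    Object.assign(globalThis, globals); // expose GPU* constructors globally
    (globalThis as any).navigator ??= {};
    (globalThis as any).navigator.gpu = create([]);
    return true;
  } catch {
    return false; // Dawn unavailable; callers fall back to CPU/WASM
  }
}
```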
package/dist/gerbil-BjW-z7Fq.mjs ADDED
@@ -0,0 +1,5 @@
+ import "./models-DKULvhOr.mjs";
+ import "./utils-7vXqtq2Q.mjs";
+ import { t as Gerbil } from "./gerbil-BfnsFWRE.mjs";
+
+ export { Gerbil };
package/dist/gerbil-DZ1k3ChC.d.mts ADDED
@@ -0,0 +1,138 @@
+ import { _ as SystemInfo, a as GenerateOptions, d as LoadOptions, f as ModelConfig, g as SessionStats, n as EmbedOptions, o as GenerateResult, r as EmbedResult, s as GerbilConfig, u as JsonOptions } from "./types-BS1N92Jt.mjs";
+
+ //#region src/core/gerbil.d.ts
+
+ declare class Gerbil {
+ private generator;
+ private tokenizer;
+ private model;
+ private embedder;
+ private currentModel;
+ private modelConfig;
+ private readonly config;
+ private stats;
+ private useDirect;
+ private chromeBackend;
+ private _deviceMode;
+ constructor(config?: GerbilConfig);
+ static listModels(): ModelConfig[];
+ static getModel(modelId: string): ModelConfig | undefined;
+ /**
+ * Load a model
+ *
+ * @example
+ * ```ts
+ * // Built-in model
+ * await g.loadModel("qwen3-0.6b");
+ *
+ * // HuggingFace model
+ * await g.loadModel("hf:microsoft/Phi-3-mini");
+ *
+ * // Local model
+ * await g.loadModel("file:./models/my-model");
+ * ```
+ */
+ loadModel(modelId?: string, options?: LoadOptions): Promise<void>;
+ /**
+ * Check if a model is loaded
+ */
+ isLoaded(): boolean;
+ /**
+ * Get current model info
+ */
+ getModelInfo(): ModelConfig | null;
+ /**
+ * Get current device mode (webgpu, cpu, or wasm)
+ */
+ getDeviceMode(): "webgpu" | "cpu" | "wasm";
+ /**
+ * Get dtype used for current model
+ */
+ getDtype(): string;
+ /**
+ * Get Chrome backend status (if using WebGPU via Chrome)
+ */
+ getChromeStatus(): {
+ pid: number | null;
+ port: number;
+ modelId: string;
+ startedAt: Date | null;
+ } | null;
+ /**
+ * Get Chrome memory usage (if using WebGPU via Chrome)
+ * Returns JS heap memory in bytes
+ */
+ getChromeMemory(): Promise<{
+ jsHeapUsed: number;
+ jsHeapTotal: number;
+ } | null>;
+ /**
+ * Get memory usage in GB (if using WebGPU via Chrome)
+ */
+ getMemoryUsage(): Promise<{
+ usedGB: number;
+ totalGB: number;
+ usedPercent: number;
+ } | null>;
+ /**
+ * Clear KV cache to free memory
+ * This will reset the conversation context but free up memory
+ */
+ clearCache(): Promise<void>;
+ /**
+ * Check memory usage and cleanup if needed
+ * @param thresholdGB Memory threshold in GB (default: 8)
+ * @returns true if cleanup was performed
+ */
+ checkMemoryAndCleanup(thresholdGB?: number): Promise<boolean>;
+ /**
+ * Generate text
+ */
+ generate(prompt: string, options?: GenerateOptions): Promise<GenerateResult>;
+ /**
+ * Stream text generation (simulated token-by-token)
+ *
+ * Note: Yields the raw output including <think> tags if thinking mode is enabled.
+ * The final result has parsed thinking separated out.
+ */
+ stream(prompt: string, options?: GenerateOptions): AsyncGenerator<string, GenerateResult, unknown>;
+ /**
+ * Internal: Generate with raw text access for streaming
+ */
+ private generateRaw;
+ /**
+ * Generate structured JSON output
+ */
+ json<T>(prompt: string, options: JsonOptions<T>): Promise<T>;
+ /**
+ * Generate embeddings
+ */
+ embed(text: string, options?: EmbedOptions): Promise<EmbedResult>;
+ /**
+ * Generate embeddings for multiple texts
+ */
+ embedBatch(texts: string[], options?: EmbedOptions): Promise<EmbedResult[]>;
+ /**
+ * Get session stats
+ */
+ getStats(): SessionStats;
+ /**
+ * Get system info
+ */
+ getInfo(): SystemInfo;
+ /**
+ * Reset stats
+ */
+ resetStats(): void;
+ /**
+ * Dispose of resources
+ */
+ dispose(): Promise<void>;
+ private formatPrompt;
+ private buildMessages;
+ private parseThinking;
+ private cleanOutput;
+ }
+ //#endregion
+ export { Gerbil as t };
+ //# sourceMappingURL=gerbil-DZ1k3ChC.d.mts.map
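Taken together, this declaration maps out the whole public surface of the class. A hedged usage sketch against these signatures (the model ID and the `onProgress` option come from the JSDoc and the embedded source; the root specifier `@tryhamster/gerbil` is assumed from the package name):

```ts
import { Gerbil } from "@tryhamster/gerbil";

const g = new Gerbil();
await g.loadModel("qwen3-0.6b", {
  onProgress: (p) => console.log(p.status), // e.g. "Loading model..." / "Ready (CPU)!"
});

// generate() resolves to a GenerateResult; with thinking mode enabled,
// <think> content is split out into result.thinking.
const result = await g.generate("Write a haiku", { maxTokens: 128, thinking: true });
console.log(result.text, result.thinking, result.tokensPerSecond);

// stream() is an async generator of string tokens whose return value
// is the final GenerateResult.
for await (const token of g.stream("Explain WebGPU in one sentence")) {
  process.stdout.write(token);
}

console.log(g.getDeviceMode()); // "webgpu" | "cpu" | "wasm"
await g.dispose();
```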
package/dist/gerbil-DZ1k3ChC.d.mts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"gerbil-DZ1k3ChC.d.mts","names":[],"sources":["../src/core/gerbil.ts"],"sourcesContent":[],"mappings":";;;;AA6HsB,cAbT,MAAA,CAaS;EAiBC,QAAA,SAAA;EAIa,QAAA,SAAA;EAuBe,QAAA,KAAA;EAAmB,QAAA,QAAA;EAgKpD,QAAA,YAAA;EA0BH,QAAA,WAAA;EAYY,iBAAA,MAAA;EAUD,QAAA,KAAA;EAWJ,QAAA,SAAA;EAW0B,QAAA,aAAA;EAcN,QAAA,WAAA;EAA+B,WAAA,CAAA,MAAA,CAAA,EAhSnD,YAgSmD;EAAR,OAAA,UAAA,CAAA,CAAA,EA/Q1C,WA+Q0C,EAAA;EAqKpD,OAAA,QAAA,CAAA,OAAA,EAAA,MAAA,CAAA,EAhbuB,WAgbvB,GAAA,SAAA;EACe;;;;;;;;;;;;;;;EAgTF,SAAA,CAAA,OAAA,CAAA,EAAA,MAAA,EAAA,OAAA,CAAA,EA1sByB,WA0sBzB,CAAA,EA1sB4C,OA0sB5C,CAAA,IAAA,CAAA;;;;;;;;kBA1iBR;;;;;;;;;;;;;;;;eA0BH;;;;;;qBAYY;;;;;;;oBAUD;;;;;;;;;gBAWJ;;;;;;+CAW0B;;;;qCAcN,kBAAuB,QAAQ;;;;;;;mCAqK5D,kBACR,uBAAuB;;;;;;;;mCA+Ka,YAAY,KAAK,QAAQ;;;;gCAqC7B,eAAoB,QAAQ;;;;wCAyBpB,eAAoB,QAAQ;;;;cAe3D;;;;aAOD;;;;;;;;aA6CM"}
package/dist/index.d.mts ADDED
@@ -0,0 +1,223 @@
+ import { _ as SystemInfo, a as GenerateOptions, c as GerbilModelSettings, d as LoadOptions, f as ModelConfig, g as SessionStats, h as ProgressInfo, i as FallbackConfig, l as GerbilProviderSettings, m as ModelStats, n as EmbedOptions, o as GenerateResult, p as ModelSource, r as EmbedResult, s as GerbilConfig, t as CacheConfig, u as JsonOptions } from "./types-BS1N92Jt.mjs";
+ import { n as listBuiltinModels, r as resolveModel, t as BUILTIN_MODELS } from "./models-De2-_GmQ.mjs";
+ import { t as Gerbil } from "./gerbil-DZ1k3ChC.mjs";
+ import { Server } from "http";
+
+ //#region src/core/chrome-backend.d.ts
+
+ type CachedModelEntry = {
+ modelId: string;
+ downloadedAt: string;
+ lastUsed: string;
+ sizeBytes?: number;
+ };
+ /** Get list of models cached in Chrome's IndexedDB */
+ declare function getChromeCachedModels(): CachedModelEntry[];
+ /** Refresh sizes for cached models that don't have them */
+ declare function refreshCachedModelSizes(): Promise<void>;
+ type ChromeBackendOptions = {
+ /** Custom Chrome executable path */
+ chromePath?: string;
+ /** Model ID to load */
+ modelId?: string;
+ /** Progress callback */
+ onProgress?: (info: {
+ status: string;
+ progress?: number;
+ file?: string;
+ }) => void;
+ /** Token callback for streaming */
+ onToken?: (token: {
+ text: string;
+ state: string;
+ numTokens: number;
+ tps: number;
+ }) => void;
+ };
+ type GenerateOptions$1 = {
+ maxTokens?: number;
+ temperature?: number;
+ topP?: number;
+ topK?: number;
+ thinking?: boolean;
+ system?: string;
+ onToken?: (token: {
+ text: string;
+ state: string;
+ numTokens: number;
+ tps: number;
+ }) => void;
+ };
+ declare class ChromeGPUBackend {
+ private browser;
+ private page;
+ private cdp;
+ private serverPort;
+ private userDataDir;
+ private readonly modelId;
+ private isReady;
+ private readonly messageHandlers;
+ private pendingRejects;
+ server: Server | null;
+ private constructor();
+ /**
+ * Create and initialize a Chrome GPU backend
+ */
+ static create(options?: ChromeBackendOptions): Promise<ChromeGPUBackend>;
+ /**
+ * Get existing browser or launch a new one (singleton pattern)
+ * Multiple Gerbil instances share the same browser process
+ */
+ private getOrCreateBrowser;
+ /**
+ * Launch a new Chrome browser instance
+ */
+ private launchBrowser;
+ /**
+ * Launch Chrome and initialize the worker page
+ */
+ private launch;
+ /**
+ * Handle incoming messages from the page
+ */
+ private handleMessage;
+ /**
+ * Wait for a specific message type
+ */
+ private waitForMessage;
+ /**
+ * Check if Chrome backend is still alive
+ */
+ isAlive(): boolean;
+ /**
+ * Get Chrome backend status information
+ */
+ getStatus(): {
+ pid: number | null;
+ port: number;
+ modelId: string;
+ startedAt: Date | null;
+ };
+ /**
+ * Get Chrome memory usage via CDP Performance metrics
+ * Returns memory in bytes or null if unavailable
+ */
+ getMemoryUsage(): Promise<{
+ jsHeapUsed: number;
+ jsHeapTotal: number;
+ } | null>;
+ /**
+ * Check memory usage and auto-cleanup if threshold exceeded
+ * @param thresholdGB Memory threshold in GB (default: 8)
+ * @returns true if cleanup was performed
+ */
+ checkMemoryAndCleanup(thresholdGB?: number): Promise<boolean>;
+ /**
+ * Get memory usage in a human-readable format
+ */
+ getMemoryStats(): Promise<{
+ usedGB: number;
+ totalGB: number;
+ usedPercent: number;
+ } | null>;
+ /**
+ * Generate text with streaming
+ */
+ generate(prompt: string, options?: GenerateOptions$1): Promise<string>;
+ /**
+ * Interrupt current generation
+ */
+ interrupt(): Promise<void>;
+ /**
+ * Reset conversation cache
+ */
+ reset(): Promise<void>;
+ /**
+ * Check if backend is ready
+ */
+ ready(): boolean;
+ /**
+ * Start or reuse the global HTTP server
+ * Uses singleton pattern to prevent killing our own server
+ */
+ private startServer;
+ /**
+ * Dispose of the backend and clean up
+ * Note: We keep the shared browser running for other backends
+ */
+ dispose(): Promise<void>;
+ /**
+ * Reject all pending waits (called on browser disconnect or dispose)
+ */
+ private rejectPendingWaits;
+ /**
+ * Clear the model cache (forces re-download on next start)
+ */
+ static clearCache(): void;
+ /**
+ * Get the number of active Chrome pages
+ */
+ static getActivePageCount(): number;
+ /**
+ * Get memory usage info for all active pages
+ */
+ static getMemoryInfo(): {
+ activePagesCount: number;
+ maxPages: number;
+ };
+ /**
+ * Gracefully close the shared browser (call on process exit)
+ */
+ static closeSharedBrowser(): Promise<void>;
+ }
+ //#endregion
+ //#region src/core/one-liner.d.ts
+ interface GerbilOptions extends GenerateOptions {
+ model?: string;
+ }
+ /**
+ * Generate text with zero setup
+ *
+ * @example
+ * ```ts
+ * const text = await gerbil("Write a haiku");
+ * const text = await gerbil("Explain this", { model: "qwen3-0.6b", thinking: true });
+ * ```
+ */
+ declare function gerbil(prompt: string, options?: GerbilOptions): Promise<string>;
+ /**
+ * Generate text (returns full result)
+ */
+ declare function generate(prompt: string, options?: GerbilOptions): Promise<GenerateResult>;
+ /**
+ * Stream text generation
+ */
+ declare function stream(prompt: string, options?: GerbilOptions): AsyncGenerator<string, GenerateResult, unknown>;
+ /**
+ * Generate structured JSON
+ */
+ declare function json<T>(prompt: string, options: JsonOptions<T> & {
+ model?: string;
+ }): Promise<T>;
+ /**
+ * Generate embeddings
+ */
+ declare function embed(text: string, options?: EmbedOptions & {
+ model?: string;
+ }): Promise<EmbedResult>;
+ /**
+ * Generate embeddings for multiple texts
+ */
+ declare function embedBatch(texts: string[], options?: EmbedOptions & {
+ model?: string;
+ }): Promise<EmbedResult[]>;
+ /**
+ * Dispose singleton instance
+ */
+ declare function dispose(): Promise<void>;
+ //#endregion
+ //#region src/index.d.ts
+ declare const VERSION = "0.1.0";
+ //#endregion
+ export { BUILTIN_MODELS, type CacheConfig, ChromeGPUBackend, type EmbedOptions, type EmbedResult, type FallbackConfig, type GenerateOptions, type GenerateResult, Gerbil, type GerbilConfig, type GerbilModelSettings, type GerbilProviderSettings, type JsonOptions, type LoadOptions, type ModelConfig, type ModelSource, type ModelStats, type ProgressInfo, type SessionStats, type SystemInfo, VERSION, gerbil as default, dispose, embed, embedBatch, generate, getChromeCachedModels, json, listBuiltinModels, refreshCachedModelSizes, resolveModel, stream };
+ //# sourceMappingURL=index.d.mts.map
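The `ChromeGPUBackend` declaration above also documents the Node-side WebGPU path: a headless Chrome worker page driven over CDP, with a shared singleton browser. A sketch of driving it directly, using only the declared methods (normally `Gerbil.loadModel()` creates this for you; the model ID here is hypothetical):

```ts
import { ChromeGPUBackend } from "@tryhamster/gerbil";

const backend = await ChromeGPUBackend.create({
  modelId: "onnx-community/Qwen3-0.6B-ONNX", // hypothetical; whatever the worker page accepts
  onProgress: (info) => console.log(info.status, info.progress ?? ""),
});

// generate() resolves to the full string; onToken streams as it goes.
const text = await backend.generate("Write a haiku", {
  maxTokens: 128,
  onToken: (t) => process.stdout.write(t.text),
});

const mem = await backend.getMemoryStats(); // { usedGB, totalGB, usedPercent } | null
if (mem && mem.usedGB > 8) await backend.reset(); // drop the KV cache

await backend.dispose(); // releases the page; the shared browser keeps running
await ChromeGPUBackend.closeSharedBrowser(); // call on process exit
```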
package/dist/index.d.mts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/core/chrome-backend.ts","../src/core/one-liner.ts","../src/index.ts"],"sourcesContent":[],"mappings":";;;;;;;AA+BA,KARK,gBAAA,GAQW;EAuGM,OAAA,EAAA,MAAA;EAgDV,YAAA,EAAA,MAAA;EAWA,QAAA,EAAA,MAAA;EAuTC,SAAA,CAAA,EAAA,MAAA;CAWI;;AASkD,iBA7enD,qBAAA,CAAA,CA6emD,EA7e1B,gBA6e0B,EAAA;;AA+XzB,iBArwBpB,uBAAA,CAAA,CAqwBoB,EArwBO,OAqwBP,CAAA,IAAA,CAAA;AAAuB,KArtBrD,oBAAA,GAqtBqD;EAqD5C;EASJ,UAAA,CAAA,EAAA,MAAA;EA0DE;EAqEkB,OAAA,CAAA,EAAA,MAAA;EAAO;;;;IC9gClC,IAAA,CAAA,EAAA,MAAc;EAaT,CAAA,EAAA,GAAA,IAAM;EAcC;EAEX,OAAA,CAAA,EAAA,CAAA,KAAA,EAAA;IACA,IAAA,EAAA,MAAA;IAAR,KAAA,EAAA,MAAA;IAAO,SAAA,EAAA,MAAA;IASa,GAAA,EAAM,MAAA;EAElB,CAAA,EAAA,GAAA,IAAA;CACe;AAAvB,KD6FS,iBAAA,GC7FT;EAAc,SAAA,CAAA,EAAA,MAAA;EASK,WAAI,CAAA,EAAA,MAAA;EAEH,IAAA,CAAA,EAAA,MAAA;EAAZ,IAAA,CAAA,EAAA,MAAA;EACA,QAAA,CAAA,EAAA,OAAA;EAAR,MAAA,CAAA,EAAA,MAAA;EAAO,OAAA,CAAA,EAAA,CAAA,KAAA,EAAA;IASY,IAAK,EAAA,MAAA;IAEhB,KAAA,EAAA,MAAA;IACA,SAAA,EAAA,MAAA;IAAR,GAAA,EAAA,MAAA;EAAO,CAAA,EAAA,GAAA,IAAA;AASV,CAAA;AAEW,cDiXE,gBAAA,CCjXF;EACA,QAAA,OAAA;EAAR,QAAA,IAAA;EAAO,QAAA,GAAA;EASY,QAAA,UAAO;;;;EC1DhB,iBAAO,eAAA;;UF4aH;;;;;0BASc,uBAA4B,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;eAiSpD;;;;;;oBAoBW;;;;;;;;;+CA8BsB;;;;oBAwBtB;;;;;;;;qCAoBgB,oBAAuB;;;;eAqD5C;;;;WASJ;;;;;;;;;;;;;;aA0DE;;;;;;;;;;;;;;;;;;;;;;;+BAqEkB;;;;UC9gC3B,aAAA,SAAsB,eDkdD,CAAA;EAAoC,KAAA,CAAA,EAAA,MAAA;;;;;;;;;;;iBCrcpD,MAAA,CDigCsB,MAAA,EAAA,MAAA,EAAA,OAAA,CAAA,ECjgCU,aDigCV,CAAA,ECjgC+B,ODigC/B,CAAA,MAAA,CAAA;;;;iBCn/Bf,QAAA,2BAEX,gBACR,QAAQ;AA/DwC;AAiCJ;AA2B/C;AAEW,iBAUY,MAAA,CAVZ,MAAA,EAAA,MAAA,EAAA,OAAA,CAAA,EAYA,aAZA,CAAA,EAaR,cAbQ,CAAA,MAAA,EAae,cAbf,EAAA,OAAA,CAAA;;;;AAUY,iBAYD,IAZO,CAAA,CAAA,CAAA,CAAA,MAAA,EAAA,MAAA,EAAA,OAAA,EAclB,WAdkB,CAcN,CAdM,CAAA,GAAA;EAElB,KAAA,CAAA,EAAA,MAAA;CACe,CAAA,EAYvB,OAZuB,CAYf,CAZe,CAAA;;;AAS1B;AAEuB,iBAUD,KAAA,CAVC,IAAA,EAAA,MAAA,EAAA,OACZ,CADY,EAYZ,YAZY,GAAA;EAAZ,KAAA,CAAA,EAAA,MAAA;CACA,CAAA,EAYR,OAZQ,CAYA,WAZA,CAAA;;;AASX;AAEW,iBAUW,UAAA,CAVX,KAAA,EAAA,MAAA,EAAA,EAAA,OACR,CADQ,EAYA,YAZA,GAAA;EACA,KAAA,CAAA,EAAA,MAAA;CAAR,CAAA,EAYA,OAZA,CAYQ,WAZR,EAAA,CAAA;;AASH;;AAGW,iBASW,OAAA,CAAA,CATX,EASsB,OATtB,CAAA,IAAA,CAAA;;;AAxBD,cCzBG,OAAA,GDyBH,OAAA"}
package/dist/index.mjs ADDED
@@ -0,0 +1,13 @@
+ import { n as getChromeCachedModels, r as refreshCachedModelSizes, t as ChromeGPUBackend } from "./chrome-backend-C5Un08O4.mjs";
+ import { a as resolveModel, i as listBuiltinModels, t as BUILTIN_MODELS } from "./models-DKULvhOr.mjs";
+ import "./utils-7vXqtq2Q.mjs";
+ import { t as Gerbil } from "./gerbil-BfnsFWRE.mjs";
+ import { c as stream, i as generate, n as embed, o as json, r as embedBatch, s as one_liner_default, t as dispose } from "./one-liner-BUQR0nqq.mjs";
+
+ //#region src/index.ts
+ var src_default = one_liner_default;
+ const VERSION = "0.1.0";
+
+ //#endregion
+ export { BUILTIN_MODELS, ChromeGPUBackend, Gerbil, VERSION, src_default as default, dispose, embed, embedBatch, generate, getChromeCachedModels, json, listBuiltinModels, refreshCachedModelSizes, resolveModel, stream };
+ //# sourceMappingURL=index.mjs.map
package/dist/index.mjs.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"index.mjs","names":["gerbil"],"sources":["../src/index.ts"],"sourcesContent":["/**\n * Gerbil - Local LLM inference for Node.js\n *\n * @example Standalone\n * ```ts\n * import { Gerbil } from \"gerbil\";\n *\n * const g = new Gerbil();\n * await g.loadModel(\"qwen3-0.6b\");\n *\n * const result = await g.generate(\"Write a haiku\");\n * console.log(result.text);\n * ```\n *\n * @example One-liner\n * ```ts\n * import gerbil from \"gerbil\";\n *\n * const text = await gerbil(\"Write a haiku\");\n * ```\n *\n * @example AI SDK\n * ```ts\n * import { generateText } from \"ai\";\n * import { gerbil } from \"gerbil/ai\";\n *\n * const { text } = await generateText({\n * model: gerbil(\"qwen3-0.6b\"),\n * prompt: \"Write a haiku\",\n * });\n * ```\n *\n * @example Skills\n * ```ts\n * import { commit, summarize, explain, defineSkill } from \"gerbil/skills\";\n *\n * const msg = await commit({ type: \"conventional\" });\n * const summary = await summarize({ content: document });\n * ```\n */\n\nexport {\n ChromeGPUBackend,\n getChromeCachedModels,\n refreshCachedModelSizes,\n} from \"./core/chrome-backend.js\";\n// Core\nexport { Gerbil } from \"./core/gerbil.js\";\nexport { BUILTIN_MODELS, listBuiltinModels, resolveModel } from \"./core/models.js\";\n\n// One-liner API\nexport { dispose, embed, embedBatch, generate, json, stream } from \"./core/one-liner.js\";\n\nimport gerbil from \"./core/one-liner.js\";\nexport default gerbil;\n\n// Types\nexport type {\n CacheConfig,\n EmbedOptions,\n EmbedResult,\n FallbackConfig,\n // Generation types\n GenerateOptions,\n GenerateResult,\n // Config types\n GerbilConfig,\n // Provider types\n GerbilModelSettings,\n GerbilProviderSettings,\n JsonOptions,\n // Load types\n LoadOptions,\n // Model types\n ModelConfig,\n ModelSource,\n ModelStats,\n ProgressInfo,\n // Stats types\n SessionStats,\n SystemInfo,\n} from \"./core/types.js\";\n\n// Note: Task/Skill types are now in \"gerbil/skills\"\n// import { CommitInput, SummarizeInput, ... } from \"gerbil/skills\";\n\n// Version\nexport const VERSION = \"0.1.0\";\n"],"mappings":";;;;;;;AAsDA,kBAAeA;AAiCf,MAAa,UAAU"}
package/dist/integrations/ai-sdk.d.mts ADDED
@@ -0,0 +1,78 @@
+ import { c as GerbilModelSettings, f as ModelConfig, l as GerbilProviderSettings } from "../types-BS1N92Jt.mjs";
+ import { LanguageModelV2, LanguageModelV2CallOptions, LanguageModelV2CallWarning, LanguageModelV2Content, LanguageModelV2FinishReason, LanguageModelV2StreamPart, LanguageModelV2Usage } from "@ai-sdk/provider";
+
+ //#region src/integrations/ai-sdk.d.ts
+
+ declare class GerbilLanguageModel implements LanguageModelV2 {
+ readonly specificationVersion: "v2";
+ readonly provider = "gerbil";
+ readonly modelId: string;
+ readonly supportedUrls: Record<string, RegExp[]>;
+ private instance;
+ private readonly settings;
+ private readonly providerSettings;
+ private loadPromise;
+ constructor(modelId: string, settings: GerbilModelSettings, providerSettings: GerbilProviderSettings);
+ private ensureLoaded;
+ private convertPrompt;
+ private mapFinishReason;
+ doGenerate(options: LanguageModelV2CallOptions): Promise<{
+ content: LanguageModelV2Content[];
+ finishReason: LanguageModelV2FinishReason;
+ usage: LanguageModelV2Usage;
+ request: {
+ body: {
+ model: string;
+ prompt: string;
+ };
+ };
+ warnings: LanguageModelV2CallWarning[];
+ }>;
+ doStream(options: LanguageModelV2CallOptions): Promise<{
+ stream: ReadableStream<LanguageModelV2StreamPart>;
+ request: {
+ body: {
+ model: string;
+ prompt: string;
+ };
+ };
+ }>;
+ }
+ type GerbilProvider = {
+ (modelId: string, settings?: GerbilModelSettings): GerbilLanguageModel;
+ languageModel(modelId: string, settings?: GerbilModelSettings): GerbilLanguageModel;
+ listModels(): ModelConfig[];
+ getModel(modelId: string): ModelConfig | undefined;
+ };
+ /**
+ * Create a Gerbil provider
+ *
+ * @example
+ * ```ts
+ * const local = createGerbil({ device: "gpu", dtype: "q4" });
+ *
+ * const { text } = await generateText({
+ * model: local("qwen3-0.6b"),
+ * prompt: "Hello",
+ * });
+ * ```
+ */
+ declare function createGerbil(options?: GerbilProviderSettings): GerbilProvider;
+ /**
+ * Default Gerbil provider
+ *
+ * @example
+ * ```ts
+ * import { generateText } from "ai";
+ * import { gerbil } from "gerbil/ai";
+ *
+ * const { text } = await generateText({
+ * model: gerbil("qwen3-0.6b"),
+ * prompt: "Hello",
+ * });
+ * ```
+ */
+ declare const gerbil: GerbilProvider;
+ //#endregion
+ export { GerbilProvider, createGerbil, gerbil as default, gerbil };
+ //# sourceMappingURL=ai-sdk.d.mts.map
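These declarations implement the AI SDK's `LanguageModelV2` spec. Combining the two embedded `@example` blocks into one sketch of the default and configured providers (the `device`/`dtype` fields are taken verbatim from the `createGerbil` example; the full `GerbilProviderSettings` type lives in `types-BS1N92Jt.mjs` and is not shown in this hunk, and the subpath is written as `@tryhamster/gerbil/ai` to match the published name, where the JSDoc uses `gerbil/ai`):

```ts
import { generateText } from "ai";
import { createGerbil, gerbil } from "@tryhamster/gerbil/ai";

// Default provider, as in the JSDoc for `gerbil`.
const { text } = await generateText({
  model: gerbil("qwen3-0.6b"),
  prompt: "Write a haiku",
});

// Provider with explicit settings, as in the JSDoc for `createGerbil`.
const local = createGerbil({ device: "gpu", dtype: "q4" });
const out = await generateText({
  model: local("qwen3-0.6b"),
  prompt: "Hello",
});
```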
package/dist/integrations/ai-sdk.d.mts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"ai-sdk.d.mts","names":[],"sources":["../../src/integrations/ai-sdk.ts"],"sourcesContent":[],"mappings":";;;;;cA0CM,mBAAA,YAA+B,eAqIe,CAAA;EArIf,SAAA,oBAAA,EAAA,IAAA;EAAe,SAAA,QAAA,GAAA,QAAA;EAmNxC,SAAA,OAAA,EAAc,MAAA;EACK,SAAA,aAAA,EA9ML,MA8MK,CAAA,MAAA,EA9MU,MA8MV,EAAA,CAAA;EAAsB,QAAA,QAAA;EACT,iBAAA,QAAA;EAAsB,iBAAA,gBAAA;EAClD,QAAA,WAAA;EACa,WAAA,CAAA,OAAA,EAAA,MAAA,EAAA,QAAA,EAxMf,mBAwMe,EAAA,gBAAA,EAvMP,sBAuMO;EAAW,QAAA,YAAA;EAgBxB,QAAA,aAAY;EA4Bf,QAAA,eAAM;sBA7KS,6BAA0B;;;;;;;;;;;;oBA+C5B,6BAA0B;;;;;;;;;;KA8ExC,cAAA;+BACmB,sBAAsB;4CACT,sBAAsB;gBAClD;6BACa;;;;;;;;;;;;;;;iBAgBb,YAAA,WAAsB,yBAA8B;;;;;;;;;;;;;;;cA4BvD,QAAM"}