@tryhamster/gerbil 1.0.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +23 -0
- package/README.md +253 -0
- package/bin/cli.js +2 -0
- package/dist/auto-update-BbNHbSU1.mjs +3 -0
- package/dist/browser/index.d.mts +262 -0
- package/dist/browser/index.d.mts.map +1 -0
- package/dist/browser/index.mjs +755 -0
- package/dist/browser/index.mjs.map +1 -0
- package/dist/chrome-backend-C5Un08O4.mjs +771 -0
- package/dist/chrome-backend-C5Un08O4.mjs.map +1 -0
- package/dist/chrome-backend-CtwPENIW.mjs +3 -0
- package/dist/chunk-Ct1HF2bE.mjs +7 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.mjs +7078 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/frameworks/express.d.mts +22 -0
- package/dist/frameworks/express.d.mts.map +1 -0
- package/dist/frameworks/express.mjs +123 -0
- package/dist/frameworks/express.mjs.map +1 -0
- package/dist/frameworks/fastify.d.mts +11 -0
- package/dist/frameworks/fastify.d.mts.map +1 -0
- package/dist/frameworks/fastify.mjs +73 -0
- package/dist/frameworks/fastify.mjs.map +1 -0
- package/dist/frameworks/hono.d.mts +14 -0
- package/dist/frameworks/hono.d.mts.map +1 -0
- package/dist/frameworks/hono.mjs +82 -0
- package/dist/frameworks/hono.mjs.map +1 -0
- package/dist/frameworks/next.d.mts +31 -0
- package/dist/frameworks/next.d.mts.map +1 -0
- package/dist/frameworks/next.mjs +116 -0
- package/dist/frameworks/next.mjs.map +1 -0
- package/dist/frameworks/react.d.mts +56 -0
- package/dist/frameworks/react.d.mts.map +1 -0
- package/dist/frameworks/react.mjs +172 -0
- package/dist/frameworks/react.mjs.map +1 -0
- package/dist/frameworks/trpc.d.mts +12 -0
- package/dist/frameworks/trpc.d.mts.map +1 -0
- package/dist/frameworks/trpc.mjs +80 -0
- package/dist/frameworks/trpc.mjs.map +1 -0
- package/dist/gerbil-BfnsFWRE.mjs +644 -0
- package/dist/gerbil-BfnsFWRE.mjs.map +1 -0
- package/dist/gerbil-BjW-z7Fq.mjs +5 -0
- package/dist/gerbil-DZ1k3ChC.d.mts +138 -0
- package/dist/gerbil-DZ1k3ChC.d.mts.map +1 -0
- package/dist/index.d.mts +223 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +13 -0
- package/dist/index.mjs.map +1 -0
- package/dist/integrations/ai-sdk.d.mts +78 -0
- package/dist/integrations/ai-sdk.d.mts.map +1 -0
- package/dist/integrations/ai-sdk.mjs +199 -0
- package/dist/integrations/ai-sdk.mjs.map +1 -0
- package/dist/integrations/langchain.d.mts +41 -0
- package/dist/integrations/langchain.d.mts.map +1 -0
- package/dist/integrations/langchain.mjs +93 -0
- package/dist/integrations/langchain.mjs.map +1 -0
- package/dist/integrations/llamaindex.d.mts +45 -0
- package/dist/integrations/llamaindex.d.mts.map +1 -0
- package/dist/integrations/llamaindex.mjs +86 -0
- package/dist/integrations/llamaindex.mjs.map +1 -0
- package/dist/integrations/mcp-client.d.mts +206 -0
- package/dist/integrations/mcp-client.d.mts.map +1 -0
- package/dist/integrations/mcp-client.mjs +507 -0
- package/dist/integrations/mcp-client.mjs.map +1 -0
- package/dist/integrations/mcp.d.mts +177 -0
- package/dist/integrations/mcp.d.mts.map +1 -0
- package/dist/integrations/mcp.mjs +8 -0
- package/dist/mcp-R8kRLIKb.mjs +348 -0
- package/dist/mcp-R8kRLIKb.mjs.map +1 -0
- package/dist/models-DKULvhOr.mjs +136 -0
- package/dist/models-DKULvhOr.mjs.map +1 -0
- package/dist/models-De2-_GmQ.d.mts +22 -0
- package/dist/models-De2-_GmQ.d.mts.map +1 -0
- package/dist/one-liner-BUQR0nqq.mjs +98 -0
- package/dist/one-liner-BUQR0nqq.mjs.map +1 -0
- package/dist/skills/index.d.mts +390 -0
- package/dist/skills/index.d.mts.map +1 -0
- package/dist/skills/index.mjs +7 -0
- package/dist/skills-D3CEpgDc.mjs +630 -0
- package/dist/skills-D3CEpgDc.mjs.map +1 -0
- package/dist/tools-BsiEE6f2.mjs +567 -0
- package/dist/tools-BsiEE6f2.mjs.map +1 -0
- package/dist/types-BS1N92Jt.d.mts +183 -0
- package/dist/types-BS1N92Jt.d.mts.map +1 -0
- package/dist/utils-7vXqtq2Q.mjs +63 -0
- package/dist/utils-7vXqtq2Q.mjs.map +1 -0
- package/docs/ai-sdk.md +80 -0
- package/docs/architecture/README.md +84 -0
- package/docs/architecture/caching.md +227 -0
- package/docs/architecture/inference.md +176 -0
- package/docs/architecture/overview.md +179 -0
- package/docs/architecture/streaming.md +261 -0
- package/docs/architecture/webgpu.md +213 -0
- package/docs/browser.md +328 -0
- package/docs/cli.md +155 -0
- package/docs/frameworks.md +90 -0
- package/docs/mcp-client.md +224 -0
- package/docs/mcp.md +109 -0
- package/docs/memory.md +229 -0
- package/docs/repl.md +473 -0
- package/docs/skills.md +261 -0
- package/docs/tools.md +304 -0
- package/package.json +207 -0
package/dist/gerbil-BfnsFWRE.mjs.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"gerbil-BfnsFWRE.mjs","names":[…],"sources":["../src/core/gerbil.ts"],"sourcesContent":[…],"mappings":"…"}
(single-line source map; "sourcesContent" inlines the full src/core/gerbil.ts implementation of the Gerbil class: WebGPU/CPU/WASM device selection with silent fallback, a Chrome-based WebGPU backend for Node.js, streaming, structured JSON output with retries, and embeddings. The "names" and "mappings" fields hold machine-generated base64-VLQ mapping data, elided here.)
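The gerbil.ts source inlined in the map above documents the loading flow: device "auto" tries WebGPU first (direct model loading in the browser, a Chrome-backed worker in Node.js), silently falls back to WASM (browser) or CPU (Node.js) on failure, and switches dtype from q4f16 to q4 accordingly. A minimal sketch of that flow, assuming the package root specifier "@tryhamster/gerbil" re-exports the Gerbil class (as the index.d.mts entry below declares):

```ts
import { Gerbil } from "@tryhamster/gerbil";

const g = new Gerbil();

// device "auto" requests WebGPU first; loadModel silently falls back to
// CPU (Node.js) or WASM (browser) if GPU initialization fails.
await g.loadModel("qwen3-0.6b", {
  device: "auto",
  onProgress: ({ status, progress, file }) => {
    // Progress callbacks fire per downloaded file, throttled to ~5% steps.
    console.log(status, file ?? "", progress ?? "");
  },
});

console.log(g.getDeviceMode()); // "webgpu" | "cpu" | "wasm"
console.log(g.getDtype());      // "q4f16" on WebGPU, "q4" otherwise
```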
package/dist/gerbil-DZ1k3ChC.d.mts
ADDED
@@ -0,0 +1,138 @@
+import { _ as SystemInfo, a as GenerateOptions, d as LoadOptions, f as ModelConfig, g as SessionStats, n as EmbedOptions, o as GenerateResult, r as EmbedResult, s as GerbilConfig, u as JsonOptions } from "./types-BS1N92Jt.mjs";
+
+//#region src/core/gerbil.d.ts
+
+declare class Gerbil {
+  private generator;
+  private tokenizer;
+  private model;
+  private embedder;
+  private currentModel;
+  private modelConfig;
+  private readonly config;
+  private stats;
+  private useDirect;
+  private chromeBackend;
+  private _deviceMode;
+  constructor(config?: GerbilConfig);
+  static listModels(): ModelConfig[];
+  static getModel(modelId: string): ModelConfig | undefined;
+  /**
+   * Load a model
+   *
+   * @example
+   * ```ts
+   * // Built-in model
+   * await g.loadModel("qwen3-0.6b");
+   *
+   * // HuggingFace model
+   * await g.loadModel("hf:microsoft/Phi-3-mini");
+   *
+   * // Local model
+   * await g.loadModel("file:./models/my-model");
+   * ```
+   */
+  loadModel(modelId?: string, options?: LoadOptions): Promise<void>;
+  /**
+   * Check if a model is loaded
+   */
+  isLoaded(): boolean;
+  /**
+   * Get current model info
+   */
+  getModelInfo(): ModelConfig | null;
+  /**
+   * Get current device mode (webgpu, cpu, or wasm)
+   */
+  getDeviceMode(): "webgpu" | "cpu" | "wasm";
+  /**
+   * Get dtype used for current model
+   */
+  getDtype(): string;
+  /**
+   * Get Chrome backend status (if using WebGPU via Chrome)
+   */
+  getChromeStatus(): {
+    pid: number | null;
+    port: number;
+    modelId: string;
+    startedAt: Date | null;
+  } | null;
+  /**
+   * Get Chrome memory usage (if using WebGPU via Chrome)
+   * Returns JS heap memory in bytes
+   */
+  getChromeMemory(): Promise<{
+    jsHeapUsed: number;
+    jsHeapTotal: number;
+  } | null>;
+  /**
+   * Get memory usage in GB (if using WebGPU via Chrome)
+   */
+  getMemoryUsage(): Promise<{
+    usedGB: number;
+    totalGB: number;
+    usedPercent: number;
+  } | null>;
+  /**
+   * Clear KV cache to free memory
+   * This will reset the conversation context but free up memory
+   */
+  clearCache(): Promise<void>;
+  /**
+   * Check memory usage and cleanup if needed
+   * @param thresholdGB Memory threshold in GB (default: 8)
+   * @returns true if cleanup was performed
+   */
+  checkMemoryAndCleanup(thresholdGB?: number): Promise<boolean>;
+  /**
+   * Generate text
+   */
+  generate(prompt: string, options?: GenerateOptions): Promise<GenerateResult>;
+  /**
+   * Stream text generation (simulated token-by-token)
+   *
+   * Note: Yields the raw output including <think> tags if thinking mode is enabled.
+   * The final result has parsed thinking separated out.
+   */
+  stream(prompt: string, options?: GenerateOptions): AsyncGenerator<string, GenerateResult, unknown>;
+  /**
+   * Internal: Generate with raw text access for streaming
+   */
+  private generateRaw;
+  /**
+   * Generate structured JSON output
+   */
+  json<T>(prompt: string, options: JsonOptions<T>): Promise<T>;
+  /**
+   * Generate embeddings
+   */
+  embed(text: string, options?: EmbedOptions): Promise<EmbedResult>;
+  /**
+   * Generate embeddings for multiple texts
+   */
+  embedBatch(texts: string[], options?: EmbedOptions): Promise<EmbedResult[]>;
+  /**
+   * Get session stats
+   */
+  getStats(): SessionStats;
+  /**
+   * Get system info
+   */
+  getInfo(): SystemInfo;
+  /**
+   * Reset stats
+   */
+  resetStats(): void;
+  /**
+   * Dispose of resources
+   */
+  dispose(): Promise<void>;
+  private formatPrompt;
+  private buildMessages;
+  private parseThinking;
+  private cleanOutput;
+}
+//#endregion
+export { Gerbil as t };
+//# sourceMappingURL=gerbil-DZ1k3ChC.d.mts.map
package/dist/gerbil-DZ1k3ChC.d.mts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"gerbil-DZ1k3ChC.d.mts","names":[],"sources":["../src/core/gerbil.ts"],"sourcesContent":[],"mappings":"…"}
(single-line declaration map; the machine-generated "mappings" string is elided here)
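Taken together, the Gerbil declarations above describe the core inference surface: generation, real or simulated streaming, schema-validated JSON, and embeddings. A hedged usage sketch follows; the zod import is an assumption (the inlined source validates via schema.parse and zodToJsonSchema, which matches zod's API), and the root import specifier is assumed to resolve to this build:

```ts
import { Gerbil } from "@tryhamster/gerbil";
import { z } from "zod"; // assumption: JsonOptions takes a zod-style schema

const g = new Gerbil();
await g.loadModel("qwen3-0.6b");

// Plain generation; generate() auto-loads the default model if none is loaded.
const result = await g.generate("Explain WebGPU in one sentence.", { thinking: true });
console.log(result.text, result.thinking, result.tokensPerSecond);

// Streaming: yields string chunks, then returns the final GenerateResult.
for await (const chunk of g.stream("Write a haiku about hamsters")) {
  process.stdout.write(chunk);
}

// Structured output: retried up to `retries` times until the schema validates.
const profile = await g.json("Describe a fictional user", {
  schema: z.object({ name: z.string(), age: z.number() }),
  retries: 3,
});

await g.dispose();
```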
package/dist/index.d.mts
ADDED
@@ -0,0 +1,223 @@
+import { _ as SystemInfo, a as GenerateOptions, c as GerbilModelSettings, d as LoadOptions, f as ModelConfig, g as SessionStats, h as ProgressInfo, i as FallbackConfig, l as GerbilProviderSettings, m as ModelStats, n as EmbedOptions, o as GenerateResult, p as ModelSource, r as EmbedResult, s as GerbilConfig, t as CacheConfig, u as JsonOptions } from "./types-BS1N92Jt.mjs";
+import { n as listBuiltinModels, r as resolveModel, t as BUILTIN_MODELS } from "./models-De2-_GmQ.mjs";
+import { t as Gerbil } from "./gerbil-DZ1k3ChC.mjs";
+import { Server } from "http";
+
+//#region src/core/chrome-backend.d.ts
+
+type CachedModelEntry = {
+  modelId: string;
+  downloadedAt: string;
+  lastUsed: string;
+  sizeBytes?: number;
+};
+/** Get list of models cached in Chrome's IndexedDB */
+declare function getChromeCachedModels(): CachedModelEntry[];
+/** Refresh sizes for cached models that don't have them */
+declare function refreshCachedModelSizes(): Promise<void>;
+type ChromeBackendOptions = {
+  /** Custom Chrome executable path */
+  chromePath?: string;
+  /** Model ID to load */
+  modelId?: string;
+  /** Progress callback */
+  onProgress?: (info: {
+    status: string;
+    progress?: number;
+    file?: string;
+  }) => void;
+  /** Token callback for streaming */
+  onToken?: (token: {
+    text: string;
+    state: string;
+    numTokens: number;
+    tps: number;
+  }) => void;
+};
+type GenerateOptions$1 = {
+  maxTokens?: number;
+  temperature?: number;
+  topP?: number;
+  topK?: number;
+  thinking?: boolean;
+  system?: string;
+  onToken?: (token: {
+    text: string;
+    state: string;
+    numTokens: number;
+    tps: number;
+  }) => void;
+};
+declare class ChromeGPUBackend {
+  private browser;
+  private page;
+  private cdp;
+  private serverPort;
+  private userDataDir;
+  private readonly modelId;
+  private isReady;
+  private readonly messageHandlers;
+  private pendingRejects;
+  server: Server | null;
+  private constructor();
+  /**
+   * Create and initialize a Chrome GPU backend
+   */
+  static create(options?: ChromeBackendOptions): Promise<ChromeGPUBackend>;
+  /**
+   * Get existing browser or launch a new one (singleton pattern)
+   * Multiple Gerbil instances share the same browser process
+   */
+  private getOrCreateBrowser;
+  /**
+   * Launch a new Chrome browser instance
+   */
+  private launchBrowser;
+  /**
+   * Launch Chrome and initialize the worker page
+   */
+  private launch;
+  /**
+   * Handle incoming messages from the page
+   */
+  private handleMessage;
+  /**
+   * Wait for a specific message type
+   */
+  private waitForMessage;
+  /**
+   * Check if Chrome backend is still alive
+   */
+  isAlive(): boolean;
+  /**
+   * Get Chrome backend status information
+   */
+  getStatus(): {
+    pid: number | null;
+    port: number;
+    modelId: string;
+    startedAt: Date | null;
+  };
+  /**
+   * Get Chrome memory usage via CDP Performance metrics
+   * Returns memory in bytes or null if unavailable
+   */
+  getMemoryUsage(): Promise<{
+    jsHeapUsed: number;
+    jsHeapTotal: number;
+  } | null>;
+  /**
+   * Check memory usage and auto-cleanup if threshold exceeded
+   * @param thresholdGB Memory threshold in GB (default: 8)
+   * @returns true if cleanup was performed
+   */
+  checkMemoryAndCleanup(thresholdGB?: number): Promise<boolean>;
+  /**
+   * Get memory usage in a human-readable format
+   */
+  getMemoryStats(): Promise<{
+    usedGB: number;
+    totalGB: number;
+    usedPercent: number;
+  } | null>;
+  /**
+   * Generate text with streaming
+   */
+  generate(prompt: string, options?: GenerateOptions$1): Promise<string>;
+  /**
+   * Interrupt current generation
+   */
+  interrupt(): Promise<void>;
+  /**
+   * Reset conversation cache
+   */
+  reset(): Promise<void>;
+  /**
+   * Check if backend is ready
+   */
+  ready(): boolean;
+  /**
+   * Start or reuse the global HTTP server
+   * Uses singleton pattern to prevent killing our own server
+   */
+  private startServer;
+  /**
+   * Dispose of the backend and clean up
+   * Note: We keep the shared browser running for other backends
+   */
+  dispose(): Promise<void>;
+  /**
+   * Reject all pending waits (called on browser disconnect or dispose)
+   */
+  private rejectPendingWaits;
+  /**
+   * Clear the model cache (forces re-download on next start)
+   */
+  static clearCache(): void;
+  /**
+   * Get the number of active Chrome pages
+   */
+  static getActivePageCount(): number;
+  /**
+   * Get memory usage info for all active pages
+   */
+  static getMemoryInfo(): {
+    activePagesCount: number;
+    maxPages: number;
+  };
+  /**
+   * Gracefully close the shared browser (call on process exit)
+   */
+  static closeSharedBrowser(): Promise<void>;
+}
+//#endregion
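ChromeGPUBackend is exported from the package root (see the export list at the end of this file), so it can be driven directly when you want explicit control over the Node.js WebGPU worker. A sketch against the declarations above; the modelId value and the root import specifier are assumptions:

```ts
import { ChromeGPUBackend } from "@tryhamster/gerbil";

// Runs a headless Chrome page as a WebGPU worker; instances share one browser.
const backend = await ChromeGPUBackend.create({
  modelId: "onnx-community/Qwen3-0.6B-ONNX", // hypothetical model path
  onProgress: ({ status, progress }) => console.log(status, progress ?? ""),
});

// generate() resolves with the full text; onToken streams tokens as they arrive.
const text = await backend.generate("Hello!", {
  maxTokens: 128,
  onToken: ({ text }) => process.stdout.write(text),
});

console.log(backend.getStatus()); // { pid, port, modelId, startedAt }

// Reset the conversation cache if the page's JS heap exceeds 8 GB.
await backend.checkMemoryAndCleanup(8);

await backend.dispose(); // keeps the shared browser alive for other backends
await ChromeGPUBackend.closeSharedBrowser(); // call on process exit
```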
//#region src/core/one-liner.d.ts
|
|
175
|
+
interface GerbilOptions extends GenerateOptions {
|
|
176
|
+
model?: string;
|
|
177
|
+
}
|
|
178
|
+
/**
|
|
179
|
+
* Generate text with zero setup
|
|
180
|
+
*
|
|
181
|
+
* @example
|
|
182
|
+
* ```ts
|
|
183
|
+
* const text = await gerbil("Write a haiku");
|
|
184
|
+
* const text = await gerbil("Explain this", { model: "qwen3-0.6b", thinking: true });
|
|
185
|
+
* ```
|
|
186
|
+
*/
|
|
187
|
+
declare function gerbil(prompt: string, options?: GerbilOptions): Promise<string>;
|
|
188
|
+
/**
|
|
189
|
+
* Generate text (returns full result)
|
|
190
|
+
*/
|
|
191
|
+
declare function generate(prompt: string, options?: GerbilOptions): Promise<GenerateResult>;
|
|
192
|
+
/**
|
|
193
|
+
* Stream text generation
|
|
194
|
+
*/
|
|
195
|
+
declare function stream(prompt: string, options?: GerbilOptions): AsyncGenerator<string, GenerateResult, unknown>;
|
|
196
|
+
/**
|
|
197
|
+
* Generate structured JSON
|
|
198
|
+
*/
|
|
199
|
+
declare function json<T>(prompt: string, options: JsonOptions<T> & {
|
|
200
|
+
model?: string;
|
|
201
|
+
}): Promise<T>;
|
|
202
|
+
/**
|
|
203
|
+
* Generate embeddings
|
|
204
|
+
*/
|
|
205
|
+
declare function embed(text: string, options?: EmbedOptions & {
|
|
206
|
+
model?: string;
|
|
207
|
+
}): Promise<EmbedResult>;
|
|
208
|
+
/**
|
|
209
|
+
* Generate embeddings for multiple texts
|
|
210
|
+
*/
|
|
211
|
+
declare function embedBatch(texts: string[], options?: EmbedOptions & {
|
|
212
|
+
model?: string;
|
|
213
|
+
}): Promise<EmbedResult[]>;
|
|
214
|
+
/**
|
|
215
|
+
* Dispose singleton instance
|
|
216
|
+
*/
|
|
217
|
+
declare function dispose(): Promise<void>;
|
|
218
|
+
//#endregion
|
|
219
|
+
//#region src/index.d.ts
|
|
220
|
+
declare const VERSION = "0.1.0";
|
|
221
|
+
//#endregion
|
|
222
|
+
export { BUILTIN_MODELS, type CacheConfig, ChromeGPUBackend, type EmbedOptions, type EmbedResult, type FallbackConfig, type GenerateOptions, type GenerateResult, Gerbil, type GerbilConfig, type GerbilModelSettings, type GerbilProviderSettings, type JsonOptions, type LoadOptions, type ModelConfig, type ModelSource, type ModelStats, type ProgressInfo, type SessionStats, type SystemInfo, VERSION, gerbil as default, dispose, embed, embedBatch, generate, getChromeCachedModels, json, listBuiltinModels, refreshCachedModelSizes, resolveModel, stream };
|
|
223
|
+
//# sourceMappingURL=index.d.mts.map
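Read together, the one-liner region above is the package's zero-setup surface: a default-exported `gerbil()` plus `generate`/`stream`/`json`/`embed`/`embedBatch`/`dispose`. A minimal usage sketch against these declarations; the import specifier `@tryhamster/gerbil` and the `qwen3-0.6b` model id are taken from the package name and the embedded doc examples, not from this hunk:

```ts
import gerbil, { stream, embed, dispose } from "@tryhamster/gerbil";

// One-shot generation through the default export.
const haiku = await gerbil("Write a haiku", { model: "qwen3-0.6b" });
console.log(haiku);

// stream() yields text chunks; its generator return value is the full GenerateResult.
for await (const chunk of stream("Explain WebGPU in one sentence")) {
  process.stdout.write(chunk);
}

// embed() resolves to an EmbedResult (its fields live in types not shown in this diff).
const embedding = await embed("hello world");
console.log(embedding);

// Tear down the singleton instance behind the one-liner API.
await dispose();
```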
package/dist/index.d.mts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/core/chrome-backend.ts","../src/core/one-liner.ts","../src/index.ts"],"sourcesContent":[],"mappings":";;;;;;;AA+BA,KARK,gBAAA,GAQW;EAuGM,OAAA,EAAA,MAAA;EAgDV,YAAA,EAAA,MAAA;EAWA,QAAA,EAAA,MAAA;EAuTC,SAAA,CAAA,EAAA,MAAA;CAWI;;AASkD,iBA7enD,qBAAA,CAAA,CA6emD,EA7e1B,gBA6e0B,EAAA;;AA+XzB,iBArwBpB,uBAAA,CAAA,CAqwBoB,EArwBO,OAqwBP,CAAA,IAAA,CAAA;AAAuB,KArtBrD,oBAAA,GAqtBqD;EAqD5C;EASJ,UAAA,CAAA,EAAA,MAAA;EA0DE;EAqEkB,OAAA,CAAA,EAAA,MAAA;EAAO;;;;IC9gClC,IAAA,CAAA,EAAA,MAAc;EAaT,CAAA,EAAA,GAAA,IAAM;EAcC;EAEX,OAAA,CAAA,EAAA,CAAA,KAAA,EAAA;IACA,IAAA,EAAA,MAAA;IAAR,KAAA,EAAA,MAAA;IAAO,SAAA,EAAA,MAAA;IASa,GAAA,EAAM,MAAA;EAElB,CAAA,EAAA,GAAA,IAAA;CACe;AAAvB,KD6FS,iBAAA,GC7FT;EAAc,SAAA,CAAA,EAAA,MAAA;EASK,WAAI,CAAA,EAAA,MAAA;EAEH,IAAA,CAAA,EAAA,MAAA;EAAZ,IAAA,CAAA,EAAA,MAAA;EACA,QAAA,CAAA,EAAA,OAAA;EAAR,MAAA,CAAA,EAAA,MAAA;EAAO,OAAA,CAAA,EAAA,CAAA,KAAA,EAAA;IASY,IAAK,EAAA,MAAA;IAEhB,KAAA,EAAA,MAAA;IACA,SAAA,EAAA,MAAA;IAAR,GAAA,EAAA,MAAA;EAAO,CAAA,EAAA,GAAA,IAAA;AASV,CAAA;AAEW,cDiXE,gBAAA,CCjXF;EACA,QAAA,OAAA;EAAR,QAAA,IAAA;EAAO,QAAA,GAAA;EASY,QAAA,UAAO;;;;EC1DhB,iBAAO,eAAA;;UF4aH;;;;;0BASc,uBAA4B,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;eAiSpD;;;;;;oBAoBW;;;;;;;;;+CA8BsB;;;;oBAwBtB;;;;;;;;qCAoBgB,oBAAuB;;;;eAqD5C;;;;WASJ;;;;;;;;;;;;;;aA0DE;;;;;;;;;;;;;;;;;;;;;;;+BAqEkB;;;;UC9gC3B,aAAA,SAAsB,eDkdD,CAAA;EAAoC,KAAA,CAAA,EAAA,MAAA;;;;;;;;;;;iBCrcpD,MAAA,CDigCsB,MAAA,EAAA,MAAA,EAAA,OAAA,CAAA,ECjgCU,aDigCV,CAAA,ECjgC+B,ODigC/B,CAAA,MAAA,CAAA;;;;iBCn/Bf,QAAA,2BAEX,gBACR,QAAQ;AA/DwC;AAiCJ;AA2B/C;AAEW,iBAUY,MAAA,CAVZ,MAAA,EAAA,MAAA,EAAA,OAAA,CAAA,EAYA,aAZA,CAAA,EAaR,cAbQ,CAAA,MAAA,EAae,cAbf,EAAA,OAAA,CAAA;;;;AAUY,iBAYD,IAZO,CAAA,CAAA,CAAA,CAAA,MAAA,EAAA,MAAA,EAAA,OAAA,EAclB,WAdkB,CAcN,CAdM,CAAA,GAAA;EAElB,KAAA,CAAA,EAAA,MAAA;CACe,CAAA,EAYvB,OAZuB,CAYf,CAZe,CAAA;;;AAS1B;AAEuB,iBAUD,KAAA,CAVC,IAAA,EAAA,MAAA,EAAA,OACZ,CADY,EAYZ,YAZY,GAAA;EAAZ,KAAA,CAAA,EAAA,MAAA;CACA,CAAA,EAYR,OAZQ,CAYA,WAZA,CAAA;;;AASX;AAEW,iBAUW,UAAA,CAVX,KAAA,EAAA,MAAA,EAAA,EAAA,OACR,CADQ,EAYA,YAZA,GAAA;EACA,KAAA,CAAA,EAAA,MAAA;CAAR,CAAA,EAYA,OAZA,CAYQ,WAZR,EAAA,CAAA;;AASH;;AAGW,iBASW,OAAA,CAAA,CATX,EASsB,OATtB,CAAA,IAAA,CAAA;;;AAxBD,cCzBG,OAAA,GDyBH,OAAA"}
package/dist/index.mjs
ADDED
@@ -0,0 +1,13 @@
import { n as getChromeCachedModels, r as refreshCachedModelSizes, t as ChromeGPUBackend } from "./chrome-backend-C5Un08O4.mjs";
import { a as resolveModel, i as listBuiltinModels, t as BUILTIN_MODELS } from "./models-DKULvhOr.mjs";
import "./utils-7vXqtq2Q.mjs";
import { t as Gerbil } from "./gerbil-BfnsFWRE.mjs";
import { c as stream, i as generate, n as embed, o as json, r as embedBatch, s as one_liner_default, t as dispose } from "./one-liner-BUQR0nqq.mjs";

//#region src/index.ts
var src_default = one_liner_default;
const VERSION = "0.1.0";

//#endregion
export { BUILTIN_MODELS, ChromeGPUBackend, Gerbil, VERSION, src_default as default, dispose, embed, embedBatch, generate, getChromeCachedModels, json, listBuiltinModels, refreshCachedModelSizes, resolveModel, stream };
//# sourceMappingURL=index.mjs.map
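Note that `index.mjs` is pure wiring: it stitches the hashed chunk files back into the public surface and re-exports `ChromeGPUBackend` alongside the one-liner API. A hedged sketch of the backend's page-accounting and shutdown statics declared earlier; wiring `closeSharedBrowser()` to a signal handler is our reading of the "call on process exit" comment, not code from this diff:

```ts
import { ChromeGPUBackend } from "@tryhamster/gerbil";

// Page accounting for the shared Chrome instance.
const { activePagesCount, maxPages } = ChromeGPUBackend.getMemoryInfo();
console.log(`active pages: ${activePagesCount}/${maxPages}`);

// Drop cached model weights so the next start re-downloads them.
ChromeGPUBackend.clearCache();

// Gracefully close the shared browser when the process is interrupted.
process.on("SIGINT", async () => {
  await ChromeGPUBackend.closeSharedBrowser();
  process.exit(0);
});
```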
package/dist/index.mjs.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"index.mjs","names":["gerbil"],"sources":["../src/index.ts"],"sourcesContent":["/**\n * Gerbil - Local LLM inference for Node.js\n *\n * @example Standalone\n * ```ts\n * import { Gerbil } from \"gerbil\";\n *\n * const g = new Gerbil();\n * await g.loadModel(\"qwen3-0.6b\");\n *\n * const result = await g.generate(\"Write a haiku\");\n * console.log(result.text);\n * ```\n *\n * @example One-liner\n * ```ts\n * import gerbil from \"gerbil\";\n *\n * const text = await gerbil(\"Write a haiku\");\n * ```\n *\n * @example AI SDK\n * ```ts\n * import { generateText } from \"ai\";\n * import { gerbil } from \"gerbil/ai\";\n *\n * const { text } = await generateText({\n * model: gerbil(\"qwen3-0.6b\"),\n * prompt: \"Write a haiku\",\n * });\n * ```\n *\n * @example Skills\n * ```ts\n * import { commit, summarize, explain, defineSkill } from \"gerbil/skills\";\n *\n * const msg = await commit({ type: \"conventional\" });\n * const summary = await summarize({ content: document });\n * ```\n */\n\nexport {\n ChromeGPUBackend,\n getChromeCachedModels,\n refreshCachedModelSizes,\n} from \"./core/chrome-backend.js\";\n// Core\nexport { Gerbil } from \"./core/gerbil.js\";\nexport { BUILTIN_MODELS, listBuiltinModels, resolveModel } from \"./core/models.js\";\n\n// One-liner API\nexport { dispose, embed, embedBatch, generate, json, stream } from \"./core/one-liner.js\";\n\nimport gerbil from \"./core/one-liner.js\";\nexport default gerbil;\n\n// Types\nexport type {\n CacheConfig,\n EmbedOptions,\n EmbedResult,\n FallbackConfig,\n // Generation types\n GenerateOptions,\n GenerateResult,\n // Config types\n GerbilConfig,\n // Provider types\n GerbilModelSettings,\n GerbilProviderSettings,\n JsonOptions,\n // Load types\n LoadOptions,\n // Model types\n ModelConfig,\n ModelSource,\n ModelStats,\n ProgressInfo,\n // Stats types\n SessionStats,\n SystemInfo,\n} from \"./core/types.js\";\n\n// Note: Task/Skill types are now in \"gerbil/skills\"\n// import { CommitInput, SummarizeInput, ... } from \"gerbil/skills\";\n\n// Version\nexport const VERSION = \"0.1.0\";\n"],"mappings":";;;;;;;AAsDA,kBAAeA;AAiCf,MAAa,UAAU"}
package/dist/integrations/ai-sdk.d.mts
ADDED
@@ -0,0 +1,78 @@
import { c as GerbilModelSettings, f as ModelConfig, l as GerbilProviderSettings } from "../types-BS1N92Jt.mjs";
import { LanguageModelV2, LanguageModelV2CallOptions, LanguageModelV2CallWarning, LanguageModelV2Content, LanguageModelV2FinishReason, LanguageModelV2StreamPart, LanguageModelV2Usage } from "@ai-sdk/provider";

//#region src/integrations/ai-sdk.d.ts

declare class GerbilLanguageModel implements LanguageModelV2 {
  readonly specificationVersion: "v2";
  readonly provider = "gerbil";
  readonly modelId: string;
  readonly supportedUrls: Record<string, RegExp[]>;
  private instance;
  private readonly settings;
  private readonly providerSettings;
  private loadPromise;
  constructor(modelId: string, settings: GerbilModelSettings, providerSettings: GerbilProviderSettings);
  private ensureLoaded;
  private convertPrompt;
  private mapFinishReason;
  doGenerate(options: LanguageModelV2CallOptions): Promise<{
    content: LanguageModelV2Content[];
    finishReason: LanguageModelV2FinishReason;
    usage: LanguageModelV2Usage;
    request: {
      body: {
        model: string;
        prompt: string;
      };
    };
    warnings: LanguageModelV2CallWarning[];
  }>;
  doStream(options: LanguageModelV2CallOptions): Promise<{
    stream: ReadableStream<LanguageModelV2StreamPart>;
    request: {
      body: {
        model: string;
        prompt: string;
      };
    };
  }>;
}
type GerbilProvider = {
  (modelId: string, settings?: GerbilModelSettings): GerbilLanguageModel;
  languageModel(modelId: string, settings?: GerbilModelSettings): GerbilLanguageModel;
  listModels(): ModelConfig[];
  getModel(modelId: string): ModelConfig | undefined;
};
/**
 * Create a Gerbil provider
 *
 * @example
 * ```ts
 * const local = createGerbil({ device: "gpu", dtype: "q4" });
 *
 * const { text } = await generateText({
 *   model: local("qwen3-0.6b"),
 *   prompt: "Hello",
 * });
 * ```
 */
declare function createGerbil(options?: GerbilProviderSettings): GerbilProvider;
/**
 * Default Gerbil provider
 *
 * @example
 * ```ts
 * import { generateText } from "ai";
 * import { gerbil } from "gerbil/ai";
 *
 * const { text } = await generateText({
 *   model: gerbil("qwen3-0.6b"),
 *   prompt: "Hello",
 * });
 * ```
 */
declare const gerbil: GerbilProvider;
//#endregion
export { GerbilProvider, createGerbil, gerbil as default, gerbil };
//# sourceMappingURL=ai-sdk.d.mts.map
package/dist/integrations/ai-sdk.d.mts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"ai-sdk.d.mts","names":[],"sources":["../../src/integrations/ai-sdk.ts"],"sourcesContent":[],"mappings":";;;;;cA0CM,mBAAA,YAA+B,eAqIe,CAAA;EArIf,SAAA,oBAAA,EAAA,IAAA;EAAe,SAAA,QAAA,GAAA,QAAA;EAmNxC,SAAA,OAAA,EAAc,MAAA;EACK,SAAA,aAAA,EA9ML,MA8MK,CAAA,MAAA,EA9MU,MA8MV,EAAA,CAAA;EAAsB,QAAA,QAAA;EACT,iBAAA,QAAA;EAAsB,iBAAA,gBAAA;EAClD,QAAA,WAAA;EACa,WAAA,CAAA,OAAA,EAAA,MAAA,EAAA,QAAA,EAxMf,mBAwMe,EAAA,gBAAA,EAvMP,sBAuMO;EAAW,QAAA,YAAA;EAgBxB,QAAA,aAAY;EA4Bf,QAAA,eAAM;sBA7KS,6BAA0B;;;;;;;;;;;;oBA+C5B,6BAA0B;;;;;;;;;;KA8ExC,cAAA;+BACmB,sBAAsB;4CACT,sBAAsB;gBAClD;6BACa;;;;;;;;;;;;;;;iBAgBb,YAAA,WAAsB,yBAA8B;;;;;;;;;;;;;;;cA4BvD,QAAM"}