npm - @tryhamster/gerbil - Versions diffs - 1.0.0-rc.9 → 1.0.0 - Mend

@tryhamster/gerbil 1.0.0-rc.9 → 1.0.0

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (179)

package/LICENSE +1 -1
package/README.md +247 -84
package/dist/architectures-C1I5V3Dt.mjs +6070 -0
package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
package/dist/browser/index.d.ts +264 -588
package/dist/browser/index.d.ts.map +1 -1
package/dist/browser/index.js +585 -2334
package/dist/browser/index.js.map +1 -1
package/dist/cli.mjs +625 -1098
package/dist/cli.mjs.map +1 -1
package/dist/defaults-9komdrbY.mjs +24 -0
package/dist/defaults-9komdrbY.mjs.map +1 -0
package/dist/frameworks/express.d.mts +1 -3
package/dist/frameworks/express.d.mts.map +1 -1
package/dist/frameworks/express.mjs +7 -7
package/dist/frameworks/express.mjs.map +1 -1
package/dist/frameworks/fastify.d.mts +1 -1
package/dist/frameworks/fastify.d.mts.map +1 -1
package/dist/frameworks/fastify.mjs +3 -3
package/dist/frameworks/fastify.mjs.map +1 -1
package/dist/frameworks/hono.d.mts +1 -1
package/dist/frameworks/hono.d.mts.map +1 -1
package/dist/frameworks/hono.mjs +4 -4
package/dist/frameworks/hono.mjs.map +1 -1
package/dist/frameworks/next.d.mts +3 -2
package/dist/frameworks/next.d.mts.map +1 -1
package/dist/frameworks/next.mjs +4 -4
package/dist/frameworks/next.mjs.map +1 -1
package/dist/frameworks/react.d.mts +1 -1
package/dist/frameworks/trpc.d.mts +1 -1
package/dist/frameworks/trpc.d.mts.map +1 -1
package/dist/frameworks/trpc.mjs +4 -4
package/dist/frameworks/trpc.mjs.map +1 -1
package/dist/gerbil-BHrJJIa4.mjs +1656 -0
package/dist/gerbil-BHrJJIa4.mjs.map +1 -0
package/dist/gerbil-BT9fCydo.d.mts +488 -0
package/dist/gerbil-BT9fCydo.d.mts.map +1 -0
package/dist/gerbil-DomNfIr1.mjs +4 -0
package/dist/gpu/hooks.d.mts +520 -0
package/dist/gpu/hooks.d.mts.map +1 -0
package/dist/gpu/hooks.mjs +1188 -0
package/dist/gpu/hooks.mjs.map +1 -0
package/dist/gpu/index.d.mts +2 -0
package/dist/gpu/index.mjs +6 -0
package/dist/gpu-33qCAtHW.mjs +3615 -0
package/dist/gpu-33qCAtHW.mjs.map +1 -0
package/dist/index-Dgmb2kE3.d.mts +245 -0
package/dist/index-Dgmb2kE3.d.mts.map +1 -0
package/dist/index-jEAL2s-A.d.mts +2022 -0
package/dist/index-jEAL2s-A.d.mts.map +1 -0
package/dist/index.d.mts +22 -487
package/dist/index.d.mts.map +1 -1
package/dist/index.mjs +13 -8
package/dist/index.mjs.map +1 -1
package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
package/dist/integrations/ai-sdk.d.mts +75 -6
package/dist/integrations/ai-sdk.d.mts.map +1 -1
package/dist/integrations/ai-sdk.mjs +131 -15
package/dist/integrations/ai-sdk.mjs.map +1 -1
package/dist/integrations/langchain.d.mts +1 -1
package/dist/integrations/langchain.d.mts.map +1 -1
package/dist/integrations/langchain.mjs +5 -5
package/dist/integrations/langchain.mjs.map +1 -1
package/dist/integrations/llamaindex.d.mts +1 -1
package/dist/integrations/llamaindex.d.mts.map +1 -1
package/dist/integrations/llamaindex.mjs +5 -5
package/dist/integrations/llamaindex.mjs.map +1 -1
package/dist/integrations/mcp-client.mjs +3 -3
package/dist/integrations/mcp-client.mjs.map +1 -1
package/dist/integrations/mcp.d.mts +3 -2
package/dist/integrations/mcp.d.mts.map +1 -1
package/dist/integrations/mcp.mjs +5 -5
package/dist/{mcp-BvbriaBy.mjs → mcp-1DaMsaBc.mjs} +4 -4
package/dist/mcp-1DaMsaBc.mjs.map +1 -0
package/dist/memory/index.d.mts +3 -0
package/dist/memory/index.mjs +6 -0
package/dist/memory-D1P7Tmda.mjs +4 -0
package/dist/memory-DVN0MnIG.mjs +132 -0
package/dist/memory-DVN0MnIG.mjs.map +1 -0
package/dist/memory-Dj0J1v88.mjs +294 -0
package/dist/memory-Dj0J1v88.mjs.map +1 -0
package/dist/moonshine-stt-BLyVoRpB.mjs +4 -0
package/dist/moonshine-stt-v_P_Ci_m.mjs +11936 -0
package/dist/moonshine-stt-v_P_Ci_m.mjs.map +1 -0
package/dist/{one-liner-s-lD8rCC.mjs → one-liner-DnQn7HJK.mjs} +14 -16
package/dist/one-liner-DnQn7HJK.mjs.map +1 -0
package/dist/repl-jV5gcJFA.mjs +9 -0
package/dist/skills/index.d.mts +270 -320
package/dist/skills/index.d.mts.map +1 -1
package/dist/skills/index.mjs +5 -5
package/dist/{skills-CD3Orlex.mjs → skills-DX8D59UH.mjs} +187 -32
package/dist/skills-DX8D59UH.mjs.map +1 -0
package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
package/dist/tools-DQ1mPUw5.mjs.map +1 -0
package/dist/{types-CiTc7ez3.d.mts → types-D6FiR_oh.d.mts} +106 -12
package/dist/types-D6FiR_oh.d.mts.map +1 -0
package/dist/types-DQBe2lFo.d.mts +165 -0
package/dist/types-DQBe2lFo.d.mts.map +1 -0
package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
package/dist/vector-B0panuy6.mjs +95 -0
package/dist/vector-B0panuy6.mjs.map +1 -0
package/docs/PROJECT-STATE.md +321 -0
package/docs/adding-a-model-family.md +280 -0
package/docs/ai-sdk.md +70 -61
package/docs/architecture/overview.md +17 -7
package/docs/browser.md +203 -8
package/docs/embeddings.md +156 -0
package/docs/gerbil-site-native-migration.md +217 -0
package/docs/gpu-engine/architectures.md +398 -0
package/docs/gpu-engine/ir.md +372 -0
package/docs/gpu-engine/kernels.md +718 -0
package/docs/gpu-engine/paper.html +1759 -0
package/docs/gpu-engine/paper.md +2109 -0
package/docs/gpu-engine/safetensors.md +312 -0
package/docs/gpu-engine/tokenizer.md +302 -0
package/docs/memory-rag.md +91 -0
package/docs/metal-safari-intel.md +190 -0
package/docs/mobile-failure-diagnosis.md +124 -0
package/docs/mobile.md +99 -0
package/docs/observability.md +230 -0
package/docs/onnx-removal-plan.md +339 -0
package/docs/research/autoresearch-portable.md +904 -0
package/docs/research/dispatch-reduction-hivemind.md +84 -0
package/docs/research/ios-safari-model-caching.md +117 -0
package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
package/docs/research/native-stt-model-selection.md +49 -0
package/docs/research/native-tts-model-selection.md +90 -0
package/docs/research/native-vs-chromium-decision.md +152 -0
package/docs/research/nemotron-mamba2-inference.md +910 -0
package/docs/research/qwen35-multimodal.md +293 -0
package/docs/research/qwen36-gemma4-targets.md +337 -0
package/docs/research/sota-embedding-models.md +179 -0
package/docs/research/sota-mobile-models-2026.md +263 -0
package/docs/research/sota-modality-models.md +202 -0
package/docs/research/tps-baselines.md +71 -0
package/docs/research/webgpu-m4-reference.md +104 -0
package/docs/site-update-plan.md +155 -0
package/docs/structured-output.md +123 -0
package/docs/stt.md +63 -446
package/docs/tts.md +77 -499
package/docs/vision.md +100 -338
package/package.json +22 -7
package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
package/dist/gerbil-CJ3ifloF.mjs +0 -4
package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
package/dist/gerbil-qOTe1nl2.d.mts +0 -431
package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
package/dist/kokoro-BNTb6egA.mjs +0 -20210
package/dist/kokoro-BNTb6egA.mjs.map +0 -1
package/dist/kokoro-CMOGDSgT.js +0 -20212
package/dist/kokoro-CMOGDSgT.js.map +0 -1
package/dist/mcp-BvbriaBy.mjs.map +0 -1
package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
package/dist/repl-DveXw36T.mjs +0 -9
package/dist/skills-CD3Orlex.mjs.map +0 -1
package/dist/stt-Bu-E23Sc.js +0 -433
package/dist/stt-Bu-E23Sc.js.map +0 -1
package/dist/stt-CpLYbGFd.mjs +0 -433
package/dist/stt-CpLYbGFd.mjs.map +0 -1
package/dist/stt-DRPLEEHB.mjs +0 -3
package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
package/dist/transformers.web-DiD1gTwk.js +0 -44695
package/dist/transformers.web-DiD1gTwk.js.map +0 -1
package/dist/transformers.web-u34VxRFM.js +0 -3
package/dist/tts-CqroPaSK.js +0 -724
package/dist/tts-CqroPaSK.js.map +0 -1
package/dist/tts-DXgsKGCe.mjs +0 -3
package/dist/tts-DeGANMNV.mjs +0 -730
package/dist/tts-DeGANMNV.mjs.map +0 -1
package/dist/types-CiTc7ez3.d.mts.map +0 -1
package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0

package/docs/vision.md CHANGED

@@ -1,396 +1,158 @@
-# Vision Models in Gerbil
+# Vision (Image → Text)
-Gerbil supports **Vision Language Models (VLMs)** like Ministral 3B that can understand and describe images. This guide covers how to use vision capabilities across all Gerbil interfaces.
+Gerbil's native WebGPU engine runs **vision language models** image-in → text-out. Two
+vision towers are supported natively, both with the same `describeImage` API:
-## Quick Start
-```typescript
-import { Gerbil } from "@tryhamster/gerbil";
-const g = new Gerbil();
-await g.loadModel("ministral-3b"); // Vision-capable model
-const result = await g.generate("What's in this image?", {
-  images: [{ source: "https://example.com/photo.jpg" }]
-});
-console.log(result.text);
-```
-## Supported Models
-| Model ID | Vision | Reasoning | Context | Size |
-|----------|--------|-----------|---------|------|
-| `ministral-3b` | ✅ | ✅ | 256K | ~2.5GB |
-More vision models coming soon as they become available in ONNX format.
-## Image Input Types
-Gerbil accepts images in several formats:
-```typescript
-// URL (recommended for web images)
-images: [{ source: "https://example.com/image.jpg" }]
-// Data URI (base64 encoded)
-images: [{ source: "data:image/png;base64,iVBORw0KGgo..." }]
-// Local file path (Node.js only, auto-converted to data URI)
-images: [{ source: "/path/to/image.png" }]
-// With alt text (optional, provides context)
-images: [{ source: "...", alt: "A photo of a sunset" }]
-```
-## Multiple Images
-You can pass multiple images for comparison or multi-image understanding:
-```typescript
-const result = await g.generate("What's the difference between these two images?", {
-  images: [
-    { source: "https://example.com/before.jpg" },
-    { source: "https://example.com/after.jpg" }
-  ]
-});
-```
-## Model Capability Detection
-Check if the loaded model supports vision:
+- **Qwen3.5 ViT** — bundled with `Qwen/Qwen3.5-0.8B` (the 0.8B text model's own built-in
+  vision tower). Bit-exact vs HuggingFace transformers.
+- **Gemma 4 ViT** — bundled with `mlx-community/gemma-4-e2b-it-4bit`.
-```typescript
-await g.loadModel("ministral-3b");
+There is no separate "vision model" — vision is the text model's own ViT, loaded on
+demand with `enableVision: true`.
-if (g.supportsVision()) {
-  // Use vision features
-} else {
-  // Text-only mode
-}
-```
+> **Pre-1.0.** This is the native engine surface. The legacy `Gerbil` class (ONNX /
+> transformers.js) once exposed an `images:` array on `generate()`; that path is retired and
+> not documented here.
-## Graceful Fallback
+## Quick Start
-If you pass images to a non-vision model, Gerbil will:
-1. Log a warning to console
-2. Ignore the images
-3. Process the text prompt normally
+### Node
-This allows you to write code that works with any model:
+In Node you decode the image to RGB pixels yourself (HWC layout, 0..255), then pass
+`{ pixels, width, height }`. The engine handles smart-resize, normalization, and patchify
+internally to match the HF image processor.
 ```typescript
-// This works with any model - images are used if supported
-const result = await g.generate("Describe this", {
-  images: [{ source: imageUrl }]
-});
-```
----
+import { WebGPUEngine } from "@tryhamster/gerbil/gpu";
-## AI SDK Integration
-Use vision models with Vercel AI SDK v5+:
-```typescript
-import { generateText } from "ai";
-import { gerbil } from "@tryhamster/gerbil/ai";
-const { text } = await generateText({
-  model: gerbil("ministral-3b"),
-  messages: [
-    {
-      role: "user",
-      content: [
-        { type: "image", image: new URL("https://example.com/photo.jpg") },
-        { type: "text", text: "Describe this image in detail" },
-      ],
-    },
-  ],
+const engine = await WebGPUEngine.create({
+  repo: "Qwen/Qwen3.5-0.8B", // BF16 checkpoint ships the ViT weights
+  enableVision: true,
 });
-```
-### Image Part Formats
-The AI SDK integration accepts images in these formats:
-```typescript
-// URL object
-{ type: "image", image: new URL("https://...") }
+// pixels: Uint8ClampedArray | Uint8Array | Float32Array, RGB, length = width*height*3
+const { text, tokensPerSecond } = await engine.describeImage(
+  { pixels, width, height },
+  "What's in this image?",
+  { maxTokens: 150 },
+);
-// URL string
-{ type: "image", image: "https://..." }
-// Base64 string
-{ type: "image", image: "data:image/png;base64,..." }
-// Uint8Array with mime type
-{ type: "image", image: imageBytes, mimeType: "image/png" }
+console.log(text, `(${tokensPerSecond.toFixed(1)} tok/s)`);
+engine.destroy();
 ```
----
-## Express & Next.js Integration
-### Express
-```typescript
-import express from "express";
-import { gerbil } from "@tryhamster/gerbil/express";
-const app = express();
-app.use("/ai", gerbil({ model: "ministral-3b" })());
-// POST /ai/generate
-// Body: { prompt: "Describe this", images: [{ source: "https://..." }] }
-```
-### Next.js App Router
-```typescript
-// app/api/chat/route.ts
-import { gerbil } from "@tryhamster/gerbil/next";
-export const POST = gerbil.handler({ model: "ministral-3b" });
-// Fetch from client:
-// fetch("/api/chat", {
-//   method: "POST",
-//   body: JSON.stringify({
-//     prompt: "What's in this image?",
-//     images: [{ source: dataUri }]
-//   })
-// })
-```
+### React (Browser)
----
+The React hook decodes URLs and data-URLs for you, so you can pass an image source string
+directly. Load with `enableVision: true`.
-## React Hooks (Browser)
+```tsx
+import { useEngine } from "@tryhamster/gerbil/gpu/hooks";
-### useChat with Images
+function ImageDescriber() {
+  const { describeImage, completion, isLoading, isGenerating } = useEngine({
+    model: "Qwen/Qwen3.5-0.8B",
+    enableVision: true,
+    autoLoad: true,
+  });
-```tsx
-import { useChat } from "@tryhamster/gerbil/browser";
-function VisionChat() {
-  const {
-    messages,
-    input,
-    setInput,
-    handleSubmit,
-    attachImage,
-    attachedImages,
-    clearImages,
-    sendWithImages,
-  } = useChat({ model: "ministral-3b" });
-  const handleFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
-    const file = e.target.files?.[0];
-    if (file) {
-      const reader = new FileReader();
-      reader.onload = () => attachImage(reader.result as string);
-      reader.readAsDataURL(file);
-    }
-  };
+  if (isLoading) return <div>Loading vision model…</div>;
   return (
     <div>
-      {/* Messages */}
-      {messages.map(m => (
-        <div key={m.id}>
-          {m.images?.map((img, i) => (
-            <img key={i} src={img} alt="" className="max-w-xs" />
-          ))}
-          <p>{m.content}</p>
-        </div>
-      ))}
-      {/* Image attachment */}
-      <input type="file" accept="image/*" onChange={handleFileSelect} />
-      {attachedImages.length > 0 && (
-        <div>
-          📎 {attachedImages.length} image(s) attached
-          <button onClick={clearImages}>Clear</button>
-        </div>
-      )}
-      {/* Input */}
-      <form onSubmit={handleSubmit}>
-        <input
-          value={input}
-          onChange={e => setInput(e.target.value)}
-          placeholder="Describe the image..."
-        />
-        <button type="submit">Send</button>
-      </form>
+      <button
+        disabled={isGenerating}
+        onClick={() =>
+          describeImage("https://example.com/photo.jpg", "Describe this image in detail")
+        }
+      >
+        Describe
+      </button>
+      <p>{completion}</p>
     </div>
   );
 }
 ```
-### Direct Image Send
+`describeImage` accepts either an image source string (data URL or http(s) URL) or
+pre-decoded `{ pixels, width, height }`. The hook caps the longest side at ~448px before
+decoding — ViT attention memory scales with patch-count², so this keeps mobile coherent.
-```tsx
-// Send a message with specific images
-sendWithImages("Compare these two photos", [image1DataUri, image2DataUri]);
-```
+## Supported Towers
----
+| Tower | Load from | dtype | Notes |
+|-------|-----------|-------|-------|
+| Qwen3.5 ViT | `Qwen/Qwen3.5-0.8B` (`enableVision: true`) | BF16 (ships ViT) | Bit-exact vs HF (per-token cosine 1.000000) |
+| Gemma 4 ViT | `mlx-community/gemma-4-e2b-it-4bit` (`enableVision: true`) | MLX 4-bit | Native projector + multimodal-embedder norms |
-## Built-in Vision Skills
+> The MLX 4-bit Qwen3.5 repo (`mlx-community/Qwen3.5-0.8B-4bit`) is text-only — load the
+> BF16 `Qwen/Qwen3.5-0.8B` repo to get the ViT weights for vision.
-Gerbil includes pre-built skills for common vision tasks:
+## How it works
-### Describe Image
+`describeImage` runs the full image-in → text-out pipeline natively:
-```typescript
-import { describeImage } from "@tryhamster/gerbil/skills";
+1. **Preprocess** — pixels are smart-resized, normalized, and patchified to match the HF
+   image processor (`preprocessImage` / `preprocessImageGemma4`).
+2. **Encode** — the ViT turns patches into merged image-embedding tokens (`encodeImage`).
+3. **Splice** — image tokens are scattered into the `image_token_id` rows of the text
+   sequence (`EmbedSplice` in the multimodal graph).
+4. **Decode** — Qwen3.5 uses multimodal M-RoPE positions; Gemma 4 uses standard sequential
+   1D RoPE. The LM then generates the description.
-const description = await describeImage({
-  image: "https://example.com/photo.jpg",
-  focus: "details", // "general" | "details" | "text" | "objects" | "scene"
-  format: "bullets", // "paragraph" | "bullets" | "structured"
-});
-```
+The low-level pieces are exposed if you need them: `engine.encodeImage(patches, gridTHW)`
+returns the merged image tokens, and `engine.hasVision` reports whether the engine was
+built with a ViT.
-### Analyze Screenshot
+## Image input
 ```typescript
-import { analyzeScreenshot } from "@tryhamster/gerbil/skills";
+// Pre-decoded RGB pixels (Node, or browser if you've already decoded)
+await engine.describeImage({ pixels, width, height }, prompt);
-const analysis = await analyzeScreenshot({
-  image: screenshotDataUri,
-  type: "accessibility", // "ui-review" | "accessibility" | "suggestions" | "qa"
-});
+// Already-built patch tensor + grid (skips host preprocessing — for reference parity)
+await engine.describeImage({ patches, gridTHW: [1, gridH, gridW] }, prompt);
 ```
-### Extract from Image
-```typescript
-import { extractFromImage } from "@tryhamster/gerbil/skills";
+In React, the hook also accepts a source string:
-const extracted = await extractFromImage({
-  image: documentPhoto,
-  extract: "text", // "text" | "data" | "code" | "table" | "diagram"
-  outputFormat: "markdown", // "raw" | "json" | "markdown"
-});
+```tsx
+describeImage("data:image/png;base64,iVBOR…", "What is this?");
+describeImage("https://example.com/photo.jpg", "Describe it"); // must be CORS-accessible
 ```
-### Compare Images
+## Options
-```typescript
-import { compareImages } from "@tryhamster/gerbil/skills";
-const comparison = await compareImages({
-  image1: beforeScreenshot,
-  image2: afterScreenshot,
-  focus: "differences", // "differences" | "similarities" | "detailed"
-});
-```
-### Caption Image
+`describeImage(image, prompt?, options?)` takes the standard generation options:
 ```typescript
-import { captionImage } from "@tryhamster/gerbil/skills";
-const caption = await captionImage({
-  image: photo,
-  style: "descriptive", // "concise" | "descriptive" | "creative" | "funny"
+await engine.describeImage(image, "Describe this image.", {
+  maxTokens: 150,
+  sampling: { temperature: 0.7 },
+  stopSequences: ["\n\n"],
+  onToken: (t) => process.stdout.write(t),
 });
 ```
----
+Returns a `GenerateResult`: `{ text, tokensGenerated, tokensPerSecond, totalTime, finishReason }`.
 ## Performance Tips
-### WebGPU Acceleration
-Vision models benefit significantly from GPU acceleration:
-```typescript
-// Node.js: Uses Chrome backend for WebGPU
-await g.loadModel("ministral-3b"); // Auto-detects WebGPU
-// Browser: Native WebGPU
-await g.loadModel("ministral-3b", { device: "webgpu" });
-```
-### Image Size
-- Larger images take longer to process
-- Consider resizing before sending to the model
-- 512x512 to 1024x1024 is generally optimal
-### Caching
-The model caches in the browser's IndexedDB (via Chrome backend in Node.js), so subsequent loads are fast.
----
+- **Resize before sending.** ViT attention cost grows with the square of the patch count.
+  The React hook already caps the longest side at 448px; in Node, downscale large photos
+  (≈448–512px longest side) before decoding to pixels.
+- **Mobile.** On iOS/iPadOS the engine reserves fewer vision patches by default to stay
+  under the WebKit GPU watchdog and memory budget; very large images may still need a
+  smaller `maxVisionPatches` (a `WebGPUEngine.create` option).
 ## Troubleshooting
-### "Model doesn't support vision"
-Make sure you're using a vision-capable model like `ministral-3b`.
-### Slow image processing
-- Ensure WebGPU is being used (check `g.getDeviceMode()`)
-- Resize large images before sending
-- In Node.js, the Chrome backend provides GPU acceleration
-### Image not loading
-- Check the URL is accessible
-- For local files, ensure the path is absolute
-- Base64 data URIs must include the mime type prefix
----
-## API Reference
-### ImageInput
-```typescript
-interface ImageInput {
-  /** Image source: URL, base64 data URI, or local file path */
-  source: string;
-  /** Optional alt text for context */
-  alt?: string;
-}
-```
-### GenerateOptions (with images)
+### "describeImage() requires a vision encoder"
-```typescript
-interface GenerateOptions {
-  // ... standard options ...
-  /** Images to include (only used if model supports vision) */
-  images?: ImageInput[];
-}
-```
+Load with `{ enableVision: true }` on a vision-capable checkpoint (Qwen3.5 BF16 or Gemma 4).
+The text-only MLX-4bit Qwen3.5 repo does not ship the ViT weights.
-### supportsVision()
-```typescript
-g.supportsVision(): boolean
-```
-Returns `true` if the loaded model supports vision input.
-### ModelConfig
-```typescript
-interface ModelConfig {
-  // ... standard properties ...
-  /** Whether model supports vision/image input */
-  supportsVision?: boolean;
-  /** Size of vision encoder (if applicable) */
-  visionEncoderSize?: string;
-}
-```
+### Out of memory on mobile
+Use a smaller image and/or lower `maxVisionPatches` at create time. WebKit kills the page
+content process around 1.5–2 GB.

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@tryhamster/gerbil",
-  "version": "1.0.0-rc.9",
+  "version": "1.0.0",
   "description": "Local LLM inference for Node.js. GPU-accelerated. Zero config. Works standalone or with Vercel AI SDK.",
   "type": "module",
   "main": "dist/index.mjs",
@@ -17,6 +17,10 @@
       "import": "./dist/skills/index.mjs",
       "types": "./dist/skills/index.d.mts"
     },
+    "./memory": {
+      "import": "./dist/memory/index.mjs",
+      "types": "./dist/memory/index.d.mts"
+    },
     "./ai": {
       "import": "./dist/integrations/ai-sdk.mjs",
       "types": "./dist/integrations/ai-sdk.d.mts"
@@ -64,6 +68,18 @@
     "./browser": {
       "import": "./dist/browser/index.js",
       "types": "./dist/browser/index.d.ts"
+    },
+    "./gpu": {
+      "import": "./dist/gpu/index.mjs",
+      "types": "./dist/gpu/index.d.mts"
+    },
+    "./hooks": {
+      "import": "./dist/gpu/hooks.mjs",
+      "types": "./dist/gpu/hooks.d.mts"
+    },
+    "./gpu/hooks": {
+      "import": "./dist/gpu/hooks.mjs",
+      "types": "./dist/gpu/hooks.d.mts"
     }
   },
   "dependencies": {
@@ -72,10 +88,10 @@
     "cli-progress": "^3.12.0",
     "commander": "^12.1.0",
     "ora": "^8.0.1",
-    "puppeteer-core": "^24.31.0",
+    "p-queue": "^9.0.1",
     "react": "^19.0.0",
-    "webgpu": "^0.3.8",
-    "zod": "^3.23.0"
+    "webgpu": "^0.4.0",
+    "zod": "^4.3.6"
   },
   "peerDependencies": {
     "@ai-sdk/provider": ">=2.0.0",
@@ -127,9 +143,6 @@
   "devDependencies": {
     "@ai-sdk/provider": "^2.0.0",
     "@biomejs/biome": "^2.3.8",
-    "@huggingface/transformers": "^3.8.0",
-    "kokoro-js": "^1.2.1",
-    "onnxruntime-web": "^1.21.0-dev.20250114-228dd16893",
     "@changesets/changelog-github": "^0.5.1",
     "@changesets/cli": "^2.28.1",
     "@types/cli-progress": "^3.11.6",
@@ -137,7 +150,9 @@
     "@types/ink-big-text": "^1.2.4",
     "@types/node": "^20.14.0",
     "@types/react": "^19.0.0",
+    "@webgpu/types": "^0.1.69",
     "ai": "^5.0.0",
+    "esbuild": "^0.27.3",
     "express": "^4.19.0",
     "ink": "^6.5.1",
     "ink-big-text": "^2.0.0",