@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/README.md +79 -14
  2. package/dist/auto-update-S9s5-g0C.mjs +3 -0
  3. package/dist/browser/index.d.ts +1009 -0
  4. package/dist/browser/index.d.ts.map +1 -0
  5. package/dist/browser/index.js +2492 -0
  6. package/dist/browser/index.js.map +1 -0
  7. package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-CORwaIyC.mjs} +514 -73
  8. package/dist/chrome-backend-CORwaIyC.mjs.map +1 -0
  9. package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-DIKYoWj-.mjs} +1 -1
  10. package/dist/cli.mjs +3359 -647
  11. package/dist/cli.mjs.map +1 -1
  12. package/dist/frameworks/express.d.mts +1 -1
  13. package/dist/frameworks/express.mjs +3 -4
  14. package/dist/frameworks/express.mjs.map +1 -1
  15. package/dist/frameworks/fastify.d.mts +1 -1
  16. package/dist/frameworks/fastify.mjs +2 -3
  17. package/dist/frameworks/fastify.mjs.map +1 -1
  18. package/dist/frameworks/hono.d.mts +1 -1
  19. package/dist/frameworks/hono.mjs +2 -3
  20. package/dist/frameworks/hono.mjs.map +1 -1
  21. package/dist/frameworks/next.d.mts +2 -2
  22. package/dist/frameworks/next.mjs +2 -3
  23. package/dist/frameworks/next.mjs.map +1 -1
  24. package/dist/frameworks/react.d.mts +1 -1
  25. package/dist/frameworks/trpc.d.mts +1 -1
  26. package/dist/frameworks/trpc.mjs +2 -3
  27. package/dist/frameworks/trpc.mjs.map +1 -1
  28. package/dist/gerbil-DJGqq7BX.mjs +4 -0
  29. package/dist/gerbil-DoDGHe6Z.mjs +1631 -0
  30. package/dist/gerbil-DoDGHe6Z.mjs.map +1 -0
  31. package/dist/gerbil-qOTe1nl2.d.mts +431 -0
  32. package/dist/gerbil-qOTe1nl2.d.mts.map +1 -0
  33. package/dist/index.d.mts +411 -9
  34. package/dist/index.d.mts.map +1 -1
  35. package/dist/index.mjs +7 -6
  36. package/dist/index.mjs.map +1 -1
  37. package/dist/integrations/ai-sdk.d.mts +122 -4
  38. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  39. package/dist/integrations/ai-sdk.mjs +238 -11
  40. package/dist/integrations/ai-sdk.mjs.map +1 -1
  41. package/dist/integrations/langchain.d.mts +132 -2
  42. package/dist/integrations/langchain.d.mts.map +1 -1
  43. package/dist/integrations/langchain.mjs +175 -8
  44. package/dist/integrations/langchain.mjs.map +1 -1
  45. package/dist/integrations/llamaindex.d.mts +1 -1
  46. package/dist/integrations/llamaindex.mjs +2 -3
  47. package/dist/integrations/llamaindex.mjs.map +1 -1
  48. package/dist/integrations/mcp-client.mjs +4 -4
  49. package/dist/integrations/mcp-client.mjs.map +1 -1
  50. package/dist/integrations/mcp.d.mts +2 -2
  51. package/dist/integrations/mcp.d.mts.map +1 -1
  52. package/dist/integrations/mcp.mjs +5 -6
  53. package/dist/kokoro-BNTb6egA.mjs +20210 -0
  54. package/dist/kokoro-BNTb6egA.mjs.map +1 -0
  55. package/dist/kokoro-CMOGDSgT.js +20212 -0
  56. package/dist/kokoro-CMOGDSgT.js.map +1 -0
  57. package/dist/{mcp-R8kRLIKb.mjs → mcp-kzDDWIoS.mjs} +10 -37
  58. package/dist/mcp-kzDDWIoS.mjs.map +1 -0
  59. package/dist/microphone-DaMZFRuR.mjs +3 -0
  60. package/dist/{one-liner-BUQR0nqq.mjs → one-liner-DxnNs_JK.mjs} +2 -2
  61. package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-DxnNs_JK.mjs.map} +1 -1
  62. package/dist/repl-DGUw4fCc.mjs +9 -0
  63. package/dist/skills/index.d.mts +305 -14
  64. package/dist/skills/index.d.mts.map +1 -1
  65. package/dist/skills/index.mjs +5 -6
  66. package/dist/skills-DulrOPeP.mjs +1435 -0
  67. package/dist/skills-DulrOPeP.mjs.map +1 -0
  68. package/dist/stt-1WIefHwc.mjs +3 -0
  69. package/dist/stt-CG_7KB_0.mjs +434 -0
  70. package/dist/stt-CG_7KB_0.mjs.map +1 -0
  71. package/dist/stt-Dne6SENv.js +434 -0
  72. package/dist/stt-Dne6SENv.js.map +1 -0
  73. package/dist/{tools-BsiEE6f2.mjs → tools-Bi1P7Xoy.mjs} +6 -7
  74. package/dist/{tools-BsiEE6f2.mjs.map → tools-Bi1P7Xoy.mjs.map} +1 -1
  75. package/dist/transformers.web-DiD1gTwk.js +44695 -0
  76. package/dist/transformers.web-DiD1gTwk.js.map +1 -0
  77. package/dist/transformers.web-u34VxRFM.js +3 -0
  78. package/dist/tts-B1pZMlDv.mjs +3 -0
  79. package/dist/tts-C2FzKuSx.js +725 -0
  80. package/dist/tts-C2FzKuSx.js.map +1 -0
  81. package/dist/tts-CyHhcLtN.mjs +731 -0
  82. package/dist/tts-CyHhcLtN.mjs.map +1 -0
  83. package/dist/types-CiTc7ez3.d.mts +353 -0
  84. package/dist/types-CiTc7ez3.d.mts.map +1 -0
  85. package/dist/{utils-7vXqtq2Q.mjs → utils-CZBZ8dgR.mjs} +1 -1
  86. package/dist/{utils-7vXqtq2Q.mjs.map → utils-CZBZ8dgR.mjs.map} +1 -1
  87. package/docs/ai-sdk.md +137 -21
  88. package/docs/browser.md +241 -2
  89. package/docs/memory.md +72 -0
  90. package/docs/stt.md +494 -0
  91. package/docs/tts.md +569 -0
  92. package/docs/vision.md +396 -0
  93. package/package.json +21 -22
  94. package/dist/auto-update-BbNHbSU1.mjs +0 -3
  95. package/dist/browser/index.d.mts +0 -262
  96. package/dist/browser/index.d.mts.map +0 -1
  97. package/dist/browser/index.mjs +0 -755
  98. package/dist/browser/index.mjs.map +0 -1
  99. package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
  100. package/dist/gerbil-BfnsFWRE.mjs +0 -644
  101. package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
  102. package/dist/gerbil-BjW-z7Fq.mjs +0 -5
  103. package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
  104. package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
  105. package/dist/mcp-R8kRLIKb.mjs.map +0 -1
  106. package/dist/models-DKULvhOr.mjs +0 -136
  107. package/dist/models-DKULvhOr.mjs.map +0 -1
  108. package/dist/models-De2-_GmQ.d.mts +0 -22
  109. package/dist/models-De2-_GmQ.d.mts.map +0 -1
  110. package/dist/skills-D3CEpgDc.mjs +0 -630
  111. package/dist/skills-D3CEpgDc.mjs.map +0 -1
  112. package/dist/types-BS1N92Jt.d.mts +0 -183
  113. package/dist/types-BS1N92Jt.d.mts.map +0 -1
  114. /package/dist/{chunk-Ct1HF2bE.mjs → chunk-CkXuGtQK.mjs} +0 -0
package/docs/vision.md ADDED
@@ -0,0 +1,396 @@
1
+ # Vision Models in Gerbil
2
+
3
+ Gerbil supports **Vision Language Models (VLMs)** like Ministral 3B that can understand and describe images. This guide covers how to use vision capabilities across all Gerbil interfaces.
4
+
5
+ ## Quick Start
6
+
7
+ ```typescript
8
+ import { Gerbil } from "@tryhamster/gerbil";
9
+
10
+ const g = new Gerbil();
11
+ await g.loadModel("ministral-3b"); // Vision-capable model
12
+
13
+ const result = await g.generate("What's in this image?", {
14
+ images: [{ source: "https://example.com/photo.jpg" }]
15
+ });
16
+
17
+ console.log(result.text);
18
+ ```
19
+
20
+ ## Supported Models
21
+
22
+ | Model ID | Vision | Reasoning | Context | Size |
23
+ |----------|--------|-----------|---------|------|
24
+ | `ministral-3b` | ✅ | ✅ | 256K | ~2.5GB |
25
+
26
+ More vision models will be added as they become available in ONNX format.
27
+
28
+ ## Image Input Types
29
+
30
+ Gerbil accepts images in several formats:
31
+
32
+ ```typescript
33
+ // URL (recommended for web images)
34
+ images: [{ source: "https://example.com/image.jpg" }]
35
+
36
+ // Data URI (base64 encoded)
37
+ images: [{ source: "data:image/png;base64,iVBORw0KGgo..." }]
38
+
39
+ // Local file path (Node.js only, auto-converted to data URI)
40
+ images: [{ source: "/path/to/image.png" }]
41
+
42
+ // With alt text (optional, provides context)
43
+ images: [{ source: "...", alt: "A photo of a sunset" }]
44
+ ```
45
+
46
+ ## Multiple Images
47
+
48
+ You can pass multiple images for comparison or multi-image understanding:
49
+
50
+ ```typescript
51
+ const result = await g.generate("What's the difference between these two images?", {
52
+ images: [
53
+ { source: "https://example.com/before.jpg" },
54
+ { source: "https://example.com/after.jpg" }
55
+ ]
56
+ });
57
+ ```
58
+
59
+ ## Model Capability Detection
60
+
61
+ Check if the loaded model supports vision:
62
+
63
+ ```typescript
64
+ await g.loadModel("ministral-3b");
65
+
66
+ if (g.supportsVision()) {
67
+ // Use vision features
68
+ } else {
69
+ // Text-only mode
70
+ }
71
+ ```
72
+
73
+ ## Graceful Fallback
74
+
75
+ If you pass images to a non-vision model, Gerbil will:
76
+ 1. Log a warning to the console
77
+ 2. Ignore the images
78
+ 3. Process the text prompt normally
79
+
80
+ This allows you to write code that works with any model:
81
+
82
+ ```typescript
83
+ // This works with any model - images are used if supported
84
+ const result = await g.generate("Describe this", {
85
+ images: [{ source: imageUrl }]
86
+ });
87
+ ```
88
+
89
+ ---
90
+
91
+ ## AI SDK Integration
92
+
93
+ Use vision models with Vercel AI SDK v5+:
94
+
95
+ ```typescript
96
+ import { generateText } from "ai";
97
+ import { gerbil } from "@tryhamster/gerbil/ai";
98
+
99
+ const { text } = await generateText({
100
+ model: gerbil("ministral-3b"),
101
+ messages: [
102
+ {
103
+ role: "user",
104
+ content: [
105
+ { type: "image", image: new URL("https://example.com/photo.jpg") },
106
+ { type: "text", text: "Describe this image in detail" },
107
+ ],
108
+ },
109
+ ],
110
+ });
111
+ ```
112
+
113
+ ### Image Part Formats
114
+
115
+ The AI SDK integration accepts images in these formats:
116
+
117
+ ```typescript
118
+ // URL object
119
+ { type: "image", image: new URL("https://...") }
120
+
121
+ // URL string
122
+ { type: "image", image: "https://..." }
123
+
124
+ // Base64 data URI
125
+ { type: "image", image: "data:image/png;base64,..." }
126
+
127
+ // Uint8Array with mime type
128
+ { type: "image", image: imageBytes, mimeType: "image/png" }
129
+ ```
130
+
131
+ ---
132
+
133
+ ## Express & Next.js Integration
134
+
135
+ ### Express
136
+
137
+ ```typescript
138
+ import express from "express";
139
+ import { gerbil } from "@tryhamster/gerbil/express";
140
+
141
+ const app = express();
142
+ app.use("/ai", gerbil({ model: "ministral-3b" })());
143
+
144
+ // POST /ai/generate
145
+ // Body: { prompt: "Describe this", images: [{ source: "https://..." }] }
146
+ ```
147
+
148
+ ### Next.js App Router
149
+
150
+ ```typescript
151
+ // app/api/chat/route.ts
152
+ import { gerbil } from "@tryhamster/gerbil/next";
153
+
154
+ export const POST = gerbil.handler({ model: "ministral-3b" });
155
+
156
+ // Fetch from client:
157
+ // fetch("/api/chat", {
158
+ // method: "POST",
159
+ // body: JSON.stringify({
160
+ // prompt: "What's in this image?",
161
+ // images: [{ source: dataUri }]
162
+ // })
163
+ // })
164
+ ```
165
+
166
+ ---
167
+
168
+ ## React Hooks (Browser)
169
+
170
+ ### useChat with Images
171
+
172
+ ```tsx
173
+ import { useChat } from "@tryhamster/gerbil/browser";
174
+
175
+ function VisionChat() {
176
+ const {
177
+ messages,
178
+ input,
179
+ setInput,
180
+ handleSubmit,
181
+ attachImage,
182
+ attachedImages,
183
+ clearImages,
184
+ sendWithImages,
185
+ } = useChat({ model: "ministral-3b" });
186
+
187
+ const handleFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
188
+ const file = e.target.files?.[0];
189
+ if (file) {
190
+ const reader = new FileReader();
191
+ reader.onload = () => attachImage(reader.result as string);
192
+ reader.readAsDataURL(file);
193
+ }
194
+ };
195
+
196
+ return (
197
+ <div>
198
+ {/* Messages */}
199
+ {messages.map(m => (
200
+ <div key={m.id}>
201
+ {m.images?.map((img, i) => (
202
+ <img key={i} src={img} alt="" className="max-w-xs" />
203
+ ))}
204
+ <p>{m.content}</p>
205
+ </div>
206
+ ))}
207
+
208
+ {/* Image attachment */}
209
+ <input type="file" accept="image/*" onChange={handleFileSelect} />
210
+
211
+ {attachedImages.length > 0 && (
212
+ <div>
213
+ 📎 {attachedImages.length} image(s) attached
214
+ <button onClick={clearImages}>Clear</button>
215
+ </div>
216
+ )}
217
+
218
+ {/* Input */}
219
+ <form onSubmit={handleSubmit}>
220
+ <input
221
+ value={input}
222
+ onChange={e => setInput(e.target.value)}
223
+ placeholder="Describe the image..."
224
+ />
225
+ <button type="submit">Send</button>
226
+ </form>
227
+ </div>
228
+ );
229
+ }
230
+ ```
231
+
232
+ ### Direct Image Send
233
+
234
+ ```tsx
235
+ // Send a message with specific images
236
+ sendWithImages("Compare these two photos", [image1DataUri, image2DataUri]);
237
+ ```
238
+
239
+ ---
240
+
241
+ ## Built-in Vision Skills
242
+
243
+ Gerbil includes pre-built skills for common vision tasks:
244
+
245
+ ### Describe Image
246
+
247
+ ```typescript
248
+ import { describeImage } from "@tryhamster/gerbil/skills";
249
+
250
+ const description = await describeImage({
251
+ image: "https://example.com/photo.jpg",
252
+ focus: "details", // "general" | "details" | "text" | "objects" | "scene"
253
+ format: "bullets", // "paragraph" | "bullets" | "structured"
254
+ });
255
+ ```
256
+
257
+ ### Analyze Screenshot
258
+
259
+ ```typescript
260
+ import { analyzeScreenshot } from "@tryhamster/gerbil/skills";
261
+
262
+ const analysis = await analyzeScreenshot({
263
+ image: screenshotDataUri,
264
+ type: "accessibility", // "ui-review" | "accessibility" | "suggestions" | "qa"
265
+ });
266
+ ```
267
+
268
+ ### Extract from Image
269
+
270
+ ```typescript
271
+ import { extractFromImage } from "@tryhamster/gerbil/skills";
272
+
273
+ const extracted = await extractFromImage({
274
+ image: documentPhoto,
275
+ extract: "text", // "text" | "data" | "code" | "table" | "diagram"
276
+ outputFormat: "markdown", // "raw" | "json" | "markdown"
277
+ });
278
+ ```
279
+
280
+ ### Compare Images
281
+
282
+ ```typescript
283
+ import { compareImages } from "@tryhamster/gerbil/skills";
284
+
285
+ const comparison = await compareImages({
286
+ image1: beforeScreenshot,
287
+ image2: afterScreenshot,
288
+ focus: "differences", // "differences" | "similarities" | "detailed"
289
+ });
290
+ ```
291
+
292
+ ### Caption Image
293
+
294
+ ```typescript
295
+ import { captionImage } from "@tryhamster/gerbil/skills";
296
+
297
+ const caption = await captionImage({
298
+ image: photo,
299
+ style: "descriptive", // "concise" | "descriptive" | "creative" | "funny"
300
+ });
301
+ ```
302
+
303
+ ---
304
+
305
+ ## Performance Tips
306
+
307
+ ### WebGPU Acceleration
308
+
309
+ Vision models benefit significantly from GPU acceleration:
310
+
311
+ ```typescript
312
+ // Node.js: Uses Chrome backend for WebGPU
313
+ await g.loadModel("ministral-3b"); // Auto-detects WebGPU
314
+
315
+ // Browser: Native WebGPU
316
+ await g.loadModel("ministral-3b", { device: "webgpu" });
317
+ ```
318
+
319
+ ### Image Size
320
+
321
+ - Larger images take longer to process
322
+ - Consider resizing before sending to the model
323
+ - 512x512 to 1024x1024 is generally optimal
324
+
325
+ ### Caching
326
+
327
+ The model is cached in the browser's IndexedDB (in Node.js, via the Chrome backend), so subsequent loads are fast.
328
+
329
+ ---
330
+
331
+ ## Troubleshooting
332
+
333
+ ### "Model doesn't support vision"
334
+
335
+ Make sure you're using a vision-capable model like `ministral-3b`.
336
+
337
+ ### Slow image processing
338
+
339
+ - Ensure WebGPU is being used (check `g.getDeviceMode()`)
340
+ - Resize large images before sending
341
+ - In Node.js, the Chrome backend provides GPU acceleration
342
+
343
+ ### Image not loading
344
+
345
+ - Check the URL is accessible
346
+ - For local files, ensure the path is absolute
347
+ - Base64 data URIs must include the mime type prefix
348
+
349
+ ---
350
+
351
+ ## API Reference
352
+
353
+ ### ImageInput
354
+
355
+ ```typescript
356
+ interface ImageInput {
357
+ /** Image source: URL, base64 data URI, or local file path */
358
+ source: string;
359
+ /** Optional alt text for context */
360
+ alt?: string;
361
+ }
362
+ ```
363
+
364
+ ### GenerateOptions (with images)
365
+
366
+ ```typescript
367
+ interface GenerateOptions {
368
+ // ... standard options ...
369
+
370
+ /** Images to include (only used if model supports vision) */
371
+ images?: ImageInput[];
372
+ }
373
+ ```
374
+
375
+ ### supportsVision()
376
+
377
+ ```typescript
378
+ g.supportsVision(): boolean
379
+ ```
380
+
381
+ Returns `true` if the loaded model supports vision input.
382
+
383
+ ### ModelConfig
384
+
385
+ ```typescript
386
+ interface ModelConfig {
387
+ // ... standard properties ...
388
+
389
+ /** Whether model supports vision/image input */
390
+ supportsVision?: boolean;
391
+
392
+ /** Size of vision encoder (if applicable) */
393
+ visionEncoderSize?: string;
394
+ }
395
+ ```
396
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tryhamster/gerbil",
3
- "version": "1.0.0-rc.0",
3
+ "version": "1.0.0-rc.10",
4
4
  "description": "Local LLM inference for Node.js. GPU-accelerated. Zero config. Works standalone or with Vercel AI SDK.",
5
5
  "type": "module",
6
6
  "main": "dist/index.mjs",
@@ -62,32 +62,18 @@
62
62
  "types": "./dist/integrations/mcp-client.d.mts"
63
63
  },
64
64
  "./browser": {
65
- "import": "./dist/browser/index.mjs",
66
- "types": "./dist/browser/index.d.mts"
65
+ "import": "./dist/browser/index.js",
66
+ "types": "./dist/browser/index.d.ts"
67
67
  }
68
68
  },
69
- "scripts": {
70
- "build": "tsdown",
71
- "dev": "tsx src/cli/index.ts",
72
- "typecheck": "tsc --noEmit",
73
- "check": "ultracite check",
74
- "fix": "ultracite fix",
75
- "test": "vitest run",
76
- "test:watch": "vitest",
77
- "prepublishOnly": "pnpm build",
78
- "changeset": "changeset",
79
- "release": "changeset publish",
80
- "prepare": "lefthook install"
81
- },
82
69
  "dependencies": {
83
70
  "@huggingface/hub": "^2.7.1",
84
- "@huggingface/transformers": "^3.8.0",
85
71
  "chalk": "^5.3.0",
86
72
  "cli-progress": "^3.12.0",
87
73
  "commander": "^12.1.0",
88
- "onnxruntime-web": "^1.21.0-dev.20250114-228dd16893",
89
74
  "ora": "^8.0.1",
90
75
  "puppeteer-core": "^24.31.0",
76
+ "react": "^19.0.0",
91
77
  "webgpu": "^0.3.8",
92
78
  "zod": "^3.23.0"
93
79
  },
@@ -101,8 +87,7 @@
101
87
  "hono": ">=4.0.0",
102
88
  "langchain": ">=0.1.0",
103
89
  "llamaindex": ">=0.1.0",
104
- "next": ">=14.0.0",
105
- "react": ">=18.0.0"
90
+ "next": ">=14.0.0"
106
91
  },
107
92
  "peerDependenciesMeta": {
108
93
  "@ai-sdk/provider": {
@@ -142,6 +127,9 @@
142
127
  "devDependencies": {
143
128
  "@ai-sdk/provider": "^2.0.0",
144
129
  "@biomejs/biome": "^2.3.8",
130
+ "@huggingface/transformers": "^3.8.0",
131
+ "kokoro-js": "^1.2.1",
132
+ "onnxruntime-web": "^1.21.0-dev.20250114-228dd16893",
145
133
  "@changesets/changelog-github": "^0.5.1",
146
134
  "@changesets/cli": "^2.28.1",
147
135
  "@types/cli-progress": "^3.11.6",
@@ -158,7 +146,6 @@
158
146
  "ink-spinner": "^5.0.0",
159
147
  "ink-text-input": "^6.0.0",
160
148
  "lefthook": "^2.0.5",
161
- "react": "^19.0.0",
162
149
  "tsdown": "^0.17.0-beta.3",
163
150
  "tsx": "^4.15.0",
164
151
  "typescript": "^5.4.5",
@@ -203,5 +190,17 @@
203
190
  ],
204
191
  "publishConfig": {
205
192
  "access": "public"
193
+ },
194
+ "scripts": {
195
+ "build": "tsdown",
196
+ "dev": "tsx src/cli/index.ts",
197
+ "typecheck": "tsc --noEmit",
198
+ "check": "ultracite check",
199
+ "fix": "ultracite fix",
200
+ "test": "vitest run",
201
+ "test:watch": "vitest",
202
+ "changeset": "changeset",
203
+ "version": "changeset version",
204
+ "release": "pnpm publish && changeset tag"
206
205
  }
207
- }
206
+ }
@@ -1,3 +0,0 @@
1
- import { i as installUpdate, n as checkForUpdate, r as compareVersions, t as CURRENT_VERSION } from "./cli.mjs";
2
-
3
- export { CURRENT_VERSION, checkForUpdate, installUpdate };