@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -14
- package/dist/auto-update-S9s5-g0C.mjs +3 -0
- package/dist/browser/index.d.ts +1009 -0
- package/dist/browser/index.d.ts.map +1 -0
- package/dist/browser/index.js +2492 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-CORwaIyC.mjs} +514 -73
- package/dist/chrome-backend-CORwaIyC.mjs.map +1 -0
- package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-DIKYoWj-.mjs} +1 -1
- package/dist/cli.mjs +3359 -647
- package/dist/cli.mjs.map +1 -1
- package/dist/frameworks/express.d.mts +1 -1
- package/dist/frameworks/express.mjs +3 -4
- package/dist/frameworks/express.mjs.map +1 -1
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.mjs +2 -3
- package/dist/frameworks/fastify.mjs.map +1 -1
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.mjs +2 -3
- package/dist/frameworks/hono.mjs.map +1 -1
- package/dist/frameworks/next.d.mts +2 -2
- package/dist/frameworks/next.mjs +2 -3
- package/dist/frameworks/next.mjs.map +1 -1
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.mjs +2 -3
- package/dist/frameworks/trpc.mjs.map +1 -1
- package/dist/gerbil-DJGqq7BX.mjs +4 -0
- package/dist/gerbil-DoDGHe6Z.mjs +1631 -0
- package/dist/gerbil-DoDGHe6Z.mjs.map +1 -0
- package/dist/gerbil-qOTe1nl2.d.mts +431 -0
- package/dist/gerbil-qOTe1nl2.d.mts.map +1 -0
- package/dist/index.d.mts +411 -9
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +7 -6
- package/dist/index.mjs.map +1 -1
- package/dist/integrations/ai-sdk.d.mts +122 -4
- package/dist/integrations/ai-sdk.d.mts.map +1 -1
- package/dist/integrations/ai-sdk.mjs +238 -11
- package/dist/integrations/ai-sdk.mjs.map +1 -1
- package/dist/integrations/langchain.d.mts +132 -2
- package/dist/integrations/langchain.d.mts.map +1 -1
- package/dist/integrations/langchain.mjs +175 -8
- package/dist/integrations/langchain.mjs.map +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.mjs +2 -3
- package/dist/integrations/llamaindex.mjs.map +1 -1
- package/dist/integrations/mcp-client.mjs +4 -4
- package/dist/integrations/mcp-client.mjs.map +1 -1
- package/dist/integrations/mcp.d.mts +2 -2
- package/dist/integrations/mcp.d.mts.map +1 -1
- package/dist/integrations/mcp.mjs +5 -6
- package/dist/kokoro-BNTb6egA.mjs +20210 -0
- package/dist/kokoro-BNTb6egA.mjs.map +1 -0
- package/dist/kokoro-CMOGDSgT.js +20212 -0
- package/dist/kokoro-CMOGDSgT.js.map +1 -0
- package/dist/{mcp-R8kRLIKb.mjs → mcp-kzDDWIoS.mjs} +10 -37
- package/dist/mcp-kzDDWIoS.mjs.map +1 -0
- package/dist/microphone-DaMZFRuR.mjs +3 -0
- package/dist/{one-liner-BUQR0nqq.mjs → one-liner-DxnNs_JK.mjs} +2 -2
- package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-DxnNs_JK.mjs.map} +1 -1
- package/dist/repl-DGUw4fCc.mjs +9 -0
- package/dist/skills/index.d.mts +305 -14
- package/dist/skills/index.d.mts.map +1 -1
- package/dist/skills/index.mjs +5 -6
- package/dist/skills-DulrOPeP.mjs +1435 -0
- package/dist/skills-DulrOPeP.mjs.map +1 -0
- package/dist/stt-1WIefHwc.mjs +3 -0
- package/dist/stt-CG_7KB_0.mjs +434 -0
- package/dist/stt-CG_7KB_0.mjs.map +1 -0
- package/dist/stt-Dne6SENv.js +434 -0
- package/dist/stt-Dne6SENv.js.map +1 -0
- package/dist/{tools-BsiEE6f2.mjs → tools-Bi1P7Xoy.mjs} +6 -7
- package/dist/{tools-BsiEE6f2.mjs.map → tools-Bi1P7Xoy.mjs.map} +1 -1
- package/dist/transformers.web-DiD1gTwk.js +44695 -0
- package/dist/transformers.web-DiD1gTwk.js.map +1 -0
- package/dist/transformers.web-u34VxRFM.js +3 -0
- package/dist/tts-B1pZMlDv.mjs +3 -0
- package/dist/tts-C2FzKuSx.js +725 -0
- package/dist/tts-C2FzKuSx.js.map +1 -0
- package/dist/tts-CyHhcLtN.mjs +731 -0
- package/dist/tts-CyHhcLtN.mjs.map +1 -0
- package/dist/types-CiTc7ez3.d.mts +353 -0
- package/dist/types-CiTc7ez3.d.mts.map +1 -0
- package/dist/{utils-7vXqtq2Q.mjs → utils-CZBZ8dgR.mjs} +1 -1
- package/dist/{utils-7vXqtq2Q.mjs.map → utils-CZBZ8dgR.mjs.map} +1 -1
- package/docs/ai-sdk.md +137 -21
- package/docs/browser.md +241 -2
- package/docs/memory.md +72 -0
- package/docs/stt.md +494 -0
- package/docs/tts.md +569 -0
- package/docs/vision.md +396 -0
- package/package.json +21 -22
- package/dist/auto-update-BbNHbSU1.mjs +0 -3
- package/dist/browser/index.d.mts +0 -262
- package/dist/browser/index.d.mts.map +0 -1
- package/dist/browser/index.mjs +0 -755
- package/dist/browser/index.mjs.map +0 -1
- package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
- package/dist/gerbil-BfnsFWRE.mjs +0 -644
- package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
- package/dist/gerbil-BjW-z7Fq.mjs +0 -5
- package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
- package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
- package/dist/mcp-R8kRLIKb.mjs.map +0 -1
- package/dist/models-DKULvhOr.mjs +0 -136
- package/dist/models-DKULvhOr.mjs.map +0 -1
- package/dist/models-De2-_GmQ.d.mts +0 -22
- package/dist/models-De2-_GmQ.d.mts.map +0 -1
- package/dist/skills-D3CEpgDc.mjs +0 -630
- package/dist/skills-D3CEpgDc.mjs.map +0 -1
- package/dist/types-BS1N92Jt.d.mts +0 -183
- package/dist/types-BS1N92Jt.d.mts.map +0 -1
- /package/dist/{chunk-Ct1HF2bE.mjs → chunk-CkXuGtQK.mjs} +0 -0
package/docs/vision.md
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
# Vision Models in Gerbil
|
|
2
|
+
|
|
3
|
+
Gerbil supports **Vision Language Models (VLMs)** like Ministral 3B that can understand and describe images. This guide covers how to use vision capabilities across all Gerbil interfaces.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
import { Gerbil } from "@tryhamster/gerbil";
|
|
9
|
+
|
|
10
|
+
const g = new Gerbil();
|
|
11
|
+
await g.loadModel("ministral-3b"); // Vision-capable model
|
|
12
|
+
|
|
13
|
+
const result = await g.generate("What's in this image?", {
|
|
14
|
+
images: [{ source: "https://example.com/photo.jpg" }]
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
console.log(result.text);
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Supported Models
|
|
21
|
+
|
|
22
|
+
| Model ID | Vision | Reasoning | Context | Size |
|
|
23
|
+
|----------|--------|-----------|---------|------|
|
|
24
|
+
| `ministral-3b` | ✅ | ✅ | 256K | ~2.5GB |
|
|
25
|
+
|
|
26
|
+
More vision models coming soon as they become available in ONNX format.
|
|
27
|
+
|
|
28
|
+
## Image Input Types
|
|
29
|
+
|
|
30
|
+
Gerbil accepts images in several formats:
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
// URL (recommended for web images)
|
|
34
|
+
images: [{ source: "https://example.com/image.jpg" }]
|
|
35
|
+
|
|
36
|
+
// Data URI (base64 encoded)
|
|
37
|
+
images: [{ source: "data:image/png;base64,iVBORw0KGgo..." }]
|
|
38
|
+
|
|
39
|
+
// Local file path (Node.js only, auto-converted to data URI)
|
|
40
|
+
images: [{ source: "/path/to/image.png" }]
|
|
41
|
+
|
|
42
|
+
// With alt text (optional, provides context)
|
|
43
|
+
images: [{ source: "...", alt: "A photo of a sunset" }]
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Multiple Images
|
|
47
|
+
|
|
48
|
+
You can pass multiple images for comparison or multi-image understanding:
|
|
49
|
+
|
|
50
|
+
```typescript
|
|
51
|
+
const result = await g.generate("What's the difference between these two images?", {
|
|
52
|
+
images: [
|
|
53
|
+
{ source: "https://example.com/before.jpg" },
|
|
54
|
+
{ source: "https://example.com/after.jpg" }
|
|
55
|
+
]
|
|
56
|
+
});
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Model Capability Detection
|
|
60
|
+
|
|
61
|
+
Check if the loaded model supports vision:
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
await g.loadModel("ministral-3b");
|
|
65
|
+
|
|
66
|
+
if (g.supportsVision()) {
|
|
67
|
+
// Use vision features
|
|
68
|
+
} else {
|
|
69
|
+
// Text-only mode
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Graceful Fallback
|
|
74
|
+
|
|
75
|
+
If you pass images to a non-vision model, Gerbil will:
|
|
76
|
+
1. Log a warning to console
|
|
77
|
+
2. Ignore the images
|
|
78
|
+
3. Process the text prompt normally
|
|
79
|
+
|
|
80
|
+
This allows you to write code that works with any model:
|
|
81
|
+
|
|
82
|
+
```typescript
|
|
83
|
+
// This works with any model - images are used if supported
|
|
84
|
+
const result = await g.generate("Describe this", {
|
|
85
|
+
images: [{ source: imageUrl }]
|
|
86
|
+
});
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## AI SDK Integration
|
|
92
|
+
|
|
93
|
+
Use vision models with Vercel AI SDK v5+:
|
|
94
|
+
|
|
95
|
+
```typescript
|
|
96
|
+
import { generateText } from "ai";
|
|
97
|
+
import { gerbil } from "@tryhamster/gerbil/ai";
|
|
98
|
+
|
|
99
|
+
const { text } = await generateText({
|
|
100
|
+
model: gerbil("ministral-3b"),
|
|
101
|
+
messages: [
|
|
102
|
+
{
|
|
103
|
+
role: "user",
|
|
104
|
+
content: [
|
|
105
|
+
{ type: "image", image: new URL("https://example.com/photo.jpg") },
|
|
106
|
+
{ type: "text", text: "Describe this image in detail" },
|
|
107
|
+
],
|
|
108
|
+
},
|
|
109
|
+
],
|
|
110
|
+
});
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Image Part Formats
|
|
114
|
+
|
|
115
|
+
The AI SDK integration accepts images in these formats:
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
// URL object
|
|
119
|
+
{ type: "image", image: new URL("https://...") }
|
|
120
|
+
|
|
121
|
+
// URL string
|
|
122
|
+
{ type: "image", image: "https://..." }
|
|
123
|
+
|
|
124
|
+
// Base64 string
|
|
125
|
+
{ type: "image", image: "data:image/png;base64,..." }
|
|
126
|
+
|
|
127
|
+
// Uint8Array with mime type
|
|
128
|
+
{ type: "image", image: imageBytes, mimeType: "image/png" }
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## Express & Next.js Integration
|
|
134
|
+
|
|
135
|
+
### Express
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
import express from "express";
|
|
139
|
+
import { gerbil } from "@tryhamster/gerbil/express";
|
|
140
|
+
|
|
141
|
+
const app = express();
|
|
142
|
+
app.use("/ai", gerbil({ model: "ministral-3b" })());
|
|
143
|
+
|
|
144
|
+
// POST /ai/generate
|
|
145
|
+
// Body: { prompt: "Describe this", images: [{ source: "https://..." }] }
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Next.js App Router
|
|
149
|
+
|
|
150
|
+
```typescript
|
|
151
|
+
// app/api/chat/route.ts
|
|
152
|
+
import { gerbil } from "@tryhamster/gerbil/next";
|
|
153
|
+
|
|
154
|
+
export const POST = gerbil.handler({ model: "ministral-3b" });
|
|
155
|
+
|
|
156
|
+
// Fetch from client:
|
|
157
|
+
// fetch("/api/chat", {
|
|
158
|
+
// method: "POST",
|
|
159
|
+
// body: JSON.stringify({
|
|
160
|
+
// prompt: "What's in this image?",
|
|
161
|
+
// images: [{ source: dataUri }]
|
|
162
|
+
// })
|
|
163
|
+
// })
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## React Hooks (Browser)
|
|
169
|
+
|
|
170
|
+
### useChat with Images
|
|
171
|
+
|
|
172
|
+
```tsx
|
|
173
|
+
import { useChat } from "@tryhamster/gerbil/browser";
|
|
174
|
+
|
|
175
|
+
function VisionChat() {
|
|
176
|
+
const {
|
|
177
|
+
messages,
|
|
178
|
+
input,
|
|
179
|
+
setInput,
|
|
180
|
+
handleSubmit,
|
|
181
|
+
attachImage,
|
|
182
|
+
attachedImages,
|
|
183
|
+
clearImages,
|
|
184
|
+
sendWithImages,
|
|
185
|
+
} = useChat({ model: "ministral-3b" });
|
|
186
|
+
|
|
187
|
+
const handleFileSelect = (e: React.ChangeEvent<HTMLInputElement>) => {
|
|
188
|
+
const file = e.target.files?.[0];
|
|
189
|
+
if (file) {
|
|
190
|
+
const reader = new FileReader();
|
|
191
|
+
reader.onload = () => attachImage(reader.result as string);
|
|
192
|
+
reader.readAsDataURL(file);
|
|
193
|
+
}
|
|
194
|
+
};
|
|
195
|
+
|
|
196
|
+
return (
|
|
197
|
+
<div>
|
|
198
|
+
{/* Messages */}
|
|
199
|
+
{messages.map(m => (
|
|
200
|
+
<div key={m.id}>
|
|
201
|
+
{m.images?.map((img, i) => (
|
|
202
|
+
<img key={i} src={img} alt="" className="max-w-xs" />
|
|
203
|
+
))}
|
|
204
|
+
<p>{m.content}</p>
|
|
205
|
+
</div>
|
|
206
|
+
))}
|
|
207
|
+
|
|
208
|
+
{/* Image attachment */}
|
|
209
|
+
<input type="file" accept="image/*" onChange={handleFileSelect} />
|
|
210
|
+
|
|
211
|
+
{attachedImages.length > 0 && (
|
|
212
|
+
<div>
|
|
213
|
+
📎 {attachedImages.length} image(s) attached
|
|
214
|
+
<button onClick={clearImages}>Clear</button>
|
|
215
|
+
</div>
|
|
216
|
+
)}
|
|
217
|
+
|
|
218
|
+
{/* Input */}
|
|
219
|
+
<form onSubmit={handleSubmit}>
|
|
220
|
+
<input
|
|
221
|
+
value={input}
|
|
222
|
+
onChange={e => setInput(e.target.value)}
|
|
223
|
+
placeholder="Describe the image..."
|
|
224
|
+
/>
|
|
225
|
+
<button type="submit">Send</button>
|
|
226
|
+
</form>
|
|
227
|
+
</div>
|
|
228
|
+
);
|
|
229
|
+
}
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### Direct Image Send
|
|
233
|
+
|
|
234
|
+
```tsx
|
|
235
|
+
// Send a message with specific images
|
|
236
|
+
sendWithImages("Compare these two photos", [image1DataUri, image2DataUri]);
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
---
|
|
240
|
+
|
|
241
|
+
## Built-in Vision Skills
|
|
242
|
+
|
|
243
|
+
Gerbil includes pre-built skills for common vision tasks:
|
|
244
|
+
|
|
245
|
+
### Describe Image
|
|
246
|
+
|
|
247
|
+
```typescript
|
|
248
|
+
import { describeImage } from "@tryhamster/gerbil/skills";
|
|
249
|
+
|
|
250
|
+
const description = await describeImage({
|
|
251
|
+
image: "https://example.com/photo.jpg",
|
|
252
|
+
focus: "details", // "general" | "details" | "text" | "objects" | "scene"
|
|
253
|
+
format: "bullets", // "paragraph" | "bullets" | "structured"
|
|
254
|
+
});
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### Analyze Screenshot
|
|
258
|
+
|
|
259
|
+
```typescript
|
|
260
|
+
import { analyzeScreenshot } from "@tryhamster/gerbil/skills";
|
|
261
|
+
|
|
262
|
+
const analysis = await analyzeScreenshot({
|
|
263
|
+
image: screenshotDataUri,
|
|
264
|
+
type: "accessibility", // "ui-review" | "accessibility" | "suggestions" | "qa"
|
|
265
|
+
});
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### Extract from Image
|
|
269
|
+
|
|
270
|
+
```typescript
|
|
271
|
+
import { extractFromImage } from "@tryhamster/gerbil/skills";
|
|
272
|
+
|
|
273
|
+
const extracted = await extractFromImage({
|
|
274
|
+
image: documentPhoto,
|
|
275
|
+
extract: "text", // "text" | "data" | "code" | "table" | "diagram"
|
|
276
|
+
outputFormat: "markdown", // "raw" | "json" | "markdown"
|
|
277
|
+
});
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
### Compare Images
|
|
281
|
+
|
|
282
|
+
```typescript
|
|
283
|
+
import { compareImages } from "@tryhamster/gerbil/skills";
|
|
284
|
+
|
|
285
|
+
const comparison = await compareImages({
|
|
286
|
+
image1: beforeScreenshot,
|
|
287
|
+
image2: afterScreenshot,
|
|
288
|
+
focus: "differences", // "differences" | "similarities" | "detailed"
|
|
289
|
+
});
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
### Caption Image
|
|
293
|
+
|
|
294
|
+
```typescript
|
|
295
|
+
import { captionImage } from "@tryhamster/gerbil/skills";
|
|
296
|
+
|
|
297
|
+
const caption = await captionImage({
|
|
298
|
+
image: photo,
|
|
299
|
+
style: "descriptive", // "concise" | "descriptive" | "creative" | "funny"
|
|
300
|
+
});
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
## Performance Tips
|
|
306
|
+
|
|
307
|
+
### WebGPU Acceleration
|
|
308
|
+
|
|
309
|
+
Vision models benefit significantly from GPU acceleration:
|
|
310
|
+
|
|
311
|
+
```typescript
|
|
312
|
+
// Node.js: Uses Chrome backend for WebGPU
|
|
313
|
+
await g.loadModel("ministral-3b"); // Auto-detects WebGPU
|
|
314
|
+
|
|
315
|
+
// Browser: Native WebGPU
|
|
316
|
+
await g.loadModel("ministral-3b", { device: "webgpu" });
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
### Image Size
|
|
320
|
+
|
|
321
|
+
- Larger images take longer to process
|
|
322
|
+
- Consider resizing before sending to the model
|
|
323
|
+
- 512x512 to 1024x1024 is generally optimal
|
|
324
|
+
|
|
325
|
+
### Caching
|
|
326
|
+
|
|
327
|
+
The model caches in the browser's IndexedDB (via Chrome backend in Node.js), so subsequent loads are fast.
|
|
328
|
+
|
|
329
|
+
---
|
|
330
|
+
|
|
331
|
+
## Troubleshooting
|
|
332
|
+
|
|
333
|
+
### "Model doesn't support vision"
|
|
334
|
+
|
|
335
|
+
Make sure you're using a vision-capable model like `ministral-3b`.
|
|
336
|
+
|
|
337
|
+
### Slow image processing
|
|
338
|
+
|
|
339
|
+
- Ensure WebGPU is being used (check `g.getDeviceMode()`)
|
|
340
|
+
- Resize large images before sending
|
|
341
|
+
- In Node.js, the Chrome backend provides GPU acceleration
|
|
342
|
+
|
|
343
|
+
### Image not loading
|
|
344
|
+
|
|
345
|
+
- Check the URL is accessible
|
|
346
|
+
- For local files, ensure the path is absolute
|
|
347
|
+
- Base64 data URIs must include the mime type prefix
|
|
348
|
+
|
|
349
|
+
---
|
|
350
|
+
|
|
351
|
+
## API Reference
|
|
352
|
+
|
|
353
|
+
### ImageInput
|
|
354
|
+
|
|
355
|
+
```typescript
|
|
356
|
+
interface ImageInput {
|
|
357
|
+
/** Image source: URL, base64 data URI, or local file path */
|
|
358
|
+
source: string;
|
|
359
|
+
/** Optional alt text for context */
|
|
360
|
+
alt?: string;
|
|
361
|
+
}
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
### GenerateOptions (with images)
|
|
365
|
+
|
|
366
|
+
```typescript
|
|
367
|
+
interface GenerateOptions {
|
|
368
|
+
// ... standard options ...
|
|
369
|
+
|
|
370
|
+
/** Images to include (only used if model supports vision) */
|
|
371
|
+
images?: ImageInput[];
|
|
372
|
+
}
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
### supportsVision()
|
|
376
|
+
|
|
377
|
+
```typescript
|
|
378
|
+
g.supportsVision(): boolean
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
Returns `true` if the loaded model supports vision input.
|
|
382
|
+
|
|
383
|
+
### ModelConfig
|
|
384
|
+
|
|
385
|
+
```typescript
|
|
386
|
+
interface ModelConfig {
|
|
387
|
+
// ... standard properties ...
|
|
388
|
+
|
|
389
|
+
/** Whether model supports vision/image input */
|
|
390
|
+
supportsVision?: boolean;
|
|
391
|
+
|
|
392
|
+
/** Size of vision encoder (if applicable) */
|
|
393
|
+
visionEncoderSize?: string;
|
|
394
|
+
}
|
|
395
|
+
```
|
|
396
|
+
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tryhamster/gerbil",
|
|
3
|
-
"version": "1.0.0-rc.0",
|
|
3
|
+
"version": "1.0.0-rc.10",
|
|
4
4
|
"description": "Local LLM inference for Node.js. GPU-accelerated. Zero config. Works standalone or with Vercel AI SDK.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.mjs",
|
|
@@ -62,32 +62,18 @@
|
|
|
62
62
|
"types": "./dist/integrations/mcp-client.d.mts"
|
|
63
63
|
},
|
|
64
64
|
"./browser": {
|
|
65
|
-
"import": "./dist/browser/index.mjs",
|
|
66
|
-
"types": "./dist/browser/index.d.mts"
|
|
65
|
+
"import": "./dist/browser/index.js",
|
|
66
|
+
"types": "./dist/browser/index.d.ts"
|
|
67
67
|
}
|
|
68
68
|
},
|
|
69
|
-
"scripts": {
|
|
70
|
-
"build": "tsdown",
|
|
71
|
-
"dev": "tsx src/cli/index.ts",
|
|
72
|
-
"typecheck": "tsc --noEmit",
|
|
73
|
-
"check": "ultracite check",
|
|
74
|
-
"fix": "ultracite fix",
|
|
75
|
-
"test": "vitest run",
|
|
76
|
-
"test:watch": "vitest",
|
|
77
|
-
"prepublishOnly": "pnpm build",
|
|
78
|
-
"changeset": "changeset",
|
|
79
|
-
"release": "changeset publish",
|
|
80
|
-
"prepare": "lefthook install"
|
|
81
|
-
},
|
|
82
69
|
"dependencies": {
|
|
83
70
|
"@huggingface/hub": "^2.7.1",
|
|
84
|
-
"@huggingface/transformers": "^3.8.0",
|
|
85
71
|
"chalk": "^5.3.0",
|
|
86
72
|
"cli-progress": "^3.12.0",
|
|
87
73
|
"commander": "^12.1.0",
|
|
88
|
-
"onnxruntime-web": "^1.21.0-dev.20250114-228dd16893",
|
|
89
74
|
"ora": "^8.0.1",
|
|
90
75
|
"puppeteer-core": "^24.31.0",
|
|
76
|
+
"react": "^19.0.0",
|
|
91
77
|
"webgpu": "^0.3.8",
|
|
92
78
|
"zod": "^3.23.0"
|
|
93
79
|
},
|
|
@@ -101,8 +87,7 @@
|
|
|
101
87
|
"hono": ">=4.0.0",
|
|
102
88
|
"langchain": ">=0.1.0",
|
|
103
89
|
"llamaindex": ">=0.1.0",
|
|
104
|
-
"next": ">=14.0.0"
|
|
105
|
-
"react": ">=18.0.0"
|
|
90
|
+
"next": ">=14.0.0"
|
|
106
91
|
},
|
|
107
92
|
"peerDependenciesMeta": {
|
|
108
93
|
"@ai-sdk/provider": {
|
|
@@ -142,6 +127,9 @@
|
|
|
142
127
|
"devDependencies": {
|
|
143
128
|
"@ai-sdk/provider": "^2.0.0",
|
|
144
129
|
"@biomejs/biome": "^2.3.8",
|
|
130
|
+
"@huggingface/transformers": "^3.8.0",
|
|
131
|
+
"kokoro-js": "^1.2.1",
|
|
132
|
+
"onnxruntime-web": "^1.21.0-dev.20250114-228dd16893",
|
|
145
133
|
"@changesets/changelog-github": "^0.5.1",
|
|
146
134
|
"@changesets/cli": "^2.28.1",
|
|
147
135
|
"@types/cli-progress": "^3.11.6",
|
|
@@ -158,7 +146,6 @@
|
|
|
158
146
|
"ink-spinner": "^5.0.0",
|
|
159
147
|
"ink-text-input": "^6.0.0",
|
|
160
148
|
"lefthook": "^2.0.5",
|
|
161
|
-
"react": "^19.0.0",
|
|
162
149
|
"tsdown": "^0.17.0-beta.3",
|
|
163
150
|
"tsx": "^4.15.0",
|
|
164
151
|
"typescript": "^5.4.5",
|
|
@@ -203,5 +190,17 @@
|
|
|
203
190
|
],
|
|
204
191
|
"publishConfig": {
|
|
205
192
|
"access": "public"
|
|
193
|
+
},
|
|
194
|
+
"scripts": {
|
|
195
|
+
"build": "tsdown",
|
|
196
|
+
"dev": "tsx src/cli/index.ts",
|
|
197
|
+
"typecheck": "tsc --noEmit",
|
|
198
|
+
"check": "ultracite check",
|
|
199
|
+
"fix": "ultracite fix",
|
|
200
|
+
"test": "vitest run",
|
|
201
|
+
"test:watch": "vitest",
|
|
202
|
+
"changeset": "changeset",
|
|
203
|
+
"version": "changeset version",
|
|
204
|
+
"release": "pnpm publish && changeset tag"
|
|
206
205
|
}
|
|
207
|
-
}
|
|
206
|
+
}
|