@tryhamster/gerbil 1.0.0-rc.11 → 1.0.0-rc.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/README.md +52 -1
  2. package/dist/browser/index.d.ts +159 -1
  3. package/dist/browser/index.d.ts.map +1 -1
  4. package/dist/browser/index.js +473 -6
  5. package/dist/browser/index.js.map +1 -1
  6. package/dist/cli.mjs +7 -7
  7. package/dist/cli.mjs.map +1 -1
  8. package/dist/frameworks/express.d.mts +1 -1
  9. package/dist/frameworks/express.mjs +2 -1
  10. package/dist/frameworks/express.mjs.map +1 -1
  11. package/dist/frameworks/fastify.d.mts +1 -1
  12. package/dist/frameworks/fastify.mjs +2 -1
  13. package/dist/frameworks/fastify.mjs.map +1 -1
  14. package/dist/frameworks/hono.d.mts +1 -1
  15. package/dist/frameworks/hono.mjs +2 -1
  16. package/dist/frameworks/hono.mjs.map +1 -1
  17. package/dist/frameworks/next.d.mts +2 -2
  18. package/dist/frameworks/next.mjs +2 -1
  19. package/dist/frameworks/next.mjs.map +1 -1
  20. package/dist/frameworks/react.d.mts +1 -1
  21. package/dist/frameworks/trpc.d.mts +1 -1
  22. package/dist/frameworks/trpc.mjs +2 -1
  23. package/dist/frameworks/trpc.mjs.map +1 -1
  24. package/dist/{gerbil-DoDGHe6Z.mjs → gerbil-BZklpDhM.mjs} +289 -1
  25. package/dist/gerbil-BZklpDhM.mjs.map +1 -0
  26. package/dist/gerbil-CAMb_nrK.mjs +5 -0
  27. package/dist/{gerbil-qOTe1nl2.d.mts → gerbil-DJygY0sJ.d.mts} +120 -2
  28. package/dist/gerbil-DJygY0sJ.d.mts.map +1 -0
  29. package/dist/index.d.mts +3 -3
  30. package/dist/index.d.mts.map +1 -1
  31. package/dist/index.mjs +2 -2
  32. package/dist/index.mjs.map +1 -1
  33. package/dist/integrations/ai-sdk.d.mts +72 -3
  34. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  35. package/dist/integrations/ai-sdk.mjs +106 -3
  36. package/dist/integrations/ai-sdk.mjs.map +1 -1
  37. package/dist/integrations/langchain.d.mts +1 -1
  38. package/dist/integrations/langchain.mjs +2 -1
  39. package/dist/integrations/langchain.mjs.map +1 -1
  40. package/dist/integrations/llamaindex.d.mts +1 -1
  41. package/dist/integrations/llamaindex.mjs +2 -1
  42. package/dist/integrations/llamaindex.mjs.map +1 -1
  43. package/dist/integrations/mcp.d.mts +2 -2
  44. package/dist/integrations/mcp.mjs +5 -4
  45. package/dist/{mcp-kzDDWIoS.mjs → mcp-ZCC5OR7B.mjs} +3 -3
  46. package/dist/{mcp-kzDDWIoS.mjs.map → mcp-ZCC5OR7B.mjs.map} +1 -1
  47. package/dist/{one-liner-DxnNs_JK.mjs → one-liner-mH5SKPvT.mjs} +2 -2
  48. package/dist/{one-liner-DxnNs_JK.mjs.map → one-liner-mH5SKPvT.mjs.map} +1 -1
  49. package/dist/{repl-DGUw4fCc.mjs → repl-CSM1IBP1.mjs} +3 -3
  50. package/dist/skills/index.d.mts +3 -3
  51. package/dist/skills/index.d.mts.map +1 -1
  52. package/dist/skills/index.mjs +4 -3
  53. package/dist/{skills-DulrOPeP.mjs → skills-CPB_9YfF.mjs} +2 -2
  54. package/dist/{skills-DulrOPeP.mjs.map → skills-CPB_9YfF.mjs.map} +1 -1
  55. package/dist/{types-CiTc7ez3.d.mts → types-evP8RShr.d.mts} +26 -2
  56. package/dist/types-evP8RShr.d.mts.map +1 -0
  57. package/docs/ai-sdk.md +56 -1
  58. package/docs/browser.md +103 -0
  59. package/docs/embeddings.md +311 -0
  60. package/package.json +1 -1
  61. package/dist/gerbil-DJGqq7BX.mjs +0 -4
  62. package/dist/gerbil-DoDGHe6Z.mjs.map +0 -1
  63. package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
  64. package/dist/types-CiTc7ez3.d.mts.map +0 -1
package/docs/browser.md CHANGED
@@ -34,6 +34,40 @@ function Chat() {
34
34
 
35
35
  That's it! The hook handles model loading, streaming, and state management.
36
36
 
37
+ ## Model Preloading
38
+
39
+ Download models during app initialization so they're ready when users need them:
40
+
41
+ ```typescript
42
+ import {
43
+ preloadChatModel,
44
+ preloadEmbeddingModel,
45
+ preloadTTSModel,
46
+ preloadSTTModel
47
+ } from "@tryhamster/gerbil/browser";
48
+
49
+ // During app initialization
50
+ async function initApp() {
51
+ // Preload LLM
52
+ await preloadChatModel("qwen3-0.6b", {
53
+ onProgress: (p) => {
54
+ if (p.status === "downloading") {
55
+ console.log(`Downloading ${p.file}: ${p.progress}%`);
56
+ }
57
+ },
58
+ });
59
+
60
+ // Preload other models as needed
61
+ await preloadEmbeddingModel("Xenova/all-MiniLM-L6-v2");
62
+ await preloadTTSModel("kokoro-82m");
63
+ await preloadSTTModel("whisper-tiny.en");
64
+ }
65
+
66
+ initApp();
67
+ ```
68
+
69
+ After preloading, hooks like `useChat` will load instantly from the IndexedDB cache.
70
+
37
71
  ## React Hooks
38
72
 
39
73
  ### `useChat`
@@ -456,6 +490,75 @@ for await (const chunk of gerbil.speakStream("Long text...")) {
456
490
  }
457
491
  ```
458
492
 
493
+ ## Embeddings Hook
494
+
495
+ ### `useEmbedding`
496
+
497
+ Generate embeddings for semantic search and similarity:
498
+
499
+ ```tsx
500
+ import { useEmbedding } from "@tryhamster/gerbil/browser";
501
+
502
+ function SemanticSearch() {
503
+ const { embed, similarity, search, isLoading, isReady, load } = useEmbedding({
504
+ model: "Xenova/all-MiniLM-L6-v2", // Default
505
+ autoLoad: false,
506
+ });
507
+
508
+ if (isLoading) return <div>Loading embedding model...</div>;
509
+
510
+ const handleSearch = async () => {
511
+ const results = await search("capital of France", [
512
+ "Paris is beautiful",
513
+ "London is in England",
514
+ "Dogs are pets",
515
+ ], 2); // topK = 2
516
+
517
+ console.log(results);
518
+ // [{ text: "Paris is beautiful", score: 0.89, index: 0 }, ...]
519
+ };
520
+
521
+ const handleSimilarity = async () => {
522
+ const score = await similarity("Hello world", "Hi there");
523
+ console.log(score); // 0.85
524
+ };
525
+
526
+ return (
527
+ <div>
528
+ <button onClick={handleSearch}>Search</button>
529
+ <button onClick={handleSimilarity}>Compare</button>
530
+ </div>
531
+ );
532
+ }
533
+ ```
534
+
535
+ ### Options
536
+
537
+ ```typescript
538
+ const {
539
+ // Actions
540
+ embed, // (text: string) => Promise<number[]>
541
+ embedBatch, // (texts: string[]) => Promise<{ vector, text }[]>
542
+ similarity, // (a: string, b: string) => Promise<number>
543
+ search, // (query: string, corpus: string[], topK?) => Promise<SearchResult[]>
544
+ findNearest, // (embedding: number[], candidates: string[], topK?) => Promise<SearchResult[]>
545
+ cosineSimilarity, // (a: number[], b: number[]) => number (sync)
546
+ load, // () => void - manually load model
547
+
548
+ // State
549
+ isLoading, // boolean - model loading
550
+ isReady, // boolean - model ready
551
+ loadingProgress, // { status, message?, progress? }
552
+ error, // string | null
553
+ } = useEmbedding({
554
+ model: "Xenova/all-MiniLM-L6-v2", // Embedding model
555
+ normalize: true, // Normalize vectors (default: true)
556
+ autoLoad: false, // Load on mount (default: false)
557
+ onReady: () => {},
558
+ onError: (err) => {},
559
+ });
560
+ ```
561
+
459
562
  ## Low-Level API
460
563
 
461
564
  For full control, use `createGerbilWorker` directly:
@@ -0,0 +1,311 @@
1
+ # Embeddings
2
+
3
+ Gerbil provides local text embeddings using transformer models via ONNX. Generate semantic vectors for similarity search, clustering, and retrieval - all on-device with no API keys.
4
+
5
+ ## Quick Start
6
+
7
+ ### Node.js
8
+
9
+ ```typescript
10
+ import { Gerbil } from "@tryhamster/gerbil";
11
+
12
+ const g = new Gerbil();
13
+
14
+ // Generate embedding
15
+ const result = await g.embed("Hello world");
16
+ console.log(result.vector); // number[384]
17
+
18
+ // Compare similarity
19
+ const similarity = await g.similarity("Hello world", "Hi there");
20
+ console.log(similarity.score); // 0.85
21
+
22
+ // Semantic search
23
+ const results = await g.search("capital of France", [
24
+ "Paris is beautiful",
25
+ "London is in England",
26
+ "Dogs are pets"
27
+ ]);
28
+ // [{ text: "Paris is beautiful", score: 0.89, index: 0 }, ...]
29
+ ```
30
+
31
+ ### React (Browser)
32
+
33
+ ```tsx
34
+ import { useEmbedding } from "@tryhamster/gerbil/browser";
35
+
36
+ function SemanticSearch() {
37
+ const { search, isLoading, isReady } = useEmbedding();
38
+
39
+ if (isLoading) return <div>Loading embedding model...</div>;
40
+
41
+ const handleSearch = async () => {
42
+ const results = await search("capital of France", [
43
+ "Paris is beautiful",
44
+ "London is in England",
45
+ "Dogs are pets"
46
+ ]);
47
+ console.log(results);
48
+ };
49
+
50
+ return <button onClick={handleSearch}>Search</button>;
51
+ }
52
+ ```
53
+
54
+ ### AI SDK
55
+
56
+ ```typescript
57
+ import { embed, embedMany } from "ai";
58
+ import { gerbil } from "@tryhamster/gerbil/ai";
59
+
60
+ // Single embedding
61
+ const { embedding } = await embed({
62
+ model: gerbil.embedding(),
63
+ value: "Hello world",
64
+ });
65
+
66
+ // Multiple embeddings
67
+ const { embeddings } = await embedMany({
68
+ model: gerbil.embedding(),
69
+ values: ["Hello", "World", "How are you?"],
70
+ });
71
+ ```
72
+
73
+ ## Available Models
74
+
75
+ | Model | Dimensions | Size | Description |
76
+ |-------|------------|------|-------------|
77
+ | `all-MiniLM-L6-v2` | 384 | ~23MB | Default, fast and versatile |
78
+ | `bge-small-en-v1.5` | 384 | ~33MB | High quality English embeddings |
79
+ | `gte-small` | 384 | ~33MB | General text embeddings |
80
+
81
+ Use any compatible ONNX embedding model from HuggingFace:
82
+
83
+ ```typescript
84
+ await g.embed("text", { model: "Xenova/all-MiniLM-L6-v2" });
85
+ ```
86
+
87
+ ## API Reference
88
+
89
+ ### Gerbil Class Methods
90
+
91
+ ```typescript
92
+ class Gerbil {
93
+ // Generate embedding for text
94
+ async embed(text: string, options?: EmbedOptions): Promise<EmbedResult>;
95
+
96
+ // Batch embedding
97
+ async embedBatch(texts: string[], options?: EmbedOptions): Promise<EmbedResult[]>;
98
+
99
+ // Compare two texts
100
+ async similarity(textA: string, textB: string, options?: EmbedOptions): Promise<SimilarityResult>;
101
+
102
+ // Semantic search
103
+ async search(query: string, corpus: string[], options?: SearchOptions): Promise<SearchResult[]>;
104
+
105
+ // Find nearest text to an embedding
106
+ async findNearest(embedding: number[], candidates: string[], options?: SearchOptions): Promise<SearchResult[]>;
107
+
108
+ // Raw vector similarity (synchronous)
109
+ cosineSimilarity(a: number[], b: number[]): number;
110
+ }
111
+ ```
112
+
113
+ ### Types
114
+
115
+ ```typescript
116
+ interface EmbedOptions {
117
+ /** Embedding model (default: "Xenova/all-MiniLM-L6-v2") */
118
+ model?: string;
119
+ /** Normalize vectors (default: true) */
120
+ normalize?: boolean;
121
+ }
122
+
123
+ interface EmbedResult {
124
+ /** Embedding vector */
125
+ vector: number[];
126
+ /** Original text */
127
+ text: string;
128
+ /** Time in ms */
129
+ totalTime: number;
130
+ }
131
+
132
+ interface SimilarityResult {
133
+ /** Similarity score (0-1) */
134
+ score: number;
135
+ /** First text */
136
+ textA: string;
137
+ /** Second text */
138
+ textB: string;
139
+ /** Time in ms */
140
+ totalTime: number;
141
+ }
142
+
143
+ interface SearchResult {
144
+ /** Matched text */
145
+ text: string;
146
+ /** Similarity score (0-1) */
147
+ score: number;
148
+ /** Index in original corpus */
149
+ index: number;
150
+ }
151
+
152
+ interface SearchOptions extends EmbedOptions {
153
+ /** Return only top K results */
154
+ topK?: number;
155
+ }
156
+ ```
157
+
158
+ ## Use Cases
159
+
160
+ ### Semantic Search
161
+
162
+ Find the most relevant documents for a query:
163
+
164
+ ```typescript
165
+ const documents = [
166
+ "JavaScript is a programming language",
167
+ "Python is great for data science",
168
+ "The weather is sunny today",
169
+ "Machine learning uses algorithms",
170
+ ];
171
+
172
+ const results = await g.search("coding languages", documents, { topK: 2 });
173
+ // Returns JavaScript and Python documents
174
+ ```
175
+
176
+ ### Duplicate Detection
177
+
178
+ Find similar or duplicate content:
179
+
180
+ ```typescript
181
+ const similarity = await g.similarity(
182
+ "The quick brown fox jumps over the lazy dog",
183
+ "A fast brown fox leaps over a sleepy dog"
184
+ );
185
+
186
+ if (similarity.score > 0.9) {
187
+ console.log("Potential duplicate detected!");
188
+ }
189
+ ```
190
+
191
+ ### Clustering
192
+
193
+ Group similar items together:
194
+
195
+ ```typescript
196
+ const items = ["apple", "banana", "car", "truck", "orange"];
197
+ const embeddings = await g.embedBatch(items);
198
+
199
+ // Use embeddings for k-means or hierarchical clustering
200
+ // Each embedding.vector is a 384-dimensional vector
201
+ ```
202
+
203
+ ### RAG (Retrieval-Augmented Generation)
204
+
205
+ Build a simple RAG pipeline:
206
+
207
+ ```typescript
208
+ // 1. Index documents
209
+ const documents = await loadDocuments();
210
+ const docEmbeddings = await g.embedBatch(documents);
211
+
212
+ // 2. Store embeddings (in-memory or vector DB)
213
+ const index = docEmbeddings.map((e, i) => ({
214
+ embedding: e.vector,
215
+ text: documents[i]
216
+ }));
217
+
218
+ // 3. Retrieve relevant docs
219
+ const queryEmbedding = (await g.embed(userQuestion)).vector;
220
+ const relevant = await g.findNearest(
221
+ queryEmbedding,
222
+ documents,
223
+ { topK: 3 }
224
+ );
225
+
226
+ // 4. Generate answer with context
227
+ const context = relevant.map(r => r.text).join("\n");
228
+ const answer = await g.generate(`Context:\n${context}\n\nQuestion: ${userQuestion}`);
229
+ ```
230
+
231
+ ## useEmbedding Hook Reference
232
+
233
+ ```typescript
234
+ const {
235
+ // Actions
236
+ embed, // (text: string) => Promise<number[]>
237
+ embedBatch, // (texts: string[]) => Promise<BrowserEmbedResult[]>
238
+ similarity, // (a: string, b: string) => Promise<number>
239
+ search, // (query: string, corpus: string[], topK?: number) => Promise<SearchResult[]>
240
+ findNearest, // (embedding: number[], candidates: string[], topK?: number) => Promise<SearchResult[]>
241
+ cosineSimilarity,// (a: number[], b: number[]) => number (sync)
242
+ load, // () => void - manually load model
243
+
244
+ // State
245
+ isLoading, // boolean - model loading
246
+ isReady, // boolean - model ready
247
+ loadingProgress, // { status, message?, progress? }
248
+ error, // string | null
249
+ } = useEmbedding({
250
+ model: "Xenova/all-MiniLM-L6-v2", // Embedding model
251
+ normalize: true, // Normalize vectors
252
+ autoLoad: false, // Load on mount (default: false)
253
+ onReady: () => {},
254
+ onError: (err) => {},
255
+ });
256
+ ```
257
+
258
+ ## Performance
259
+
260
+ | Operation | Time (M1 Mac) |
261
+ |-----------|---------------|
262
+ | First load | 2-5s (downloads model) |
263
+ | Cached load | <500ms |
264
+ | Single embed | ~20ms |
265
+ | Batch (10 texts) | ~150ms |
266
+ | Search (100 docs) | ~300ms |
267
+
268
+ ## Limitations
269
+
270
+ - **No reverse mapping**: Embeddings cannot be converted back to text
271
+ - **English-optimized**: Default models work best with English text
272
+ - **Fixed dimensions**: Each model produces fixed-size vectors (384 for default)
273
+
274
+ ## Troubleshooting
275
+
276
+ ### "Model not found"
277
+
278
+ Use the full HuggingFace model ID:
279
+
280
+ ```typescript
281
+ // ❌ Won't work
282
+ await g.embed("text", { model: "MiniLM" });
283
+
284
+ // ✅ Use full ID
285
+ await g.embed("text", { model: "Xenova/all-MiniLM-L6-v2" });
286
+ ```
287
+
288
+ ### Slow first embedding
289
+
290
+ The first call downloads the model (~23MB). Subsequent calls use the cached model.
291
+
292
+ ### Out of memory with large batches
293
+
294
+ Process in smaller batches:
295
+
296
+ ```typescript
297
+ const batchSize = 100;
298
+ const allEmbeddings = [];
299
+
300
+ for (let i = 0; i < texts.length; i += batchSize) {
301
+ const batch = texts.slice(i, i + batchSize);
302
+ const embeddings = await g.embedBatch(batch);
303
+ allEmbeddings.push(...embeddings);
304
+ }
305
+ ```
306
+
307
+ ## See Also
308
+
309
+ - [Browser Hooks](./browser.md) - useChat, useCompletion, useEmbedding
310
+ - [AI SDK Integration](./ai-sdk.md) - embed, embedMany
311
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tryhamster/gerbil",
3
- "version": "1.0.0-rc.11",
3
+ "version": "1.0.0-rc.13",
4
4
  "description": "Local LLM inference for Node.js. GPU-accelerated. Zero config. Works standalone or with Vercel AI SDK.",
5
5
  "type": "module",
6
6
  "main": "dist/index.mjs",
@@ -1,4 +0,0 @@
1
- import { t as Gerbil } from "./gerbil-DoDGHe6Z.mjs";
2
- import "./utils-CZBZ8dgR.mjs";
3
-
4
- export { Gerbil };