@tryhamster/gerbil 1.0.0-rc.11 → 1.0.0-rc.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -1
- package/dist/browser/index.d.ts +98 -1
- package/dist/browser/index.d.ts.map +1 -1
- package/dist/browser/index.js +311 -1
- package/dist/browser/index.js.map +1 -1
- package/dist/cli.mjs +7 -7
- package/dist/cli.mjs.map +1 -1
- package/dist/frameworks/express.d.mts +1 -1
- package/dist/frameworks/express.mjs +1 -1
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.mjs +1 -1
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.mjs +1 -1
- package/dist/frameworks/next.d.mts +2 -2
- package/dist/frameworks/next.mjs +1 -1
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/react.d.mts.map +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.mjs +1 -1
- package/dist/{gerbil-DoDGHe6Z.mjs → gerbil-BcWjCGtM.mjs} +83 -1
- package/dist/{gerbil-DoDGHe6Z.mjs.map → gerbil-BcWjCGtM.mjs.map} +1 -1
- package/dist/gerbil-CBQkuQ9i.mjs +4 -0
- package/dist/{gerbil-qOTe1nl2.d.mts → gerbil-E12cYLNi.d.mts} +51 -2
- package/dist/gerbil-E12cYLNi.d.mts.map +1 -0
- package/dist/index.d.mts +3 -3
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +2 -2
- package/dist/index.mjs.map +1 -1
- package/dist/integrations/ai-sdk.d.mts +57 -3
- package/dist/integrations/ai-sdk.d.mts.map +1 -1
- package/dist/integrations/ai-sdk.mjs +88 -2
- package/dist/integrations/ai-sdk.mjs.map +1 -1
- package/dist/integrations/langchain.d.mts +1 -1
- package/dist/integrations/langchain.mjs +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.mjs +1 -1
- package/dist/integrations/mcp.d.mts +2 -2
- package/dist/integrations/mcp.mjs +4 -4
- package/dist/{mcp-kzDDWIoS.mjs → mcp-DXqxF7ri.mjs} +3 -3
- package/dist/{mcp-kzDDWIoS.mjs.map → mcp-DXqxF7ri.mjs.map} +1 -1
- package/dist/{one-liner-DxnNs_JK.mjs → one-liner-UtQX47IT.mjs} +2 -2
- package/dist/{one-liner-DxnNs_JK.mjs.map → one-liner-UtQX47IT.mjs.map} +1 -1
- package/dist/{repl-DGUw4fCc.mjs → repl-FjIaBVFD.mjs} +3 -3
- package/dist/skills/index.d.mts +6 -6
- package/dist/skills/index.d.mts.map +1 -1
- package/dist/skills/index.mjs +3 -3
- package/dist/{skills-DulrOPeP.mjs → skills-BmlseBpJ.mjs} +2 -2
- package/dist/{skills-DulrOPeP.mjs.map → skills-BmlseBpJ.mjs.map} +1 -1
- package/dist/{types-CiTc7ez3.d.mts → types-Bgb_89Bh.d.mts} +20 -2
- package/dist/types-Bgb_89Bh.d.mts.map +1 -0
- package/docs/ai-sdk.md +38 -1
- package/docs/browser.md +69 -0
- package/docs/embeddings.md +311 -0
- package/package.json +1 -1
- package/dist/gerbil-DJGqq7BX.mjs +0 -4
- package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
- package/dist/types-CiTc7ez3.d.mts.map +0 -1
|
@@ -92,6 +92,24 @@ type EmbedResult = {
|
|
|
92
92
|
/** Time in ms */
|
|
93
93
|
totalTime: number;
|
|
94
94
|
};
|
|
95
|
+
type SearchResult = {
|
|
96
|
+
/** The matched text */
|
|
97
|
+
text: string;
|
|
98
|
+
/** Similarity score (0-1, higher is more similar) */
|
|
99
|
+
score: number;
|
|
100
|
+
/** Index in the original corpus */
|
|
101
|
+
index: number;
|
|
102
|
+
};
|
|
103
|
+
type SimilarityResult = {
|
|
104
|
+
/** Similarity score (0-1, higher is more similar) */
|
|
105
|
+
score: number;
|
|
106
|
+
/** First text */
|
|
107
|
+
textA: string;
|
|
108
|
+
/** Second text */
|
|
109
|
+
textB: string;
|
|
110
|
+
/** Time in ms */
|
|
111
|
+
totalTime: number;
|
|
112
|
+
};
|
|
95
113
|
type LoadOptions = {
|
|
96
114
|
/** Progress callback */
|
|
97
115
|
onProgress?: (info: ProgressInfo) => void;
|
|
@@ -349,5 +367,5 @@ type StreamingTranscriptionSession = {
|
|
|
349
367
|
reset: () => void;
|
|
350
368
|
};
|
|
351
369
|
//#endregion
|
|
352
|
-
export {
|
|
353
|
-
//# sourceMappingURL=types-
|
|
370
|
+
export { TranscribeOptions as A, SimilarityResult as C, StreamingTranscriptionSession as D, StreamingTranscriptionOptions as E, TranscribeSegment as M, VoiceInfo as N, SystemInfo as O, SessionStats as S, SpeakResult as T, ModelSource as _, FallbackConfig as a, STTModelConfig as b, GerbilConfig as c, ImageInput as d, JsonOptions as f, ModelConfig as g, LoadTTSOptions as h, EmbedResult as i, TranscribeResult as j, TTSModelConfig as k, GerbilModelSettings as l, LoadSTTOptions as m, CacheConfig as n, GenerateOptions as o, LoadOptions as p, EmbedOptions as r, GenerateResult as s, AudioChunk as t, GerbilProviderSettings as u, ModelStats as v, SpeakOptions as w, SearchResult as x, ProgressInfo as y };
|
|
371
|
+
//# sourceMappingURL=types-Bgb_89Bh.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types-Bgb_89Bh.d.mts","names":[],"sources":["../src/core/types.ts"],"sourcesContent":[],"mappings":";;;;AAyBY,KAfA,WAAA,GAeW;EASX,EAAA,EAAA,MAAA;EAWA,IAAA,EAAA,MAAA;EAmCA,WAAA,EAAA,MAAc;EA8Bd,IAAA,EAAA,MAAA;EAkBA,aAAA,EAAY,MAAA;EAQZ,gBAAW,EAAA,OAAA;EAWX,YAAA,EAAA,OAAY;EAWZ;EAkBA,cAAW,CAAA,EAAA,OAAA;EAcX;EAYA,iBAAY,CAAA,EAAA,MAAA;EAiBZ,MAAA,EAAA,MAAW,GAAA,QAAA,GAAA,KAAA,GAAA,SAAA,GAAA,OAAA,GAAA,OAAA;AAiBvB,CAAA;AAqBY,KAxOA,WAAA,GAwOY;EAUZ,IAAA,EAAA,SAAU,GAAA,aAAA,GAAA,OAAA;EAOV,IAAA,EAAA,MAAA;AAyBZ,CAAA;AAWY,KApRA,UAAA,GAoRA;EAYA;EAeA,MAAA,EAAA,MAAA;EAmBA;EAWA,GAAA,CAAA,EAAA,MAAA;AAWZ,CAAA;AAaY,KA1VA,eAAA,GA0Vc;EAWd;EAiBA,SAAA,CAAA,EAAA,MAAA;EASA;EASA,WAAA,CAAA,EAAA,MAAgB;EAahB;EAWA,IAAA,CAAA,EAAA,MAAA;EAeA;EAES,IAAA,CAAA,EAAA,MAAA;EAEN;EAID,aAAA,CAAA,EAAA,MAAA,EAAA;EAAO;;;;;;;WA7ZV;;;;;;KASC,cAAA;;;;;;;;;;;;;;;;;;KA8BA;;UAEF,CAAA,CAAE,QAAQ;;;;;;;;KAgBR,YAAA;;;;;;KAQA,WAAA;;;;;;;;KAWA,YAAA;;;;;;;;KAWA,gBAAA;;;;;;;;;;KAkBA,WAAA;;sBAEU;;;;;;;;KAYV,YAAA;;;;;;;KAYA,YAAA;;;;;;;;UAWF;;aAGG;;KAGD,WAAA;;;;;;;;;;;;KAiBA,cAAA;;;;;;;;;;;;KAqBA,YAAA;;;;;;;;;KAUA,UAAA;;;;;;KAOA,UAAA;;SAEH;;;;;;;;;;;;;;;;;;KAuBG,mBAAA;;;;;;;;KAWA,sBAAA;;;;;;KAYA,SAAA;;;;;;;;;;;;;;KAeA,cAAA;;;;;;;;;;;;UAYF;;;;;;KAOE,YAAA;;;;;;sBAMU;;yBAEG;;KAGb,UAAA;;WAED;;;;;;;;KASC,WAAA;;SAEH;;;;;;;;;;KAWG,cAAA;;sBAEU;;;;KASV,cAAA;;;;;;;;;;;;;;;;KAiBA,iBAAA;;;;;;sBAMU;;KAGV,iBAAA;;;;;;;;KASA,gBAAA;;;;;;aAMC;;;;;;KAOD,cAAA;;sBAEU;;;;KASV,6BAAA;;;;;;;;;;;;;;KAeA,6BAAA;;qBAES;;eAEN;;;;cAID"}
|
package/docs/ai-sdk.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Gerbil + AI SDK
|
|
2
2
|
|
|
3
|
-
Gerbil works as a [Vercel AI SDK v5](https://sdk.vercel.ai/) provider, supporting text generation, speech synthesis (TTS), and transcription (STT).
|
|
3
|
+
Gerbil works as a [Vercel AI SDK v5](https://sdk.vercel.ai/) provider, supporting text generation, embeddings, speech synthesis (TTS), and transcription (STT).
|
|
4
4
|
|
|
5
5
|
## Setup
|
|
6
6
|
|
|
@@ -56,6 +56,43 @@ const { text } = await generateText({
|
|
|
56
56
|
});
|
|
57
57
|
```
|
|
58
58
|
|
|
59
|
+
## Embeddings
|
|
60
|
+
|
|
61
|
+
Generate text embeddings for semantic search, similarity, and RAG:
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
import { embed, embedMany } from "ai";
|
|
65
|
+
import { gerbil } from "@tryhamster/gerbil/ai";
|
|
66
|
+
|
|
67
|
+
// Single embedding
|
|
68
|
+
const { embedding } = await embed({
|
|
69
|
+
model: gerbil.embedding(), // all-MiniLM-L6-v2 by default
|
|
70
|
+
value: "Hello world",
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
// Multiple embeddings
|
|
74
|
+
const { embeddings } = await embedMany({
|
|
75
|
+
model: gerbil.embedding(),
|
|
76
|
+
values: ["Hello", "World", "How are you?"],
|
|
77
|
+
});
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Available Models
|
|
81
|
+
|
|
82
|
+
| Model | Dimensions | Description |
|
|
83
|
+
|-------|------------|-------------|
|
|
84
|
+
| `all-MiniLM-L6-v2` | 384 | Default, fast and versatile |
|
|
85
|
+
| `bge-small-en-v1.5` | 384 | High quality English |
|
|
86
|
+
| `gte-small` | 384 | General text embeddings |
|
|
87
|
+
|
|
88
|
+
```typescript
|
|
89
|
+
// Use a specific model
|
|
90
|
+
const { embedding } = await embed({
|
|
91
|
+
model: gerbil.embedding("bge-small-en-v1.5"),
|
|
92
|
+
value: "Hello world",
|
|
93
|
+
});
|
|
94
|
+
```
|
|
95
|
+
|
|
59
96
|
## Speech Generation (TTS)
|
|
60
97
|
|
|
61
98
|
Generate speech from text using Kokoro TTS:
|
package/docs/browser.md
CHANGED
|
@@ -456,6 +456,75 @@ for await (const chunk of gerbil.speakStream("Long text...")) {
|
|
|
456
456
|
}
|
|
457
457
|
```
|
|
458
458
|
|
|
459
|
+
## Embeddings Hook
|
|
460
|
+
|
|
461
|
+
### `useEmbedding`
|
|
462
|
+
|
|
463
|
+
Generate embeddings for semantic search and similarity:
|
|
464
|
+
|
|
465
|
+
```tsx
|
|
466
|
+
import { useEmbedding } from "@tryhamster/gerbil/browser";
|
|
467
|
+
|
|
468
|
+
function SemanticSearch() {
|
|
469
|
+
const { embed, similarity, search, isLoading, isReady, load } = useEmbedding({
|
|
470
|
+
model: "Xenova/all-MiniLM-L6-v2", // Default
|
|
471
|
+
autoLoad: false,
|
|
472
|
+
});
|
|
473
|
+
|
|
474
|
+
if (isLoading) return <div>Loading embedding model...</div>;
|
|
475
|
+
|
|
476
|
+
const handleSearch = async () => {
|
|
477
|
+
const results = await search("capital of France", [
|
|
478
|
+
"Paris is beautiful",
|
|
479
|
+
"London is in England",
|
|
480
|
+
"Dogs are pets",
|
|
481
|
+
], 2); // topK = 2
|
|
482
|
+
|
|
483
|
+
console.log(results);
|
|
484
|
+
// [{ text: "Paris is beautiful", score: 0.89, index: 0 }, ...]
|
|
485
|
+
};
|
|
486
|
+
|
|
487
|
+
const handleSimilarity = async () => {
|
|
488
|
+
const score = await similarity("Hello world", "Hi there");
|
|
489
|
+
console.log(score); // 0.85
|
|
490
|
+
};
|
|
491
|
+
|
|
492
|
+
return (
|
|
493
|
+
<div>
|
|
494
|
+
<button onClick={handleSearch}>Search</button>
|
|
495
|
+
<button onClick={handleSimilarity}>Compare</button>
|
|
496
|
+
</div>
|
|
497
|
+
);
|
|
498
|
+
}
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
### Options
|
|
502
|
+
|
|
503
|
+
```typescript
|
|
504
|
+
const {
|
|
505
|
+
// Actions
|
|
506
|
+
embed, // (text: string) => Promise<number[]>
|
|
507
|
+
embedBatch, // (texts: string[]) => Promise<{ vector, text }[]>
|
|
508
|
+
similarity, // (a: string, b: string) => Promise<number>
|
|
509
|
+
search, // (query: string, corpus: string[], topK?) => Promise<SearchResult[]>
|
|
510
|
+
findNearest, // (embedding: number[], candidates: string[], topK?) => Promise<SearchResult[]>
|
|
511
|
+
cosineSimilarity, // (a: number[], b: number[]) => number (sync)
|
|
512
|
+
load, // () => void - manually load model
|
|
513
|
+
|
|
514
|
+
// State
|
|
515
|
+
isLoading, // boolean - model loading
|
|
516
|
+
isReady, // boolean - model ready
|
|
517
|
+
loadingProgress, // { status, message?, progress? }
|
|
518
|
+
error, // string | null
|
|
519
|
+
} = useEmbedding({
|
|
520
|
+
model: "Xenova/all-MiniLM-L6-v2", // Embedding model
|
|
521
|
+
normalize: true, // Normalize vectors (default: true)
|
|
522
|
+
autoLoad: false, // Load on mount (default: false)
|
|
523
|
+
onReady: () => {},
|
|
524
|
+
onError: (err) => {},
|
|
525
|
+
});
|
|
526
|
+
```
|
|
527
|
+
|
|
459
528
|
## Low-Level API
|
|
460
529
|
|
|
461
530
|
For full control, use `createGerbilWorker` directly:
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
# Embeddings
|
|
2
|
+
|
|
3
|
+
Gerbil provides local text embeddings using transformer models via ONNX. Generate semantic vectors for similarity search, clustering, and retrieval — all on-device, with no API keys required.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
### Node.js
|
|
8
|
+
|
|
9
|
+
```typescript
|
|
10
|
+
import { Gerbil } from "@tryhamster/gerbil";
|
|
11
|
+
|
|
12
|
+
const g = new Gerbil();
|
|
13
|
+
|
|
14
|
+
// Generate embedding
|
|
15
|
+
const result = await g.embed("Hello world");
|
|
16
|
+
console.log(result.vector); // number[] (384 dimensions)
|
|
17
|
+
|
|
18
|
+
// Compare similarity
|
|
19
|
+
const similarity = await g.similarity("Hello world", "Hi there");
|
|
20
|
+
console.log(similarity.score); // 0.85
|
|
21
|
+
|
|
22
|
+
// Semantic search
|
|
23
|
+
const results = await g.search("capital of France", [
|
|
24
|
+
"Paris is beautiful",
|
|
25
|
+
"London is in England",
|
|
26
|
+
"Dogs are pets"
|
|
27
|
+
]);
|
|
28
|
+
// [{ text: "Paris is beautiful", score: 0.89, index: 0 }, ...]
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### React (Browser)
|
|
32
|
+
|
|
33
|
+
```tsx
|
|
34
|
+
import { useEmbedding } from "@tryhamster/gerbil/browser";
|
|
35
|
+
|
|
36
|
+
function SemanticSearch() {
|
|
37
|
+
const { search, isLoading, isReady } = useEmbedding();
|
|
38
|
+
|
|
39
|
+
if (isLoading) return <div>Loading embedding model...</div>;
|
|
40
|
+
|
|
41
|
+
const handleSearch = async () => {
|
|
42
|
+
const results = await search("capital of France", [
|
|
43
|
+
"Paris is beautiful",
|
|
44
|
+
"London is in England",
|
|
45
|
+
"Dogs are pets"
|
|
46
|
+
]);
|
|
47
|
+
console.log(results);
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
return <button onClick={handleSearch}>Search</button>;
|
|
51
|
+
}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### AI SDK
|
|
55
|
+
|
|
56
|
+
```typescript
|
|
57
|
+
import { embed, embedMany } from "ai";
|
|
58
|
+
import { gerbil } from "@tryhamster/gerbil/ai";
|
|
59
|
+
|
|
60
|
+
// Single embedding
|
|
61
|
+
const { embedding } = await embed({
|
|
62
|
+
model: gerbil.embedding(),
|
|
63
|
+
value: "Hello world",
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
// Multiple embeddings
|
|
67
|
+
const { embeddings } = await embedMany({
|
|
68
|
+
model: gerbil.embedding(),
|
|
69
|
+
values: ["Hello", "World", "How are you?"],
|
|
70
|
+
});
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Available Models
|
|
74
|
+
|
|
75
|
+
| Model | Dimensions | Size | Description |
|
|
76
|
+
|-------|------------|------|-------------|
|
|
77
|
+
| `all-MiniLM-L6-v2` | 384 | ~23MB | Default, fast and versatile |
|
|
78
|
+
| `bge-small-en-v1.5` | 384 | ~33MB | High quality English embeddings |
|
|
79
|
+
| `gte-small` | 384 | ~33MB | General text embeddings |
|
|
80
|
+
|
|
81
|
+
Any ONNX model from Hugging Face can be used by passing its full model ID:
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
await g.embed("text", { model: "Xenova/all-MiniLM-L6-v2" });
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## API Reference
|
|
88
|
+
|
|
89
|
+
### Gerbil Class Methods
|
|
90
|
+
|
|
91
|
+
```typescript
|
|
92
|
+
class Gerbil {
|
|
93
|
+
// Generate embedding for text
|
|
94
|
+
async embed(text: string, options?: EmbedOptions): Promise<EmbedResult>;
|
|
95
|
+
|
|
96
|
+
// Batch embedding
|
|
97
|
+
async embedBatch(texts: string[], options?: EmbedOptions): Promise<EmbedResult[]>;
|
|
98
|
+
|
|
99
|
+
// Compare two texts
|
|
100
|
+
async similarity(textA: string, textB: string, options?: EmbedOptions): Promise<SimilarityResult>;
|
|
101
|
+
|
|
102
|
+
// Semantic search
|
|
103
|
+
async search(query: string, corpus: string[], options?: SearchOptions): Promise<SearchResult[]>;
|
|
104
|
+
|
|
105
|
+
// Find nearest text to an embedding
|
|
106
|
+
async findNearest(embedding: number[], candidates: string[], options?: SearchOptions): Promise<SearchResult[]>;
|
|
107
|
+
|
|
108
|
+
// Raw vector similarity (synchronous)
|
|
109
|
+
cosineSimilarity(a: number[], b: number[]): number;
|
|
110
|
+
}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Types
|
|
114
|
+
|
|
115
|
+
```typescript
|
|
116
|
+
interface EmbedOptions {
|
|
117
|
+
/** Embedding model (default: "Xenova/all-MiniLM-L6-v2") */
|
|
118
|
+
model?: string;
|
|
119
|
+
/** Normalize vectors (default: true) */
|
|
120
|
+
normalize?: boolean;
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
interface EmbedResult {
|
|
124
|
+
/** Embedding vector */
|
|
125
|
+
vector: number[];
|
|
126
|
+
/** Original text */
|
|
127
|
+
text: string;
|
|
128
|
+
/** Time in ms */
|
|
129
|
+
totalTime: number;
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
interface SimilarityResult {
|
|
133
|
+
/** Similarity score (0-1) */
|
|
134
|
+
score: number;
|
|
135
|
+
/** First text */
|
|
136
|
+
textA: string;
|
|
137
|
+
/** Second text */
|
|
138
|
+
textB: string;
|
|
139
|
+
/** Time in ms */
|
|
140
|
+
totalTime: number;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
interface SearchResult {
|
|
144
|
+
/** Matched text */
|
|
145
|
+
text: string;
|
|
146
|
+
/** Similarity score (0-1) */
|
|
147
|
+
score: number;
|
|
148
|
+
/** Index in original corpus */
|
|
149
|
+
index: number;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
interface SearchOptions extends EmbedOptions {
|
|
153
|
+
/** Return only top K results */
|
|
154
|
+
topK?: number;
|
|
155
|
+
}
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Use Cases
|
|
159
|
+
|
|
160
|
+
### Semantic Search
|
|
161
|
+
|
|
162
|
+
Find the most relevant documents for a query:
|
|
163
|
+
|
|
164
|
+
```typescript
|
|
165
|
+
const documents = [
|
|
166
|
+
"JavaScript is a programming language",
|
|
167
|
+
"Python is great for data science",
|
|
168
|
+
"The weather is sunny today",
|
|
169
|
+
"Machine learning uses algorithms",
|
|
170
|
+
];
|
|
171
|
+
|
|
172
|
+
const results = await g.search("coding languages", documents, { topK: 2 });
|
|
173
|
+
// Returns JavaScript and Python documents
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Duplicate Detection
|
|
177
|
+
|
|
178
|
+
Find similar or duplicate content:
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
const similarity = await g.similarity(
|
|
182
|
+
"The quick brown fox jumps over the lazy dog",
|
|
183
|
+
"A fast brown fox leaps over a sleepy dog"
|
|
184
|
+
);
|
|
185
|
+
|
|
186
|
+
if (similarity.score > 0.9) {
|
|
187
|
+
console.log("Potential duplicate detected!");
|
|
188
|
+
}
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Clustering
|
|
192
|
+
|
|
193
|
+
Group similar items together:
|
|
194
|
+
|
|
195
|
+
```typescript
|
|
196
|
+
const items = ["apple", "banana", "car", "truck", "orange"];
|
|
197
|
+
const embeddings = await g.embedBatch(items);
|
|
198
|
+
|
|
199
|
+
// Use embeddings for k-means or hierarchical clustering
|
|
200
|
+
// Each embedding.vector is a 384-dimensional vector
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
### RAG (Retrieval-Augmented Generation)
|
|
204
|
+
|
|
205
|
+
Build a simple RAG pipeline:
|
|
206
|
+
|
|
207
|
+
```typescript
|
|
208
|
+
// 1. Index documents
|
|
209
|
+
const documents = await loadDocuments();
|
|
210
|
+
const docEmbeddings = await g.embedBatch(documents);
|
|
211
|
+
|
|
212
|
+
// 2. Store embeddings (in-memory or vector DB)
|
|
213
|
+
const index = docEmbeddings.map((e, i) => ({
|
|
214
|
+
embedding: e.vector,
|
|
215
|
+
text: documents[i]
|
|
216
|
+
}));
|
|
217
|
+
|
|
218
|
+
// 3. Retrieve relevant docs
|
|
219
|
+
const queryEmbedding = (await g.embed(userQuestion)).vector;
|
|
220
|
+
const relevant = await g.findNearest(
|
|
221
|
+
queryEmbedding,
|
|
222
|
+
documents,
|
|
223
|
+
{ topK: 3 }
|
|
224
|
+
);
|
|
225
|
+
|
|
226
|
+
// 4. Generate answer with context
|
|
227
|
+
const context = relevant.map(r => r.text).join("\n");
|
|
228
|
+
const answer = await g.generate(`Context:\n${context}\n\nQuestion: ${userQuestion}`);
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
## useEmbedding Hook Reference
|
|
232
|
+
|
|
233
|
+
```typescript
|
|
234
|
+
const {
|
|
235
|
+
// Actions
|
|
236
|
+
embed, // (text: string) => Promise<number[]>
|
|
237
|
+
embedBatch, // (texts: string[]) => Promise<BrowserEmbedResult[]>
|
|
238
|
+
similarity, // (a: string, b: string) => Promise<number>
|
|
239
|
+
search, // (query: string, corpus: string[], topK?: number) => Promise<SearchResult[]>
|
|
240
|
+
findNearest, // (embedding: number[], candidates: string[], topK?: number) => Promise<SearchResult[]>
|
|
241
|
+
cosineSimilarity,// (a: number[], b: number[]) => number (sync)
|
|
242
|
+
load, // () => void - manually load model
|
|
243
|
+
|
|
244
|
+
// State
|
|
245
|
+
isLoading, // boolean - model loading
|
|
246
|
+
isReady, // boolean - model ready
|
|
247
|
+
loadingProgress, // { status, message?, progress? }
|
|
248
|
+
error, // string | null
|
|
249
|
+
} = useEmbedding({
|
|
250
|
+
model: "Xenova/all-MiniLM-L6-v2", // Embedding model
|
|
251
|
+
normalize: true, // Normalize vectors
|
|
252
|
+
autoLoad: false, // Load on mount (default: false)
|
|
253
|
+
onReady: () => {},
|
|
254
|
+
onError: (err) => {},
|
|
255
|
+
});
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## Performance
|
|
259
|
+
|
|
260
|
+
| Operation | Time (M1 Mac) |
|
|
261
|
+
|-----------|---------------|
|
|
262
|
+
| First load | 2-5s (downloads model) |
|
|
263
|
+
| Cached load | <500ms |
|
|
264
|
+
| Single embed | ~20ms |
|
|
265
|
+
| Batch (10 texts) | ~150ms |
|
|
266
|
+
| Search (100 docs) | ~300ms |
|
|
267
|
+
|
|
268
|
+
## Limitations
|
|
269
|
+
|
|
270
|
+
- **No reverse mapping**: Embeddings cannot be converted back to text
|
|
271
|
+
- **English-optimized**: Default models work best with English text
|
|
272
|
+
- **Fixed dimensions**: Each model produces fixed-size vectors (384 for default)
|
|
273
|
+
|
|
274
|
+
## Troubleshooting
|
|
275
|
+
|
|
276
|
+
### "Model not found"
|
|
277
|
+
|
|
278
|
+
Use the full Hugging Face model ID (including the organization prefix), not a shortened name:
|
|
279
|
+
|
|
280
|
+
```typescript
|
|
281
|
+
// ❌ Won't work
|
|
282
|
+
await g.embed("text", { model: "MiniLM" });
|
|
283
|
+
|
|
284
|
+
// ✅ Use full ID
|
|
285
|
+
await g.embed("text", { model: "Xenova/all-MiniLM-L6-v2" });
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
### Slow first embedding
|
|
289
|
+
|
|
290
|
+
The first call downloads the model (~23MB for the default model). Subsequent calls use the cached model.
|
|
291
|
+
|
|
292
|
+
### Out of memory with large batches
|
|
293
|
+
|
|
294
|
+
Process in smaller batches:
|
|
295
|
+
|
|
296
|
+
```typescript
|
|
297
|
+
const batchSize = 100;
|
|
298
|
+
const allEmbeddings = [];
|
|
299
|
+
|
|
300
|
+
for (let i = 0; i < texts.length; i += batchSize) {
|
|
301
|
+
const batch = texts.slice(i, i + batchSize);
|
|
302
|
+
const embeddings = await g.embedBatch(batch);
|
|
303
|
+
allEmbeddings.push(...embeddings);
|
|
304
|
+
}
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
## See Also
|
|
308
|
+
|
|
309
|
+
- [Browser Hooks](./browser.md) - useChat, useCompletion, useEmbedding
|
|
310
|
+
- [AI SDK Integration](./ai-sdk.md) - embed, embedMany
|
|
311
|
+
|
package/package.json
CHANGED
package/dist/gerbil-DJGqq7BX.mjs
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"gerbil-qOTe1nl2.d.mts","names":[],"sources":["../src/core/gerbil.ts"],"sourcesContent":[],"mappings":";;;;AAqOsB,cAlBT,MAAA,CAkBS;EAiBC,QAAA,SAAA;EAIa,QAAA,SAAA;EA0Be,QAAA,KAAA;EAAmB,QAAA,QAAA;EA8SpD,QAAA,YAAA;EA+CH,QAAA,WAAA;EAYY,iBAAA,MAAA;EAUD,QAAA,KAAA;EAWJ,QAAA,SAAA;EAW0B,QAAA,aAAA;EAyBN,QAAA,WAAA;EAA+B,QAAA,SAAA;EAAR,QAAA,WAAA;EAsNpD,QAAA,aAAA;EACe,WAAA,CAAA,MAAA,CAAA,EAxqBN,YAwqBM;EAAvB,OAAA,UAAA,CAAA,CAAA,EAvpBkB,WAupBlB,EAAA;EA0SgD,OAAA,QAAA,CAAA,OAAA,EAAA,MAAA,CAAA,EA77BjB,WA67BiB,GAAA,SAAA;EAAZ;;;;;;;;;;;;;;;;;;EAgN5B,SAAA,CAAA,OAAA,CAAA,EAAA,MAAA,EAAA,OAAA,CAAA,EAnnCsC,WAmnCtC,CAAA,EAnnCyD,OAmnCzD,CAAA,IAAA,CAAA;EACO;;;;EAyChB,QAAA,eAAA;EADqB;;;EAiDkC,QAAA,CAAA,CAAA,EAAA,OAAA;EAAiB;;;EA+B/D,cAAA,CAAA,CAAA,EAAA,OAAA;EACA;;;EAkCA,YAAA,CAAA,CAAA,EAj+BK,WAi+BL,GAAA,IAAA;EAAR;;;EAyDQ,aAAA,CAAA,CAAA,EAAA,QAAA,GAAA,KAAA,GAAA,MAAA;EAAR;;;EAiIsB,QAAA,CAAA,CAAA,EAAA,MAAA;EAkBb;;;EA6CmC,qBAAA,CAAA,CAAA,EAAA;IAkBD,IAAA,EAAA,MAAA;IAAR,MAAA,EAAA,MAAA;IAuBM,IAAA,EAAA,MAAA;IAgBJ,OAAA,EAAA,MAAA;EAAO,CAAA;;;;;;;;;;;;eApuClC;;;;;;qBAYY;;;;;;;oBAUD;;;;;;;;;gBAWJ;;;;;;+CAW0B;;;;;;;;;;;;;;;qCAyBN,kBAAuB,QAAQ;;;;;;;mCAsN5D,kBACR,uBAAuB;;;;;;;;;;;;;mCA0Sa,YAAY,KAAK,QAAQ;;;;gCAqC7B,eAAoB,QAAQ;;;;wCAyBpB,eAAoB,QAAQ;;;;cAe3D;;;;aAOD;;;;;;;;;;;;;;;;;;;;;;oBA4DY;;MAA2C;;;;4BA4BlC,iBAAiB;;;;;;;;;;;;gCAiBd,eAAoB,QAAQ;;;;;;;;;;;;;sCAmBpD,eACR,eAAe,YAAY;;;;gBAQhB;;;;;;;;;;;;;;;;mBAgCS,QACrB;;;;;;;;;;;;;;;;;;;;sCA8BuC,iBAAsB;;;;8CAkBN,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;oBA8BjE,eAAe,sBACb,oBACR,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;yCAiCA,gCACR,QAAQ;;;;mBAQY,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAiD5B,QAAQ;;;;2BAmCoB;;;;;iCAiBI;;;;;;qBA6EV;;;;;+BAUU;;;;;;;;cAQvB;;;;;;;;;;;;;;;0BA2BkB;;;;;;;;2CAkBiB;;;;;kCAkBT,QAAQ;;;;;;;;;;;;;;wCAuBF;;;;oCAgBJ"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"types-CiTc7ez3.d.mts","names":[],"sources":["../src/core/types.ts"],"sourcesContent":[],"mappings":";;;;AAyBY,KAfA,WAAA,GAeW;EASX,EAAA,EAAA,MAAA;EAWA,IAAA,EAAA,MAAA;EAmCA,WAAA,EAAA,MAAc;EA8Bd,IAAA,EAAA,MAAA;EAkBA,aAAA,EAAY,MAAA;EAQZ,gBAAW,EAAA,OAAA;EAeX,YAAA,EAAW,OAAA;EAcX;EAYA,cAAA,CAAY,EAAA,OAAA;EAiBZ;EAiBA,iBAAc,CAAA,EAAA,MAAA;EAqBd,MAAA,EAAA,MAAA,GAAY,QAAA,GAAA,KAAA,GAAA,SAAA,GAAA,OAAA,GAAA,OAAA;AAUxB,CAAA;AAOY,KAhOA,WAAA,GAgOU;EAyBV,IAAA,EAAA,SAAA,GAAA,aAAmB,GAAA,OAAA;EAWnB,IAAA,EAAA,MAAA;AAYZ,CAAA;AAeY,KAtRA,UAAA,GAsRc;EAmBd;EAWA,MAAA,EAAA,MAAU;EAWV;EAaA,GAAA,CAAA,EAAA,MAAA;AAWZ,CAAA;AAiBY,KA7VA,eAAA,GA6ViB;EASjB;EASA,SAAA,CAAA,EAAA,MAAA;EAaA;EAWA,WAAA,CAAA,EAAA,MAAA;EAeA;EAES,IAAA,CAAA,EAAA,MAAA;EAEN;EAID,IAAA,CAAA,EAAA,MAAA;EAAO;;;;;;;;;WApYV;;;;;;KASC,cAAA;;;;;;;;;;;;;;;;;;KA8BA;;UAEF,CAAA,CAAE,QAAQ;;;;;;;;KAgBR,YAAA;;;;;;KAQA,WAAA;;;;;;;;KAeA,WAAA;;sBAEU;;;;;;;;KAYV,YAAA;;;;;;;KAYA,YAAA;;;;;;;;UAWF;;aAGG;;KAGD,WAAA;;;;;;;;;;;;KAiBA,cAAA;;;;;;;;;;;;KAqBA,YAAA;;;;;;;;;KAUA,UAAA;;;;;;KAOA,UAAA;;SAEH;;;;;;;;;;;;;;;;;;KAuBG,mBAAA;;;;;;;;KAWA,sBAAA;;;;;;KAYA,SAAA;;;;;;;;;;;;;;KAeA,cAAA;;;;;;;;;;;;UAYF;;;;;;KAOE,YAAA;;;;;;sBAMU;;yBAEG;;KAGb,UAAA;;WAED;;;;;;;;KASC,WAAA;;SAEH;;;;;;;;;;KAWG,cAAA;;sBAEU;;;;KASV,cAAA;;;;;;;;;;;;;;;;KAiBA,iBAAA;;;;;;sBAMU;;KAGV,iBAAA;;;;;;;;KASA,gBAAA;;;;;;aAMC;;;;;;KAOD,cAAA;;sBAEU;;;;KASV,6BAAA;;;;;;;;;;;;;;KAeA,6BAAA;;qBAES;;eAEN;;;;cAID"}
|