retriv 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -7
- package/dist/db/sqlite-fts.d.mts +1 -0
- package/dist/db/sqlite-fts.mjs +25 -16
- package/dist/embeddings/cohere.mjs +9 -5
- package/dist/embeddings/google.mjs +9 -5
- package/dist/embeddings/mistral.mjs +9 -5
- package/dist/embeddings/model-info.d.mts +26 -0
- package/dist/embeddings/model-info.mjs +65 -0
- package/dist/embeddings/ollama.mjs +9 -5
- package/dist/embeddings/openai.mjs +9 -5
- package/dist/embeddings/transformers-js.d.mts +27 -0
- package/dist/embeddings/transformers-js.mjs +42 -0
- package/package.json +14 -15
- package/dist/embeddings/transformers.d.mts +0 -24
- package/dist/embeddings/transformers.mjs +0 -26
package/README.md
CHANGED
|
@@ -244,18 +244,18 @@ interface SearchResult {
|
|
|
244
244
|
|
|
245
245
|
## Benchmarks
|
|
246
246
|
|
|
247
|
-
Retrieval accuracy on Nuxt documentation (
|
|
247
|
+
Retrieval accuracy on Nuxt documentation (2,817 chunks):
|
|
248
248
|
|
|
249
249
|
| Test Type | FTS | Vector | Hybrid |
|
|
250
250
|
|-----------|-----|--------|--------|
|
|
251
|
-
| Exact terminology (ports, config names) |
|
|
252
|
-
| Doc retrieval (keyword overlap) |
|
|
253
|
-
| Semantic queries (synonyms, no overlap) |
|
|
254
|
-
| **Total** | **
|
|
251
|
+
| Exact terminology (ports, config names) | 7/7 | 5/7 | 7/7 |
|
|
252
|
+
| Doc retrieval (keyword overlap) | 0/7 | 5/7 | 5/7 |
|
|
253
|
+
| Semantic queries (synonyms, no overlap) | 1/6 | 5/6 | 5/6 |
|
|
254
|
+
| **Total** | **8/20 (40%)** | **15/20 (75%)** | **17/20 (85%)** |
|
|
255
255
|
|
|
256
256
|
- **FTS** excels at exact terms but fails semantic queries ("reuse logic" → composables)
|
|
257
|
-
- **Vector** understands meaning but misses precise terminology ("
|
|
258
|
-
- **Hybrid** combines both -
|
|
257
|
+
- **Vector** understands meaning but misses precise terminology (".global" suffix)
|
|
258
|
+
- **Hybrid** combines both - best overall recall across query types
|
|
259
259
|
|
|
260
260
|
Run locally: `pnpm test:eval`
|
|
261
261
|
|
package/dist/db/sqlite-fts.d.mts
CHANGED
package/dist/db/sqlite-fts.mjs
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { mkdirSync } from "node:fs";
|
|
2
2
|
import { dirname } from "node:path";
|
|
3
|
-
import Database from "better-sqlite3";
|
|
4
3
|
async function sqliteFts(config = {}) {
|
|
5
4
|
const dbPath = config.path || ":memory:";
|
|
5
|
+
const nodeSqlite = globalThis.process?.getBuiltinModule?.("node:sqlite");
|
|
6
|
+
if (!nodeSqlite) throw new Error("node:sqlite not available. Requires Node.js >= 22.5");
|
|
6
7
|
if (dbPath !== ":memory:") mkdirSync(dirname(dbPath), { recursive: true });
|
|
7
|
-
const db = new
|
|
8
|
+
const db = new nodeSqlite.DatabaseSync(dbPath);
|
|
8
9
|
db.exec(`
|
|
9
10
|
CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
|
|
10
11
|
id,
|
|
@@ -15,14 +16,18 @@ async function sqliteFts(config = {}) {
|
|
|
15
16
|
`);
|
|
16
17
|
return {
|
|
17
18
|
async index(docs) {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
19
|
+
db.prepare("BEGIN").run();
|
|
20
|
+
try {
|
|
21
|
+
for (const doc of docs) {
|
|
22
|
+
db.prepare("DELETE FROM documents_fts WHERE id = ?").run(doc.id);
|
|
23
|
+
db.prepare("INSERT INTO documents_fts (id, content, metadata) VALUES (?, ?, ?)").run(doc.id, doc.content, doc.metadata ? JSON.stringify(doc.metadata) : null);
|
|
24
|
+
}
|
|
25
|
+
db.prepare("COMMIT").run();
|
|
26
|
+
return { count: docs.length };
|
|
27
|
+
} catch (error) {
|
|
28
|
+
db.prepare("ROLLBACK").run();
|
|
29
|
+
throw error;
|
|
30
|
+
}
|
|
26
31
|
},
|
|
27
32
|
async search(query, options = {}) {
|
|
28
33
|
const { limit = 10, returnContent = false, returnMetadata = true } = options;
|
|
@@ -50,17 +55,21 @@ async function sqliteFts(config = {}) {
|
|
|
50
55
|
});
|
|
51
56
|
},
|
|
52
57
|
async remove(ids) {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
for (const id of
|
|
56
|
-
|
|
57
|
-
|
|
58
|
+
db.prepare("BEGIN").run();
|
|
59
|
+
try {
|
|
60
|
+
for (const id of ids) db.prepare("DELETE FROM documents_fts WHERE id = ?").run(id);
|
|
61
|
+
db.prepare("COMMIT").run();
|
|
62
|
+
return { count: ids.length };
|
|
63
|
+
} catch (error) {
|
|
64
|
+
db.prepare("ROLLBACK").run();
|
|
65
|
+
throw error;
|
|
66
|
+
}
|
|
58
67
|
},
|
|
59
68
|
async clear() {
|
|
60
69
|
db.exec("DELETE FROM documents_fts");
|
|
61
70
|
},
|
|
62
71
|
async close() {
|
|
63
|
-
db.close();
|
|
72
|
+
db.close?.();
|
|
64
73
|
}
|
|
65
74
|
};
|
|
66
75
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getModelDimensions } from "./model-info.mjs";
|
|
1
2
|
import { embed, embedMany } from "ai";
|
|
2
3
|
import { createCohere } from "@ai-sdk/cohere";
|
|
3
4
|
function cohere(options = {}) {
|
|
@@ -9,11 +10,14 @@ function cohere(options = {}) {
|
|
|
9
10
|
apiKey,
|
|
10
11
|
baseURL: baseUrl
|
|
11
12
|
}).textEmbeddingModel(model);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
let dimensions = getModelDimensions(model);
|
|
14
|
+
if (!dimensions) {
|
|
15
|
+
const { embedding } = await embed({
|
|
16
|
+
model: embeddingModel,
|
|
17
|
+
value: "test"
|
|
18
|
+
});
|
|
19
|
+
dimensions = embedding.length;
|
|
20
|
+
}
|
|
17
21
|
const embedder = async (texts) => {
|
|
18
22
|
if (texts.length === 0) return [];
|
|
19
23
|
if (texts.length === 1) {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getModelDimensions } from "./model-info.mjs";
|
|
1
2
|
import { embed, embedMany } from "ai";
|
|
2
3
|
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
3
4
|
function google(options = {}) {
|
|
@@ -9,11 +10,14 @@ function google(options = {}) {
|
|
|
9
10
|
apiKey,
|
|
10
11
|
baseURL: baseUrl
|
|
11
12
|
}).textEmbeddingModel(model);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
let dimensions = getModelDimensions(model);
|
|
14
|
+
if (!dimensions) {
|
|
15
|
+
const { embedding } = await embed({
|
|
16
|
+
model: embeddingModel,
|
|
17
|
+
value: "test"
|
|
18
|
+
});
|
|
19
|
+
dimensions = embedding.length;
|
|
20
|
+
}
|
|
17
21
|
const embedder = async (texts) => {
|
|
18
22
|
if (texts.length === 0) return [];
|
|
19
23
|
if (texts.length === 1) {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getModelDimensions } from "./model-info.mjs";
|
|
1
2
|
import { embed, embedMany } from "ai";
|
|
2
3
|
import { createMistral } from "@ai-sdk/mistral";
|
|
3
4
|
function mistral(options = {}) {
|
|
@@ -9,11 +10,14 @@ function mistral(options = {}) {
|
|
|
9
10
|
apiKey,
|
|
10
11
|
baseURL: baseUrl
|
|
11
12
|
}).textEmbeddingModel(model);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
let dimensions = getModelDimensions(model);
|
|
14
|
+
if (!dimensions) {
|
|
15
|
+
const { embedding } = await embed({
|
|
16
|
+
model: embeddingModel,
|
|
17
|
+
value: "test"
|
|
18
|
+
});
|
|
19
|
+
dimensions = embedding.length;
|
|
20
|
+
}
|
|
17
21
|
const embedder = async (texts) => {
|
|
18
22
|
if (texts.length === 0) return [];
|
|
19
23
|
if (texts.length === 1) {
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
//#region src/embeddings/model-info.d.ts
|
|
2
|
+
/**
|
|
3
|
+
* Embedding provider types
|
|
4
|
+
*/
|
|
5
|
+
type EmbeddingPreset = 'openai' | 'google' | 'mistral' | 'cohere' | 'ollama' | 'transformers.js';
|
|
6
|
+
/**
|
|
7
|
+
* Default models per provider
|
|
8
|
+
*/
|
|
9
|
+
declare const DEFAULT_MODELS: Record<EmbeddingPreset, {
|
|
10
|
+
model: string;
|
|
11
|
+
dimensions: number;
|
|
12
|
+
}>;
|
|
13
|
+
/**
|
|
14
|
+
* Known dimensions for common models
|
|
15
|
+
*/
|
|
16
|
+
declare const MODEL_DIMENSIONS: Record<string, number>;
|
|
17
|
+
/**
|
|
18
|
+
* Get dimensions for a model (returns undefined if unknown)
|
|
19
|
+
*/
|
|
20
|
+
declare function getModelDimensions(model: string): number | undefined;
|
|
21
|
+
/**
|
|
22
|
+
* Resolve model name for a specific preset
|
|
23
|
+
*/
|
|
24
|
+
declare function resolveModelForPreset(model: string, preset: string): string;
|
|
25
|
+
//#endregion
|
|
26
|
+
export { DEFAULT_MODELS, EmbeddingPreset, MODEL_DIMENSIONS, getModelDimensions, resolveModelForPreset };
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
 * Default model per embedding provider preset, paired with the dimensions
 * recorded for that model.
 */
const DEFAULT_MODELS = {
	openai: { model: "text-embedding-3-small", dimensions: 1536 },
	google: { model: "text-embedding-004", dimensions: 768 },
	mistral: { model: "mistral-embed", dimensions: 1024 },
	cohere: { model: "embed-english-v3.0", dimensions: 1024 },
	ollama: { model: "nomic-embed-text", dimensions: 768 },
	"transformers.js": { model: "Xenova/bge-small-en-v1.5", dimensions: 384 }
};
|
|
27
|
+
/**
 * Known embedding dimensions for common models, keyed by bare model name
 * (without a `Xenova/` or `onnx-community/` prefix).
 */
const MODEL_DIMENSIONS = {
	"text-embedding-3-small": 1536,
	"text-embedding-3-large": 3072,
	"text-embedding-ada-002": 1536,
	"text-embedding-004": 768,
	"embedding-001": 768,
	"mistral-embed": 1024,
	"embed-english-v3.0": 1024,
	"embed-multilingual-v3.0": 1024,
	"embed-english-light-v3.0": 384,
	"embed-multilingual-light-v3.0": 384,
	"nomic-embed-text": 768,
	"mxbai-embed-large": 1024,
	"all-minilm": 384,
	"snowflake-arctic-embed": 1024,
	"bge-small-en-v1.5": 384,
	"bge-base-en-v1.5": 768,
	"bge-large-en-v1.5": 1024,
	"bge-m3": 1024,
	"all-MiniLM-L6-v2": 384,
	"embeddinggemma-300m": 256,
	"plamo-embedding-1b": 1024
};

/**
 * Look up the known embedding dimensions for a model.
 *
 * The exact name is tried first; if that misses, a leading `Xenova/` or
 * `onnx-community/` prefix is stripped and the bare name is tried.
 *
 * @param {string} model - Model name, with or without a repository prefix.
 * @returns {number | undefined} The dimensions, or `undefined` when the model
 *   is not in `MODEL_DIMENSIONS` (or `model` is not a string).
 */
function getModelDimensions(model) {
	if (typeof model !== "string") return undefined;
	// Own-property checks only: a plain `MODEL_DIMENSIONS[model]` lookup would
	// resolve names such as "constructor" or "toString" through Object.prototype
	// and hand back a function instead of `undefined`.
	if (Object.hasOwn(MODEL_DIMENSIONS, model)) return MODEL_DIMENSIONS[model];
	const bareName = model.replace(/^(Xenova\/|onnx-community\/)/, "");
	return Object.hasOwn(MODEL_DIMENSIONS, bareName) ? MODEL_DIMENSIONS[bareName] : undefined;
}
|
|
54
|
+
/**
 * Per-preset aliases from a short model name to the full repository id
 * that the preset should load.
 */
const MODEL_MAPPINGS = { "transformers.js": {
	"bge-base-en-v1.5": "Xenova/bge-base-en-v1.5",
	"bge-large-en-v1.5": "onnx-community/bge-large-en-v1.5",
	"bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
	"bge-m3": "Xenova/bge-m3",
	"all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
	"embeddinggemma-300m": "onnx-community/embeddinggemma-300m-ONNX"
} };

/**
 * Resolve a model name for a specific preset.
 *
 * @param {string} model - Model name as supplied by the caller.
 * @param {string} preset - Preset whose alias table should be consulted.
 * @returns {string} The mapped name when `preset` has an alias for `model`;
 *   otherwise `model` unchanged.
 */
function resolveModelForPreset(model, preset) {
	// Own-property checks at both levels: plain indexing would follow the
	// prototype chain, so e.g. model "constructor" would resolve to a function.
	if (!Object.hasOwn(MODEL_MAPPINGS, preset)) return model;
	const presetAliases = MODEL_MAPPINGS[preset];
	return Object.hasOwn(presetAliases, model) ? presetAliases[model] : model;
}
|
|
65
|
+
export { DEFAULT_MODELS, MODEL_DIMENSIONS, getModelDimensions, resolveModelForPreset };
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getModelDimensions } from "./model-info.mjs";
|
|
1
2
|
import { embed, embedMany } from "ai";
|
|
2
3
|
import { createOllama } from "ollama-ai-provider-v2";
|
|
3
4
|
function ollama(options = {}) {
|
|
@@ -7,11 +8,14 @@ function ollama(options = {}) {
|
|
|
7
8
|
if (cached) return cached;
|
|
8
9
|
const ollamaBaseUrl = baseUrl || process.env.OLLAMA_BASE_URL || "http://localhost:11434";
|
|
9
10
|
const embeddingModel = createOllama({ baseURL: ollamaBaseUrl.endsWith("/api") ? ollamaBaseUrl : `${ollamaBaseUrl}/api` }).textEmbeddingModel(model);
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
let dimensions = getModelDimensions(model);
|
|
12
|
+
if (!dimensions) {
|
|
13
|
+
const { embedding } = await embed({
|
|
14
|
+
model: embeddingModel,
|
|
15
|
+
value: "test"
|
|
16
|
+
});
|
|
17
|
+
dimensions = embedding.length;
|
|
18
|
+
}
|
|
15
19
|
const embedder = async (texts) => {
|
|
16
20
|
if (texts.length === 0) return [];
|
|
17
21
|
if (texts.length === 1) {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getModelDimensions } from "./model-info.mjs";
|
|
1
2
|
import { createOpenAI } from "@ai-sdk/openai";
|
|
2
3
|
import { embed, embedMany } from "ai";
|
|
3
4
|
function openai(options = {}) {
|
|
@@ -9,11 +10,14 @@ function openai(options = {}) {
|
|
|
9
10
|
apiKey,
|
|
10
11
|
baseURL: baseUrl
|
|
11
12
|
}).textEmbeddingModel(model);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
let dimensions = getModelDimensions(model);
|
|
14
|
+
if (!dimensions) {
|
|
15
|
+
const { embedding } = await embed({
|
|
16
|
+
model: embeddingModel,
|
|
17
|
+
value: "test"
|
|
18
|
+
});
|
|
19
|
+
dimensions = embedding.length;
|
|
20
|
+
}
|
|
17
21
|
const embedder = async (texts) => {
|
|
18
22
|
if (texts.length === 0) return [];
|
|
19
23
|
if (texts.length === 1) {
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { EmbeddingConfig } from "../types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/embeddings/transformers-js.d.ts
|
|
4
|
+
interface TransformersEmbeddingOptions {
|
|
5
|
+
/** Model name (e.g., 'bge-base-en-v1.5' or 'Xenova/bge-base-en-v1.5') */
|
|
6
|
+
model?: string;
|
|
7
|
+
/** Embedding dimensions (auto-detected for known models) */
|
|
8
|
+
dimensions?: number;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Transformers.js embedding provider (local, in-browser compatible)
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```ts
|
|
15
|
+
* import { transformersJs } from 'retriv/embeddings/transformers-js'
|
|
16
|
+
* import { sqliteVec } from 'retriv/db/sqlite-vec'
|
|
17
|
+
*
|
|
18
|
+
* // Auto-resolves model name and dimensions for known models
|
|
19
|
+
* const db = await sqliteVec({
|
|
20
|
+
* path: 'vectors.db',
|
|
21
|
+
* embeddings: transformersJs({ model: 'bge-base-en-v1.5' }),
|
|
22
|
+
* })
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
declare function transformersJs(options?: TransformersEmbeddingOptions): EmbeddingConfig;
|
|
26
|
+
//#endregion
|
|
27
|
+
export { TransformersEmbeddingOptions, transformersJs };
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { getModelDimensions, resolveModelForPreset } from "./model-info.mjs";
|
|
2
|
+
import { rm } from "node:fs/promises";
|
|
3
|
+
import { env, pipeline } from "@huggingface/transformers";
|
|
4
|
+
/**
 * Remove a model's cache directory when a load failure looks like cache corruption.
 *
 * The failure is treated as corruption only when `error` is an `Error` whose
 * message, or whose stringified `cause`, contains "Protobuf parsing failed".
 * Removal is best-effort: a failing `rm` is ignored and the function still
 * reports `true`.
 *
 * @param {unknown} error - The error raised while loading the model.
 * @param {string} model - Model id; the directory removed is `${env.cacheDir}/${model}`.
 * @returns {Promise<boolean>} `true` when removal was attempted (the caller may retry),
 *   `false` when the error does not match or `env.cacheDir` is not set.
 */
async function clearCorruptedCache(error, model) {
	const marker = "Protobuf parsing failed";
	const looksCorrupted = error instanceof Error && (error.message?.includes(marker) || String(error.cause).includes(marker));
	if (!looksCorrupted) return false;
	if (!env.cacheDir) return false;
	const modelCachePath = `${env.cacheDir}/${model}`;
	await rm(modelCachePath, { recursive: true, force: true }).catch(() => {});
	console.warn(`[retriv] Cleared corrupted model cache for ${model}, retrying...`);
	return true;
}
|
|
13
|
+
/**
 * Create a Transformers.js embedding provider.
 *
 * The model name is passed through `resolveModelForPreset` for the
 * "transformers.js" preset, and defaults to "bge-small-en-v1.5".
 *
 * @param {{ model?: string, dimensions?: number }} [options]
 *   `model` is the model name; `dimensions` overrides the value looked up via
 *   `getModelDimensions`.
 * @returns {{ resolve(): Promise<{ embedder: (texts: string[]) => Promise<number[][]>, dimensions: number }> }}
 *   A config whose `resolve()` loads the feature-extraction pipeline and caches
 *   the result, so later calls return the same object.
 *   `resolve()` rejects with an `Error` when the dimensions are neither supplied
 *   nor known, and re-throws any pipeline load failure that is not recovered by
 *   clearing the cache.
 */
function transformersJs(options = {}) {
	const model = resolveModelForPreset(options.model ?? "bge-small-en-v1.5", "transformers.js");
	let cached = null;
	return { async resolve() {
		if (cached) return cached;
		// Validate dimensions before loading anything, so an unknown model is
		// rejected immediately instead of after the pipeline has been loaded.
		const dimensions = options.dimensions ?? getModelDimensions(model);
		if (!dimensions) throw new Error(`Unknown dimensions for model ${model}. Please specify dimensions option.`);
		const loadExtractor = () => pipeline("feature-extraction", model, { dtype: "fp32" });
		let extractor;
		try {
			extractor = await loadExtractor();
		} catch (err) {
			// Retry exactly once, and only when the cache was identified as corrupted and cleared.
			if (!await clearCorruptedCache(err, model)) throw err;
			extractor = await loadExtractor();
		}
		// Texts are embedded one at a time, in input order.
		const embedder = async (texts) => {
			const results = [];
			for (const text of texts) {
				const output = await extractor(text, {
					pooling: "mean",
					normalize: true
				});
				results.push(Array.from(output.data));
			}
			return results;
		};
		cached = {
			embedder,
			dimensions
		};
		return cached;
	} };
}
|
|
42
|
+
export { transformersJs };
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "retriv",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.0
|
|
4
|
+
"version": "0.2.0",
|
|
5
5
|
"description": "Unified document retrieval for search - semantic, full-text, and fuzzy.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -58,9 +58,9 @@
|
|
|
58
58
|
"types": "./dist/embeddings/ollama.d.mts",
|
|
59
59
|
"import": "./dist/embeddings/ollama.mjs"
|
|
60
60
|
},
|
|
61
|
-
"./embeddings/transformers": {
|
|
62
|
-
"types": "./dist/embeddings/transformers.d.mts",
|
|
63
|
-
"import": "./dist/embeddings/transformers.mjs"
|
|
61
|
+
"./embeddings/transformers-js": {
|
|
62
|
+
"types": "./dist/embeddings/transformers-js.d.mts",
|
|
63
|
+
"import": "./dist/embeddings/transformers-js.mjs"
|
|
64
64
|
},
|
|
65
65
|
"./embeddings/mistral": {
|
|
66
66
|
"types": "./dist/embeddings/mistral.d.mts",
|
|
@@ -69,6 +69,10 @@
|
|
|
69
69
|
"./embeddings/cohere": {
|
|
70
70
|
"types": "./dist/embeddings/cohere.d.mts",
|
|
71
71
|
"import": "./dist/embeddings/cohere.mjs"
|
|
72
|
+
},
|
|
73
|
+
"./embeddings/model-info": {
|
|
74
|
+
"types": "./dist/embeddings/model-info.d.mts",
|
|
75
|
+
"import": "./dist/embeddings/model-info.mjs"
|
|
72
76
|
}
|
|
73
77
|
},
|
|
74
78
|
"main": "./dist/index.mjs",
|
|
@@ -85,7 +89,6 @@
|
|
|
85
89
|
"@libsql/client": "^0.14.0 || ^0.15.0 || ^0.16.0 || ^0.17.0",
|
|
86
90
|
"@upstash/vector": "^1.0.0",
|
|
87
91
|
"ai": "^4.0.0 || ^5.0.0 || ^6.0.0",
|
|
88
|
-
"better-sqlite3": "^11.0.0 || ^12.0.0",
|
|
89
92
|
"ollama-ai-provider-v2": "^1.0.0",
|
|
90
93
|
"pg": "^8.0.0",
|
|
91
94
|
"sqlite-vec": "^0.1.0-alpha.0"
|
|
@@ -115,9 +118,6 @@
|
|
|
115
118
|
"ai": {
|
|
116
119
|
"optional": true
|
|
117
120
|
},
|
|
118
|
-
"better-sqlite3": {
|
|
119
|
-
"optional": true
|
|
120
|
-
},
|
|
121
121
|
"ollama-ai-provider-v2": {
|
|
122
122
|
"optional": true
|
|
123
123
|
},
|
|
@@ -138,20 +138,19 @@
|
|
|
138
138
|
"@cloudflare/workers-types": "^4.20260131.0",
|
|
139
139
|
"@huggingface/transformers": "^3.8.1",
|
|
140
140
|
"@libsql/client": "^0.17.0",
|
|
141
|
-
"@types/better-sqlite3": "^7.6.13",
|
|
142
141
|
"@types/node": "^25.2.0",
|
|
143
|
-
"@types/pg": "^8.
|
|
144
|
-
"@upstash/vector": "^1.
|
|
142
|
+
"@types/pg": "^8.16.0",
|
|
143
|
+
"@upstash/vector": "^1.2.2",
|
|
145
144
|
"@vitest/coverage-v8": "^4.0.18",
|
|
146
145
|
"ai": "^6.0.67",
|
|
147
|
-
"
|
|
148
|
-
"bumpp": "^10.
|
|
146
|
+
"ai-sdk-provider-gemini-cli": "^2.0.1",
|
|
147
|
+
"bumpp": "^10.4.0",
|
|
149
148
|
"eslint": "^9.39.2",
|
|
150
|
-
"obuild": "^0.4.
|
|
149
|
+
"obuild": "^0.4.22",
|
|
151
150
|
"pg": "^8.18.0",
|
|
152
151
|
"sqlite-vec": "^0.1.7-alpha.2",
|
|
153
152
|
"typescript": "^5.9.3",
|
|
154
|
-
"vitest": "^4.0.
|
|
153
|
+
"vitest": "^4.0.18",
|
|
155
154
|
"zod": "^4.3.6"
|
|
156
155
|
},
|
|
157
156
|
"scripts": {
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
import { EmbeddingConfig } from "../types.mjs";
|
|
2
|
-
|
|
3
|
-
//#region src/embeddings/transformers.d.ts
|
|
4
|
-
interface TransformersEmbeddingOptions {
|
|
5
|
-
/** Model name (default: Xenova/bge-base-en-v1.5) */
|
|
6
|
-
model?: string;
|
|
7
|
-
}
|
|
8
|
-
/**
|
|
9
|
-
* Transformers.js embedding provider (local, in-browser compatible)
|
|
10
|
-
*
|
|
11
|
-
* @example
|
|
12
|
-
* ```ts
|
|
13
|
-
* import { transformers } from 'retriv/embeddings/transformers'
|
|
14
|
-
* import { sqliteVec } from 'retriv/db/sqlite-vec'
|
|
15
|
-
*
|
|
16
|
-
* const db = await sqliteVec({
|
|
17
|
-
* path: 'vectors.db',
|
|
18
|
-
* embeddings: transformers({ model: 'Xenova/bge-base-en-v1.5' }),
|
|
19
|
-
* })
|
|
20
|
-
* ```
|
|
21
|
-
*/
|
|
22
|
-
declare function transformers(options?: TransformersEmbeddingOptions): EmbeddingConfig;
|
|
23
|
-
//#endregion
|
|
24
|
-
export { TransformersEmbeddingOptions, transformers };
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
import { pipeline } from "@huggingface/transformers";
|
|
2
|
-
function transformers(options = {}) {
|
|
3
|
-
const { model = "Xenova/bge-base-en-v1.5" } = options;
|
|
4
|
-
let cached = null;
|
|
5
|
-
return { async resolve() {
|
|
6
|
-
if (cached) return cached;
|
|
7
|
-
const extractor = await pipeline("feature-extraction", model, { dtype: "fp32" });
|
|
8
|
-
const embedder = async (texts) => {
|
|
9
|
-
const results = [];
|
|
10
|
-
for (const text of texts) {
|
|
11
|
-
const output = await extractor(text, {
|
|
12
|
-
pooling: "mean",
|
|
13
|
-
normalize: true
|
|
14
|
-
});
|
|
15
|
-
results.push(Array.from(output.data));
|
|
16
|
-
}
|
|
17
|
-
return results;
|
|
18
|
-
};
|
|
19
|
-
cached = {
|
|
20
|
-
embedder,
|
|
21
|
-
dimensions: (await embedder(["test"]))[0].length
|
|
22
|
-
};
|
|
23
|
-
return cached;
|
|
24
|
-
} };
|
|
25
|
-
}
|
|
26
|
-
export { transformers };
|