retriv 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db/sqlite-fts.d.mts +1 -0
- package/dist/db/sqlite-fts.mjs +25 -16
- package/dist/embeddings/cohere.mjs +9 -5
- package/dist/embeddings/google.mjs +9 -5
- package/dist/embeddings/mistral.mjs +9 -5
- package/dist/embeddings/model-info.d.mts +26 -0
- package/dist/embeddings/model-info.mjs +65 -0
- package/dist/embeddings/ollama.mjs +9 -5
- package/dist/embeddings/openai.mjs +9 -5
- package/dist/embeddings/transformers-js.d.mts +27 -0
- package/dist/embeddings/transformers-js.mjs +42 -0
- package/package.json +8 -10
- package/dist/embeddings/transformers.d.mts +0 -24
- package/dist/embeddings/transformers.mjs +0 -26
package/dist/db/sqlite-fts.d.mts
CHANGED
package/dist/db/sqlite-fts.mjs
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { mkdirSync } from "node:fs";
|
|
2
2
|
import { dirname } from "node:path";
|
|
3
|
-
import Database from "better-sqlite3";
|
|
4
3
|
async function sqliteFts(config = {}) {
|
|
5
4
|
const dbPath = config.path || ":memory:";
|
|
5
|
+
const nodeSqlite = globalThis.process?.getBuiltinModule?.("node:sqlite");
|
|
6
|
+
if (!nodeSqlite) throw new Error("node:sqlite not available. Requires Node.js >= 22.5");
|
|
6
7
|
if (dbPath !== ":memory:") mkdirSync(dirname(dbPath), { recursive: true });
|
|
7
|
-
const db = new
|
|
8
|
+
const db = new nodeSqlite.DatabaseSync(dbPath);
|
|
8
9
|
db.exec(`
|
|
9
10
|
CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
|
|
10
11
|
id,
|
|
@@ -15,14 +16,18 @@ async function sqliteFts(config = {}) {
|
|
|
15
16
|
`);
|
|
16
17
|
return {
|
|
17
18
|
async index(docs) {
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
19
|
+
db.prepare("BEGIN").run();
|
|
20
|
+
try {
|
|
21
|
+
for (const doc of docs) {
|
|
22
|
+
db.prepare("DELETE FROM documents_fts WHERE id = ?").run(doc.id);
|
|
23
|
+
db.prepare("INSERT INTO documents_fts (id, content, metadata) VALUES (?, ?, ?)").run(doc.id, doc.content, doc.metadata ? JSON.stringify(doc.metadata) : null);
|
|
24
|
+
}
|
|
25
|
+
db.prepare("COMMIT").run();
|
|
26
|
+
return { count: docs.length };
|
|
27
|
+
} catch (error) {
|
|
28
|
+
db.prepare("ROLLBACK").run();
|
|
29
|
+
throw error;
|
|
30
|
+
}
|
|
26
31
|
},
|
|
27
32
|
async search(query, options = {}) {
|
|
28
33
|
const { limit = 10, returnContent = false, returnMetadata = true } = options;
|
|
@@ -50,17 +55,21 @@ async function sqliteFts(config = {}) {
|
|
|
50
55
|
});
|
|
51
56
|
},
|
|
52
57
|
async remove(ids) {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
for (const id of
|
|
56
|
-
|
|
57
|
-
|
|
58
|
+
db.prepare("BEGIN").run();
|
|
59
|
+
try {
|
|
60
|
+
for (const id of ids) db.prepare("DELETE FROM documents_fts WHERE id = ?").run(id);
|
|
61
|
+
db.prepare("COMMIT").run();
|
|
62
|
+
return { count: ids.length };
|
|
63
|
+
} catch (error) {
|
|
64
|
+
db.prepare("ROLLBACK").run();
|
|
65
|
+
throw error;
|
|
66
|
+
}
|
|
58
67
|
},
|
|
59
68
|
async clear() {
|
|
60
69
|
db.exec("DELETE FROM documents_fts");
|
|
61
70
|
},
|
|
62
71
|
async close() {
|
|
63
|
-
db.close();
|
|
72
|
+
db.close?.();
|
|
64
73
|
}
|
|
65
74
|
};
|
|
66
75
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getModelDimensions } from "./model-info.mjs";
|
|
1
2
|
import { embed, embedMany } from "ai";
|
|
2
3
|
import { createCohere } from "@ai-sdk/cohere";
|
|
3
4
|
function cohere(options = {}) {
|
|
@@ -9,11 +10,14 @@ function cohere(options = {}) {
|
|
|
9
10
|
apiKey,
|
|
10
11
|
baseURL: baseUrl
|
|
11
12
|
}).textEmbeddingModel(model);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
let dimensions = getModelDimensions(model);
|
|
14
|
+
if (!dimensions) {
|
|
15
|
+
const { embedding } = await embed({
|
|
16
|
+
model: embeddingModel,
|
|
17
|
+
value: "test"
|
|
18
|
+
});
|
|
19
|
+
dimensions = embedding.length;
|
|
20
|
+
}
|
|
17
21
|
const embedder = async (texts) => {
|
|
18
22
|
if (texts.length === 0) return [];
|
|
19
23
|
if (texts.length === 1) {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getModelDimensions } from "./model-info.mjs";
|
|
1
2
|
import { embed, embedMany } from "ai";
|
|
2
3
|
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
3
4
|
function google(options = {}) {
|
|
@@ -9,11 +10,14 @@ function google(options = {}) {
|
|
|
9
10
|
apiKey,
|
|
10
11
|
baseURL: baseUrl
|
|
11
12
|
}).textEmbeddingModel(model);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
let dimensions = getModelDimensions(model);
|
|
14
|
+
if (!dimensions) {
|
|
15
|
+
const { embedding } = await embed({
|
|
16
|
+
model: embeddingModel,
|
|
17
|
+
value: "test"
|
|
18
|
+
});
|
|
19
|
+
dimensions = embedding.length;
|
|
20
|
+
}
|
|
17
21
|
const embedder = async (texts) => {
|
|
18
22
|
if (texts.length === 0) return [];
|
|
19
23
|
if (texts.length === 1) {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getModelDimensions } from "./model-info.mjs";
|
|
1
2
|
import { embed, embedMany } from "ai";
|
|
2
3
|
import { createMistral } from "@ai-sdk/mistral";
|
|
3
4
|
function mistral(options = {}) {
|
|
@@ -9,11 +10,14 @@ function mistral(options = {}) {
|
|
|
9
10
|
apiKey,
|
|
10
11
|
baseURL: baseUrl
|
|
11
12
|
}).textEmbeddingModel(model);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
let dimensions = getModelDimensions(model);
|
|
14
|
+
if (!dimensions) {
|
|
15
|
+
const { embedding } = await embed({
|
|
16
|
+
model: embeddingModel,
|
|
17
|
+
value: "test"
|
|
18
|
+
});
|
|
19
|
+
dimensions = embedding.length;
|
|
20
|
+
}
|
|
17
21
|
const embedder = async (texts) => {
|
|
18
22
|
if (texts.length === 0) return [];
|
|
19
23
|
if (texts.length === 1) {
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
//#region src/embeddings/model-info.d.ts
|
|
2
|
+
/**
|
|
3
|
+
* Embedding provider types
|
|
4
|
+
*/
|
|
5
|
+
type EmbeddingPreset = 'openai' | 'google' | 'mistral' | 'cohere' | 'ollama' | 'transformers.js';
|
|
6
|
+
/**
|
|
7
|
+
* Default models per provider
|
|
8
|
+
*/
|
|
9
|
+
declare const DEFAULT_MODELS: Record<EmbeddingPreset, {
|
|
10
|
+
model: string;
|
|
11
|
+
dimensions: number;
|
|
12
|
+
}>;
|
|
13
|
+
/**
|
|
14
|
+
* Known dimensions for common models
|
|
15
|
+
*/
|
|
16
|
+
declare const MODEL_DIMENSIONS: Record<string, number>;
|
|
17
|
+
/**
|
|
18
|
+
* Get dimensions for a model (returns undefined if unknown)
|
|
19
|
+
*/
|
|
20
|
+
declare function getModelDimensions(model: string): number | undefined;
|
|
21
|
+
/**
|
|
22
|
+
* Resolve model name for a specific preset
|
|
23
|
+
*/
|
|
24
|
+
declare function resolveModelForPreset(model: string, preset: string): string;
|
|
25
|
+
//#endregion
|
|
26
|
+
export { DEFAULT_MODELS, EmbeddingPreset, MODEL_DIMENSIONS, getModelDimensions, resolveModelForPreset };
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
const DEFAULT_MODELS = {
|
|
2
|
+
"openai": {
|
|
3
|
+
model: "text-embedding-3-small",
|
|
4
|
+
dimensions: 1536
|
|
5
|
+
},
|
|
6
|
+
"google": {
|
|
7
|
+
model: "text-embedding-004",
|
|
8
|
+
dimensions: 768
|
|
9
|
+
},
|
|
10
|
+
"mistral": {
|
|
11
|
+
model: "mistral-embed",
|
|
12
|
+
dimensions: 1024
|
|
13
|
+
},
|
|
14
|
+
"cohere": {
|
|
15
|
+
model: "embed-english-v3.0",
|
|
16
|
+
dimensions: 1024
|
|
17
|
+
},
|
|
18
|
+
"ollama": {
|
|
19
|
+
model: "nomic-embed-text",
|
|
20
|
+
dimensions: 768
|
|
21
|
+
},
|
|
22
|
+
"transformers.js": {
|
|
23
|
+
model: "Xenova/bge-base-en-v1.5",
|
|
24
|
+
dimensions: 768
|
|
25
|
+
}
|
|
26
|
+
};
|
|
27
|
+
/**
 * Known embedding sizes, keyed by bare model name (no organisation prefix).
 */
const MODEL_DIMENSIONS = {
  "text-embedding-3-small": 1536,
  "text-embedding-3-large": 3072,
  "text-embedding-ada-002": 1536,
  "text-embedding-004": 768,
  "embedding-001": 768,
  "mistral-embed": 1024,
  "embed-english-v3.0": 1024,
  "embed-multilingual-v3.0": 1024,
  "embed-english-light-v3.0": 384,
  "embed-multilingual-light-v3.0": 384,
  "nomic-embed-text": 768,
  "mxbai-embed-large": 1024,
  "all-minilm": 384,
  "snowflake-arctic-embed": 1024,
  "bge-small-en-v1.5": 384,
  "bge-base-en-v1.5": 768,
  "bge-large-en-v1.5": 1024,
  "bge-m3": 1024,
  "all-MiniLM-L6-v2": 384,
  // NOTE(review): EmbeddingGemma's full output size is believed to be 768, with
  // 256 being one of its truncated sizes — confirm this matches what the
  // embedder actually returns for this model.
  "embeddinggemma-300m": 256,
  "plamo-embedding-1b": 1024
};

/**
 * Look up the embedding size of a model.
 *
 * The name is tried as given first, then with a leading `Xenova/` or
 * `onnx-community/` prefix removed.
 *
 * @param {string} model Model name, with or without one of those prefixes.
 * @returns {number | undefined} The known size, or `undefined` when the model
 *   is not in the table (or `model` is not a string).
 */
function getModelDimensions(model) {
  if (typeof model !== "string") return undefined;
  const candidates = [model, model.replace(/^(Xenova\/|onnx-community\/)/, "")];
  for (const name of candidates) {
    // Own-property check: a plain `MODEL_DIMENSIONS[name]` would also return
    // inherited members such as `constructor` or `toString`.
    if (Object.hasOwn(MODEL_DIMENSIONS, name)) return MODEL_DIMENSIONS[name];
  }
  return undefined;
}
|
|
54
|
+
/**
 * Per-preset aliases from a short model name to the full repository id.
 */
const MODEL_MAPPINGS = {
  "transformers.js": {
    "bge-base-en-v1.5": "Xenova/bge-base-en-v1.5",
    "bge-large-en-v1.5": "onnx-community/bge-large-en-v1.5",
    "bge-small-en-v1.5": "Xenova/bge-small-en-v1.5",
    "bge-m3": "Xenova/bge-m3",
    "all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
    "embeddinggemma-300m": "onnx-community/embeddinggemma-300m-ONNX"
  }
};

/**
 * Translate a model name into the id registered for the given preset.
 *
 * @param {string} model Model name as supplied by the caller.
 * @param {string} preset Preset key to look the alias up under.
 * @returns {string} The mapped id, or `model` unchanged when the preset or the
 *   model has no alias entry.
 */
function resolveModelForPreset(model, preset) {
  // Only own entries count: plain bracket lookups would also follow the
  // prototype chain (e.g. preset "constructor" + model "name" gave "Object").
  if (!Object.hasOwn(MODEL_MAPPINGS, preset)) return model;
  const aliases = MODEL_MAPPINGS[preset];
  return Object.hasOwn(aliases, model) ? aliases[model] : model;
}
|
|
65
|
+
export { DEFAULT_MODELS, MODEL_DIMENSIONS, getModelDimensions, resolveModelForPreset };
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getModelDimensions } from "./model-info.mjs";
|
|
1
2
|
import { embed, embedMany } from "ai";
|
|
2
3
|
import { createOllama } from "ollama-ai-provider-v2";
|
|
3
4
|
function ollama(options = {}) {
|
|
@@ -7,11 +8,14 @@ function ollama(options = {}) {
|
|
|
7
8
|
if (cached) return cached;
|
|
8
9
|
const ollamaBaseUrl = baseUrl || process.env.OLLAMA_BASE_URL || "http://localhost:11434";
|
|
9
10
|
const embeddingModel = createOllama({ baseURL: ollamaBaseUrl.endsWith("/api") ? ollamaBaseUrl : `${ollamaBaseUrl}/api` }).textEmbeddingModel(model);
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
let dimensions = getModelDimensions(model);
|
|
12
|
+
if (!dimensions) {
|
|
13
|
+
const { embedding } = await embed({
|
|
14
|
+
model: embeddingModel,
|
|
15
|
+
value: "test"
|
|
16
|
+
});
|
|
17
|
+
dimensions = embedding.length;
|
|
18
|
+
}
|
|
15
19
|
const embedder = async (texts) => {
|
|
16
20
|
if (texts.length === 0) return [];
|
|
17
21
|
if (texts.length === 1) {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { getModelDimensions } from "./model-info.mjs";
|
|
1
2
|
import { createOpenAI } from "@ai-sdk/openai";
|
|
2
3
|
import { embed, embedMany } from "ai";
|
|
3
4
|
function openai(options = {}) {
|
|
@@ -9,11 +10,14 @@ function openai(options = {}) {
|
|
|
9
10
|
apiKey,
|
|
10
11
|
baseURL: baseUrl
|
|
11
12
|
}).textEmbeddingModel(model);
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
let dimensions = getModelDimensions(model);
|
|
14
|
+
if (!dimensions) {
|
|
15
|
+
const { embedding } = await embed({
|
|
16
|
+
model: embeddingModel,
|
|
17
|
+
value: "test"
|
|
18
|
+
});
|
|
19
|
+
dimensions = embedding.length;
|
|
20
|
+
}
|
|
17
21
|
const embedder = async (texts) => {
|
|
18
22
|
if (texts.length === 0) return [];
|
|
19
23
|
if (texts.length === 1) {
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { EmbeddingConfig } from "../types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/embeddings/transformers-js.d.ts
|
|
4
|
+
interface TransformersEmbeddingOptions {
|
|
5
|
+
/** Model name (e.g., 'bge-base-en-v1.5' or 'Xenova/bge-base-en-v1.5') */
|
|
6
|
+
model?: string;
|
|
7
|
+
/** Embedding dimensions (auto-detected for known models) */
|
|
8
|
+
dimensions?: number;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Transformers.js embedding provider (local, in-browser compatible)
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```ts
|
|
15
|
+
* import { transformersJs } from 'retriv/embeddings/transformers-js'
|
|
16
|
+
* import { sqliteVec } from 'retriv/db/sqlite-vec'
|
|
17
|
+
*
|
|
18
|
+
* // Auto-resolves model name and dimensions for known models
|
|
19
|
+
* const db = await sqliteVec({
|
|
20
|
+
* path: 'vectors.db',
|
|
21
|
+
* embeddings: transformersJs({ model: 'bge-base-en-v1.5' }),
|
|
22
|
+
* })
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
declare function transformersJs(options?: TransformersEmbeddingOptions): EmbeddingConfig;
|
|
26
|
+
//#endregion
|
|
27
|
+
export { TransformersEmbeddingOptions, transformersJs };
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { getModelDimensions, resolveModelForPreset } from "./model-info.mjs";
|
|
2
|
+
import { rm } from "node:fs/promises";
|
|
3
|
+
import { env, pipeline } from "@huggingface/transformers";
|
|
4
|
+
/**
 * If `error` looks like a corrupted model download, delete that model's
 * directory under `env.cacheDir` so the next load attempt starts clean.
 *
 * An error counts as corruption when it is an `Error` whose message, or whose
 * stringified `cause`, contains "Protobuf parsing failed".
 *
 * @param {unknown} error The error raised while loading the model.
 * @param {string} model Model id; used as the sub-path inside the cache directory.
 * @returns {Promise<boolean>} `true` only when the cache directory was removed
 *   and a retry is worthwhile; `false` otherwise.
 */
async function clearCorruptedCache(error, model) {
  if (!(error instanceof Error)) return false;
  const marker = "Protobuf parsing failed";
  const looksCorrupted = error.message?.includes(marker) || String(error.cause).includes(marker);
  if (!looksCorrupted || !env.cacheDir) return false;
  try {
    await rm(`${env.cacheDir}/${model}`, {
      recursive: true,
      force: true
    });
  } catch (rmError) {
    // Nothing was cleared, so a retry would hit the same corrupted files;
    // report the failure and let the caller surface the original error.
    console.warn(`[retriv] Failed to clear corrupted model cache for ${model}: ${String(rmError)}`);
    return false;
  }
  console.warn(`[retriv] Cleared corrupted model cache for ${model}, retrying...`);
  return true;
}
|
|
13
|
+
/**
 * Create an embedding config backed by a Transformers.js
 * "feature-extraction" pipeline.
 *
 * @param {{ model?: string, dimensions?: number }} [options]
 *   `model` defaults to "bge-base-en-v1.5" and is passed through
 *   `resolveModelForPreset`; `dimensions` overrides the table lookup done by
 *   `getModelDimensions`.
 * @returns {{ resolve(): Promise<{ embedder: (texts: string[]) => Promise<number[][]>, dimensions: number }> }}
 *   `resolve()` loads the pipeline on first use and returns the same
 *   `{ embedder, dimensions }` object on later calls.
 * @throws {Error} From `resolve()`, when the dimensions are neither supplied
 *   nor known, or when the pipeline cannot be loaded.
 */
function transformersJs(options = {}) {
  const model = resolveModelForPreset(options.model ?? "bge-base-en-v1.5", "transformers.js");
  const loadExtractor = () => pipeline("feature-extraction", model, { dtype: "fp32" });
  // Holds the in-flight (or settled) load so concurrent resolve() calls share
  // a single pipeline instead of each loading the model.
  let pending = null;

  async function load() {
    // Validate before the expensive model load so a missing `dimensions`
    // option fails immediately.
    const dimensions = options.dimensions ?? getModelDimensions(model);
    if (!dimensions) throw new Error(`Unknown dimensions for model ${model}. Please specify dimensions option.`);

    let extractor;
    try {
      extractor = await loadExtractor();
    } catch (err) {
      // Retry once, and only when the corrupted cache was actually cleared.
      if (!(await clearCorruptedCache(err, model))) throw err;
      extractor = await loadExtractor();
    }

    const embedder = async (texts) => {
      const results = [];
      for (const text of texts) {
        const output = await extractor(text, {
          pooling: "mean",
          normalize: true
        });
        results.push(Array.from(output.data));
      }
      return results;
    };
    return { embedder, dimensions };
  }

  return {
    async resolve() {
      if (!pending) {
        pending = load().catch((err) => {
          // Forget a failed attempt so a later resolve() can try again.
          pending = null;
          throw err;
        });
      }
      return pending;
    }
  };
}
|
|
42
|
+
export { transformersJs };
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "retriv",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.0
|
|
4
|
+
"version": "0.1.0",
|
|
5
5
|
"description": "Unified document retrieval for search - semantic, full-text, and fuzzy.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -58,9 +58,9 @@
|
|
|
58
58
|
"types": "./dist/embeddings/ollama.d.mts",
|
|
59
59
|
"import": "./dist/embeddings/ollama.mjs"
|
|
60
60
|
},
|
|
61
|
-
"./embeddings/transformers": {
|
|
62
|
-
"types": "./dist/embeddings/transformers.d.mts",
|
|
63
|
-
"import": "./dist/embeddings/transformers.mjs"
|
|
61
|
+
"./embeddings/transformers-js": {
|
|
62
|
+
"types": "./dist/embeddings/transformers-js.d.mts",
|
|
63
|
+
"import": "./dist/embeddings/transformers-js.mjs"
|
|
64
64
|
},
|
|
65
65
|
"./embeddings/mistral": {
|
|
66
66
|
"types": "./dist/embeddings/mistral.d.mts",
|
|
@@ -69,6 +69,10 @@
|
|
|
69
69
|
"./embeddings/cohere": {
|
|
70
70
|
"types": "./dist/embeddings/cohere.d.mts",
|
|
71
71
|
"import": "./dist/embeddings/cohere.mjs"
|
|
72
|
+
},
|
|
73
|
+
"./embeddings/model-info": {
|
|
74
|
+
"types": "./dist/embeddings/model-info.d.mts",
|
|
75
|
+
"import": "./dist/embeddings/model-info.mjs"
|
|
72
76
|
}
|
|
73
77
|
},
|
|
74
78
|
"main": "./dist/index.mjs",
|
|
@@ -85,7 +89,6 @@
|
|
|
85
89
|
"@libsql/client": "^0.14.0 || ^0.15.0 || ^0.16.0 || ^0.17.0",
|
|
86
90
|
"@upstash/vector": "^1.0.0",
|
|
87
91
|
"ai": "^4.0.0 || ^5.0.0 || ^6.0.0",
|
|
88
|
-
"better-sqlite3": "^11.0.0 || ^12.0.0",
|
|
89
92
|
"ollama-ai-provider-v2": "^1.0.0",
|
|
90
93
|
"pg": "^8.0.0",
|
|
91
94
|
"sqlite-vec": "^0.1.0-alpha.0"
|
|
@@ -115,9 +118,6 @@
|
|
|
115
118
|
"ai": {
|
|
116
119
|
"optional": true
|
|
117
120
|
},
|
|
118
|
-
"better-sqlite3": {
|
|
119
|
-
"optional": true
|
|
120
|
-
},
|
|
121
121
|
"ollama-ai-provider-v2": {
|
|
122
122
|
"optional": true
|
|
123
123
|
},
|
|
@@ -138,13 +138,11 @@
|
|
|
138
138
|
"@cloudflare/workers-types": "^4.20260131.0",
|
|
139
139
|
"@huggingface/transformers": "^3.8.1",
|
|
140
140
|
"@libsql/client": "^0.17.0",
|
|
141
|
-
"@types/better-sqlite3": "^7.6.13",
|
|
142
141
|
"@types/node": "^25.2.0",
|
|
143
142
|
"@types/pg": "^8.11.0",
|
|
144
143
|
"@upstash/vector": "^1.1.0",
|
|
145
144
|
"@vitest/coverage-v8": "^4.0.18",
|
|
146
145
|
"ai": "^6.0.67",
|
|
147
|
-
"better-sqlite3": "^12.5.0",
|
|
148
146
|
"bumpp": "^10.3.2",
|
|
149
147
|
"eslint": "^9.39.2",
|
|
150
148
|
"obuild": "^0.4.14",
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
import { EmbeddingConfig } from "../types.mjs";
|
|
2
|
-
|
|
3
|
-
//#region src/embeddings/transformers.d.ts
|
|
4
|
-
interface TransformersEmbeddingOptions {
|
|
5
|
-
/** Model name (default: Xenova/bge-base-en-v1.5) */
|
|
6
|
-
model?: string;
|
|
7
|
-
}
|
|
8
|
-
/**
|
|
9
|
-
* Transformers.js embedding provider (local, in-browser compatible)
|
|
10
|
-
*
|
|
11
|
-
* @example
|
|
12
|
-
* ```ts
|
|
13
|
-
* import { transformers } from 'retriv/embeddings/transformers'
|
|
14
|
-
* import { sqliteVec } from 'retriv/db/sqlite-vec'
|
|
15
|
-
*
|
|
16
|
-
* const db = await sqliteVec({
|
|
17
|
-
* path: 'vectors.db',
|
|
18
|
-
* embeddings: transformers({ model: 'Xenova/bge-base-en-v1.5' }),
|
|
19
|
-
* })
|
|
20
|
-
* ```
|
|
21
|
-
*/
|
|
22
|
-
declare function transformers(options?: TransformersEmbeddingOptions): EmbeddingConfig;
|
|
23
|
-
//#endregion
|
|
24
|
-
export { TransformersEmbeddingOptions, transformers };
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
import { pipeline } from "@huggingface/transformers";
|
|
2
|
-
function transformers(options = {}) {
|
|
3
|
-
const { model = "Xenova/bge-base-en-v1.5" } = options;
|
|
4
|
-
let cached = null;
|
|
5
|
-
return { async resolve() {
|
|
6
|
-
if (cached) return cached;
|
|
7
|
-
const extractor = await pipeline("feature-extraction", model, { dtype: "fp32" });
|
|
8
|
-
const embedder = async (texts) => {
|
|
9
|
-
const results = [];
|
|
10
|
-
for (const text of texts) {
|
|
11
|
-
const output = await extractor(text, {
|
|
12
|
-
pooling: "mean",
|
|
13
|
-
normalize: true
|
|
14
|
-
});
|
|
15
|
-
results.push(Array.from(output.data));
|
|
16
|
-
}
|
|
17
|
-
return results;
|
|
18
|
-
};
|
|
19
|
-
cached = {
|
|
20
|
-
embedder,
|
|
21
|
-
dimensions: (await embedder(["test"]))[0].length
|
|
22
|
-
};
|
|
23
|
-
return cached;
|
|
24
|
-
} };
|
|
25
|
-
}
|
|
26
|
-
export { transformers };
|