retriv 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +282 -0
- package/dist/_chunks/split-text.mjs +85 -0
- package/dist/db/cloudflare.d.mts +36 -0
- package/dist/db/cloudflare.mjs +55 -0
- package/dist/db/libsql.d.mts +30 -0
- package/dist/db/libsql.mjs +87 -0
- package/dist/db/pgvector.d.mts +30 -0
- package/dist/db/pgvector.mjs +80 -0
- package/dist/db/sqlite-fts.d.mts +23 -0
- package/dist/db/sqlite-fts.mjs +68 -0
- package/dist/db/sqlite-vec.d.mts +27 -0
- package/dist/db/sqlite-vec.mjs +108 -0
- package/dist/db/upstash.d.mts +28 -0
- package/dist/db/upstash.mjs +56 -0
- package/dist/embeddings/cohere.d.mts +28 -0
- package/dist/embeddings/cohere.mjs +39 -0
- package/dist/embeddings/google.d.mts +28 -0
- package/dist/embeddings/google.mjs +39 -0
- package/dist/embeddings/mistral.d.mts +28 -0
- package/dist/embeddings/mistral.mjs +39 -0
- package/dist/embeddings/ollama.d.mts +26 -0
- package/dist/embeddings/ollama.mjs +37 -0
- package/dist/embeddings/openai.d.mts +28 -0
- package/dist/embeddings/openai.mjs +39 -0
- package/dist/embeddings/resolve.d.mts +10 -0
- package/dist/embeddings/resolve.mjs +4 -0
- package/dist/embeddings/transformers.d.mts +24 -0
- package/dist/embeddings/transformers.mjs +26 -0
- package/dist/index.d.mts +3 -0
- package/dist/index.mjs +2 -0
- package/dist/retriv.d.mts +9 -0
- package/dist/retriv.mjs +112 -0
- package/dist/types.d.mts +203 -0
- package/dist/types.mjs +1 -0
- package/dist/utils/split-text.d.mts +23 -0
- package/dist/utils/split-text.mjs +2 -0
- package/package.json +167 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { resolveEmbedding } from "../embeddings/resolve.mjs";
|
|
2
|
+
import pg from "pg";
|
|
3
|
+
/**
 * Create a PostgreSQL + pgvector search provider.
 *
 * On creation this resolves the embedding provider, opens a `pg` pool, makes
 * sure the `vector` extension and the target table exist, and tries to create
 * an ivfflat index (best effort).
 *
 * @param {object} config
 * @param {string} config.url - Postgres connection string (required).
 * @param {object} config.embeddings - Embedding config handed to `resolveEmbedding` (required).
 * @param {string} [config.table="vectors"] - Table name. It is interpolated into
 *   the SQL text unescaped, so it must come from trusted configuration.
 * @param {string} [config.metric="cosine"] - "cosine" or "euclidean"; any other
 *   value selects the inner-product operator and operator class.
 * @returns {Promise<object>} Provider exposing `index`, `search`, `remove`, `clear` and `close`.
 * @throws {Error} When `url` or `embeddings` is missing, or when schema setup fails.
 */
async function pgvector(config) {
  const { url, table = "vectors", metric = "cosine" } = config;
  if (!url) throw new Error("[pgvector] url is required");
  if (!config.embeddings) throw new Error("[pgvector] embeddings is required");
  const { embedder, dimensions } = await resolveEmbedding(config.embeddings);
  const pool = new pg.Pool({ connectionString: url });

  const indexName = `${table}_embedding_idx`;
  const opClass = metric === "cosine" ? "vector_cosine_ops" : metric === "euclidean" ? "vector_l2_ops" : "vector_ip_ops";
  const distanceOp = metric === "cosine" ? "<=>" : metric === "euclidean" ? "<->" : "<#>";

  try {
    await pool.query("CREATE EXTENSION IF NOT EXISTS vector");
    await pool.query(`
      CREATE TABLE IF NOT EXISTS ${table} (
        id TEXT PRIMARY KEY,
        content TEXT,
        metadata JSONB,
        embedding vector(${dimensions})
      )
    `);
    // Index creation is deliberately best effort: search still works
    // (sequentially scanned) when the index cannot be built.
    await pool.query(`
      CREATE INDEX IF NOT EXISTS ${indexName}
      ON ${table} USING ivfflat (embedding ${opClass})
      WITH (lists = 100)
    `).catch(() => {});
  } catch (error) {
    // Do not leak the pool (and its sockets) when setup fails; the setup
    // error is the one worth reporting, so a failing end() is ignored.
    await pool.end().catch(() => {});
    throw error;
  }

  const upsertSql = `INSERT INTO ${table} (id, content, metadata, embedding)
    VALUES ($1, $2, $3, $4)
    ON CONFLICT (id) DO UPDATE SET
      content = EXCLUDED.content,
      metadata = EXCLUDED.metadata,
      embedding = EXCLUDED.embedding`;

  // pgvector accepts vectors in their text form: "[1,2,3]".
  const toVectorLiteral = (values) => `[${values.join(",")}]`;

  // Map an operator result to a similarity score in [0, 1], higher = closer.
  const toScore = (distance) => {
    if (metric === "inner_product") {
      // `<#>` yields the NEGATIVE inner product, so the inner product is
      // -distance; (ip + 1) / 2 maps [-1, 1] onto [0, 1].
      return Math.max(0, Math.min(1, (1 - distance) / 2));
    }
    return Math.max(0, 1 - distance);
  };

  return {
    /**
     * Embed and upsert documents. All rows are written in one transaction,
     * so a failure leaves the table untouched.
     * @param {Array<{id: string, content: string, metadata?: object}>} docs
     * @returns {Promise<{count: number}>}
     */
    async index(docs) {
      if (docs.length === 0) return { count: 0 };
      const embeddings = await embedder(docs.map((d) => d.content));
      if (embeddings.length !== docs.length) throw new Error(`Embedding count mismatch: expected ${docs.length}, got ${embeddings.length}`);
      // A transaction must run on a single connection, hence a checked-out client.
      const client = await pool.connect();
      try {
        await client.query("BEGIN");
        for (const [i, doc] of docs.entries()) {
          await client.query(upsertSql, [
            doc.id,
            doc.content,
            doc.metadata || null,
            toVectorLiteral(embeddings[i])
          ]);
        }
        await client.query("COMMIT");
      } catch (error) {
        // Keep the original failure even if the rollback itself fails.
        await client.query("ROLLBACK").catch(() => {});
        throw error;
      } finally {
        client.release();
      }
      return { count: docs.length };
    },

    /**
     * Embed the query and return the nearest rows.
     * @param {string} query
     * @param {{limit?: number, returnContent?: boolean, returnMetadata?: boolean}} [options]
     * @returns {Promise<Array<{id: string, score: number, content?: string, metadata?: object}>>}
     */
    async search(query, options = {}) {
      const { limit = 10, returnContent = false, returnMetadata = true } = options;
      const [embedding] = await embedder([query]);
      if (!embedding) throw new Error("Failed to generate query embedding");
      const { rows } = await pool.query(`SELECT id, content, metadata, embedding ${distanceOp} $1::vector as distance
        FROM ${table}
        ORDER BY embedding ${distanceOp} $1::vector
        LIMIT $2`, [toVectorLiteral(embedding), limit]);
      return rows.map((row) => {
        const searchResult = {
          id: row.id,
          score: toScore(row.distance)
        };
        if (returnContent && row.content) searchResult.content = row.content;
        if (returnMetadata && row.metadata) searchResult.metadata = row.metadata;
        return searchResult;
      });
    },

    /**
     * Delete rows by id. The returned count is the number of ids requested.
     * @param {string[]} ids
     * @returns {Promise<{count: number}>}
     */
    async remove(ids) {
      await pool.query(`DELETE FROM ${table} WHERE id = ANY($1)`, [ids]);
      return { count: ids.length };
    },

    /** Delete every row in the table. */
    async clear() {
      await pool.query(`DELETE FROM ${table}`);
    },

    /** Shut down the connection pool. */
    async close() {
      await pool.end();
    }
  };
}
|
|
79
|
+
var pgvector_default = pgvector;
|
|
80
|
+
export { pgvector_default as default, pgvector };
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { BaseDriverConfig, SearchProvider } from "../types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/db/sqlite-fts.d.ts
|
|
4
|
+
interface SqliteFtsConfig extends BaseDriverConfig {
|
|
5
|
+
/** Path to SQLite database file. Use ':memory:' for in-memory. */
|
|
6
|
+
path?: string;
|
|
7
|
+
}
|
|
8
|
+
/**
|
|
9
|
+
* Create a SQLite FTS5 full-text search provider
|
|
10
|
+
* Uses the built-in FTS5 extension for fast BM25-based search
|
|
11
|
+
*
|
|
12
|
+
* @example
|
|
13
|
+
* ```ts
|
|
14
|
+
* import { sqliteFts } from 'retriv/db/sqlite-fts'
|
|
15
|
+
*
|
|
16
|
+
* const db = await sqliteFts({
|
|
17
|
+
* path: 'search.db',
|
|
18
|
+
* })
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
declare function sqliteFts(config?: SqliteFtsConfig): Promise<SearchProvider>;
|
|
22
|
+
//#endregion
|
|
23
|
+
export { SqliteFtsConfig, sqliteFts as default, sqliteFts };
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { mkdirSync } from "node:fs";
|
|
2
|
+
import { dirname } from "node:path";
|
|
3
|
+
import Database from "better-sqlite3";
|
|
4
|
+
/**
 * Create a SQLite FTS5 full-text search provider backed by better-sqlite3.
 *
 * @param {object} [config]
 * @param {string} [config.path] - Database file; falls back to ":memory:".
 *   Parent directories are created for file-backed databases.
 * @returns {Promise<object>} Provider exposing `index`, `search`, `remove`, `clear` and `close`.
 */
async function sqliteFts(config = {}) {
  const dbPath = config.path || ":memory:";
  if (dbPath !== ":memory:") mkdirSync(dirname(dbPath), { recursive: true });
  const db = new Database(dbPath);
  try {
    db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
        id,
        content,
        metadata,
        tokenize='porter unicode61'
      )
    `);
  } catch (error) {
    // Do not leave the handle open when the schema cannot be created.
    db.close();
    throw error;
  }

  return {
    /**
     * Insert documents, replacing any existing rows that share an id.
     * @param {Array<{id: string, content: string, metadata?: object}>} docs
     * @returns {Promise<{count: number}>}
     */
    async index(docs) {
      // FTS5 tables carry no unique constraint on `id`, so INSERT OR REPLACE
      // never replaces anything; old rows must be deleted explicitly.
      const removeExisting = db.prepare("DELETE FROM documents_fts WHERE id = ?");
      const insert = db.prepare(`
        INSERT INTO documents_fts (id, content, metadata)
        VALUES (?, ?, ?)
      `);
      db.transaction((documents) => {
        for (const doc of documents) {
          removeExisting.run(doc.id);
          insert.run(doc.id, doc.content, doc.metadata ? JSON.stringify(doc.metadata) : null);
        }
      })(docs);
      return { count: docs.length };
    },

    /**
     * Run a BM25-ranked full-text query.
     * @param {string} query
     * @param {{limit?: number, returnContent?: boolean, returnMetadata?: boolean}} [options]
     * @returns {Promise<Array<{id: string, score: number, content?: string, metadata?: object}>>}
     */
    async search(query, options = {}) {
      const { limit = 10, returnContent = false, returnMetadata = true } = options;
      const tokens = query.replace(/[?"():^*-]/g, " ").split(/\s+/).filter(Boolean);
      if (tokens.length === 0) return [];
      // Quote every token so words such as AND/OR/NOT/NEAR and leftover
      // punctuation are matched as text instead of parsed as FTS5 syntax.
      // Double quotes were stripped above, so no escaping is needed.
      const matchExpression = tokens.map((token) => `"${token}"`).join(" ");
      const rows = db.prepare(`
        SELECT
          id,
          ${returnContent ? "content," : ""}
          ${returnMetadata ? "metadata," : ""}
          bm25(documents_fts) as score
        FROM documents_fts
        WHERE documents_fts MATCH ?
        ORDER BY bm25(documents_fts)
        LIMIT ?
      `).all(matchExpression, limit);
      return rows.map((row) => {
        // bm25() is more negative for better matches; map its magnitude onto
        // [0, 1) so that a better match gets a HIGHER score.
        const magnitude = Math.abs(row.score);
        const result = {
          id: row.id,
          score: Math.max(0, Math.min(1, magnitude / (1 + magnitude)))
        };
        if (returnContent && row.content) result.content = row.content;
        if (returnMetadata && row.metadata) result.metadata = JSON.parse(row.metadata);
        return result;
      });
    },

    /**
     * Delete documents by id. The returned count is the number of ids requested.
     * @param {string[]} ids
     * @returns {Promise<{count: number}>}
     */
    async remove(ids) {
      const del = db.prepare("DELETE FROM documents_fts WHERE id = ?");
      db.transaction((docIds) => {
        for (const id of docIds) del.run(id);
      })(ids);
      return { count: ids.length };
    },

    /** Delete every indexed document. */
    async clear() {
      db.exec("DELETE FROM documents_fts");
    },

    /** Close the database handle. */
    async close() {
      db.close();
    }
  };
}
|
|
67
|
+
var sqlite_fts_default = sqliteFts;
|
|
68
|
+
export { sqlite_fts_default as default, sqliteFts };
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { BaseDriverConfig, EmbeddingConfig, SearchProvider } from "../types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/db/sqlite-vec.d.ts
|
|
4
|
+
interface SqliteVecConfig extends BaseDriverConfig {
|
|
5
|
+
/** Path to SQLite database file. Use ':memory:' for in-memory. */
|
|
6
|
+
path?: string;
|
|
7
|
+
/** Embedding provider from retriv/embeddings/ */
|
|
8
|
+
embeddings: EmbeddingConfig;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Create a sqlite-vec vector search provider
|
|
12
|
+
* Requires Node.js >= 22.5
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```ts
|
|
16
|
+
* import { sqliteVec } from 'retriv/db/sqlite-vec'
|
|
17
|
+
* import { openai } from 'retriv/embeddings/openai'
|
|
18
|
+
*
|
|
19
|
+
* const db = await sqliteVec({
|
|
20
|
+
* path: 'vectors.db',
|
|
21
|
+
* embeddings: openai({ model: 'text-embedding-3-small' }),
|
|
22
|
+
* })
|
|
23
|
+
* ```
|
|
24
|
+
*/
|
|
25
|
+
declare function sqliteVec(config: SqliteVecConfig): Promise<SearchProvider>;
|
|
26
|
+
//#endregion
|
|
27
|
+
export { SqliteVecConfig, sqliteVec as default, sqliteVec };
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import { resolveEmbedding } from "../embeddings/resolve.mjs";
|
|
2
|
+
import { mkdirSync } from "node:fs";
|
|
3
|
+
import { dirname } from "node:path";
|
|
4
|
+
import * as sqliteVecExt from "sqlite-vec";
|
|
5
|
+
/**
 * Create a sqlite-vec vector search provider on top of the built-in
 * `node:sqlite` module.
 *
 * Vectors live in the `vectors` vec0 virtual table; ids, content and metadata
 * live in `vector_metadata`, joined to the vectors by rowid.
 *
 * @param {object} config
 * @param {object} config.embeddings - Embedding config handed to `resolveEmbedding` (required).
 * @param {string} [config.path] - Database file; falls back to ":memory:".
 * @returns {Promise<object>} Provider exposing `index`, `search`, `remove`, `clear` and `close`.
 * @throws {Error} When `embeddings` is missing, `node:sqlite` is unavailable,
 *   or the extension/schema cannot be set up.
 */
async function sqliteVec(config) {
  const dbPath = config.path || ":memory:";
  if (!config.embeddings) throw new Error("[sqlite-vec] embeddings is required");
  const { embedder, dimensions } = await resolveEmbedding(config.embeddings);
  const nodeSqlite = globalThis.process?.getBuiltinModule?.("node:sqlite");
  if (!nodeSqlite) throw new Error("node:sqlite not available. Requires Node.js >= 22.5");
  if (dbPath !== ":memory:") mkdirSync(dirname(dbPath), { recursive: true });
  const db = new nodeSqlite.DatabaseSync(dbPath, {
    allowExtension: true,
    open: true,
    readOnly: false
  });
  try {
    sqliteVecExt.load(db);
    db.exec("PRAGMA foreign_keys = ON");
    db.exec(`
      CREATE TABLE IF NOT EXISTS vector_metadata (
        id TEXT PRIMARY KEY,
        content TEXT,
        metadata TEXT
      )
    `);
    db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS vectors
      USING vec0(embedding float[${dimensions}])
    `);
  } catch (error) {
    // Do not leave the handle open when the extension or schema setup fails.
    db.close?.();
    throw error;
  }

  // Run `work` between BEGIN and COMMIT, rolling back on any failure.
  const inTransaction = (work) => {
    db.exec("BEGIN");
    try {
      const outcome = work();
      db.exec("COMMIT");
      return outcome;
    } catch (error) {
      try {
        db.exec("ROLLBACK");
      } catch {
        // A failed rollback must not hide the error that triggered it.
      }
      throw error;
    }
  };

  const serializeMetadata = (metadata) => (metadata ? JSON.stringify(metadata) : null);

  return {
    /**
     * Embed and upsert documents inside a single transaction.
     * @param {Array<{id: string, content: string, metadata?: object}>} docs
     * @returns {Promise<{count: number}>}
     * @throws {Error} On an embedding count or dimension mismatch.
     */
    async index(docs) {
      if (docs.length === 0) return { count: 0 };
      const embeddings = await embedder(docs.map((d) => d.content));
      if (embeddings.length !== docs.length) throw new Error(`Embedding count mismatch: expected ${docs.length}, got ${embeddings.length}`);
      // Compile each statement once per call instead of once per document.
      const findRow = db.prepare("SELECT rowid FROM vector_metadata WHERE id = ?");
      const updateVector = db.prepare("UPDATE vectors SET embedding = ? WHERE rowid = ?");
      const updateMeta = db.prepare("UPDATE vector_metadata SET content = ?, metadata = ? WHERE rowid = ?");
      const insertVector = db.prepare("INSERT INTO vectors (embedding) VALUES (?)");
      const insertMeta = db.prepare("INSERT INTO vector_metadata (rowid, id, content, metadata) VALUES (?, ?, ?, ?)");
      return inTransaction(() => {
        docs.forEach((doc, i) => {
          const vector = embeddings[i];
          if (vector.length !== dimensions) throw new Error(`Vector dimension mismatch: expected ${dimensions}, got ${vector.length}`);
          const embedding = new Float32Array(vector);
          const existing = findRow.get(doc.id);
          if (existing) {
            updateVector.run(embedding, existing.rowid);
            updateMeta.run(doc.content, serializeMetadata(doc.metadata), existing.rowid);
          } else {
            // The vector row's rowid doubles as the key of the metadata row.
            const rowid = insertVector.run(embedding).lastInsertRowid;
            insertMeta.run(rowid, doc.id, doc.content, serializeMetadata(doc.metadata));
          }
        });
        return { count: docs.length };
      });
    },

    /**
     * Embed the query and return the nearest vectors.
     * Score is `1 / (1 + distance)`, so a smaller distance scores higher.
     * @param {string} query
     * @param {{limit?: number, returnContent?: boolean, returnMetadata?: boolean}} [options]
     * @returns {Promise<Array<{id: string, score: number, content?: string, metadata?: object}>>}
     */
    async search(query, options = {}) {
      const { limit = 10, returnContent = false, returnMetadata = true } = options;
      const [embedding] = await embedder([query]);
      if (!embedding) throw new Error("Failed to generate query embedding");
      const queryEmbedding = new Float32Array(embedding);
      const neighbours = db.prepare(`
        SELECT rowid, distance
        FROM vectors
        WHERE embedding MATCH ?
        ORDER BY distance
        LIMIT ?
      `).all(queryEmbedding, limit);
      // Compiled once and reused for every hit.
      const loadMeta = db.prepare("SELECT id, content, metadata FROM vector_metadata WHERE rowid = ?");
      const results = [];
      for (const row of neighbours) {
        const meta = loadMeta.get(row.rowid);
        // Vectors without a metadata row are skipped.
        if (!meta) continue;
        const result = {
          id: meta.id,
          score: 1 / (1 + row.distance)
        };
        if (returnContent && meta.content) result.content = meta.content;
        if (returnMetadata && meta.metadata) result.metadata = JSON.parse(meta.metadata);
        results.push(result);
      }
      return results;
    },

    /**
     * Delete documents by id inside a single transaction. The returned count
     * is the number of ids requested.
     * @param {string[]} ids
     * @returns {Promise<{count: number}>}
     */
    async remove(ids) {
      const findRow = db.prepare("SELECT rowid FROM vector_metadata WHERE id = ?");
      const deleteVector = db.prepare("DELETE FROM vectors WHERE rowid = ?");
      const deleteMeta = db.prepare("DELETE FROM vector_metadata WHERE rowid = ?");
      return inTransaction(() => {
        for (const id of ids) {
          const meta = findRow.get(id);
          if (meta) {
            deleteVector.run(meta.rowid);
            deleteMeta.run(meta.rowid);
          }
        }
        return { count: ids.length };
      });
    },

    /** Delete every vector and metadata row atomically. */
    async clear() {
      inTransaction(() => {
        db.exec("DELETE FROM vectors");
        db.exec("DELETE FROM vector_metadata");
      });
    },

    /** Close the database handle. */
    async close() {
      db.close?.();
    }
  };
}
|
|
107
|
+
var sqlite_vec_default = sqliteVec;
|
|
108
|
+
export { sqlite_vec_default as default, sqliteVec };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { BaseDriverConfig, SearchProvider } from "../types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/db/upstash.d.ts
|
|
4
|
+
interface UpstashConfig extends BaseDriverConfig {
|
|
5
|
+
/** Upstash Vector REST URL */
|
|
6
|
+
url: string;
|
|
7
|
+
/** Upstash Vector REST token */
|
|
8
|
+
token: string;
|
|
9
|
+
/** Optional namespace for vectors (default: "chunks") */
|
|
10
|
+
namespace?: string;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Create an Upstash Vector search provider
|
|
14
|
+
* Text-native: Uses Upstash's server-side embeddings (no local embedding needed)
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```ts
|
|
18
|
+
* import { upstash } from 'retriv/db/upstash'
|
|
19
|
+
*
|
|
20
|
+
* const db = await upstash({
|
|
21
|
+
* url: process.env.UPSTASH_VECTOR_URL,
|
|
22
|
+
* token: process.env.UPSTASH_VECTOR_TOKEN,
|
|
23
|
+
* })
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
declare function upstash(config: UpstashConfig): Promise<SearchProvider>;
|
|
27
|
+
//#endregion
|
|
28
|
+
export { UpstashConfig, upstash as default, upstash };
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { Index } from "@upstash/vector";
|
|
2
|
+
/**
 * Create an Upstash Vector search provider. Documents are sent as raw text
 * (`data`), so embedding happens on the Upstash side.
 *
 * The document text is additionally stored under the reserved `_content`
 * metadata key so it can be returned from searches.
 *
 * @param {object} config
 * @param {string} config.url - Upstash Vector REST URL (required).
 * @param {string} config.token - Upstash Vector REST token (required).
 * @param {string} [config.namespace] - Namespace; falls back to "chunks".
 * @returns {Promise<object>} Provider exposing `index`, `search`, `remove`, `clear` and `close`.
 * @throws {Error} When `url` or `token` is missing.
 */
async function upstash(config) {
  const { url, token, namespace } = config;
  if (!url) throw new Error("[upstash] url is required");
  if (!token) throw new Error("[upstash] token is required");
  const index = new Index({
    url,
    token
  });
  const ns = namespace || "chunks";

  return {
    /**
     * Upsert documents as text records.
     * @param {Array<{id: string, content: string, metadata?: object}>} docs
     * @returns {Promise<{count: number}>}
     */
    async index(docs) {
      if (docs.length === 0) return { count: 0 };
      const upstashVectors = docs.map((doc) => ({
        id: doc.id,
        data: doc.content,
        metadata: {
          ...doc.metadata,
          _content: doc.content
        }
      }));
      await index.upsert(upstashVectors, { namespace: ns });
      return { count: docs.length };
    },

    /**
     * Query by text and return matches with scores clamped to [0, 1].
     * @param {string} query
     * @param {{limit?: number, returnContent?: boolean, returnMetadata?: boolean}} [options]
     * @returns {Promise<Array<{id: string, score: number, content?: string, metadata?: object}>>}
     */
    async search(query, options = {}) {
      const { limit = 10, returnContent = false, returnMetadata = true } = options;
      const matches = await index.query({
        data: query,
        topK: limit,
        // Metadata is always needed: it carries `_content`.
        includeMetadata: true,
        // The raw data payload is only useful when content is requested.
        includeData: returnContent
      }, { namespace: ns }) || [];
      return matches.map((m) => {
        const result = {
          id: m.id,
          score: Math.max(0, Math.min(1, m.score))
        };
        // Prefer the copy stored in metadata; fall back to the data payload
        // for records that carry no `_content` key.
        const content = m.metadata?._content || m.data;
        if (returnContent && content) result.content = content;
        if (returnMetadata && m.metadata) {
          // Hide the internal `_content` key from callers.
          const { _content, ...rest } = m.metadata;
          if (Object.keys(rest).length > 0) result.metadata = rest;
        }
        return result;
      });
    },

    /**
     * Delete records by id. The returned count is the number of ids requested.
     * @param {string[]} ids
     * @returns {Promise<{count: number}>}
     */
    async remove(ids) {
      // Nothing to delete: skip the network round trip, like index() does.
      if (ids.length === 0) return { count: 0 };
      await index.delete(ids, { namespace: ns });
      return { count: ids.length };
    },

    /** Remove every record in the namespace. */
    async clear() {
      await index.reset({ namespace: ns });
    },

    /** No-op: this provider holds nothing that needs closing. */
    async close() {}
  };
}
|
|
55
|
+
var upstash_default = upstash;
|
|
56
|
+
export { upstash_default as default, upstash };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { EmbeddingConfig } from "../types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/embeddings/cohere.d.ts
|
|
4
|
+
interface CohereEmbeddingOptions {
|
|
5
|
+
/** Model name (default: embed-english-v3.0) */
|
|
6
|
+
model?: string;
|
|
7
|
+
/** API key (falls back to COHERE_API_KEY env) */
|
|
8
|
+
apiKey?: string;
|
|
9
|
+
/** Base URL override */
|
|
10
|
+
baseUrl?: string;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Cohere embedding provider
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```ts
|
|
17
|
+
* import { cohere } from 'retriv/embeddings/cohere'
|
|
18
|
+
* import { sqliteVec } from 'retriv/db/sqlite-vec'
|
|
19
|
+
*
|
|
20
|
+
* const db = await sqliteVec({
|
|
21
|
+
* path: 'vectors.db',
|
|
22
|
+
* embeddings: cohere({ model: 'embed-english-v3.0' }),
|
|
23
|
+
* })
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
declare function cohere(options?: CohereEmbeddingOptions): EmbeddingConfig;
|
|
27
|
+
//#endregion
|
|
28
|
+
export { CohereEmbeddingOptions, cohere };
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { embed, embedMany } from "ai";
|
|
2
|
+
import { createCohere } from "@ai-sdk/cohere";
|
|
3
|
+
/**
 * Cohere embedding provider.
 *
 * Returns a lazy config object: nothing is contacted until `resolve()` is
 * called, which embeds the string "test" once to discover the vector size.
 *
 * @param {object} [options]
 * @param {string} [options.model="embed-english-v3.0"] - Embedding model name.
 * @param {string} [options.apiKey] - API key forwarded to `createCohere`.
 * @param {string} [options.baseUrl] - Base URL forwarded as `baseURL`.
 * @returns {{resolve: () => Promise<{embedder: Function, dimensions: number}>}}
 */
function cohere(options = {}) {
  const { model = "embed-english-v3.0", apiKey, baseUrl } = options;
  // The in-flight/settled initialisation. Caching the promise (not just its
  // result) makes concurrent resolve() calls share one probe request.
  let pending = null;

  const initialise = async () => {
    const embeddingModel = createCohere({
      apiKey,
      baseURL: baseUrl
    }).textEmbeddingModel(model);
    // Probe once to learn the dimensionality of this model's vectors.
    const { embedding: probe } = await embed({
      model: embeddingModel,
      value: "test"
    });
    const embedder = async (texts) => {
      if (texts.length === 0) return [];
      if (texts.length === 1) {
        const { embedding } = await embed({
          model: embeddingModel,
          value: texts[0]
        });
        return [embedding];
      }
      const { embeddings } = await embedMany({
        model: embeddingModel,
        values: texts
      });
      return embeddings;
    };
    return {
      embedder,
      dimensions: probe.length
    };
  };

  return { async resolve() {
    if (!pending) {
      pending = initialise().catch((error) => {
        // Forget a failed attempt so a later resolve() can retry.
        pending = null;
        throw error;
      });
    }
    return pending;
  } };
}
|
|
39
|
+
export { cohere };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { EmbeddingConfig } from "../types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/embeddings/google.d.ts
|
|
4
|
+
interface GoogleEmbeddingOptions {
|
|
5
|
+
/** Model name (default: text-embedding-004) */
|
|
6
|
+
model?: string;
|
|
7
|
+
/** API key (when omitted, @ai-sdk/google falls back to the GOOGLE_GENERATIVE_AI_API_KEY env variable) */
|
|
8
|
+
apiKey?: string;
|
|
9
|
+
/** Base URL override */
|
|
10
|
+
baseUrl?: string;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Google AI embedding provider
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```ts
|
|
17
|
+
* import { google } from 'retriv/embeddings/google'
|
|
18
|
+
* import { sqliteVec } from 'retriv/db/sqlite-vec'
|
|
19
|
+
*
|
|
20
|
+
* const db = await sqliteVec({
|
|
21
|
+
* path: 'vectors.db',
|
|
22
|
+
* embeddings: google({ model: 'text-embedding-004' }),
|
|
23
|
+
* })
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
declare function google(options?: GoogleEmbeddingOptions): EmbeddingConfig;
|
|
27
|
+
//#endregion
|
|
28
|
+
export { GoogleEmbeddingOptions, google };
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { embed, embedMany } from "ai";
|
|
2
|
+
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
3
|
+
/**
 * Google AI embedding provider.
 *
 * Returns a lazy config object: nothing is contacted until `resolve()` is
 * called, which embeds the string "test" once to discover the vector size.
 *
 * @param {object} [options]
 * @param {string} [options.model="text-embedding-004"] - Embedding model name.
 * @param {string} [options.apiKey] - API key forwarded to `createGoogleGenerativeAI`.
 * @param {string} [options.baseUrl] - Base URL forwarded as `baseURL`.
 * @returns {{resolve: () => Promise<{embedder: Function, dimensions: number}>}}
 */
function google(options = {}) {
  const { model = "text-embedding-004", apiKey, baseUrl } = options;
  // The in-flight/settled initialisation. Caching the promise (not just its
  // result) makes concurrent resolve() calls share one probe request.
  let pending = null;

  const initialise = async () => {
    const embeddingModel = createGoogleGenerativeAI({
      apiKey,
      baseURL: baseUrl
    }).textEmbeddingModel(model);
    // Probe once to learn the dimensionality of this model's vectors.
    const { embedding: probe } = await embed({
      model: embeddingModel,
      value: "test"
    });
    const embedder = async (texts) => {
      if (texts.length === 0) return [];
      if (texts.length === 1) {
        const { embedding } = await embed({
          model: embeddingModel,
          value: texts[0]
        });
        return [embedding];
      }
      const { embeddings } = await embedMany({
        model: embeddingModel,
        values: texts
      });
      return embeddings;
    };
    return {
      embedder,
      dimensions: probe.length
    };
  };

  return { async resolve() {
    if (!pending) {
      pending = initialise().catch((error) => {
        // Forget a failed attempt so a later resolve() can retry.
        pending = null;
        throw error;
      });
    }
    return pending;
  } };
}
|
|
39
|
+
export { google };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { EmbeddingConfig } from "../types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/embeddings/mistral.d.ts
|
|
4
|
+
interface MistralEmbeddingOptions {
|
|
5
|
+
/** Model name (default: mistral-embed) */
|
|
6
|
+
model?: string;
|
|
7
|
+
/** API key (falls back to MISTRAL_API_KEY env) */
|
|
8
|
+
apiKey?: string;
|
|
9
|
+
/** Base URL override */
|
|
10
|
+
baseUrl?: string;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Mistral AI embedding provider
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```ts
|
|
17
|
+
* import { mistral } from 'retriv/embeddings/mistral'
|
|
18
|
+
* import { sqliteVec } from 'retriv/db/sqlite-vec'
|
|
19
|
+
*
|
|
20
|
+
* const db = await sqliteVec({
|
|
21
|
+
* path: 'vectors.db',
|
|
22
|
+
* embeddings: mistral({ model: 'mistral-embed' }),
|
|
23
|
+
* })
|
|
24
|
+
* ```
|
|
25
|
+
*/
|
|
26
|
+
declare function mistral(options?: MistralEmbeddingOptions): EmbeddingConfig;
|
|
27
|
+
//#endregion
|
|
28
|
+
export { MistralEmbeddingOptions, mistral };
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { embed, embedMany } from "ai";
|
|
2
|
+
import { createMistral } from "@ai-sdk/mistral";
|
|
3
|
+
/**
 * Mistral AI embedding provider.
 *
 * Returns a lazy config object: nothing is contacted until `resolve()` is
 * called, which embeds the string "test" once to discover the vector size.
 *
 * @param {object} [options]
 * @param {string} [options.model="mistral-embed"] - Embedding model name.
 * @param {string} [options.apiKey] - API key forwarded to `createMistral`.
 * @param {string} [options.baseUrl] - Base URL forwarded as `baseURL`.
 * @returns {{resolve: () => Promise<{embedder: Function, dimensions: number}>}}
 */
function mistral(options = {}) {
  const { model = "mistral-embed", apiKey, baseUrl } = options;
  // The in-flight/settled initialisation. Caching the promise (not just its
  // result) makes concurrent resolve() calls share one probe request.
  let pending = null;

  const initialise = async () => {
    const embeddingModel = createMistral({
      apiKey,
      baseURL: baseUrl
    }).textEmbeddingModel(model);
    // Probe once to learn the dimensionality of this model's vectors.
    const { embedding: probe } = await embed({
      model: embeddingModel,
      value: "test"
    });
    const embedder = async (texts) => {
      if (texts.length === 0) return [];
      if (texts.length === 1) {
        const { embedding } = await embed({
          model: embeddingModel,
          value: texts[0]
        });
        return [embedding];
      }
      const { embeddings } = await embedMany({
        model: embeddingModel,
        values: texts
      });
      return embeddings;
    };
    return {
      embedder,
      dimensions: probe.length
    };
  };

  return { async resolve() {
    if (!pending) {
      pending = initialise().catch((error) => {
        // Forget a failed attempt so a later resolve() can retry.
        pending = null;
        throw error;
      });
    }
    return pending;
  } };
}
|
|
39
|
+
export { mistral };
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { EmbeddingConfig } from "../types.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/embeddings/ollama.d.ts
|
|
4
|
+
interface OllamaEmbeddingOptions {
|
|
5
|
+
/** Model name (default: nomic-embed-text) */
|
|
6
|
+
model?: string;
|
|
7
|
+
/** Base URL (falls back to the OLLAMA_BASE_URL env variable, then http://localhost:11434) */
|
|
8
|
+
baseUrl?: string;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Ollama embedding provider (local)
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```ts
|
|
15
|
+
* import { ollama } from 'retriv/embeddings/ollama'
|
|
16
|
+
* import { sqliteVec } from 'retriv/db/sqlite-vec'
|
|
17
|
+
*
|
|
18
|
+
* const db = await sqliteVec({
|
|
19
|
+
* path: 'vectors.db',
|
|
20
|
+
* embeddings: ollama({ model: 'nomic-embed-text' }),
|
|
21
|
+
* })
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
declare function ollama(options?: OllamaEmbeddingOptions): EmbeddingConfig;
|
|
25
|
+
//#endregion
|
|
26
|
+
export { OllamaEmbeddingOptions, ollama };
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { embed, embedMany } from "ai";
|
|
2
|
+
import { createOllama } from "ollama-ai-provider-v2";
|
|
3
|
+
/**
 * Ollama embedding provider (local).
 *
 * Returns a lazy config object: nothing is contacted until `resolve()` is
 * called, which embeds the string "test" once to discover the vector size.
 *
 * @param {object} [options]
 * @param {string} [options.model="nomic-embed-text"] - Embedding model name.
 * @param {string} [options.baseUrl] - Server URL; falls back to the
 *   `OLLAMA_BASE_URL` env variable, then "http://localhost:11434". The "/api"
 *   suffix is appended when missing.
 * @returns {{resolve: () => Promise<{embedder: Function, dimensions: number}>}}
 */
function ollama(options = {}) {
  const { model = "nomic-embed-text", baseUrl } = options;
  // The in-flight/settled initialisation. Caching the promise (not just its
  // result) makes concurrent resolve() calls share one probe request.
  let pending = null;

  const initialise = async () => {
    const configuredUrl = baseUrl || process.env.OLLAMA_BASE_URL || "http://localhost:11434";
    // Drop trailing slashes first so "http://host/" does not become
    // "http://host//api" and "http://host/api/" is recognised as complete.
    const ollamaBaseUrl = configuredUrl.replace(/\/+$/, "");
    const embeddingModel = createOllama({ baseURL: ollamaBaseUrl.endsWith("/api") ? ollamaBaseUrl : `${ollamaBaseUrl}/api` }).textEmbeddingModel(model);
    // Probe once to learn the dimensionality of this model's vectors.
    const { embedding: probe } = await embed({
      model: embeddingModel,
      value: "test"
    });
    const embedder = async (texts) => {
      if (texts.length === 0) return [];
      if (texts.length === 1) {
        const { embedding } = await embed({
          model: embeddingModel,
          value: texts[0]
        });
        return [embedding];
      }
      const { embeddings } = await embedMany({
        model: embeddingModel,
        values: texts
      });
      return embeddings;
    };
    return {
      embedder,
      dimensions: probe.length
    };
  };

  return { async resolve() {
    if (!pending) {
      pending = initialise().catch((error) => {
        // Forget a failed attempt so a later resolve() can retry.
        pending = null;
        throw error;
      });
    }
    return pending;
  } };
}
|
|
37
|
+
export { ollama };
|