retriv 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026-present Harlan Wilton
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,282 @@
1
+ <h1>retriv</h1>
2
+
3
+ [![npm version][npm-version-src]][npm-version-href]
4
+ [![npm downloads][npm-downloads-src]][npm-downloads-href]
5
+ [![License][license-src]][license-href]
6
+
7
+ Index and retrieve Markdown documents with [up to 30% better recall](https://ragaboutit.com/hybrid-retrieval-for-enterprise-rag-when-to-use-bm25-vectors-or-both/) using hybrid search.
8
+
9
+ Keyword search (BM25) finds exact matches but misses synonyms. Semantic search understands meaning but struggles with names, codes, and precise terminology. Hybrid search combines both using [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) - [research shows up to 5.8x improvement](https://www.researchgate.net/publication/399428523_Hybrid_Dense-Sparse_Retrieval_for_High-Recall_Information_Retrieval) on standard benchmarks.
10
+
11
+ <p align="center">
12
+ <table>
13
+ <tbody>
14
+ <td align="center">
15
+ <sub>Made possible by my <a href="https://github.com/sponsors/harlan-zw">Sponsor Program 💖</a><br> Follow me <a href="https://twitter.com/harlan_zw">@harlan_zw</a> 🐦 • Join <a href="https://discord.gg/275MBUBvgP">Discord</a> for help</sub><br>
16
+ </td>
17
+ </tbody>
18
+ </table>
19
+ </p>
20
+
21
+ ## Features
22
+
23
+ - 🔀 **[Hybrid search](#local-first-sqlite)** - BM25 + vectors with [RRF fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) in a single SQLite file
24
+ - 🔌 **[Swappable backends](#drivers)** - SQLite, LibSQL/Turso, pgvector, Upstash, Cloudflare Vectorize
25
+ - 🧠 **[Any embedding provider](#embedding-providers)** - OpenAI, Google, Mistral, Cohere, Ollama, or local [Transformers.js](https://huggingface.co/docs/transformers.js)
26
+ - ✂️ **[Automatic chunking](#with-chunking)** - Split large documents with configurable overlap
27
+ - 📦 **[Unified interface](#api)** - Same `SearchProvider` API across all drivers
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pnpm add retriv
33
+ ```
34
+
35
+ ## Usage
36
+
37
+ ### Local-First (SQLite)
38
+
39
+ Single file with BM25 + vector search. No external services needed.
40
+
41
+ ```bash
42
+ pnpm add @huggingface/transformers sqlite-vec
43
+ ```
44
+
45
+ ```ts
46
+ import { createRetriv } from 'retriv'
47
+ import sqlite from 'retriv/db/sqlite'
48
+ import { transformers } from 'retriv/embeddings/transformers'
49
+
50
+ const search = await createRetriv({
51
+ driver: sqlite({
52
+ path: './search.db',
53
+ embeddings: transformers(), // runs locally, no API key
54
+ }),
55
+ })
56
+
57
+ await search.index([
58
+ {
59
+ id: '1',
60
+ content: 'How to mass delete Gmail emails using filters',
61
+ metadata: { source: 'https://support.google.com/mail', title: 'Gmail Help' },
62
+ },
63
+ {
64
+ id: '2',
65
+ content: 'Setting up email forwarding rules in Outlook',
66
+ metadata: { source: 'https://support.microsoft.com', title: 'Outlook Help' },
67
+ },
68
+ ])
69
+
70
+ const results = await search.search('bulk remove messages', { returnMetadata: true })
71
+ // Finds #1 via semantic similarity even without keyword overlap
72
+ // results[0].metadata.source → 'https://support.google.com/mail'
73
+ ```
74
+
75
+ ### Swap to Cloud Embeddings
76
+
77
+ Same hybrid driver, better embeddings:
78
+
79
+ ```bash
80
+ pnpm add @ai-sdk/openai ai sqlite-vec
81
+ ```
82
+
83
+ ```ts
84
+ import { createRetriv } from 'retriv'
85
+ import sqlite from 'retriv/db/sqlite'
86
+ import { openai } from 'retriv/embeddings/openai'
87
+
88
+ const search = await createRetriv({
89
+ driver: sqlite({
90
+ path: './search.db',
91
+ embeddings: openai(), // uses OPENAI_API_KEY env
92
+ }),
93
+ })
94
+ ```
95
+
96
+ ### Swap to Cloud Vector DB
97
+
98
+ For serverless or edge deployments:
99
+
100
+ ```bash
101
+ pnpm add @libsql/client better-sqlite3 @ai-sdk/openai ai
102
+ ```
103
+
104
+ ```ts
105
+ import { createRetriv } from 'retriv'
106
+ import libsql from 'retriv/db/libsql'
107
+ import sqliteFts from 'retriv/db/sqlite-fts'
108
+ import { openai } from 'retriv/embeddings/openai'
109
+
110
+ const search = await createRetriv({
111
+ driver: {
112
+ // Turso for vectors
113
+ vector: libsql({
114
+ url: 'libsql://your-db.turso.io',
115
+ authToken: process.env.TURSO_AUTH_TOKEN,
116
+ embeddings: openai(),
117
+ }),
118
+ // Local SQLite for BM25
119
+ keyword: sqliteFts({ path: './search.db' }),
120
+ },
121
+ })
122
+ ```
123
+
124
+ ### With Chunking
125
+
126
+ Automatically split large documents:
127
+
128
+ ```bash
129
+ pnpm add @huggingface/transformers sqlite-vec
130
+ ```
131
+
132
+ ```ts
133
+ import { createRetriv } from 'retriv'
134
+ import sqlite from 'retriv/db/sqlite'
135
+ import { transformers } from 'retriv/embeddings/transformers'
136
+
137
+ const search = await createRetriv({
138
+ driver: sqlite({
139
+ path: './search.db',
140
+ embeddings: transformers(),
141
+ }),
142
+ chunking: {
143
+ chunkSize: 1000,
144
+ chunkOverlap: 200,
145
+ },
146
+ })
147
+
148
+ await search.index([
149
+ { id: 'doc-1', content: veryLongArticle },
150
+ ])
151
+
152
+ const results = await search.search('specific topic')
153
+ // Results include _chunk: { parentId, index, range }
154
+ ```
155
+
156
+ ## Drivers
157
+
158
+ ### Hybrid (Recommended)
159
+
160
+ | Driver | Import | Peer Dependencies |
161
+ |--------|--------|-------------------|
162
+ | SQLite | `retriv/db/sqlite` | `sqlite-vec` (Node.js >= 22.5) |
163
+
164
+ ### Vector-Only (for composed hybrid)
165
+
166
+ | Driver | Import | Peer Dependencies |
167
+ |--------|--------|-------------------|
168
+ | LibSQL | `retriv/db/libsql` | `@libsql/client` |
169
+ | Upstash | `retriv/db/upstash` | `@upstash/vector` |
170
+ | Cloudflare | `retriv/db/cloudflare` | — (uses Cloudflare bindings) |
171
+ | pgvector | `retriv/db/pgvector` | `pg` |
172
+ | sqlite-vec | `retriv/db/sqlite-vec` | `sqlite-vec` (Node.js >= 22.5) |
173
+
174
+ ### Keyword-Only (for composed hybrid)
175
+
176
+ | Driver | Import | Peer Dependencies |
177
+ |--------|--------|-------------------|
178
+ | SQLite FTS5 | `retriv/db/sqlite-fts` | `better-sqlite3` |
179
+
180
+ ## Embedding Providers
181
+
182
+ All vector drivers accept an `embeddings` config:
183
+
184
+ | Provider | Import | Peer Dependencies |
185
+ |----------|--------|-------------------|
186
+ | OpenAI | `retriv/embeddings/openai` | `@ai-sdk/openai ai` |
187
+ | Google | `retriv/embeddings/google` | `@ai-sdk/google ai` |
188
+ | Mistral | `retriv/embeddings/mistral` | `@ai-sdk/mistral ai` |
189
+ | Cohere | `retriv/embeddings/cohere` | `@ai-sdk/cohere ai` |
190
+ | Ollama | `retriv/embeddings/ollama` | `ollama-ai-provider-v2 ai` |
191
+ | Transformers | `retriv/embeddings/transformers` | `@huggingface/transformers` |
192
+
193
+ ```ts
194
+ // Cloud providers (require API keys)
195
+ openai({ model: 'text-embedding-3-small' })
196
+ google({ model: 'text-embedding-004' })
197
+ mistral({ model: 'mistral-embed' })
198
+ cohere({ model: 'embed-english-v3.0' })
199
+
200
+ // Local (no API key)
201
+ ollama({ model: 'nomic-embed-text' })
202
+ transformers({ model: 'Xenova/all-MiniLM-L6-v2' })
203
+ ```
204
+
205
+ ## API
206
+
207
+ ### SearchProvider Interface
208
+
209
+ All drivers implement the same interface:
210
+
211
+ ```ts
212
+ interface SearchProvider {
213
+ index: (docs: Document[]) => Promise<{ count: number }>
214
+ search: (query: string, options?: SearchOptions) => Promise<SearchResult[]>
215
+ remove?: (ids: string[]) => Promise<{ count: number }>
216
+ clear?: () => Promise<void>
217
+ close?: () => Promise<void>
218
+ }
219
+ ```
220
+
221
+ ### Search Options
222
+
223
+ ```ts
224
+ interface SearchOptions {
225
+ limit?: number // Max results (default varies by driver)
226
+ returnContent?: boolean // Include original content in results
227
+ returnMetadata?: boolean // Include metadata in results
228
+ returnMeta?: boolean // Include driver-specific _meta
229
+ }
230
+ ```
231
+
232
+ ### Search Result
233
+
234
+ ```ts
235
+ interface SearchResult {
236
+ id: string // Document ID
237
+ score: number // 0-1, higher is better
238
+ content?: string // If returnContent: true
239
+ metadata?: Record<string, any> // If returnMetadata: true
240
+ _chunk?: ChunkInfo // When chunking enabled
241
+ _meta?: SearchMeta // If returnMeta: true (driver-specific extras)
242
+ }
243
+ ```
244
+
245
+ ## Benchmarks
246
+
247
+ Retrieval accuracy on Nuxt documentation (639 docs):
248
+
249
+ | Test Type | FTS | Vector | Hybrid |
250
+ |-----------|-----|--------|--------|
251
+ | Exact terminology (ports, config names) | 3/3 | 2/3 | 3/3 |
252
+ | Doc retrieval (keyword overlap) | 3/3 | 2/3 | 3/3 |
253
+ | Semantic queries (synonyms, no overlap) | 0/3 | 3/3 | 3/3 |
254
+ | **Total** | **6/9 (67%)** | **7/9 (78%)** | **9/9 (100%)** |
255
+
256
+ - **FTS** excels at exact terms but fails semantic queries ("reuse logic" → composables)
257
+ - **Vector** understands meaning but misses precise terminology ("port 3000")
258
+ - **Hybrid** combines both - never worse than either method alone
259
+
260
+ Run locally: `pnpm test:eval`
261
+
262
+ ## Sponsors
263
+
264
+ <p align="center">
265
+ <a href="https://raw.githubusercontent.com/harlan-zw/static/main/sponsors.svg">
266
+ <img src='https://raw.githubusercontent.com/harlan-zw/static/main/sponsors.svg'/>
267
+ </a>
268
+ </p>
269
+
270
+ ## License
271
+
272
+ Licensed under the [MIT license](https://github.com/harlan-zw/retriv/blob/main/LICENSE).
273
+
274
+ <!-- Badges -->
275
+ [npm-version-src]: https://img.shields.io/npm/v/retriv/latest.svg?style=flat&colorA=18181B&colorB=28CF8D
276
+ [npm-version-href]: https://npmjs.com/package/retriv
277
+
278
+ [npm-downloads-src]: https://img.shields.io/npm/dm/retriv.svg?style=flat&colorA=18181B&colorB=28CF8D
279
+ [npm-downloads-href]: https://npmjs.com/package/retriv
280
+
281
+ [license-src]: https://img.shields.io/github/license/harlan-zw/retriv.svg?style=flat&colorA=18181B&colorB=28CF8D
282
+ [license-href]: https://github.com/harlan-zw/retriv/blob/main/LICENSE
@@ -0,0 +1,85 @@
1
// Separator candidates ordered from most to least structural: markdown
// headings (h2..h6) first, then fenced-code / horizontal-rule boundaries,
// then paragraph breaks, single newlines, spaces, and finally "" which
// forces a character-by-character split as the last resort.
const MARKDOWN_SEPARATORS = [
  "\n## ",
  "\n### ",
  "\n#### ",
  "\n##### ",
  "\n###### ",
  "```\n\n",
  "\n\n***\n\n",
  "\n\n---\n\n",
  "\n\n___\n\n",
  "\n\n",
  "\n",
  " ",
  ""
];
16
/**
 * Convert a character offset into a 1-based line number.
 * Counts newline characters strictly before `offset`, capped at the end of
 * the text (so an offset past the end yields the last line).
 */
function offsetToLine(text, offset) {
  const end = Math.min(offset, text.length);
  let lineNumber = 1;
  let pos = 0;
  while (pos < end) {
    if (text.charAt(pos) === "\n") lineNumber += 1;
    pos += 1;
  }
  return lineNumber;
}
21
/**
 * Split `text` into chunks of roughly `chunkSize` characters using the
 * markdown-aware separator cascade, carrying `chunkOverlap` characters
 * between adjacent chunks. Each chunk records its character `range` and
 * 1-based `lines` span within the original text.
 */
function splitText(text, options = {}) {
  const {
    chunkSize = 1000,
    chunkOverlap = 200,
    separators = MARKDOWN_SEPARATORS
  } = options;
  // A short input is returned whole as a single chunk.
  if (text.length <= chunkSize) {
    return [{
      text,
      index: 0,
      range: [0, text.length],
      lines: [1, offsetToLine(text, text.length)]
    }];
  }
  const pieces = splitRecursive(text, chunkSize, separators);
  return mergeChunks(pieces, chunkSize, chunkOverlap, text);
}
34
/**
 * Recursively split `text` on the first applicable separator until pieces fit
 * within `chunkSize`. Separators (other than "") are re-appended to the piece
 * that preceded them so no characters are lost. Pieces that still exceed the
 * limit recurse with the remaining, finer-grained separators.
 */
function splitRecursive(text, chunkSize, separators) {
  if (text.length <= chunkSize || separators.length === 0) return [text];
  const sep = separators.find((candidate) => candidate === "" || text.includes(candidate));
  // No separator applies to this text: return it oversized as-is.
  if (sep === undefined) return [text];
  const pieces = sep === "" ? Array.from(text) : text.split(sep);
  const lastIndex = pieces.length - 1;
  return pieces.flatMap((piece, i) => {
    const rejoined = sep !== "" && i < lastIndex ? piece + sep : piece;
    return rejoined.length <= chunkSize
      ? [rejoined]
      : splitRecursive(rejoined, chunkSize, separators.slice(1));
  });
}
51
/**
 * Greedily merge pre-split parts into chunks of at most `chunkSize`
 * characters, carrying the last `chunkOverlap` characters of each emitted
 * chunk into the next one. Chunks are located back in `originalText` via
 * indexOf so that character ranges and line spans can be reported.
 */
function mergeChunks(parts, chunkSize, chunkOverlap, originalText) {
  const chunks = [];
  let current = "";
  // Search cursor for locating `current` in originalText; it advances past
  // each emitted chunk (minus the overlap) so repeated substrings are
  // resolved in document order rather than always matching the first hit.
  let currentStart = 0;
  for (const part of parts) if (current.length + part.length <= chunkSize) current += part;
  else {
    if (current) {
      // Locate the accumulated text; fall back to the cursor position when
      // indexOf misses (e.g. when the overlap-prefixed text is not a
      // verbatim substring of the original).
      const start = originalText.indexOf(current, currentStart);
      const actualStart = start >= 0 ? start : currentStart;
      const actualEnd = actualStart + current.length;
      chunks.push({
        text: current,
        index: chunks.length,
        range: [actualStart, actualEnd],
        lines: [offsetToLine(originalText, actualStart), offsetToLine(originalText, actualEnd)]
      });
      currentStart = Math.max(0, actualStart + current.length - chunkOverlap);
    }
    // Seed the next chunk with the overlap tail of the previous one.
    if (chunkOverlap > 0 && current.length > chunkOverlap) current = current.slice(-chunkOverlap) + part;
    else current = part;
  }
  // Flush the trailing chunk, if any text remains accumulated.
  if (current) {
    const start = originalText.indexOf(current, currentStart);
    const actualStart = start >= 0 ? start : currentStart;
    // NOTE(review): unlike the loop above, a failed indexOf here extends the
    // range to the very end of originalText instead of actualStart +
    // current.length — confirm this asymmetry is intentional.
    const actualEnd = start >= 0 ? start + current.length : originalText.length;
    chunks.push({
      text: current,
      index: chunks.length,
      range: [actualStart, actualEnd],
      lines: [offsetToLine(originalText, actualStart), offsetToLine(originalText, actualEnd)]
    });
  }
  return chunks;
}
85
+ export { splitText as t };
@@ -0,0 +1,36 @@
1
import { BaseDriverConfig, EmbeddingConfig, SearchProvider } from "../types.mjs";

//#region src/db/cloudflare.d.ts
/**
 * Minimal surface of the Cloudflare Vectorize index binding used by this
 * driver (only query/insert/upsert/deleteByIds are required).
 */
interface VectorizeIndexBinding {
  /** Nearest-neighbour query for a single embedding vector. */
  query: (vector: number[], options?: any) => Promise<{
    matches: any[];
    count?: number;
  }>;
  /** Insert vectors into the index. */
  insert: (vectors: any[]) => Promise<void>;
  /** Insert-or-update vectors keyed by id. */
  upsert: (vectors: any[]) => Promise<void>;
  /** Delete vectors by id. */
  deleteByIds: (ids: string[]) => Promise<void>;
}
interface CloudflareConfig extends BaseDriverConfig {
  /** Cloudflare Vectorize binding instance */
  binding: VectorizeIndexBinding;
  /** Embedding provider from retriv/embeddings/ */
  embeddings: EmbeddingConfig;
}
/**
 * Create a Cloudflare Vectorize search provider
 * For use in Cloudflare Workers at runtime
 *
 * @example
 * ```ts
 * import { cloudflare } from 'retriv/db/cloudflare'
 * import { openai } from 'retriv/embeddings/openai'
 *
 * const db = await cloudflare({
 *   binding: env.VECTORIZE,
 *   embeddings: openai({ model: 'text-embedding-3-small' }),
 * })
 * ```
 */
declare function cloudflare(config: CloudflareConfig): Promise<SearchProvider>;
//#endregion
export { CloudflareConfig, cloudflare, cloudflare as default };
@@ -0,0 +1,55 @@
1
+ import { resolveEmbedding } from "../embeddings/resolve.mjs";
2
/**
 * Create a Cloudflare Vectorize search provider. Documents are embedded via
 * the configured provider and stored through the Workers Vectorize binding;
 * the original content is stashed in metadata under the `_content` key.
 */
async function cloudflare(config) {
  const { binding } = config;
  if (!binding) throw new Error("[cloudflare] binding is required");
  if (!config.embeddings) throw new Error("[cloudflare] embeddings is required");
  const { embedder } = await resolveEmbedding(config.embeddings);
  return {
    // Embed every document and upsert the vectors in one call.
    async index(docs) {
      if (docs.length === 0) return { count: 0 };
      const vectors = await embedder(docs.map((doc) => doc.content));
      if (vectors.length !== docs.length) throw new Error(`Embedding count mismatch: expected ${docs.length}, got ${vectors.length}`);
      const records = docs.map((doc, position) => ({
        id: doc.id,
        values: vectors[position],
        metadata: {
          ...doc.metadata,
          _content: doc.content
        }
      }));
      await binding.upsert(records);
      return { count: docs.length };
    },
    // Nearest-neighbour search; metadata is always fetched from Vectorize so
    // content/metadata can be surfaced per the caller's return flags.
    async search(query, options = {}) {
      const { limit = 10, returnContent = false, returnMetadata = true } = options;
      const [queryVector] = await embedder([query]);
      if (!queryVector) throw new Error("Failed to generate query embedding");
      const response = await binding.query(queryVector, {
        topK: limit,
        returnValues: false,
        returnMetadata: true
      });
      const matches = response.matches || [];
      return matches.map((match) => {
        const entry = {
          id: match.id,
          // Clamp scores into [0, 1] before exposing them.
          score: Math.min(1, Math.max(0, match.score))
        };
        if (returnContent && match.metadata?._content) entry.content = match.metadata._content;
        if (returnMetadata && match.metadata) {
          // Strip the internal _content key from user-visible metadata.
          const { _content, ...metadata } = match.metadata;
          if (Object.keys(metadata).length > 0) entry.metadata = metadata;
        }
        return entry;
      });
    },
    async remove(ids) {
      await binding.deleteByIds(ids);
      return { count: ids.length };
    },
    // Vectorize has no bulk-wipe API through the binding.
    async clear() {
      throw new Error("[cloudflare] clear() is not supported - use wrangler CLI instead");
    },
    async close() {}
  };
}
54
+ var cloudflare_default = cloudflare;
55
+ export { cloudflare, cloudflare_default as default };
@@ -0,0 +1,30 @@
1
import { BaseDriverConfig, EmbeddingConfig, SearchProvider } from "../types.mjs";

//#region src/db/libsql.d.ts
interface LibsqlConfig extends BaseDriverConfig {
  /**
   * Database URL (file:path.db for local, libsql://... for remote).
   * When omitted the driver falls back to `path`, then to "file:vectors.db".
   */
  url?: string;
  /** Auth token for remote LibSQL/Turso */
  authToken?: string;
  /** Embedding provider from retriv/embeddings/ */
  embeddings: EmbeddingConfig;
}
/**
 * Create a LibSQL/Turso vector search provider
 * Supports local SQLite files and remote Turso databases
 *
 * @example
 * ```ts
 * import { libsql } from 'retriv/db/libsql'
 * import { openai } from 'retriv/embeddings/openai'
 *
 * const db = await libsql({
 *   url: 'libsql://your-db.turso.io',
 *   authToken: process.env.TURSO_AUTH_TOKEN,
 *   embeddings: openai({ model: 'text-embedding-3-small' }),
 * })
 * ```
 */
declare function libsql(config: LibsqlConfig): Promise<SearchProvider>;
//#endregion
export { LibsqlConfig, libsql as default, libsql };
@@ -0,0 +1,87 @@
1
+ import { resolveEmbedding } from "../embeddings/resolve.mjs";
2
+ import { createClient } from "@libsql/client";
3
/**
 * Create a LibSQL/Turso vector search provider. Embeddings are stored in an
 * F32_BLOB column sized from the embedding provider's dimensionality, and
 * search ranks rows by cosine distance via vector_distance_cos().
 *
 * Database location resolution: config.url, then config.path, then a local
 * "file:vectors.db".
 */
async function libsql(config) {
  const url = config.url || config.path || "file:vectors.db";
  const { authToken } = config;
  if (!config.embeddings) throw new Error("[libsql] embeddings is required");
  // Resolve the embedding function plus its output dimensions (used to size
  // the embedding column below).
  const { embedder, dimensions } = await resolveEmbedding(config.embeddings);
  const client = createClient({
    url,
    ...authToken && { authToken }
  });
  await client.execute(`
    CREATE TABLE IF NOT EXISTS vectors (
      id TEXT PRIMARY KEY,
      content TEXT,
      metadata TEXT,
      embedding F32_BLOB(${dimensions})
    )
  `);
  return {
    // Embed each document and upsert it (INSERT OR REPLACE keyed on id).
    // Rows are written sequentially, one statement per document.
    async index(docs) {
      if (docs.length === 0) return { count: 0 };
      const embeddings = await embedder(docs.map((d) => d.content));
      if (embeddings.length !== docs.length) throw new Error(`Embedding count mismatch: expected ${docs.length}, got ${embeddings.length}`);
      for (let i = 0; i < docs.length; i++) {
        const doc = docs[i];
        const vector = embeddings[i];
        // The vector is passed as JSON text; vector() converts it to the
        // blob representation on the database side.
        const vectorStr = JSON.stringify(vector);
        await client.execute({
          sql: `
            INSERT OR REPLACE INTO vectors (id, content, metadata, embedding)
            VALUES (?, ?, ?, vector(?))
          `,
          args: [
            doc.id,
            doc.content,
            doc.metadata ? JSON.stringify(doc.metadata) : null,
            vectorStr
          ]
        });
      }
      return { count: docs.length };
    },
    // Nearest-neighbour search by cosine distance; score = 1 - distance so
    // higher is better, clamped at 0.
    async search(query, options = {}) {
      const { limit = 10, returnContent = false, returnMetadata = true } = options;
      const [embedding] = await embedder([query]);
      if (!embedding) throw new Error("Failed to generate query embedding");
      const vectorStr = JSON.stringify(embedding);
      return ((await client.execute({
        sql: `
          SELECT
            id,
            content,
            metadata,
            vector_distance_cos(embedding, vector32(?)) as distance
          FROM vectors
          ORDER BY distance
          LIMIT ?
        `,
        args: [vectorStr, limit]
      })).rows || []).map((row) => {
        const result = {
          id: row.id,
          score: Math.max(0, 1 - row.distance)
        };
        if (returnContent && row.content) result.content = row.content;
        if (returnMetadata && row.metadata) result.metadata = JSON.parse(row.metadata);
        return result;
      });
    },
    // Delete one row per id, sequentially.
    async remove(ids) {
      for (const id of ids) await client.execute({
        sql: "DELETE FROM vectors WHERE id = ?",
        args: [id]
      });
      return { count: ids.length };
    },
    // Remove all rows but keep the table and its schema.
    async clear() {
      await client.execute("DELETE FROM vectors");
    },
    async close() {
      client.close();
    }
  };
}
86
+ var libsql_default = libsql;
87
+ export { libsql_default as default, libsql };
@@ -0,0 +1,30 @@
1
import { BaseDriverConfig, EmbeddingConfig, SearchProvider } from "../types.mjs";

//#region src/db/pgvector.d.ts
interface PgvectorConfig extends BaseDriverConfig {
  /** PostgreSQL connection URL */
  url: string;
  /** Table name for vectors */
  // NOTE(review): the default table name is not visible from this
  // declaration file — confirm against the pgvector implementation.
  table?: string;
  /** Embedding provider from retriv/embeddings/ */
  embeddings: EmbeddingConfig;
  /** Distance metric */
  metric?: 'cosine' | 'euclidean' | 'inner_product';
}
/**
 * Create a PostgreSQL pgvector search provider
 *
 * @example
 * ```ts
 * import { pgvector } from 'retriv/db/pgvector'
 * import { openai } from 'retriv/embeddings/openai'
 *
 * const db = await pgvector({
 *   url: process.env.DATABASE_URL,
 *   embeddings: openai({ model: 'text-embedding-3-small' }),
 * })
 * ```
 */
declare function pgvector(config: PgvectorConfig): Promise<SearchProvider>;
//#endregion
export { PgvectorConfig, pgvector as default, pgvector };