@chatbot-packages/rag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunking/index.d.ts +51 -0
- package/dist/chunking/index.js +248 -0
- package/dist/chunking/index.js.map +1 -0
- package/dist/embeddings/index.d.ts +103 -0
- package/dist/embeddings/index.js +195 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/extractors/index.d.ts +95 -0
- package/dist/extractors/index.js +343 -0
- package/dist/extractors/index.js.map +1 -0
- package/dist/index.d.ts +78 -0
- package/dist/index.js +1576 -0
- package/dist/index.js.map +1 -0
- package/dist/retrieval/index.d.ts +65 -0
- package/dist/retrieval/index.js +144 -0
- package/dist/retrieval/index.js.map +1 -0
- package/dist/types-CjnplPJD.d.ts +242 -0
- package/dist/vectorstore/index.d.ts +109 -0
- package/dist/vectorstore/index.js +422 -0
- package/dist/vectorstore/index.js.map +1 -0
- package/package.json +83 -0
package/dist/vectorstore/index.js
ADDED
@@ -0,0 +1,422 @@
// src/vectorstore/sqlite.ts
import Database from "better-sqlite3";
var SQLiteVectorStore = class {
  db;
  tableName;
  dimensions;
  constructor(options) {
    this.db = new Database(options.path || ":memory:");
    this.tableName = options.tableName || "chunks";
    this.dimensions = options.dimensions;
    this.db.pragma("journal_mode = WAL");
  }
  async initialize() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS ${this.tableName} (
        id TEXT PRIMARY KEY,
        document_id TEXT NOT NULL,
        text TEXT NOT NULL,
        embedding TEXT,
        metadata TEXT,
        created_at TEXT DEFAULT CURRENT_TIMESTAMP
      )
    `);
    this.db.exec(`
      CREATE INDEX IF NOT EXISTS idx_${this.tableName}_document_id
      ON ${this.tableName}(document_id)
    `);
    this.db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS ${this.tableName}_fts
      USING fts5(id, text, content='${this.tableName}', content_rowid='rowid')
    `);
    this.db.exec(`
      CREATE TRIGGER IF NOT EXISTS ${this.tableName}_ai AFTER INSERT ON ${this.tableName} BEGIN
        INSERT INTO ${this.tableName}_fts(rowid, id, text) VALUES (new.rowid, new.id, new.text);
      END
    `);
    this.db.exec(`
      CREATE TRIGGER IF NOT EXISTS ${this.tableName}_ad AFTER DELETE ON ${this.tableName} BEGIN
        INSERT INTO ${this.tableName}_fts(${this.tableName}_fts, rowid, id, text)
        VALUES('delete', old.rowid, old.id, old.text);
      END
    `);
  }
  async insert(chunks) {
    const stmt = this.db.prepare(`
      INSERT OR REPLACE INTO ${this.tableName} (id, document_id, text, embedding, metadata, created_at)
      VALUES (?, ?, ?, ?, ?, ?)
    `);
    const insertMany = this.db.transaction((items) => {
      for (const chunk of items) {
        stmt.run(
          chunk.id,
          chunk.documentId,
          chunk.text,
          chunk.embedding ? JSON.stringify(chunk.embedding) : null,
          JSON.stringify(chunk.metadata),
          chunk.createdAt.toISOString()
        );
      }
    });
    insertMany(chunks);
  }
  async denseSearch(embedding, topK, filter) {
    let query = `SELECT * FROM ${this.tableName} WHERE embedding IS NOT NULL`;
    const params = [];
    if (filter?.documentId) {
      query += ` AND document_id = ?`;
      params.push(filter.documentId);
    }
    const rows = this.db.prepare(query).all(...params);
    const results = [];
    for (const row of rows) {
      const chunkEmbedding = JSON.parse(row.embedding);
      const score = this.cosineSimilarity(embedding, chunkEmbedding);
      results.push({
        chunk: this.rowToChunk(row),
        score
      });
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, topK).map((r) => ({
      ...r,
      searchType: "dense"
    }));
  }
  async sparseSearch(query, topK, filter) {
    let ftsQuery = `
      SELECT c.*, bm25(${this.tableName}_fts) as score
      FROM ${this.tableName}_fts fts
      JOIN ${this.tableName} c ON fts.id = c.id
      WHERE ${this.tableName}_fts MATCH ?
    `;
    const params = [this.escapeFTSQuery(query)];
    if (filter?.documentId) {
      ftsQuery += ` AND c.document_id = ?`;
      params.push(filter.documentId);
    }
    ftsQuery += ` ORDER BY score LIMIT ?`;
    params.push(topK);
    try {
      const rows = this.db.prepare(ftsQuery).all(...params);
      return rows.map((row) => ({
        chunk: this.rowToChunk(row),
        score: Math.abs(row.score),
        // BM25 returns negative scores
        searchType: "sparse"
      }));
    } catch {
      return [];
    }
  }
  async deleteByDocumentId(documentId) {
    const result = this.db.prepare(`DELETE FROM ${this.tableName} WHERE document_id = ?`).run(documentId);
    return result.changes;
  }
  async getById(id) {
    const row = this.db.prepare(`SELECT * FROM ${this.tableName} WHERE id = ?`).get(id);
    return row ? this.rowToChunk(row) : null;
  }
  async close() {
    this.db.close();
  }
  /**
   * Calculate cosine similarity between two vectors
   */
  cosineSimilarity(a, b) {
    if (a.length !== b.length) {
      throw new Error("Vectors must have same length");
    }
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }
    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    return denominator === 0 ? 0 : dotProduct / denominator;
  }
  /**
   * Convert database row to DocumentChunk
   */
  rowToChunk(row) {
    return {
      id: row.id,
      documentId: row.document_id,
      text: row.text,
      embedding: row.embedding ? JSON.parse(row.embedding) : void 0,
      metadata: JSON.parse(row.metadata),
      createdAt: new Date(row.created_at)
    };
  }
  /**
   * Escape FTS query to prevent syntax errors
   */
  escapeFTSQuery(query) {
    const cleaned = query.replace(/['"(){}[\]^~*?:\\]/g, " ").trim();
    return cleaned.split(/\s+/).filter((w) => w.length > 0).map((w) => `"${w}"`).join(" OR ");
  }
};
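For orientation, a minimal usage sketch of the SQLiteVectorStore above (not part of the published file). The chunk shape follows insert() and rowToChunk(); the ids, text, metadata payload, and 3-dimensional embedding are made up for illustration:

// Usage sketch (illustrative): in-memory store, one chunk, both search paths.
import { SQLiteVectorStore } from "@chatbot-packages/rag/vectorstore";

const store = new SQLiteVectorStore({ path: ":memory:", dimensions: 3 });
await store.initialize();

await store.insert([
  {
    id: "chunk-1",                        // hypothetical ids and content
    documentId: "doc-1",
    text: "WAL mode improves concurrent reads in SQLite.",
    embedding: [0.1, 0.2, 0.3],           // length should match `dimensions`
    metadata: { section: "intro" },       // assumed metadata payload
    createdAt: new Date(),
  },
]);

// Dense search: cosine similarity computed in JS over every stored embedding.
const dense = await store.denseSearch([0.1, 0.2, 0.3], 5);
// Sparse search: FTS5 MATCH with BM25 scores (query escaped into an OR of quoted terms).
const sparse = await store.sparseSearch("concurrent reads", 5);

console.log(dense[0]?.score, sparse[0]?.chunk.text);
await store.close();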

// src/vectorstore/postgres.ts
import { Pool } from "pg";
var PostgresVectorStore = class {
  pool;
  tableName;
  schema;
  dimensions;
  fullTableName;
  constructor(options) {
    const poolConfig = options.poolConfig || {
      connectionString: options.connectionString,
      max: 10,
      idleTimeoutMillis: 3e4
    };
    this.pool = new Pool(poolConfig);
    this.tableName = options.tableName || "chunks";
    this.schema = options.schema || "public";
    this.dimensions = options.dimensions;
    this.fullTableName = `${this.schema}.${this.tableName}`;
  }
  async initialize() {
    const client = await this.pool.connect();
    try {
      await client.query("CREATE EXTENSION IF NOT EXISTS vector");
      await client.query("CREATE EXTENSION IF NOT EXISTS pg_trgm");
      await client.query(`
        CREATE TABLE IF NOT EXISTS ${this.schema}.documents (
          id TEXT PRIMARY KEY,
          source_id TEXT NOT NULL,
          path TEXT NOT NULL,
          title TEXT NOT NULL,
          metadata JSONB DEFAULT '{}',
          created_at TIMESTAMPTZ DEFAULT NOW(),
          updated_at TIMESTAMPTZ DEFAULT NOW()
        )
      `);
      await client.query(`
        CREATE TABLE IF NOT EXISTS ${this.fullTableName} (
          id TEXT PRIMARY KEY,
          document_id TEXT NOT NULL REFERENCES ${this.schema}.documents(id) ON DELETE CASCADE,
          text TEXT NOT NULL,
          embedding vector(${this.dimensions}),
          metadata JSONB DEFAULT '{}',
          created_at TIMESTAMPTZ DEFAULT NOW()
        )
      `);
      await client.query(`
        CREATE INDEX IF NOT EXISTS idx_${this.tableName}_document_id
        ON ${this.fullTableName}(document_id)
      `);
      await client.query(`
        CREATE INDEX IF NOT EXISTS idx_${this.tableName}_embedding_hnsw
        ON ${this.fullTableName}
        USING hnsw (embedding vector_cosine_ops)
        WITH (m = 16, ef_construction = 64)
      `);
      await client.query(`
        CREATE INDEX IF NOT EXISTS idx_${this.tableName}_text_trgm
        ON ${this.fullTableName}
        USING gin (text gin_trgm_ops)
      `);
      await client.query(`
        CREATE INDEX IF NOT EXISTS idx_${this.tableName}_metadata
        ON ${this.fullTableName}
        USING gin (metadata)
      `);
    } finally {
      client.release();
    }
  }
  async insert(chunks) {
    if (chunks.length === 0) return;
    const client = await this.pool.connect();
    try {
      await client.query("BEGIN");
      for (const chunk of chunks) {
        const embedding = chunk.embedding ? `[${chunk.embedding.join(",")}]` : null;
        await client.query(
          `
            INSERT INTO ${this.fullTableName} (id, document_id, text, embedding, metadata, created_at)
            VALUES ($1, $2, $3, $4::vector, $5, $6)
            ON CONFLICT (id) DO UPDATE SET
              text = EXCLUDED.text,
              embedding = EXCLUDED.embedding,
              metadata = EXCLUDED.metadata
          `,
          [
            chunk.id,
            chunk.documentId,
            chunk.text,
            embedding,
            JSON.stringify(chunk.metadata),
            chunk.createdAt
          ]
        );
      }
      await client.query("COMMIT");
    } catch (error) {
      await client.query("ROLLBACK");
      throw error;
    } finally {
      client.release();
    }
  }
  async denseSearch(embedding, topK, filter) {
    const embeddingStr = `[${embedding.join(",")}]`;
    let query = `
      SELECT
        id,
        document_id,
        text,
        metadata,
        created_at,
        1 - (embedding <=> $1::vector) as score
      FROM ${this.fullTableName}
      WHERE embedding IS NOT NULL
    `;
    const params = [embeddingStr];
    let paramIndex = 2;
    if (filter?.documentId) {
      query += ` AND document_id = $${paramIndex}`;
      params.push(filter.documentId);
      paramIndex++;
    }
    if (filter?.metadata) {
      query += ` AND metadata @> $${paramIndex}`;
      params.push(JSON.stringify(filter.metadata));
      paramIndex++;
    }
    query += ` ORDER BY embedding <=> $1::vector LIMIT $${paramIndex}`;
    params.push(topK);
    const result = await this.pool.query(query, params);
    return result.rows.map((row) => ({
      chunk: this.rowToChunk(row),
      score: row.score,
      searchType: "dense"
    }));
  }
  async sparseSearch(query, topK, filter) {
    let sql = `
      SELECT
        id,
        document_id,
        text,
        metadata,
        created_at,
        similarity(text, $1) as score
      FROM ${this.fullTableName}
      WHERE text % $1
    `;
    const params = [query];
    let paramIndex = 2;
    if (filter?.documentId) {
      sql += ` AND document_id = $${paramIndex}`;
      params.push(filter.documentId);
      paramIndex++;
    }
    sql += ` ORDER BY score DESC LIMIT $${paramIndex}`;
    params.push(topK);
    const result = await this.pool.query(sql, params);
    return result.rows.map((row) => ({
      chunk: this.rowToChunk(row),
      score: row.score,
      searchType: "sparse"
    }));
  }
  async deleteByDocumentId(documentId) {
    const result = await this.pool.query(
      `DELETE FROM ${this.fullTableName} WHERE document_id = $1`,
      [documentId]
    );
    return result.rowCount || 0;
  }
  async getById(id) {
    const result = await this.pool.query(
      `SELECT * FROM ${this.fullTableName} WHERE id = $1`,
      [id]
    );
    return result.rows.length > 0 ? this.rowToChunk(result.rows[0]) : null;
  }
  async close() {
    await this.pool.end();
  }
  /**
   * Get chunk and document counts
   */
  async getStats() {
    const chunksResult = await this.pool.query(
      `SELECT COUNT(*) FROM ${this.fullTableName}`
    );
    const docsResult = await this.pool.query(
      `SELECT COUNT(*) FROM ${this.schema}.documents`
    );
    return {
      chunks: parseInt(chunksResult.rows[0].count, 10),
      documents: parseInt(docsResult.rows[0].count, 10)
    };
  }
  /**
   * Insert or update a document
   */
  async upsertDocument(doc) {
    await this.pool.query(
      `
        INSERT INTO ${this.schema}.documents (id, source_id, path, title, metadata)
        VALUES ($1, $2, $3, $4, $5)
        ON CONFLICT (id) DO UPDATE SET
          path = EXCLUDED.path,
          title = EXCLUDED.title,
          metadata = EXCLUDED.metadata,
          updated_at = NOW()
      `,
      [doc.id, doc.sourceId, doc.path, doc.title, JSON.stringify(doc.metadata || {})]
    );
  }
  rowToChunk(row) {
    return {
      id: row.id,
      documentId: row.document_id,
      text: row.text,
      metadata: row.metadata,
      createdAt: new Date(row.created_at)
    };
  }
};
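A comparable sketch for the PostgresVectorStore (again, not part of the published file). It assumes a reachable PostgreSQL instance where the vector and pg_trgm extensions can be created; the connection string and the 384-dimension embeddings are placeholders:

// Usage sketch (illustrative): pgvector-backed store with a parent document row.
import { PostgresVectorStore } from "@chatbot-packages/rag/vectorstore";

const store = new PostgresVectorStore({
  connectionString: process.env.DATABASE_URL,  // placeholder connection string
  dimensions: 384,                             // placeholder embedding size
});
await store.initialize(); // creates documents/chunks tables plus HNSW, trigram, and metadata indexes

// Chunks reference <schema>.documents(id), so upsert the parent document first.
await store.upsertDocument({
  id: "doc-1",
  sourceId: "help-files",
  path: "docs/getting-started.html",
  title: "Getting Started",
});

await store.insert([
  {
    id: "chunk-1",
    documentId: "doc-1",
    text: "Install the package and run the indexer.",
    embedding: new Array(384).fill(0.01),      // must match the declared dimensions
    metadata: { heading: "Install" },          // assumed metadata payload
    createdAt: new Date(),
  },
]);

// Dense search ranks by cosine distance (embedding <=> query) through the HNSW index;
// sparse search uses pg_trgm similarity on the chunk text.
const results = await store.denseSearch(new Array(384).fill(0.01), 5, { documentId: "doc-1" });
console.log(results.map((r) => [r.chunk.id, r.score]));
await store.close();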

// src/vectorstore/index.ts
function createVectorStore(options) {
  switch (options.type) {
    case "sqlite":
      return new SQLiteVectorStore({
        path: options.connectionString || ":memory:",
        tableName: options.tableName,
        dimensions: options.dimensions
      });
    case "postgres":
      if (!options.connectionString) {
        throw new Error("PostgreSQL requires a connection string");
      }
      return new PostgresVectorStore({
        connectionString: options.connectionString,
        tableName: options.tableName,
        dimensions: options.dimensions
      });
    case "memory":
      return new SQLiteVectorStore({
        path: ":memory:",
        tableName: options.tableName,
        dimensions: options.dimensions
      });
    default:
      throw new Error(`Unknown vector store type: ${options.type}`);
  }
}
export {
  PostgresVectorStore,
  SQLiteVectorStore,
  createVectorStore
};
//# sourceMappingURL=index.js.map
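The createVectorStore factory above picks a backend from options.type; a short sketch of how a caller might use it (illustrative, with a placeholder connection string and dimension count):

// Factory sketch: "memory" and "sqlite" map to SQLiteVectorStore; "postgres" needs a connection string.
import { createVectorStore } from "@chatbot-packages/rag/vectorstore";

const devStore = createVectorStore({ type: "memory", dimensions: 384 });

const prodStore = createVectorStore({
  type: "postgres",
  connectionString: process.env.DATABASE_URL,  // required for the "postgres" case
  dimensions: 384,
  tableName: "chunks",                         // optional; defaults to "chunks"
});

await devStore.initialize();
await prodStore.initialize();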
package/dist/vectorstore/index.js.map
ADDED
@@ -0,0 +1 @@
{"version":3,"sources":["../../src/vectorstore/sqlite.ts","../../src/vectorstore/postgres.ts","../../src/vectorstore/index.ts"],"sourcesContent":["/**\n * SQLite Vector Store\n *\n * Simple vector store using SQLite with better-sqlite3.\n * Stores embeddings as JSON arrays and performs similarity search in JavaScript.\n * Best for development, small datasets, or when PostgreSQL isn't available.\n */\n\nimport Database from 'better-sqlite3';\nimport type { VectorStore, DocumentChunk, SearchResult } from '../types.js';\n\nexport interface SQLiteVectorStoreOptions {\n /** Database file path (use ':memory:' for in-memory) */\n path?: string;\n /** Table name (default: 'chunks') */\n tableName?: string;\n /** Embedding dimensions */\n dimensions: number;\n}\n\nexport class SQLiteVectorStore implements VectorStore {\n private db: Database.Database;\n private tableName: string;\n private dimensions: number;\n\n constructor(options: SQLiteVectorStoreOptions) {\n this.db = new Database(options.path || ':memory:');\n this.tableName = options.tableName || 'chunks';\n this.dimensions = options.dimensions;\n\n // Enable WAL mode for better concurrent access\n this.db.pragma('journal_mode = WAL');\n }\n\n async initialize(): Promise<void> {\n // Create chunks table\n this.db.exec(`\n CREATE TABLE IF NOT EXISTS ${this.tableName} (\n id TEXT PRIMARY KEY,\n document_id TEXT NOT NULL,\n text TEXT NOT NULL,\n embedding TEXT,\n metadata TEXT,\n created_at TEXT DEFAULT CURRENT_TIMESTAMP\n )\n `);\n\n // Create indexes\n this.db.exec(`\n CREATE INDEX IF NOT EXISTS idx_${this.tableName}_document_id\n ON ${this.tableName}(document_id)\n `);\n\n // Create FTS5 virtual table for text search\n this.db.exec(`\n CREATE VIRTUAL TABLE IF NOT EXISTS ${this.tableName}_fts\n USING fts5(id, text, content='${this.tableName}', content_rowid='rowid')\n `);\n\n // Create triggers to keep FTS in sync\n this.db.exec(`\n CREATE TRIGGER IF NOT EXISTS ${this.tableName}_ai AFTER INSERT ON ${this.tableName} BEGIN\n INSERT INTO ${this.tableName}_fts(rowid, id, text) VALUES (new.rowid, new.id, new.text);\n END\n `);\n\n this.db.exec(`\n CREATE TRIGGER IF NOT EXISTS ${this.tableName}_ad AFTER DELETE ON ${this.tableName} BEGIN\n INSERT INTO ${this.tableName}_fts(${this.tableName}_fts, rowid, id, text)\n VALUES('delete', old.rowid, old.id, old.text);\n END\n `);\n }\n\n async insert(chunks: DocumentChunk[]): Promise<void> {\n const stmt = this.db.prepare(`\n INSERT OR REPLACE INTO ${this.tableName} (id, document_id, text, embedding, metadata, created_at)\n VALUES (?, ?, ?, ?, ?, ?)\n `);\n\n const insertMany = this.db.transaction((items: DocumentChunk[]) => {\n for (const chunk of items) {\n stmt.run(\n chunk.id,\n chunk.documentId,\n chunk.text,\n chunk.embedding ? 
JSON.stringify(chunk.embedding) : null,\n JSON.stringify(chunk.metadata),\n chunk.createdAt.toISOString()\n );\n }\n });\n\n insertMany(chunks);\n }\n\n async denseSearch(\n embedding: number[],\n topK: number,\n filter?: Record<string, unknown>\n ): Promise<SearchResult[]> {\n // Get all chunks with embeddings\n let query = `SELECT * FROM ${this.tableName} WHERE embedding IS NOT NULL`;\n const params: unknown[] = [];\n\n // Apply filters\n if (filter?.documentId) {\n query += ` AND document_id = ?`;\n params.push(filter.documentId);\n }\n\n const rows = this.db.prepare(query).all(...params) as Array<{\n id: string;\n document_id: string;\n text: string;\n embedding: string;\n metadata: string;\n created_at: string;\n }>;\n\n // Calculate cosine similarity for each chunk\n const results: Array<{ chunk: DocumentChunk; score: number }> = [];\n\n for (const row of rows) {\n const chunkEmbedding = JSON.parse(row.embedding) as number[];\n const score = this.cosineSimilarity(embedding, chunkEmbedding);\n\n results.push({\n chunk: this.rowToChunk(row),\n score,\n });\n }\n\n // Sort by score and take top K\n results.sort((a, b) => b.score - a.score);\n\n return results.slice(0, topK).map((r) => ({\n ...r,\n searchType: 'dense' as const,\n }));\n }\n\n async sparseSearch(\n query: string,\n topK: number,\n filter?: Record<string, unknown>\n ): Promise<SearchResult[]> {\n // Use FTS5 for text search\n let ftsQuery = `\n SELECT c.*, bm25(${this.tableName}_fts) as score\n FROM ${this.tableName}_fts fts\n JOIN ${this.tableName} c ON fts.id = c.id\n WHERE ${this.tableName}_fts MATCH ?\n `;\n const params: unknown[] = [this.escapeFTSQuery(query)];\n\n // Apply filters\n if (filter?.documentId) {\n ftsQuery += ` AND c.document_id = ?`;\n params.push(filter.documentId);\n }\n\n ftsQuery += ` ORDER BY score LIMIT ?`;\n params.push(topK);\n\n try {\n const rows = this.db.prepare(ftsQuery).all(...params) as Array<{\n id: string;\n document_id: string;\n text: string;\n embedding: string | null;\n metadata: string;\n created_at: string;\n score: number;\n }>;\n\n return rows.map((row) => ({\n chunk: this.rowToChunk(row),\n score: Math.abs(row.score), // BM25 returns negative scores\n searchType: 'sparse' as const,\n }));\n } catch {\n // FTS query failed, return empty results\n return [];\n }\n }\n\n async deleteByDocumentId(documentId: string): Promise<number> {\n const result = this.db\n .prepare(`DELETE FROM ${this.tableName} WHERE document_id = ?`)\n .run(documentId);\n return result.changes;\n }\n\n async getById(id: string): Promise<DocumentChunk | null> {\n const row = this.db\n .prepare(`SELECT * FROM ${this.tableName} WHERE id = ?`)\n .get(id) as {\n id: string;\n document_id: string;\n text: string;\n embedding: string | null;\n metadata: string;\n created_at: string;\n } | undefined;\n\n return row ? this.rowToChunk(row) : null;\n }\n\n async close(): Promise<void> {\n this.db.close();\n }\n\n /**\n * Calculate cosine similarity between two vectors\n */\n private cosineSimilarity(a: number[], b: number[]): number {\n if (a.length !== b.length) {\n throw new Error('Vectors must have same length');\n }\n\n let dotProduct = 0;\n let normA = 0;\n let normB = 0;\n\n for (let i = 0; i < a.length; i++) {\n dotProduct += a[i] * b[i];\n normA += a[i] * a[i];\n normB += b[i] * b[i];\n }\n\n const denominator = Math.sqrt(normA) * Math.sqrt(normB);\n return denominator === 0 ? 
0 : dotProduct / denominator;\n }\n\n /**\n * Convert database row to DocumentChunk\n */\n private rowToChunk(row: {\n id: string;\n document_id: string;\n text: string;\n embedding: string | null;\n metadata: string;\n created_at: string;\n }): DocumentChunk {\n return {\n id: row.id,\n documentId: row.document_id,\n text: row.text,\n embedding: row.embedding ? JSON.parse(row.embedding) : undefined,\n metadata: JSON.parse(row.metadata),\n createdAt: new Date(row.created_at),\n };\n }\n\n /**\n * Escape FTS query to prevent syntax errors\n */\n private escapeFTSQuery(query: string): string {\n // Remove special FTS characters and wrap in quotes for phrase search\n const cleaned = query.replace(/['\"(){}[\\]^~*?:\\\\]/g, ' ').trim();\n return cleaned\n .split(/\\s+/)\n .filter((w) => w.length > 0)\n .map((w) => `\"${w}\"`)\n .join(' OR ');\n }\n}\n","/**\n * PostgreSQL Vector Store with pgvector\n *\n * Production-ready vector store using PostgreSQL with pgvector extension.\n * Supports HNSW indexing for fast similarity search.\n */\n\nimport { Pool, type PoolConfig } from 'pg';\nimport type { VectorStore, DocumentChunk, SearchResult } from '../types.js';\n\nexport interface PostgresVectorStoreOptions {\n /** Connection string or pool config */\n connectionString?: string;\n poolConfig?: PoolConfig;\n /** Table name (default: 'chunks') */\n tableName?: string;\n /** Embedding dimensions */\n dimensions: number;\n /** Schema name (default: 'public') */\n schema?: string;\n}\n\nexport class PostgresVectorStore implements VectorStore {\n private pool: Pool;\n private tableName: string;\n private schema: string;\n private dimensions: number;\n private fullTableName: string;\n\n constructor(options: PostgresVectorStoreOptions) {\n const poolConfig: PoolConfig = options.poolConfig || {\n connectionString: options.connectionString,\n max: 10,\n idleTimeoutMillis: 30000,\n };\n\n this.pool = new Pool(poolConfig);\n this.tableName = options.tableName || 'chunks';\n this.schema = options.schema || 'public';\n this.dimensions = options.dimensions;\n this.fullTableName = `${this.schema}.${this.tableName}`;\n }\n\n async initialize(): Promise<void> {\n const client = await this.pool.connect();\n\n try {\n // Enable required extensions\n await client.query('CREATE EXTENSION IF NOT EXISTS vector');\n await client.query('CREATE EXTENSION IF NOT EXISTS pg_trgm');\n\n // Create documents table\n await client.query(`\n CREATE TABLE IF NOT EXISTS ${this.schema}.documents (\n id TEXT PRIMARY KEY,\n source_id TEXT NOT NULL,\n path TEXT NOT NULL,\n title TEXT NOT NULL,\n metadata JSONB DEFAULT '{}',\n created_at TIMESTAMPTZ DEFAULT NOW(),\n updated_at TIMESTAMPTZ DEFAULT NOW()\n )\n `);\n\n // Create chunks table\n await client.query(`\n CREATE TABLE IF NOT EXISTS ${this.fullTableName} (\n id TEXT PRIMARY KEY,\n document_id TEXT NOT NULL REFERENCES ${this.schema}.documents(id) ON DELETE CASCADE,\n text TEXT NOT NULL,\n embedding vector(${this.dimensions}),\n metadata JSONB DEFAULT '{}',\n created_at TIMESTAMPTZ DEFAULT NOW()\n )\n `);\n\n // Create indexes\n await client.query(`\n CREATE INDEX IF NOT EXISTS idx_${this.tableName}_document_id\n ON ${this.fullTableName}(document_id)\n `);\n\n // HNSW index for vector similarity (better than IVFFlat for most cases)\n await client.query(`\n CREATE INDEX IF NOT EXISTS idx_${this.tableName}_embedding_hnsw\n ON ${this.fullTableName}\n USING hnsw (embedding vector_cosine_ops)\n WITH (m = 16, ef_construction = 64)\n `);\n\n // GIN index for text search\n await 
client.query(`\n CREATE INDEX IF NOT EXISTS idx_${this.tableName}_text_trgm\n ON ${this.fullTableName}\n USING gin (text gin_trgm_ops)\n `);\n\n // GIN index for metadata\n await client.query(`\n CREATE INDEX IF NOT EXISTS idx_${this.tableName}_metadata\n ON ${this.fullTableName}\n USING gin (metadata)\n `);\n } finally {\n client.release();\n }\n }\n\n async insert(chunks: DocumentChunk[]): Promise<void> {\n if (chunks.length === 0) return;\n\n const client = await this.pool.connect();\n\n try {\n await client.query('BEGIN');\n\n // Use COPY for bulk insert (much faster than individual inserts)\n for (const chunk of chunks) {\n const embedding = chunk.embedding\n ? `[${chunk.embedding.join(',')}]`\n : null;\n\n await client.query(\n `\n INSERT INTO ${this.fullTableName} (id, document_id, text, embedding, metadata, created_at)\n VALUES ($1, $2, $3, $4::vector, $5, $6)\n ON CONFLICT (id) DO UPDATE SET\n text = EXCLUDED.text,\n embedding = EXCLUDED.embedding,\n metadata = EXCLUDED.metadata\n `,\n [\n chunk.id,\n chunk.documentId,\n chunk.text,\n embedding,\n JSON.stringify(chunk.metadata),\n chunk.createdAt,\n ]\n );\n }\n\n await client.query('COMMIT');\n } catch (error) {\n await client.query('ROLLBACK');\n throw error;\n } finally {\n client.release();\n }\n }\n\n async denseSearch(\n embedding: number[],\n topK: number,\n filter?: Record<string, unknown>\n ): Promise<SearchResult[]> {\n const embeddingStr = `[${embedding.join(',')}]`;\n\n let query = `\n SELECT\n id,\n document_id,\n text,\n metadata,\n created_at,\n 1 - (embedding <=> $1::vector) as score\n FROM ${this.fullTableName}\n WHERE embedding IS NOT NULL\n `;\n const params: unknown[] = [embeddingStr];\n let paramIndex = 2;\n\n // Apply filters\n if (filter?.documentId) {\n query += ` AND document_id = $${paramIndex}`;\n params.push(filter.documentId);\n paramIndex++;\n }\n\n if (filter?.metadata) {\n query += ` AND metadata @> $${paramIndex}`;\n params.push(JSON.stringify(filter.metadata));\n paramIndex++;\n }\n\n query += ` ORDER BY embedding <=> $1::vector LIMIT $${paramIndex}`;\n params.push(topK);\n\n const result = await this.pool.query(query, params);\n\n return result.rows.map((row) => ({\n chunk: this.rowToChunk(row),\n score: row.score,\n searchType: 'dense' as const,\n }));\n }\n\n async sparseSearch(\n query: string,\n topK: number,\n filter?: Record<string, unknown>\n ): Promise<SearchResult[]> {\n // Use trigram similarity for BM25-like text search\n let sql = `\n SELECT\n id,\n document_id,\n text,\n metadata,\n created_at,\n similarity(text, $1) as score\n FROM ${this.fullTableName}\n WHERE text % $1\n `;\n const params: unknown[] = [query];\n let paramIndex = 2;\n\n // Apply filters\n if (filter?.documentId) {\n sql += ` AND document_id = $${paramIndex}`;\n params.push(filter.documentId);\n paramIndex++;\n }\n\n sql += ` ORDER BY score DESC LIMIT $${paramIndex}`;\n params.push(topK);\n\n const result = await this.pool.query(sql, params);\n\n return result.rows.map((row) => ({\n chunk: this.rowToChunk(row),\n score: row.score,\n searchType: 'sparse' as const,\n }));\n }\n\n async deleteByDocumentId(documentId: string): Promise<number> {\n const result = await this.pool.query(\n `DELETE FROM ${this.fullTableName} WHERE document_id = $1`,\n [documentId]\n );\n return result.rowCount || 0;\n }\n\n async getById(id: string): Promise<DocumentChunk | null> {\n const result = await this.pool.query(\n `SELECT * FROM ${this.fullTableName} WHERE id = $1`,\n [id]\n );\n return result.rows.length > 0 ? 
this.rowToChunk(result.rows[0]) : null;\n }\n\n async close(): Promise<void> {\n await this.pool.end();\n }\n\n /**\n * Get chunk and document counts\n */\n async getStats(): Promise<{ chunks: number; documents: number }> {\n const chunksResult = await this.pool.query(\n `SELECT COUNT(*) FROM ${this.fullTableName}`\n );\n const docsResult = await this.pool.query(\n `SELECT COUNT(*) FROM ${this.schema}.documents`\n );\n\n return {\n chunks: parseInt(chunksResult.rows[0].count, 10),\n documents: parseInt(docsResult.rows[0].count, 10),\n };\n }\n\n /**\n * Insert or update a document\n */\n async upsertDocument(doc: {\n id: string;\n sourceId: string;\n path: string;\n title: string;\n metadata?: Record<string, unknown>;\n }): Promise<void> {\n await this.pool.query(\n `\n INSERT INTO ${this.schema}.documents (id, source_id, path, title, metadata)\n VALUES ($1, $2, $3, $4, $5)\n ON CONFLICT (id) DO UPDATE SET\n path = EXCLUDED.path,\n title = EXCLUDED.title,\n metadata = EXCLUDED.metadata,\n updated_at = NOW()\n `,\n [doc.id, doc.sourceId, doc.path, doc.title, JSON.stringify(doc.metadata || {})]\n );\n }\n\n private rowToChunk(row: {\n id: string;\n document_id: string;\n text: string;\n embedding?: string;\n metadata: Record<string, unknown>;\n created_at: Date;\n }): DocumentChunk {\n return {\n id: row.id,\n documentId: row.document_id,\n text: row.text,\n metadata: row.metadata as DocumentChunk['metadata'],\n createdAt: new Date(row.created_at),\n };\n }\n}\n","/**\n * Vector Store Implementations\n *\n * Store and search document chunks using vector similarity.\n */\n\nexport { SQLiteVectorStore, type SQLiteVectorStoreOptions } from './sqlite.js';\nexport { PostgresVectorStore, type PostgresVectorStoreOptions } from './postgres.js';\n\nimport type { VectorStore, VectorStoreOptions } from '../types.js';\nimport { SQLiteVectorStore } from './sqlite.js';\nimport { PostgresVectorStore } from './postgres.js';\n\n/**\n * Create a vector store based on options\n */\nexport function createVectorStore(options: VectorStoreOptions): VectorStore {\n switch (options.type) {\n case 'sqlite':\n return new SQLiteVectorStore({\n path: options.connectionString || ':memory:',\n tableName: options.tableName,\n dimensions: options.dimensions,\n });\n\n case 'postgres':\n if (!options.connectionString) {\n throw new Error('PostgreSQL requires a connection string');\n }\n return new PostgresVectorStore({\n connectionString: options.connectionString,\n tableName: options.tableName,\n dimensions: options.dimensions,\n });\n\n case 'memory':\n // Use SQLite in-memory mode\n return new SQLiteVectorStore({\n path: ':memory:',\n tableName: options.tableName,\n dimensions: options.dimensions,\n });\n\n default:\n throw new Error(`Unknown vector store type: ${options.type}`);\n 
}\n}\n"],"mappings":";AAQA,OAAO,cAAc;AAYd,IAAM,oBAAN,MAA+C;AAAA,EAC5C;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,SAAmC;AAC7C,SAAK,KAAK,IAAI,SAAS,QAAQ,QAAQ,UAAU;AACjD,SAAK,YAAY,QAAQ,aAAa;AACtC,SAAK,aAAa,QAAQ;AAG1B,SAAK,GAAG,OAAO,oBAAoB;AAAA,EACrC;AAAA,EAEA,MAAM,aAA4B;AAEhC,SAAK,GAAG,KAAK;AAAA,mCACkB,KAAK,SAAS;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,KAQ5C;AAGD,SAAK,GAAG,KAAK;AAAA,uCACsB,KAAK,SAAS;AAAA,WAC1C,KAAK,SAAS;AAAA,KACpB;AAGD,SAAK,GAAG,KAAK;AAAA,2CAC0B,KAAK,SAAS;AAAA,sCACnB,KAAK,SAAS;AAAA,KAC/C;AAGD,SAAK,GAAG,KAAK;AAAA,qCACoB,KAAK,SAAS,uBAAuB,KAAK,SAAS;AAAA,sBAClE,KAAK,SAAS;AAAA;AAAA,KAE/B;AAED,SAAK,GAAG,KAAK;AAAA,qCACoB,KAAK,SAAS,uBAAuB,KAAK,SAAS;AAAA,sBAClE,KAAK,SAAS,QAAQ,KAAK,SAAS;AAAA;AAAA;AAAA,KAGrD;AAAA,EACH;AAAA,EAEA,MAAM,OAAO,QAAwC;AACnD,UAAM,OAAO,KAAK,GAAG,QAAQ;AAAA,+BACF,KAAK,SAAS;AAAA;AAAA,KAExC;AAED,UAAM,aAAa,KAAK,GAAG,YAAY,CAAC,UAA2B;AACjE,iBAAW,SAAS,OAAO;AACzB,aAAK;AAAA,UACH,MAAM;AAAA,UACN,MAAM;AAAA,UACN,MAAM;AAAA,UACN,MAAM,YAAY,KAAK,UAAU,MAAM,SAAS,IAAI;AAAA,UACpD,KAAK,UAAU,MAAM,QAAQ;AAAA,UAC7B,MAAM,UAAU,YAAY;AAAA,QAC9B;AAAA,MACF;AAAA,IACF,CAAC;AAED,eAAW,MAAM;AAAA,EACnB;AAAA,EAEA,MAAM,YACJ,WACA,MACA,QACyB;AAEzB,QAAI,QAAQ,iBAAiB,KAAK,SAAS;AAC3C,UAAM,SAAoB,CAAC;AAG3B,QAAI,QAAQ,YAAY;AACtB,eAAS;AACT,aAAO,KAAK,OAAO,UAAU;AAAA,IAC/B;AAEA,UAAM,OAAO,KAAK,GAAG,QAAQ,KAAK,EAAE,IAAI,GAAG,MAAM;AAUjD,UAAM,UAA0D,CAAC;AAEjE,eAAW,OAAO,MAAM;AACtB,YAAM,iBAAiB,KAAK,MAAM,IAAI,SAAS;AAC/C,YAAM,QAAQ,KAAK,iBAAiB,WAAW,cAAc;AAE7D,cAAQ,KAAK;AAAA,QACX,OAAO,KAAK,WAAW,GAAG;AAAA,QAC1B;AAAA,MACF,CAAC;AAAA,IACH;AAGA,YAAQ,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAExC,WAAO,QAAQ,MAAM,GAAG,IAAI,EAAE,IAAI,CAAC,OAAO;AAAA,MACxC,GAAG;AAAA,MACH,YAAY;AAAA,IACd,EAAE;AAAA,EACJ;AAAA,EAEA,MAAM,aACJ,OACA,MACA,QACyB;AAEzB,QAAI,WAAW;AAAA,yBACM,KAAK,SAAS;AAAA,aAC1B,KAAK,SAAS;AAAA,aACd,KAAK,SAAS;AAAA,cACb,KAAK,SAAS;AAAA;AAExB,UAAM,SAAoB,CAAC,KAAK,eAAe,KAAK,CAAC;AAGrD,QAAI,QAAQ,YAAY;AACtB,kBAAY;AACZ,aAAO,KAAK,OAAO,UAAU;AAAA,IAC/B;AAEA,gBAAY;AACZ,WAAO,KAAK,IAAI;AAEhB,QAAI;AACF,YAAM,OAAO,KAAK,GAAG,QAAQ,QAAQ,EAAE,IAAI,GAAG,MAAM;AAUpD,aAAO,KAAK,IAAI,CAAC,SAAS;AAAA,QACxB,OAAO,KAAK,WAAW,GAAG;AAAA,QAC1B,OAAO,KAAK,IAAI,IAAI,KAAK;AAAA;AAAA,QACzB,YAAY;AAAA,MACd,EAAE;AAAA,IACJ,QAAQ;AAEN,aAAO,CAAC;AAAA,IACV;AAAA,EACF;AAAA,EAEA,MAAM,mBAAmB,YAAqC;AAC5D,UAAM,SAAS,KAAK,GACjB,QAAQ,eAAe,KAAK,SAAS,wBAAwB,EAC7D,IAAI,UAAU;AACjB,WAAO,OAAO;AAAA,EAChB;AAAA,EAEA,MAAM,QAAQ,IAA2C;AACvD,UAAM,MAAM,KAAK,GACd,QAAQ,iBAAiB,KAAK,SAAS,eAAe,EACtD,IAAI,EAAE;AAST,WAAO,MAAM,KAAK,WAAW,GAAG,IAAI;AAAA,EACtC;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,GAAG,MAAM;AAAA,EAChB;AAAA;AAAA;AAAA;AAAA,EAKQ,iBAAiB,GAAa,GAAqB;AACzD,QAAI,EAAE,WAAW,EAAE,QAAQ;AACzB,YAAM,IAAI,MAAM,+BAA+B;AAAA,IACjD;AAEA,QAAI,aAAa;AACjB,QAAI,QAAQ;AACZ,QAAI,QAAQ;AAEZ,aAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,oBAAc,EAAE,CAAC,IAAI,EAAE,CAAC;AACxB,eAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AACnB,eAAS,EAAE,CAAC,IAAI,EAAE,CAAC;AAAA,IACrB;AAEA,UAAM,cAAc,KAAK,KAAK,KAAK,IAAI,KAAK,KAAK,KAAK;AACtD,WAAO,gBAAgB,IAAI,IAAI,aAAa;AAAA,EAC9C;AAAA;AAAA;AAAA;AAAA,EAKQ,WAAW,KAOD;AAChB,WAAO;AAAA,MACL,IAAI,IAAI;AAAA,MACR,YAAY,IAAI;AAAA,MAChB,MAAM,IAAI;AAAA,MACV,WAAW,IAAI,YAAY,KAAK,MAAM,IAAI,SAAS,IAAI;AAAA,MACvD,UAAU,KAAK,MAAM,IAAI,QAAQ;AAAA,MACjC,WAAW,IAAI,KAAK,IAAI,UAAU;AAAA,IACpC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKQ,eAAe,OAAuB;AAE5C,UAAM,UAAU,MAAM,QAAQ,uBAAuB,GAAG,EAAE,KAAK;AAC/D,WAAO,QACJ,MAAM,KAAK,EACX,OAAO,CAAC,MAAM,EAAE,SAAS,CAAC,EAC1B,IAAI,CAAC,MAAM,IAAI,CAAC,GAAG,EACnB,KAAK,MAAM;AAAA,EAChB;AACF;;;ACrQA,SAAS,YAA6B;AAe/B,IAAM,sBAAN,MAAiD;AAAA,EAC9C;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,SAAqC;AAC/C,UAAM,aAAyB,QAAQ,cAAc;AAAA,MACnD,kBAAkB,
QAAQ;AAAA,MAC1B,KAAK;AAAA,MACL,mBAAmB;AAAA,IACrB;AAEA,SAAK,OAAO,IAAI,KAAK,UAAU;AAC/B,SAAK,YAAY,QAAQ,aAAa;AACtC,SAAK,SAAS,QAAQ,UAAU;AAChC,SAAK,aAAa,QAAQ;AAC1B,SAAK,gBAAgB,GAAG,KAAK,MAAM,IAAI,KAAK,SAAS;AAAA,EACvD;AAAA,EAEA,MAAM,aAA4B;AAChC,UAAM,SAAS,MAAM,KAAK,KAAK,QAAQ;AAEvC,QAAI;AAEF,YAAM,OAAO,MAAM,uCAAuC;AAC1D,YAAM,OAAO,MAAM,wCAAwC;AAG3D,YAAM,OAAO,MAAM;AAAA,qCACY,KAAK,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,OASzC;AAGD,YAAM,OAAO,MAAM;AAAA,qCACY,KAAK,aAAa;AAAA;AAAA,iDAEN,KAAK,MAAM;AAAA;AAAA,6BAE/B,KAAK,UAAU;AAAA;AAAA;AAAA;AAAA,OAIrC;AAGD,YAAM,OAAO,MAAM;AAAA,yCACgB,KAAK,SAAS;AAAA,aAC1C,KAAK,aAAa;AAAA,OACxB;AAGD,YAAM,OAAO,MAAM;AAAA,yCACgB,KAAK,SAAS;AAAA,aAC1C,KAAK,aAAa;AAAA;AAAA;AAAA,OAGxB;AAGD,YAAM,OAAO,MAAM;AAAA,yCACgB,KAAK,SAAS;AAAA,aAC1C,KAAK,aAAa;AAAA;AAAA,OAExB;AAGD,YAAM,OAAO,MAAM;AAAA,yCACgB,KAAK,SAAS;AAAA,aAC1C,KAAK,aAAa;AAAA;AAAA,OAExB;AAAA,IACH,UAAE;AACA,aAAO,QAAQ;AAAA,IACjB;AAAA,EACF;AAAA,EAEA,MAAM,OAAO,QAAwC;AACnD,QAAI,OAAO,WAAW,EAAG;AAEzB,UAAM,SAAS,MAAM,KAAK,KAAK,QAAQ;AAEvC,QAAI;AACF,YAAM,OAAO,MAAM,OAAO;AAG1B,iBAAW,SAAS,QAAQ;AAC1B,cAAM,YAAY,MAAM,YACpB,IAAI,MAAM,UAAU,KAAK,GAAG,CAAC,MAC7B;AAEJ,cAAM,OAAO;AAAA,UACX;AAAA,wBACc,KAAK,aAAa;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,UAOhC;AAAA,YACE,MAAM;AAAA,YACN,MAAM;AAAA,YACN,MAAM;AAAA,YACN;AAAA,YACA,KAAK,UAAU,MAAM,QAAQ;AAAA,YAC7B,MAAM;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAEA,YAAM,OAAO,MAAM,QAAQ;AAAA,IAC7B,SAAS,OAAO;AACd,YAAM,OAAO,MAAM,UAAU;AAC7B,YAAM;AAAA,IACR,UAAE;AACA,aAAO,QAAQ;AAAA,IACjB;AAAA,EACF;AAAA,EAEA,MAAM,YACJ,WACA,MACA,QACyB;AACzB,UAAM,eAAe,IAAI,UAAU,KAAK,GAAG,CAAC;AAE5C,QAAI,QAAQ;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,aAQH,KAAK,aAAa;AAAA;AAAA;AAG3B,UAAM,SAAoB,CAAC,YAAY;AACvC,QAAI,aAAa;AAGjB,QAAI,QAAQ,YAAY;AACtB,eAAS,uBAAuB,UAAU;AAC1C,aAAO,KAAK,OAAO,UAAU;AAC7B;AAAA,IACF;AAEA,QAAI,QAAQ,UAAU;AACpB,eAAS,qBAAqB,UAAU;AACxC,aAAO,KAAK,KAAK,UAAU,OAAO,QAAQ,CAAC;AAC3C;AAAA,IACF;AAEA,aAAS,6CAA6C,UAAU;AAChE,WAAO,KAAK,IAAI;AAEhB,UAAM,SAAS,MAAM,KAAK,KAAK,MAAM,OAAO,MAAM;AAElD,WAAO,OAAO,KAAK,IAAI,CAAC,SAAS;AAAA,MAC/B,OAAO,KAAK,WAAW,GAAG;AAAA,MAC1B,OAAO,IAAI;AAAA,MACX,YAAY;AAAA,IACd,EAAE;AAAA,EACJ;AAAA,EAEA,MAAM,aACJ,OACA,MACA,QACyB;AAEzB,QAAI,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,aAQD,KAAK,aAAa;AAAA;AAAA;AAG3B,UAAM,SAAoB,CAAC,KAAK;AAChC,QAAI,aAAa;AAGjB,QAAI,QAAQ,YAAY;AACtB,aAAO,uBAAuB,UAAU;AACxC,aAAO,KAAK,OAAO,UAAU;AAC7B;AAAA,IACF;AAEA,WAAO,+BAA+B,UAAU;AAChD,WAAO,KAAK,IAAI;AAEhB,UAAM,SAAS,MAAM,KAAK,KAAK,MAAM,KAAK,MAAM;AAEhD,WAAO,OAAO,KAAK,IAAI,CAAC,SAAS;AAAA,MAC/B,OAAO,KAAK,WAAW,GAAG;AAAA,MAC1B,OAAO,IAAI;AAAA,MACX,YAAY;AAAA,IACd,EAAE;AAAA,EACJ;AAAA,EAEA,MAAM,mBAAmB,YAAqC;AAC5D,UAAM,SAAS,MAAM,KAAK,KAAK;AAAA,MAC7B,eAAe,KAAK,aAAa;AAAA,MACjC,CAAC,UAAU;AAAA,IACb;AACA,WAAO,OAAO,YAAY;AAAA,EAC5B;AAAA,EAEA,MAAM,QAAQ,IAA2C;AACvD,UAAM,SAAS,MAAM,KAAK,KAAK;AAAA,MAC7B,iBAAiB,KAAK,aAAa;AAAA,MACnC,CAAC,EAAE;AAAA,IACL;AACA,WAAO,OAAO,KAAK,SAAS,IAAI,KAAK,WAAW,OAAO,KAAK,CAAC,CAAC,IAAI;AAAA,EACpE;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,KAAK,KAAK,IAAI;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,WAA2D;AAC/D,UAAM,eAAe,MAAM,KAAK,KAAK;AAAA,MACnC,wBAAwB,KAAK,aAAa;AAAA,IAC5C;AACA,UAAM,aAAa,MAAM,KAAK,KAAK;AAAA,MACjC,wBAAwB,KAAK,MAAM;AAAA,IACrC;AAEA,WAAO;AAAA,MACL,QAAQ,SAAS,aAAa,KAAK,CAAC,EAAE,OAAO,EAAE;AAAA,MAC/C,WAAW,SAAS,WAAW,KAAK,CAAC,EAAE,OAAO,EAAE;AAAA,IAClD;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAM,eAAe,KAMH;AAChB,UAAM,KAAK,KAAK;AAAA,MACd;AAAA,oBACc,KAAK,MAAM;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,MAQzB,CAAC,IAAI,IAAI,IAAI,UAAU,IAAI,MAAM,IAAI,OAAO,KAAK,UAAU,IAAI,YAAY,CAAC,CAAC,CAAC;AAAA,IAChF;AAAA,EACF;AAAA,EAEQ,WAAW,KAOD;AAChB,WAAO;AAAA,MACL,IAAI,IAAI;AAAA,MACR,YAAY,IAAI;
AAAA,MAChB,MAAM,IAAI;AAAA,MACV,UAAU,IAAI;AAAA,MACd,WAAW,IAAI,KAAK,IAAI,UAAU;AAAA,IACpC;AAAA,EACF;AACF;;;ACzSO,SAAS,kBAAkB,SAA0C;AAC1E,UAAQ,QAAQ,MAAM;AAAA,IACpB,KAAK;AACH,aAAO,IAAI,kBAAkB;AAAA,QAC3B,MAAM,QAAQ,oBAAoB;AAAA,QAClC,WAAW,QAAQ;AAAA,QACnB,YAAY,QAAQ;AAAA,MACtB,CAAC;AAAA,IAEH,KAAK;AACH,UAAI,CAAC,QAAQ,kBAAkB;AAC7B,cAAM,IAAI,MAAM,yCAAyC;AAAA,MAC3D;AACA,aAAO,IAAI,oBAAoB;AAAA,QAC7B,kBAAkB,QAAQ;AAAA,QAC1B,WAAW,QAAQ;AAAA,QACnB,YAAY,QAAQ;AAAA,MACtB,CAAC;AAAA,IAEH,KAAK;AAEH,aAAO,IAAI,kBAAkB;AAAA,QAC3B,MAAM;AAAA,QACN,WAAW,QAAQ;AAAA,QACnB,YAAY,QAAQ;AAAA,MACtB,CAAC;AAAA,IAEH;AACE,YAAM,IAAI,MAAM,8BAA8B,QAAQ,IAAI,EAAE;AAAA,EAChE;AACF;","names":[]}
package/package.json
ADDED
@@ -0,0 +1,83 @@
{
  "name": "@chatbot-packages/rag",
  "version": "0.1.0",
  "description": "RAG (Retrieval-Augmented Generation) system for documentation Q&A",
  "type": "module",
  "main": "./dist/index.js",
  "module": "./dist/index.js",
  "types": "./dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.js"
    },
    "./extractors": {
      "types": "./dist/extractors/index.d.ts",
      "import": "./dist/extractors/index.js"
    },
    "./chunking": {
      "types": "./dist/chunking/index.d.ts",
      "import": "./dist/chunking/index.js"
    },
    "./embeddings": {
      "types": "./dist/embeddings/index.d.ts",
      "import": "./dist/embeddings/index.js"
    },
    "./vectorstore": {
      "types": "./dist/vectorstore/index.d.ts",
      "import": "./dist/vectorstore/index.js"
    },
    "./retrieval": {
      "types": "./dist/retrieval/index.d.ts",
      "import": "./dist/retrieval/index.js"
    }
  },
  "files": [
    "dist",
    "README.md"
  ],
  "dependencies": {
    "@xenova/transformers": "^2.17.0",
    "better-sqlite3": "^11.0.0",
    "cheerio": "^1.0.0",
    "html-to-text": "^9.0.5",
    "pg": "^8.12.0",
    "pgvector": "^0.2.0",
    "@chatbot-packages/ai": "0.1.0",
    "@chatbot-packages/types": "0.1.0",
    "@chatbot-packages/utils": "0.1.0"
  },
  "devDependencies": {
    "@types/better-sqlite3": "^7.6.11",
    "@types/html-to-text": "^9.0.4",
    "@types/node": "^20.0.0",
    "@types/pg": "^8.11.6",
    "tsup": "^8.0.0",
    "typescript": "^5.4.0",
    "vitest": "^1.6.0"
  },
  "publishConfig": {
    "access": "public"
  },
  "keywords": [
    "rag",
    "retrieval-augmented-generation",
    "chatbot",
    "documentation",
    "help-files",
    "chm",
    "embeddings",
    "vector-search"
  ],
  "author": "Robi",
  "license": "MIT",
  "scripts": {
    "build": "tsup",
    "dev": "tsup --watch",
    "typecheck": "tsc --noEmit",
    "test": "vitest run",
    "test:watch": "vitest",
    "lint": "eslint src/",
    "clean": "rm -rf dist"
  }
}
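The exports map above exposes each dist entry as an ESM-only subpath, so consumers can import one area of the package instead of the bundled root. A sketch of that (the vectorstore names are confirmed by this diff; the other subpaths are imported as namespaces because their named exports are not shown here):

// Import sketch (illustrative): subpath entry points declared in "exports".
import { createVectorStore, SQLiteVectorStore } from "@chatbot-packages/rag/vectorstore";
import * as chunking from "@chatbot-packages/rag/chunking";
import * as extractors from "@chatbot-packages/rag/extractors";

const store = createVectorStore({
  type: "sqlite",
  connectionString: "./rag.db",  // mapped to the SQLite file path by the factory
  dimensions: 384,               // placeholder embedding size
});
await store.initialize();
console.log(Object.keys(chunking), Object.keys(extractors), store instanceof SQLiteVectorStore);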