@mcp-monorepo/notion-query 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +53 -6
- package/dist/index.js.map +1 -1
- package/dist/lib/config.d.ts +44 -0
- package/dist/lib/config.d.ts.map +1 -0
- package/dist/lib/config.js +49 -0
- package/dist/lib/config.js.map +1 -0
- package/dist/lib/notion-syncer.d.ts +27 -0
- package/dist/lib/notion-syncer.d.ts.map +1 -0
- package/dist/lib/notion-syncer.js +212 -0
- package/dist/lib/notion-syncer.js.map +1 -0
- package/dist/lib/property-parser.d.ts +8 -10
- package/dist/lib/property-parser.d.ts.map +1 -1
- package/dist/lib/property-parser.js +102 -15
- package/dist/lib/property-parser.js.map +1 -1
- package/dist/lib/sync-state-manager.d.ts +29 -0
- package/dist/lib/sync-state-manager.d.ts.map +1 -0
- package/dist/lib/sync-state-manager.js +45 -0
- package/dist/lib/sync-state-manager.js.map +1 -0
- package/dist/local-rag/DEMO.d.ts +22 -0
- package/dist/local-rag/DEMO.d.ts.map +1 -0
- package/dist/local-rag/DEMO.js +142 -0
- package/dist/local-rag/DEMO.js.map +1 -0
- package/dist/local-rag/chunker.d.ts +24 -0
- package/dist/local-rag/chunker.d.ts.map +1 -0
- package/dist/local-rag/chunker.js +58 -0
- package/dist/local-rag/chunker.js.map +1 -0
- package/dist/local-rag/embedder.d.ts +43 -0
- package/dist/local-rag/embedder.d.ts.map +1 -0
- package/dist/local-rag/embedder.js +74 -0
- package/dist/local-rag/embedder.js.map +1 -0
- package/dist/local-rag/embedder.service.d.ts +15 -0
- package/dist/local-rag/embedder.service.d.ts.map +1 -0
- package/dist/local-rag/embedder.service.js +84 -0
- package/dist/local-rag/embedder.service.js.map +1 -0
- package/dist/local-rag/embedder.worker.d.ts +2 -0
- package/dist/local-rag/embedder.worker.d.ts.map +1 -0
- package/dist/local-rag/embedder.worker.js +34 -0
- package/dist/local-rag/embedder.worker.js.map +1 -0
- package/dist/local-rag/errors.d.ts +31 -0
- package/dist/local-rag/errors.d.ts.map +1 -0
- package/dist/local-rag/errors.js +47 -0
- package/dist/local-rag/errors.js.map +1 -0
- package/dist/local-rag/html-parser.d.ts +2 -0
- package/dist/local-rag/html-parser.d.ts.map +1 -0
- package/dist/local-rag/html-parser.js +32 -0
- package/dist/local-rag/html-parser.js.map +1 -0
- package/dist/local-rag/index.d.ts +67 -0
- package/dist/local-rag/index.d.ts.map +1 -0
- package/dist/local-rag/index.js +410 -0
- package/dist/local-rag/index.js.map +1 -0
- package/dist/local-rag/parser.d.ts +59 -0
- package/dist/local-rag/parser.d.ts.map +1 -0
- package/dist/local-rag/parser.js +206 -0
- package/dist/local-rag/parser.js.map +1 -0
- package/dist/local-rag/types.d.ts +209 -0
- package/dist/local-rag/types.d.ts.map +1 -0
- package/dist/local-rag/types.js +5 -0
- package/dist/local-rag/types.js.map +1 -0
- package/dist/local-rag/utils/pool.d.ts +60 -0
- package/dist/local-rag/utils/pool.d.ts.map +1 -0
- package/dist/local-rag/utils/pool.js +140 -0
- package/dist/local-rag/utils/pool.js.map +1 -0
- package/dist/local-rag/utils/typed-emitter.d.ts +28 -0
- package/dist/local-rag/utils/typed-emitter.d.ts.map +1 -0
- package/dist/local-rag/utils/typed-emitter.js +44 -0
- package/dist/local-rag/utils/typed-emitter.js.map +1 -0
- package/dist/local-rag/vectordb/index.d.ts +91 -0
- package/dist/local-rag/vectordb/index.d.ts.map +1 -0
- package/dist/local-rag/vectordb/index.js +278 -0
- package/dist/local-rag/vectordb/index.js.map +1 -0
- package/dist/local-rag/vectordb/manager.d.ts +28 -0
- package/dist/local-rag/vectordb/manager.d.ts.map +1 -0
- package/dist/local-rag/vectordb/manager.js +91 -0
- package/dist/local-rag/vectordb/manager.js.map +1 -0
- package/dist/local-rag/vectordb/migration.d.ts +27 -0
- package/dist/local-rag/vectordb/migration.d.ts.map +1 -0
- package/dist/local-rag/vectordb/migration.js +121 -0
- package/dist/local-rag/vectordb/migration.js.map +1 -0
- package/dist/local-rag/vectordb/retriever.d.ts +51 -0
- package/dist/local-rag/vectordb/retriever.d.ts.map +1 -0
- package/dist/local-rag/vectordb/retriever.js +157 -0
- package/dist/local-rag/vectordb/retriever.js.map +1 -0
- package/dist/local-rag/vectordb/schema.d.ts +33 -0
- package/dist/local-rag/vectordb/schema.d.ts.map +1 -0
- package/dist/local-rag/vectordb/schema.js +102 -0
- package/dist/local-rag/vectordb/schema.js.map +1 -0
- package/dist/local-rag/watcher.d.ts +48 -0
- package/dist/local-rag/watcher.d.ts.map +1 -0
- package/dist/local-rag/watcher.js +102 -0
- package/dist/local-rag/watcher.js.map +1 -0
- package/dist/tools/create-pages.d.ts +2 -1
- package/dist/tools/create-pages.d.ts.map +1 -1
- package/dist/tools/create-pages.js +3 -2
- package/dist/tools/create-pages.js.map +1 -1
- package/dist/tools/fetch.d.ts +2 -1
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +2 -1
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/query-datasource.d.ts +2 -1
- package/dist/tools/query-datasource.d.ts.map +1 -1
- package/dist/tools/query-datasource.js +3 -3
- package/dist/tools/query-datasource.js.map +1 -1
- package/dist/tools/search.d.ts +12 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +75 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/update-page.d.ts +4 -0
- package/dist/tools/update-page.d.ts.map +1 -0
- package/dist/tools/update-page.js +135 -0
- package/dist/tools/update-page.js.map +1 -0
- package/package.json +15 -1
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import { buildWhereClause } from './retriever.js'; // Assuming you export this helper
|
|
2
|
+
import { DataMapper } from './schema.js';
|
|
3
|
+
/** Upper bound on how many file paths are folded into a single DELETE predicate. */
const DELETE_BATCH_SIZE = 500;

/**
 * Renders a value as a single-quoted SQL string literal.
 * Embedded single quotes are doubled so the value cannot terminate the literal early.
 * @param value - The value to quote; it is coerced with `String()` first.
 * @returns The quoted literal, e.g. `'o''brien'`.
 */
function toSqlStringLiteral(value) {
    return `'${String(value).replace(/'/g, "''")}'`;
}

/**
 * Handles administrative and management tasks for the vector store, like listing and cleaning documents.
 */
export class StoreManager {
    table;

    /**
     * @param table - The table handle every query and delete in this class is issued against.
     */
    constructor(table) {
        this.table = table;
    }

    /**
     * Lists all unique documents in the store, with optional filtering and pagination.
     * Filtering happens in the database; aggregation per file path, sorting
     * (newest timestamp first) and pagination happen in memory.
     * @param options - Optional settings: `limit` (default 20), `offset` (default 0) and `filters`.
     *   Negative `limit`/`offset` values are treated as 0.
     * @returns A promise that resolves to an array of `{ filePath, chunkCount, timestamp, metadata }` items.
     */
    async listFiles(options = {}) {
        const { limit = 20, offset = 0, filters } = options;
        const query = this.table.query().select(['filePath', 'timestamp', 'metadata']);
        const whereClause = buildWhereClause(filters);
        if (whereClause) {
            query.where(whereClause);
        }
        // This still loads all *filtered* records into memory, but only the
        // three selected columns and only rows matching the filter.
        const filteredRecords = await query.toArray();

        // One entry per file path: chunk count plus the metadata of its newest chunk.
        const fileMap = new Map();
        for (const record of filteredRecords) {
            const filePath = String(record.filePath);
            const timestamp = String(record.timestamp);
            const metadata = DataMapper.toDocumentMetadata(record.metadata);
            const existing = fileMap.get(filePath);
            if (!existing) {
                fileMap.set(filePath, { chunkCount: 1, latestTimestamp: timestamp, metadata });
                continue;
            }
            existing.chunkCount++;
            if (timestamp > existing.latestTimestamp) {
                existing.latestTimestamp = timestamp;
                existing.metadata = metadata;
            }
        }

        const sortedResults = Array.from(fileMap.entries())
            .map(([filePath, data]) => ({
                filePath,
                chunkCount: data.chunkCount,
                timestamp: data.latestTimestamp,
                metadata: data.metadata,
            }))
            .sort((a, b) => b.timestamp.localeCompare(a.timestamp));

        // Apply pagination to the final, aggregated list. Clamp at zero:
        // Array.prototype.slice interprets negative indices as "from the end",
        // which would silently return the wrong page.
        const start = Math.max(0, offset);
        const pageSize = Math.max(0, limit);
        return sortedResults.slice(start, start + pageSize);
    }

    /**
     * Removes all documents (and their chunks) that have expired.
     * A document counts as expired when any of its chunks carries a
     * `metadata.expiresAt` value that sorts before the current ISO timestamp.
     * @returns A promise that resolves to the number of documents (distinct file paths) deleted.
     */
    async cleanupExpired() {
        // LanceDB's WHERE clause is limited for string-based date comparisons.
        // Fetch candidates and filter locally for safety.
        const candidates = await this.table.query().where('metadata.expiresAt IS NOT NULL').toArray();
        const now = new Date().toISOString();
        const expiredFilePaths = new Set();
        for (const record of candidates) {
            const expiresAt = record.metadata?.expiresAt;
            if (expiresAt && expiresAt < now) {
                // Coerce like listFiles() does so quoting below never sees a non-string.
                expiredFilePaths.add(String(record.filePath));
            }
        }

        // Delete in a few sequential batches instead of one concurrent delete
        // per path: fewer commits against the table and no write races.
        const paths = Array.from(expiredFilePaths);
        for (let start = 0; start < paths.length; start += DELETE_BATCH_SIZE) {
            const literals = paths
                .slice(start, start + DELETE_BATCH_SIZE)
                .map((path) => toSqlStringLiteral(path));
            await this.table.delete(`\`filePath\` IN (${literals.join(', ')})`);
        }
        return expiredFilePaths.size;
    }

    /**
     * Retrieves all chunks associated with a specific file path.
     * @param filePath - The full path of the document (e.g., '/path/to/file.pdf' or 'memory://label').
     * @returns A promise that resolves to an array of vector chunks.
     */
    async getChunksByPath(filePath) {
        const records = await this.table
            .query()
            .where(`\`filePath\` = ${toSqlStringLiteral(filePath)}`)
            .toArray();
        return records.map(DataMapper.toVectorChunk);
    }
}
|
|
91
|
+
//# sourceMappingURL=manager.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"manager.js","sourceRoot":"","sources":["../../../src/local-rag/vectordb/manager.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAA,CAAC,kCAAkC;AACpF,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AAKxC;;GAEG;AACH,MAAM,OAAO,YAAY;IACN,KAAK,CAAO;IAE7B,YAAY,KAAY;QACtB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;IACpB,CAAC;IAED;;;;;OAKG;IACI,KAAK,CAAC,SAAS,CAAC,UAAuB,EAAE;QAC9C,MAAM,EAAE,KAAK,GAAG,EAAE,EAAE,MAAM,GAAG,CAAC,EAAE,OAAO,EAAE,GAAG,OAAO,CAAA;QACnD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,UAAU,EAAE,WAAW,EAAE,UAAU,CAAC,CAAC,CAAA;QAE9E,MAAM,WAAW,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAA;QAC7C,IAAI,WAAW,EAAE,CAAC;YAChB,KAAK,CAAC,KAAK,CAAC,WAAW,CAAC,CAAA;QAC1B,CAAC;QAED,yEAAyE;QACzE,4EAA4E;QAC5E,MAAM,eAAe,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE,CAAA;QAE7C,MAAM,OAAO,GAAG,IAAI,GAAG,EAGpB,CAAA;QAEH,KAAK,MAAM,MAAM,IAAI,eAAe,EAAE,CAAC;YACrC,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;YACxC,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;YAC1C,MAAM,QAAQ,GAAG,UAAU,CAAC,kBAAkB,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;YAE/D,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAA;YACtC,IAAI,QAAQ,EAAE,CAAC;gBACb,QAAQ,CAAC,UAAU,EAAE,CAAA;gBACrB,IAAI,SAAS,GAAG,QAAQ,CAAC,eAAe,EAAE,CAAC;oBACzC,QAAQ,CAAC,eAAe,GAAG,SAAS,CAAA;oBACpC,QAAQ,CAAC,QAAQ,GAAG,QAAQ,CAAA;gBAC9B,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE,eAAe,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;YAChF,CAAC;QACH,CAAC;QAED,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;aAChD,GAAG,CACF,CAAC,CAAC,QAAQ,EAAE,IAAI,CAAC,EAAY,EAAE,CAAC,CAAC;YAC/B,QAAQ;YACR,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,SAAS,EAAE,IAAI,CAAC,eAAe;YAC/B,QAAQ,EAAE,IAAI,CAAC,QAAQ;SACxB,CAAC,CACH;aACA,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAA;QAEzD,iDAAiD;QACjD,OAAO,aAAa,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,CAAC,CAAA;IACpD,CAAC;IAED;;;OAGG;IACI,KAAK,CAAC,cAAc;QACzB,uEAAuE;QACvE,kDAAkD;QAClD,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,
KAAK,CAAC,gCAAgC,CAAC,CAAC,OAAO,EAAE,CAAA;QAE7F,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;QACpC,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAU,CAAA;QAC1C,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;YAChC,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAkC,CAAA;YAC1D,IAAI,QAAQ,CAAC,SAAS,IAAI,QAAQ,CAAC,SAAS,GAAG,GAAG,EAAE,CAAC;gBACnD,gBAAgB,CAAC,GAAG,CAAC,MAAM,CAAC,QAAkB,CAAC,CAAA;YACjD,CAAC;QACH,CAAC;QAED,IAAI,gBAAgB,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YAC9B,MAAM,cAAc,GAAG,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAA;YACnD,MAAM,cAAc,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CACrD,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,mBAAmB,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CACtE,CAAA;YACD,MAAM,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAA;QACnC,CAAC;QAED,OAAO,gBAAgB,CAAC,IAAI,CAAA;IAC9B,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,eAAe,CAAC,QAAgB;QAC3C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,KAAK;aAC7B,KAAK,EAAE;aACP,KAAK,CAAC,mBAAmB,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC;aACzD,OAAO,EAAE,CAAA;QACZ,OAAO,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,aAAa,CAAC,CAAA;IAC9C,CAAC;CACF"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { Connection, Table } from '@lancedb/lancedb';
|
|
2
|
+
/**
 * Detects whether an existing table uses an outdated schema and, if so,
 * rebuilds it with the current one.
 */
export declare class SchemaMigrator {
    /** Database connection used to list, open, drop and create tables. */
    private readonly db;
    /** Name of the table this migrator is responsible for. */
    private readonly tableName;
    /**
     * @param db - The database connection to operate on.
     * @param tableName - The name of the table to check and migrate.
     */
    constructor(db: Connection, tableName: string);
    /**
     * Opens the table if it exists and migrates it when its schema is outdated.
     * @returns A Promise that resolves to the validated or migrated table, or
     *   undefined if the table does not exist.
     */
    run(): Promise<Table | undefined>;
    /**
     * Decides whether a table's schema is outdated.
     * @param table - The table to inspect.
     * @returns A Promise that resolves to true if migration is needed.
     */
    private _needsMigration;
    /**
     * Copies the data of an outdated table into a new table with the correct schema.
     * @param oldTable - The outdated table instance.
     * @returns A Promise that resolves to the new, migrated table instance.
     */
    private _migrate;
}
|
|
27
|
+
//# sourceMappingURL=migration.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"migration.d.ts","sourceRoot":"","sources":["../../../src/local-rag/vectordb/migration.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAA;AAEzD;;GAEG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAY;IAC/B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAQ;gBAEtB,EAAE,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM;IAK7C;;;OAGG;IACU,GAAG,IAAI,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC;IAmB9C;;;;OAIG;YACW,eAAe;IAoB7B;;;;OAIG;YACW,QAAQ;CAsEvB"}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { logger } from '@mcp-monorepo/shared';
|
|
2
|
+
import * as arrow from 'apache-arrow';
|
|
3
|
+
import { DatabaseError } from '../errors.js';
|
|
4
|
+
import { getDocumentsSchema } from './schema.js';
|
|
5
|
+
/**
 * Manages the detection and execution of database schema migrations.
 */
export class SchemaMigrator {
    db;
    tableName;

    /**
     * @param db - The database connection used to list, open, drop and create tables.
     * @param tableName - The name of the table to check and migrate.
     */
    constructor(db, tableName) {
        this.db = db;
        this.tableName = tableName;
    }

    /**
     * Checks if the table exists, and if so, whether it needs migration.
     * @returns A Promise that resolves to the migrated or validated table, or undefined
     *   if the table does not exist (including the case where an outdated, empty table
     *   was dropped so that it is recreated on the next insert).
     * @throws {DatabaseError} If the migration itself fails.
     */
    async run() {
        const tableNames = await this.db.tableNames();
        if (!tableNames.includes(this.tableName)) {
            logger.info(`VectorStore: Table "${this.tableName}" will be created on first data insertion.`);
            return undefined;
        }
        const table = await this.db.openTable(this.tableName);
        logger.info(`VectorStore: Opened existing table "${this.tableName}".`);
        if (await this._needsMigration(table)) {
            logger.warn('VectorStore: Schema migration required. Starting migration process...');
            return this._migrate(table);
        }
        logger.info('VectorStore: Table schema is up-to-date.');
        return table;
    }

    /**
     * Checks if the table's schema is outdated.
     * The schema counts as outdated when `metadata` is missing or is not a struct,
     * or when the struct lacks any of `createdAt`, `updatedAt` or `tags`.
     * @param table - The table to inspect.
     * @returns A Promise that resolves to true if migration is needed. A failure
     *   while reading the schema is logged and also reported as true.
     */
    async _needsMigration(table) {
        try {
            const schema = await table.schema();
            const metadataField = schema.fields.find((f) => f.name === 'metadata');
            if (!metadataField || metadataField.type.typeId !== arrow.Type.Struct) {
                return true; // Missing metadata struct entirely
            }
            const fieldNames = new Set(metadataField.type.children.map((f) => f.name));
            // Check for key fields introduced in newer schemas.
            const requiredFields = ['createdAt', 'updatedAt', 'tags'];
            return !requiredFields.every((name) => fieldNames.has(name));
        }
        catch (error) {
            logger.error('VectorStore: Error checking schema, assuming migration is needed.', error);
            return true;
        }
    }

    /**
     * Performs the migration of data from the old table to a new one with the correct schema.
     * @param oldTable - The outdated table instance.
     * @returns A Promise that resolves to the new, migrated table instance, or to
     *   undefined when the old table was empty and has simply been dropped.
     * @throws {DatabaseError} If reading, dropping or recreating the table fails.
     */
    async _migrate(oldTable) {
        try {
            const allRecords = await oldTable.query().toArray();
            logger.info(`VectorStore: Read ${allRecords.length} records for migration.`);
            if (allRecords.length === 0) {
                await this.db.dropTable(this.tableName);
                logger.info('VectorStore: Dropped empty table. It will be recreated on the next insert.');
                // Nothing to carry over. Report "no table" directly instead of
                // throwing, so `run()` resolves to `undefined` as documented.
                return undefined;
            }
            const now = new Date().toISOString();
            const migratedRecords = allRecords.map((record) => {
                const rawMetadata = record.metadata ?? {};
                // Normalize tags from various possible formats: a plain array,
                // or any object exposing toArray().
                let tags = [];
                if (Array.isArray(rawMetadata.tags)) {
                    tags = rawMetadata.tags;
                }
                else if (rawMetadata.tags && typeof rawMetadata.tags.toArray === 'function') {
                    tags = rawMetadata.tags.toArray();
                }
                // Records that predate createdAt/updatedAt fall back to their own
                // timestamp, then to the migration time.
                const fallbackTimestamp = record.timestamp ?? now;
                const migratedMetadata = {
                    fileName: String(rawMetadata.fileName ?? 'unknown'),
                    fileSize: Number(rawMetadata.fileSize ?? 0),
                    fileType: String(rawMetadata.fileType ?? 'unknown'),
                    language: rawMetadata.language || undefined,
                    memoryType: rawMetadata.memoryType || undefined,
                    tags,
                    project: rawMetadata.project || undefined,
                    expiresAt: rawMetadata.expiresAt || undefined,
                    createdAt: String(rawMetadata.createdAt ?? fallbackTimestamp),
                    updatedAt: String(rawMetadata.updatedAt ?? fallbackTimestamp),
                    sourceUrl: rawMetadata.sourceUrl || undefined,
                };
                return {
                    id: String(record.id),
                    filePath: String(record.filePath),
                    chunkIndex: Number(record.chunkIndex),
                    text: String(record.text),
                    vector: Array.from(record.vector ?? []),
                    metadata: migratedMetadata,
                    timestamp: String(fallbackTimestamp),
                };
            });
            // NOTE(review): the old table is dropped before the new one exists, so a
            // failing createTable leaves no table behind — confirm this is acceptable.
            await this.db.dropTable(this.tableName);
            logger.info('VectorStore: Dropped old table.');
            const newTable = await this.db.createTable(this.tableName, migratedRecords, { schema: getDocumentsSchema() });
            logger.info(`VectorStore: Created new table with ${migratedRecords.length} migrated records. Migration complete.`);
            return newTable;
        }
        catch (error) {
            throw new DatabaseError('Failed to migrate table schema.', error);
        }
    }
}
|
|
121
|
+
//# sourceMappingURL=migration.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"migration.js","sourceRoot":"","sources":["../../../src/local-rag/vectordb/migration.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAA;AAC7C,OAAO,KAAK,KAAK,MAAM,cAAc,CAAA;AAErC,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAA;AAC5C,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAA;AAKhD;;GAEG;AACH,MAAM,OAAO,cAAc;IACR,EAAE,CAAY;IACd,SAAS,CAAQ;IAElC,YAAY,EAAc,EAAE,SAAiB;QAC3C,IAAI,CAAC,EAAE,GAAG,EAAE,CAAA;QACZ,IAAI,CAAC,SAAS,GAAG,SAAS,CAAA;IAC5B,CAAC;IAED;;;OAGG;IACI,KAAK,CAAC,GAAG;QACd,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,UAAU,EAAE,CAAA;QAC7C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;YACzC,MAAM,CAAC,IAAI,CAAC,uBAAuB,IAAI,CAAC,SAAS,4CAA4C,CAAC,CAAA;YAC9F,OAAO,SAAS,CAAA;QAClB,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;QACrD,MAAM,CAAC,IAAI,CAAC,uCAAuC,IAAI,CAAC,SAAS,IAAI,CAAC,CAAA;QAEtE,IAAI,MAAM,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,EAAE,CAAC;YACtC,MAAM,CAAC,IAAI,CAAC,uEAAuE,CAAC,CAAA;YACpF,OAAO,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAA;QAC7B,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAA;QACvD,OAAO,KAAK,CAAA;IACd,CAAC;IAED;;;;OAIG;IACK,KAAK,CAAC,eAAe,CAAC,KAAY;QACxC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,MAAM,EAAE,CAAA;YACnC,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,CAAA;YAEtE,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC,IAAI,CAAC,MAAM,KAAK,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,CAAA,CAAC,mCAAmC;YACjD,CAAC;YAED,MAAM,gBAAgB,GAAI,aAAa,CAAC,IAAqB,CAAC,QAAQ,CAAA;YACtE,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;YAE/D,oDAAoD;YACpD,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;QAChG,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,mEAAmE,EAAE,KAAK,CAAC,CAAA;YACxF,OAAO,IAAI,CAAA;QACb,CAAC;IACH,CAAC;IAED;;;;OAIG;IACK,KAAK,CAAC,QAAQ,CAAC,QAAe;QACpC,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAC,OAA
O,EAAE,CAAA;YACnD,MAAM,CAAC,IAAI,CAAC,qBAAqB,UAAU,CAAC,MAAM,yBAAyB,CAAC,CAAA;YAE5E,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC5B,MAAM,IAAI,CAAC,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;gBACvC,MAAM,CAAC,IAAI,CAAC,4EAA4E,CAAC,CAAA;gBACzF,uEAAuE;gBACvE,MAAM,IAAI,KAAK,CAAC,0DAA0D,CAAC,CAAA;YAC7E,CAAC;YAED,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;YACpC,MAAM,eAAe,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,MAAM,EAAe,EAAE;gBAC7D,MAAM,WAAW,GAAG,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAA4B,CAAA;gBAEtE,+CAA+C;gBAC/C,IAAI,IAAI,GAAa,EAAE,CAAA;gBACvB,IAAI,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;oBACpC,IAAI,GAAG,WAAW,CAAC,IAAI,CAAA;gBACzB,CAAC;qBAAM,IACL,WAAW,CAAC,IAAI;oBAChB,OAAQ,WAAW,CAAC,IAAqC,CAAC,OAAO,KAAK,UAAU,EAChF,CAAC;oBACD,IAAI,GAAI,WAAW,CAAC,IAAoC,CAAC,OAAO,EAAE,CAAA;gBACpE,CAAC;gBAED,MAAM,gBAAgB,GAAqB;oBACzC,QAAQ,EAAE,MAAM,CAAC,WAAW,CAAC,QAAQ,IAAI,SAAS,CAAC;oBACnD,QAAQ,EAAE,MAAM,CAAC,WAAW,CAAC,QAAQ,IAAI,CAAC,CAAC;oBAC3C,QAAQ,EAAE,MAAM,CAAC,WAAW,CAAC,QAAQ,IAAI,SAAS,CAAC;oBACnD,QAAQ,EAAG,WAAW,CAAC,QAAmB,IAAI,SAAS;oBACvD,UAAU,EAAG,WAAW,CAAC,UAAsC,IAAI,SAAS;oBAC5E,IAAI;oBACJ,OAAO,EAAG,WAAW,CAAC,OAAkB,IAAI,SAAS;oBACrD,SAAS,EAAG,WAAW,CAAC,SAAoB,IAAI,SAAS;oBACzD,SAAS,EAAE,MAAM,CAAC,WAAW,CAAC,SAAS,IAAI,MAAM,CAAC,SAAS,IAAI,GAAG,CAAC;oBACnE,SAAS,EAAE,MAAM,CAAC,WAAW,CAAC,SAAS,IAAI,MAAM,CAAC,SAAS,IAAI,GAAG,CAAC;oBACnE,SAAS,EAAG,WAAW,CAAC,SAAoB,IAAI,SAAS;iBAC1D,CAAA;gBAED,OAAO;oBACL,EAAE,EAAE,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;oBACrB,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC;oBACjC,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC;oBACrC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC;oBACzB,MAAM,EAAE,KAAK,CAAC,IAAI,CAAE,MAAM,CAAC,MAAuB,IAAI,EAAE,CAAC;oBACzD,QAAQ,EAAE,gBAAgB;oBAC1B,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,SAAS,IAAI,GAAG,CAAC;iBAC3C,CAAA;YACH,CAAC,CAAC,CAAA;YAEF,MAAM,IAAI,CAAC,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;YACvC,MAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAA;YAE9C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,WAAW,CACxC,IAAI,CAAC,SAAS,EACd,eAAuD,EACvD,EAAE,MAAM,EAAE,kBAAkB,EAAE,EAAE,CACjC,CAAA;YACD,M
AAM,CAAC,IAAI,CAAC,uCAAuC,eAAe,CAAC,MAAM,wCAAwC,CAAC,CAAA;YAClH,OAAO,QAAQ,CAAA;QACjB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,sCAAsC,CAAC,EAAE,CAAC;gBAC7F,yFAAyF;gBACzF,MAAM,KAAK,CAAA;YACb,CAAC;YACD,MAAM,IAAI,aAAa,CAAC,iCAAiC,EAAE,KAAK,CAAC,CAAA;QACnE,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import type { GroupingMode, QueryFilters, QueryResult } from '../types.js';
|
|
2
|
+
import type { Table } from '@lancedb/lancedb';
|
|
3
|
+
/**
 * Tuning options consumed by the Retriever.
 */
export interface RetrieverConfig {
    /**
     * Weight given to the full-text (BM25) side when reranking hybrid results;
     * the vector side receives `1 - hybridWeight`. Treated as 0.6 when omitted.
     */
    hybridWeight?: number;
    /** When set, only results whose score is less than or equal to this value are kept. */
    maxDistance?: number;
    /** Enables cutting the result list at relevance gaps; no grouping is applied when omitted. */
    grouping?: GroupingMode;
    /** Whether full-text search may be used, which is a precondition for hybrid search. */
    ftsEnabled: boolean;
}
|
|
9
|
+
/**
 * Searches the vector store and post-processes the hits (distance filter, grouping).
 */
export declare class Retriever {
    /** Table that all searches run against. */
    private readonly table;
    /** Retrieval settings; `ftsEnabled` can be changed through setFtsEnabled(). */
    private readonly config;
    /**
     * @param table - The table to search.
     * @param config - Retrieval tuning options.
     */
    constructor(table: Table, config: RetrieverConfig);
    /**
     * Turns full-text search on or off for subsequent searches.
     * @param enabled - The new value of `config.ftsEnabled`.
     */
    setFtsEnabled(enabled: boolean): void;
    /**
     * Runs a search, choosing between hybrid (FTS + vector) and vector-only retrieval,
     * then applies the distance filter and grouping.
     * @param queryVector - The vector representation of the query.
     * @param queryText - The original query text, used for FTS.
     * @param limit - The maximum number of results to return (1-50, default 10).
     * @param filters - Optional filters to apply to the search.
     * @returns A promise that resolves to an array of query results.
     */
    search(queryVector: number[], queryText: string, limit?: number, filters?: QueryFilters): Promise<QueryResult[]>;
    /**
     * Runs an FTS query and a vector query and merges their results.
     */
    private _performHybridSearch;
    /**
     * Runs a vector-only query.
     */
    private _performVectorSearch;
    /**
     * Drops results that exceed the configured maximum distance.
     */
    private _applyDistanceFilter;
    /**
     * Truncates the result list at unusually large score gaps.
     */
    private _applyGrouping;
    /**
     * Merges and reranks FTS and vector search results.
     */
    private _hybridRerank;
}
|
|
47
|
+
/**
 * Builds a SQL WHERE clause string from a filter object.
 * @param filters - Optional query filters to translate.
 * @returns The WHERE clause, or `undefined` when no clause is produced.
 */
export declare function buildWhereClause(filters?: QueryFilters): string | undefined;
|
|
51
|
+
//# sourceMappingURL=retriever.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retriever.d.ts","sourceRoot":"","sources":["../../../src/local-rag/vectordb/retriever.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AAC1E,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAA;AAO7C,MAAM,WAAW,eAAe;IAC9B,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,QAAQ,CAAC,EAAE,YAAY,CAAA;IACvB,UAAU,EAAE,OAAO,CAAA;CACpB;AAED;;GAEG;AACH,qBAAa,SAAS;IACpB,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAO;IAC7B,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiB;gBAE5B,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe;IAK1C,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI;IAI5C;;;;;;;OAOG;IACU,MAAM,CACjB,WAAW,EAAE,MAAM,EAAE,EACrB,SAAS,EAAE,MAAM,EACjB,KAAK,SAAK,EACV,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,WAAW,EAAE,CAAC;IA2BzB;;OAEG;YACW,oBAAoB;IAmBlC;;OAEG;YACW,oBAAoB;IAUlC;;OAEG;IACH,OAAO,CAAC,oBAAoB;IAQ5B;;OAEG;IACH,OAAO,CAAC,cAAc;IAwBtB;;OAEG;IACH,OAAO,CAAC,aAAa;CAiCtB;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,CAAC,EAAE,YAAY,GAAG,MAAM,GAAG,SAAS,CAa3E"}
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import { DatabaseError, ValidationError } from '../errors.js';
|
|
2
|
+
import { DataMapper } from './schema.js';
|
|
3
|
+
// Tuning constants for the retrieval-quality heuristics below.

/** A score gap above `mean + this * standardDeviation` marks a group boundary. */
const GROUPING_BOUNDARY_STD_MULTIPLIER = 1.5;

/** Hybrid search requests `limit * this` candidates from each source before reranking. */
const HYBRID_SEARCH_CANDIDATE_MULTIPLIER = 3;

/**
 * Divisor used to map a `_distance` value onto a 0..1 vector score, and the
 * distance assumed for hits that carry none.
 * Theoretical max for dot product on normalized vectors.
 */
const DOT_PRODUCT_MAX_DISTANCE = 2;
|
|
7
|
+
/**
|
|
8
|
+
* Handles the complex logic of searching and retrieving data from the vector store.
|
|
9
|
+
*/
|
|
10
|
+
export class Retriever {
|
|
11
|
+
table;
|
|
12
|
+
config;
|
|
13
|
+
constructor(table, config) {
|
|
14
|
+
this.table = table;
|
|
15
|
+
this.config = config;
|
|
16
|
+
}
|
|
17
|
+
setFtsEnabled(enabled) {
|
|
18
|
+
this.config.ftsEnabled = enabled;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Executes a search query, handling hybrid search, filtering, and result processing.
|
|
22
|
+
* @param queryVector - The vector representation of the query.
|
|
23
|
+
* @param queryText - The original text of the query for FTS.
|
|
24
|
+
* @param limit - The maximum number of results to return.
|
|
25
|
+
* @param filters - Optional filters to apply to the search.
|
|
26
|
+
* @returns A promise that resolves to an array of query results.
|
|
27
|
+
*/
|
|
28
|
+
async search(queryVector, queryText, limit = 10, filters) {
|
|
29
|
+
if (limit < 1 || limit > 50) {
|
|
30
|
+
throw new ValidationError(`Invalid limit: expected 1-50, got ${limit}`);
|
|
31
|
+
}
|
|
32
|
+
try {
|
|
33
|
+
const whereClause = buildWhereClause(filters);
|
|
34
|
+
let rawResults;
|
|
35
|
+
const useHybrid = this.config.ftsEnabled && queryText.trim().length > 0 && (this.config.hybridWeight ?? 0.6) > 0;
|
|
36
|
+
if (useHybrid) {
|
|
37
|
+
rawResults = await this._performHybridSearch(queryVector, queryText, limit, whereClause);
|
|
38
|
+
}
|
|
39
|
+
else {
|
|
40
|
+
rawResults = await this._performVectorSearch(queryVector, limit, whereClause);
|
|
41
|
+
}
|
|
42
|
+
let results = rawResults.map(DataMapper.toQueryResult);
|
|
43
|
+
results = this._applyDistanceFilter(results);
|
|
44
|
+
results = this._applyGrouping(results);
|
|
45
|
+
return results;
|
|
46
|
+
}
|
|
47
|
+
catch (error) {
|
|
48
|
+
throw new DatabaseError('Failed to search vectors.', error);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Performs a hybrid search by combining FTS and vector search results.
|
|
53
|
+
*/
|
|
54
|
+
async _performHybridSearch(queryVector, queryText, limit, whereClause) {
|
|
55
|
+
const candidateLimit = limit * HYBRID_SEARCH_CANDIDATE_MULTIPLIER;
|
|
56
|
+
const ftsQuery = this.table.search(queryText, 'fts', 'text').limit(candidateLimit);
|
|
57
|
+
if (whereClause)
|
|
58
|
+
ftsQuery.where(whereClause);
|
|
59
|
+
const ftsResults = await ftsQuery.toArray();
|
|
60
|
+
const vectorQuery = this.table.vectorSearch(queryVector).distanceType('dot').limit(candidateLimit);
|
|
61
|
+
if (whereClause)
|
|
62
|
+
vectorQuery.where(whereClause);
|
|
63
|
+
const vectorResults = await vectorQuery.toArray();
|
|
64
|
+
return this._hybridRerank(ftsResults, vectorResults, limit);
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Performs a standard vector-only search.
|
|
68
|
+
*/
|
|
69
|
+
async _performVectorSearch(queryVector, limit, whereClause) {
|
|
70
|
+
const query = this.table.vectorSearch(queryVector).distanceType('dot').limit(limit);
|
|
71
|
+
if (whereClause)
|
|
72
|
+
query.where(whereClause);
|
|
73
|
+
return query.toArray();
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Filters results based on the configured maximum distance.
|
|
77
|
+
*/
|
|
78
|
+
_applyDistanceFilter(results) {
|
|
79
|
+
const maxDistance = this.config.maxDistance;
|
|
80
|
+
if (maxDistance !== undefined) {
|
|
81
|
+
return results.filter((r) => r.score <= maxDistance);
|
|
82
|
+
}
|
|
83
|
+
return results;
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Groups results by relevance gaps to improve quality.
|
|
87
|
+
*/
|
|
88
|
+
_applyGrouping(results) {
|
|
89
|
+
if (!this.config.grouping || results.length <= 1) {
|
|
90
|
+
return results;
|
|
91
|
+
}
|
|
92
|
+
const gaps = [];
|
|
93
|
+
for (let i = 0; i < results.length - 1; i++) {
|
|
94
|
+
gaps.push({ index: i + 1, gap: results[i + 1].score - results[i].score });
|
|
95
|
+
}
|
|
96
|
+
const gapValues = gaps.map((g) => g.gap);
|
|
97
|
+
const mean = gapValues.reduce((a, b) => a + b, 0) / gapValues.length;
|
|
98
|
+
const std = Math.sqrt(gapValues.map((x) => (x - mean) ** 2).reduce((a, b) => a + b, 0) / gapValues.length);
|
|
99
|
+
const threshold = mean + GROUPING_BOUNDARY_STD_MULTIPLIER * std;
|
|
100
|
+
const boundaries = gaps.filter((g) => g.gap > threshold).map((g) => g.index);
|
|
101
|
+
if (boundaries.length === 0)
|
|
102
|
+
return results;
|
|
103
|
+
const groupsToInclude = this.config.grouping === 'similar' ? 1 : 2;
|
|
104
|
+
const cutoffIndex = boundaries[groupsToInclude - 1];
|
|
105
|
+
return cutoffIndex !== undefined ? results.slice(0, cutoffIndex) : results;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Reranks and merges FTS and vector search results.
|
|
109
|
+
*/
|
|
110
|
+
_hybridRerank(ftsResults, vectorResults, limit) {
|
|
111
|
+
const scoreMap = new Map();
|
|
112
|
+
const bm25Weight = this.config.hybridWeight ?? 0.6;
|
|
113
|
+
const vectorWeight = 1.0 - bm25Weight;
|
|
114
|
+
for (const result of vectorResults) {
|
|
115
|
+
const key = `${result['filePath']}:${result['chunkIndex']}`;
|
|
116
|
+
const distance = result['_distance'] ?? DOT_PRODUCT_MAX_DISTANCE;
|
|
117
|
+
const vectorScore = Math.max(0, 1 - distance / DOT_PRODUCT_MAX_DISTANCE);
|
|
118
|
+
scoreMap.set(key, { result, score: vectorScore * vectorWeight });
|
|
119
|
+
}
|
|
120
|
+
for (let i = 0; i < ftsResults.length; i++) {
|
|
121
|
+
const result = ftsResults[i];
|
|
122
|
+
const key = `${result['filePath']}:${result['chunkIndex']}`;
|
|
123
|
+
const ftsScore = 1 - i / (ftsResults.length || 1);
|
|
124
|
+
const entry = scoreMap.get(key);
|
|
125
|
+
if (entry) {
|
|
126
|
+
entry.score += ftsScore * bm25Weight;
|
|
127
|
+
}
|
|
128
|
+
else {
|
|
129
|
+
scoreMap.set(key, { result, score: ftsScore * bm25Weight });
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
return Array.from(scoreMap.values())
|
|
133
|
+
.sort((a, b) => b.score - a.score)
|
|
134
|
+
.slice(0, limit)
|
|
135
|
+
.map((item) => ({ ...item.result, _distance: 1 - item.score }));
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
/**
 * Constructs a SQL WHERE clause from the filter object.
 *
 * Every filter value is rendered as a single-quoted SQL string literal with
 * embedded single quotes doubled, so a value cannot terminate its own literal
 * and alter the predicate.
 *
 * @param {object} [filters] - Optional filters: `type`, `project`, `fileName`
 *   and `tags` (an array; every listed tag must be present).
 * @returns {string | undefined} The conditions joined with ` AND `, or
 *   `undefined` when `filters` is falsy or produces no condition.
 */
export function buildWhereClause(filters) {
    if (!filters)
        return undefined;
    // Render a value as a quoted SQL string literal ('' is the escaped form of ').
    const toSqlString = (value) => `'${String(value).replace(/'/g, "''")}'`;
    const conditions = [];
    if (filters.type)
        conditions.push(`metadata.memoryType = ${toSqlString(filters.type)}`);
    if (filters.project)
        conditions.push(`metadata.project = ${toSqlString(filters.project)}`);
    if (filters.fileName)
        conditions.push(`metadata.fileName = ${toSqlString(filters.fileName)}`);
    if (filters.tags && filters.tags.length > 0) {
        const tagsList = `[${filters.tags.map((tag) => toSqlString(tag)).join(', ')}]`;
        conditions.push(`array_has_all(metadata.tags, ${tagsList})`);
    }
    return conditions.length > 0 ? conditions.join(' AND ') : undefined;
}
|
|
157
|
+
//# sourceMappingURL=retriever.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retriever.js","sourceRoot":"","sources":["../../../src/local-rag/vectordb/retriever.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,cAAc,CAAA;AAC7D,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AAKxC,wCAAwC;AACxC,MAAM,gCAAgC,GAAG,GAAG,CAAA;AAC5C,MAAM,kCAAkC,GAAG,CAAC,CAAA;AAC5C,MAAM,wBAAwB,GAAG,CAAC,CAAA,CAAC,wDAAwD;AAS3F;;GAEG;AACH,MAAM,OAAO,SAAS;IACH,KAAK,CAAO;IACZ,MAAM,CAAiB;IAExC,YAAY,KAAY,EAAE,MAAuB;QAC/C,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;QAClB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAEM,aAAa,CAAC,OAAgB;QACnC,IAAI,CAAC,MAAM,CAAC,UAAU,GAAG,OAAO,CAAA;IAClC,CAAC;IAED;;;;;;;OAOG;IACI,KAAK,CAAC,MAAM,CACjB,WAAqB,EACrB,SAAiB,EACjB,KAAK,GAAG,EAAE,EACV,OAAsB;QAEtB,IAAI,KAAK,GAAG,CAAC,IAAI,KAAK,GAAG,EAAE,EAAE,CAAC;YAC5B,MAAM,IAAI,eAAe,CAAC,qCAAqC,KAAK,EAAE,CAAC,CAAA;QACzE,CAAC;QAED,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAA;YAC7C,IAAI,UAAqC,CAAA;YAEzC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;YAChH,IAAI,SAAS,EAAE,CAAC;gBACd,UAAU,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,WAAW,CAAC,CAAA;YAC1F,CAAC;iBAAM,CAAC;gBACN,UAAU,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,WAAW,EAAE,KAAK,EAAE,WAAW,CAAC,CAAA;YAC/E,CAAC;YAED,IAAI,OAAO,GAAkB,UAAU,CAAC,GAAG,CAAC,UAAU,CAAC,aAAa,CAAC,CAAA;YAErE,OAAO,GAAG,IAAI,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAA;YAC5C,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAA;YAEtC,OAAO,OAAO,CAAA;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,aAAa,CAAC,2BAA2B,EAAE,KAAK,CAAC,CAAA;QAC7D,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,oBAAoB,CAChC,WAAqB,EACrB,SAAiB,EACjB,KAAa,EACb,WAAoB;QAEpB,MAAM,cAAc,GAAG,KAAK,GAAG,kCAAkC,CAAA;QAEjE,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,EAAE,KAAK,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,cAAc,CAAC,CAAA;QAClF,IAAI,WAAW;YAAE,QAAQ,CAAC,KAAK,CAAC,WAAW,CAAC,CAAA;QAC5C,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,OAAO,EAAE,CAAA;QAE3C,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,YAAY,CAAC,KAAK,CAA
C,CAAC,KAAK,CAAC,cAAc,CAAC,CAAA;QAClG,IAAI,WAAW;YAAE,WAAW,CAAC,KAAK,CAAC,WAAW,CAAC,CAAA;QAC/C,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,OAAO,EAAE,CAAA;QAEjD,OAAO,IAAI,CAAC,aAAa,CAAC,UAAU,EAAE,aAAa,EAAE,KAAK,CAAC,CAAA;IAC7D,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,oBAAoB,CAChC,WAAqB,EACrB,KAAa,EACb,WAAoB;QAEpB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;QACnF,IAAI,WAAW;YAAE,KAAK,CAAC,KAAK,CAAC,WAAW,CAAC,CAAA;QACzC,OAAO,KAAK,CAAC,OAAO,EAAE,CAAA;IACxB,CAAC;IAED;;OAEG;IACK,oBAAoB,CAAC,OAAsB;QACjD,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,CAAA;QAC3C,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;YAC9B,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,WAAW,CAAC,CAAA;QACtD,CAAC;QACD,OAAO,OAAO,CAAA;IAChB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,OAAsB;QAC3C,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACjD,OAAO,OAAO,CAAA;QAChB,CAAC;QAED,MAAM,IAAI,GAAqC,EAAE,CAAA;QACjD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,IAAI,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAA;QAC3E,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;QACxC,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAA;QACpE,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC,CAAA;QAC1G,MAAM,SAAS,GAAG,IAAI,GAAG,gCAAgC,GAAG,GAAG,CAAA;QAE/D,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAA;QAC5E,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,OAAO,CAAA;QAE3C,MAAM,eAAe,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,CA
AC,CAAC,CAAC,CAAC,CAAC,CAAA;QAClE,MAAM,WAAW,GAAG,UAAU,CAAC,eAAe,GAAG,CAAC,CAAC,CAAA;QAEnD,OAAO,WAAW,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAA;IAC5E,CAAC;IAED;;OAEG;IACK,aAAa,CACnB,UAAqC,EACrC,aAAwC,EACxC,KAAa;QAEb,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA8D,CAAA;QACtF,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC,YAAY,IAAI,GAAG,CAAA;QAClD,MAAM,YAAY,GAAG,GAAG,GAAG,UAAU,CAAA;QAErC,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;YACnC,MAAM,GAAG,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC,IAAI,MAAM,CAAC,YAAY,CAAC,EAAE,CAAA;YAC3D,MAAM,QAAQ,GAAI,MAAM,CAAC,WAAW,CAAY,IAAI,wBAAwB,CAAA;YAC5E,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,QAAQ,GAAG,wBAAwB,CAAC,CAAA;YACxE,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,WAAW,GAAG,YAAY,EAAE,CAAC,CAAA;QAClE,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC,CAAA;YAC5B,MAAM,GAAG,GAAG,GAAG,MAAM,CAAC,UAAU,CAAC,IAAI,MAAM,CAAC,YAAY,CAAC,EAAE,CAAA;YAC3D,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC,CAAA;YACjD,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;YAC/B,IAAI,KAAK,EAAE,CAAC;gBACV,KAAK,CAAC,KAAK,IAAI,QAAQ,GAAG,UAAU,CAAA;YACtC,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,GAAG,UAAU,EAAE,CAAC,CAAA;YAC7D,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;aACjC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;aACjC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;aACf,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAA;IACnE,CAAC;CACF;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAsB;IACrD,IAAI,CAAC,OAAO;QAAE,OAAO,SAAS,CAAA;IAC9B,MAAM,UAAU,GAAa,EAAE,CAAA;IAE/B,IAAI,OAAO,CAAC,IAAI;QAAE,UAAU,CAAC,IAAI,CAAC,0BAA0B,OAAO,CAAC,IAAI,GAAG,CAAC,CAAA;IAC5E,IAAI,OAAO,CAAC,OAAO;QAAE,UAAU,CAAC,IAAI,CAAC,uBAAuB,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAA;IACnG,IAAI,OAAO,CAAC
,QAAQ;QAAE,UAAU,CAAC,IAAI,CAAC,wBAAwB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAA;IACtG,IAAI,OAAO,CAAC,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5C,MAAM,QAAQ,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAA;QACxF,UAAU,CAAC,IAAI,CAAC,gCAAgC,QAAQ,GAAG,CAAC,CAAA;IAC9D,CAAC;IAED,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;AACrE,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import * as arrow from 'apache-arrow';
|
|
2
|
+
import type { DocumentMetadata, QueryResult, VectorChunk } from '../types.js';
|
|
3
|
+
/**
|
|
4
|
+
* Defines the robust Arrow schema for the vector table.
|
|
5
|
+
* This schema serves as the single source of truth for the table structure.
|
|
6
|
+
* @returns {arrow.Schema} The Apache Arrow schema definition.
|
|
7
|
+
*/
|
|
8
|
+
export declare function getDocumentsSchema(): arrow.Schema;
|
|
9
|
+
/**
|
|
10
|
+
* A utility class for safely mapping raw database records to strictly-typed application objects.
|
|
11
|
+
* Handles type conversions and guards against undefined or malformed data.
|
|
12
|
+
*/
|
|
13
|
+
export declare class DataMapper {
|
|
14
|
+
/**
|
|
15
|
+
* Safely converts a raw metadata object from the database into a strict DocumentMetadata type.
|
|
16
|
+
* @param raw - The raw metadata object, typically from a LanceDB record.
|
|
17
|
+
* @returns A structured DocumentMetadata object.
|
|
18
|
+
*/
|
|
19
|
+
static toDocumentMetadata(raw: unknown): DocumentMetadata;
|
|
20
|
+
/**
|
|
21
|
+
* Converts a raw database record into a QueryResult object.
|
|
22
|
+
* @param raw - The raw database record.
|
|
23
|
+
* @returns A structured QueryResult object.
|
|
24
|
+
*/
|
|
25
|
+
static toQueryResult(raw: Record<string, unknown>): QueryResult;
|
|
26
|
+
/**
|
|
27
|
+
* Converts a raw database record into a VectorChunk object.
|
|
28
|
+
* @param raw - The raw database record.
|
|
29
|
+
* @returns A structured VectorChunk object.
|
|
30
|
+
*/
|
|
31
|
+
static toVectorChunk(raw: Record<string, unknown>): VectorChunk;
|
|
32
|
+
}
|
|
33
|
+
//# sourceMappingURL=schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/local-rag/vectordb/schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,MAAM,cAAc,CAAA;AAErC,OAAO,KAAK,EAAE,gBAAgB,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AAE7E;;;;GAIG;AACH,wBAAgB,kBAAkB,IAAI,KAAK,CAAC,MAAM,CA2BjD;AAED;;;GAGG;AACH,qBAAa,UAAU;IACrB;;;;OAIG;WACW,kBAAkB,CAAC,GAAG,EAAE,OAAO,GAAG,gBAAgB;IA6BhE;;;;OAIG;WACW,aAAa,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,WAAW;IAUtE;;;;OAIG;WACW,aAAa,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,WAAW;CAWvE"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import * as arrow from 'apache-arrow';
|
|
2
|
+
/**
 * Builds the Apache Arrow schema used for the vector table.
 * The field list here is the single definition of the table's columns and of
 * the nested `metadata` struct.
 * @returns {arrow.Schema} The Apache Arrow schema definition.
 */
export function getDocumentsSchema() {
    // Small factories so each column reads as "name, nullable".
    const text = (name, nullable) => new arrow.Field(name, new arrow.Utf8(), nullable);
    const int32 = (name, nullable) => new arrow.Field(name, new arrow.Int32(), nullable);

    const tagsType = new arrow.List(text('item', false));
    const metadataType = new arrow.Struct([
        text('fileName', false),
        int32('fileSize', false),
        text('fileType', false),
        text('language', true),
        text('memoryType', true),
        new arrow.Field('tags', tagsType, true),
        text('project', true),
        text('expiresAt', true),
        text('createdAt', false),
        text('updatedAt', false),
        text('sourceUrl', true),
        text('author', true),
        text('fileCreatedAt', true),
        text('fileModifiedAt', true),
    ]);
    // Fixed-width list of 384 non-null 32-bit floats per row.
    const vectorType = new arrow.FixedSizeList(384, new arrow.Field('item', new arrow.Float32(), false));

    return new arrow.Schema([
        text('id', false),
        text('filePath', false),
        int32('chunkIndex', false),
        text('text', false),
        new arrow.Field('vector', vectorType, false),
        new arrow.Field('metadata', metadataType, true),
        text('timestamp', false),
    ]);
}
|
|
34
|
+
/**
 * A utility class for safely mapping raw database records to strictly-typed application objects.
 * Handles type conversions and guards against undefined or malformed data.
 *
 * All methods are static and never rely on `this`, so they can be passed
 * directly as callbacks (for example `rows.map(DataMapper.toVectorChunk)`).
 */
export class DataMapper {
    /**
     * Safely converts a raw metadata object from the database into a strict DocumentMetadata type.
     * @param raw - The raw metadata object, typically from a LanceDB record. May be
     *   `null`/`undefined` or a non-object, in which case defaults are returned.
     * @returns A structured DocumentMetadata object.
     */
    static toDocumentMetadata(raw) {
        // A record may carry no metadata at all; fall back to an empty object
        // so every field below resolves to its default instead of throwing.
        const data = raw !== null && typeof raw === 'object' ? raw : {};
        // LanceDB can return Arrow vectors for lists, so we need to safely convert them.
        const rawTags = data.tags;
        let tags = [];
        if (Array.isArray(rawTags)) {
            tags = rawTags;
        }
        else if (rawTags && typeof rawTags.toArray === 'function') {
            tags = rawTags.toArray();
        }
        // Only the three known memory types are accepted; anything else is dropped.
        const memoryType = data.memoryType === 'text' || data.memoryType === 'file' || data.memoryType === 'url'
            ? data.memoryType
            : undefined;
        return {
            fileName: String(data.fileName ?? ''),
            fileSize: typeof data.fileSize === 'number' ? data.fileSize : 0,
            fileType: String(data.fileType ?? ''),
            language: typeof data.language === 'string' ? data.language : undefined,
            tags,
            project: typeof data.project === 'string' ? data.project : undefined,
            memoryType,
            expiresAt: typeof data.expiresAt === 'string' ? data.expiresAt : undefined,
            // Missing timestamps default to the time of conversion.
            createdAt: String(data.createdAt ?? new Date().toISOString()),
            updatedAt: String(data.updatedAt ?? new Date().toISOString()),
            sourceUrl: typeof data.sourceUrl === 'string' ? data.sourceUrl : undefined,
        };
    }
    /**
     * Converts a raw database record into a QueryResult object.
     * The record's `_distance` value is exposed as `score`.
     * @param raw - The raw database record.
     * @returns A structured QueryResult object.
     */
    static toQueryResult(raw) {
        return {
            filePath: String(raw.filePath ?? ''),
            chunkIndex: raw.chunkIndex,
            text: String(raw.text ?? ''),
            score: raw._distance,
            metadata: DataMapper.toDocumentMetadata(raw.metadata),
        };
    }
    /**
     * Converts a raw database record into a VectorChunk object.
     * @param raw - The raw database record.
     * @returns A structured VectorChunk object.
     */
    static toVectorChunk(raw) {
        return {
            id: String(raw.id ?? ''),
            filePath: String(raw.filePath ?? ''),
            chunkIndex: raw.chunkIndex,
            text: String(raw.text ?? ''),
            // Copies typed arrays / iterables into a plain array.
            vector: Array.from(raw.vector ?? []),
            // Reference the class explicitly: `this` is undefined when the
            // method is passed unbound as a callback.
            metadata: DataMapper.toDocumentMetadata(raw.metadata),
            timestamp: String(raw.timestamp ?? ''),
        };
    }
}
|
|
102
|
+
//# sourceMappingURL=schema.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../../src/local-rag/vectordb/schema.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,MAAM,cAAc,CAAA;AAIrC;;;;GAIG;AACH,MAAM,UAAU,kBAAkB;IAChC,MAAM,cAAc,GAAG;QACrB,IAAI,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,KAAK,CAAC;QACpD,IAAI,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,IAAI,KAAK,CAAC,KAAK,EAAE,EAAE,KAAK,CAAC;QACrD,IAAI,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,KAAK,CAAC;QACpD,IAAI,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC;QACnD,IAAI,KAAK,CAAC,KAAK,CAAC,YAAY,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC;QACrD,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;QAC/F,IAAI,KAAK,CAAC,KAAK,CAAC,SAAS,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC;QAClD,IAAI,KAAK,CAAC,KAAK,CAAC,WAAW,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC;QACpD,IAAI,KAAK,CAAC,KAAK,CAAC,WAAW,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,KAAK,CAAC;QACrD,IAAI,KAAK,CAAC,KAAK,CAAC,WAAW,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,KAAK,CAAC;QACrD,IAAI,KAAK,CAAC,KAAK,CAAC,WAAW,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC;QACpD,IAAI,KAAK,CAAC,KAAK,CAAC,QAAQ,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC;QACjD,IAAI,KAAK,CAAC,KAAK,CAAC,eAAe,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC;QACxD,IAAI,KAAK,CAAC,KAAK,CAAC,gBAAgB,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC;KAC1D,CAAA;IAED,OAAO,IAAI,KAAK,CAAC,MAAM,CAAC;QACtB,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,KAAK,CAAC;QAC9C,IAAI,KAAK,CAAC,KAAK,CAAC,UAAU,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,KAAK,CAAC;QACpD,IAAI,KAAK,CAAC,KAAK,CAAC,YAAY,EAAE,IAAI,KAAK,CAAC,KAAK,EAAE,EAAE,KAAK,CAAC;QACvD,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,KAAK,CAAC;QAChD,IAAI,KAAK,CAAC,KAAK,CAAC,QAAQ,EAAE,IAAI,KAAK,CAAC,aAAa,CAAC,GAAG,EAAE,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC;QACnH,IAAI,KAAK,CAAC,KAA
K,CAAC,UAAU,EAAE,IAAI,KAAK,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE,IAAI,CAAC;QACnE,IAAI,KAAK,CAAC,KAAK,CAAC,WAAW,EAAE,IAAI,KAAK,CAAC,IAAI,EAAE,EAAE,KAAK,CAAC;KACtD,CAAC,CAAA;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,OAAO,UAAU;IACrB;;;;OAIG;IACI,MAAM,CAAC,kBAAkB,CAAC,GAAY;QAC3C,MAAM,IAAI,GAAG,GAA8B,CAAA;QAC3C,iFAAiF;QACjF,MAAM,OAAO,GAAG,IAAI,CAAC,IAAe,CAAA;QACpC,IAAI,IAAI,GAAa,EAAE,CAAA;QACvB,IAAI,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3B,IAAI,GAAG,OAAO,CAAA;QAChB,CAAC;aAAM,IAAI,OAAO,IAAI,OAAQ,OAAwC,CAAC,OAAO,KAAK,UAAU,EAAE,CAAC;YAC9F,IAAI,GAAI,OAAuC,CAAC,OAAO,EAAE,CAAA;QAC3D,CAAC;QAED,OAAO;YACL,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;YACrC,QAAQ,EAAE,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAC/D,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;YACrC,QAAQ,EAAE,OAAO,IAAI,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;YACvE,IAAI;YACJ,OAAO,EAAE,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS;YACpE,UAAU,EACR,IAAI,CAAC,UAAU,KAAK,MAAM,IAAI,IAAI,CAAC,UAAU,KAAK,MAAM,IAAI,IAAI,CAAC,UAAU,KAAK,KAAK;gBACnF,CAAC,CAAC,IAAI,CAAC,UAAU;gBACjB,CAAC,CAAC,SAAS;YACf,SAAS,EAAE,OAAO,IAAI,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;YAC1E,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAC7D,SAAS,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YAC7D,SAAS,EAAE,OAAO,IAAI,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;SAC3E,CAAA;IACH,CAAC;IAED;;;;OAIG;IACI,MAAM,CAAC,aAAa,CAAC,GAA4B;QACtD,OAAO;YACL,QAAQ,EAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,IAAI,EAAE,CAAC;YACpC,UAAU,EAAE,GAAG,CAAC,UAAoB;YACpC,IAAI,EAAE,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC;YAC5B,KAAK,EAAE,GAAG,CAAC,SAAmB;YAC9B,QAAQ,EAAE,UAAU,CAAC,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC;SACtD,CAAA;IACH,CAAC;IAED;;;;OAIG;IACI,MAAM,CAAC,aAAa,CAAC,GAA4B;QACtD,OAAO;YACL,EAAE,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,CAAC;YACxB,QAAQ,EAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,IAAI,EAAE,CAAC;YACpC,
UAAU,EAAE,GAAG,CAAC,UAAoB;YACpC,IAAI,EAAE,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC;YAC5B,MAAM,EAAE,KAAK,CAAC,IAAI,CAAE,GAAG,CAAC,MAAuB,IAAI,EAAE,CAAC;YACtD,QAAQ,EAAE,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC;YAC/C,SAAS,EAAE,MAAM,CAAC,GAAG,CAAC,SAAS,IAAI,EAAE,CAAC;SACvC,CAAA;IACH,CAAC;CACF"}
|