agentlang 0.10.3 → 0.10.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -9
- package/out/extension/main.cjs +250 -250
- package/out/extension/main.cjs.map +2 -2
- package/out/language/generated/ast.d.ts +7 -7
- package/out/language/generated/ast.d.ts.map +1 -1
- package/out/language/generated/grammar.js +2 -2
- package/out/language/main.cjs +506 -506
- package/out/language/main.cjs.map +3 -3
- package/out/language/parser.d.ts.map +1 -1
- package/out/language/parser.js +9 -2
- package/out/language/parser.js.map +1 -1
- package/out/language/syntax.d.ts +2 -2
- package/out/language/syntax.d.ts.map +1 -1
- package/out/runtime/api.d.ts.map +1 -1
- package/out/runtime/api.js +0 -14
- package/out/runtime/api.js.map +1 -1
- package/out/runtime/defs.d.ts +0 -1
- package/out/runtime/defs.d.ts.map +1 -1
- package/out/runtime/defs.js +1 -2
- package/out/runtime/defs.js.map +1 -1
- package/out/runtime/embeddings/chunker.d.ts +0 -18
- package/out/runtime/embeddings/chunker.d.ts.map +1 -1
- package/out/runtime/embeddings/chunker.js +15 -47
- package/out/runtime/embeddings/chunker.js.map +1 -1
- package/out/runtime/embeddings/openai.d.ts.map +1 -1
- package/out/runtime/embeddings/openai.js +11 -22
- package/out/runtime/embeddings/openai.js.map +1 -1
- package/out/runtime/embeddings/provider.d.ts +0 -1
- package/out/runtime/embeddings/provider.d.ts.map +1 -1
- package/out/runtime/embeddings/provider.js +1 -20
- package/out/runtime/embeddings/provider.js.map +1 -1
- package/out/runtime/exec-graph.js +5 -5
- package/out/runtime/exec-graph.js.map +1 -1
- package/out/runtime/interpreter.d.ts +4 -4
- package/out/runtime/interpreter.d.ts.map +1 -1
- package/out/runtime/interpreter.js +27 -16
- package/out/runtime/interpreter.js.map +1 -1
- package/out/runtime/loader.d.ts.map +1 -1
- package/out/runtime/loader.js +6 -2
- package/out/runtime/loader.js.map +1 -1
- package/out/runtime/logger.d.ts.map +1 -1
- package/out/runtime/logger.js +1 -8
- package/out/runtime/logger.js.map +1 -1
- package/out/runtime/module.d.ts +0 -6
- package/out/runtime/module.d.ts.map +1 -1
- package/out/runtime/module.js +1 -58
- package/out/runtime/module.js.map +1 -1
- package/out/runtime/modules/ai.d.ts +4 -4
- package/out/runtime/modules/ai.d.ts.map +1 -1
- package/out/runtime/modules/ai.js +70 -166
- package/out/runtime/modules/ai.js.map +1 -1
- package/out/runtime/modules/auth.d.ts.map +1 -1
- package/out/runtime/modules/auth.js +6 -4
- package/out/runtime/modules/auth.js.map +1 -1
- package/out/runtime/modules/core.d.ts.map +1 -1
- package/out/runtime/modules/core.js +3 -0
- package/out/runtime/modules/core.js.map +1 -1
- package/out/runtime/modules/messaging.d.ts +10 -0
- package/out/runtime/modules/messaging.d.ts.map +1 -0
- package/out/runtime/modules/messaging.js +210 -0
- package/out/runtime/modules/messaging.js.map +1 -0
- package/out/runtime/monitor.d.ts +2 -1
- package/out/runtime/monitor.d.ts.map +1 -1
- package/out/runtime/monitor.js +5 -1
- package/out/runtime/monitor.js.map +1 -1
- package/out/runtime/resolvers/sqldb/database.d.ts.map +1 -1
- package/out/runtime/resolvers/sqldb/database.js +126 -128
- package/out/runtime/resolvers/sqldb/database.js.map +1 -1
- package/out/runtime/resolvers/sqldb/impl.d.ts +0 -1
- package/out/runtime/resolvers/sqldb/impl.d.ts.map +1 -1
- package/out/runtime/resolvers/sqldb/impl.js +0 -3
- package/out/runtime/resolvers/sqldb/impl.js.map +1 -1
- package/out/runtime/services/documentFetcher.d.ts.map +1 -1
- package/out/runtime/services/documentFetcher.js +6 -21
- package/out/runtime/services/documentFetcher.js.map +1 -1
- package/out/runtime/state.d.ts +0 -14
- package/out/runtime/state.d.ts.map +1 -1
- package/out/runtime/state.js +0 -28
- package/out/runtime/state.js.map +1 -1
- package/package.json +19 -19
- package/src/language/agentlang.langium +2 -2
- package/src/language/generated/ast.ts +7 -7
- package/src/language/generated/grammar.ts +2 -2
- package/src/language/parser.ts +9 -2
- package/src/language/syntax.ts +2 -2
- package/src/runtime/api.ts +0 -15
- package/src/runtime/defs.ts +1 -2
- package/src/runtime/embeddings/chunker.ts +14 -52
- package/src/runtime/embeddings/openai.ts +9 -27
- package/src/runtime/embeddings/provider.ts +1 -22
- package/src/runtime/exec-graph.ts +4 -4
- package/src/runtime/interpreter.ts +32 -17
- package/src/runtime/loader.ts +10 -2
- package/src/runtime/logger.ts +1 -12
- package/src/runtime/module.ts +1 -64
- package/src/runtime/modules/ai.ts +81 -206
- package/src/runtime/modules/auth.ts +6 -4
- package/src/runtime/modules/core.ts +4 -0
- package/src/runtime/modules/messaging.ts +228 -0
- package/src/runtime/monitor.ts +10 -1
- package/src/runtime/resolvers/sqldb/database.ts +130 -142
- package/src/runtime/resolvers/sqldb/impl.ts +0 -4
- package/src/runtime/services/documentFetcher.ts +6 -21
- package/src/runtime/state.ts +0 -29
- package/out/runtime/document-retriever.d.ts +0 -24
- package/out/runtime/document-retriever.d.ts.map +0 -1
- package/out/runtime/document-retriever.js +0 -258
- package/out/runtime/document-retriever.js.map +0 -1
- package/out/runtime/resolvers/vector/lancedb-store.d.ts +0 -16
- package/out/runtime/resolvers/vector/lancedb-store.d.ts.map +0 -1
- package/out/runtime/resolvers/vector/lancedb-store.js +0 -159
- package/out/runtime/resolvers/vector/lancedb-store.js.map +0 -1
- package/out/runtime/resolvers/vector/types.d.ts +0 -32
- package/out/runtime/resolvers/vector/types.d.ts.map +0 -1
- package/out/runtime/resolvers/vector/types.js +0 -2
- package/out/runtime/resolvers/vector/types.js.map +0 -1
- package/src/runtime/document-retriever.ts +0 -311
- package/src/runtime/resolvers/vector/lancedb-store.ts +0 -187
- package/src/runtime/resolvers/vector/types.ts +0 -39
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
/** A single embedding handed to a {@link VectorStore} for persistence. */
export interface VectorRecord {
    /** Unique identifier of the stored vector. */
    id: string;
    /** The embedding values. */
    embedding: number[];
    /** Optional tenant scope. */
    tenantId?: string;
    /** Optional agent scope. */
    agentId?: string;
    /** Optional identifier of the document the vector was derived from. */
    documentId?: string;
    /** Optional free-form extra data. */
    metadata?: Record<string, any>;
}
|
|
9
|
-
/** One hit returned by a vector similarity search. */
export interface SearchResult {
    /** Identifier of the matching vector. */
    id: string;
    /** Distance between the query embedding and the match, as reported by the store. */
    distance: number;
    /** Tenant scope stored with the match, if any. */
    tenantId?: string;
    /** Agent scope stored with the match, if any. */
    agentId?: string;
    /** Document identifier stored with the match, if any. */
    documentId?: string;
    /** Optional free-form extra data. */
    metadata?: Record<string, any>;
}
|
|
17
|
-
/** Contract implemented by vector storage backends. */
export interface VectorStore {
    /** Prepare the store for use; call before any other method. */
    init(): Promise<void>;
    /** Persist a single embedding. */
    addEmbedding(record: VectorRecord): Promise<void>;
    /** Persist several embeddings in one call. */
    addEmbeddings(records: VectorRecord[]): Promise<void>;
    /** Similarity search, optionally scoped by tenant and/or agent and capped at `limit` hits. */
    search(embedding: number[], tenantId?: string, agentId?: string, limit?: number): Promise<SearchResult[]>;
    /** Remove the vector with the given id. */
    delete(id: string): Promise<void>;
    /** Report whether a vector with the given id is stored. */
    exists(id: string): Promise<boolean>;
    /** Release the store's resources. */
    close(): Promise<void>;
}
|
|
26
|
-
/** Settings accepted by vector store implementations. */
export interface VectorStoreConfig {
    /** Optional database location. */
    dbname?: string;
    /** Name of the module the store belongs to. */
    moduleName: string;
    /** Number of components in every stored embedding. */
    vectorDimension: number;
    /** When true, the store is expected to keep its data in memory only. */
    inMemory?: boolean;
}
|
|
32
|
-
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../../src/runtime/resolvers/vector/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACtB,YAAY,CAAC,MAAM,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClD,aAAa,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,MAAM,CACJ,SAAS,EAAE,MAAM,EAAE,EACnB,QAAQ,CAAC,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,MAAM,EAChB,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IAC3B,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IACrC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/runtime/resolvers/vector/types.ts"],"names":[],"mappings":""}
|
|
@@ -1,311 +0,0 @@
|
|
|
1
|
-
import { logger } from './logger.js';
|
|
2
|
-
import { AppConfig } from './state.js';
|
|
3
|
-
import { TextChunker } from './embeddings/chunker.js';
|
|
4
|
-
import { OpenAIEmbeddingProvider } from './embeddings/openai.js';
|
|
5
|
-
import { LanceDBVectorStore } from './resolvers/vector/lancedb-store.js';
|
|
6
|
-
import type { VectorStore } from './resolvers/vector/types.js';
|
|
7
|
-
import crypto from 'crypto';
|
|
8
|
-
import { readFileSync } from 'fs';
|
|
9
|
-
import { resolve as pathResolve } from 'path';
|
|
10
|
-
|
|
11
|
-
// Number of components in every stored embedding. It sizes both the pgvector
// `embedding` column and the LanceDB schema, so it has to match the output size
// of the embedding model the retriever is configured with.
const VECTOR_DIMENSION = 1536;
|
|
12
|
-
|
|
13
|
-
/** Text of one document chunk, kept in memory and keyed by the chunk's vector id. */
interface LocalChunk {
  /** Identifier shared with the stored embedding. */
  id: string;
  /** The chunk's text. */
  content: string;
  /** Title of the document the chunk was cut from. */
  documentTitle: string;
  /** Zero-based position of the chunk within its document. */
  chunkIndex: number;
}
|
|
19
|
-
|
|
20
|
-
/**
 * Decide whether document vectors go to pgvector (true) or LanceDB (false).
 *
 * An explicit `vectorStore.type` of 'pgvector' or 'lancedb' wins; otherwise
 * pgvector is chosen exactly when the main store is Postgres.
 */
function usePgvector(): boolean {
  const explicitBackend = AppConfig?.vectorStore?.type;
  switch (explicitBackend) {
    case 'pgvector':
      return true;
    case 'lancedb':
      return false;
    default:
      return AppConfig?.store?.type === 'postgres';
  }
}
|
|
25
|
-
|
|
26
|
-
/**
 * Local document retriever — embeds documents into pgvector or LanceDB
 * and retrieves relevant chunks via vector similarity search.
 *
 * The backend is chosen by `usePgvector()`. With pgvector, chunk text and
 * embeddings live in the `document_local_chunks` table. With LanceDB only the
 * vectors are stored, and the chunk text is kept in the in-memory
 * `localChunks` map.
 *
 * Chunk ids are derived deterministically from (title, chunk index, content),
 * so processing the same document again (for example after a restart) maps to
 * the same rows instead of inserting duplicates.
 */
class DocumentRetriever {
  private vectorStore: VectorStore | null = null;
  private embeddingProvider: OpenAIEmbeddingProvider | null = null;
  private chunker: TextChunker | null = null;
  /** Chunk text keyed by chunk id; required by the LanceDB path to resolve hits to text. */
  private localChunks: Map<string, LocalChunk> = new Map();
  /** Titles already handled by `processDocument`, including empty documents. */
  private processedDocuments: Set<string> = new Set();
  private initialized = false;
  /** In-flight initialisation, shared so concurrent callers do not initialise twice. */
  private initPromise: Promise<void> | null = null;

  /**
   * Lazily set up the chunker, the embedding provider and the vector backend.
   * Concurrent callers await the same initialisation; a failed attempt is not
   * cached, so a later call retries.
   */
  private async ensureInit(): Promise<void> {
    if (this.initialized) return;

    if (!this.initPromise) {
      this.initPromise = this.initialize();
    }
    const pending = this.initPromise;
    try {
      await pending;
    } finally {
      // Only clear the slot if it still holds the attempt we awaited.
      if (this.initPromise === pending) {
        this.initPromise = null;
      }
    }
  }

  /** Perform the actual one-time setup; marks the instance initialised on success. */
  private async initialize(): Promise<void> {
    this.chunker = new TextChunker(1000, 200);
    this.embeddingProvider = new OpenAIEmbeddingProvider({
      model: 'text-embedding-3-small',
    });

    if (usePgvector()) {
      await this.initPgvectorTable();
    } else {
      await this.initLanceDB();
    }

    this.initialized = true;
  }

  /** Open (or create) the LanceDB store. Errors propagate to the caller. */
  private async initLanceDB(): Promise<void> {
    const dbPath =
      AppConfig?.vectorStore?.type === 'lancedb'
        ? (AppConfig.vectorStore as any).dbname || './data/document-vectors.lance'
        : './data/document-vectors.lance';

    const store = new LanceDBVectorStore({
      moduleName: 'documents',
      vectorDimension: VECTOR_DIMENSION,
      dbname: dbPath,
    });
    await store.init();
    // Publish the store only once it is usable.
    this.vectorStore = store;
    logger.info(`[DOCUMENT-RETRIEVER] LanceDB vector store initialized at ${dbPath}`);
  }

  /**
   * Create the pgvector chunk table and its HNSW index if they are missing.
   * Best effort: failures are logged and do not abort initialisation.
   */
  private async initPgvectorTable(): Promise<void> {
    try {
      const ag = (globalThis as any).agentlang;
      if (!ag?.rawQuery) return;

      await ag.rawQuery(`
        CREATE TABLE IF NOT EXISTS document_local_chunks (
          id TEXT PRIMARY KEY,
          content TEXT NOT NULL,
          document_title TEXT NOT NULL,
          chunk_index INTEGER NOT NULL,
          embedding vector(${VECTOR_DIMENSION})
        )
      `);
      try {
        await ag.rawQuery(`
          CREATE INDEX IF NOT EXISTS idx_document_local_chunks_embedding
          ON document_local_chunks USING hnsw (embedding vector_cosine_ops)
        `);
      } catch (err) {
        // Index may already exist or pgvector extension not loaded
        logger.debug(`[DOCUMENT-RETRIEVER] Skipped creating pgvector index: ${err}`);
      }
      logger.info('[DOCUMENT-RETRIEVER] pgvector local chunks table initialized');
    } catch (err) {
      logger.warn(`[DOCUMENT-RETRIEVER] Failed to initialize pgvector table: ${err}`);
    }
  }

  /** Return the embedding provider, failing loudly if initialisation did not run. */
  private requireEmbeddingProvider(): OpenAIEmbeddingProvider {
    if (!this.embeddingProvider) {
      throw new Error('Document retriever is not initialized');
    }
    return this.embeddingProvider;
  }

  /**
   * Stable id for a chunk. Including the content means an edited chunk gets a
   * new id rather than silently keeping a stale embedding.
   */
  private chunkId(title: string, index: number, content: string): string {
    return crypto
      .createHash('sha256')
      .update(`${title}\u0000${index}\u0000${content}`)
      .digest('hex');
  }

  /**
   * Load a document's text from an http(s) URL or a local file path.
   *
   * @returns the text, or null when loading failed (the failure is logged here).
   */
  private async loadContent(title: string, url: string): Promise<string | null> {
    if (url.startsWith('http://') || url.startsWith('https://')) {
      const resp = await fetch(url);
      if (!resp.ok) {
        logger.warn(
          `[DOCUMENT-RETRIEVER] Failed to fetch "${title}" from ${url}: ${resp.status}`
        );
        return null;
      }
      return await resp.text();
    }

    const filePath = pathResolve(url);
    try {
      return readFileSync(filePath, 'utf-8');
    } catch (err) {
      logger.warn(`[DOCUMENT-RETRIEVER] Failed to read "${title}" from ${filePath}: ${err}`);
      return null;
    }
  }

  /**
   * Fetch, chunk, embed and store a document. Each title is processed at most
   * once per retriever instance. Load/embedding/storage failures are logged as
   * warnings rather than thrown; initialisation failures do propagate.
   *
   * @param title document title, also used as the filter key in `query`
   * @param url   http(s) URL or local file path of the document
   */
  async processDocument(title: string, url: string): Promise<void> {
    if (this.processedDocuments.has(title)) return;

    await this.ensureInit();

    try {
      const content = await this.loadContent(title, url);
      if (content === null) return;

      if (content.trim().length === 0) {
        logger.debug(`[DOCUMENT-RETRIEVER] Document "${title}" is empty, skipping`);
        this.processedDocuments.add(title);
        return;
      }

      if (!this.chunker) {
        throw new Error('Document retriever is not initialized');
      }
      const chunks = this.chunker.splitText(content);
      logger.debug(`[DOCUMENT-RETRIEVER] Document "${title}": ${chunks.length} chunks`);

      if (chunks.length === 0) {
        this.processedDocuments.add(title);
        return;
      }

      const embeddings = await this.requireEmbeddingProvider().embedTexts(chunks);
      if (embeddings.length !== chunks.length) {
        // Storing with misaligned indices would pair text with the wrong vector.
        throw new Error(
          `expected ${chunks.length} embeddings but received ${embeddings.length}`
        );
      }

      if (usePgvector()) {
        await this.storePgvectorChunks(title, chunks, embeddings);
      } else {
        await this.storeLanceDBChunks(title, chunks, embeddings);
      }

      this.processedDocuments.add(title);
      logger.info(
        `[DOCUMENT-RETRIEVER] Processed "${title}": ${chunks.length} chunks embedded and stored`
      );
    } catch (err) {
      logger.warn(`[DOCUMENT-RETRIEVER] Error processing "${title}": ${err}`);
    }
  }

  /**
   * Insert chunks into `document_local_chunks`. Because ids are deterministic,
   * `ON CONFLICT (id) DO NOTHING` makes repeated processing idempotent.
   * Does nothing when the global `agentlang.rawQuery` hook is unavailable.
   */
  private async storePgvectorChunks(
    title: string,
    chunks: string[],
    embeddings: number[][]
  ): Promise<void> {
    const ag = (globalThis as any).agentlang;
    if (!ag?.rawQuery) return;

    for (let i = 0; i < chunks.length; i++) {
      const id = this.chunkId(title, i, chunks[i]);
      const embeddingStr = `[${embeddings[i].join(',')}]`;
      await ag.rawQuery(
        `INSERT INTO document_local_chunks (id, content, document_title, chunk_index, embedding)
         VALUES ($1, $2, $3, $4, $5::vector)
         ON CONFLICT (id) DO NOTHING`,
        [id, chunks[i], title, i, embeddingStr]
      );
      this.localChunks.set(id, {
        id,
        content: chunks[i],
        documentTitle: title,
        chunkIndex: i,
      });
    }
  }

  /**
   * Add chunk vectors to LanceDB in one batch, skipping ids that are already
   * stored, and record every chunk's text in `localChunks`.
   */
  private async storeLanceDBChunks(
    title: string,
    chunks: string[],
    embeddings: number[][]
  ): Promise<void> {
    const store = this.vectorStore;
    if (!store) return;

    const pendingChunks: LocalChunk[] = [];
    const newRecords: Array<{ id: string; embedding: number[]; documentId: string }> = [];

    for (let i = 0; i < chunks.length; i++) {
      const id = this.chunkId(title, i, chunks[i]);
      pendingChunks.push({ id, content: chunks[i], documentTitle: title, chunkIndex: i });
      // A persisted table may already hold this vector from an earlier run.
      if (!(await store.exists(id))) {
        newRecords.push({ id, embedding: embeddings[i], documentId: title });
      }
    }

    if (newRecords.length > 0) {
      await store.addEmbeddings(newRecords);
    }
    for (const chunk of pendingChunks) {
      this.localChunks.set(chunk.id, chunk);
    }
  }

  /**
   * Return the text of the chunks most similar to `queryText`.
   *
   * @param queryText      text to embed and search with
   * @param documentTitles when non-empty, only chunks of these documents are returned
   * @param limit          maximum number of chunks
   * @returns matching chunk texts joined by a `---` separator line, or '' when
   *          nothing matched or the search failed (failures are logged at debug level)
   */
  async query(queryText: string, documentTitles?: string[], limit: number = 10): Promise<string> {
    await this.ensureInit();

    try {
      const results = usePgvector()
        ? await this.queryPgvector(queryText, documentTitles, limit)
        : await this.queryLanceDB(queryText, documentTitles, limit);

      if (results.length === 0) return '';

      return results.map(r => r.content).join('\n\n---\n\n');
    } catch (err) {
      logger.debug(`[DOCUMENT-RETRIEVER] Query failed: ${err}`);
      return '';
    }
  }

  /** Similarity search against `document_local_chunks`, ordered by the `<=>` distance. */
  private async queryPgvector(
    queryText: string,
    documentTitles?: string[],
    limit: number = 10
  ): Promise<Array<{ id: string; content: string; similarity: number }>> {
    const ag = (globalThis as any).agentlang;
    if (!ag?.rawQuery) return [];

    const queryEmbedding = await this.requireEmbeddingProvider().embedText(queryText);
    const embeddingStr = `[${queryEmbedding.join(',')}]`;

    // LIMIT is spliced into the SQL text, so force it to a non-negative integer.
    const rowLimit = Number.isFinite(limit) ? Math.max(0, Math.trunc(limit)) : 10;

    const titles = documentTitles ?? [];
    // $1 is the query embedding; titles occupy $2 onwards.
    const whereClause =
      titles.length > 0
        ? `WHERE document_title IN (${titles.map((_, i) => `$${i + 2}`).join(', ')})`
        : '';

    const sql = `SELECT id, content, document_title, 1 - (embedding <=> $1::vector) AS similarity
             FROM document_local_chunks
             ${whereClause}
             ORDER BY embedding <=> $1::vector
             LIMIT ${rowLimit}`;
    const params: any[] = [embeddingStr, ...titles];

    const rows: any[] = await ag.rawQuery(sql, params);
    return (rows || []).map((r: any) => ({
      id: r.id,
      content: r.content,
      similarity: parseFloat(r.similarity) || 0,
    }));
  }

  /**
   * Similarity search against LanceDB, resolving hits to text via `localChunks`.
   *
   * NOTE: the title filter is applied after the store has already truncated to
   * `limit` hits, so fewer than `limit` chunks may be returned even when more
   * matching chunks exist.
   */
  private async queryLanceDB(
    queryText: string,
    documentTitles?: string[],
    limit: number = 10
  ): Promise<Array<{ id: string; content: string; similarity: number }>> {
    const store = this.vectorStore;
    if (!store) return [];

    const queryEmbedding = await this.requireEmbeddingProvider().embedText(queryText);
    const searchResults = await store.search(queryEmbedding, undefined, undefined, limit);

    const titles = documentTitles ?? [];
    const results: Array<{ id: string; content: string; similarity: number }> = [];

    for (const sr of searchResults) {
      const chunk = this.localChunks.get(sr.id);
      // Vectors without in-memory text cannot be turned into content.
      if (!chunk) continue;
      if (titles.length > 0 && !titles.includes(chunk.documentTitle)) continue;

      results.push({
        id: sr.id,
        content: chunk.content,
        similarity: 1 - (sr.distance || 0),
      });
    }

    return results.slice(0, limit);
  }

  /** Close the vector store (if any) and reset all in-memory state. */
  async close(): Promise<void> {
    if (this.vectorStore) {
      await this.vectorStore.close();
      this.vectorStore = null;
    }
    this.localChunks.clear();
    this.processedDocuments.clear();
    this.initPromise = null;
    this.initialized = false;
  }
}
|
|
296
|
-
|
|
297
|
-
/** Module-wide retriever, created on first use and dropped by resetDocumentRetriever(). */
let retrieverInstance: DocumentRetriever | null = null;

/**
 * Return the shared DocumentRetriever, constructing it on the first call.
 */
export function getDocumentRetriever(): DocumentRetriever {
  if (retrieverInstance === null) {
    retrieverInstance = new DocumentRetriever();
  }
  return retrieverInstance;
}
|
|
305
|
-
|
|
306
|
-
/**
 * Drop the shared retriever so the next getDocumentRetriever() call builds a
 * fresh one. The old instance is closed in the background; a close failure
 * never reaches the caller but is logged instead of being silently discarded.
 */
export function resetDocumentRetriever(): void {
  const previous = retrieverInstance;
  if (!previous) return;

  retrieverInstance = null;
  previous.close().catch((err: unknown) => {
    logger.debug(`[DOCUMENT-RETRIEVER] Failed to close retriever during reset: ${err}`);
  });
}
|
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
import * as lancedb from '@lancedb/lancedb';
|
|
2
|
-
import { Schema, Field, Float32, Utf8, FixedSizeList } from 'apache-arrow';
|
|
3
|
-
import { logger } from '../../logger.js';
|
|
4
|
-
import { VectorStore, VectorRecord, SearchResult, VectorStoreConfig } from './types.js';
|
|
5
|
-
|
|
6
|
-
/**
 * VectorStore backed by a LanceDB table named `embeddings`.
 *
 * Rows hold `id`, `embedding`, `tenantId`, `agentId` and `documentId`;
 * `VectorRecord.metadata` is not persisted. All filter expressions are built
 * from escaped string literals (see `quote`).
 */
export class LanceDBVectorStore implements VectorStore {
  private db: lancedb.Connection | null = null;
  private table: lancedb.Table | null = null;
  private config: VectorStoreConfig;

  constructor(config: VectorStoreConfig) {
    this.config = config;
  }

  /** Render a value as a single-quoted SQL string literal, doubling embedded quotes. */
  private static quote(value: string): string {
    return `'${value.replace(/'/g, "''")}'`;
  }

  /** Convert a record into the row shape of the `embeddings` table. */
  private static toRow(record: VectorRecord) {
    return {
      id: record.id,
      embedding: record.embedding,
      tenantId: record.tenantId || null,
      agentId: record.agentId || null,
      documentId: record.documentId || null,
    };
  }

  /**
   * Return the open table.
   *
   * @throws Error when `init()` has not completed successfully.
   */
  private requireTable(): lancedb.Table {
    if (!this.table) {
      throw new Error('Vector store not initialized. Call init() first.');
    }
    return this.table;
  }

  /**
   * Connect to the database and open the `embeddings` table, creating it with
   * the configured vector dimension when it does not exist yet.
   *
   * The location is `memory://` when `inMemory` is set, otherwise `dbname`, or
   * a per-module default path under `./data/vector-store/`.
   *
   * @throws rethrows any connection or table error after logging it.
   */
  async init(): Promise<void> {
    try {
      const dbPath = this.config.inMemory
        ? 'memory://'
        : this.config.dbname || `./data/vector-store/${this.config.moduleName}.lance`;

      this.db = await lancedb.connect(dbPath);

      const tableName = 'embeddings';
      const tableNames = await this.db.tableNames();

      if (tableNames.includes(tableName)) {
        this.table = await this.db.openTable(tableName);
        logger.info(`LanceDB table ${tableName} opened`);
      } else {
        const schema = new Schema([
          new Field('id', new Utf8(), false),
          new Field(
            'embedding',
            new FixedSizeList(this.config.vectorDimension, new Field('item', new Float32())),
            false
          ),
          new Field('tenantId', new Utf8(), true),
          new Field('agentId', new Utf8(), true),
          new Field('documentId', new Utf8(), true),
        ]);

        this.table = await this.db.createEmptyTable(tableName, schema);
        logger.info(`LanceDB table ${tableName} created`);
      }
    } catch (error) {
      logger.error('Failed to initialize LanceDB vector store:', error);
      throw error;
    }
  }

  /**
   * Store one embedding.
   *
   * @throws Error when the store is not initialised; rethrows storage errors after logging.
   */
  async addEmbedding(record: VectorRecord): Promise<void> {
    const table = this.requireTable();

    try {
      await table.add([LanceDBVectorStore.toRow(record)]);
    } catch (error) {
      logger.error(`Failed to add embedding ${record.id}:`, error);
      throw error;
    }
  }

  /**
   * Store several embeddings in one write. An empty array is a no-op.
   *
   * @throws Error when the store is not initialised; rethrows storage errors after logging.
   */
  async addEmbeddings(records: VectorRecord[]): Promise<void> {
    const table = this.requireTable();
    if (records.length === 0) return;

    try {
      await table.add(records.map(LanceDBVectorStore.toRow));
    } catch (error) {
      logger.error(`Failed to add ${records.length} embeddings:`, error);
      throw error;
    }
  }

  /**
   * Find the nearest stored vectors to `embedding`.
   *
   * @param embedding query vector
   * @param tenantId  when non-empty, restrict hits to this tenant
   * @param agentId   when non-empty, restrict hits to this agent
   * @param limit     maximum number of hits
   * @returns hits with the store-reported distance (0 when the store omits it)
   * @throws Error when the store is not initialised; rethrows search errors after logging.
   */
  async search(
    embedding: number[],
    tenantId?: string,
    agentId?: string,
    limit: number = 10
  ): Promise<SearchResult[]> {
    const table = this.requireTable();

    try {
      let query = table.vectorSearch(embedding).limit(limit);

      // Tenant/agent isolation. Values are escaped string literals, not bound parameters.
      const filters: string[] = [];
      if (tenantId) {
        filters.push(`tenantId = ${LanceDBVectorStore.quote(tenantId)}`);
      }
      if (agentId) {
        filters.push(`agentId = ${LanceDBVectorStore.quote(agentId)}`);
      }
      if (filters.length > 0) {
        query = query.where(filters.join(' AND '));
      }

      const results = await query.toArray();

      return results.map((row: any) => ({
        id: row.id,
        distance: row._distance || 0,
        tenantId: row.tenantId,
        agentId: row.agentId,
        documentId: row.documentId,
      }));
    } catch (error) {
      logger.error('Failed to search embeddings:', error);
      throw error;
    }
  }

  /**
   * Delete the vector with the given id.
   *
   * @throws Error when the store is not initialised; rethrows storage errors after logging.
   */
  async delete(id: string): Promise<void> {
    const table = this.requireTable();

    try {
      // Escape the id so a quote inside it cannot alter the predicate.
      await table.delete(`id = ${LanceDBVectorStore.quote(id)}`);
    } catch (error) {
      logger.error(`Failed to delete embedding ${id}:`, error);
      throw error;
    }
  }

  /**
   * Report whether a vector with the given id is stored.
   *
   * @throws Error when the store is not initialised; rethrows query errors after logging.
   */
  async exists(id: string): Promise<boolean> {
    const table = this.requireTable();

    try {
      const results = await table
        .query()
        .where(`id = ${LanceDBVectorStore.quote(id)}`)
        .limit(1)
        .toArray();
      return results.length > 0;
    } catch (error) {
      logger.error(`Failed to check existence of ${id}:`, error);
      throw error;
    }
  }

  /**
   * Drop the table and connection references. Nothing is explicitly closed on
   * the LanceDB side; the store must be re-initialised before further use.
   */
  async close(): Promise<void> {
    this.table = null;
    this.db = null;
    logger.info('LanceDB vector store closed');
  }
}
|
|
184
|
-
|
|
185
|
-
/**
 * Factory that builds a LanceDB-backed store and exposes it through the
 * generic VectorStore interface. The returned store still needs `init()`.
 */
export function createLanceDBStore(config: VectorStoreConfig): VectorStore {
  const store: VectorStore = new LanceDBVectorStore(config);
  return store;
}
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
/** A single embedding handed to a {@link VectorStore} for persistence. */
export interface VectorRecord {
  /** Unique identifier of the stored vector. */
  id: string;
  /** The embedding values. */
  embedding: number[];
  /** Optional tenant scope. */
  tenantId?: string;
  /** Optional agent scope. */
  agentId?: string;
  /** Optional identifier of the document the vector was derived from. */
  documentId?: string;
  /** Optional free-form extra data. */
  metadata?: Record<string, any>;
}
|
|
9
|
-
|
|
10
|
-
/** One hit returned by a vector similarity search. */
export interface SearchResult {
  /** Identifier of the matching vector. */
  id: string;
  /** Distance between the query embedding and the match, as reported by the store. */
  distance: number;
  /** Tenant scope stored with the match, if any. */
  tenantId?: string;
  /** Agent scope stored with the match, if any. */
  agentId?: string;
  /** Document identifier stored with the match, if any. */
  documentId?: string;
  /** Optional free-form extra data. */
  metadata?: Record<string, any>;
}
|
|
18
|
-
|
|
19
|
-
/** Contract implemented by vector storage backends. */
export interface VectorStore {
  /** Prepare the store for use; call before any other method. */
  init(): Promise<void>;

  /** Persist a single embedding. */
  addEmbedding(record: VectorRecord): Promise<void>;

  /** Persist several embeddings in one call. */
  addEmbeddings(records: VectorRecord[]): Promise<void>;

  /** Similarity search, optionally scoped by tenant and/or agent and capped at `limit` hits. */
  search(
    embedding: number[],
    tenantId?: string,
    agentId?: string,
    limit?: number
  ): Promise<SearchResult[]>;

  /** Remove the vector with the given id. */
  delete(id: string): Promise<void>;

  /** Report whether a vector with the given id is stored. */
  exists(id: string): Promise<boolean>;

  /** Release the store's resources. */
  close(): Promise<void>;
}
|
|
33
|
-
|
|
34
|
-
/** Settings accepted by vector store implementations. */
export interface VectorStoreConfig {
  /** Optional database location. */
  dbname?: string;
  /** Name of the module the store belongs to. */
  moduleName: string;
  /** Number of components in every stored embedding. */
  vectorDimension: number;
  /** When true, the store is expected to keep its data in memory only. */
  inMemory?: boolean;
}
|