@baselineos/protocol-core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +14 -0
- package/.turbo/turbo-test.log +15 -0
- package/CHANGELOG.md +49 -0
- package/LICENSE +17 -0
- package/README.md +18 -0
- package/dist/index.d.ts +1322 -0
- package/dist/index.js +2653 -0
- package/package.json +31 -0
- package/src/__tests__/functional.test.ts +269 -0
- package/src/__tests__/smoke.test.ts +23 -0
- package/src/chromadb.d.ts +9 -0
- package/src/index.ts +117 -0
- package/src/knowledge/knowledge-graph.ts +1441 -0
- package/src/knowledge/vector-store.ts +722 -0
- package/src/lang/lang.ts +278 -0
- package/src/lexicon/lexicon.ts +414 -0
- package/src/parser/grammar.ts +240 -0
- package/src/parser/parser.ts +420 -0
- package/src/types/index.ts +799 -0
- package/tsconfig.json +9 -0
|
@@ -0,0 +1,722 @@
|
|
|
1
|
+
// ChromaDB Vector Store wrapper
|
|
2
|
+
// chromadb is an optional peer dependency - do NOT import it directly.
|
|
3
|
+
// The client and collection are typed as `unknown` and resolved at runtime.
|
|
4
|
+
|
|
5
|
+
// ── Interfaces ──────────────────────────────────────────────────────────────────
|
|
6
|
+
|
|
7
|
+
/**
 * Connection and collection settings accepted by the `ChromaVectorStore`
 * constructor. Every field is optional; the constructor fills in the default
 * noted on each member.
 */
export interface VectorStoreConfig {
  /** Host name of the ChromaDB server. Defaults to `'localhost'`. */
  host?: string;

  /** TCP port of the ChromaDB server. Defaults to `8000`. */
  port?: number;

  /** Collection that is fetched or created on initialization. Defaults to `'baseline-vectors'`. */
  collectionName?: string;

  /**
   * Selects the embedding function: `'openai'`, `'cohere'` and `'huggingface'`
   * are recognised; any other value (including the default `'default'`) selects
   * the built-in hash-based fallback.
   */
  embeddingFunction?: string;

  /** Metadata passed along when the collection is fetched or created. Defaults to `{}`. */
  metadata?: Record<string, string>;
}
|
|
14
|
+
|
|
15
|
+
/** Counters that a `ChromaVectorStore` keeps in memory about its own usage. */
export interface VectorStoreMetrics {
  /**
   * Locally tracked document count: adjusted on add/delete and overwritten with
   * the collection's reported count whenever collection stats are fetched.
   */
  totalDocuments: number;

  /** Number of `query` calls that produced a result, cache hits included. */
  totalQueries: number;

  /** Running average, in milliseconds, updated by queries that reached the collection. */
  averageQueryTime: number;

  /** `Date.now()` timestamp of construction or of the latest `optimizeCollection()` run. */
  lastOptimization: number;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Payload for adding or updating documents. The arrays are forwarded to the
 * collection as-is; `embeddings` and `metadatas` are only sent when provided.
 */
export interface DocumentInput {
  /** Identifiers of the documents being written. */
  ids: string[];

  /** Optional embedding vectors supplied by the caller. */
  embeddings?: number[][];

  /** Optional metadata objects for the documents. */
  metadatas?: Record<string, unknown>[];

  /** Raw document texts. */
  documents: string[];
}
|
|
28
|
+
|
|
29
|
+
/**
 * Uniform result envelope returned by every document, search and maintenance
 * operation of `ChromaVectorStore`. Failures are reported through `success`
 * and `error` rather than by throwing.
 */
export interface VectorQueryResult {
  /** `true` when the operation completed; `false` when `error` describes a failure. */
  success: boolean;

  /** The query text or pattern, echoed back by search operations. */
  query?: string;

  /** Operation output: the collection's raw response, or the matches assembled by a regex search. */
  results?: unknown;

  /** Human-readable failure message; present only when `success` is `false`. */
  error?: string;

  /** Operation-specific details such as counts, timings or cache information. */
  metadata?: Record<string, unknown>;
}
|
|
36
|
+
|
|
37
|
+
/** Optional tuning knobs for `ChromaVectorStore.query`. */
export interface VectorQueryOptions {
  /** Maximum number of results to request; `query` falls back to `10` when omitted. */
  nResults?: number;

  /** Metadata filter, forwarded to the collection only when set. */
  where?: Record<string, unknown>;

  /** Document-content filter, forwarded to the collection only when set. */
  whereDocument?: Record<string, unknown>;

  /** Names of the result fields to include, forwarded to the collection only when set. */
  include?: string[];
}
|
|
43
|
+
|
|
44
|
+
/** Options for `ChromaVectorStore.filterSearch`; all of them are passed on to `query`. */
export interface FilterSearchOptions {
  /** Metadata filter applied to the search. */
  where?: Record<string, unknown>;

  /** Document-content filter applied to the search. */
  whereDocument?: Record<string, unknown>;

  /** Maximum number of results; `filterSearch` uses `10` when omitted. */
  nResults?: number;

  /**
   * Result fields to include; `filterSearch` uses
   * `['documents', 'metadatas', 'distances']` when omitted.
   */
  include?: string[];
}
|
|
50
|
+
|
|
51
|
+
/** One step of a `ChromaVectorStore.batchOperation` run. */
export interface BatchOperation {
  /** Which document operation to perform. */
  type: 'add' | 'update' | 'delete';

  /**
   * Operation payload. For `'add'` and `'update'` it is treated as a
   * `DocumentInput`; for `'delete'` its `ids` property is read as the list of
   * document ids to remove. The shape is not verified by the type system.
   */
  data: Record<string, unknown>;
}
|
|
55
|
+
|
|
56
|
+
/** Snapshot returned by `ChromaVectorStore.getCollectionStats`. */
export interface CollectionStats {
  /** The configured collection name. */
  name: string;

  /** Document count reported by the collection. */
  count: number;

  /** The configured collection metadata plus a `peekSample` entry holding a one-item peek. */
  metadata: Record<string, unknown>;
}
|
|
61
|
+
|
|
62
|
+
// ── ChromaVectorStore ───────────────────────────────────────────────────────────
|
|
63
|
+
|
|
64
|
+
/**
 * Loosely-typed view of a ChromaDB client or collection. `chromadb` is an
 * optional dependency that is resolved at runtime, so its objects are stored
 * as `unknown` and only viewed through this shape at the call sites.
 */
type ChromaRuntimeHandle = Record<string, (...args: unknown[]) => Promise<unknown>>;

/**
 * Wrapper around a single ChromaDB collection.
 *
 * - `chromadb` is loaded lazily by `initializeClient()`. When it is missing or
 *   the server cannot be reached the store stays in "fallback mode": every
 *   operation returns `{ success: false, error }` instead of throwing.
 * - Successful `query` results are cached in memory for five minutes and the
 *   cache is dropped whenever documents are added, updated or deleted.
 * - Usage counters are kept in memory only (see `getMetrics()`).
 */
export class ChromaVectorStore {
  /** Error text returned by operations attempted before a successful `initializeClient()`. */
  private static readonly NOT_INITIALIZED_ERROR =
    'Vector store not initialized. Call initializeClient() first.';

  private config: Required<VectorStoreConfig>;
  private client: unknown;
  private collection: unknown;
  private isInitialized: boolean;
  private metrics: VectorStoreMetrics;
  private queryCache: Map<string, { result: unknown; timestamp: number }>;
  private cacheMaxAge: number;

  /**
   * Stores the configuration (applying defaults) and resets all runtime state.
   * No connection is attempted here; call `initializeClient()` for that.
   *
   * @param config Optional overrides for host, port, collection name,
   *   embedding function and collection metadata.
   */
  constructor(config: VectorStoreConfig = {}) {
    this.config = {
      host: config.host ?? 'localhost',
      port: config.port ?? 8000,
      collectionName: config.collectionName ?? 'baseline-vectors',
      embeddingFunction: config.embeddingFunction ?? 'default',
      metadata: config.metadata ?? {},
    };

    this.client = null;
    this.collection = null;
    this.isInitialized = false;

    this.metrics = {
      totalDocuments: 0,
      totalQueries: 0,
      averageQueryTime: 0,
      lastOptimization: Date.now(),
    };

    this.queryCache = new Map();
    this.cacheMaxAge = 5 * 60 * 1000; // 5 minutes
  }

  // ── Internal helpers ────────────────────────────────────────────────────────

  /** Turns any thrown value into a printable message. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /** Returns the collection handle when the store is usable, otherwise `null`. */
  private activeCollection(): ChromaRuntimeHandle | null {
    if (!this.isInitialized || !this.collection) {
      return null;
    }
    return this.collection as ChromaRuntimeHandle;
  }

  /** Builds the standard "not initialized" failure, echoing the query text when one is given. */
  private notInitializedResult(query?: string): VectorQueryResult {
    if (query === undefined) {
      return { success: false, error: ChromaVectorStore.NOT_INITIALIZED_ERROR };
    }
    return { success: false, query, error: ChromaVectorStore.NOT_INITIALIZED_ERROR };
  }

  /**
   * Checks the one field every write needs before touching the backend.
   * Payloads can arrive through the unchecked casts in `batchOperation`, so the
   * static type alone cannot be trusted here.
   *
   * @returns An error message when `ids` is not an array, otherwise `null`.
   */
  private static validateIds(ids: unknown): string | null {
    return Array.isArray(ids) ? null : 'Invalid input: "ids" must be an array of document ids.';
  }

  /** Assembles the add/update payload, sending optional fields only when they were provided. */
  private static buildDocumentPayload(input: DocumentInput): Record<string, unknown> {
    const payload: Record<string, unknown> = {
      ids: input.ids,
      documents: input.documents,
    };
    if (input.embeddings) {
      payload.embeddings = input.embeddings;
    }
    if (input.metadatas) {
      payload.metadatas = input.metadatas;
    }
    return payload;
  }

  /**
   * Evicts expired cache entries. Entries are always re-inserted on write, so
   * the map's insertion order matches timestamp order and the scan can stop at
   * the first entry that is still fresh.
   */
  private pruneExpiredCache(now: number): void {
    for (const [key, entry] of this.queryCache) {
      if (now - entry.timestamp < this.cacheMaxAge) {
        break;
      }
      this.queryCache.delete(key);
    }
  }

  // ── Initialization ──────────────────────────────────────────────────────────

  /**
   * Loads `chromadb`, creates the client and fetches or creates the collection.
   *
   * Never throws: when the package is missing or any step fails, the problem is
   * logged, all handles are reset and the store stays in fallback mode
   * (`isReady()` returns `false`). Calling it again after success is a no-op.
   */
  async initializeClient(): Promise<void> {
    if (this.isInitialized) {
      console.log('[VectorStore] Client already initialized.');
      return;
    }

    try {
      // Dynamic import of chromadb (optional peer dependency)
      let chromadb: Record<string, unknown>;
      try {
        chromadb = (await import('chromadb')) as Record<string, unknown>;
      } catch {
        console.log('[VectorStore] chromadb package not found. Install it with: npm install chromadb');
        console.log('[VectorStore] Running in fallback mode without vector storage.');
        this.isInitialized = false;
        return;
      }

      const ChromaClient = chromadb.ChromaClient as new (config: Record<string, unknown>) => unknown;

      this.client = new ChromaClient({
        path: `http://${this.config.host}:${this.config.port}`,
      });

      await this.initializeCollection();
      this.isInitialized = true;

      console.log(`[VectorStore] Client initialized. Connected to ${this.config.host}:${this.config.port}.`);
    } catch (error) {
      const message = ChromaVectorStore.describeError(error);
      console.log(`[VectorStore] Failed to initialize client: ${message}`);
      console.log('[VectorStore] Running in fallback mode without vector storage.');
      // Drop half-built handles so getClientStatus() does not report a client
      // that never finished connecting.
      this.client = null;
      this.collection = null;
      this.isInitialized = false;
    }
  }

  /**
   * Fetches or creates the configured collection on the current client.
   *
   * @throws Error when no client exists; rethrows any backend failure after logging it.
   */
  private async initializeCollection(): Promise<void> {
    if (!this.client) {
      throw new Error('Client not initialized');
    }

    try {
      const client = this.client as ChromaRuntimeHandle;
      const embeddingFunction = this.createEmbeddingFunction();

      this.collection = await client.getOrCreateCollection({
        name: this.config.collectionName,
        metadata: this.config.metadata,
        embeddingFunction,
      });

      console.log(`[VectorStore] Collection "${this.config.collectionName}" ready.`);
    } catch (error) {
      const message = ChromaVectorStore.describeError(error);
      console.log(`[VectorStore] Failed to initialize collection: ${message}`);
      throw error;
    }
  }

  // ── Document Operations ─────────────────────────────────────────────────────

  /**
   * Adds documents to the collection.
   *
   * On success the local document counter grows by `input.ids.length` and the
   * query cache is cleared, because cached results may now be missing the new
   * documents.
   *
   * @param input Ids and texts, plus optional embeddings and metadata.
   * @returns `success: true` with `addedCount` and `totalDocuments`, or
   *   `success: false` with an `error` message.
   */
  async addDocuments(input: DocumentInput): Promise<VectorQueryResult> {
    const collection = this.activeCollection();
    if (!collection) {
      return this.notInitializedResult();
    }

    try {
      const invalid = ChromaVectorStore.validateIds((input as Partial<DocumentInput> | null)?.ids);
      if (invalid) {
        throw new Error(invalid);
      }

      await collection.add(ChromaVectorStore.buildDocumentPayload(input));

      this.metrics.totalDocuments += input.ids.length;

      // Same invalidation that update and delete perform.
      this.queryCache.clear();

      console.log(`[VectorStore] Added ${input.ids.length} documents.`);

      return {
        success: true,
        metadata: {
          addedCount: input.ids.length,
          totalDocuments: this.metrics.totalDocuments,
        },
      };
    } catch (error) {
      const message = ChromaVectorStore.describeError(error);
      console.log(`[VectorStore] Failed to add documents: ${message}`);
      return {
        success: false,
        error: message,
      };
    }
  }

  /**
   * Runs a text query against the collection, serving repeated identical
   * queries from an in-memory cache for up to five minutes.
   *
   * The cache key combines the query text with the JSON form of `options`.
   * Cache hits increment `totalQueries` but leave `averageQueryTime` untouched.
   *
   * @param queryText Text to search for.
   * @param options Result limit (default 10), filters and included fields.
   * @returns The collection's raw response in `results`, with timing and cache
   *   information in `metadata`; or `success: false` with an `error` message.
   */
  async query(queryText: string, options: VectorQueryOptions = {}): Promise<VectorQueryResult> {
    const collection = this.activeCollection();
    if (!collection) {
      return this.notInitializedResult(queryText);
    }

    const startTime = Date.now();

    // Check cache
    const cacheKey = `${queryText}:${JSON.stringify(options)}`;
    const cached = this.queryCache.get(cacheKey);
    if (cached) {
      if (startTime - cached.timestamp < this.cacheMaxAge) {
        this.metrics.totalQueries++;
        return {
          success: true,
          query: queryText,
          results: cached.result,
          metadata: { fromCache: true, queryTime: 0 },
        };
      }
      // Expired: drop it now so it does not linger if the refresh below fails.
      this.queryCache.delete(cacheKey);
    }

    try {
      const nResults = options.nResults ?? 10;
      const queryPayload: Record<string, unknown> = {
        queryTexts: [queryText],
        nResults,
      };

      if (options.where) {
        queryPayload.where = options.where;
      }
      if (options.whereDocument) {
        queryPayload.whereDocument = options.whereDocument;
      }
      if (options.include) {
        queryPayload.include = options.include;
      }

      const results = await collection.query(queryPayload);

      const finishedAt = Date.now();
      const queryTime = finishedAt - startTime;

      // Update metrics (incremental running average)
      this.metrics.totalQueries++;
      this.metrics.averageQueryTime =
        (this.metrics.averageQueryTime * (this.metrics.totalQueries - 1) + queryTime) /
        this.metrics.totalQueries;

      // Update cache: evict expired entries first so the map cannot grow
      // without bound, then re-insert so insertion order tracks freshness.
      this.pruneExpiredCache(finishedAt);
      this.queryCache.delete(cacheKey);
      this.queryCache.set(cacheKey, { result: results, timestamp: finishedAt });

      return {
        success: true,
        query: queryText,
        results,
        metadata: {
          queryTime,
          nResults,
          fromCache: false,
        },
      };
    } catch (error) {
      const message = ChromaVectorStore.describeError(error);
      console.log(`[VectorStore] Query failed: ${message}`);
      return {
        success: false,
        query: queryText,
        error: message,
      };
    }
  }

  /**
   * Similarity search returning documents, metadata and distances.
   *
   * @param queryText Text to search for.
   * @param nResults Maximum number of results (default 10).
   */
  async semanticSearch(queryText: string, nResults: number = 10): Promise<VectorQueryResult> {
    return this.query(queryText, {
      nResults,
      include: ['documents', 'metadatas', 'distances'],
    });
  }

  /**
   * Similarity search restricted by metadata and/or document filters.
   *
   * @param queryText Text to search for.
   * @param options Filters, result limit (default 10) and included fields
   *   (default documents, metadata and distances).
   */
  async filterSearch(queryText: string, options: FilterSearchOptions = {}): Promise<VectorQueryResult> {
    return this.query(queryText, {
      nResults: options.nResults ?? 10,
      where: options.where,
      whereDocument: options.whereDocument,
      include: options.include ?? ['documents', 'metadatas', 'distances'],
    });
  }

  /**
   * Query that additionally requires documents to contain `searchText`
   * (via a `$contains` document filter).
   *
   * @param searchText Text used both as the query and as the substring filter.
   * @param nResults Maximum number of results (default 10).
   */
  async fullTextSearch(searchText: string, nResults: number = 10): Promise<VectorQueryResult> {
    return this.query(searchText, {
      nResults,
      whereDocument: { $contains: searchText },
      include: ['documents', 'metadatas', 'distances'],
    });
  }

  /**
   * Case-insensitive regular-expression search over document texts.
   *
   * The whole collection is fetched and filtered in this process, so the cost
   * grows with collection size. The pattern is compiled before anything is
   * fetched, so an invalid pattern fails without a round trip.
   *
   * NOTE(review): `pattern` is compiled verbatim; a pathological expression
   * from an untrusted caller can be slow to evaluate.
   *
   * @param pattern Regular-expression source (flag `i` is applied).
   * @param nResults Maximum number of matches to return (default 10); values
   *   of zero or less yield no matches.
   * @returns Matches in `results` as single-row `ids` / `documents` /
   *   `metadatas` arrays, or `success: false` with an `error` message.
   */
  async regexSearch(pattern: string, nResults: number = 10): Promise<VectorQueryResult> {
    const collection = this.activeCollection();
    if (!collection) {
      return this.notInitializedResult(pattern);
    }

    try {
      const regex = new RegExp(pattern, 'i');

      // ChromaDB doesn't natively support regex, so we do a broad query then filter
      const allResults = await collection.get({
        include: ['documents', 'metadatas'],
      });

      const resultsObj = allResults as Record<string, unknown[] | null>;
      const ids = (resultsObj.ids as string[]) ?? [];
      const documents = (resultsObj.documents as (string | null)[]) ?? [];
      const metadatas = (resultsObj.metadatas as (Record<string, unknown> | null)[]) ?? [];

      const matchedIds: string[] = [];
      const matchedDocuments: string[] = [];
      const matchedMetadatas: Record<string, unknown>[] = [];

      // The limit is part of the loop condition so that it is honoured before
      // the first match is recorded.
      for (let i = 0; i < ids.length && matchedIds.length < nResults; i++) {
        const doc = documents[i];
        if (doc && regex.test(doc)) {
          matchedIds.push(ids[i]);
          matchedDocuments.push(doc);
          matchedMetadatas.push(metadatas[i] ?? {});
        }
      }

      return {
        success: true,
        query: pattern,
        results: {
          ids: [matchedIds],
          documents: [matchedDocuments],
          metadatas: [matchedMetadatas],
        },
        metadata: {
          matchCount: matchedIds.length,
          searchType: 'regex',
        },
      };
    } catch (error) {
      const message = ChromaVectorStore.describeError(error);
      console.log(`[VectorStore] Regex search failed: ${message}`);
      return {
        success: false,
        query: pattern,
        error: message,
      };
    }
  }

  /**
   * Updates existing documents and clears the query cache.
   *
   * @param input Ids and new texts, plus optional embeddings and metadata.
   * @returns `success: true` with `updatedCount`, or `success: false` with an
   *   `error` message.
   */
  async updateDocuments(input: DocumentInput): Promise<VectorQueryResult> {
    const collection = this.activeCollection();
    if (!collection) {
      return this.notInitializedResult();
    }

    try {
      const invalid = ChromaVectorStore.validateIds((input as Partial<DocumentInput> | null)?.ids);
      if (invalid) {
        throw new Error(invalid);
      }

      await collection.update(ChromaVectorStore.buildDocumentPayload(input));

      // Invalidate cache for affected documents
      this.queryCache.clear();

      console.log(`[VectorStore] Updated ${input.ids.length} documents.`);

      return {
        success: true,
        metadata: {
          updatedCount: input.ids.length,
        },
      };
    } catch (error) {
      const message = ChromaVectorStore.describeError(error);
      console.log(`[VectorStore] Failed to update documents: ${message}`);
      return {
        success: false,
        error: message,
      };
    }
  }

  /**
   * Deletes documents by id, lowers the local document counter (never below
   * zero) and clears the query cache.
   *
   * `ids` is verified to be an array before the backend is called, so a
   * malformed payload can never reach the collection as an unscoped delete.
   *
   * @param ids Identifiers of the documents to remove.
   * @returns `success: true` with `deletedCount` and `totalDocuments`, or
   *   `success: false` with an `error` message.
   */
  async deleteDocuments(ids: string[]): Promise<VectorQueryResult> {
    const collection = this.activeCollection();
    if (!collection) {
      return this.notInitializedResult();
    }

    try {
      const invalid = ChromaVectorStore.validateIds(ids);
      if (invalid) {
        throw new Error(invalid);
      }

      await collection.delete({ ids });

      this.metrics.totalDocuments = Math.max(0, this.metrics.totalDocuments - ids.length);

      // Invalidate cache
      this.queryCache.clear();

      console.log(`[VectorStore] Deleted ${ids.length} documents.`);

      return {
        success: true,
        metadata: {
          deletedCount: ids.length,
          totalDocuments: this.metrics.totalDocuments,
        },
      };
    } catch (error) {
      const message = ChromaVectorStore.describeError(error);
      console.log(`[VectorStore] Failed to delete documents: ${message}`);
      return {
        success: false,
        error: message,
      };
    }
  }

  // ── Collection Management ───────────────────────────────────────────────────

  /**
   * Reads the document count and a one-item peek from the collection and
   * synchronises the local document counter with the reported count.
   *
   * @returns The stats, or `null` when the store is not initialized or the
   *   backend call fails (the reason is logged).
   */
  async getCollectionStats(): Promise<CollectionStats | null> {
    const collection = this.activeCollection();
    if (!collection) {
      console.log('[VectorStore] Not initialized, cannot get collection stats.');
      return null;
    }

    try {
      const count = (await collection.count()) as number;
      const peek = (await collection.peek({ limit: 1 })) as Record<string, unknown>;

      this.metrics.totalDocuments = count;

      return {
        name: this.config.collectionName,
        count,
        metadata: {
          ...this.config.metadata,
          peekSample: peek,
        },
      };
    } catch (error) {
      const message = ChromaVectorStore.describeError(error);
      console.log(`[VectorStore] Failed to get collection stats: ${message}`);
      return null;
    }
  }

  /**
   * Local housekeeping: clears the query cache, stamps `lastOptimization` and
   * refreshes the collection stats. Nothing is changed on the server.
   *
   * @returns `success: true` with the number of cache entries cleared, the
   *   refreshed stats (possibly `null`) and a timestamp; or `success: false`
   *   with an `error` message.
   */
  async optimizeCollection(): Promise<VectorQueryResult> {
    if (!this.activeCollection()) {
      return this.notInitializedResult();
    }

    console.log('[VectorStore] Starting collection optimization...');

    try {
      // Clear the query cache
      const cacheSize = this.queryCache.size;
      this.queryCache.clear();

      // Update optimization timestamp
      this.metrics.lastOptimization = Date.now();

      // Get collection stats for reference
      const stats = await this.getCollectionStats();

      console.log(`[VectorStore] Optimization complete. Cache cleared (${cacheSize} entries).`);

      return {
        success: true,
        metadata: {
          cacheEntriesCleared: cacheSize,
          collectionStats: stats,
          optimizationTime: Date.now(),
        },
      };
    } catch (error) {
      const message = ChromaVectorStore.describeError(error);
      console.log(`[VectorStore] Optimization failed: ${message}`);
      return {
        success: false,
        error: message,
      };
    }
  }

  // ── Embedding Function ──────────────────────────────────────────────────────

  /**
   * Placeholder embedder that returns uniformly random vectors in [-1, 1).
   * It does NOT call any provider API, so its vectors carry no meaning and
   * differ between calls for the same text.
   */
  private static placeholderEmbedder(dimensions: number): {
    generate: (texts: string[]) => Promise<number[][]>;
  } {
    return {
      generate: async (texts: string[]): Promise<number[][]> =>
        texts.map(() => Array.from({ length: dimensions }, () => Math.random() * 2 - 1)),
    };
  }

  /**
   * Deterministic hash-based embedding: each dimension is a 32-bit rolling
   * hash of the text salted with the dimension index, reduced to (-1, 1).
   * An empty text yields the all-zero vector.
   */
  private static hashEmbedding(text: string, dimensions: number): number[] {
    const embedding: number[] = [];
    for (let i = 0; i < dimensions; i++) {
      let hash = 0;
      for (let j = 0; j < text.length; j++) {
        // `| 0` keeps the accumulator in signed 32-bit range.
        hash = ((hash << 5) - hash + text.charCodeAt(j) + i) | 0;
      }
      embedding.push((hash % 1000) / 1000);
    }
    return embedding;
  }

  /**
   * Builds the embedding function object handed to the collection.
   *
   * `'openai'` (1536 dims), `'cohere'` (768 dims) and `'huggingface'`
   * (384 dims) are placeholders that produce random vectors; any other value
   * selects the deterministic 384-dimension hash embedding.
   */
  private createEmbeddingFunction(): unknown {
    const funcType = this.config.embeddingFunction;

    switch (funcType) {
      case 'openai': {
        console.log('[VectorStore] Using OpenAI embedding function.');
        // Placeholder: in production, call the OpenAI embedding API
        return ChromaVectorStore.placeholderEmbedder(1536);
      }

      case 'cohere': {
        console.log('[VectorStore] Using Cohere embedding function.');
        // Placeholder: in production, call the Cohere embedding API
        return ChromaVectorStore.placeholderEmbedder(768);
      }

      case 'huggingface': {
        console.log('[VectorStore] Using HuggingFace embedding function.');
        // Placeholder: in production, call the HuggingFace API
        return ChromaVectorStore.placeholderEmbedder(384);
      }

      case 'default':
      default: {
        console.log('[VectorStore] Using default embedding function.');
        return {
          // Simple hash-based embedding for fallback
          generate: async (texts: string[]): Promise<number[][]> =>
            texts.map((text: string) => ChromaVectorStore.hashEmbedding(text, 384)),
        };
      }
    }
  }

  // ── Batch Operations ────────────────────────────────────────────────────────

  /** Dispatches one batch step to the matching document operation. */
  private async runBatchStep(operation: BatchOperation): Promise<VectorQueryResult> {
    switch (operation.type) {
      case 'add':
        return this.addDocuments(operation.data as unknown as DocumentInput);
      case 'update':
        return this.updateDocuments(operation.data as unknown as DocumentInput);
      case 'delete':
        return this.deleteDocuments(operation.data.ids as string[]);
      default:
        return { success: false, error: `Unknown operation type: ${operation.type}` };
    }
  }

  /**
   * Runs the operations one after another, in order. A failing step is
   * recorded and does not stop the remaining steps.
   *
   * @param operations Steps to execute.
   * @returns `success` is `true` only when every step succeeded; `metadata`
   *   holds the totals and a per-step `results` list.
   */
  async batchOperation(operations: BatchOperation[]): Promise<VectorQueryResult> {
    if (!this.activeCollection()) {
      return this.notInitializedResult();
    }

    const results: Array<{ type: string; success: boolean; error?: string }> = [];
    let successCount = 0;
    let failureCount = 0;

    for (const operation of operations) {
      let outcome: VectorQueryResult;
      try {
        outcome = await this.runBatchStep(operation);
      } catch (error) {
        outcome = { success: false, error: ChromaVectorStore.describeError(error) };
      }

      results.push({ type: operation.type, success: outcome.success, error: outcome.error });
      if (outcome.success) {
        successCount++;
      } else {
        failureCount++;
      }
    }

    console.log(`[VectorStore] Batch operation complete: ${successCount} succeeded, ${failureCount} failed.`);

    return {
      success: failureCount === 0,
      metadata: {
        totalOperations: operations.length,
        successCount,
        failureCount,
        results,
      },
    };
  }

  // ── Client Status ───────────────────────────────────────────────────────────

  /**
   * Returns a diagnostic snapshot: readiness flag, connection settings, a copy
   * of the metrics, cache size/TTL and whether client and collection handles exist.
   */
  getClientStatus(): Record<string, unknown> {
    return {
      isInitialized: this.isInitialized,
      config: {
        host: this.config.host,
        port: this.config.port,
        collectionName: this.config.collectionName,
        embeddingFunction: this.config.embeddingFunction,
      },
      metrics: { ...this.metrics },
      cache: {
        entries: this.queryCache.size,
        maxAge: this.cacheMaxAge,
      },
      hasClient: this.client !== null,
      hasCollection: this.collection !== null,
    };
  }

  // ── Cleanup ─────────────────────────────────────────────────────────────────

  /**
   * Clears the cache and forgets the client and collection handles. The store
   * can be brought back with `initializeClient()`.
   */
  async close(): Promise<void> {
    console.log('[VectorStore] Closing vector store connection...');

    this.queryCache.clear();

    this.client = null;
    this.collection = null;
    this.isInitialized = false;

    console.log('[VectorStore] Vector store connection closed.');
  }

  // ── Utility Accessors ──────────────────────────────────────────────────────

  /** Returns a copy of the usage counters. */
  getMetrics(): VectorStoreMetrics {
    return { ...this.metrics };
  }

  /** `true` once `initializeClient()` has completed successfully. */
  isReady(): boolean {
    return this.isInitialized;
  }

  /** Name of the configured collection. */
  getCollectionName(): string {
    return this.config.collectionName;
  }

  /** Drops every cached query result. */
  clearCache(): void {
    this.queryCache.clear();
    console.log('[VectorStore] Query cache cleared.');
  }
}

export default ChromaVectorStore;
|