@rws-framework/ai-tools 3.3.0 → 3.4.0
package/package.json
CHANGED
@@ -85,11 +85,11 @@ export class LangChainRAGService {
   * Index knowledge content for RAG with optimized per-knowledge vector storage
   */
  async indexKnowledge(
-
+    fileId: string | number,
    content: string,
    metadata: Record<string, any> = {}
  ): Promise<IRAGResponse<{ chunkIds: string[] }>> {
-    this.log('log', `[INDEXING] Starting indexKnowledge for
+    this.log('log', `[INDEXING] Starting indexKnowledge for fileId: ${fileId}`);
    this.log('debug', `[INDEXING] Content length: ${content.length} characters`);

    try {
@@ -97,7 +97,7 @@ export class LangChainRAGService {

      // Chunk the content using the embedding service
      const chunks = await this.embeddingService.chunkText(content);
-      this.log('debug', `[INDEXING] Split content into ${chunks.length} chunks for
+      this.log('debug', `[INDEXING] Split content into ${chunks.length} chunks for file ${fileId}`);

      // Generate embeddings for all chunks at once (batch processing for speed)
      const embeddings = await this.embeddingService.embedTexts(chunks);
@@ -109,17 +109,17 @@ export class LangChainRAGService {
        embedding: embeddings[index],
        metadata: {
          ...metadata,
-
+          fileId,
          chunkIndex: index,
-          id: `knowledge_${
+          id: `knowledge_${fileId}_chunk_${index}`
        }
      }));

      // Save to per-knowledge vector file
-      await this.saveKnowledgeVector(
+      await this.saveKnowledgeVector(fileId, chunksWithEmbeddings);

      const chunkIds = chunksWithEmbeddings.map(chunk => chunk.metadata.id);
-      this.log('log', `[INDEXING] Successfully indexed
+      this.log('log', `[INDEXING] Successfully indexed file ${fileId} with ${chunkIds.length} chunks using optimized approach`);

      return {
        success: true,
@@ -127,7 +127,7 @@ export class LangChainRAGService {
      };

    } catch (error: any) {
-      this.log('error', `[INDEXING] Failed to index
+      this.log('error', `[INDEXING] Failed to index file ${fileId}:`, error);
      return {
        success: false,
        data: null,
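A minimal usage sketch of the updated `indexKnowledge` signature, assuming an already-constructed service instance (the `ragService` name, content, and metadata values are illustrative, not part of the package):

```ts
// Hypothetical caller; only the parameters shown in the diff are used.
const indexResult = await ragService.indexKnowledge(
  42,                              // fileId: string | number
  'Raw knowledge content to chunk and embed...',
  { source: 'upload' }             // optional metadata merged into every chunk
);

if (indexResult.success) {
  // Chunk ids follow the `knowledge_${fileId}_chunk_${index}` pattern from the diff
  console.log(indexResult.data?.chunkIds);
}
```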
@@ -141,32 +141,45 @@ export class LangChainRAGService {
   */
  async searchKnowledge(request: IRAGSearchRequest): Promise<IRAGResponse<{ results: ISearchResult[] }>> {
    this.log('log', `[SEARCH] Starting knowledge search for query: "${request.query}"`);
-    this.log('debug', `[SEARCH] Search parameters: maxResults=${request.maxResults || 5}, threshold=${request.threshold || 0.3}`);
+    this.log('debug', `[SEARCH] Search parameters: maxResults=${request.maxResults || 5}, threshold=${request.threshold || 0.3}, temporaryDocumentSearch=${request.temporaryDocumentSearch}`);

    try {
      await this.ensureInitialized();

-      const
-      console.log('
+      const fileIds = request.filter?.fileIds || [];
+      console.log('fileIds', fileIds, 'temporaryDocumentSearch:', request.temporaryDocumentSearch);

-      if (
-        this.log('warn', '[SEARCH] No
+      if (fileIds.length === 0) {
+        this.log('warn', '[SEARCH] No file IDs provided for search, returning empty results');
        return {
          success: true,
          data: { results: [] }
        };
      }

-      // Load all knowledge vectors in parallel
-      const knowledgeVectorPromises =
-
-
-
-
-
+      // Load all knowledge vectors in parallel (including temporary documents)
+      const knowledgeVectorPromises = fileIds.map(async (fileId) => {
+        try {
+          const vectorData = await this.loadKnowledgeVectorWithEmbeddings(fileId);
+          return {
+            fileId,
+            chunks: vectorData.chunks
+          };
+        } catch (loadError) {
+          this.log('warn', `[SEARCH] Failed to load file ${fileId}:`, loadError);
+          return null;
+        }
      });

-      const knowledgeVectors = await Promise.all(knowledgeVectorPromises);
+      const knowledgeVectors = (await Promise.all(knowledgeVectorPromises)).filter(v => v !== null);
+
+      if (knowledgeVectors.length === 0) {
+        this.log('warn', '[SEARCH] No knowledge vectors could be loaded for search');
+        return {
+          success: true,
+          data: { results: [] }
+        };
+      }

      // Use optimized vector search service
      const searchResponse = await this.vectorSearchService.searchSimilar({
@@ -178,10 +191,10 @@ export class LangChainRAGService {

      // Convert results to expected format
      const results: ISearchResult[] = searchResponse.results.map(result => ({
-
+        fileId: result.metadata?.fileId, // Use fileId directly
        content: result.content,
        score: result.score,
-        metadata: result.metadata,
+        metadata: result.metadata, // Pass metadata as-is
        chunkId: result.chunkId,
      }));

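A hedged example of calling the updated search path; the query and ids are illustrative, and `ragService` again stands in for an existing service instance:

```ts
// Without filter.fileIds the changed code returns empty results immediately.
const searchResult = await ragService.searchKnowledge({
  query: 'How do I configure the vector store?',
  maxResults: 5,
  threshold: 0.3,
  temporaryDocumentSearch: false,
  filter: { fileIds: [42, 'tmp_web_doc_1'] }
});

for (const hit of searchResult.data?.results ?? []) {
  console.log(hit.fileId, hit.score, hit.chunkId);
}
```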
@@ -205,25 +218,25 @@ export class LangChainRAGService {
  /**
   * Remove knowledge from index
   */
-  async removeKnowledge(
-    this.log('log', `[REMOVE] Starting removal of
+  async removeKnowledge(fileId: string | number): Promise<boolean> {
+    this.log('log', `[REMOVE] Starting removal of file: ${fileId}`);

    try {
      await this.ensureInitialized();

      // Remove the individual knowledge vector file
-      const vectorFilePath = this.getKnowledgeVectorPath(
+      const vectorFilePath = this.getKnowledgeVectorPath(fileId);
      if (fs.existsSync(vectorFilePath)) {
        fs.unlinkSync(vectorFilePath);
-        this.log('log', `[REMOVE] Successfully removed vector file for
+        this.log('log', `[REMOVE] Successfully removed vector file for file ${fileId}`);
        return true;
      } else {
-        this.log('warn', `[REMOVE] Vector file not found for
+        this.log('warn', `[REMOVE] Vector file not found for file ${fileId}`);
        return true; // Consider it successful if file doesn't exist
      }

    } catch (error: any) {
-      this.log('error', `[REMOVE] Failed to remove
+      this.log('error', `[REMOVE] Failed to remove file ${fileId}:`, error);
      return false;
    }
  }
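Removal now takes the same `fileId`; a small sketch (the id is illustrative):

```ts
// Resolves true when the per-file vector JSON was deleted or was already absent.
const removed = await ragService.removeKnowledge(42);
if (!removed) {
  console.warn('Vector file could not be removed');
}
```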
@@ -307,8 +320,8 @@ export class LangChainRAGService {
  /**
   * Save chunks to knowledge-specific vector file with embeddings
   */
-  private async saveKnowledgeVector(
-    const vectorFilePath = this.getKnowledgeVectorPath(
+  private async saveKnowledgeVector(fileId: string | number, chunks: Array<{ content: string; embedding: number[]; metadata: any }>): Promise<void> {
+    const vectorFilePath = this.getKnowledgeVectorPath(fileId);
    const vectorDir = path.dirname(vectorFilePath);

    // Ensure directory exists
@@ -318,16 +331,16 @@ export class LangChainRAGService {

    try {
      const vectorData = {
-
+        fileId,
        chunks,
        timestamp: new Date().toISOString()
      };

      fs.writeFileSync(vectorFilePath, JSON.stringify(vectorData, null, 2));
-      this.log('debug', `[SAVE] Successfully saved ${chunks.length} chunks with embeddings for
+      this.log('debug', `[SAVE] Successfully saved ${chunks.length} chunks with embeddings for file ${fileId}`);

    } catch (error) {
-      this.log('error', `[SAVE] Failed to save vector data for
+      this.log('error', `[SAVE] Failed to save vector data for file ${fileId}:`, error);
      throw error;
    }
  }
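Based on the object literal above and the path helper later in the diff, the per-file vector JSON written under `files/vectors/knowledge/` should look roughly like this (all values illustrative):

```ts
// Illustrative contents of knowledge_42.json after saveKnowledgeVector(42, chunks)
const exampleVectorData = {
  fileId: 42,
  chunks: [
    {
      content: 'First chunk of the indexed document...',
      embedding: [0.0132, -0.0421 /* one float per embedding dimension */],
      metadata: { fileId: 42, chunkIndex: 0, id: 'knowledge_42_chunk_0' }
    }
  ],
  timestamp: '2024-01-01T00:00:00.000Z'
};
```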
@@ -335,24 +348,24 @@ export class LangChainRAGService {
  /**
   * Load vector data for a specific knowledge item with embeddings
   */
-  private async loadKnowledgeVectorWithEmbeddings(
-    const vectorFilePath = this.getKnowledgeVectorPath(
+  private async loadKnowledgeVectorWithEmbeddings(fileId: string | number): Promise<{ fileId?: string | number, chunks: Array<{ content: string; embedding: number[]; metadata: any }> }> {
+    const vectorFilePath = this.getKnowledgeVectorPath(fileId);

    if (!fs.existsSync(vectorFilePath)) {
-      this.log('debug', `[LOAD] No vector file found for
+      this.log('debug', `[LOAD] No vector file found for file ${fileId}, skipping...`);
      return { chunks: [] };
    }

    try {
-      this.log('debug', `[LOAD] Loading vector data with embeddings for
+      this.log('debug', `[LOAD] Loading vector data with embeddings for file ${fileId} from ${vectorFilePath}`);
      const vectorData = JSON.parse(fs.readFileSync(vectorFilePath, 'utf8'));

      return {
        chunks: vectorData.chunks || [],
-
+        fileId
      };
    } catch (error) {
-      this.log('error', `[LOAD] Failed to load vector data for
+      this.log('error', `[LOAD] Failed to load vector data for file ${fileId}:`, error);
      return { chunks: [] };
    }
  }
@@ -360,12 +373,12 @@ export class LangChainRAGService {
  /**
   * Get the file path for a specific knowledge's vector data
   */
-  private getKnowledgeVectorPath(
+  private getKnowledgeVectorPath(fileId: string | number): string {
    const vectorDir = path.join(rwsPath.findRootWorkspacePath(), 'files', 'vectors', 'knowledge');
    if (!fs.existsSync(vectorDir)) {
      fs.mkdirSync(vectorDir, { recursive: true });
    }
-    return path.join(vectorDir, `knowledge_${
+    return path.join(vectorDir, `knowledge_${fileId}.json`);
  }

  /**
@@ -54,35 +54,42 @@ export class OptimizedVectorSearchService {
          candidates.push({
            content: chunk.content,
            score: similarity,
-            metadata:
-
-
+            metadata: {
+              ...chunk.metadata,
+              fileId: knowledgeVector.fileId // Use fileId directly
+            },
+            fileId: knowledgeVector.fileId, // Always use the fileId from the knowledgeVector
+            chunkId: chunk.metadata?.id || `${knowledgeVector.fileId}_chunk_${Date.now()}`
          });
        }
      }

+      // Sort candidates by score and take top maxResults per source
+      const topCandidates = candidates
+        .sort((a, b) => b.score - a.score)
+        .slice(0, maxResults);
+
      // Log similarity statistics for debugging
      if (similarities.length > 0) {
        const maxSim = Math.max(...similarities);
        const avgSim = similarities.reduce((a, b) => a + b, 0) / similarities.length;
-        console.log(`[VECTOR SEARCH]
+        console.log(`[VECTOR SEARCH] File ${knowledgeVector.fileId}: Max similarity: ${maxSim.toFixed(4)}, Avg: ${avgSim.toFixed(4)}, Candidates above ${threshold}: ${candidates.length}, Top results taken: ${topCandidates.length}`);
      }

-      return
+      return topCandidates;
    });

    // Wait for all searches to complete
    const allCandidateArrays = await Promise.all(searchPromises);

-    // Flatten results
+    // Flatten results (each source already limited to maxResults)
    for (const candidates of allCandidateArrays) {
      allCandidates.push(...candidates);
    }

-    // Sort by similarity score
+    // Sort by similarity score (no additional limiting since each source is already limited)
    const results = allCandidates
-      .sort((a, b) => b.score - a.score)
-      .slice(0, maxResults);
+      .sort((a, b) => b.score - a.score);

    const searchTime = Date.now() - startTime;

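The behavioral change here is that the top `maxResults` candidates are now kept per source file before the global merge, instead of one cut across all sources. A standalone sketch of that pattern, under the assumption of a simplified candidate shape (names are illustrative, not the package's actual implementation):

```ts
// Per-source top-K followed by a global sort, as the changed code does.
interface Candidate { fileId: string | number; score: number; content: string; }

function mergePerSourceTopK(candidatesBySource: Candidate[][], maxResults: number): Candidate[] {
  // Keep the best `maxResults` candidates from each source file...
  const limited = candidatesBySource.map(source =>
    [...source].sort((a, b) => b.score - a.score).slice(0, maxResults)
  );
  // ...then flatten and sort globally without a second cut.
  return limited.flat().sort((a, b) => b.score - a.score);
}
```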
@@ -122,7 +129,7 @@ export class OptimizedVectorSearchService {
  async batchSearch(
    queries: string[],
    knowledgeVectors: Array<{
-
+      fileId: string | number;
      chunks: Array<{
        content: string;
        embedding: number[];
@@ -165,7 +172,7 @@ export class OptimizedVectorSearchService {
  private async searchWithEmbedding(request: {
    queryEmbedding: number[];
    knowledgeVectors: Array<{
-
+      fileId: string | number;
      chunks: Array<{
        content: string;
        embedding: number[];
@@ -200,8 +207,8 @@ export class OptimizedVectorSearchService {
            content: chunk.content,
            score: similarity,
            metadata: chunk.metadata,
-
-            chunkId: chunk.metadata?.id || `${knowledgeVector.
+            fileId: knowledgeVector.fileId,
+            chunkId: chunk.metadata?.id || `${knowledgeVector.fileId}_chunk_${Date.now()}`
          });
        }
      }
@@ -252,7 +259,7 @@ export class OptimizedVectorSearchService {
   * Search similar documents (compatibility method from LangChainVectorStoreService)
   */
  async searchSimilarCompat(request: IVectorSearchRequest, knowledgeVectors: Array<{
-
+    fileId: string | number;
    chunks: Array<{
      content: string;
      embedding: number[];
@@ -271,9 +278,9 @@ export class OptimizedVectorSearchService {
    let filteredVectors = knowledgeVectors;
    if (filter) {
      filteredVectors = knowledgeVectors.filter(vector => {
-        // Check
-        if (filter.
-          return filter.
+        // Check file IDs
+        if (filter.fileIds && filter.fileIds.length > 0) {
+          return filter.fileIds.includes(String(vector.fileId));
        }
        return true;
      });
@@ -293,7 +300,7 @@ export class OptimizedVectorSearchService {
      score: result.score,
      metadata: result.metadata,
      chunkId: result.chunkId,
-
+      fileId: result.fileId
    }));

    return {
@@ -313,7 +320,7 @@ export class OptimizedVectorSearchService {
   * Get search statistics
   */
  getStats(knowledgeVectors: Array<{
-
+    fileId: string | number;
    chunks: Array<{ content: string; embedding: number[]; metadata: any; }>;
  }>): { totalChunks: number; totalKnowledge: number } {
    const totalChunks = knowledgeVectors.reduce((total, vector) => total + vector.chunks.length, 0);
package/src/types/rag.types.ts
CHANGED
@@ -24,8 +24,9 @@ export interface IRAGSearchRequest {
  query: string;
  maxResults?: number;
  threshold?: number;
+  temporaryDocumentSearch?: boolean; // Flag for searching temporary documents (web search)
  filter?: {
-
+    fileIds?: (string | number)[];
    documentIds?: (string | number)[];
    [key: string]: any;
  };
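A request object satisfying the updated interface might look like this (values are illustrative only):

```ts
const request: IRAGSearchRequest = {
  query: 'deployment configuration',
  maxResults: 5,
  threshold: 0.3,
  temporaryDocumentSearch: true,      // also consider temporary (web-search) documents
  filter: {
    fileIds: [42, 'tmp_web_result_1'] // per-file vector stores to load
  }
};
```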
@@ -5,7 +5,7 @@ export interface ISearchResult {
  content: string;
  score: number;
  metadata: any;
-
+  fileId: string | number;
  chunkId: string;
}

@@ -30,7 +30,7 @@ export interface IVectorSearchResponse {
export interface IOptimizedSearchRequest {
  query: string;
  knowledgeVectors: Array<{
-
+    fileId: string | number;
    chunks: Array<{
      content: string;
      embedding: number[];
@@ -45,7 +45,7 @@ export interface IOptimizedSearchResult {
  content: string;
  score: number;
  metadata: any;
-
+  fileId: string | number;
  chunkId: string;
}
