@rws-framework/ai-tools 3.3.1 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -85,11 +85,11 @@ export class LangChainRAGService {
|
|
|
85
85
|
* Index knowledge content for RAG with optimized per-knowledge vector storage
|
|
86
86
|
*/
|
|
87
87
|
async indexKnowledge(
|
|
88
|
-
|
|
88
|
+
fileId: string | number,
|
|
89
89
|
content: string,
|
|
90
90
|
metadata: Record<string, any> = {}
|
|
91
91
|
): Promise<IRAGResponse<{ chunkIds: string[] }>> {
|
|
92
|
-
this.log('log', `[INDEXING] Starting indexKnowledge for
|
|
92
|
+
this.log('log', `[INDEXING] Starting indexKnowledge for fileId: ${fileId}`);
|
|
93
93
|
this.log('debug', `[INDEXING] Content length: ${content.length} characters`);
|
|
94
94
|
|
|
95
95
|
try {
|
|
@@ -97,7 +97,7 @@ export class LangChainRAGService {
|
|
|
97
97
|
|
|
98
98
|
// Chunk the content using the embedding service
|
|
99
99
|
const chunks = await this.embeddingService.chunkText(content);
|
|
100
|
-
this.log('debug', `[INDEXING] Split content into ${chunks.length} chunks for
|
|
100
|
+
this.log('debug', `[INDEXING] Split content into ${chunks.length} chunks for file ${fileId}`);
|
|
101
101
|
|
|
102
102
|
// Generate embeddings for all chunks at once (batch processing for speed)
|
|
103
103
|
const embeddings = await this.embeddingService.embedTexts(chunks);
|
|
@@ -109,17 +109,17 @@ export class LangChainRAGService {
|
|
|
109
109
|
embedding: embeddings[index],
|
|
110
110
|
metadata: {
|
|
111
111
|
...metadata,
|
|
112
|
-
|
|
112
|
+
fileId,
|
|
113
113
|
chunkIndex: index,
|
|
114
|
-
id: `knowledge_${
|
|
114
|
+
id: `knowledge_${fileId}_chunk_${index}`
|
|
115
115
|
}
|
|
116
116
|
}));
|
|
117
117
|
|
|
118
118
|
// Save to per-knowledge vector file
|
|
119
|
-
await this.saveKnowledgeVector(
|
|
119
|
+
await this.saveKnowledgeVector(fileId, chunksWithEmbeddings);
|
|
120
120
|
|
|
121
121
|
const chunkIds = chunksWithEmbeddings.map(chunk => chunk.metadata.id);
|
|
122
|
-
this.log('log', `[INDEXING] Successfully indexed
|
|
122
|
+
this.log('log', `[INDEXING] Successfully indexed file ${fileId} with ${chunkIds.length} chunks using optimized approach`);
|
|
123
123
|
|
|
124
124
|
return {
|
|
125
125
|
success: true,
|
|
@@ -127,7 +127,7 @@ export class LangChainRAGService {
|
|
|
127
127
|
};
|
|
128
128
|
|
|
129
129
|
} catch (error: any) {
|
|
130
|
-
this.log('error', `[INDEXING] Failed to index
|
|
130
|
+
this.log('error', `[INDEXING] Failed to index file ${fileId}:`, error);
|
|
131
131
|
return {
|
|
132
132
|
success: false,
|
|
133
133
|
data: null,
|
|
@@ -146,11 +146,11 @@ export class LangChainRAGService {
|
|
|
146
146
|
try {
|
|
147
147
|
await this.ensureInitialized();
|
|
148
148
|
|
|
149
|
-
const
|
|
150
|
-
console.log('
|
|
149
|
+
const fileIds = request.filter?.fileIds || [];
|
|
150
|
+
console.log('fileIds', fileIds, 'temporaryDocumentSearch:', request.temporaryDocumentSearch);
|
|
151
151
|
|
|
152
|
-
if (
|
|
153
|
-
this.log('warn', '[SEARCH] No
|
|
152
|
+
if (fileIds.length === 0) {
|
|
153
|
+
this.log('warn', '[SEARCH] No file IDs provided for search, returning empty results');
|
|
154
154
|
return {
|
|
155
155
|
success: true,
|
|
156
156
|
data: { results: [] }
|
|
@@ -158,15 +158,15 @@ export class LangChainRAGService {
|
|
|
158
158
|
}
|
|
159
159
|
|
|
160
160
|
// Load all knowledge vectors in parallel (including temporary documents)
|
|
161
|
-
const knowledgeVectorPromises =
|
|
161
|
+
const knowledgeVectorPromises = fileIds.map(async (fileId) => {
|
|
162
162
|
try {
|
|
163
|
-
const vectorData = await this.loadKnowledgeVectorWithEmbeddings(
|
|
163
|
+
const vectorData = await this.loadKnowledgeVectorWithEmbeddings(fileId);
|
|
164
164
|
return {
|
|
165
|
-
|
|
165
|
+
fileId,
|
|
166
166
|
chunks: vectorData.chunks
|
|
167
167
|
};
|
|
168
168
|
} catch (loadError) {
|
|
169
|
-
this.log('warn', `[SEARCH] Failed to load
|
|
169
|
+
this.log('warn', `[SEARCH] Failed to load file ${fileId}:`, loadError);
|
|
170
170
|
return null;
|
|
171
171
|
}
|
|
172
172
|
});
|
|
@@ -191,10 +191,10 @@ export class LangChainRAGService {
|
|
|
191
191
|
|
|
192
192
|
// Convert results to expected format
|
|
193
193
|
const results: ISearchResult[] = searchResponse.results.map(result => ({
|
|
194
|
-
|
|
194
|
+
fileId: result.metadata?.fileId, // Use fileId directly
|
|
195
195
|
content: result.content,
|
|
196
196
|
score: result.score,
|
|
197
|
-
metadata: result.metadata,
|
|
197
|
+
metadata: result.metadata, // Pass metadata as-is
|
|
198
198
|
chunkId: result.chunkId,
|
|
199
199
|
}));
|
|
200
200
|
|
|
@@ -219,7 +219,7 @@ export class LangChainRAGService {
|
|
|
219
219
|
* Remove knowledge from index
|
|
220
220
|
*/
|
|
221
221
|
async removeKnowledge(fileId: string | number): Promise<boolean> {
|
|
222
|
-
this.log('log', `[REMOVE] Starting removal of
|
|
222
|
+
this.log('log', `[REMOVE] Starting removal of file: ${fileId}`);
|
|
223
223
|
|
|
224
224
|
try {
|
|
225
225
|
await this.ensureInitialized();
|
|
@@ -228,15 +228,15 @@ export class LangChainRAGService {
|
|
|
228
228
|
const vectorFilePath = this.getKnowledgeVectorPath(fileId);
|
|
229
229
|
if (fs.existsSync(vectorFilePath)) {
|
|
230
230
|
fs.unlinkSync(vectorFilePath);
|
|
231
|
-
this.log('log', `[REMOVE] Successfully removed vector file for
|
|
231
|
+
this.log('log', `[REMOVE] Successfully removed vector file for file ${fileId}`);
|
|
232
232
|
return true;
|
|
233
233
|
} else {
|
|
234
|
-
this.log('warn', `[REMOVE] Vector file not found for
|
|
234
|
+
this.log('warn', `[REMOVE] Vector file not found for file ${fileId}`);
|
|
235
235
|
return true; // Consider it successful if file doesn't exist
|
|
236
236
|
}
|
|
237
237
|
|
|
238
238
|
} catch (error: any) {
|
|
239
|
-
this.log('error', `[REMOVE] Failed to remove
|
|
239
|
+
this.log('error', `[REMOVE] Failed to remove file ${fileId}:`, error);
|
|
240
240
|
return false;
|
|
241
241
|
}
|
|
242
242
|
}
|
|
@@ -320,8 +320,8 @@ export class LangChainRAGService {
|
|
|
320
320
|
/**
|
|
321
321
|
* Save chunks to knowledge-specific vector file with embeddings
|
|
322
322
|
*/
|
|
323
|
-
private async saveKnowledgeVector(
|
|
324
|
-
const vectorFilePath = this.getKnowledgeVectorPath(
|
|
323
|
+
private async saveKnowledgeVector(fileId: string | number, chunks: Array<{ content: string; embedding: number[]; metadata: any }>): Promise<void> {
|
|
324
|
+
const vectorFilePath = this.getKnowledgeVectorPath(fileId);
|
|
325
325
|
const vectorDir = path.dirname(vectorFilePath);
|
|
326
326
|
|
|
327
327
|
// Ensure directory exists
|
|
@@ -331,16 +331,16 @@ export class LangChainRAGService {
|
|
|
331
331
|
|
|
332
332
|
try {
|
|
333
333
|
const vectorData = {
|
|
334
|
-
|
|
334
|
+
fileId,
|
|
335
335
|
chunks,
|
|
336
336
|
timestamp: new Date().toISOString()
|
|
337
337
|
};
|
|
338
338
|
|
|
339
339
|
fs.writeFileSync(vectorFilePath, JSON.stringify(vectorData, null, 2));
|
|
340
|
-
this.log('debug', `[SAVE] Successfully saved ${chunks.length} chunks with embeddings for
|
|
340
|
+
this.log('debug', `[SAVE] Successfully saved ${chunks.length} chunks with embeddings for file ${fileId}`);
|
|
341
341
|
|
|
342
342
|
} catch (error) {
|
|
343
|
-
this.log('error', `[SAVE] Failed to save vector data for
|
|
343
|
+
this.log('error', `[SAVE] Failed to save vector data for file ${fileId}:`, error);
|
|
344
344
|
throw error;
|
|
345
345
|
}
|
|
346
346
|
}
|
|
@@ -348,24 +348,24 @@ export class LangChainRAGService {
|
|
|
348
348
|
/**
|
|
349
349
|
* Load vector data for a specific knowledge item with embeddings
|
|
350
350
|
*/
|
|
351
|
-
private async loadKnowledgeVectorWithEmbeddings(
|
|
352
|
-
const vectorFilePath = this.getKnowledgeVectorPath(
|
|
351
|
+
private async loadKnowledgeVectorWithEmbeddings(fileId: string | number): Promise<{ fileId?: string | number, chunks: Array<{ content: string; embedding: number[]; metadata: any }> }> {
|
|
352
|
+
const vectorFilePath = this.getKnowledgeVectorPath(fileId);
|
|
353
353
|
|
|
354
354
|
if (!fs.existsSync(vectorFilePath)) {
|
|
355
|
-
this.log('debug', `[LOAD] No vector file found for
|
|
355
|
+
this.log('debug', `[LOAD] No vector file found for file ${fileId}, skipping...`);
|
|
356
356
|
return { chunks: [] };
|
|
357
357
|
}
|
|
358
358
|
|
|
359
359
|
try {
|
|
360
|
-
this.log('debug', `[LOAD] Loading vector data with embeddings for
|
|
360
|
+
this.log('debug', `[LOAD] Loading vector data with embeddings for file ${fileId} from ${vectorFilePath}`);
|
|
361
361
|
const vectorData = JSON.parse(fs.readFileSync(vectorFilePath, 'utf8'));
|
|
362
362
|
|
|
363
363
|
return {
|
|
364
364
|
chunks: vectorData.chunks || [],
|
|
365
|
-
|
|
365
|
+
fileId
|
|
366
366
|
};
|
|
367
367
|
} catch (error) {
|
|
368
|
-
this.log('error', `[LOAD] Failed to load vector data for
|
|
368
|
+
this.log('error', `[LOAD] Failed to load vector data for file ${fileId}:`, error);
|
|
369
369
|
return { chunks: [] };
|
|
370
370
|
}
|
|
371
371
|
}
|
|
@@ -373,12 +373,12 @@ export class LangChainRAGService {
|
|
|
373
373
|
/**
|
|
374
374
|
* Get the file path for a specific knowledge's vector data
|
|
375
375
|
*/
|
|
376
|
-
private getKnowledgeVectorPath(
|
|
376
|
+
private getKnowledgeVectorPath(fileId: string | number): string {
|
|
377
377
|
const vectorDir = path.join(rwsPath.findRootWorkspacePath(), 'files', 'vectors', 'knowledge');
|
|
378
378
|
if (!fs.existsSync(vectorDir)) {
|
|
379
379
|
fs.mkdirSync(vectorDir, { recursive: true });
|
|
380
380
|
}
|
|
381
|
-
return path.join(vectorDir, `knowledge_${
|
|
381
|
+
return path.join(vectorDir, `knowledge_${fileId}.json`);
|
|
382
382
|
}
|
|
383
383
|
|
|
384
384
|
/**
|
|
@@ -54,35 +54,42 @@ export class OptimizedVectorSearchService {
|
|
|
54
54
|
candidates.push({
|
|
55
55
|
content: chunk.content,
|
|
56
56
|
score: similarity,
|
|
57
|
-
metadata:
|
|
58
|
-
|
|
59
|
-
|
|
57
|
+
metadata: {
|
|
58
|
+
...chunk.metadata,
|
|
59
|
+
fileId: knowledgeVector.fileId // Use fileId directly
|
|
60
|
+
},
|
|
61
|
+
fileId: knowledgeVector.fileId, // Always use the fileId from the knowledgeVector
|
|
62
|
+
chunkId: chunk.metadata?.id || `${knowledgeVector.fileId}_chunk_${Date.now()}`
|
|
60
63
|
});
|
|
61
64
|
}
|
|
62
65
|
}
|
|
63
66
|
|
|
67
|
+
// Sort candidates by score and take top maxResults per source
|
|
68
|
+
const topCandidates = candidates
|
|
69
|
+
.sort((a, b) => b.score - a.score)
|
|
70
|
+
.slice(0, maxResults);
|
|
71
|
+
|
|
64
72
|
// Log similarity statistics for debugging
|
|
65
73
|
if (similarities.length > 0) {
|
|
66
74
|
const maxSim = Math.max(...similarities);
|
|
67
75
|
const avgSim = similarities.reduce((a, b) => a + b, 0) / similarities.length;
|
|
68
|
-
console.log(`[VECTOR SEARCH]
|
|
76
|
+
console.log(`[VECTOR SEARCH] File ${knowledgeVector.fileId}: Max similarity: ${maxSim.toFixed(4)}, Avg: ${avgSim.toFixed(4)}, Candidates above ${threshold}: ${candidates.length}, Top results taken: ${topCandidates.length}`);
|
|
69
77
|
}
|
|
70
78
|
|
|
71
|
-
return
|
|
79
|
+
return topCandidates;
|
|
72
80
|
});
|
|
73
81
|
|
|
74
82
|
// Wait for all searches to complete
|
|
75
83
|
const allCandidateArrays = await Promise.all(searchPromises);
|
|
76
84
|
|
|
77
|
-
// Flatten results
|
|
85
|
+
// Flatten results (each source already limited to maxResults)
|
|
78
86
|
for (const candidates of allCandidateArrays) {
|
|
79
87
|
allCandidates.push(...candidates);
|
|
80
88
|
}
|
|
81
89
|
|
|
82
|
-
// Sort by similarity score
|
|
90
|
+
// Sort by similarity score (no additional limiting since each source is already limited)
|
|
83
91
|
const results = allCandidates
|
|
84
|
-
.sort((a, b) => b.score - a.score)
|
|
85
|
-
.slice(0, maxResults);
|
|
92
|
+
.sort((a, b) => b.score - a.score);
|
|
86
93
|
|
|
87
94
|
const searchTime = Date.now() - startTime;
|
|
88
95
|
|
|
@@ -122,7 +129,7 @@ export class OptimizedVectorSearchService {
|
|
|
122
129
|
async batchSearch(
|
|
123
130
|
queries: string[],
|
|
124
131
|
knowledgeVectors: Array<{
|
|
125
|
-
|
|
132
|
+
fileId: string | number;
|
|
126
133
|
chunks: Array<{
|
|
127
134
|
content: string;
|
|
128
135
|
embedding: number[];
|
|
@@ -165,7 +172,7 @@ export class OptimizedVectorSearchService {
|
|
|
165
172
|
private async searchWithEmbedding(request: {
|
|
166
173
|
queryEmbedding: number[];
|
|
167
174
|
knowledgeVectors: Array<{
|
|
168
|
-
|
|
175
|
+
fileId: string | number;
|
|
169
176
|
chunks: Array<{
|
|
170
177
|
content: string;
|
|
171
178
|
embedding: number[];
|
|
@@ -200,8 +207,8 @@ export class OptimizedVectorSearchService {
|
|
|
200
207
|
content: chunk.content,
|
|
201
208
|
score: similarity,
|
|
202
209
|
metadata: chunk.metadata,
|
|
203
|
-
|
|
204
|
-
chunkId: chunk.metadata?.id || `${knowledgeVector.
|
|
210
|
+
fileId: knowledgeVector.fileId,
|
|
211
|
+
chunkId: chunk.metadata?.id || `${knowledgeVector.fileId}_chunk_${Date.now()}`
|
|
205
212
|
});
|
|
206
213
|
}
|
|
207
214
|
}
|
|
@@ -252,7 +259,7 @@ export class OptimizedVectorSearchService {
|
|
|
252
259
|
* Search similar documents (compatibility method from LangChainVectorStoreService)
|
|
253
260
|
*/
|
|
254
261
|
async searchSimilarCompat(request: IVectorSearchRequest, knowledgeVectors: Array<{
|
|
255
|
-
|
|
262
|
+
fileId: string | number;
|
|
256
263
|
chunks: Array<{
|
|
257
264
|
content: string;
|
|
258
265
|
embedding: number[];
|
|
@@ -271,9 +278,9 @@ export class OptimizedVectorSearchService {
|
|
|
271
278
|
let filteredVectors = knowledgeVectors;
|
|
272
279
|
if (filter) {
|
|
273
280
|
filteredVectors = knowledgeVectors.filter(vector => {
|
|
274
|
-
// Check
|
|
275
|
-
if (filter.
|
|
276
|
-
return filter.
|
|
281
|
+
// Check file IDs
|
|
282
|
+
if (filter.fileIds && filter.fileIds.length > 0) {
|
|
283
|
+
return filter.fileIds.includes(String(vector.fileId));
|
|
277
284
|
}
|
|
278
285
|
return true;
|
|
279
286
|
});
|
|
@@ -293,7 +300,7 @@ export class OptimizedVectorSearchService {
|
|
|
293
300
|
score: result.score,
|
|
294
301
|
metadata: result.metadata,
|
|
295
302
|
chunkId: result.chunkId,
|
|
296
|
-
|
|
303
|
+
fileId: result.fileId
|
|
297
304
|
}));
|
|
298
305
|
|
|
299
306
|
return {
|
|
@@ -313,7 +320,7 @@ export class OptimizedVectorSearchService {
|
|
|
313
320
|
* Get search statistics
|
|
314
321
|
*/
|
|
315
322
|
getStats(knowledgeVectors: Array<{
|
|
316
|
-
|
|
323
|
+
fileId: string | number;
|
|
317
324
|
chunks: Array<{ content: string; embedding: number[]; metadata: any; }>;
|
|
318
325
|
}>): { totalChunks: number; totalKnowledge: number } {
|
|
319
326
|
const totalChunks = knowledgeVectors.reduce((total, vector) => total + vector.chunks.length, 0);
|
package/src/types/rag.types.ts
CHANGED
|
@@ -26,7 +26,7 @@ export interface IRAGSearchRequest {
|
|
|
26
26
|
threshold?: number;
|
|
27
27
|
temporaryDocumentSearch?: boolean; // Flag for searching temporary documents (web search)
|
|
28
28
|
filter?: {
|
|
29
|
-
|
|
29
|
+
fileIds?: (string | number)[];
|
|
30
30
|
documentIds?: (string | number)[];
|
|
31
31
|
[key: string]: any;
|
|
32
32
|
};
|
|
@@ -5,7 +5,7 @@ export interface ISearchResult {
|
|
|
5
5
|
content: string;
|
|
6
6
|
score: number;
|
|
7
7
|
metadata: any;
|
|
8
|
-
|
|
8
|
+
fileId: string | number;
|
|
9
9
|
chunkId: string;
|
|
10
10
|
}
|
|
11
11
|
|
|
@@ -30,7 +30,7 @@ export interface IVectorSearchResponse {
|
|
|
30
30
|
export interface IOptimizedSearchRequest {
|
|
31
31
|
query: string;
|
|
32
32
|
knowledgeVectors: Array<{
|
|
33
|
-
|
|
33
|
+
fileId: string | number;
|
|
34
34
|
chunks: Array<{
|
|
35
35
|
content: string;
|
|
36
36
|
embedding: number[];
|
|
@@ -45,7 +45,7 @@ export interface IOptimizedSearchResult {
|
|
|
45
45
|
content: string;
|
|
46
46
|
score: number;
|
|
47
47
|
metadata: any;
|
|
48
|
-
|
|
48
|
+
fileId: string | number;
|
|
49
49
|
chunkId: string;
|
|
50
50
|
}
|
|
51
51
|
|