@rws-framework/ai-tools 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@rws-framework/ai-tools",
  "private": false,
- "version": "3.3.0",
+ "version": "3.4.0",
  "description": "",
  "main": "src/index.ts",
  "scripts": {},
@@ -85,11 +85,11 @@ export class LangChainRAGService {
  * Index knowledge content for RAG with optimized per-knowledge vector storage
  */
  async indexKnowledge(
- knowledgeId: string | number,
+ fileId: string | number,
  content: string,
  metadata: Record<string, any> = {}
  ): Promise<IRAGResponse<{ chunkIds: string[] }>> {
- this.log('log', `[INDEXING] Starting indexKnowledge for knowledgeId: ${knowledgeId}`);
+ this.log('log', `[INDEXING] Starting indexKnowledge for fileId: ${fileId}`);
  this.log('debug', `[INDEXING] Content length: ${content.length} characters`);

  try {
@@ -97,7 +97,7 @@ export class LangChainRAGService {

  // Chunk the content using the embedding service
  const chunks = await this.embeddingService.chunkText(content);
- this.log('debug', `[INDEXING] Split content into ${chunks.length} chunks for knowledge ${knowledgeId}`);
+ this.log('debug', `[INDEXING] Split content into ${chunks.length} chunks for file ${fileId}`);

  // Generate embeddings for all chunks at once (batch processing for speed)
  const embeddings = await this.embeddingService.embedTexts(chunks);
@@ -109,17 +109,17 @@ export class LangChainRAGService {
  embedding: embeddings[index],
  metadata: {
  ...metadata,
- knowledgeId,
+ fileId,
  chunkIndex: index,
- id: `knowledge_${knowledgeId}_chunk_${index}`
+ id: `knowledge_${fileId}_chunk_${index}`
  }
  }));

  // Save to per-knowledge vector file
- await this.saveKnowledgeVector(knowledgeId, chunksWithEmbeddings);
+ await this.saveKnowledgeVector(fileId, chunksWithEmbeddings);

  const chunkIds = chunksWithEmbeddings.map(chunk => chunk.metadata.id);
- this.log('log', `[INDEXING] Successfully indexed knowledge ${knowledgeId} with ${chunkIds.length} chunks using optimized approach`);
+ this.log('log', `[INDEXING] Successfully indexed file ${fileId} with ${chunkIds.length} chunks using optimized approach`);

  return {
  success: true,
@@ -127,7 +127,7 @@ export class LangChainRAGService {
  };

  } catch (error: any) {
- this.log('error', `[INDEXING] Failed to index knowledge ${knowledgeId}:`, error);
+ this.log('error', `[INDEXING] Failed to index file ${fileId}:`, error);
  return {
  success: false,
  data: null,
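
Taken together, these hunks rename the indexKnowledge parameter from knowledgeId to fileId and update the log lines accordingly. A minimal usage sketch, assuming an already-initialized service instance named ragService (not part of this diff):

// Hypothetical caller; the numeric ID and metadata are illustrative only.
const indexResult = await ragService.indexKnowledge(
  42,                                   // fileId (called knowledgeId in 3.3.0)
  'Full text of the uploaded file...',
  { source: 'upload' }                  // merged into each chunk's metadata
);
if (indexResult.success) {
  console.log(`Indexed ${indexResult.data?.chunkIds.length} chunks`);
}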
@@ -141,32 +141,45 @@ export class LangChainRAGService {
  */
  async searchKnowledge(request: IRAGSearchRequest): Promise<IRAGResponse<{ results: ISearchResult[] }>> {
  this.log('log', `[SEARCH] Starting knowledge search for query: "${request.query}"`);
- this.log('debug', `[SEARCH] Search parameters: maxResults=${request.maxResults || 5}, threshold=${request.threshold || 0.3}`);
+ this.log('debug', `[SEARCH] Search parameters: maxResults=${request.maxResults || 5}, threshold=${request.threshold || 0.3}, temporaryDocumentSearch=${request.temporaryDocumentSearch}`);

  try {
  await this.ensureInitialized();

- const knowledgeIds = request.filter?.knowledgeIds || [];
- console.log('knowledgeIds', knowledgeIds);
+ const fileIds = request.filter?.fileIds || [];
+ console.log('fileIds', fileIds, 'temporaryDocumentSearch:', request.temporaryDocumentSearch);

- if (knowledgeIds.length === 0) {
- this.log('warn', '[SEARCH] No knowledge IDs provided for search, returning empty results');
+ if (fileIds.length === 0) {
+ this.log('warn', '[SEARCH] No file IDs provided for search, returning empty results');
  return {
  success: true,
  data: { results: [] }
  };
  }

- // Load all knowledge vectors in parallel
- const knowledgeVectorPromises = knowledgeIds.map(async (knowledgeId) => {
- const vectorData = await this.loadKnowledgeVectorWithEmbeddings(knowledgeId);
- return {
- knowledgeId,
- chunks: vectorData.chunks
- };
+ // Load all knowledge vectors in parallel (including temporary documents)
+ const knowledgeVectorPromises = fileIds.map(async (fileId) => {
+ try {
+ const vectorData = await this.loadKnowledgeVectorWithEmbeddings(fileId);
+ return {
+ fileId,
+ chunks: vectorData.chunks
+ };
+ } catch (loadError) {
+ this.log('warn', `[SEARCH] Failed to load file ${fileId}:`, loadError);
+ return null;
+ }
  });

- const knowledgeVectors = await Promise.all(knowledgeVectorPromises);
+ const knowledgeVectors = (await Promise.all(knowledgeVectorPromises)).filter(v => v !== null);
+
+ if (knowledgeVectors.length === 0) {
+ this.log('warn', '[SEARCH] No knowledge vectors could be loaded for search');
+ return {
+ success: true,
+ data: { results: [] }
+ };
+ }

  // Use optimized vector search service
  const searchResponse = await this.vectorSearchService.searchSimilar({
@@ -178,10 +191,10 @@ export class LangChainRAGService {

  // Convert results to expected format
  const results: ISearchResult[] = searchResponse.results.map(result => ({
- knowledgeId: result.metadata.knowledgeId,
+ fileId: result.metadata?.fileId, // Use fileId directly
  content: result.content,
  score: result.score,
- metadata: result.metadata,
+ metadata: result.metadata, // Pass metadata as-is
  chunkId: result.chunkId,
  }));

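These searchKnowledge hunks rename the filter field to fileIds, add the temporaryDocumentSearch flag to the debug output, and tolerate vector files that fail to load. A minimal sketch of a call against the new request shape, assuming the same ragService instance as above (IDs are illustrative):

const searchResult = await ragService.searchKnowledge({
  query: 'How is the embedding service configured?',
  maxResults: 5,
  threshold: 0.3,
  temporaryDocumentSearch: false,        // new optional flag in 3.4.0
  filter: { fileIds: [42, 'web_123'] }   // was filter.knowledgeIds in 3.3.0
});
if (searchResult.success) {
  for (const result of searchResult.data?.results ?? []) {
    console.log(result.fileId, result.score.toFixed(4), result.chunkId);
  }
}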
@@ -205,25 +218,25 @@ export class LangChainRAGService {
  /**
  * Remove knowledge from index
  */
- async removeKnowledge(knowledgeId: string | number): Promise<boolean> {
- this.log('log', `[REMOVE] Starting removal of knowledge: ${knowledgeId}`);
+ async removeKnowledge(fileId: string | number): Promise<boolean> {
+ this.log('log', `[REMOVE] Starting removal of file: ${fileId}`);

  try {
  await this.ensureInitialized();

  // Remove the individual knowledge vector file
- const vectorFilePath = this.getKnowledgeVectorPath(knowledgeId);
+ const vectorFilePath = this.getKnowledgeVectorPath(fileId);
  if (fs.existsSync(vectorFilePath)) {
  fs.unlinkSync(vectorFilePath);
- this.log('log', `[REMOVE] Successfully removed vector file for knowledge ${knowledgeId}`);
+ this.log('log', `[REMOVE] Successfully removed vector file for file ${fileId}`);
  return true;
  } else {
- this.log('warn', `[REMOVE] Vector file not found for knowledge ${knowledgeId}`);
+ this.log('warn', `[REMOVE] Vector file not found for file ${fileId}`);
  return true; // Consider it successful if file doesn't exist
  }

  } catch (error: any) {
- this.log('error', `[REMOVE] Failed to remove knowledge ${knowledgeId}:`, error);
+ this.log('error', `[REMOVE] Failed to remove file ${fileId}:`, error);
  return false;
  }
  }
@@ -307,8 +320,8 @@ export class LangChainRAGService {
  /**
  * Save chunks to knowledge-specific vector file with embeddings
  */
- private async saveKnowledgeVector(knowledgeId: string | number, chunks: Array<{ content: string; embedding: number[]; metadata: any }>): Promise<void> {
- const vectorFilePath = this.getKnowledgeVectorPath(knowledgeId);
+ private async saveKnowledgeVector(fileId: string | number, chunks: Array<{ content: string; embedding: number[]; metadata: any }>): Promise<void> {
+ const vectorFilePath = this.getKnowledgeVectorPath(fileId);
  const vectorDir = path.dirname(vectorFilePath);

  // Ensure directory exists
@@ -318,16 +331,16 @@ export class LangChainRAGService {

  try {
  const vectorData = {
- knowledgeId,
+ fileId,
  chunks,
  timestamp: new Date().toISOString()
  };

  fs.writeFileSync(vectorFilePath, JSON.stringify(vectorData, null, 2));
- this.log('debug', `[SAVE] Successfully saved ${chunks.length} chunks with embeddings for knowledge ${knowledgeId}`);
+ this.log('debug', `[SAVE] Successfully saved ${chunks.length} chunks with embeddings for file ${fileId}`);

  } catch (error) {
- this.log('error', `[SAVE] Failed to save vector data for knowledge ${knowledgeId}:`, error);
+ this.log('error', `[SAVE] Failed to save vector data for file ${fileId}:`, error);
  throw error;
  }
  }
@@ -335,24 +348,24 @@ export class LangChainRAGService {
  /**
  * Load vector data for a specific knowledge item with embeddings
  */
- private async loadKnowledgeVectorWithEmbeddings(knowledgeId: string | number): Promise<{ knowledgeId?: string | number, chunks: Array<{ content: string; embedding: number[]; metadata: any }> }> {
- const vectorFilePath = this.getKnowledgeVectorPath(knowledgeId);
+ private async loadKnowledgeVectorWithEmbeddings(fileId: string | number): Promise<{ fileId?: string | number, chunks: Array<{ content: string; embedding: number[]; metadata: any }> }> {
+ const vectorFilePath = this.getKnowledgeVectorPath(fileId);

  if (!fs.existsSync(vectorFilePath)) {
- this.log('debug', `[LOAD] No vector file found for knowledge ${knowledgeId}, skipping...`);
+ this.log('debug', `[LOAD] No vector file found for file ${fileId}, skipping...`);
  return { chunks: [] };
  }

  try {
- this.log('debug', `[LOAD] Loading vector data with embeddings for knowledge ${knowledgeId} from ${vectorFilePath}`);
+ this.log('debug', `[LOAD] Loading vector data with embeddings for file ${fileId} from ${vectorFilePath}`);
  const vectorData = JSON.parse(fs.readFileSync(vectorFilePath, 'utf8'));

  return {
  chunks: vectorData.chunks || [],
- knowledgeId
+ fileId
  };
  } catch (error) {
- this.log('error', `[LOAD] Failed to load vector data for knowledge ${knowledgeId}:`, error);
+ this.log('error', `[LOAD] Failed to load vector data for file ${fileId}:`, error);
  return { chunks: [] };
  }
  }
@@ -360,12 +373,12 @@ export class LangChainRAGService {
  /**
  * Get the file path for a specific knowledge's vector data
  */
- private getKnowledgeVectorPath(knowledgeId: string | number): string {
+ private getKnowledgeVectorPath(fileId: string | number): string {
  const vectorDir = path.join(rwsPath.findRootWorkspacePath(), 'files', 'vectors', 'knowledge');
  if (!fs.existsSync(vectorDir)) {
  fs.mkdirSync(vectorDir, { recursive: true });
  }
- return path.join(vectorDir, `knowledge_${knowledgeId}.json`);
+ return path.join(vectorDir, `knowledge_${fileId}.json`);
  }

  /**
@@ -54,35 +54,42 @@ export class OptimizedVectorSearchService {
  candidates.push({
  content: chunk.content,
  score: similarity,
- metadata: chunk.metadata,
- knowledgeId: knowledgeVector.knowledgeId,
- chunkId: chunk.metadata?.id || `${knowledgeVector.knowledgeId}_chunk_${Date.now()}`
+ metadata: {
+ ...chunk.metadata,
+ fileId: knowledgeVector.fileId // Use fileId directly
+ },
+ fileId: knowledgeVector.fileId, // Always use the fileId from the knowledgeVector
+ chunkId: chunk.metadata?.id || `${knowledgeVector.fileId}_chunk_${Date.now()}`
  });
  }
  }

+ // Sort candidates by score and take top maxResults per source
+ const topCandidates = candidates
+ .sort((a, b) => b.score - a.score)
+ .slice(0, maxResults);
+
  // Log similarity statistics for debugging
  if (similarities.length > 0) {
  const maxSim = Math.max(...similarities);
  const avgSim = similarities.reduce((a, b) => a + b, 0) / similarities.length;
- console.log(`[VECTOR SEARCH] Knowledge ${knowledgeVector.knowledgeId}: Max similarity: ${maxSim.toFixed(4)}, Avg: ${avgSim.toFixed(4)}, Candidates above ${threshold}: ${candidates.length}`);
+ console.log(`[VECTOR SEARCH] File ${knowledgeVector.fileId}: Max similarity: ${maxSim.toFixed(4)}, Avg: ${avgSim.toFixed(4)}, Candidates above ${threshold}: ${candidates.length}, Top results taken: ${topCandidates.length}`);
  }

- return candidates;
+ return topCandidates;
  });

  // Wait for all searches to complete
  const allCandidateArrays = await Promise.all(searchPromises);

- // Flatten results
+ // Flatten results (each source already limited to maxResults)
  for (const candidates of allCandidateArrays) {
  allCandidates.push(...candidates);
  }

- // Sort by similarity score and take top results
+ // Sort by similarity score (no additional limiting since each source is already limited)
  const results = allCandidates
- .sort((a, b) => b.score - a.score)
- .slice(0, maxResults);
+ .sort((a, b) => b.score - a.score);

  const searchTime = Date.now() - startTime;

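The ranking change in this hunk is: each source's candidates are sorted and capped at maxResults before the merged list is sorted globally, and the final global slice is dropped. A standalone sketch of that per-source top-k merge, using simplified types rather than the service's internal ones:

interface Candidate { fileId: string | number; score: number; content: string; }

// Sketch only: sort each source's candidates, keep its top maxResults,
// then merge and sort globally without a second slice (mirroring the 3.4.0 behaviour).
function mergePerSourceTopK(perSource: Candidate[][], maxResults: number): Candidate[] {
  const limited = perSource.map(candidates =>
    [...candidates].sort((a, b) => b.score - a.score).slice(0, maxResults)
  );
  return limited.flat().sort((a, b) => b.score - a.score);
}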
@@ -122,7 +129,7 @@ export class OptimizedVectorSearchService {
  async batchSearch(
  queries: string[],
  knowledgeVectors: Array<{
- knowledgeId: string | number;
+ fileId: string | number;
  chunks: Array<{
  content: string;
  embedding: number[];
@@ -165,7 +172,7 @@ export class OptimizedVectorSearchService {
  private async searchWithEmbedding(request: {
  queryEmbedding: number[];
  knowledgeVectors: Array<{
- knowledgeId: string | number;
+ fileId: string | number;
  chunks: Array<{
  content: string;
  embedding: number[];
@@ -200,8 +207,8 @@ export class OptimizedVectorSearchService {
  content: chunk.content,
  score: similarity,
  metadata: chunk.metadata,
- knowledgeId: knowledgeVector.knowledgeId,
- chunkId: chunk.metadata?.id || `${knowledgeVector.knowledgeId}_chunk_${Date.now()}`
+ fileId: knowledgeVector.fileId,
+ chunkId: chunk.metadata?.id || `${knowledgeVector.fileId}_chunk_${Date.now()}`
  });
  }
  }
@@ -252,7 +259,7 @@ export class OptimizedVectorSearchService {
  * Search similar documents (compatibility method from LangChainVectorStoreService)
  */
  async searchSimilarCompat(request: IVectorSearchRequest, knowledgeVectors: Array<{
- knowledgeId: string | number;
+ fileId: string | number;
  chunks: Array<{
  content: string;
  embedding: number[];
@@ -271,9 +278,9 @@ export class OptimizedVectorSearchService {
  let filteredVectors = knowledgeVectors;
  if (filter) {
  filteredVectors = knowledgeVectors.filter(vector => {
- // Check knowledge IDs
- if (filter.knowledgeIds && filter.knowledgeIds.length > 0) {
- return filter.knowledgeIds.includes(String(vector.knowledgeId));
+ // Check file IDs
+ if (filter.fileIds && filter.fileIds.length > 0) {
+ return filter.fileIds.includes(String(vector.fileId));
  }
  return true;
  });
@@ -293,7 +300,7 @@ export class OptimizedVectorSearchService {
  score: result.score,
  metadata: result.metadata,
  chunkId: result.chunkId,
- knowledgeId: result.knowledgeId
+ fileId: result.fileId
  }));

  return {
@@ -313,7 +320,7 @@ export class OptimizedVectorSearchService {
  * Get search statistics
  */
  getStats(knowledgeVectors: Array<{
- knowledgeId: string | number;
+ fileId: string | number;
  chunks: Array<{ content: string; embedding: number[]; metadata: any; }>;
  }>): { totalChunks: number; totalKnowledge: number } {
  const totalChunks = knowledgeVectors.reduce((total, vector) => total + vector.chunks.length, 0);
@@ -24,8 +24,9 @@ export interface IRAGSearchRequest {
  query: string;
  maxResults?: number;
  threshold?: number;
+ temporaryDocumentSearch?: boolean; // Flag for searching temporary documents (web search)
  filter?: {
- knowledgeIds?: (string | number)[];
+ fileIds?: (string | number)[];
  documentIds?: (string | number)[];
  [key: string]: any;
  };
@@ -5,7 +5,7 @@ export interface ISearchResult {
  content: string;
  score: number;
  metadata: any;
- knowledgeId: string | number;
+ fileId: string | number;
  chunkId: string;
  }

@@ -30,7 +30,7 @@ export interface IVectorSearchResponse {
  export interface IOptimizedSearchRequest {
  query: string;
  knowledgeVectors: Array<{
- knowledgeId: string | number;
+ fileId: string | number;
  chunks: Array<{
  content: string;
  embedding: number[];
@@ -45,7 +45,7 @@ export interface IOptimizedSearchResult {
  content: string;
  score: number;
  metadata: any;
- knowledgeId: string | number;
+ fileId: string | number;
  chunkId: string;
  }
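
Taken together, the interface hunks rename knowledgeId to fileId in both request filters and results and introduce the optional temporaryDocumentSearch flag. A consolidated sketch of the updated shapes as they appear in this diff (fields outside the shown hunks are omitted):

interface IRAGSearchRequest {
  query: string;
  maxResults?: number;
  threshold?: number;
  temporaryDocumentSearch?: boolean; // new in 3.4.0
  filter?: {
    fileIds?: (string | number)[];   // was knowledgeIds in 3.3.0
    documentIds?: (string | number)[];
    [key: string]: any;
  };
}

interface ISearchResult {
  content: string;
  score: number;
  metadata: any;
  fileId: string | number;           // was knowledgeId in 3.3.0
  chunkId: string;
}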