@rws-framework/ai-tools 2.2.1 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,395 @@
1
+ import { Injectable } from '@nestjs/common';
2
+ import { LangChainEmbeddingService } from './LangChainEmbeddingService';
3
+ import { OptimizedVectorSearchService } from './OptimizedVectorSearchService';
4
+ import { Document } from '@langchain/core/documents';
5
+ import fs from 'fs';
6
+ import path from 'path';
7
+ import { rwsPath } from '@rws-framework/console';
8
+ import {
9
+ IEmbeddingConfig,
10
+ IChunkConfig,
11
+ IVectorStoreConfig,
12
+ ISearchResult,
13
+ IVectorSearchRequest,
14
+ ILangChainRAGConfig,
15
+ IRAGIndexRequest,
16
+ IRAGSearchRequest,
17
+ IRAGResponse,
18
+ IRAGStats
19
+ } from '../types';
20
+
21
+ // Re-export types for convenience
22
+ export {
23
+ IEmbeddingConfig,
24
+ IChunkConfig,
25
+ IVectorStoreConfig,
26
+ ISearchResult,
27
+ IVectorSearchRequest,
28
+ ILangChainRAGConfig,
29
+ IRAGIndexRequest,
30
+ IRAGSearchRequest,
31
+ IRAGResponse,
32
+ IRAGStats
33
+ } from '../types';
34
+
35
/** Shape of one chunk as persisted in a per-knowledge vector file. */
type StoredKnowledgeChunk = { content: string; embedding: number[]; metadata: any };

/**
 * Core LangChain-based RAG service with per-knowledge vector storage.
 *
 * Each knowledge item is chunked, embedded, and persisted to its own JSON file
 * (`<workspace root>/files/vectors/knowledge/knowledge_<id>.json`). Searches load
 * only the files for the requested knowledge ids and delegate similarity scoring
 * to `OptimizedVectorSearchService`.
 */
@Injectable()
export class LangChainRAGService {
    /** Result count used when a search request does not specify `maxResults`. */
    private static readonly DEFAULT_MAX_RESULTS = 5;
    /**
     * Similarity threshold used when a search request does not specify `threshold`.
     * NOTE(review): the original comment said this matches PromptEnhancementService — confirm.
     */
    private static readonly DEFAULT_THRESHOLD = 0.1;

    private config?: ILangChainRAGConfig;
    private isInitialized = false;
    // NOTE(review): the two cache fields below are declared but not read anywhere in this class.
    private queryEmbeddingCache = new Map<string, number[]>();
    private maxCacheSize = 100;
    private logger?: any; // Optional logger; only its `debug`/`log`/`warn`/`error` methods are used

    constructor(
        private embeddingService: LangChainEmbeddingService,
        private vectorSearchService: OptimizedVectorSearchService
    ) {}

    /**
     * Initialize the RAG service.
     *
     * Safe to call more than once. If the service was already initialized (for example
     * lazily, by the first index/search call) a config or logger passed here is still
     * adopted, but only when none was set before, so explicit earlier settings win.
     *
     * @param config Optional RAG configuration; `persistence` defaults are applied underneath it.
     * @param logger Optional logger object exposing any of `debug`, `log`, `warn`, `error`.
     */
    async initialize(config?: ILangChainRAGConfig, logger?: any): Promise<void> {
        if (this.isInitialized) {
            // A lazy ensureInitialized() may have run before the caller could hand these over.
            if (logger && !this.logger) {
                this.logger = logger;
            }
            if (config && !this.config) {
                this.config = this.applyConfigDefaults(config);
            }
            this.log('debug', 'RAG service already initialized, skipping...');
            return;
        }

        if (config) {
            this.config = this.applyConfigDefaults(config);
        }

        if (logger) {
            this.logger = logger;
        }

        this.log('log', 'Starting LangChain RAG service initialization...');
        this.isInitialized = true;
        this.log('log', 'LangChain RAG service initialized successfully');
    }

    /**
     * Chunk, embed, and persist the content of one knowledge item.
     *
     * Any previously stored vector file for the same `knowledgeId` is replaced.
     *
     * @param knowledgeId Identifier of the knowledge item; used to derive the vector file name.
     * @param content Raw text to index.
     * @param metadata Extra metadata copied onto every chunk.
     * @returns `success: true` with the generated chunk ids, or `success: false` with an
     *          error message. This method does not throw for indexing failures.
     */
    async indexKnowledge(
        knowledgeId: string | number,
        content: string,
        metadata: Record<string, any> = {}
    ): Promise<IRAGResponse<{ chunkIds: string[] }>> {
        this.log('log', `[INDEXING] Starting indexKnowledge for knowledgeId: ${knowledgeId}`);

        try {
            // Inside the try so that a missing `content` yields a failure response, not a rejection.
            this.log('debug', `[INDEXING] Content length: ${content.length} characters`);

            await this.ensureInitialized();

            // Chunk the content using the embedding service
            const chunks = await this.embeddingService.chunkText(content);
            this.log('debug', `[INDEXING] Split content into ${chunks.length} chunks for knowledge ${knowledgeId}`);

            // Generate embeddings for all chunks in a single batch call
            const embeddings = await this.embeddingService.embedTexts(chunks);
            this.log('debug', `[INDEXING] Generated embeddings for ${chunks.length} chunks`);

            // Chunks and embeddings are paired by index; a mismatch would persist undefined embeddings.
            if (embeddings.length !== chunks.length) {
                throw new Error(
                    `Embedding count (${embeddings.length}) does not match chunk count (${chunks.length})`
                );
            }

            const chunksWithEmbeddings = chunks.map((chunkContent, index) => ({
                content: chunkContent,
                embedding: embeddings[index],
                metadata: {
                    ...metadata,
                    knowledgeId,
                    chunkIndex: index,
                    id: `knowledge_${knowledgeId}_chunk_${index}`
                }
            }));

            // Save to per-knowledge vector file
            await this.saveKnowledgeVector(knowledgeId, chunksWithEmbeddings);

            const chunkIds = chunksWithEmbeddings.map(chunk => chunk.metadata.id);
            this.log('log', `[INDEXING] Successfully indexed knowledge ${knowledgeId} with ${chunkIds.length} chunks using optimized approach`);

            return {
                success: true,
                data: { chunkIds }
            };
        } catch (error: unknown) {
            this.log('error', `[INDEXING] Failed to index knowledge ${knowledgeId}:`, error);
            return {
                success: false,
                data: null,
                error: LangChainRAGService.describeError(error)
            };
        }
    }

    /**
     * Search the stored chunks of the requested knowledge items.
     *
     * Only knowledge ids listed in `request.filter.knowledgeIds` are searched; with no
     * ids the result is an empty (successful) response.
     *
     * @param request Query text plus optional `maxResults`, `threshold` and `filter.knowledgeIds`.
     * @returns `success: true` with the matching chunks, or `success: false` with an error message.
     */
    async searchKnowledge(request: IRAGSearchRequest): Promise<IRAGResponse<{ results: ISearchResult[] }>> {
        // `||` for maxResults: a request for 0 results is treated as "use the default".
        const maxResults = request.maxResults || LangChainRAGService.DEFAULT_MAX_RESULTS;
        // `??` for threshold: 0 is a legitimate threshold and must not be replaced by the default.
        const threshold = request.threshold ?? LangChainRAGService.DEFAULT_THRESHOLD;

        this.log('log', `[SEARCH] Starting knowledge search for query: "${request.query}"`);
        this.log('debug', `[SEARCH] Search parameters: maxResults=${maxResults}, threshold=${threshold}`);

        try {
            await this.ensureInitialized();

            // De-duplicate so the same vector file is not loaded (and matched) twice.
            const knowledgeIds = Array.from(new Set(request.filter?.knowledgeIds || []));
            this.log('debug', '[SEARCH] Knowledge IDs:', knowledgeIds);

            if (knowledgeIds.length === 0) {
                this.log('warn', '[SEARCH] No knowledge IDs provided for search, returning empty results');
                return {
                    success: true,
                    data: { results: [] }
                };
            }

            // Load all knowledge vectors concurrently (file reads are asynchronous)
            const knowledgeVectors = await Promise.all(
                knowledgeIds.map(async (knowledgeId) => {
                    const vectorData = await this.loadKnowledgeVectorWithEmbeddings(knowledgeId);
                    return {
                        knowledgeId,
                        chunks: vectorData.chunks
                    };
                })
            );

            const searchResponse = await this.vectorSearchService.searchSimilar({
                query: request.query,
                knowledgeVectors,
                maxResults,
                threshold
            });

            // Convert results to the public result shape
            const results: ISearchResult[] = searchResponse.results.map(result => ({
                content: result.content,
                score: result.score,
                metadata: result.metadata,
                chunkId: result.chunkId
            }));

            this.log('log', `[SEARCH] Found ${results.length} relevant chunks for query: "${request.query}"`);

            return {
                success: true,
                data: { results }
            };
        } catch (error: unknown) {
            this.log('error', '[SEARCH] Failed to search knowledge:', error);
            return {
                success: false,
                data: null,
                error: LangChainRAGService.describeError(error)
            };
        }
    }

    /**
     * Remove the stored vector file of one knowledge item.
     *
     * @param knowledgeId Identifier of the knowledge item to remove.
     * @returns `true` when the file was deleted or did not exist, `false` when removal failed.
     */
    async removeKnowledge(knowledgeId: string | number): Promise<boolean> {
        this.log('log', `[REMOVE] Starting removal of knowledge: ${knowledgeId}`);

        try {
            await this.ensureInitialized();

            const vectorFilePath = this.getKnowledgeVectorPath(knowledgeId);
            if (!fs.existsSync(vectorFilePath)) {
                this.log('warn', `[REMOVE] Vector file not found for knowledge ${knowledgeId}`);
                return true; // Nothing to remove counts as success
            }

            fs.unlinkSync(vectorFilePath);
            this.log('log', `[REMOVE] Successfully removed vector file for knowledge ${knowledgeId}`);
            return true;
        } catch (error: unknown) {
            this.log('error', `[REMOVE] Failed to remove knowledge ${knowledgeId}:`, error);
            return false;
        }
    }

    /**
     * Collect statistics by scanning the `.json` files in the vector directory.
     *
     * Files that cannot be read or parsed are logged and skipped (they still count as a
     * document). On any other failure all counters are reported as zero.
     *
     * @returns Chunk count plus the number of vector files (reported as both
     *          `totalDocuments` and `knowledgeItems`).
     */
    getStats(): IRAGStats {
        try {
            const vectorDir = this.getVectorDir();

            if (!fs.existsSync(vectorDir)) {
                return {
                    totalDocuments: 0,
                    totalChunks: 0,
                    knowledgeItems: 0
                };
            }

            const files = fs.readdirSync(vectorDir).filter(f => f.endsWith('.json'));
            let totalChunks = 0;

            for (const file of files) {
                try {
                    const data = JSON.parse(fs.readFileSync(path.join(vectorDir, file), 'utf8'));
                    totalChunks += Array.isArray(data?.chunks) ? data.chunks.length : 0;
                } catch (error) {
                    this.log('warn', `[STATS] Failed to read vector file ${file}:`, error);
                }
            }

            this.log('debug', `[STATS] RAG system contains ${totalChunks} chunks across ${files.length} knowledge items`);

            return {
                totalChunks,
                totalDocuments: files.length,
                knowledgeItems: files.length
            };
        } catch (error: unknown) {
            this.log('error', '[STATS] Failed to get RAG statistics:', error);
            return {
                totalDocuments: 0,
                totalChunks: 0,
                knowledgeItems: 0
            };
        }
    }

    /**
     * Delete every `.json` vector file in the vector directory.
     *
     * @returns `true` on success (including when the directory does not exist), `false` on failure.
     */
    async clearAll(): Promise<boolean> {
        try {
            const vectorDir = this.getVectorDir();
            if (fs.existsSync(vectorDir)) {
                const files = fs.readdirSync(vectorDir).filter(f => f.endsWith('.json'));
                for (const file of files) {
                    fs.unlinkSync(path.join(vectorDir, file));
                }
                this.log('log', `[CLEAR] Successfully cleared ${files.length} vector files`);
            }

            this.log('debug', 'Cleared all indexed knowledge');
            return true;
        } catch (error: unknown) {
            this.log('error', 'Failed to clear knowledge:', error);
            return false;
        }
    }

    /**
     * Get the embedding vector for a query string.
     *
     * @param query Text to embed.
     * @returns The embedding produced by the embedding service.
     */
    async getQueryEmbedding(query: string): Promise<number[]> {
        await this.ensureInitialized();
        return await this.embeddingService.embedText(query);
    }

    /**
     * Persist the chunks (with embeddings) of one knowledge item.
     *
     * The data is written to a temporary sibling file and then renamed over the target,
     * so a failed write never leaves a truncated vector file behind and readers never
     * observe a half-written one.
     *
     * @throws Re-throws any filesystem or serialization error after logging it.
     */
    private async saveKnowledgeVector(knowledgeId: string | number, chunks: StoredKnowledgeChunk[]): Promise<void> {
        const vectorFilePath = this.getKnowledgeVectorPath(knowledgeId);
        // The `.tmp` suffix keeps the temp file out of the `.json` scans in getStats/clearAll.
        const tempFilePath = `${vectorFilePath}.${process.pid}.tmp`;

        try {
            // Ensure directory exists (recursive mkdir is a no-op when it already does)
            fs.mkdirSync(path.dirname(vectorFilePath), { recursive: true });

            const vectorData = {
                knowledgeId,
                chunks,
                timestamp: new Date().toISOString()
            };

            fs.writeFileSync(tempFilePath, JSON.stringify(vectorData, null, 2));
            fs.renameSync(tempFilePath, vectorFilePath);
            this.log('debug', `[SAVE] Successfully saved ${chunks.length} chunks with embeddings for knowledge ${knowledgeId}`);
        } catch (error) {
            // Best-effort cleanup of the temp file; the original error is what matters.
            try {
                fs.unlinkSync(tempFilePath);
            } catch {
                // Temp file was never created or is already gone.
            }
            this.log('error', `[SAVE] Failed to save vector data for knowledge ${knowledgeId}:`, error);
            throw error;
        }
    }

    /**
     * Load the stored chunks (with embeddings) of one knowledge item.
     *
     * A missing, unreadable, or malformed file yields an empty chunk list instead of an
     * error, so one bad file does not fail a multi-knowledge search.
     *
     * @throws Only when `knowledgeId` does not map to a path inside the vector directory.
     */
    private async loadKnowledgeVectorWithEmbeddings(knowledgeId: string | number): Promise<{ chunks: StoredKnowledgeChunk[] }> {
        const vectorFilePath = this.getKnowledgeVectorPath(knowledgeId);

        if (!fs.existsSync(vectorFilePath)) {
            this.log('debug', `[LOAD] No vector file found for knowledge ${knowledgeId}, skipping...`);
            return { chunks: [] };
        }

        try {
            this.log('debug', `[LOAD] Loading vector data with embeddings for knowledge ${knowledgeId} from ${vectorFilePath}`);
            // Asynchronous read so that several knowledge files can be loaded concurrently.
            const raw = await fs.promises.readFile(vectorFilePath, 'utf8');
            const vectorData = JSON.parse(raw);

            return {
                chunks: Array.isArray(vectorData?.chunks) ? vectorData.chunks : []
            };
        } catch (error) {
            this.log('error', `[LOAD] Failed to load vector data for knowledge ${knowledgeId}:`, error);
            return { chunks: [] };
        }
    }

    /**
     * Directory that holds all per-knowledge vector files.
     */
    private getVectorDir(): string {
        return path.join(rwsPath.findRootWorkspacePath(), 'files', 'vectors', 'knowledge');
    }

    /**
     * Get the vector file path for a knowledge item, creating the vector directory if needed.
     *
     * @throws Error when the id would resolve to a path outside the vector directory
     *         (e.g. an id containing `../`), since callers write to and delete this path.
     */
    private getKnowledgeVectorPath(knowledgeId: string | number): string {
        const vectorDir = this.getVectorDir();
        const vectorFilePath = path.join(vectorDir, `knowledge_${knowledgeId}.json`);

        // Containment check: the id is interpolated into the file name and may be caller-supplied.
        const resolvedDir = path.resolve(vectorDir);
        if (!path.resolve(vectorFilePath).startsWith(resolvedDir + path.sep)) {
            throw new Error(`Invalid knowledge id "${knowledgeId}": resolves outside the vector directory`);
        }

        if (!fs.existsSync(vectorDir)) {
            fs.mkdirSync(vectorDir, { recursive: true });
        }
        return vectorFilePath;
    }

    /**
     * Merge the default persistence settings underneath a caller-supplied config.
     */
    private applyConfigDefaults(config: ILangChainRAGConfig): ILangChainRAGConfig {
        return {
            persistence: { enabled: false, autoSave: true },
            ...config
        };
    }

    /**
     * Ensure the service is initialized, initializing it without config or logger if not.
     */
    private async ensureInitialized(): Promise<void> {
        if (!this.isInitialized) {
            this.log('debug', '[INIT] Service not initialized, triggering initialization...');
            await this.initialize();
        }
    }

    /**
     * Extract a human-readable message from a caught value of unknown type.
     * Falls back to `'Unknown error'` when no non-empty message is available.
     */
    private static describeError(error: unknown): string {
        if (typeof error === 'string' && error) {
            return error;
        }
        if (typeof error === 'object' && error !== null) {
            const message = (error as { message?: unknown }).message;
            if (typeof message === 'string' && message) {
                return message;
            }
        }
        return 'Unknown error';
    }

    /**
     * Logging helper.
     *
     * Prefers the matching method of the provided logger, then the logger's `log`
     * method, and finally the console so that no message is silently dropped.
     */
    private log(level: 'debug' | 'log' | 'warn' | 'error', message: string, ...args: any[]): void {
        const logger = this.logger;
        if (logger) {
            // Called as methods on `logger` so that its `this` binding is preserved.
            if (typeof logger[level] === 'function') {
                logger[level](message, ...args);
                return;
            }
            if (typeof logger.log === 'function') {
                logger.log(message, ...args);
                return;
            }
        }

        // Fallback to console
        console[level](message, ...args);
    }
}