@rws-framework/ai-tools 2.2.0 → 3.0.0

@@ -0,0 +1,124 @@
+ # Tutorial-Style RAG with RWSVectorStore
+
+ This document shows how to use our ai-tools services in the same way as the LangChain tutorial, but with RWSVectorStore as the backend.
+
+ ## Quick Start
+
+ ```typescript
+ import { LangChainEmbeddingService } from '@rws-framework/ai-tools';
+ import { Document } from '@langchain/core/documents';
+
+ // Initialize embedding service
+ const embeddingService = new LangChainEmbeddingService();
+ await embeddingService.initialize({
+   provider: 'cohere',
+   apiKey: process.env.COHERE_API_KEY,
+   model: 'embed-v4.0'
+ });
+
+ // Create documents (like tutorial's document loading)
+ const documents = [
+   new Document({
+     pageContent: "Task decomposition breaks complex tasks into steps.",
+     metadata: { source: 'tutorial' }
+   }),
+   // ... more documents
+ ];
+
+ // Create vector store (tutorial-style)
+ const vectorStore = await embeddingService.createVectorStore(documents);
+
+ // Similarity search (tutorial-style)
+ const results = await embeddingService.similaritySearch(vectorStore, "What is task decomposition?", 3);
+ ```
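+
+ The search returns plain LangChain `Document` objects, so the results can be used directly. A minimal sketch, assuming the `results` array from the Quick Start above:
+
+ ```typescript
+ // Each hit is a LangChain Document with pageContent and metadata
+ results.forEach((doc, i) => {
+   console.log(`${i + 1}. ${doc.pageContent} (source: ${doc.metadata.source})`);
+ });
+ ```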
+
+ ## Comparison with LangChain Tutorial
+
+ ### LangChain Tutorial Approach:
+ ```typescript
+ // Tutorial code
+ const vectorStore = new MemoryVectorStore(embeddings);
+ await vectorStore.addDocuments(documents);
+ const results = await vectorStore.similaritySearch(query, k);
+ ```
+
+ ### Our AI-Tools Approach:
+ ```typescript
+ // Our equivalent code using RWSVectorStore
+ const vectorStore = await embeddingService.createVectorStore(documents);
+ const results = await embeddingService.similaritySearch(vectorStore, query, k);
+ ```
+
+ ## Available Methods
+
+ ### 1. Simple Similarity Search
+ ```typescript
+ const docs = await embeddingService.similaritySearch(vectorStore, query, k);
+ // Returns: Document[]
+ ```
+
+ ### 2. Similarity Search with Scores
+ ```typescript
+ const results = await embeddingService.similaritySearchWithScore(vectorStore, query, k);
+ // Returns: [Document, number][]
+ ```
+
+ ### 3. Enhanced Search with Filters
+ ```typescript
+ const results = await vectorStoreService.searchSimilar({
+   query: "your query",
+   maxResults: 5,
+   similarityThreshold: 0.1,
+   filter: {
+     knowledgeIds: ['28'],
+     documentIds: ['doc1', 'doc2']
+   }
+ });
+ // Returns: IVectorSearchResponse
+ ```
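+
+ The enhanced search wraps its matches in a `results` array on the response object. A minimal sketch of consuming it, using only the fields shown in the bundled examples (`score`, `content`, `chunkId`):
+
+ ```typescript
+ // Iterate the IVectorSearchResponse returned above
+ results.results.forEach((result, index) => {
+   console.log(`${index + 1}. Score: ${result.score.toFixed(4)} - ${result.content}`);
+   console.log(`   Chunk ID: ${result.chunkId}`);
+ });
+ ```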
+
+ ## Integration with RAG Module
+
+ The RAG module already uses these services:
+
+ ```typescript
+ // In backend/src/app/rag_module/rag.service.ts
+ constructor(
+   private embeddingService: LangChainEmbeddingService,
+   private langChainRAGService: LangChainRAGService
+ ) {}
+ ```
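+
+ With those services injected, a retrieval step can follow the same tutorial-style flow. A rough sketch of what such a method could look like (the `findRelevantChunks` name and the in-memory store are assumptions for illustration, not the actual rag.service.ts implementation):
+
+ ```typescript
+ // Hypothetical retrieval helper inside the RAG service
+ async findRelevantChunks(question: string, documents: Document[]): Promise<Document[]> {
+   // Build a vector store over the already-chunked documents
+   const vectorStore = await this.embeddingService.createVectorStore(documents);
+   // Return the top matches for the question
+   return this.embeddingService.similaritySearch(vectorStore, question, 5);
+ }
+ ```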
+
+ ## Benefits of Our Approach
+
+ 1. **Tutorial Compatibility**: Same interface as LangChain tutorial
+ 2. **RWSVectorStore Backend**: Uses our proven vector storage system
+ 3. **Memory & FAISS Support**: Can use both in-memory and persistent storage
+ 4. **Knowledge Filtering**: Built-in support for multi-tenant scenarios
+ 5. **Polish Content Optimized**: Similarity thresholds tuned for non-English content
+
+ ## Configuration
+
+ ```typescript
+ // Centralized configuration (from RAG service)
+ static EMBEDDING_CONFIG = {
+   provider: 'cohere',
+   model: 'embed-v4.0',
+   batchSize: 96
+ };
+
+ static RAG_CONFIG = {
+   search: {
+     defaultSimilarityThreshold: 0.1, // Optimized for Polish content
+     maxResults: 5
+   },
+   vectorStore: {
+     type: 'memory', // or 'faiss' for persistence
+     autoSave: true
+   }
+ };
+ ```
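+
+ These blocks are plain objects, so they can be fed straight into the service initializers. A minimal sketch, assuming a `RagService` class that owns the static configs and has the embedding and vector store services injected:
+
+ ```typescript
+ // Hypothetical wiring, e.g. inside an async init method of the RAG service
+ await this.embeddingService.initialize({
+   ...RagService.EMBEDDING_CONFIG,
+   apiKey: process.env.COHERE_API_KEY || ''
+ });
+
+ await this.vectorStoreService.initialize(this.embeddingService, {
+   type: RagService.RAG_CONFIG.vectorStore.type,
+   similarityThreshold: RagService.RAG_CONFIG.search.defaultSimilarityThreshold,
+   maxResults: RagService.RAG_CONFIG.search.maxResults
+ });
+ ```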
+
+ ## Examples
+
+ See `/examples/tutorial-style-rag.ts` for complete working examples.
@@ -0,0 +1,167 @@
+ /**
+  * Test the new RecursiveCharacterTextSplitter approach in TextChunker
+  * This verifies the tutorial-style chunking is working correctly
+  */
+
+ import { TextChunker } from '../src/services/TextChunker';
+
+ function testRecursiveChunking() {
+   console.log('🧪 Testing RecursiveCharacterTextSplitter approach in TextChunker\n');
+
+   // Test document with various separator types
+   const testDocument = `
+ # Introduction to AI
+
+ Artificial Intelligence (AI) is a rapidly evolving field that encompasses various technologies and methodologies. AI systems can process vast amounts of data, recognize patterns, and make decisions.
+
+ ## Machine Learning
+
+ Machine learning is a subset of AI that focuses on algorithms that can learn from data. There are several types of machine learning:
+
+ 1. Supervised Learning: Uses labeled data to train models
+ 2. Unsupervised Learning: Finds patterns in unlabeled data
+ 3. Reinforcement Learning: Learns through interaction with environment
+
+ ### Deep Learning
+
+ Deep learning uses neural networks with multiple layers to process information. These networks can automatically extract features from raw data. Common applications include:
+
+ - Image recognition and computer vision
+ - Natural language processing and understanding
+ - Speech recognition and synthesis
+ - Autonomous vehicles and robotics
+
+ ## Applications and Impact
+
+ AI technologies are transforming various industries. Healthcare uses AI for diagnosis and treatment planning. Finance leverages AI for fraud detection and algorithmic trading. Manufacturing employs AI for quality control and predictive maintenance.
+
+ The ethical implications of AI are increasingly important! Questions about bias, privacy, and job displacement need careful consideration? As AI becomes more prevalent, ensuring responsible development and deployment is crucial; we must balance innovation with ethical considerations.
+ `.trim();
+
+   console.log('📄 Original document:');
+   console.log(`Length: ${testDocument.length} characters`);
+   console.log(`Estimated tokens: ${TextChunker.estimateTokens(testDocument)}`);
+   console.log('First 200 chars:', testDocument.substring(0, 200) + '...\n');
+
+   // Test different chunking configurations
+   const testConfigs = [
+     { maxTokens: 150, overlap: 20, name: 'Small chunks with overlap' },
+     { maxTokens: 300, overlap: 50, name: 'Medium chunks with overlap' },
+     { maxTokens: 100, overlap: 0, name: 'Small chunks no overlap' }
+   ];
+
+   for (const config of testConfigs) {
+     console.log(`\n🔧 Testing: ${config.name}`);
+     console.log(`Max tokens: ${config.maxTokens}, Overlap: ${config.overlap}`);
+
+     const chunks = TextChunker.chunkText(testDocument, config.maxTokens, config.overlap);
+
+     console.log(`\n✅ Generated ${chunks.length} chunks:`);
+
+     chunks.forEach((chunk, index) => {
+       const tokens = TextChunker.estimateTokens(chunk);
+       const withinLimit = tokens <= config.maxTokens;
+       const status = withinLimit ? '✅' : '❌';
+
+       console.log(`${status} Chunk ${index + 1}: ${tokens} tokens, ${chunk.length} chars`);
+       console.log(`   Preview: "${chunk.substring(0, 80)}..."`);
+
+       if (!withinLimit) {
+         console.log(`   ⚠️ WARNING: Chunk exceeds token limit (${tokens} > ${config.maxTokens})`);
+       }
+     });
+
+     // Check for overlaps if configured
+     if (config.overlap > 0 && chunks.length > 1) {
+       console.log('\n🔄 Checking overlaps:');
+       for (let i = 1; i < chunks.length; i++) {
+         const prevChunk = chunks[i - 1];
+         const currentChunk = chunks[i];
+
+         // Simple overlap detection - check if chunks share common words
+         const prevWords = prevChunk.split(' ').slice(-10);
+         const currentWords = currentChunk.split(' ').slice(0, 10);
+
+         const commonWords = prevWords.filter(word =>
+           currentWords.some(cWord =>
+             word.length > 3 && cWord.includes(word.substring(0, Math.min(word.length, 5)))
+           )
+         );
+
+         if (commonWords.length > 0) {
+           console.log(`   📎 Chunk ${i + 1} overlaps with chunk ${i}: "${commonWords.slice(0, 3).join(', ')}"...`);
+         } else {
+           console.log(`   ❓ Chunk ${i + 1} may not have sufficient overlap with chunk ${i}`);
+         }
+       }
+     }
+   }
+ }
+
+ function testDocumentCreation() {
+   console.log('\n\n📋 Testing Document Creation (Tutorial Style)');
+
+   const sampleText = `
+ Artificial Intelligence is transforming the world. Machine learning algorithms can process vast amounts of data and identify patterns that humans might miss.
+
+ Deep learning, a subset of machine learning, uses neural networks to solve complex problems. These networks consist of multiple layers that can automatically extract features from raw data.
+ `.trim();
+
+   const documents = TextChunker.createDocumentsFromChunks(
+     sampleText,
+     {
+       documentId: 'ai-intro',
+       source: 'tutorial',
+       category: 'technology'
+     },
+     200, // maxTokens
+     30   // overlap
+   );
+
+   console.log(`\n📄 Created ${documents.length} documents:`);
+
+   documents.forEach((doc, index) => {
+     console.log(`\nDocument ${index + 1}:`);
+     console.log(`  ID: ${doc.metadata.id}`);
+     console.log(`  Chunk: ${doc.metadata.chunkIndex + 1}/${doc.metadata.totalChunks}`);
+     console.log(`  Content: "${doc.pageContent.substring(0, 100)}..."`);
+     console.log(`  Tokens: ${TextChunker.estimateTokens(doc.pageContent)}`);
+   });
+ }
+
+ function testEdgeCases() {
+   console.log('\n\n🧪 Testing Edge Cases');
+
+   const testCases = [
+     { name: 'Empty string', text: '' },
+     { name: 'Very short text', text: 'Hello world!' },
+     { name: 'Single long word', text: 'Supercalifragilisticexpialidocious'.repeat(20) },
+     { name: 'No separators', text: 'abcdefghijklmnopqrstuvwxyz'.repeat(50) },
+     { name: 'Only separators', text: '\n\n\n. . . ! ! ! ? ? ?' }
+   ];
+
+   testCases.forEach(testCase => {
+     console.log(`\n🔬 Testing: ${testCase.name}`);
+     try {
+       const chunks = TextChunker.chunkText(testCase.text, 100, 20);
+       console.log(`  ✅ Generated ${chunks.length} chunks`);
+       if (chunks.length > 0) {
+         console.log(`  First chunk: "${chunks[0].substring(0, 50)}${chunks[0].length > 50 ? '...' : ''}"`);
+       }
+     } catch (error) {
+       console.log(`  ❌ Error: ${(error as Error).message}`);
+     }
+   });
+ }
+
+ // Run all tests
+ console.log('🚀 Starting RecursiveCharacterTextSplitter Tests\n');
+ console.log('='.repeat(60));
+
+ testRecursiveChunking();
+ testDocumentCreation();
+ testEdgeCases();
+
+ console.log('\n' + '='.repeat(60));
+ console.log('✅ Tests completed! The TextChunker now follows the LangChain tutorial approach.');
+ console.log('📚 It uses RecursiveCharacterTextSplitter-like logic with hierarchical separators.');
@@ -0,0 +1,153 @@
+ /**
+  * Example: LangChain Tutorial-Style RAG with RWSVectorStore
+  *
+  * This demonstrates how to use our ai-tools services in the same way
+  * as the LangChain tutorial, but with an RWSVectorStore backend.
+  */
+
+ import { LangChainEmbeddingService } from '../src/services/LangChainEmbeddingService';
+ import { LangChainVectorStoreService } from '../src/services/LangChainVectorStoreService';
+ import { Document } from '@langchain/core/documents';
+
+ async function tutorialStyleRAGExample() {
+   // Initialize services like the tutorial
+   const embeddingService = new LangChainEmbeddingService();
+   const vectorStoreService = new LangChainVectorStoreService();
+
+   // Configure embeddings (Cohere instead of OpenAI)
+   await embeddingService.initialize({
+     provider: 'cohere',
+     apiKey: process.env.COHERE_API_KEY || '',
+     model: 'embed-v4.0',
+     batchSize: 96
+   });
+
+   // Initialize vector store service
+   await vectorStoreService.initialize(embeddingService, {
+     type: 'memory',
+     similarityThreshold: 0.1,
+     maxResults: 5
+   });
+
+   // Sample documents (like the tutorial's blog post chunks)
+   const sampleTexts = [
+     "Task decomposition is the process of breaking down complex tasks into smaller, more manageable steps.",
+     "Chain of Thought (CoT) prompting helps models think step by step to solve complex problems.",
+     "Tree of Thoughts extends CoT by exploring multiple reasoning possibilities at each step.",
+     "RAG (Retrieval Augmented Generation) combines retrieval and generation for better answers.",
+     "Vector databases store embeddings to enable semantic search over documents."
+   ];
+
+   // Create documents from texts (like tutorial's document loading)
+   const documents: Document[] = [];
+   for (let i = 0; i < sampleTexts.length; i++) {
+     documents.push(new Document({
+       pageContent: sampleTexts[i],
+       metadata: {
+         id: `doc_${i}`,
+         source: 'tutorial_example',
+         chunkIndex: i
+       }
+     }));
+   }
+
+   // Add documents to vector store (like tutorial's vectorStore.addDocuments)
+   await vectorStoreService.addDocuments(documents);
+
+   // Create a vector store for similarity search (tutorial-style)
+   const vectorStore = await embeddingService.createVectorStore(documents, { type: 'memory' });
+
+   // Perform similarity search like the tutorial
+   const query = "What is task decomposition?";
+   console.log(`\n🔍 Searching for: "${query}"`);
+
+   // Method 1: Tutorial-style similarity search (returns documents only)
+   const similarDocs = await embeddingService.similaritySearch(vectorStore, query, 3);
+   console.log('\n📄 Similar documents (tutorial-style):');
+   similarDocs.forEach((doc, index) => {
+     console.log(`${index + 1}. ${doc.pageContent}`);
+   });
+
+   // Method 2: Similarity search with scores (tutorial-style)
+   const similarDocsWithScores = await embeddingService.similaritySearchWithScore(vectorStore, query, 3);
+   console.log('\n📊 Similar documents with scores:');
+   similarDocsWithScores.forEach(([doc, score], index) => {
+     console.log(`${index + 1}. Score: ${score.toFixed(4)} - ${doc.pageContent}`);
+   });
+
+   // Method 3: Using the vector store service (our enhanced approach)
+   const searchResults = await vectorStoreService.searchSimilar({
+     query,
+     maxResults: 3,
+     similarityThreshold: 0.1
+   });
+
+   console.log('\n🎯 Enhanced search results:');
+   searchResults.results.forEach((result, index) => {
+     console.log(`${index + 1}. Score: ${result.score.toFixed(4)} - ${result.content}`);
+     console.log(`   Chunk ID: ${result.chunkId}`);
+   });
+
+   return {
+     tutorialStyle: similarDocs,
+     withScores: similarDocsWithScores,
+     enhanced: searchResults.results
+   };
+ }
+
+ // Example usage with knowledge filtering (like our current RAG system)
+ async function knowledgeFilteredExample() {
+   const embeddingService = new LangChainEmbeddingService();
+   const vectorStoreService = new LangChainVectorStoreService();
+
+   await embeddingService.initialize({
+     provider: 'cohere',
+     apiKey: process.env.COHERE_API_KEY || '',
+     model: 'embed-v4.0'
+   });
+
+   await vectorStoreService.initialize(embeddingService, {
+     type: 'memory',
+     similarityThreshold: 0.1
+   });
+
+   // Documents with knowledge IDs (like our current system)
+   const documents = [
+     new Document({
+       pageContent: "Testing prototypes is crucial for product development",
+       metadata: { knowledgeId: '28', documentId: 'test_doc', chunkIndex: 0 }
+     }),
+     new Document({
+       pageContent: "Quality assurance ensures product reliability",
+       metadata: { knowledgeId: '28', documentId: 'test_doc', chunkIndex: 1 }
+     }),
+     new Document({
+       pageContent: "User feedback drives iterative improvements",
+       metadata: { knowledgeId: '29', documentId: 'feedback_doc', chunkIndex: 0 }
+     })
+   ];
+
+   await vectorStoreService.addDocuments(documents);
+
+   // Search with knowledge filtering (like our RAG system does)
+   const results = await vectorStoreService.searchSimilar({
+     query: "opisz założenia dokumentu", // Polish: "describe the document's assumptions"
+     maxResults: 2,
+     similarityThreshold: 0.1,
+     filter: {
+       knowledgeIds: ['28'] // Only search in knowledge 28
+     }
+   });
+
+   console.log('\n🔍 Knowledge-filtered search results:');
+   results.results.forEach((result, index) => {
+     console.log(`${index + 1}. Score: ${result.score.toFixed(4)}`);
+     console.log(`   Content: ${result.content}`);
+     console.log(`   Knowledge ID: ${result.metadata.knowledgeId}`);
+   });
+
+   return results;
+ }
+
+ // Export for use in tests or other modules
+ export { tutorialStyleRAGExample, knowledgeFilteredExample };
package/package.json CHANGED
@@ -1,15 +1,16 @@
  {
    "name": "@rws-framework/ai-tools",
    "private": false,
-   "version": "2.2.0",
+   "version": "3.0.0",
    "description": "",
    "main": "src/index.ts",
    "scripts": {},
    "author": "papablack",
    "license": "ISC",
    "dependencies": {
-     "@langchain/community": "0.3.28",
-     "@langchain/core": "0.3.37",
+     "@langchain/cohere": "^0.3.4",
+     "@langchain/community": "^0.3.55",
+     "@langchain/core": "^0.3.75",
      "@rws-framework/server": "3.*",
      "@rws-framework/db": "*",
      "@rws-framework/console": "*",
package/src/index.ts CHANGED
@@ -3,13 +3,15 @@ import RWSPrompt, { IChainCallOutput } from '@rws-framework/ai-tools/src/models/
  import { ILLMChunk, IRWSPromptRequestExecutor, IRWSSinglePromptRequestExecutor, IRWSPromptStreamExecutor, IRWSPromptJSON, ChainStreamType, IAIRequestOptions, IAITool, IAIToolSchema, IAIToolParameter, IToolCall, ToolHandler } from './types/IPrompt';
  import { EmbedLoader as RWSEmbed, IConvoDebugXMLData, IEmbeddingsHandler, ISplitterParams } from './models/convo/EmbedLoader';
  import RWSVectorStore from './models/convo/VectorStore';
- import { VectorStoreService } from './services/VectorStoreService';
+ import { LangChainEmbeddingService } from './services/LangChainEmbeddingService';
+ import { LangChainVectorStoreService, IVectorStoreConfig, IDocumentChunk, IVectorSearchRequest, IVectorSearchResponse, ISearchResult } from './services/LangChainVectorStoreService';
+ import { LangChainRAGService, ILangChainRAGConfig, IRAGIndexRequest, IRAGSearchRequest, IRAGResponse, IRAGStats } from './services/LangChainRAGService';
  import { IContextToken } from './types/IContextToken';
+ import { IEmbeddingConfig, IChunkConfig } from './types';
  import type { IAiCfg } from './types/IAiCfg';

  export {
    IAiCfg,
-   VectorStoreService,
    RWSVectorStore,
    RWSEmbed,
    RWSPrompt,
@@ -29,5 +31,22 @@ export {
    IAIToolSchema,
    IAIToolParameter,
    IToolCall,
-   ToolHandler
+   ToolHandler,
+   // New LangChain-based services
+   LangChainEmbeddingService,
+   LangChainVectorStoreService,
+   LangChainRAGService,
+   // Types
+   IEmbeddingConfig,
+   IChunkConfig,
+   IVectorStoreConfig,
+   IDocumentChunk,
+   IVectorSearchRequest,
+   IVectorSearchResponse,
+   ISearchResult,
+   ILangChainRAGConfig,
+   IRAGIndexRequest,
+   IRAGSearchRequest,
+   IRAGResponse,
+   IRAGStats
  };