@rws-framework/ai-tools 3.2.1 → 3.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.ts +3 -3
- package/src/services/LangChainEmbeddingService.ts +15 -15
- package/src/services/LangChainRAGService.ts +7 -5
- package/src/services/OpenAIRateLimitingService.ts +4 -22
- package/src/services/OptimizedVectorSearchService.ts +2 -1
- package/src/types/embedding.types.ts +3 -0
- package/src/types/rag.types.ts +14 -0
- package/src/types/search.types.ts +2 -2
- package/src/services/examples/OpenAICompletionService.example.ts +0 -110
package/package.json
CHANGED
package/src/index.ts
CHANGED
@@ -4,11 +4,11 @@ import { ILLMChunk, IRWSPromptRequestExecutor, IRWSSinglePromptRequestExecutor,
 import { EmbedLoader as RWSEmbed, IConvoDebugXMLData, IEmbeddingsHandler, ISplitterParams } from './models/convo/EmbedLoader';
 import RWSVectorStore from './models/convo/VectorStore';
 import { LangChainEmbeddingService } from './services/LangChainEmbeddingService';
-import { OpenAIRateLimitingService
+import { OpenAIRateLimitingService } from './services/OpenAIRateLimitingService';
 import { LangChainVectorStoreService, IVectorStoreConfig, IDocumentChunk, IVectorSearchRequest, IVectorSearchResponse, ISearchResult } from './services/LangChainVectorStoreService';
 import { LangChainRAGService, ILangChainRAGConfig, IRAGIndexRequest, IRAGSearchRequest, IRAGResponse, IRAGStats } from './services/LangChainRAGService';
 import { IContextToken } from './types/IContextToken';
-import { IEmbeddingConfig, IChunkConfig } from './types';
+import { IEmbeddingConfig, IChunkConfig, IRateLimitConfig } from './types';
 import type { IAiCfg } from './types/IAiCfg';
 import { z as ZOD } from 'zod/v4';

@@ -43,7 +43,7 @@ export {
     // Types
     IEmbeddingConfig,
     IChunkConfig,
-    IRateLimitConfig,
+    IRateLimitConfig,
     IVectorStoreConfig,
     IDocumentChunk,
     IVectorSearchRequest,

package/src/services/LangChainEmbeddingService.ts
CHANGED

@@ -51,23 +51,21 @@ export class LangChainEmbeddingService {
                     apiKey: this.config.apiKey,
                     model: this.config.model || 'text-embedding-3-large',
                     batchSize: 1 // We'll handle batching ourselves
-                });
-
-
-                this.rateLimitingService.initialize(this.config.model || 'text-embedding-3-large', {
-                    rpm: 500,
-                    tpm: 300_000,
-                    concurrency: 4,
-                    maxRetries: 6,
-                    baseBackoffMs: 500,
-                    safetyFactor: 0.75
-                });
+                });
+
                 break;

             default:
                 throw new Error(`Unsupported embedding provider: ${this.config.provider}`);
         }

+        if(this.config.rateLimiting){
+            const rateLimitingCfg = {...OpenAIRateLimitingService.DEFAULT_CONFIG, ...this.config.rateLimiting};
+
+            this.rateLimitingService.initialize(this.config.model || 'text-embedding-3-large', rateLimitingCfg);
+            console.log('Inintialized rate limiting with config:', rateLimitingCfg);
+        }
+
         console.log(`Initialized ${this.config.provider} embeddings with model ${this.config.model}`, this.config.apiKey);
     }

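
For orientation: rate limiting is now opt-in. The initialize call only runs when the consumer supplies a rateLimiting block, and any keys left out fall back to OpenAIRateLimitingService.DEFAULT_CONFIG through the object spread above. Below is a minimal TypeScript sketch of such a config, assuming IEmbeddingConfig is consumed via the package root export; the provider value and environment variable name are illustrative, not taken from this diff.

    import { IEmbeddingConfig } from '@rws-framework/ai-tools';

    // Illustrative config: only the rateLimiting keys set here override the
    // static defaults (rpm: 500, tpm: 300_000, concurrency: 4, per the
    // DEFAULT_CONFIG hunk further down); omitting rateLimiting entirely skips
    // the new `if (this.config.rateLimiting)` initialization branch.
    const embeddingConfig: IEmbeddingConfig = {
        provider: 'openai',                       // assumed provider value
        apiKey: process.env.OPENAI_API_KEY ?? '', // assumed env var name
        model: 'text-embedding-3-large',
        rateLimiting: {
            rpm: 100,
            concurrency: 2
        }
    };
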
@@ -82,7 +80,7 @@ export class LangChainEmbeddingService {
     async embedTexts(texts: string[]): Promise<number[][]> {
         this.ensureInitialized();

-        if (this.config.
+        if (this.config.rateLimiting) {
             return await this.rateLimitingService.executeWithRateLimit(
                 texts,
                 async (batch: string[]) => {
@@ -102,8 +100,8 @@ export class LangChainEmbeddingService {
     async embedText(text: string): Promise<number[]> {
         this.ensureInitialized();

-        if (this.config.
-
+        if (this.config.rateLimiting) {
+
             const results = await this.rateLimitingService.executeWithRateLimit(
                 [text],
                 async (batch: string[]) => {
@@ -125,9 +123,11 @@ export class LangChainEmbeddingService {

         // Use our custom TextChunker instead of LangChain's splitter
         // Use safe token limits - the TextChunker handles token estimation internally
-        const maxTokens = 450; // Safe token limit for embedding models
+        const maxTokens = this.chunkConfig?.chunkSize || 450; // Safe token limit for embedding models
         const overlap = this.chunkConfig?.chunkOverlap || 50; // Character overlap, not token

+        console.log('[LCEmbeddingService] Chunking with:', this.chunkConfig);
+
         return TextChunker.chunkText(text, maxTokens, overlap);
     }

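
Because maxTokens now reads chunkConfig.chunkSize instead of the hard-coded 450, chunk granularity is configurable per consumer. A minimal sketch, assuming IChunkConfig is used via the package root export and carries exactly the two fields this hunk references:

    import { IChunkConfig } from '@rws-framework/ai-tools';

    // chunkSize feeds the token limit handed to TextChunker.chunkText (falls back to 450);
    // chunkOverlap stays a character-based overlap (falls back to 50), per the hunk above.
    const chunkConfig: IChunkConfig = {
        chunkSize: 300,
        chunkOverlap: 40
    };
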

package/src/services/LangChainRAGService.ts
CHANGED

@@ -161,7 +161,7 @@ export class LangChainRAGService {
         const knowledgeVectorPromises = knowledgeIds.map(async (knowledgeId) => {
             const vectorData = await this.loadKnowledgeVectorWithEmbeddings(knowledgeId);
             return {
-                knowledgeId,
+                knowledgeId,
                 chunks: vectorData.chunks
             };
         });
@@ -178,13 +178,14 @@

         // Convert results to expected format
         const results: ISearchResult[] = searchResponse.results.map(result => ({
+            knowledgeId: result.metadata.knowledgeId,
             content: result.content,
             score: result.score,
             metadata: result.metadata,
-            chunkId: result.chunkId
+            chunkId: result.chunkId,
         }));

-        this.log('log', `[SEARCH] Found ${results.length} relevant chunks for query: "${request.query}"`);
+        this.log('log', `[SEARCH] Found ${results.length} relevant chunks for query: "${request.query}"\n`);

         return {
             success: true,
@@ -334,7 +335,7 @@ export class LangChainRAGService {
     /**
      * Load vector data for a specific knowledge item with embeddings
      */
-    private async loadKnowledgeVectorWithEmbeddings(knowledgeId: string | number): Promise<{ chunks: Array<{ content: string; embedding: number[]; metadata: any }> }> {
+    private async loadKnowledgeVectorWithEmbeddings(knowledgeId: string | number): Promise<{ knowledgeId?: string | number, chunks: Array<{ content: string; embedding: number[]; metadata: any }> }> {
         const vectorFilePath = this.getKnowledgeVectorPath(knowledgeId);

         if (!fs.existsSync(vectorFilePath)) {
@@ -347,7 +348,8 @@
             const vectorData = JSON.parse(fs.readFileSync(vectorFilePath, 'utf8'));

             return {
-                chunks: vectorData.chunks || []
+                chunks: vectorData.chunks || [],
+                knowledgeId
             };
         } catch (error) {
             this.log('error', `[LOAD] Failed to load vector data for knowledge ${knowledgeId}:`, error);

package/src/services/OpenAIRateLimitingService.ts
CHANGED

@@ -1,32 +1,14 @@
 import { Injectable } from '@nestjs/common';
 import PQueue from 'p-queue';
+import { IBatchMetadata, IRateLimitConfig } from '../types/rag.types';
+import tiktoken from 'tiktoken';

-// Optional tiktoken import
 let encoding_for_model: any = null;
-
-    const tiktoken = require('tiktoken');
-    encoding_for_model = tiktoken.encoding_for_model;
-} catch (e) {
-    console.warn('tiktoken not available, using character-based token estimation');
-}
-
-export interface IRateLimitConfig {
-    rpm?: number; // Requests per minute
-    tpm?: number; // Tokens per minute
-    concurrency?: number; // Parallel requests
-    maxRetries?: number; // Maximum retry attempts
-    baseBackoffMs?: number; // Base backoff delay
-    safetyFactor?: number; // Safety factor for limits
-}
-
-export interface IBatchMetadata<T = any> {
-    start: number;
-    batch: T[];
-}
+encoding_for_model = tiktoken.encoding_for_model

 @Injectable()
 export class OpenAIRateLimitingService {
-
+    static readonly DEFAULT_CONFIG: Required<IRateLimitConfig> = {
         rpm: 500,
         tpm: 300_000,
         concurrency: 4,
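
IRateLimitConfig and IBatchMetadata now come from rag.types, tiktoken is imported statically, and the previously hard-coded limits are exposed as a public static DEFAULT_CONFIG. A rough sketch of driving the service directly, patterned on the example file removed at the end of this diff; the root export path and the placeholder batch handler are assumptions:

    import { OpenAIRateLimitingService } from '@rws-framework/ai-tools';

    async function embedWithLimits(texts: string[]): Promise<number[][]> {
        const limiter = new OpenAIRateLimitingService();

        // Override only rpm; the rest comes from the new static defaults,
        // mirroring the spread LangChainEmbeddingService now performs internally.
        limiter.initialize('text-embedding-3-large', {
            ...OpenAIRateLimitingService.DEFAULT_CONFIG,
            rpm: 100
        });

        // executeWithRateLimit(items, batchHandler, tokenExtractor) runs the handler
        // over batches under the rpm/tpm/concurrency caps; the handler here is a stub.
        return limiter.executeWithRateLimit(
            texts,
            async (batch: string[]) => batch.map(() => [] as number[]),
            (text: string) => text
        );
    }
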

package/src/types/embedding.types.ts
CHANGED

@@ -1,3 +1,5 @@
+import { IRateLimitConfig } from "./rag.types";
+
 /**
  * Embedding service configuration interfaces
  */
@@ -6,6 +8,7 @@ export interface IEmbeddingConfig {
     apiKey: string;
     model?: string;
     batchSize?: number;
+    rateLimiting?: IRateLimitConfig
 }

 export interface IChunkConfig {

package/src/types/rag.types.ts
CHANGED

@@ -42,3 +42,17 @@ export interface IRAGStats {
     totalDocuments: number;
     knowledgeItems: number;
 }
+
+export interface IRateLimitConfig {
+    rpm?: number; // Requests per minute
+    tpm?: number; // Tokens per minute
+    concurrency?: number; // Parallel requests
+    maxRetries?: number; // Maximum retry attempts
+    baseBackoffMs?: number; // Base backoff delay
+    safetyFactor?: number; // Safety factor for limits
+}
+
+export interface IBatchMetadata<T = any> {
+    start: number;
+    batch: T[];
+}

package/src/types/search.types.ts
CHANGED

@@ -5,6 +5,7 @@ export interface ISearchResult {
     content: string;
     score: number;
     metadata: any;
+    knowledgeId: string | number;
     chunkId: string;
 }

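
With knowledgeId now part of every hit, callers can trace results back to the knowledge item they were indexed from (the RAG service populates it from result.metadata.knowledgeId, as shown earlier). A small sketch, assuming ISearchResult is consumed via the package root export:

    import { ISearchResult } from '@rws-framework/ai-tools';

    // Group search hits by originating knowledge item using the new knowledgeId field.
    function groupByKnowledge(results: ISearchResult[]): Map<string | number, ISearchResult[]> {
        const grouped = new Map<string | number, ISearchResult[]>();
        for (const result of results) {
            const bucket = grouped.get(result.knowledgeId) ?? [];
            bucket.push(result);
            grouped.set(result.knowledgeId, bucket);
        }
        return grouped;
    }
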
@@ -12,8 +13,7 @@ export interface IVectorSearchRequest {
     query: string;
     maxResults?: number;
     similarityThreshold?: number;
-    filter?: {
-        knowledgeIds?: string[];
+    filter?: {
         documentIds?: string[];
         [key: string]: any;
     };
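
knowledgeIds is dropped from the typed filter; documentIds stays, and the index signature still admits ad-hoc keys. A minimal request sketch under those assumptions, with illustrative values:

    import { IVectorSearchRequest } from '@rws-framework/ai-tools';

    const request: IVectorSearchRequest = {
        query: 'rate limiting defaults',
        maxResults: 5,
        similarityThreshold: 0.7,
        filter: {
            // knowledgeIds is no longer part of the typed filter shape
            documentIds: ['doc-1', 'doc-2']
        }
    };
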

package/src/services/examples/OpenAICompletionService.example.ts
REMOVED

@@ -1,110 +0,0 @@
-/**
- * Example usage of OpenAIRateLimitingService for other AI operations
- *
- * This demonstrates how to use the rate limiting service for:
- * - OpenAI completions
- * - Image generation
- * - Any other OpenAI API calls that need rate limiting
- */
-
-import { OpenAIRateLimitingService, IRateLimitConfig } from '../OpenAIRateLimitingService';
-import { OpenAI } from 'openai';
-
-export class OpenAICompletionService {
-    private rateLimitingService: OpenAIRateLimitingService;
-    private openai: OpenAI;
-
-    constructor(apiKey: string, config?: Partial<IRateLimitConfig>) {
-        this.openai = new OpenAI({ apiKey });
-        this.rateLimitingService = new OpenAIRateLimitingService();
-
-        // Initialize with model-specific limits
-        this.rateLimitingService.initialize('gpt-4', {
-            rpm: 500, // Adjust based on your OpenAI plan
-            tpm: 30_000, // Tokens per minute for GPT-4
-            concurrency: 3, // Lower concurrency for completion models
-            maxRetries: 5,
-            ...config
-        });
-    }
-
-    /**
-     * Generate completions with rate limiting
-     */
-    async generateCompletions(
-        prompts: string[],
-        model: string = 'gpt-4-turbo'
-    ): Promise<string[]> {
-        return await this.rateLimitingService.executeWithRateLimit(
-            prompts,
-            async (batch: string[]) => {
-                // Execute batch of completion requests
-                const promises = batch.map(prompt =>
-                    this.openai.chat.completions.create({
-                        model,
-                        messages: [{ role: 'user', content: prompt }],
-                        max_tokens: 500
-                    })
-                );
-
-                const results = await Promise.all(promises);
-                return results.map(result =>
-                    result.choices[0]?.message?.content || ''
-                );
-            },
-            (prompt: string) => prompt // Token extractor for accurate batching
-        );
-    }
-
-    /**
-     * Generate images with rate limiting
-     */
-    async generateImages(prompts: string[]): Promise<string[]> {
-        return await this.rateLimitingService.executeWithRateLimit(
-            prompts,
-            async (batch: string[]) => {
-                const promises = batch.map(prompt =>
-                    this.openai.images.generate({
-                        model: 'dall-e-3',
-                        prompt,
-                        size: '1024x1024',
-                        quality: 'standard',
-                        n: 1
-                    })
-                );
-
-                const results = await Promise.all(promises);
-                return results.map(result =>
-                    result.data[0]?.url || ''
-                );
-            },
-            (prompt: string) => prompt
-        );
-    }
-
-    /**
-     * Update rate limiting configuration
-     */
-    updateRateLimits(config: Partial<IRateLimitConfig>): void {
-        this.rateLimitingService.updateConfig(config);
-    }
-}
-
-/**
- * Usage example:
- *
- * const completionService = new OpenAICompletionService(process.env.OPENAI_API_KEY, {
- *     rpm: 100, // Lower RPM for your plan
- *     tpm: 10_000, // Lower TPM
- *     concurrency: 2
- * });
- *
- * const prompts = [
- *     "Explain quantum computing",
- *     "Write a haiku about AI",
- *     "Summarize the history of computing"
- * ];
- *
- * const completions = await completionService.generateCompletions(prompts);
- * console.log(completions);
- */