@rws-framework/ai-tools 3.2.2 → 3.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@rws-framework/ai-tools",
   "private": false,
-  "version": "3.2.2",
+  "version": "3.2.3",
   "description": "",
   "main": "src/index.ts",
   "scripts": {},
package/src/index.ts CHANGED
@@ -4,11 +4,11 @@ import { ILLMChunk, IRWSPromptRequestExecutor, IRWSSinglePromptRequestExecutor,
 import { EmbedLoader as RWSEmbed, IConvoDebugXMLData, IEmbeddingsHandler, ISplitterParams } from './models/convo/EmbedLoader';
 import RWSVectorStore from './models/convo/VectorStore';
 import { LangChainEmbeddingService } from './services/LangChainEmbeddingService';
-import { OpenAIRateLimitingService, IRateLimitConfig } from './services/OpenAIRateLimitingService';
+import { OpenAIRateLimitingService } from './services/OpenAIRateLimitingService';
 import { LangChainVectorStoreService, IVectorStoreConfig, IDocumentChunk, IVectorSearchRequest, IVectorSearchResponse, ISearchResult } from './services/LangChainVectorStoreService';
 import { LangChainRAGService, ILangChainRAGConfig, IRAGIndexRequest, IRAGSearchRequest, IRAGResponse, IRAGStats } from './services/LangChainRAGService';
 import { IContextToken } from './types/IContextToken';
-import { IEmbeddingConfig, IChunkConfig } from './types';
+import { IEmbeddingConfig, IChunkConfig, IRateLimitConfig } from './types';
 import type { IAiCfg } from './types/IAiCfg';
 import { z as ZOD } from 'zod/v4';

@@ -43,7 +43,7 @@ export {
   // Types
   IEmbeddingConfig,
   IChunkConfig,
-  IRateLimitConfig,
+  IRateLimitConfig,
   IVectorStoreConfig,
   IDocumentChunk,
   IVectorSearchRequest,
@@ -51,23 +51,21 @@ export class LangChainEmbeddingService {
           apiKey: this.config.apiKey,
           model: this.config.model || 'text-embedding-3-large',
           batchSize: 1 // We'll handle batching ourselves
-        });
-
-
-        this.rateLimitingService.initialize(this.config.model || 'text-embedding-3-large', {
-          rpm: 500,
-          tpm: 300_000,
-          concurrency: 4,
-          maxRetries: 6,
-          baseBackoffMs: 500,
-          safetyFactor: 0.75
-        });
+        });
+
         break;

       default:
         throw new Error(`Unsupported embedding provider: ${this.config.provider}`);
     }

+    if(this.config.rateLimiting){
+      const rateLimitingCfg = {...OpenAIRateLimitingService.DEFAULT_CONFIG, ...this.config.rateLimiting};
+
+      this.rateLimitingService.initialize(this.config.model || 'text-embedding-3-large', rateLimitingCfg);
+      console.log('Initialized rate limiting with config:', rateLimitingCfg);
+    }
+
     console.log(`Initialized ${this.config.provider} embeddings with model ${this.config.model}`, this.config.apiKey);
   }
 
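Net effect of this hunk: rate limiting is now opt-in. The hard-coded initialize() call is gone, the limiter is only set up when the embedding config carries the new rateLimiting field, and any values the caller omits are filled in from OpenAIRateLimitingService.DEFAULT_CONFIG. Below is a minimal sketch of a config that takes the new branch; the provider field is referenced by the surrounding code but not shown in this diff, and how the config object reaches LangChainEmbeddingService is left to the consuming app.

import { IEmbeddingConfig } from '@rws-framework/ai-tools';

// Hypothetical consumer config: only the limits you care about need to be set,
// the rest are merged in from OpenAIRateLimitingService.DEFAULT_CONFIG.
const embeddingConfig: IEmbeddingConfig = {
  provider: 'openai',
  apiKey: process.env.OPENAI_API_KEY!,
  model: 'text-embedding-3-large',
  rateLimiting: {
    rpm: 100,        // requests per minute for your plan
    tpm: 100_000,    // tokens per minute
    concurrency: 2   // parallel batches
  }
};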
@@ -82,7 +80,7 @@ export class LangChainEmbeddingService {
   async embedTexts(texts: string[]): Promise<number[][]> {
     this.ensureInitialized();

-    if (this.config.provider === 'openai' && this.rateLimitingService) {
+    if (this.config.rateLimiting) {
       return await this.rateLimitingService.executeWithRateLimit(
         texts,
         async (batch: string[]) => {
@@ -102,8 +100,8 @@ export class LangChainEmbeddingService {
   async embedText(text: string): Promise<number[]> {
     this.ensureInitialized();

-    if (this.config.provider === 'openai' && this.rateLimitingService) {
-      // For single texts with OpenAI, use the rate-controlled batch method
+    if (this.config.rateLimiting) {
+
       const results = await this.rateLimitingService.executeWithRateLimit(
         [text],
         async (batch: string[]) => {
@@ -125,9 +123,11 @@ export class LangChainEmbeddingService {

     // Use our custom TextChunker instead of LangChain's splitter
     // Use safe token limits - the TextChunker handles token estimation internally
-    const maxTokens = 450; // Safe token limit for embedding models
+    const maxTokens = this.chunkConfig?.chunkSize || 450; // Safe token limit for embedding models
     const overlap = this.chunkConfig?.chunkOverlap || 50; // Character overlap, not token

+    console.log('[LCEmbeddingService] Chunking with:', this.chunkConfig);
+
     return TextChunker.chunkText(text, maxTokens, overlap);
   }

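With this change the chunk size is configurable instead of pinned at 450 tokens; the overlap was already config-driven. A rough sketch of how the two values map onto the TextChunker.chunkText call above, assuming chunkConfig is shaped like the exported IChunkConfig (the values are placeholders and the wiring of chunkConfig into the service is not part of this diff):

import { IChunkConfig } from '@rws-framework/ai-tools';

// Hypothetical chunking settings; 450 tokens / 50 characters remain the fallbacks.
const chunkConfig: IChunkConfig = { chunkSize: 300, chunkOverlap: 30 };

const maxTokens = chunkConfig.chunkSize || 450;  // token budget per chunk
const overlap = chunkConfig.chunkOverlap || 50;  // character overlap between chunks
// The service then calls TextChunker.chunkText(text, maxTokens, overlap) with these values.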
@@ -185,7 +185,7 @@ export class LangChainRAGService {
         chunkId: result.chunkId,
       }));

-      this.log('log', `[SEARCH] Found ${results.length} relevant chunks for query: "${request.query}"\n ${JSON.stringify({ results})}`);
+      this.log('log', `[SEARCH] Found ${results.length} relevant chunks for query: "${request.query}"\n`);

       return {
         success: true,
@@ -1,32 +1,14 @@
 import { Injectable } from '@nestjs/common';
 import PQueue from 'p-queue';
+import { IBatchMetadata, IRateLimitConfig } from '../types/rag.types';
+import tiktoken from 'tiktoken';

-// Optional tiktoken import
 let encoding_for_model: any = null;
-try {
-  const tiktoken = require('tiktoken');
-  encoding_for_model = tiktoken.encoding_for_model;
-} catch (e) {
-  console.warn('tiktoken not available, using character-based token estimation');
-}
-
-export interface IRateLimitConfig {
-  rpm?: number; // Requests per minute
-  tpm?: number; // Tokens per minute
-  concurrency?: number; // Parallel requests
-  maxRetries?: number; // Maximum retry attempts
-  baseBackoffMs?: number; // Base backoff delay
-  safetyFactor?: number; // Safety factor for limits
-}
-
-export interface IBatchMetadata<T = any> {
-  start: number;
-  batch: T[];
-}
+encoding_for_model = tiktoken.encoding_for_model

 @Injectable()
 export class OpenAIRateLimitingService {
-  private static readonly DEFAULT_CONFIG: Required<IRateLimitConfig> = {
+  static readonly DEFAULT_CONFIG: Required<IRateLimitConfig> = {
     rpm: 500,
     tpm: 300_000,
     concurrency: 4,
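Two side effects of this hunk are worth noting: tiktoken moves from an optional require() to a top-level import and therefore becomes a hard runtime dependency, and DEFAULT_CONFIG loses its private modifier, so callers can build a full Required<IRateLimitConfig> by spreading it, which is exactly what the LangChainEmbeddingService hunk above relies on. A small sketch of that pattern, using the same relative import paths as the package's own index.ts (the override values are placeholders):

import { OpenAIRateLimitingService } from './services/OpenAIRateLimitingService';
import { IRateLimitConfig } from './types';

// Placeholder overrides; anything left unspecified falls back to the now-public defaults.
const overrides: IRateLimitConfig = { rpm: 200, concurrency: 2 };
const cfg = { ...OpenAIRateLimitingService.DEFAULT_CONFIG, ...overrides };

const limiter = new OpenAIRateLimitingService();
limiter.initialize('text-embedding-3-large', cfg);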
@@ -1,3 +1,5 @@
+import { IRateLimitConfig } from "./rag.types";
+
 /**
  * Embedding service configuration interfaces
  */
@@ -6,6 +8,7 @@ export interface IEmbeddingConfig {
   apiKey: string;
   model?: string;
   batchSize?: number;
+  rateLimiting?: IRateLimitConfig
 }

 export interface IChunkConfig {
@@ -42,3 +42,17 @@ export interface IRAGStats {
   totalDocuments: number;
   knowledgeItems: number;
 }
+
+export interface IRateLimitConfig {
+  rpm?: number; // Requests per minute
+  tpm?: number; // Tokens per minute
+  concurrency?: number; // Parallel requests
+  maxRetries?: number; // Maximum retry attempts
+  baseBackoffMs?: number; // Base backoff delay
+  safetyFactor?: number; // Safety factor for limits
+}
+
+export interface IBatchMetadata<T = any> {
+  start: number;
+  batch: T[];
+}
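For package consumers the relocation should be transparent: IRateLimitConfig stays in the root export list of index.ts (see the hunk near the top), it just no longer lives in the service module. A short sketch, assuming imports resolve through the entry point declared in package.json:

// Unchanged for consumers: the types come through the package-root re-export.
import { IEmbeddingConfig, IRateLimitConfig } from '@rws-framework/ai-tools';

// A deep import of IRateLimitConfig from the OpenAIRateLimitingService module,
// as the removed example file below did, would no longer compile.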
@@ -1,110 +0,0 @@
-/**
- * Example usage of OpenAIRateLimitingService for other AI operations
- *
- * This demonstrates how to use the rate limiting service for:
- * - OpenAI completions
- * - Image generation
- * - Any other OpenAI API calls that need rate limiting
- */
-
-import { OpenAIRateLimitingService, IRateLimitConfig } from '../OpenAIRateLimitingService';
-import { OpenAI } from 'openai';
-
-export class OpenAICompletionService {
-  private rateLimitingService: OpenAIRateLimitingService;
-  private openai: OpenAI;
-
-  constructor(apiKey: string, config?: Partial<IRateLimitConfig>) {
-    this.openai = new OpenAI({ apiKey });
-    this.rateLimitingService = new OpenAIRateLimitingService();
-
-    // Initialize with model-specific limits
-    this.rateLimitingService.initialize('gpt-4', {
-      rpm: 500, // Adjust based on your OpenAI plan
-      tpm: 30_000, // Tokens per minute for GPT-4
-      concurrency: 3, // Lower concurrency for completion models
-      maxRetries: 5,
-      ...config
-    });
-  }
-
-  /**
-   * Generate completions with rate limiting
-   */
-  async generateCompletions(
-    prompts: string[],
-    model: string = 'gpt-4-turbo'
-  ): Promise<string[]> {
-    return await this.rateLimitingService.executeWithRateLimit(
-      prompts,
-      async (batch: string[]) => {
-        // Execute batch of completion requests
-        const promises = batch.map(prompt =>
-          this.openai.chat.completions.create({
-            model,
-            messages: [{ role: 'user', content: prompt }],
-            max_tokens: 500
-          })
-        );
-
-        const results = await Promise.all(promises);
-        return results.map(result =>
-          result.choices[0]?.message?.content || ''
-        );
-      },
-      (prompt: string) => prompt // Token extractor for accurate batching
-    );
-  }
-
-  /**
-   * Generate images with rate limiting
-   */
-  async generateImages(prompts: string[]): Promise<string[]> {
-    return await this.rateLimitingService.executeWithRateLimit(
-      prompts,
-      async (batch: string[]) => {
-        const promises = batch.map(prompt =>
-          this.openai.images.generate({
-            model: 'dall-e-3',
-            prompt,
-            size: '1024x1024',
-            quality: 'standard',
-            n: 1
-          })
-        );
-
-        const results = await Promise.all(promises);
-        return results.map(result =>
-          result.data[0]?.url || ''
-        );
-      },
-      (prompt: string) => prompt
-    );
-  }
-
-  /**
-   * Update rate limiting configuration
-   */
-  updateRateLimits(config: Partial<IRateLimitConfig>): void {
-    this.rateLimitingService.updateConfig(config);
-  }
-}
-
-/**
- * Usage example:
- *
- * const completionService = new OpenAICompletionService(process.env.OPENAI_API_KEY, {
- *   rpm: 100, // Lower RPM for your plan
- *   tpm: 10_000, // Lower TPM
- *   concurrency: 2
- * });
- *
- * const prompts = [
- *   "Explain quantum computing",
- *   "Write a haiku about AI",
- *   "Summarize the history of computing"
- * ];
- *
- * const completions = await completionService.generateCompletions(prompts);
- * console.log(completions);
- */