@rws-framework/ai-tools 3.4.0 → 3.5.0

package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@rws-framework/ai-tools",
   "private": false,
-  "version": "3.4.0",
+  "version": "3.5.0",
   "description": "",
   "main": "src/index.ts",
   "scripts": {},
@@ -187,12 +187,15 @@ class EmbedLoader<LLMChat extends BaseChatModel> {
             logConvo(`After the split we have ${splitDocs.length} documents more than the original ${orgDocs.length}.`);
             logConvo(`Average length among ${splitDocs.length} documents (after split) is ${avgCharCountPost} characters.`);
 
-            let i = 0;
-            splitDocs.forEach((doc: Document) => {
+            // Write files asynchronously to prevent blocking
+            await Promise.all(splitDocs.map(async (doc: Document, i: number) => {
                 finalDocs.push(doc);
-                fs.writeFileSync(this.debugSplitFile(i), doc.pageContent);
-                i++;
-            });
+                try {
+                    await fs.promises.writeFile(this.debugSplitFile(i), doc.pageContent, 'utf-8');
+                } catch (error) {
+                    console.warn(`Failed to write debug file ${i}:`, error);
+                }
+            }));
         }else{
             const splitFiles = fs.readdirSync(splitDir);
 
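The debug-split writes move from a synchronous `forEach` over `fs.writeFileSync` to concurrent `fs.promises.writeFile` calls under `Promise.all`, so a large split no longer blocks the event loop, and a single failed write now only produces a warning. A minimal standalone sketch of the pattern, with `fileFor` standing in for `this.debugSplitFile`:

```typescript
import * as fs from 'fs';

// Fire all writes concurrently; log per-file failures instead of
// letting one bad write reject the whole batch.
async function writeDebugFiles(contents: string[], fileFor: (i: number) => string): Promise<void> {
    await Promise.all(contents.map(async (content, i) => {
        try {
            await fs.promises.writeFile(fileFor(i), content, 'utf-8');
        } catch (error) {
            console.warn(`Failed to write debug file ${i}:`, error);
        }
    }));
}
```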
@@ -22,9 +22,17 @@ export class ExecutionMethodsHandler {
         debugVars: any = {},
         tools?: IAITool[]
     ): Promise<void> {
-        promptInstance.setSentInput(promptInstance.getInput());
+        // Create snapshot of current input to prevent race conditions
+        const inputSnapshot = [...promptInstance.getInput()];
+        promptInstance.setSentInput(inputSnapshot);
+
         const returnedRWS = await executor.promptRequest(promptInstance as any, { intruderPrompt, debugVars, tools });
-        promptInstance.injestOutput(returnedRWS.readOutput());
+
+        // Safely ingest output
+        const output = returnedRWS.readOutput();
+        if (output !== null && output !== undefined) {
+            promptInstance.injestOutput(output);
+        }
     }
 
     async singleRequestWith(
@@ -34,8 +42,13 @@ export class ExecutionMethodsHandler {
         ensureJson: boolean = false,
         tools?: IAITool[]
     ): Promise<void> {
+        // Create snapshot of current input to prevent race conditions
+        const inputSnapshot = [...promptInstance.getInput()];
+
         await executor.singlePromptRequest(promptInstance as any, { intruderPrompt, ensureJson, tools });
-        promptInstance.setSentInput(promptInstance.getInput());
+
+        // Set the snapshot after execution to maintain consistency
+        promptInstance.setSentInput(inputSnapshot);
     }
 
     async streamWith(
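Both request paths now take a spread copy of the prompt input before anything asynchronous runs: `getInput()` may be mutated while the request is awaited, and the copy pins down what was actually sent. A sketch of the idea, with a hypothetical `Prompt` shape for illustration only:

```typescript
// Hypothetical Prompt shape, not the package's actual interface.
interface Prompt {
    getInput(): string[];
    setSentInput(input: string[]): void;
}

async function sendWithSnapshot(prompt: Prompt, request: () => Promise<void>): Promise<void> {
    const inputSnapshot = [...prompt.getInput()]; // shallow copy, frozen at send time
    await request();                              // the live input may change while awaiting
    prompt.setSentInput(inputSnapshot);           // record what was actually sent
}
```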
@@ -22,7 +22,7 @@ export class ModelExecutionManager {
     constructor(modelId: string, modelType: string, hyperParameters: IPromptHyperParameters) {
         this.modelId = modelId;
         this.modelType = modelType;
-        this.hyperParameters = hyperParameters;
+        this.hyperParameters = hyperParameters || { temperature: 0.7, max_tokens: 512 };
     }
 
     getModelId(): string {
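The constructor now falls back to `{ temperature: 0.7, max_tokens: 512 }` when no hyperparameters are passed, matching the reshaped `IPromptHyperParameters` at the end of this diff. Roughly, assuming the class and interface are imported (the model id and type are hypothetical):

```typescript
const manager = new ModelExecutionManager(
    'gpt-4o',   // hypothetical model id
    'chat',     // hypothetical model type
    undefined as unknown as IPromptHyperParameters
);
// Internally, hyperParameters is now { temperature: 0.7, max_tokens: 512 }
// rather than undefined.
```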
@@ -34,14 +34,29 @@ export class ToolManager {
 
     async callTools<T = unknown, O = unknown>(tools: IToolCall[], moduleRef: ModuleRef, aiToolOptions?: O): Promise<T[]> {
         const results: T[] = [];
+        const errors: Error[] = [];
+
         for (const tool of tools) {
             if (this.toolHandlers.has(tool.function.name)) {
-                const result = await this.callAiTool<T, O>(tool, moduleRef, aiToolOptions);
-                if (result) {
-                    results.push(result);
+                try {
+                    const result = await this.callAiTool<T, O>(tool, moduleRef, aiToolOptions);
+                    if (result) {
+                        results.push(result);
+                    }
+                } catch (error) {
+                    console.error(`Tool execution failed for ${tool.function.name}:`, error);
+                    errors.push(error as Error);
+                    // Continue with other tools instead of failing completely
                 }
+            } else {
+                console.warn(`No handler found for tool: ${tool.function.name}`);
             }
         }
+
+        // If all tools failed, throw the first error
+        if (results.length === 0 && errors.length > 0) {
+            throw errors[0];
+        }
 
         return results;
     }
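The semantics of `callTools` change here: a throwing tool no longer aborts the batch. Errors are collected, the remaining tools still run, and only a batch in which every tool failed rethrows (the first error). Callers that relied on the first tool failure propagating immediately will now see partial results instead. The same pattern, reduced to a standalone sketch:

```typescript
// Collect successes, keep going on per-call errors, and only throw
// when nothing at all succeeded.
async function runAll<T>(calls: Array<() => Promise<T>>): Promise<T[]> {
    const results: T[] = [];
    const errors: Error[] = [];
    for (const call of calls) {
        try {
            results.push(await call());
        } catch (error) {
            errors.push(error as Error); // continue with the remaining calls
        }
    }
    if (results.length === 0 && errors.length > 0) {
        throw errors[0]; // every call failed: surface the first error
    }
    return results;
}
```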
@@ -15,6 +15,7 @@ export class LangChainEmbeddingService {
     private chunkConfig: IChunkConfig;
     private isInitialized = false;
     private vectorStore: RWSVectorStore | null = null;
+    private static embeddingsPool = new Map<string, Embeddings>(); // Connection pooling
 
     constructor(private rateLimitingService: OpenAIRateLimitingService) {}
 
@@ -37,6 +38,14 @@ export class LangChainEmbeddingService {
 
 
     private initializeEmbeddings(): void {
+        const poolKey = `${this.config.provider}_${this.config.model}_${this.config.apiKey.slice(-8)}`;
+
+        // Check connection pool first
+        if (LangChainEmbeddingService.embeddingsPool.has(poolKey)) {
+            this.embeddings = LangChainEmbeddingService.embeddingsPool.get(poolKey)!;
+            return;
+        }
+
         switch (this.config.provider) {
             case 'cohere':
                 this.embeddings = new CohereEmbeddings({
@@ -58,6 +67,9 @@ export class LangChainEmbeddingService {
             default:
                 throw new Error(`Unsupported embedding provider: ${this.config.provider}`);
         }
+
+        // Store in connection pool for reuse
+        LangChainEmbeddingService.embeddingsPool.set(poolKey, this.embeddings);
 
         if(this.config.rateLimiting){
             const rateLimitingCfg = {...OpenAIRateLimitingService.DEFAULT_CONFIG, ...this.config.rateLimiting};
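The static `embeddingsPool` gives every service instance with the same provider, model, and API key a shared embeddings client, so re-initialization stops constructing new clients. The pool key uses only the last eight characters of the API key, which keeps full secrets out of the map keys (at the cost of a theoretical collision between keys sharing a suffix). The pattern at its core, with a hypothetical `makeClient` factory:

```typescript
const pool = new Map<string, unknown>();

function getPooledClient(provider: string, model: string, apiKey: string, makeClient: () => unknown): unknown {
    const poolKey = `${provider}_${model}_${apiKey.slice(-8)}`; // key suffix only, never the full secret
    let client = pool.get(poolKey);
    if (client === undefined) {
        client = makeClient();   // construct once per provider/model/key combination
        pool.set(poolKey, client);
    }
    return client;
}
```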
@@ -41,8 +41,6 @@ export {
 export class LangChainRAGService {
     private config: ILangChainRAGConfig;
     private isInitialized = false;
-    private queryEmbeddingCache = new Map<string, number[]>();
-    private maxCacheSize = 100;
     private logger?: any; // Optional logger interface
 
     constructor(
@@ -7,6 +7,27 @@ import { BlackLogger } from '@rws-framework/server/nest';
 let encoding_for_model: any = null;
 encoding_for_model = tiktoken.encoding_for_model
 
+// Singleton tokenizer factory for performance
+class TokenizerFactory {
+    private static tokenizers = new Map<string, any>();
+
+    static getTokenizer(model: string): any {
+        if (!this.tokenizers.has(model)) {
+            try {
+                if (encoding_for_model) {
+                    this.tokenizers.set(model, encoding_for_model(model));
+                } else {
+                    this.tokenizers.set(model, null);
+                }
+            } catch (e) {
+                console.warn(`Could not load tokenizer for model ${model}`);
+                this.tokenizers.set(model, null);
+            }
+        }
+        return this.tokenizers.get(model);
+    }
+}
+
 @Injectable()
 export class OpenAIRateLimitingService {
     static readonly DEFAULT_CONFIG: Required<IRateLimitConfig> = {
@@ -37,16 +58,10 @@ export class OpenAIRateLimitingService {
             this.config = { ...this.config, ...config };
         }
 
-        // Initialize tokenizer for precise token counting
-        try {
-            if (encoding_for_model) {
-                this.tokenizer = encoding_for_model(model);
-            } else {
-                this.tokenizer = null;
-            }
-        } catch (e) {
+        // Use singleton tokenizer factory for performance
+        this.tokenizer = TokenizerFactory.getTokenizer(model);
+        if (!this.tokenizer) {
             this.logger.warn(`Could not load tokenizer for model ${model}, using character-based estimation`);
-            this.tokenizer = null;
         }
 
         // Reinitialize queue with new concurrency
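With the factory, tiktoken encodings are created once per model name and shared across service instances, so only the first use pays the load cost; lookup behavior (including the null fallback) is unchanged. Usage might look like the following sketch — the `/4` character heuristic is an assumption standing in for the service's "character-based estimation", not taken from this diff:

```typescript
const enc = TokenizerFactory.getTokenizer('gpt-4o');
const text = 'How many tokens is this?';
const tokenCount = enc
    ? enc.encode(text).length     // precise count from the cached tiktoken encoding
    : Math.ceil(text.length / 4); // rough character-based estimate (assumed heuristic)
```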
@@ -34,13 +34,18 @@ export class OptimizedVectorSearchService {
         const allCandidates: IOptimizedSearchResult[] = [];
         let totalCandidates = 0;
 
-        // Process all knowledge vectors in parallel
+        // Process all knowledge vectors with early termination optimization
        const searchPromises = knowledgeVectors.map(async (knowledgeVector) => {
             const candidates: IOptimizedSearchResult[] = [];
             const similarities: number[] = []; // Track all similarities for debugging
+            let processedCount = 0;
 
-            for (const chunk of knowledgeVector.chunks) {
+            // Sort chunks by some heuristic to check best candidates first (optional optimization)
+            const chunks = knowledgeVector.chunks;
+
+            for (const chunk of chunks) {
                 totalCandidates++;
+                processedCount++;
 
                 if (!chunk.embedding || !Array.isArray(chunk.embedding)) {
                     continue;
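This hunk only adds the bookkeeping (`processedCount`) and a hook for ordering chunks; the early-termination branch itself falls outside the shown range. A purely hypothetical shape for such a check, with knobs that are assumptions and not taken from this diff:

```typescript
// Assumed knobs, not from the package.
const EARLY_EXIT_CANDIDATES = 10;
const EARLY_EXIT_SIMILARITY = 0.9;

function canStopEarly(candidates: Array<{ similarity: number }>): boolean {
    const strong = candidates.filter(c => c.similarity >= EARLY_EXIT_SIMILARITY);
    return strong.length >= EARLY_EXIT_CANDIDATES; // enough strong matches: stop scanning this vector
}
```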
@@ -197,8 +197,9 @@ export class TextChunker {
         for (let i = 0; i < chunks.length; i++) {
             const chunk = chunks[i];
 
-            // Check if we can merge this chunk with current chunk
-            const combined = currentChunk ? currentChunk + ' ' + chunk : chunk;
+            // Use array for efficient string building
+            const parts = currentChunk ? [currentChunk, chunk] : [chunk];
+            const combined = parts.join(' ');
 
             if (combined.length <= maxChars) {
                 // Can merge
@@ -53,8 +53,7 @@ interface IAITool {
 
 interface IPromptHyperParameters {
     temperature: number,
-    top_k?: number,
-    top_p?: number,
+    max_tokens: number,
     [key: string]: number
 }
 
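This is the breaking part of the release: `top_k` and `top_p` lose their named slots and `max_tokens` becomes required, mirroring the constructor default in `ModelExecutionManager` above. Because the `[key: string]: number` index signature survives, provider-specific sampling values still type-check as extra keys:

```typescript
const params: IPromptHyperParameters = {
    temperature: 0.7,
    max_tokens: 512,   // now required
    top_p: 0.95,       // still allowed through the index signature
    top_k: 40,
};
```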