@rws-framework/ai-tools 3.4.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/models/convo/EmbedLoader.ts +8 -5
- package/src/models/prompts/inc/execution-methods-handler.ts +16 -3
- package/src/models/prompts/inc/model-execution-manager.ts +1 -1
- package/src/models/prompts/inc/tool-manager.ts +18 -3
- package/src/services/LangChainEmbeddingService.ts +12 -0
- package/src/services/LangChainRAGService.ts +0 -2
- package/src/services/OpenAIRateLimitingService.ts +24 -9
- package/src/services/OptimizedVectorSearchService.ts +7 -2
- package/src/services/TextChunker.ts +3 -2
- package/src/types/IPrompt.ts +1 -2
package/package.json
CHANGED

package/src/models/convo/EmbedLoader.ts
CHANGED

@@ -187,12 +187,15 @@ class EmbedLoader<LLMChat extends BaseChatModel> {
       logConvo(`After the split we have ${splitDocs.length} documents more than the original ${orgDocs.length}.`);
       logConvo(`Average length among ${splitDocs.length} documents (after split) is ${avgCharCountPost} characters.`);

-
-      splitDocs.
+      // Write files asynchronously to prevent blocking
+      await Promise.all(splitDocs.map(async (doc: Document, i: number) => {
         finalDocs.push(doc);
-
-
-
+        try {
+          await fs.promises.writeFile(this.debugSplitFile(i), doc.pageContent, 'utf-8');
+        } catch (error) {
+          console.warn(`Failed to write debug file ${i}:`, error);
+        }
+      }));
       }else{
         const splitFiles = fs.readdirSync(splitDir);

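The EmbedLoader change replaces the old per-document write step with fs.promises.writeFile calls fanned out through Promise.all, so debug output no longer blocks the split loop and a single failed write only logs a warning. A minimal standalone sketch of the same pattern (the Doc interface, writeDebugFiles name, and file naming are illustrative assumptions, not the package's API):

    import * as fs from 'fs';
    import * as path from 'path';

    // Hypothetical document shape; the package uses LangChain's Document type.
    interface Doc { pageContent: string }

    async function writeDebugFiles(docs: Doc[], outDir: string): Promise<void> {
        // All writes are started together and awaited as a group,
        // so one slow or failing write does not block the others.
        await Promise.all(docs.map(async (doc, i) => {
            try {
                await fs.promises.writeFile(path.join(outDir, `split_${i}.txt`), doc.pageContent, 'utf-8');
            } catch (error) {
                // Debug output is best-effort: log and continue.
                console.warn(`Failed to write debug file ${i}:`, error);
            }
        }));
    }
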
package/src/models/prompts/inc/execution-methods-handler.ts
CHANGED

@@ -22,9 +22,17 @@ export class ExecutionMethodsHandler {
         debugVars: any = {},
         tools?: IAITool[]
     ): Promise<void> {
-
+        // Create snapshot of current input to prevent race conditions
+        const inputSnapshot = [...promptInstance.getInput()];
+        promptInstance.setSentInput(inputSnapshot);
+
         const returnedRWS = await executor.promptRequest(promptInstance as any, { intruderPrompt, debugVars, tools });
-
+
+        // Safely ingest output
+        const output = returnedRWS.readOutput();
+        if (output !== null && output !== undefined) {
+            promptInstance.injestOutput(output);
+        }
     }

     async singleRequestWith(

@@ -34,8 +42,13 @@ export class ExecutionMethodsHandler {
         ensureJson: boolean = false,
         tools?: IAITool[]
     ): Promise<void> {
+        // Create snapshot of current input to prevent race conditions
+        const inputSnapshot = [...promptInstance.getInput()];
+
         await executor.singlePromptRequest(promptInstance as any, { intruderPrompt, ensureJson, tools });
-
+
+        // Set the snapshot after execution to maintain consistency
+        promptInstance.setSentInput(inputSnapshot);
     }

     async streamWith(

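Both ExecutionMethodsHandler hunks apply the same idea: spread-copy the prompt's input array so that concurrent mutations of the live input cannot change what gets recorded as sent. A rough illustration of why the copy matters, using a simplified stand-in Prompt class (getInput/setSentInput mirror the names in the diff; everything else here is invented for the sketch):

    // Simplified stand-in for the prompt object used in the diff.
    class Prompt {
        private input: string[] = [];
        private sentInput: string[] = [];

        getInput(): string[] { return this.input; }
        addInput(msg: string): void { this.input.push(msg); }
        setSentInput(snapshot: string[]): void { this.sentInput = snapshot; }
        getSentInput(): string[] { return this.sentInput; }
    }

    async function execute(prompt: Prompt, send: (input: string[]) => Promise<void>): Promise<void> {
        // [...array] copies the array, so later addInput() calls from other
        // callers cannot retroactively change what we record as "sent".
        const inputSnapshot = [...prompt.getInput()];
        await send(inputSnapshot);
        prompt.setSentInput(inputSnapshot);
    }
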
package/src/models/prompts/inc/model-execution-manager.ts
CHANGED

@@ -22,7 +22,7 @@ export class ModelExecutionManager {
     constructor(modelId: string, modelType: string, hyperParameters: IPromptHyperParameters) {
         this.modelId = modelId;
         this.modelType = modelType;
-        this.hyperParameters = hyperParameters;
+        this.hyperParameters = hyperParameters || { temperature: 0.7, max_tokens: 512 };
     }

     getModelId(): string {

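The ModelExecutionManager change guards against a missing hyperParameters argument with an || fallback; since the parameter is an object, this effectively covers undefined and null. A short sketch makes the behaviour explicit (the default values are the ones shown in the diff; the reduced HyperParameters interface stands in for IPromptHyperParameters):

    // Reduced stand-in for IPromptHyperParameters from the package.
    interface HyperParameters {
        temperature: number;
        max_tokens: number;
    }

    function resolveHyperParameters(provided?: HyperParameters): HyperParameters {
        // `||` falls back when `provided` is undefined or null (or any falsy value);
        // `??` would restrict the fallback to undefined/null only.
        return provided || { temperature: 0.7, max_tokens: 512 };
    }

    console.log(resolveHyperParameters());                                     // { temperature: 0.7, max_tokens: 512 }
    console.log(resolveHyperParameters({ temperature: 0.2, max_tokens: 64 })); // caller's values win
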
package/src/models/prompts/inc/tool-manager.ts
CHANGED

@@ -34,14 +34,29 @@ export class ToolManager {

     async callTools<T = unknown, O = unknown>(tools: IToolCall[], moduleRef: ModuleRef, aiToolOptions?: O): Promise<T[]> {
         const results: T[] = [];
+        const errors: Error[] = [];
+
         for (const tool of tools) {
             if (this.toolHandlers.has(tool.function.name)) {
-
-
-
+                try {
+                    const result = await this.callAiTool<T, O>(tool, moduleRef, aiToolOptions);
+                    if (result) {
+                        results.push(result);
+                    }
+                } catch (error) {
+                    console.error(`Tool execution failed for ${tool.function.name}:`, error);
+                    errors.push(error as Error);
+                    // Continue with other tools instead of failing completely
                 }
+            } else {
+                console.warn(`No handler found for tool: ${tool.function.name}`);
             }
         }
+
+        // If all tools failed, throw the first error
+        if (results.length === 0 && errors.length > 0) {
+            throw errors[0];
+        }

         return results;
     }

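The ToolManager hunk switches from fail-fast to collect-and-continue: each tool call is wrapped in try/catch, failures are accumulated, and only when nothing succeeded is the first error rethrown. The same pattern in isolation (the ToolCall shape and runTool callback are placeholders, not the package's API):

    type ToolCall = { name: string };

    async function callAll<T>(tools: ToolCall[], runTool: (t: ToolCall) => Promise<T>): Promise<T[]> {
        const results: T[] = [];
        const errors: Error[] = [];

        for (const tool of tools) {
            try {
                results.push(await runTool(tool));
            } catch (error) {
                // One failing tool should not abort the remaining ones.
                console.error(`Tool execution failed for ${tool.name}:`, error);
                errors.push(error as Error);
            }
        }

        // Only surface an error when every tool failed.
        if (results.length === 0 && errors.length > 0) {
            throw errors[0];
        }
        return results;
    }
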
package/src/services/LangChainEmbeddingService.ts
CHANGED

@@ -15,6 +15,7 @@ export class LangChainEmbeddingService {
     private chunkConfig: IChunkConfig;
     private isInitialized = false;
     private vectorStore: RWSVectorStore | null = null;
+    private static embeddingsPool = new Map<string, Embeddings>(); // Connection pooling

     constructor(private rateLimitingService: OpenAIRateLimitingService) {}

@@ -37,6 +38,14 @@ export class LangChainEmbeddingService {


     private initializeEmbeddings(): void {
+        const poolKey = `${this.config.provider}_${this.config.model}_${this.config.apiKey.slice(-8)}`;
+
+        // Check connection pool first
+        if (LangChainEmbeddingService.embeddingsPool.has(poolKey)) {
+            this.embeddings = LangChainEmbeddingService.embeddingsPool.get(poolKey)!;
+            return;
+        }
+
         switch (this.config.provider) {
             case 'cohere':
                 this.embeddings = new CohereEmbeddings({

@@ -58,6 +67,9 @@ export class LangChainEmbeddingService {
             default:
                 throw new Error(`Unsupported embedding provider: ${this.config.provider}`);
         }
+
+        // Store in connection pool for reuse
+        LangChainEmbeddingService.embeddingsPool.set(poolKey, this.embeddings);

         if(this.config.rateLimiting){
             const rateLimitingCfg = {...OpenAIRateLimitingService.DEFAULT_CONFIG, ...this.config.rateLimiting};

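Taken together, the three LangChainEmbeddingService hunks add a static, process-wide pool keyed by provider, model, and the last characters of the API key, so repeated service instances reuse one embeddings client instead of constructing a new one each time. A generic sketch of that keyed-singleton pattern (EmbeddingsClient, EmbeddingsPool, and createClient are invented for illustration; the package pools LangChain Embeddings instances):

    // Placeholder for whatever client object is expensive to construct.
    interface EmbeddingsClient { embed(text: string): Promise<number[]> }

    class EmbeddingsPool {
        private static pool = new Map<string, EmbeddingsClient>();

        static get(provider: string, model: string, apiKey: string, createClient: () => EmbeddingsClient): EmbeddingsClient {
            // Key by provider/model and an API-key suffix, mirroring the diff,
            // so different credentials never share a client.
            const poolKey = `${provider}_${model}_${apiKey.slice(-8)}`;
            let client = this.pool.get(poolKey);
            if (!client) {
                client = createClient();
                this.pool.set(poolKey, client);
            }
            return client;
        }
    }
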
package/src/services/LangChainRAGService.ts
CHANGED

@@ -41,8 +41,6 @@ export {
 export class LangChainRAGService {
     private config: ILangChainRAGConfig;
     private isInitialized = false;
-    private queryEmbeddingCache = new Map<string, number[]>();
-    private maxCacheSize = 100;
     private logger?: any; // Optional logger interface

     constructor(

package/src/services/OpenAIRateLimitingService.ts
CHANGED

@@ -7,6 +7,27 @@ import { BlackLogger } from '@rws-framework/server/nest';
 let encoding_for_model: any = null;
 encoding_for_model = tiktoken.encoding_for_model

+// Singleton tokenizer factory for performance
+class TokenizerFactory {
+    private static tokenizers = new Map<string, any>();
+
+    static getTokenizer(model: string): any {
+        if (!this.tokenizers.has(model)) {
+            try {
+                if (encoding_for_model) {
+                    this.tokenizers.set(model, encoding_for_model(model));
+                } else {
+                    this.tokenizers.set(model, null);
+                }
+            } catch (e) {
+                console.warn(`Could not load tokenizer for model ${model}`);
+                this.tokenizers.set(model, null);
+            }
+        }
+        return this.tokenizers.get(model);
+    }
+}
+
 @Injectable()
 export class OpenAIRateLimitingService {
     static readonly DEFAULT_CONFIG: Required<IRateLimitConfig> = {

@@ -37,16 +58,10 @@ export class OpenAIRateLimitingService {
         this.config = { ...this.config, ...config };
     }

-        //
-
-
-            this.tokenizer = encoding_for_model(model);
-        } else {
-            this.tokenizer = null;
-        }
-        } catch (e) {
+        // Use singleton tokenizer factory for performance
+        this.tokenizer = TokenizerFactory.getTokenizer(model);
+        if (!this.tokenizer) {
             this.logger.warn(`Could not load tokenizer for model ${model}, using character-based estimation`);
-            this.tokenizer = null;
         }

         // Reinitialize queue with new concurrency

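The two OpenAIRateLimitingService hunks hoist tokenizer construction into the static TokenizerFactory, so each model's tiktoken encoder is created once and reused, with a character-based estimate as the fallback when no tokenizer is available. Assuming the factory shown in the diff is in scope, a caller might look roughly like this (estimateTokens and the 4-characters-per-token heuristic are illustrative, not taken from the package):

    // Assumes the TokenizerFactory from the diff above is in scope.
    function estimateTokens(text: string, model: string): number {
        const tokenizer = TokenizerFactory.getTokenizer(model);
        if (tokenizer) {
            // tiktoken encoders expose encode(); the length of the returned
            // array is the token count for the given text.
            return tokenizer.encode(text).length;
        }
        // Fallback: rough character-based estimate (~4 chars per token for English text).
        return Math.ceil(text.length / 4);
    }
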
package/src/services/OptimizedVectorSearchService.ts
CHANGED

@@ -34,13 +34,18 @@ export class OptimizedVectorSearchService {
         const allCandidates: IOptimizedSearchResult[] = [];
         let totalCandidates = 0;

-        // Process all knowledge vectors
+        // Process all knowledge vectors with early termination optimization
         const searchPromises = knowledgeVectors.map(async (knowledgeVector) => {
             const candidates: IOptimizedSearchResult[] = [];
             const similarities: number[] = []; // Track all similarities for debugging
+            let processedCount = 0;

-
+            // Sort chunks by some heuristic to check best candidates first (optional optimization)
+            const chunks = knowledgeVector.chunks;
+
+            for (const chunk of chunks) {
                 totalCandidates++;
+                processedCount++;

                 if (!chunk.embedding || !Array.isArray(chunk.embedding)) {
                     continue;

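The OptimizedVectorSearchService hunk adds a processedCount and moves the chunk scan into a plain for...of loop, which (unlike a map/forEach callback) can break out early once enough good candidates have been found; the hunk itself only lays the groundwork for that. A hedged sketch of what an early-exit scan over chunk embeddings could look like (the Chunk shape, cosineSimilarity helper, and thresholds are assumptions for illustration, not the package's implementation):

    interface Chunk { content: string; embedding: number[] }

    function cosineSimilarity(a: number[], b: number[]): number {
        let dot = 0, normA = 0, normB = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        return dot / (Math.sqrt(normA) * Math.sqrt(normB) || 1);
    }

    function searchChunks(query: number[], chunks: Chunk[], minScore = 0.75, maxHits = 5): Chunk[] {
        const hits: Chunk[] = [];
        for (const chunk of chunks) {
            if (!chunk.embedding || !Array.isArray(chunk.embedding)) {
                continue;
            }
            if (cosineSimilarity(query, chunk.embedding) >= minScore) {
                hits.push(chunk);
                // Early termination: stop scanning once enough strong matches exist.
                if (hits.length >= maxHits) {
                    break;
                }
            }
        }
        return hits;
    }
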
package/src/services/TextChunker.ts
CHANGED

@@ -197,8 +197,9 @@ export class TextChunker {
     for (let i = 0; i < chunks.length; i++) {
       const chunk = chunks[i];

-      //
-      const
+      // Use array for efficient string building
+      const parts = currentChunk ? [currentChunk, chunk] : [chunk];
+      const combined = parts.join(' ');

       if (combined.length <= maxChars) {
         // Can merge