agentic-qe 2.6.1 → 2.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/qe-code-intelligence.md +88 -1
- package/CHANGELOG.md +134 -0
- package/README.md +222 -159
- package/dist/agents/BaseAgent.d.ts +19 -0
- package/dist/agents/BaseAgent.d.ts.map +1 -1
- package/dist/agents/BaseAgent.js +41 -1
- package/dist/agents/BaseAgent.js.map +1 -1
- package/dist/agents/CodeIntelligenceAgent.d.ts +18 -1
- package/dist/agents/CodeIntelligenceAgent.d.ts.map +1 -1
- package/dist/agents/CodeIntelligenceAgent.js +96 -1
- package/dist/agents/CodeIntelligenceAgent.js.map +1 -1
- package/dist/agents/CoverageAnalyzerAgent.d.ts +8 -0
- package/dist/agents/CoverageAnalyzerAgent.d.ts.map +1 -1
- package/dist/agents/CoverageAnalyzerAgent.js +65 -1
- package/dist/agents/CoverageAnalyzerAgent.js.map +1 -1
- package/dist/agents/TestGeneratorAgent.d.ts +2 -2
- package/dist/agents/TestGeneratorAgent.d.ts.map +1 -1
- package/dist/agents/TestGeneratorAgent.js +16 -6
- package/dist/agents/TestGeneratorAgent.js.map +1 -1
- package/dist/agents/adapters/AgentLLMAdapter.d.ts +127 -0
- package/dist/agents/adapters/AgentLLMAdapter.d.ts.map +1 -0
- package/dist/agents/adapters/AgentLLMAdapter.js +366 -0
- package/dist/agents/adapters/AgentLLMAdapter.js.map +1 -0
- package/dist/agents/adapters/index.d.ts +1 -0
- package/dist/agents/adapters/index.d.ts.map +1 -1
- package/dist/agents/adapters/index.js +5 -1
- package/dist/agents/adapters/index.js.map +1 -1
- package/dist/agents/interfaces/IAgentLLM.d.ts +257 -0
- package/dist/agents/interfaces/IAgentLLM.d.ts.map +1 -0
- package/dist/agents/interfaces/IAgentLLM.js +39 -0
- package/dist/agents/interfaces/IAgentLLM.js.map +1 -0
- package/dist/agents/interfaces/index.d.ts +10 -0
- package/dist/agents/interfaces/index.d.ts.map +1 -0
- package/dist/agents/interfaces/index.js +14 -0
- package/dist/agents/interfaces/index.js.map +1 -0
- package/dist/agents/n8n/N8nBaseAgent.d.ts +18 -0
- package/dist/agents/n8n/N8nBaseAgent.d.ts.map +1 -1
- package/dist/agents/n8n/N8nBaseAgent.js +80 -0
- package/dist/agents/n8n/N8nBaseAgent.js.map +1 -1
- package/dist/cli/commands/knowledge-graph.d.ts +30 -0
- package/dist/cli/commands/knowledge-graph.d.ts.map +1 -1
- package/dist/cli/commands/knowledge-graph.js +206 -4
- package/dist/cli/commands/knowledge-graph.js.map +1 -1
- package/dist/cli/commands/providers.d.ts +50 -0
- package/dist/cli/commands/providers.d.ts.map +1 -0
- package/dist/cli/commands/providers.js +403 -0
- package/dist/cli/commands/providers.js.map +1 -0
- package/dist/cli/index.js +214 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/code-intelligence/indexing/FileWatcher.d.ts.map +1 -1
- package/dist/code-intelligence/indexing/FileWatcher.js +11 -8
- package/dist/code-intelligence/indexing/FileWatcher.js.map +1 -1
- package/dist/code-intelligence/inference/ComponentBoundaryAnalyzer.d.ts +75 -0
- package/dist/code-intelligence/inference/ComponentBoundaryAnalyzer.d.ts.map +1 -0
- package/dist/code-intelligence/inference/ComponentBoundaryAnalyzer.js +400 -0
- package/dist/code-intelligence/inference/ComponentBoundaryAnalyzer.js.map +1 -0
- package/dist/code-intelligence/inference/ExternalSystemDetector.d.ts +31 -0
- package/dist/code-intelligence/inference/ExternalSystemDetector.d.ts.map +1 -0
- package/dist/code-intelligence/inference/ExternalSystemDetector.js +523 -0
- package/dist/code-intelligence/inference/ExternalSystemDetector.js.map +1 -0
- package/dist/code-intelligence/inference/ProjectMetadataAnalyzer.d.ts +78 -0
- package/dist/code-intelligence/inference/ProjectMetadataAnalyzer.d.ts.map +1 -0
- package/dist/code-intelligence/inference/ProjectMetadataAnalyzer.js +491 -0
- package/dist/code-intelligence/inference/ProjectMetadataAnalyzer.js.map +1 -0
- package/dist/code-intelligence/inference/index.d.ts +36 -0
- package/dist/code-intelligence/inference/index.d.ts.map +1 -0
- package/dist/code-intelligence/inference/index.js +65 -0
- package/dist/code-intelligence/inference/index.js.map +1 -0
- package/dist/code-intelligence/inference/types.d.ts +196 -0
- package/dist/code-intelligence/inference/types.d.ts.map +1 -0
- package/dist/code-intelligence/inference/types.js +9 -0
- package/dist/code-intelligence/inference/types.js.map +1 -0
- package/dist/code-intelligence/visualization/C4ComponentDiagramBuilder.d.ts +75 -0
- package/dist/code-intelligence/visualization/C4ComponentDiagramBuilder.d.ts.map +1 -0
- package/dist/code-intelligence/visualization/C4ComponentDiagramBuilder.js +267 -0
- package/dist/code-intelligence/visualization/C4ComponentDiagramBuilder.js.map +1 -0
- package/dist/code-intelligence/visualization/C4ContainerDiagramBuilder.d.ts +138 -0
- package/dist/code-intelligence/visualization/C4ContainerDiagramBuilder.d.ts.map +1 -0
- package/dist/code-intelligence/visualization/C4ContainerDiagramBuilder.js +343 -0
- package/dist/code-intelligence/visualization/C4ContainerDiagramBuilder.js.map +1 -0
- package/dist/code-intelligence/visualization/C4ContextDiagramBuilder.d.ts +67 -0
- package/dist/code-intelligence/visualization/C4ContextDiagramBuilder.d.ts.map +1 -0
- package/dist/code-intelligence/visualization/C4ContextDiagramBuilder.js +152 -0
- package/dist/code-intelligence/visualization/C4ContextDiagramBuilder.js.map +1 -0
- package/dist/code-intelligence/visualization/MermaidGenerator.d.ts +79 -0
- package/dist/code-intelligence/visualization/MermaidGenerator.d.ts.map +1 -1
- package/dist/code-intelligence/visualization/MermaidGenerator.js +143 -0
- package/dist/code-intelligence/visualization/MermaidGenerator.js.map +1 -1
- package/dist/config/ConfigLoader.d.ts +86 -0
- package/dist/config/ConfigLoader.d.ts.map +1 -0
- package/dist/config/ConfigLoader.js +450 -0
- package/dist/config/ConfigLoader.js.map +1 -0
- package/dist/config/ProviderConfig.d.ts +153 -0
- package/dist/config/ProviderConfig.d.ts.map +1 -0
- package/dist/config/ProviderConfig.js +155 -0
- package/dist/config/ProviderConfig.js.map +1 -0
- package/dist/config/index.d.ts +35 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +45 -0
- package/dist/config/index.js.map +1 -0
- package/dist/core/memory/HNSWVectorMemory.js +1 -1
- package/dist/mcp/handlers/integration/integration-test-orchestrate.d.ts.map +1 -1
- package/dist/mcp/handlers/integration/integration-test-orchestrate.js +6 -9
- package/dist/mcp/handlers/integration/integration-test-orchestrate.js.map +1 -1
- package/dist/mcp/server-instructions.d.ts +1 -1
- package/dist/mcp/server-instructions.js +1 -1
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.d.ts.map +1 -1
- package/dist/memory/HNSWPatternStore.d.ts.map +1 -1
- package/dist/memory/HNSWPatternStore.js +23 -0
- package/dist/memory/HNSWPatternStore.js.map +1 -1
- package/dist/memory/RuVectorPatternStore.d.ts +5 -0
- package/dist/memory/RuVectorPatternStore.d.ts.map +1 -1
- package/dist/memory/RuVectorPatternStore.js +11 -0
- package/dist/memory/RuVectorPatternStore.js.map +1 -1
- package/dist/providers/CostOptimizationStrategies.d.ts +297 -0
- package/dist/providers/CostOptimizationStrategies.d.ts.map +1 -0
- package/dist/providers/CostOptimizationStrategies.js +831 -0
- package/dist/providers/CostOptimizationStrategies.js.map +1 -0
- package/dist/providers/HybridRouter.d.ts +142 -5
- package/dist/providers/HybridRouter.d.ts.map +1 -1
- package/dist/providers/HybridRouter.js +472 -6
- package/dist/providers/HybridRouter.js.map +1 -1
- package/dist/providers/HybridRouterComplexityIntegration.d.ts +169 -0
- package/dist/providers/HybridRouterComplexityIntegration.d.ts.map +1 -0
- package/dist/providers/HybridRouterComplexityIntegration.js +319 -0
- package/dist/providers/HybridRouterComplexityIntegration.js.map +1 -0
- package/dist/providers/HybridRouterModelSelection.d.ts +106 -0
- package/dist/providers/HybridRouterModelSelection.d.ts.map +1 -0
- package/dist/providers/HybridRouterModelSelection.js +420 -0
- package/dist/providers/HybridRouterModelSelection.js.map +1 -0
- package/dist/providers/LLMProviderFactory.d.ts +23 -9
- package/dist/providers/LLMProviderFactory.d.ts.map +1 -1
- package/dist/providers/LLMProviderFactory.js +54 -11
- package/dist/providers/LLMProviderFactory.js.map +1 -1
- package/dist/providers/OllamaProvider.d.ts +122 -0
- package/dist/providers/OllamaProvider.d.ts.map +1 -0
- package/dist/providers/OllamaProvider.js +425 -0
- package/dist/providers/OllamaProvider.js.map +1 -0
- package/dist/providers/index.d.ts +6 -1
- package/dist/providers/index.d.ts.map +1 -1
- package/dist/providers/index.js +17 -1
- package/dist/providers/index.js.map +1 -1
- package/dist/routing/ComplexityClassifier.d.ts +266 -0
- package/dist/routing/ComplexityClassifier.d.ts.map +1 -0
- package/dist/routing/ComplexityClassifier.js +567 -0
- package/dist/routing/ComplexityClassifier.js.map +1 -0
- package/dist/routing/ModelCapabilityRegistry.d.ts +98 -0
- package/dist/routing/ModelCapabilityRegistry.d.ts.map +1 -0
- package/dist/routing/ModelCapabilityRegistry.js +216 -0
- package/dist/routing/ModelCapabilityRegistry.js.map +1 -0
- package/dist/routing/index.d.ts +13 -0
- package/dist/routing/index.d.ts.map +1 -0
- package/dist/routing/index.js +24 -0
- package/dist/routing/index.js.map +1 -0
- package/docs/reference/model-capability-registry.md +402 -0
- package/docs/reference/provider-config-schema.md +608 -0
- package/package.json +20 -4
|
@@ -0,0 +1,831 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Cost Optimization Strategies for LLM Independence
|
|
4
|
+
*
|
|
5
|
+
* Provides intelligent cost optimization through:
|
|
6
|
+
* - Prompt compression (6-10% token savings)
|
|
7
|
+
* - Request batching for bulk operations
|
|
8
|
+
* - Smart caching with task-specific TTLs
|
|
9
|
+
* - Model right-sizing based on budget and complexity
|
|
10
|
+
*
|
|
11
|
+
* Designed as standalone utilities for integration with HybridRouter.
|
|
12
|
+
*
|
|
13
|
+
* @module providers/CostOptimizationStrategies
|
|
14
|
+
* @version 1.0.0
|
|
15
|
+
*/
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
exports.CostOptimizationManager = exports.ModelRightSizer = exports.SmartCacheStrategy = exports.RequestBatcher = exports.PromptCompressor = void 0;
|
|
18
|
+
const HybridRouter_1 = require("./HybridRouter");
|
|
19
|
+
const Logger_1 = require("../utils/Logger");
|
|
20
|
+
/**
|
|
21
|
+
* PromptCompressor - Reduce token usage through intelligent compression
|
|
22
|
+
*
|
|
23
|
+
* Achieves 6-10% token savings through:
|
|
24
|
+
* - Redundant whitespace removal
|
|
25
|
+
* - Common pattern abbreviation
|
|
26
|
+
* - Token-efficient phrasing
|
|
27
|
+
*/
|
|
28
|
+
class PromptCompressor {
    /**
     * @param {object} [config] - Partial cost-optimization configuration.
     *   Any field left null/undefined falls back to the default shown below.
     */
    constructor(config = {}) {
        this.logger = Logger_1.Logger.getInstance();
        this.config = {
            debug: config.debug ?? false,
            enableCompression: config.enableCompression ?? true,
            enableBatching: config.enableBatching ?? true,
            enableSmartCaching: config.enableSmartCaching ?? true,
            minCompressionRatio: config.minCompressionRatio ?? 0.05,
            maxBatchSize: config.maxBatchSize ?? 10,
            defaultCacheTTL: config.defaultCacheTTL ?? 3600
        };
    }
    /**
     * Collapse redundant whitespace: runs of spaces become one space, three or
     * more consecutive newlines become a blank line, and every line (and the
     * whole prompt) is trimmed.
     *
     * @param {string} prompt - Text to normalise.
     * @returns {string} The whitespace-normalised text.
     */
    compressWhitespace(prompt) {
        return prompt
            // Replace multiple spaces with single space
            .replace(/ +/g, ' ')
            // Replace multiple newlines with double newline (preserve paragraph breaks)
            .replace(/\n\n\n+/g, '\n\n')
            // Trim lines
            .split('\n')
            .map(line => line.trim())
            .join('\n')
            // Remove trailing/leading whitespace
            .trim();
    }
    /**
     * Replace common long words and phrases with shorter equivalents
     * (case-insensitive, whole words only).
     *
     * @param {string} prompt - Text to abbreviate.
     * @returns {string} The abbreviated text.
     */
    abbreviatePatterns(prompt) {
        const abbreviations = [
            // Code-related patterns
            { pattern: /\bfunction\b/gi, replacement: 'fn' },
            { pattern: /\bconstant\b/gi, replacement: 'const' },
            { pattern: /\bvariable\b/gi, replacement: 'var' },
            { pattern: /\bparameter\b/gi, replacement: 'param' },
            { pattern: /\bargument\b/gi, replacement: 'arg' },
            { pattern: /\bconfiguration\b/gi, replacement: 'config' },
            { pattern: /\benvironment\b/gi, replacement: 'env' },
            { pattern: /\bdocumentation\b/gi, replacement: 'docs' },
            { pattern: /\bapplication\b/gi, replacement: 'app' },
            { pattern: /\bdatabase\b/gi, replacement: 'db' },
            { pattern: /\brepository\b/gi, replacement: 'repo' },
            // Test-related patterns
            { pattern: /\btest case\b/gi, replacement: 'test' },
            { pattern: /\bunit test\b/gi, replacement: 'unit' },
            { pattern: /\bintegration test\b/gi, replacement: 'integration' },
            // Common phrases
            { pattern: /\bfor example\b/gi, replacement: 'e.g.' },
            { pattern: /\bthat is\b/gi, replacement: 'i.e.' },
            { pattern: /\band so on\b/gi, replacement: 'etc.' }
        ];
        let compressed = prompt;
        const appliedAbbreviations = [];
        for (const { pattern, replacement } of abbreviations) {
            const before = compressed;
            compressed = compressed.replace(pattern, replacement);
            if (compressed !== before) {
                appliedAbbreviations.push(`${pattern.source} → ${replacement}`);
            }
        }
        if (this.config.debug && appliedAbbreviations.length > 0) {
            this.logger.debug('Applied abbreviations', {
                count: appliedAbbreviations.length,
                examples: appliedAbbreviations.slice(0, 3)
            });
        }
        return compressed;
    }
    /**
     * Drop filler words ("very", "just", ...) and shorten verbose phrases
     * ("in order to" → "to", ...).
     *
     * @param {string} prompt - Text to tighten.
     * @returns {string} The tightened text.
     */
    optimizeTokenUsage(prompt) {
        let optimized = prompt;
        // Remove filler words that don't add meaning
        const fillerWords = [
            /\bvery\s+/gi,
            /\breally\s+/gi,
            /\bactually\s+/gi,
            /\bjust\s+/gi,
            /\bbasically\s+/gi,
            /\bsimply\s+/gi
        ];
        fillerWords.forEach(pattern => {
            optimized = optimized.replace(pattern, '');
        });
        // Simplify verbose phrases
        const verbosePatterns = [
            { pattern: /\bin order to\b/gi, replacement: 'to' },
            { pattern: /\bdue to the fact that\b/gi, replacement: 'because' },
            { pattern: /\bat this point in time\b/gi, replacement: 'now' },
            { pattern: /\bin the event that\b/gi, replacement: 'if' },
            { pattern: /\bprior to\b/gi, replacement: 'before' },
            { pattern: /\bsubsequent to\b/gi, replacement: 'after' }
        ];
        verbosePatterns.forEach(({ pattern, replacement }) => {
            optimized = optimized.replace(pattern, replacement);
        });
        return optimized;
    }
    /**
     * Swap fenced (```...```) and inline (`...`) code spans for opaque
     * placeholders so the prose-oriented rewrites cannot touch them.
     *
     * Without this, code embedded in a prompt would lose its indentation and
     * have identifiers such as `function` rewritten to `fn`.
     *
     * @param {string} prompt - Raw prompt text.
     * @returns {{ text: string, restore: (value: string) => string }}
     *   `text` is the prompt with code spans replaced; `restore` puts the
     *   original spans back, byte-for-byte.
     */
    protectCodeFences(prompt) {
        if (!prompt.includes('`')) {
            return { text: prompt, restore: value => value };
        }
        // Grow the sentinel until it cannot collide with anything in the prompt.
        let sentinel = '\u0000';
        while (prompt.includes(sentinel)) {
            sentinel += '\u0000';
        }
        const spans = [];
        // An unterminated fence is protected through to the end of the prompt.
        const codePattern = /```[\s\S]*?(?:```|$)|`[^`\n]+`/g;
        const text = prompt.replace(codePattern, span => {
            spans.push(span);
            return `${sentinel}${spans.length - 1}${sentinel}`;
        });
        const placeholder = new RegExp(`${sentinel}(\\d+)${sentinel}`, 'g');
        // A replacer function is used so `$` sequences inside code are not
        // interpreted as replacement patterns.
        const restore = value => value.replace(placeholder, (match, index) => spans[Number(index)] ?? match);
        return { text, restore };
    }
    /**
     * Full compression pipeline: whitespace → abbreviations → phrasing.
     * Code spans are preserved verbatim. Token counts are estimated at
     * roughly four characters per token.
     *
     * @param {string} prompt - Text to compress.
     * @returns {{ compressed: string, original: string, tokensSaved: number, ratio: number, techniques: string[] }}
     *   The original text is returned unchanged (with zero savings) when
     *   compression is disabled or the estimated ratio is below
     *   `config.minCompressionRatio`.
     */
    compress(prompt) {
        if (!this.config.enableCompression) {
            return {
                compressed: prompt,
                original: prompt,
                tokensSaved: 0,
                ratio: 0,
                techniques: []
            };
        }
        const original = prompt;
        const techniques = [];
        const { text, restore } = this.protectCodeFences(prompt);
        // Apply compression techniques to the prose only
        let compressed = this.compressWhitespace(text);
        techniques.push('whitespace');
        compressed = this.abbreviatePatterns(compressed);
        techniques.push('abbreviations');
        compressed = this.optimizeTokenUsage(compressed);
        techniques.push('token-optimization');
        compressed = restore(compressed);
        // Calculate savings (rough estimate: 1 token ≈ 4 characters)
        const originalTokens = Math.ceil(original.length / 4);
        const compressedTokens = Math.ceil(compressed.length / 4);
        const tokensSaved = Math.max(0, originalTokens - compressedTokens);
        const ratio = originalTokens > 0 ? tokensSaved / originalTokens : 0;
        // Only return compressed version if meets minimum ratio
        if (ratio < this.config.minCompressionRatio) {
            if (this.config.debug) {
                this.logger.debug('Compression below threshold, using original', {
                    ratio: ratio.toFixed(3),
                    threshold: this.config.minCompressionRatio
                });
            }
            return {
                compressed: original,
                original,
                tokensSaved: 0,
                ratio: 0,
                techniques: []
            };
        }
        if (this.config.debug) {
            this.logger.debug('Prompt compressed', {
                originalLength: original.length,
                compressedLength: compressed.length,
                tokensSaved,
                ratio: ratio.toFixed(3),
                techniques
            });
        }
        return {
            compressed,
            original,
            tokensSaved,
            ratio,
            techniques
        };
    }
    /**
     * Compress every text-bearing part of a completion request: string
     * message content, `text` blocks inside array content, and system prompts.
     * The input object is not mutated.
     *
     * @param {object} options - Completion options with a `messages` array and
     *   an optional `system` array of `{ text }` entries.
     * @returns {{ options: object, result: object }} A copy of the options with
     *   compressed text, plus an aggregate compression summary.
     */
    compressOptions(options) {
        // Compress message content
        const compressedMessages = options.messages.map(msg => {
            if (typeof msg.content === 'string') {
                return { ...msg, content: this.compress(msg.content).compressed };
            }
            if (!Array.isArray(msg.content)) {
                // Nothing textual to compress; pass the message through untouched.
                return msg;
            }
            // Handle array content (compress text blocks)
            const compressedContent = msg.content.map(block => {
                if (block.type === 'text' && block.text) {
                    return { ...block, text: this.compress(block.text).compressed };
                }
                return block;
            });
            return { ...msg, content: compressedContent };
        });
        const compressedOptions = { ...options, messages: compressedMessages };
        // Only touch `system` when the caller supplied one, so no explicit
        // `system: undefined` key is injected into the outgoing request.
        if (options.system) {
            compressedOptions.system = options.system.map(sys => ({
                ...sys,
                text: this.compress(sys.text).compressed
            }));
        }
        // Calculate overall compression
        const originalText = this.extractAllText(options);
        const compressedText = this.extractAllText(compressedOptions);
        const originalTokens = Math.ceil(originalText.length / 4);
        const compressedTokens = Math.ceil(compressedText.length / 4);
        const tokensSaved = Math.max(0, originalTokens - compressedTokens);
        const compressionResult = {
            compressed: compressedText,
            original: originalText,
            tokensSaved,
            ratio: originalTokens > 0 ? tokensSaved / originalTokens : 0,
            techniques: ['full-options-compression']
        };
        return {
            options: compressedOptions,
            result: compressionResult
        };
    }
    /**
     * Concatenate all system-prompt text and message text, one part per line.
     *
     * @param {object} options - Completion options (`messages`, optional `system`).
     * @returns {string} Newline-joined text of every textual part.
     */
    extractAllText(options) {
        const parts = [];
        if (options.system) {
            parts.push(...options.system.map(s => s.text));
        }
        options.messages.forEach(msg => {
            if (typeof msg.content === 'string') {
                parts.push(msg.content);
            }
            else if (Array.isArray(msg.content)) {
                parts.push(...msg.content.filter(c => c.type === 'text').map(c => c.text || ''));
            }
        });
        return parts.join('\n');
    }
}
|
|
260
|
+
exports.PromptCompressor = PromptCompressor;
|
|
261
|
+
/**
|
|
262
|
+
* RequestBatcher - Batch similar requests for cost efficiency
|
|
263
|
+
*/
|
|
264
|
+
class RequestBatcher {
    /**
     * @param {object} [config] - Partial cost-optimization configuration.
     *   Any field left null/undefined falls back to the default shown below.
     */
    constructor(config = {}) {
        this.logger = Logger_1.Logger.getInstance();
        this.config = {
            debug: config.debug ?? false,
            enableCompression: config.enableCompression ?? true,
            enableBatching: config.enableBatching ?? true,
            enableSmartCaching: config.enableSmartCaching ?? true,
            minCompressionRatio: config.minCompressionRatio ?? 0.05,
            maxBatchSize: config.maxBatchSize ?? 10,
            defaultCacheTTL: config.defaultCacheTTL ?? 3600
        };
    }
    /**
     * Group requests that share a similarity key (see `calculateGroupKey`) and
     * split each group into batches of at most `config.maxBatchSize`.
     *
     * Keys matched by fewer than two requests produce no group. A group whose
     * size is not a multiple of the batch size ends with a smaller batch,
     * which may hold a single request.
     *
     * @param {object[]} requests - Completion requests to group.
     * @returns {object[]} Groups of `{ groupId, requests, characteristics, estimatedSavings }`;
     *   empty when batching is disabled or nothing can be grouped.
     */
    groupSimilarRequests(requests) {
        if (!this.config.enableBatching || requests.length === 0) {
            return [];
        }
        // Group by similarity characteristics (Map keeps first-seen key order)
        const groups = new Map();
        for (const request of requests) {
            const groupKey = this.calculateGroupKey(request);
            const bucket = groups.get(groupKey);
            if (bucket) {
                bucket.push(request);
            }
            else {
                groups.set(groupKey, [request]);
            }
        }
        // Convert to RequestGroup objects
        const requestGroups = [];
        for (const [groupId, groupRequests] of groups) {
            // Only create groups with 2+ requests
            if (groupRequests.length < 2) {
                continue;
            }
            // Limit group size
            const batches = this.splitIntoBatches(groupRequests, this.config.maxBatchSize);
            batches.forEach((batch, index) => {
                requestGroups.push({
                    groupId: `${groupId}-${index}`,
                    requests: batch,
                    characteristics: this.analyzeGroupCharacteristics(batch),
                    estimatedSavings: this.estimateBatchSavings(batch)
                });
            });
        }
        if (this.config.debug) {
            this.logger.debug('Requests grouped for batching', {
                totalRequests: requests.length,
                groups: requestGroups.length,
                // reduce() with a 0 seed avoids Math.max() of an empty list (-Infinity)
                largestGroup: requestGroups.reduce((largest, g) => Math.max(largest, g.requests.length), 0)
            });
        }
        return requestGroups;
    }
    /**
     * Estimate the cost saved by sending the requests as one batch.
     *
     * @param {object[]} requests - Requests in the batch.
     * @returns {number} Estimated savings; 0 for fewer than two requests.
     */
    estimateBatchSavings(requests) {
        if (requests.length < 2) {
            return 0;
        }
        // Rough estimate: batching saves ~15% on overhead per request after first
        const avgCostPerRequest = 0.001; // $0.001 per request estimate
        const overheadSavings = (requests.length - 1) * avgCostPerRequest * 0.15;
        return overheadSavings;
    }
    /**
     * Build the similarity key for a request from its model, temperature
     * (rounded to one decimal, default 0.7), max tokens (floored to the
     * nearest 1000, default 1024), presence of a system prompt, and whether
     * its text contains a fenced code block.
     *
     * @param {object} request - Completion request.
     * @returns {string} `|`-separated key.
     */
    calculateGroupKey(request) {
        const parts = [];
        // Model
        parts.push(request.model);
        // Temperature (rounded)
        const temp = request.temperature ?? 0.7;
        parts.push(`temp-${Math.round(temp * 10)}`);
        // Max tokens (bucketed)
        const maxTokens = request.maxTokens ?? 1024;
        const tokenBucket = Math.floor(maxTokens / 1000) * 1000;
        parts.push(`tokens-${tokenBucket}`);
        // Has system prompt
        parts.push(request.system && request.system.length > 0 ? 'sys' : 'nosys');
        // Content characteristics
        const allContent = this.extractAllText(request);
        const hasCode = /```/.test(allContent);
        parts.push(hasCode ? 'code' : 'text');
        return parts.join('|');
    }
    /**
     * Summarise a batch: estimated average tokens per request (about four
     * characters per token) and whether any request contains fenced code.
     *
     * @param {object[]} requests - Requests in the batch.
     * @returns {{ averageTokens: number, hasCode: boolean }}
     */
    analyzeGroupCharacteristics(requests) {
        if (requests.length === 0) {
            // Avoid 0/0 → NaN for an empty batch.
            return { averageTokens: 0, hasCode: false };
        }
        const allContent = requests.map(r => this.extractAllText(r)).join('\n');
        const totalLength = allContent.length;
        const averageTokens = Math.ceil(totalLength / (requests.length * 4));
        const hasCode = /```/.test(allContent);
        return {
            averageTokens,
            hasCode
        };
    }
    /**
     * Split requests into consecutive batches of at most `maxSize` items.
     *
     * @param {Array} requests - Items to split.
     * @param {number} maxSize - Maximum batch size. Fractions are floored;
     *   values below 1 (or NaN) are treated as 1, because a non-positive step
     *   would otherwise never advance the loop.
     * @returns {Array[]} The batches, in input order.
     */
    splitIntoBatches(requests, maxSize) {
        const size = Number.isNaN(maxSize) || maxSize < 1 ? 1 : Math.floor(maxSize);
        const batches = [];
        for (let i = 0; i < requests.length; i += size) {
            batches.push(requests.slice(i, i + size));
        }
        return batches;
    }
    /**
     * Concatenate all system-prompt text and message text, one part per line.
     *
     * @param {object} request - Completion request (`messages`, optional `system`).
     * @returns {string} Newline-joined text of every textual part.
     */
    extractAllText(request) {
        const parts = [];
        if (request.system) {
            parts.push(...request.system.map(s => s.text));
        }
        request.messages.forEach(msg => {
            if (typeof msg.content === 'string') {
                parts.push(msg.content);
            }
            else if (Array.isArray(msg.content)) {
                parts.push(...msg.content.filter(c => c.type === 'text').map(c => c.text || ''));
            }
        });
        return parts.join('\n');
    }
}
|
|
397
|
+
exports.RequestBatcher = RequestBatcher;
|
|
398
|
+
/**
|
|
399
|
+
* SmartCacheStrategy - Task-specific caching strategies
|
|
400
|
+
*/
|
|
401
|
+
class SmartCacheStrategy {
    /**
     * @param {object} [config] - Partial cost-optimization configuration.
     *   Any field left null/undefined falls back to the default shown below.
     */
    constructor(config = {}) {
        this.logger = Logger_1.Logger.getInstance();
        this.config = {
            debug: config.debug ?? false,
            enableCompression: config.enableCompression ?? true,
            enableBatching: config.enableBatching ?? true,
            enableSmartCaching: config.enableSmartCaching ?? true,
            minCompressionRatio: config.minCompressionRatio ?? 0.05,
            maxBatchSize: config.maxBatchSize ?? 10,
            defaultCacheTTL: config.defaultCacheTTL ?? 3600
        };
        this.strategies = new Map();
        this.initializeDefaultStrategies();
    }
    /**
     * Look up the caching strategy registered for a task type.
     *
     * @param {string} taskType - Task type identifier, e.g. 'code-review'.
     * @returns {object} The registered strategy, or a fallback built from
     *   `config.defaultCacheTTL` with the default key generator.
     */
    getCacheStrategy(taskType) {
        const strategy = this.strategies.get(taskType);
        if (strategy) {
            return strategy;
        }
        // Return default strategy
        return {
            taskType,
            ttlSeconds: this.config.defaultCacheTTL,
            aggressive: false,
            confidenceThreshold: 0.85,
            keyGenerator: this.defaultKeyGenerator.bind(this)
        };
    }
    /**
     * Decide whether a response is worth caching.
     *
     * A response is rejected when smart caching is disabled, the stop reason
     * is not 'end_turn', the request was streamed, the response text is under
     * 50 characters, or total token usage exceeds 10000.
     *
     * @param {object} options - The completion request.
     * @param {object} response - The completion response.
     * @returns {boolean} True when the response should be cached.
     */
    shouldCache(options, response) {
        if (!this.config.enableSmartCaching) {
            return false;
        }
        // Don't cache errors
        if (response.stop_reason !== 'end_turn') {
            return false;
        }
        // Don't cache streaming responses
        if (options.stream) {
            return false;
        }
        // Don't cache very short responses (likely errors or incomplete)
        const blocks = Array.isArray(response.content) ? response.content : [];
        const responseText = blocks.map(c => c.text).join('');
        if (responseText.length < 50) {
            return false;
        }
        // Don't cache very expensive responses (might be one-off).
        // Missing usage figures count as 0 rather than raising a TypeError.
        const usage = response.usage ?? {};
        const totalTokens = (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);
        if (totalTokens > 10000) {
            return false;
        }
        return true;
    }
    /**
     * Generate the cache key for a request.
     *
     * @param {object} options - The completion request.
     * @param {string} [taskType] - When given, the key generator of that task
     *   type's strategy is used; otherwise the default generator.
     * @returns {string} Cache key.
     */
    generateCacheKey(options, taskType) {
        if (taskType) {
            return this.getCacheStrategy(taskType).keyGenerator(options);
        }
        return this.defaultKeyGenerator(options);
    }
    /**
     * Default key: `model:hash(last user message text):t<temperature*10>`.
     * The hash part is omitted when there is no user message; temperature
     * defaults to 0.7.
     *
     * @param {object} options - The completion request.
     * @returns {string} Cache key.
     */
    defaultKeyGenerator(options) {
        const parts = [];
        // Model
        parts.push(options.model);
        // Extract query (last user message)
        const userMessages = options.messages.filter(m => m.role === 'user');
        const lastUserMsg = userMessages[userMessages.length - 1];
        if (lastUserMsg) {
            // Create hash from content (simple hash for cache key)
            parts.push(this.simpleHash(this.messageText(lastUserMsg, true)));
        }
        // Temperature
        const temp = options.temperature ?? 0.7;
        parts.push(`t${Math.round(temp * 10)}`);
        return parts.join(':');
    }
    /**
     * Flatten a message's content to plain text.
     *
     * @param {object} message - Message with string or block-array content.
     * @param {boolean} textBlocksOnly - When true, only blocks whose `type`
     *   is 'text' contribute; otherwise every block's `text` (if any) does.
     * @returns {string} The concatenated text; '' for any other content shape.
     */
    messageText(message, textBlocksOnly) {
        const { content } = message;
        if (typeof content === 'string') {
            return content;
        }
        if (!Array.isArray(content)) {
            return '';
        }
        const blocks = textBlocksOnly ? content.filter(c => c.type === 'text') : content;
        return blocks.map(c => c.text || '').join('');
    }
    /**
     * Non-cryptographic 32-bit string hash (h = h * 31 + charCode), returned
     * as the base-36 form of its absolute value.
     *
     * @param {string} str - Text to hash.
     * @returns {string} Base-36 hash string.
     */
    simpleHash(str) {
        let hash = 0;
        for (let i = 0; i < str.length; i++) {
            const char = str.charCodeAt(i);
            hash = ((hash << 5) - hash) + char;
            hash = hash & hash; // Convert to 32-bit integer
        }
        return Math.abs(hash).toString(36);
    }
    /**
     * Register the built-in per-task strategies. Only 'test-generation' uses
     * a specialised key generator; the rest use the default one.
     */
    initializeDefaultStrategies() {
        const defaultKey = () => this.defaultKeyGenerator.bind(this);
        const builtIns = [
            // Test generation - moderate caching (tests change frequently)
            { taskType: 'test-generation', ttlSeconds: 1800, aggressive: false, confidenceThreshold: 0.90, keyGenerator: this.testGenerationKeyGenerator.bind(this) },
            // Coverage analysis - aggressive caching (code doesn't change often)
            { taskType: 'coverage-analysis', ttlSeconds: 7200, aggressive: true, confidenceThreshold: 0.80, keyGenerator: defaultKey() },
            // Code review - moderate caching
            { taskType: 'code-review', ttlSeconds: 3600, aggressive: false, confidenceThreshold: 0.85, keyGenerator: defaultKey() },
            // Bug detection - conservative caching (need fresh analysis)
            { taskType: 'bug-detection', ttlSeconds: 900, aggressive: false, confidenceThreshold: 0.92, keyGenerator: defaultKey() },
            // Documentation - very aggressive caching (stable content)
            { taskType: 'documentation', ttlSeconds: 14400, aggressive: true, confidenceThreshold: 0.75, keyGenerator: defaultKey() },
            // Refactoring - conservative caching
            { taskType: 'refactoring', ttlSeconds: 1800, aggressive: false, confidenceThreshold: 0.88, keyGenerator: defaultKey() },
            // Performance testing - moderate caching
            { taskType: 'performance-testing', ttlSeconds: 3600, aggressive: false, confidenceThreshold: 0.85, keyGenerator: defaultKey() },
            // Security scanning - conservative caching (security landscape changes)
            { taskType: 'security-scanning', ttlSeconds: 1800, aggressive: false, confidenceThreshold: 0.90, keyGenerator: defaultKey() }
        ];
        for (const strategy of builtIns) {
            this.strategies.set(strategy.taskType, strategy);
        }
    }
    /**
     * Key generator for test generation:
     * `test-gen:model:hash(first fenced code block, else all user text):t<temperature*10>`.
     *
     * Model and temperature are part of the key, as in `defaultKeyGenerator`,
     * so a response produced by one model is never served for a request
     * addressed to another.
     *
     * @param {object} options - The completion request.
     * @returns {string} Cache key.
     */
    testGenerationKeyGenerator(options) {
        const parts = ['test-gen', options.model];
        // Extract source code being tested (if in messages)
        const allContent = options.messages
            .filter(m => m.role === 'user')
            .map(m => this.messageText(m, false))
            .join('\n');
        // Look for code blocks; the first one is likely the source under test
        const codeMatches = allContent.match(/```[\s\S]*?```/g);
        const hashed = codeMatches && codeMatches.length > 0 ? codeMatches[0] : allContent;
        parts.push(this.simpleHash(hashed));
        const temp = options.temperature ?? 0.7;
        parts.push(`t${Math.round(temp * 10)}`);
        return parts.join(':');
    }
}
|
|
599
|
+
exports.SmartCacheStrategy = SmartCacheStrategy;
|
|
600
|
+
/**
|
|
601
|
+
* ModelRightSizer - Budget-aware model selection
|
|
602
|
+
*/
|
|
603
|
+
class ModelRightSizer {
    constructor() {
        this.logger = Logger_1.Logger.getInstance();
    }
    /**
     * Decide whether a request should be routed to a cheaper model tier.
     *
     * @param complexity   A `HybridRouter_1.TaskComplexity` value.
     * @param budgetStatus Object read for `isOverBudget`, `alertTriggered`
     *                     and `utilizationPercentage`.
     * @param constraints  Optional; only `requiredCapabilities` is inspected.
     * @returns An object with `shouldDowngrade`, `reason` and `qualityImpact`;
     *          when a downgrade is recommended it also carries
     *          `recommendedModel` and `estimatedSavings`.
     */
    shouldDowngradeModel(complexity, budgetStatus, constraints) {
        // Healthy budget: neither exceeded nor alerting, so nothing to do.
        if (!budgetStatus.isOverBudget && !budgetStatus.alertTriggered) {
            return {
                shouldDowngrade: false,
                reason: 'Budget healthy, no downgrade needed',
                qualityImpact: 1.0
            };
        }
        const budgetPressure = this.calculateBudgetPressure(budgetStatus);
        if (!this.canDowngrade(complexity, constraints)) {
            return {
                shouldDowngrade: false,
                reason: 'Task complexity requires current model tier',
                qualityImpact: 1.0
            };
        }
        const { shouldDowngrade, recommendedTier, qualityImpact } = this.makeDowngradeDecision(complexity, budgetPressure, budgetStatus);
        if (!shouldDowngrade) {
            return {
                shouldDowngrade: false,
                reason: 'Budget pressure not severe enough to warrant downgrade',
                qualityImpact: 1.0
            };
        }
        const estimatedSavings = this.estimateSavings(complexity, recommendedTier);
        return {
            shouldDowngrade: true,
            recommendedModel: this.getModelForTier(recommendedTier),
            reason: this.getDowngradeReason(budgetPressure, budgetStatus),
            estimatedSavings,
            qualityImpact
        };
    }
    /**
     * Calculate budget pressure score (0-1, higher = more pressure).
     *
     * Returns 1.0 when the budget is exceeded. Otherwise the utilization
     * percentage is scaled to a fraction and clamped to [0, 0.95], so only an
     * exceeded budget reaches the maximum. A missing or non-numeric
     * utilization yields 0 instead of NaN.
     */
    calculateBudgetPressure(budgetStatus) {
        if (budgetStatus.isOverBudget) {
            return 1.0; // Maximum pressure
        }
        const utilization = Number(budgetStatus.utilizationPercentage);
        if (!Number.isFinite(utilization)) {
            return 0;
        }
        return Math.min(Math.max(utilization / 100, 0), 0.95);
    }
    /**
     * Check if the task allows a model downgrade.
     *
     * VERY_COMPLEX tasks are never downgraded. When `constraints` lists any
     * required capabilities, only SIMPLE tasks may be downgraded.
     */
    canDowngrade(complexity, constraints) {
        if (complexity === HybridRouter_1.TaskComplexity.VERY_COMPLEX) {
            return false;
        }
        if (constraints?.requiredCapabilities && constraints.requiredCapabilities.length > 0) {
            // Specific capabilities required: be conservative.
            return complexity === HybridRouter_1.TaskComplexity.SIMPLE;
        }
        return true;
    }
    /**
     * Pick a target tier from complexity and budget pressure.
     *
     * The mapping is monotonic in pressure: a SIMPLE task that goes to the
     * 'small' tier at moderate pressure (>60%) also goes to 'small' at high
     * pressure (>80%) and when over budget, rather than bouncing back up to
     * 'medium' in the middle band.
     *
     * @returns `{ shouldDowngrade, recommendedTier, qualityImpact }`.
     */
    makeDowngradeDecision(complexity, budgetPressure, budgetStatus) {
        const { SIMPLE, MODERATE } = HybridRouter_1.TaskComplexity;
        const isSimple = complexity === SIMPLE;
        // Over budget - always downgrade if possible
        if (budgetStatus.isOverBudget) {
            return {
                shouldDowngrade: true,
                recommendedTier: isSimple ? 'small' : 'medium',
                qualityImpact: isSimple ? 0.95 : 0.85
            };
        }
        // High budget pressure (>80%): simple and moderate tasks step down.
        if (budgetPressure > 0.80) {
            if (isSimple) {
                return {
                    shouldDowngrade: true,
                    recommendedTier: 'small',
                    qualityImpact: 0.95
                };
            }
            if (complexity === MODERATE) {
                return {
                    shouldDowngrade: true,
                    recommendedTier: 'medium',
                    qualityImpact: 0.90
                };
            }
        }
        // Moderate budget pressure (>60%): only simple tasks step down.
        if (budgetPressure > 0.60 && isSimple) {
            return {
                shouldDowngrade: true,
                recommendedTier: 'small',
                qualityImpact: 0.95
            };
        }
        return {
            shouldDowngrade: false,
            recommendedTier: 'large',
            qualityImpact: 1.0
        };
    }
    /**
     * Get the model identifier for a tier ('small' | 'medium' | 'large').
     * Returns undefined for any other tier name.
     */
    getModelForTier(tier) {
        const modelMap = {
            small: 'claude-haiku-3-5',
            medium: 'claude-sonnet-3-5',
            large: 'claude-opus-4'
        };
        return modelMap[tier];
    }
    /**
     * Estimate the cost saved by running one task on `tier` instead of the
     * 'large' tier, using rough per-tier prices and per-complexity token
     * counts. Returns 0 (not NaN) when the complexity or tier is unknown.
     */
    estimateSavings(complexity, tier) {
        // Rough cost estimates per 1M tokens
        const costs = {
            small: 1.00, // $1 per 1M tokens
            medium: 3.00, // $3 per 1M tokens
            large: 15.00 // $15 per 1M tokens
        };
        // Estimate tokens per task
        const tokensPerTask = {
            [HybridRouter_1.TaskComplexity.SIMPLE]: 500,
            [HybridRouter_1.TaskComplexity.MODERATE]: 2000,
            [HybridRouter_1.TaskComplexity.COMPLEX]: 5000,
            [HybridRouter_1.TaskComplexity.VERY_COMPLEX]: 10000
        };
        const tokens = tokensPerTask[complexity];
        const tierCost = costs[tier];
        if (tokens === undefined || tierCost === undefined) {
            return 0;
        }
        const currentCost = (costs.large * tokens) / 1000000;
        const newCost = (tierCost * tokens) / 1000000;
        return currentCost - newCost;
    }
    /**
     * Get the human-readable reason attached to a downgrade recommendation.
     */
    getDowngradeReason(budgetPressure, budgetStatus) {
        if (budgetStatus.isOverBudget) {
            return 'Budget exceeded - using cost-efficient model to stay within limits';
        }
        if (budgetPressure > 0.80) {
            return `Budget utilization high (${budgetStatus.utilizationPercentage.toFixed(1)}%) - downgrading to preserve budget`;
        }
        return `Budget pressure at ${(budgetPressure * 100).toFixed(1)}% - proactively optimizing costs`;
    }
}
|
|
758
|
+
exports.ModelRightSizer = ModelRightSizer;
|
|
759
|
+
/**
|
|
760
|
+
* CostOptimizationManager - Orchestrates all optimization strategies
|
|
761
|
+
*/
|
|
762
|
+
class CostOptimizationManager {
    /**
     * Build one instance of each optimization strategy.
     * `config` is passed unchanged to the PromptCompressor, RequestBatcher
     * and SmartCacheStrategy constructors; ModelRightSizer takes no config.
     */
    constructor(config = {}) {
        this.compressor = new PromptCompressor(config);
        this.batcher = new RequestBatcher(config);
        this.cacheStrategy = new SmartCacheStrategy(config);
        this.rightSizer = new ModelRightSizer();
        this.logger = Logger_1.Logger.getInstance();
    }
    /**
     * Get prompt compressor
     */
    getCompressor() {
        return this.compressor;
    }
    /**
     * Get request batcher
     */
    getBatcher() {
        return this.batcher;
    }
    /**
     * Get cache strategy manager
     */
    getCacheStrategy() {
        return this.cacheStrategy;
    }
    /**
     * Get model right-sizer
     */
    getRightSizer() {
        return this.rightSizer;
    }
    /**
     * Apply all applicable optimizations to a request.
     *
     * Works on a shallow copy of `options`: the prompt is always compressed,
     * and the model is swapped for a cheaper one when `context` supplies both
     * a complexity and a budget status and the right-sizer recommends it.
     *
     * @param options Request options; the caller's object is not reassigned.
     * @param context Optional; read for `complexity` and `budgetStatus`.
     * @returns `{ optimizedOptions, compressionResult, modelDowngrade,
     *          estimatedSavings }`; `modelDowngrade` is undefined when
     *          right-sizing was not evaluated.
     */
    optimizeRequest(options, context) {
        let optimizedOptions = { ...options };
        let estimatedSavings = 0;
        let modelDowngrade;
        // 1. Apply prompt compression
        const compressed = this.compressor.compressOptions(optimizedOptions);
        optimizedOptions = compressed.options;
        const compressionResult = compressed.result;
        // Estimate savings from compression (rough: $3 per 1M tokens)
        estimatedSavings += (compressionResult.tokensSaved / 1000000) * 3;
        // 2. Check for model right-sizing.
        // Complexity is tested for presence, not truthiness, so a falsy but
        // valid complexity value (e.g. an enum member equal to 0) still gets
        // evaluated.
        if (context?.complexity != null && context.budgetStatus) {
            modelDowngrade = this.rightSizer.shouldDowngradeModel(context.complexity, context.budgetStatus);
            if (modelDowngrade.shouldDowngrade && modelDowngrade.recommendedModel) {
                optimizedOptions.model = modelDowngrade.recommendedModel;
                estimatedSavings += modelDowngrade.estimatedSavings || 0;
            }
        }
        this.logger.debug('Request optimized', {
            compressionRatio: compressionResult.ratio.toFixed(3),
            tokensSaved: compressionResult.tokensSaved,
            modelDowngraded: modelDowngrade?.shouldDowngrade,
            estimatedSavings: estimatedSavings.toFixed(4)
        });
        return {
            optimizedOptions,
            compressionResult,
            modelDowngrade,
            estimatedSavings
        };
    }
}
|
|
830
|
+
exports.CostOptimizationManager = CostOptimizationManager;
|
|
831
|
+
//# sourceMappingURL=CostOptimizationStrategies.js.map
|