agentic-qe 2.6.1 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/.claude/agents/qe-code-intelligence.md +88 -1
  2. package/CHANGELOG.md +134 -0
  3. package/README.md +222 -159
  4. package/dist/agents/BaseAgent.d.ts +19 -0
  5. package/dist/agents/BaseAgent.d.ts.map +1 -1
  6. package/dist/agents/BaseAgent.js +41 -1
  7. package/dist/agents/BaseAgent.js.map +1 -1
  8. package/dist/agents/CodeIntelligenceAgent.d.ts +18 -1
  9. package/dist/agents/CodeIntelligenceAgent.d.ts.map +1 -1
  10. package/dist/agents/CodeIntelligenceAgent.js +96 -1
  11. package/dist/agents/CodeIntelligenceAgent.js.map +1 -1
  12. package/dist/agents/CoverageAnalyzerAgent.d.ts +8 -0
  13. package/dist/agents/CoverageAnalyzerAgent.d.ts.map +1 -1
  14. package/dist/agents/CoverageAnalyzerAgent.js +65 -1
  15. package/dist/agents/CoverageAnalyzerAgent.js.map +1 -1
  16. package/dist/agents/TestGeneratorAgent.d.ts +2 -2
  17. package/dist/agents/TestGeneratorAgent.d.ts.map +1 -1
  18. package/dist/agents/TestGeneratorAgent.js +16 -6
  19. package/dist/agents/TestGeneratorAgent.js.map +1 -1
  20. package/dist/agents/adapters/AgentLLMAdapter.d.ts +127 -0
  21. package/dist/agents/adapters/AgentLLMAdapter.d.ts.map +1 -0
  22. package/dist/agents/adapters/AgentLLMAdapter.js +366 -0
  23. package/dist/agents/adapters/AgentLLMAdapter.js.map +1 -0
  24. package/dist/agents/adapters/index.d.ts +1 -0
  25. package/dist/agents/adapters/index.d.ts.map +1 -1
  26. package/dist/agents/adapters/index.js +5 -1
  27. package/dist/agents/adapters/index.js.map +1 -1
  28. package/dist/agents/interfaces/IAgentLLM.d.ts +257 -0
  29. package/dist/agents/interfaces/IAgentLLM.d.ts.map +1 -0
  30. package/dist/agents/interfaces/IAgentLLM.js +39 -0
  31. package/dist/agents/interfaces/IAgentLLM.js.map +1 -0
  32. package/dist/agents/interfaces/index.d.ts +10 -0
  33. package/dist/agents/interfaces/index.d.ts.map +1 -0
  34. package/dist/agents/interfaces/index.js +14 -0
  35. package/dist/agents/interfaces/index.js.map +1 -0
  36. package/dist/agents/n8n/N8nBaseAgent.d.ts +18 -0
  37. package/dist/agents/n8n/N8nBaseAgent.d.ts.map +1 -1
  38. package/dist/agents/n8n/N8nBaseAgent.js +80 -0
  39. package/dist/agents/n8n/N8nBaseAgent.js.map +1 -1
  40. package/dist/cli/commands/knowledge-graph.d.ts +30 -0
  41. package/dist/cli/commands/knowledge-graph.d.ts.map +1 -1
  42. package/dist/cli/commands/knowledge-graph.js +206 -4
  43. package/dist/cli/commands/knowledge-graph.js.map +1 -1
  44. package/dist/cli/commands/providers.d.ts +50 -0
  45. package/dist/cli/commands/providers.d.ts.map +1 -0
  46. package/dist/cli/commands/providers.js +403 -0
  47. package/dist/cli/commands/providers.js.map +1 -0
  48. package/dist/cli/index.js +214 -0
  49. package/dist/cli/index.js.map +1 -1
  50. package/dist/code-intelligence/indexing/FileWatcher.d.ts.map +1 -1
  51. package/dist/code-intelligence/indexing/FileWatcher.js +11 -8
  52. package/dist/code-intelligence/indexing/FileWatcher.js.map +1 -1
  53. package/dist/code-intelligence/inference/ComponentBoundaryAnalyzer.d.ts +75 -0
  54. package/dist/code-intelligence/inference/ComponentBoundaryAnalyzer.d.ts.map +1 -0
  55. package/dist/code-intelligence/inference/ComponentBoundaryAnalyzer.js +400 -0
  56. package/dist/code-intelligence/inference/ComponentBoundaryAnalyzer.js.map +1 -0
  57. package/dist/code-intelligence/inference/ExternalSystemDetector.d.ts +31 -0
  58. package/dist/code-intelligence/inference/ExternalSystemDetector.d.ts.map +1 -0
  59. package/dist/code-intelligence/inference/ExternalSystemDetector.js +523 -0
  60. package/dist/code-intelligence/inference/ExternalSystemDetector.js.map +1 -0
  61. package/dist/code-intelligence/inference/ProjectMetadataAnalyzer.d.ts +78 -0
  62. package/dist/code-intelligence/inference/ProjectMetadataAnalyzer.d.ts.map +1 -0
  63. package/dist/code-intelligence/inference/ProjectMetadataAnalyzer.js +491 -0
  64. package/dist/code-intelligence/inference/ProjectMetadataAnalyzer.js.map +1 -0
  65. package/dist/code-intelligence/inference/index.d.ts +36 -0
  66. package/dist/code-intelligence/inference/index.d.ts.map +1 -0
  67. package/dist/code-intelligence/inference/index.js +65 -0
  68. package/dist/code-intelligence/inference/index.js.map +1 -0
  69. package/dist/code-intelligence/inference/types.d.ts +196 -0
  70. package/dist/code-intelligence/inference/types.d.ts.map +1 -0
  71. package/dist/code-intelligence/inference/types.js +9 -0
  72. package/dist/code-intelligence/inference/types.js.map +1 -0
  73. package/dist/code-intelligence/visualization/C4ComponentDiagramBuilder.d.ts +75 -0
  74. package/dist/code-intelligence/visualization/C4ComponentDiagramBuilder.d.ts.map +1 -0
  75. package/dist/code-intelligence/visualization/C4ComponentDiagramBuilder.js +267 -0
  76. package/dist/code-intelligence/visualization/C4ComponentDiagramBuilder.js.map +1 -0
  77. package/dist/code-intelligence/visualization/C4ContainerDiagramBuilder.d.ts +138 -0
  78. package/dist/code-intelligence/visualization/C4ContainerDiagramBuilder.d.ts.map +1 -0
  79. package/dist/code-intelligence/visualization/C4ContainerDiagramBuilder.js +343 -0
  80. package/dist/code-intelligence/visualization/C4ContainerDiagramBuilder.js.map +1 -0
  81. package/dist/code-intelligence/visualization/C4ContextDiagramBuilder.d.ts +67 -0
  82. package/dist/code-intelligence/visualization/C4ContextDiagramBuilder.d.ts.map +1 -0
  83. package/dist/code-intelligence/visualization/C4ContextDiagramBuilder.js +152 -0
  84. package/dist/code-intelligence/visualization/C4ContextDiagramBuilder.js.map +1 -0
  85. package/dist/code-intelligence/visualization/MermaidGenerator.d.ts +79 -0
  86. package/dist/code-intelligence/visualization/MermaidGenerator.d.ts.map +1 -1
  87. package/dist/code-intelligence/visualization/MermaidGenerator.js +143 -0
  88. package/dist/code-intelligence/visualization/MermaidGenerator.js.map +1 -1
  89. package/dist/config/ConfigLoader.d.ts +86 -0
  90. package/dist/config/ConfigLoader.d.ts.map +1 -0
  91. package/dist/config/ConfigLoader.js +450 -0
  92. package/dist/config/ConfigLoader.js.map +1 -0
  93. package/dist/config/ProviderConfig.d.ts +153 -0
  94. package/dist/config/ProviderConfig.d.ts.map +1 -0
  95. package/dist/config/ProviderConfig.js +155 -0
  96. package/dist/config/ProviderConfig.js.map +1 -0
  97. package/dist/config/index.d.ts +35 -0
  98. package/dist/config/index.d.ts.map +1 -0
  99. package/dist/config/index.js +45 -0
  100. package/dist/config/index.js.map +1 -0
  101. package/dist/core/memory/HNSWVectorMemory.js +1 -1
  102. package/dist/mcp/handlers/integration/integration-test-orchestrate.d.ts.map +1 -1
  103. package/dist/mcp/handlers/integration/integration-test-orchestrate.js +6 -9
  104. package/dist/mcp/handlers/integration/integration-test-orchestrate.js.map +1 -1
  105. package/dist/mcp/server-instructions.d.ts +1 -1
  106. package/dist/mcp/server-instructions.js +1 -1
  107. package/dist/mcp/server.d.ts +1 -0
  108. package/dist/mcp/server.d.ts.map +1 -1
  109. package/dist/memory/HNSWPatternStore.d.ts.map +1 -1
  110. package/dist/memory/HNSWPatternStore.js +23 -0
  111. package/dist/memory/HNSWPatternStore.js.map +1 -1
  112. package/dist/memory/RuVectorPatternStore.d.ts +5 -0
  113. package/dist/memory/RuVectorPatternStore.d.ts.map +1 -1
  114. package/dist/memory/RuVectorPatternStore.js +11 -0
  115. package/dist/memory/RuVectorPatternStore.js.map +1 -1
  116. package/dist/providers/CostOptimizationStrategies.d.ts +297 -0
  117. package/dist/providers/CostOptimizationStrategies.d.ts.map +1 -0
  118. package/dist/providers/CostOptimizationStrategies.js +831 -0
  119. package/dist/providers/CostOptimizationStrategies.js.map +1 -0
  120. package/dist/providers/HybridRouter.d.ts +142 -5
  121. package/dist/providers/HybridRouter.d.ts.map +1 -1
  122. package/dist/providers/HybridRouter.js +472 -6
  123. package/dist/providers/HybridRouter.js.map +1 -1
  124. package/dist/providers/HybridRouterComplexityIntegration.d.ts +169 -0
  125. package/dist/providers/HybridRouterComplexityIntegration.d.ts.map +1 -0
  126. package/dist/providers/HybridRouterComplexityIntegration.js +319 -0
  127. package/dist/providers/HybridRouterComplexityIntegration.js.map +1 -0
  128. package/dist/providers/HybridRouterModelSelection.d.ts +106 -0
  129. package/dist/providers/HybridRouterModelSelection.d.ts.map +1 -0
  130. package/dist/providers/HybridRouterModelSelection.js +420 -0
  131. package/dist/providers/HybridRouterModelSelection.js.map +1 -0
  132. package/dist/providers/LLMProviderFactory.d.ts +23 -9
  133. package/dist/providers/LLMProviderFactory.d.ts.map +1 -1
  134. package/dist/providers/LLMProviderFactory.js +54 -11
  135. package/dist/providers/LLMProviderFactory.js.map +1 -1
  136. package/dist/providers/OllamaProvider.d.ts +122 -0
  137. package/dist/providers/OllamaProvider.d.ts.map +1 -0
  138. package/dist/providers/OllamaProvider.js +425 -0
  139. package/dist/providers/OllamaProvider.js.map +1 -0
  140. package/dist/providers/index.d.ts +6 -1
  141. package/dist/providers/index.d.ts.map +1 -1
  142. package/dist/providers/index.js +17 -1
  143. package/dist/providers/index.js.map +1 -1
  144. package/dist/routing/ComplexityClassifier.d.ts +266 -0
  145. package/dist/routing/ComplexityClassifier.d.ts.map +1 -0
  146. package/dist/routing/ComplexityClassifier.js +567 -0
  147. package/dist/routing/ComplexityClassifier.js.map +1 -0
  148. package/dist/routing/ModelCapabilityRegistry.d.ts +98 -0
  149. package/dist/routing/ModelCapabilityRegistry.d.ts.map +1 -0
  150. package/dist/routing/ModelCapabilityRegistry.js +216 -0
  151. package/dist/routing/ModelCapabilityRegistry.js.map +1 -0
  152. package/dist/routing/index.d.ts +13 -0
  153. package/dist/routing/index.d.ts.map +1 -0
  154. package/dist/routing/index.js +24 -0
  155. package/dist/routing/index.js.map +1 -0
  156. package/docs/reference/model-capability-registry.md +402 -0
  157. package/docs/reference/provider-config-schema.md +608 -0
  158. package/package.json +20 -4
@@ -0,0 +1,831 @@
1
+ "use strict";
2
+ /**
3
+ * Cost Optimization Strategies for LLM Independence
4
+ *
5
+ * Provides intelligent cost optimization through:
6
+ * - Prompt compression (6-10% token savings)
7
+ * - Request batching for bulk operations
8
+ * - Smart caching with task-specific TTLs
9
+ * - Model right-sizing based on budget and complexity
10
+ *
11
+ * Designed as standalone utilities for integration with HybridRouter.
12
+ *
13
+ * @module providers/CostOptimizationStrategies
14
+ * @version 1.0.0
15
+ */
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.CostOptimizationManager = exports.ModelRightSizer = exports.SmartCacheStrategy = exports.RequestBatcher = exports.PromptCompressor = void 0;
18
+ const HybridRouter_1 = require("./HybridRouter");
19
+ const Logger_1 = require("../utils/Logger");
20
+ /**
21
+ * PromptCompressor - Reduce token usage through intelligent compression
22
+ *
23
+ * Achieves 6-10% token savings through:
24
+ * - Redundant whitespace removal
25
+ * - Common pattern abbreviation
26
+ * - Token-efficient phrasing
27
+ */
28
class PromptCompressor {
    /**
     * @param {object} [config] - Partial cost-optimization settings. Any key
     *   left undefined/null falls back to the default shown below.
     */
    constructor(config = {}) {
        this.logger = Logger_1.Logger.getInstance();
        this.config = {
            debug: config.debug ?? false,
            enableCompression: config.enableCompression ?? true,
            enableBatching: config.enableBatching ?? true,
            enableSmartCaching: config.enableSmartCaching ?? true,
            minCompressionRatio: config.minCompressionRatio ?? 0.05,
            maxBatchSize: config.maxBatchSize ?? 10,
            defaultCacheTTL: config.defaultCacheTTL ?? 3600
        };
    }
    /**
     * Apply `transform` to every part of `text` that lies outside a fenced
     * (triple-backtick) code block and return the re-assembled string.
     *
     * Fenced blocks are passed through verbatim: rewriting them would strip
     * indentation and turn identifiers/keywords such as `function` into `fn`,
     * changing the code the prompt is talking about.
     *
     * @param {string} text - Text that may contain fenced code blocks.
     * @param {(segment: string) => string} transform - Rewrites one non-code segment.
     * @returns {string} Text with only the non-code segments transformed.
     */
    transformOutsideCodeFences(text, transform) {
        // split() with a capture group keeps the separators: even indices are
        // plain text, odd indices are the captured fenced blocks.
        return text
            .split(/(```[\s\S]*?```)/)
            .map((segment, index) => (index % 2 === 1 ? segment : transform(segment)))
            .join('');
    }
    /**
     * Compress whitespace - remove redundant spaces and blank lines.
     *
     * Outside fenced code blocks: runs of spaces become one space, every line
     * is trimmed, and runs of 3+ newlines become a paragraph break (two
     * newlines). The final result is trimmed as a whole.
     *
     * @param {string} prompt - Text to clean up.
     * @returns {string} Whitespace-normalised text.
     */
    compressWhitespace(prompt) {
        return this.transformOutsideCodeFences(prompt, (text) => text
            .replace(/ +/g, ' ')
            .split('\n')
            .map((line) => line.trim())
            .join('\n')
            // Collapse AFTER trimming so that whitespace-only lines (and the
            // "\r" of CRLF input) cannot hide a run of blank lines.
            .replace(/\n{3,}/g, '\n\n'))
            .trim();
    }
    /**
     * Abbreviate common patterns to save tokens.
     *
     * Replacements are case-insensitive, whole-word, and skip fenced code
     * blocks. When `config.debug` is set, the applied rules are logged.
     *
     * @param {string} prompt - Text to abbreviate.
     * @returns {string} Text with the abbreviation rules applied.
     */
    abbreviatePatterns(prompt) {
        const abbreviations = [
            // Code-related patterns
            { pattern: /\bfunction\b/gi, replacement: 'fn' },
            { pattern: /\bconstant\b/gi, replacement: 'const' },
            { pattern: /\bvariable\b/gi, replacement: 'var' },
            { pattern: /\bparameter\b/gi, replacement: 'param' },
            { pattern: /\bargument\b/gi, replacement: 'arg' },
            { pattern: /\bconfiguration\b/gi, replacement: 'config' },
            { pattern: /\benvironment\b/gi, replacement: 'env' },
            { pattern: /\bdocumentation\b/gi, replacement: 'docs' },
            { pattern: /\bapplication\b/gi, replacement: 'app' },
            { pattern: /\bdatabase\b/gi, replacement: 'db' },
            { pattern: /\brepository\b/gi, replacement: 'repo' },
            // Test-related patterns
            { pattern: /\btest case\b/gi, replacement: 'test' },
            { pattern: /\bunit test\b/gi, replacement: 'unit' },
            { pattern: /\bintegration test\b/gi, replacement: 'integration' },
            // Common phrases
            { pattern: /\bfor example\b/gi, replacement: 'e.g.' },
            { pattern: /\bthat is\b/gi, replacement: 'i.e.' },
            { pattern: /\band so on\b/gi, replacement: 'etc.' }
        ];
        // A Set so a rule that fires in several text segments is reported once.
        const applied = new Set();
        const compressed = this.transformOutsideCodeFences(prompt, (text) => {
            let current = text;
            for (const { pattern, replacement } of abbreviations) {
                const next = current.replace(pattern, replacement);
                if (next !== current) {
                    applied.add(`${pattern.source} → ${replacement}`);
                    current = next;
                }
            }
            return current;
        });
        if (this.config.debug && applied.size > 0) {
            this.logger.debug('Applied abbreviations', {
                count: applied.size,
                examples: [...applied].slice(0, 3)
            });
        }
        return compressed;
    }
    /**
     * Optimize token usage through efficient phrasing.
     *
     * Drops a fixed list of filler words (together with the whitespace that
     * follows them) and then shortens a fixed list of verbose phrases. Fenced
     * code blocks are left untouched.
     *
     * @param {string} prompt - Text to tighten.
     * @returns {string} Tightened text.
     */
    optimizeTokenUsage(prompt) {
        const rewrites = [
            // Filler words that don't add meaning
            { pattern: /\bvery\s+/gi, replacement: '' },
            { pattern: /\breally\s+/gi, replacement: '' },
            { pattern: /\bactually\s+/gi, replacement: '' },
            { pattern: /\bjust\s+/gi, replacement: '' },
            { pattern: /\bbasically\s+/gi, replacement: '' },
            { pattern: /\bsimply\s+/gi, replacement: '' },
            // Verbose phrases with a shorter equivalent
            { pattern: /\bin order to\b/gi, replacement: 'to' },
            { pattern: /\bdue to the fact that\b/gi, replacement: 'because' },
            { pattern: /\bat this point in time\b/gi, replacement: 'now' },
            { pattern: /\bin the event that\b/gi, replacement: 'if' },
            { pattern: /\bprior to\b/gi, replacement: 'before' },
            { pattern: /\bsubsequent to\b/gi, replacement: 'after' }
        ];
        return this.transformOutsideCodeFences(prompt, (text) => rewrites.reduce((current, { pattern, replacement }) => current.replace(pattern, replacement), text));
    }
    /**
     * Full compression pipeline: whitespace, abbreviations, then phrasing.
     *
     * Token counts are a rough estimate (1 token ≈ 4 characters). When
     * compression is disabled, or the estimated saving ratio is below
     * `config.minCompressionRatio`, the original prompt is returned unchanged
     * with zero savings and an empty technique list.
     *
     * @param {string} prompt - Prompt text to compress.
     * @returns {{compressed: string, original: string, tokensSaved: number, ratio: number, techniques: string[]}}
     */
    compress(prompt) {
        const unchanged = () => ({
            compressed: prompt,
            original: prompt,
            tokensSaved: 0,
            ratio: 0,
            techniques: []
        });
        if (!this.config.enableCompression) {
            return unchanged();
        }
        const original = prompt;
        const techniques = ['whitespace', 'abbreviations', 'token-optimization'];
        const compressed = this.optimizeTokenUsage(this.abbreviatePatterns(this.compressWhitespace(prompt)));
        // Calculate savings (rough estimate: 1 token ≈ 4 characters)
        const originalTokens = Math.ceil(original.length / 4);
        const compressedTokens = Math.ceil(compressed.length / 4);
        const tokensSaved = Math.max(0, originalTokens - compressedTokens);
        const ratio = originalTokens > 0 ? tokensSaved / originalTokens : 0;
        // Only return compressed version if it meets the minimum ratio
        if (ratio < this.config.minCompressionRatio) {
            if (this.config.debug) {
                this.logger.debug('Compression below threshold, using original', {
                    ratio: ratio.toFixed(3),
                    threshold: this.config.minCompressionRatio
                });
            }
            return unchanged();
        }
        if (this.config.debug) {
            this.logger.debug('Prompt compressed', {
                originalLength: original.length,
                compressedLength: compressed.length,
                tokensSaved,
                ratio: ratio.toFixed(3),
                techniques
            });
        }
        return { compressed, original, tokensSaved, ratio, techniques };
    }
    /**
     * Compress LLM completion options.
     *
     * Runs `compress()` on every string message content, every text block of
     * array content and every system prompt text. Messages whose content is
     * neither a string nor an array are passed through untouched. The input
     * object is not mutated.
     *
     * @param {object} options - Completion options with `messages` and optional `system`.
     * @returns {{options: object, result: {compressed: string, original: string, tokensSaved: number, ratio: number, techniques: string[]}}}
     *   The rewritten options plus an aggregate estimate over all text.
     */
    compressOptions(options) {
        const squeeze = (text) => this.compress(text).compressed;
        // Compress message content
        const compressedMessages = options.messages.map((msg) => {
            if (typeof msg.content === 'string') {
                return { ...msg, content: squeeze(msg.content) };
            }
            if (!Array.isArray(msg.content)) {
                // Nothing compressible (e.g. null content) - keep as is.
                return msg;
            }
            // Handle array content (compress text blocks only)
            const content = msg.content.map((block) => (block.type === 'text' && block.text
                ? { ...block, text: squeeze(block.text) }
                : block));
            return { ...msg, content };
        });
        // Compress system prompts
        const compressedSystem = options.system?.map((sys) => (typeof sys.text === 'string'
            ? { ...sys, text: squeeze(sys.text) }
            : sys));
        const compressedOptions = {
            ...options,
            messages: compressedMessages,
            system: compressedSystem
        };
        // Calculate overall compression
        const originalText = this.extractAllText(options);
        const compressedText = this.extractAllText(compressedOptions);
        const originalTokens = Math.ceil(originalText.length / 4);
        const compressedTokens = Math.ceil(compressedText.length / 4);
        // Clamped at zero for consistency with compress().
        const tokensSaved = Math.max(0, originalTokens - compressedTokens);
        return {
            options: compressedOptions,
            result: {
                compressed: compressedText,
                original: originalText,
                tokensSaved,
                ratio: originalTokens > 0 ? tokensSaved / originalTokens : 0,
                techniques: ['full-options-compression']
            }
        };
    }
    /**
     * Extract all text from options: system prompt texts first, then message
     * texts in order, joined with newlines. Non-text blocks and content that is
     * neither a string nor an array contribute nothing.
     *
     * @param {object} options - Completion options with `messages` and optional `system`.
     * @returns {string} The concatenated text.
     */
    extractAllText(options) {
        const parts = [];
        if (options.system) {
            parts.push(...options.system.map((s) => s.text));
        }
        for (const msg of options.messages) {
            if (typeof msg.content === 'string') {
                parts.push(msg.content);
            }
            else if (Array.isArray(msg.content)) {
                parts.push(...msg.content.filter((c) => c.type === 'text').map((c) => c.text || ''));
            }
        }
        return parts.join('\n');
    }
}
260
+ exports.PromptCompressor = PromptCompressor;
261
+ /**
262
+ * RequestBatcher - Batch similar requests for cost efficiency
263
+ */
264
class RequestBatcher {
    /**
     * @param {object} [config] - Partial cost-optimization settings. Any key
     *   left undefined/null falls back to the default shown below.
     */
    constructor(config = {}) {
        this.logger = Logger_1.Logger.getInstance();
        this.config = {
            debug: config.debug ?? false,
            enableCompression: config.enableCompression ?? true,
            enableBatching: config.enableBatching ?? true,
            enableSmartCaching: config.enableSmartCaching ?? true,
            minCompressionRatio: config.minCompressionRatio ?? 0.05,
            maxBatchSize: config.maxBatchSize ?? 10,
            defaultCacheTTL: config.defaultCacheTTL ?? 3600
        };
    }
    /**
     * Group similar requests for batch processing.
     *
     * Requests are bucketed by `calculateGroupKey()`. Buckets holding a single
     * request are dropped; larger buckets are cut into batches of at most
     * `config.maxBatchSize` requests, each reported as one group with id
     * `<groupKey>-<batchIndex>`. Note that the last batch of a bucket may
     * contain a single request.
     *
     * @param {object[]} requests - Completion requests to group.
     * @returns {Array<{groupId: string, requests: object[], characteristics: object, estimatedSavings: number}>}
     *   The groups; empty when batching is disabled or there is nothing to group.
     */
    groupSimilarRequests(requests) {
        if (!this.config.enableBatching || !Array.isArray(requests) || requests.length === 0) {
            return [];
        }
        // Bucket by similarity key (Map keeps first-seen key order).
        const buckets = new Map();
        for (const request of requests) {
            const key = this.calculateGroupKey(request);
            const bucket = buckets.get(key);
            if (bucket) {
                bucket.push(request);
            }
            else {
                buckets.set(key, [request]);
            }
        }
        const requestGroups = [];
        for (const [groupId, members] of buckets) {
            // Only create groups with 2+ requests
            if (members.length < 2) {
                continue;
            }
            // Limit group size
            this.splitIntoBatches(members, this.config.maxBatchSize).forEach((batch, index) => {
                requestGroups.push({
                    groupId: `${groupId}-${index}`,
                    requests: batch,
                    characteristics: this.analyzeGroupCharacteristics(batch),
                    estimatedSavings: this.estimateBatchSavings(batch)
                });
            });
        }
        if (this.config.debug) {
            this.logger.debug('Requests grouped for batching', {
                totalRequests: requests.length,
                groups: requestGroups.length,
                // reduce() with a 0 seed: Math.max() of an empty list would be -Infinity.
                largestGroup: requestGroups.reduce((largest, group) => Math.max(largest, group.requests.length), 0)
            });
        }
        return requestGroups;
    }
    /**
     * Estimate savings from batching.
     *
     * Rough estimate: every request after the first saves ~15% of an assumed
     * $0.001 per-request cost.
     *
     * @param {object[]} requests - Requests in one batch.
     * @returns {number} Estimated saving in dollars; 0 for fewer than two requests.
     */
    estimateBatchSavings(requests) {
        if (requests.length < 2) {
            return 0;
        }
        const avgCostPerRequest = 0.001; // $0.001 per request estimate
        return (requests.length - 1) * avgCostPerRequest * 0.15;
    }
    /**
     * Calculate group key for similarity.
     *
     * The key joins, with `|`: the model, the temperature rounded to one
     * decimal (default 0.7), the max-token count bucketed down to a multiple
     * of 1000 (default 1024), whether a system prompt is present, and whether
     * the text contains a triple-backtick fence.
     *
     * @param {object} request - Completion request.
     * @returns {string} Similarity key.
     */
    calculateGroupKey(request) {
        const temperature = request.temperature ?? 0.7;
        const maxTokens = request.maxTokens ?? 1024;
        const hasSystem = Boolean(request.system && request.system.length > 0);
        const hasCode = /```/.test(this.extractAllText(request));
        return [
            request.model,
            `temp-${Math.round(temperature * 10)}`,
            `tokens-${Math.floor(maxTokens / 1000) * 1000}`,
            hasSystem ? 'sys' : 'nosys',
            hasCode ? 'code' : 'text'
        ].join('|');
    }
    /**
     * Analyze group characteristics.
     *
     * @param {object[]} requests - Requests in one batch.
     * @returns {{averageTokens: number, hasCode: boolean}} Estimated average
     *   tokens per request (1 token ≈ 4 characters; 0 for an empty batch) and
     *   whether any request text contains a triple-backtick fence.
     */
    analyzeGroupCharacteristics(requests) {
        const allContent = requests.map((r) => this.extractAllText(r)).join('\n');
        // Guard the division: an empty batch would otherwise yield NaN.
        const averageTokens = requests.length > 0
            ? Math.ceil(allContent.length / (requests.length * 4))
            : 0;
        return {
            averageTokens,
            hasCode: /```/.test(allContent)
        };
    }
    /**
     * Split requests into batches of at most `maxSize` items.
     *
     * @param {Array} requests - Items to split.
     * @param {number} maxSize - Maximum batch size. Fractions are rounded
     *   down; values below 1 (or NaN) are treated as 1, because a zero or
     *   negative step would never advance the loop.
     * @returns {Array[]} Consecutive slices of `requests`.
     */
    splitIntoBatches(requests, maxSize) {
        const step = maxSize >= 1 ? Math.floor(maxSize) : 1;
        const batches = [];
        for (let start = 0; start < requests.length; start += step) {
            batches.push(requests.slice(start, start + step));
        }
        return batches;
    }
    /**
     * Extract all text from request: system prompt texts first, then message
     * texts in order, joined with newlines. Non-text blocks and content that is
     * neither a string nor an array contribute nothing.
     *
     * @param {object} request - Completion request with `messages` and optional `system`.
     * @returns {string} The concatenated text.
     */
    extractAllText(request) {
        const parts = [];
        if (request.system) {
            parts.push(...request.system.map((s) => s.text));
        }
        for (const msg of request.messages) {
            if (typeof msg.content === 'string') {
                parts.push(msg.content);
            }
            else if (Array.isArray(msg.content)) {
                parts.push(...msg.content.filter((c) => c.type === 'text').map((c) => c.text || ''));
            }
        }
        return parts.join('\n');
    }
}
397
+ exports.RequestBatcher = RequestBatcher;
398
+ /**
399
+ * SmartCacheStrategy - Task-specific caching strategies
400
+ */
401
class SmartCacheStrategy {
    /**
     * @param {object} [config] - Partial cost-optimization settings. Any key
     *   left undefined/null falls back to the default shown below.
     */
    constructor(config = {}) {
        this.logger = Logger_1.Logger.getInstance();
        this.config = {
            debug: config.debug ?? false,
            enableCompression: config.enableCompression ?? true,
            enableBatching: config.enableBatching ?? true,
            enableSmartCaching: config.enableSmartCaching ?? true,
            minCompressionRatio: config.minCompressionRatio ?? 0.05,
            maxBatchSize: config.maxBatchSize ?? 10,
            defaultCacheTTL: config.defaultCacheTTL ?? 3600
        };
        this.strategies = new Map();
        this.initializeDefaultStrategies();
    }
    /**
     * Get cache strategy for task type.
     *
     * @param {string} taskType - Task type name, e.g. 'test-generation'.
     * @returns {{taskType: string, ttlSeconds: number, aggressive: boolean, confidenceThreshold: number, keyGenerator: Function}}
     *   The registered strategy, or a non-aggressive fallback that uses
     *   `config.defaultCacheTTL` and the default key generator.
     */
    getCacheStrategy(taskType) {
        return this.strategies.get(taskType) ?? {
            taskType,
            ttlSeconds: this.config.defaultCacheTTL,
            aggressive: false,
            confidenceThreshold: 0.85,
            keyGenerator: this.defaultKeyGenerator.bind(this)
        };
    }
    /**
     * Check if result should be cached.
     *
     * Returns false when smart caching is disabled, when the response did not
     * stop with `end_turn`, when the request was streamed, when the response
     * text is shorter than 50 characters, or when input + output tokens exceed
     * 10000. Missing `content` counts as empty text and missing token counts
     * as 0, so an incomplete response object never throws here.
     *
     * @param {object} options - The completion request options.
     * @param {object} response - The completion response.
     * @returns {boolean} Whether the response is worth caching.
     */
    shouldCache(options, response) {
        if (!this.config.enableSmartCaching) {
            return false;
        }
        // Don't cache errors / incomplete turns
        if (!response || response.stop_reason !== 'end_turn') {
            return false;
        }
        // Don't cache streaming responses
        if (options?.stream) {
            return false;
        }
        // Don't cache very short responses (likely errors or incomplete)
        const blocks = Array.isArray(response.content) ? response.content : [];
        const responseText = blocks.map((block) => block?.text ?? '').join('');
        if (responseText.length < 50) {
            return false;
        }
        // Don't cache very expensive responses (might be one-off)
        const totalTokens = (response.usage?.input_tokens ?? 0) + (response.usage?.output_tokens ?? 0);
        return !(totalTokens > 10000);
    }
    /**
     * Generate cache key for request.
     *
     * @param {object} options - The completion request options.
     * @param {string} [taskType] - When given, the key generator of that task
     *   type's strategy is used; otherwise the default key generator.
     * @returns {string} Cache key.
     */
    generateCacheKey(options, taskType) {
        return taskType
            ? this.getCacheStrategy(taskType).keyGenerator(options)
            : this.defaultKeyGenerator(options);
    }
    /**
     * Flatten a message `content` value into plain text for hashing.
     *
     * @param {*} content - String, array of content blocks, or anything else.
     * @param {boolean} textBlocksOnly - When true, only blocks whose `type` is
     *   'text' contribute; otherwise every block's `text` (if any) is used.
     * @returns {string} The text; '' for content that is neither string nor array.
     */
    contentToText(content, textBlocksOnly) {
        if (typeof content === 'string') {
            return content;
        }
        if (!Array.isArray(content)) {
            return '';
        }
        const blocks = textBlocksOnly ? content.filter((block) => block.type === 'text') : content;
        return blocks.map((block) => block.text || '').join('');
    }
    /**
     * Default cache key generator.
     *
     * Key layout: `<model>:<hash of last user message>:t<temperature*10>`.
     * The hash part is omitted when there is no user message; temperature
     * defaults to 0.7. Only the last user message is hashed, so earlier turns
     * and system prompts do not influence the key.
     *
     * @param {object} options - The completion request options.
     * @returns {string} Cache key.
     */
    defaultKeyGenerator(options) {
        const parts = [options.model];
        // Extract query (last user message)
        const userMessages = options.messages.filter((m) => m.role === 'user');
        const lastUserMsg = userMessages[userMessages.length - 1];
        if (lastUserMsg) {
            parts.push(this.simpleHash(this.contentToText(lastUserMsg.content, true)));
        }
        const temperature = options.temperature ?? 0.7;
        parts.push(`t${Math.round(temperature * 10)}`);
        return parts.join(':');
    }
    /**
     * Simple hash function for cache keys.
     *
     * A 31-multiplier rolling hash over UTF-16 code units, truncated to a
     * signed 32-bit integer each step; the absolute value is rendered in
     * base 36. Not collision-resistant.
     *
     * @param {string} str - Text to hash.
     * @returns {string} Base-36 hash string ('0' for the empty string).
     */
    simpleHash(str) {
        let hash = 0;
        for (let i = 0; i < str.length; i++) {
            // (hash << 5) - hash === hash * 31; `| 0` keeps it a 32-bit integer.
            hash = (((hash << 5) - hash) + str.charCodeAt(i)) | 0;
        }
        return Math.abs(hash).toString(36);
    }
    /**
     * Initialize default caching strategies.
     *
     * Registers one strategy per built-in task type. Only test generation
     * uses a specialised key generator; all others use the default one.
     */
    initializeDefaultStrategies() {
        const defaultKey = this.defaultKeyGenerator.bind(this);
        const testGenerationKey = this.testGenerationKeyGenerator.bind(this);
        // [taskType, ttlSeconds, aggressive, confidenceThreshold, keyGenerator]
        const defaults = [
            ['test-generation', 1800, false, 0.90, testGenerationKey], // 30 minutes - tests change frequently
            ['coverage-analysis', 7200, true, 0.80, defaultKey], // 2 hours - code doesn't change often
            ['code-review', 3600, false, 0.85, defaultKey], // 1 hour
            ['bug-detection', 900, false, 0.92, defaultKey], // 15 minutes - need fresh analysis
            ['documentation', 14400, true, 0.75, defaultKey], // 4 hours - stable content
            ['refactoring', 1800, false, 0.88, defaultKey], // 30 minutes
            ['performance-testing', 3600, false, 0.85, defaultKey], // 1 hour
            ['security-scanning', 1800, false, 0.90, defaultKey] // 30 minutes - security landscape changes
        ];
        for (const [taskType, ttlSeconds, aggressive, confidenceThreshold, keyGenerator] of defaults) {
            this.strategies.set(taskType, {
                taskType,
                ttlSeconds,
                aggressive,
                confidenceThreshold,
                keyGenerator
            });
        }
    }
    /**
     * Specialized key generator for test generation.
     *
     * Key layout: `test-gen:<hash>`, where the hash covers the first fenced
     * code block found in the user messages (likely the source under test),
     * or all user-message text when there is no fenced block. Model and
     * temperature are not part of this key.
     *
     * @param {object} options - The completion request options.
     * @returns {string} Cache key.
     */
    testGenerationKeyGenerator(options) {
        const userText = options.messages
            .filter((m) => m.role === 'user')
            .map((m) => this.contentToText(m.content, false))
            .join('\n');
        // Look for code blocks; fall back to hashing everything.
        const codeBlocks = userText.match(/```[\s\S]*?```/g);
        const hashed = codeBlocks && codeBlocks.length > 0 ? codeBlocks[0] : userText;
        return ['test-gen', this.simpleHash(hashed)].join(':');
    }
}
599
+ exports.SmartCacheStrategy = SmartCacheStrategy;
600
/**
 * ModelRightSizer - Budget-aware model selection.
 *
 * Decides whether a request should be moved to a cheaper model tier
 * ('small' | 'medium' | 'large') given the task complexity and the
 * current budget status, and estimates the resulting savings.
 */
class ModelRightSizer {
    constructor() {
        this.logger = Logger_1.Logger.getInstance();
    }
    /**
     * Determine if a smaller model should be used based on budget.
     *
     * @param complexity   A HybridRouter TaskComplexity value.
     * @param budgetStatus Object read for `isOverBudget`, `alertTriggered`
     *                     and `utilizationPercentage`.
     * @param constraints  Optional; only `requiredCapabilities` is inspected.
     * @returns An object with `shouldDowngrade`, `reason` and `qualityImpact`;
     *          when a downgrade is recommended it also carries
     *          `recommendedModel` and `estimatedSavings`.
     */
    shouldDowngradeModel(complexity, budgetStatus, constraints) {
        // Nothing to do while the budget is healthy and no alert has fired.
        if (!budgetStatus.isOverBudget && !budgetStatus.alertTriggered) {
            return {
                shouldDowngrade: false,
                reason: 'Budget healthy, no downgrade needed',
                qualityImpact: 1.0
            };
        }
        // Budget pressure (0-1, higher = more pressure).
        const budgetPressure = this.calculateBudgetPressure(budgetStatus);
        // Some tasks must stay on their current tier regardless of budget.
        if (!this.canDowngrade(complexity, constraints)) {
            return {
                shouldDowngrade: false,
                reason: 'Task complexity requires current model tier',
                qualityImpact: 1.0
            };
        }
        const { shouldDowngrade, recommendedTier, qualityImpact } = this.makeDowngradeDecision(complexity, budgetPressure, budgetStatus);
        if (!shouldDowngrade) {
            return {
                shouldDowngrade: false,
                reason: 'Budget pressure not severe enough to warrant downgrade',
                qualityImpact: 1.0
            };
        }
        return {
            shouldDowngrade: true,
            recommendedModel: this.getModelForTier(recommendedTier),
            reason: this.getDowngradeReason(budgetPressure, budgetStatus),
            estimatedSavings: this.estimateSavings(complexity, recommendedTier),
            qualityImpact
        };
    }
    /**
     * Calculate budget pressure score (0-1).
     *
     * Over budget yields 1.0. Otherwise the utilization percentage is scaled
     * to a fraction and clamped to [0, 0.95], so only a truly exceeded budget
     * reports maximum pressure. A missing or non-numeric utilization yields 0
     * instead of leaking NaN into later threshold comparisons.
     */
    calculateBudgetPressure(budgetStatus) {
        if (budgetStatus.isOverBudget) {
            return 1.0; // Maximum pressure
        }
        const utilization = Number(budgetStatus.utilizationPercentage);
        if (!Number.isFinite(utilization)) {
            return 0;
        }
        return Math.min(Math.max(utilization / 100, 0), 0.95);
    }
    /**
     * Check if the task allows a model downgrade.
     *
     * VERY_COMPLEX tasks are never downgraded. When any required
     * capabilities are listed, only SIMPLE tasks may be downgraded.
     */
    canDowngrade(complexity, constraints) {
        if (complexity === HybridRouter_1.TaskComplexity.VERY_COMPLEX) {
            return false;
        }
        if (constraints?.requiredCapabilities && constraints.requiredCapabilities.length > 0) {
            // Specific capabilities required: be conservative.
            return complexity === HybridRouter_1.TaskComplexity.SIMPLE;
        }
        return true;
    }
    /**
     * Make the downgrade decision.
     *
     * Rules, in order:
     *  - over budget:      SIMPLE -> 'small' (0.95), anything else -> 'medium' (0.85)
     *  - SIMPLE, > 60%:    'small' (0.95)
     *  - MODERATE, > 80%:  'medium' (0.90)
     *  - otherwise:        no downgrade ('large', 1.0)
     *
     * SIMPLE tasks map to 'small' at every pressure level above 60%, so a
     * higher pressure never produces a weaker downgrade than a lower one and
     * the result agrees with the over-budget branch.
     *
     * @returns `{ shouldDowngrade, recommendedTier, qualityImpact }`.
     */
    makeDowngradeDecision(complexity, budgetPressure, budgetStatus) {
        const { SIMPLE, MODERATE } = HybridRouter_1.TaskComplexity;
        const isSimple = complexity === SIMPLE;
        // Over budget - always downgrade if possible
        if (budgetStatus.isOverBudget) {
            return {
                shouldDowngrade: true,
                recommendedTier: isSimple ? 'small' : 'medium',
                qualityImpact: isSimple ? 0.95 : 0.85
            };
        }
        // Simple tasks tolerate the smallest tier from moderate pressure (>60%) upwards.
        if (isSimple && budgetPressure > 0.60) {
            return {
                shouldDowngrade: true,
                recommendedTier: 'small',
                qualityImpact: 0.95
            };
        }
        // Moderate tasks are only downgraded under high pressure (>80%).
        if (complexity === MODERATE && budgetPressure > 0.80) {
            return {
                shouldDowngrade: true,
                recommendedTier: 'medium',
                qualityImpact: 0.90
            };
        }
        return {
            shouldDowngrade: false,
            recommendedTier: 'large',
            qualityImpact: 1.0
        };
    }
    /**
     * Get the model identifier for a tier ('small' | 'medium' | 'large').
     * Returns undefined for any other tier name.
     */
    getModelForTier(tier) {
        const modelMap = {
            small: 'claude-haiku-3-5',
            medium: 'claude-sonnet-3-5',
            large: 'claude-opus-4'
        };
        return modelMap[tier];
    }
    /**
     * Estimate cost savings from a downgrade.
     *
     * Compares the 'large' tier cost against the target tier for a rough
     * per-task token count. An unknown complexity counts as 0 tokens and an
     * unknown tier is priced like 'large', so the estimate is 0 rather than NaN.
     */
    estimateSavings(complexity, tier) {
        // Rough cost estimates per 1M tokens
        const costs = {
            small: 1.00, // $1 per 1M tokens
            medium: 3.00, // $3 per 1M tokens
            large: 15.00 // $15 per 1M tokens
        };
        // Estimate tokens per task
        const tokensPerTask = {
            [HybridRouter_1.TaskComplexity.SIMPLE]: 500,
            [HybridRouter_1.TaskComplexity.MODERATE]: 2000,
            [HybridRouter_1.TaskComplexity.COMPLEX]: 5000,
            [HybridRouter_1.TaskComplexity.VERY_COMPLEX]: 10000
        };
        const tokens = tokensPerTask[complexity] ?? 0;
        const targetRate = costs[tier] ?? costs.large;
        const currentCost = (costs.large * tokens) / 1000000;
        const newCost = (targetRate * tokens) / 1000000;
        return currentCost - newCost;
    }
    /**
     * Get the human-readable downgrade reason message.
     */
    getDowngradeReason(budgetPressure, budgetStatus) {
        if (budgetStatus.isOverBudget) {
            return 'Budget exceeded - using cost-efficient model to stay within limits';
        }
        if (budgetPressure > 0.80) {
            return `Budget utilization high (${budgetStatus.utilizationPercentage.toFixed(1)}%) - downgrading to preserve budget`;
        }
        return `Budget pressure at ${(budgetPressure * 100).toFixed(1)}% - proactively optimizing costs`;
    }
}
758
+ exports.ModelRightSizer = ModelRightSizer;
759
/**
 * CostOptimizationManager - Orchestrates all optimization strategies.
 *
 * Owns one PromptCompressor, RequestBatcher, SmartCacheStrategy and
 * ModelRightSizer, exposes them through getters, and combines prompt
 * compression with budget-aware model selection in `optimizeRequest`.
 */
class CostOptimizationManager {
    /**
     * @param config Optional configuration object; passed unchanged to the
     *               compressor, batcher and cache strategy constructors.
     */
    constructor(config = {}) {
        this.compressor = new PromptCompressor(config);
        this.batcher = new RequestBatcher(config);
        this.cacheStrategy = new SmartCacheStrategy(config);
        this.rightSizer = new ModelRightSizer();
        this.logger = Logger_1.Logger.getInstance();
    }
    /**
     * Get prompt compressor
     */
    getCompressor() {
        return this.compressor;
    }
    /**
     * Get request batcher
     */
    getBatcher() {
        return this.batcher;
    }
    /**
     * Get cache strategy manager
     */
    getCacheStrategy() {
        return this.cacheStrategy;
    }
    /**
     * Get model right-sizer
     */
    getRightSizer() {
        return this.rightSizer;
    }
    /**
     * Apply all applicable optimizations to a request.
     *
     * @param options Request options; a shallow copy is handed to the
     *                compressor so the caller's object is not reassigned here.
     * @param context Optional. Right-sizing runs only when both `complexity`
     *                and `budgetStatus` are present (non-null). An optional
     *                `constraints` entry, if supplied, is forwarded to the
     *                right-sizer (NOTE(review): property name is an
     *                assumption - confirm against the context type).
     * @returns `{ optimizedOptions, compressionResult, modelDowngrade, estimatedSavings }`;
     *          `modelDowngrade` is undefined when right-sizing did not run.
     */
    optimizeRequest(options, context) {
        let estimatedSavings = 0;
        let modelDowngrade;
        // 1. Apply prompt compression
        const compressed = this.compressor.compressOptions({ ...options });
        const optimizedOptions = compressed.options;
        const compressionResult = compressed.result;
        // Estimate savings from compression (rough: $3 per 1M tokens)
        estimatedSavings += (compressionResult.tokensSaved / 1000000) * 3;
        // 2. Check for model right-sizing.
        // Explicit null checks (not truthiness) so a falsy complexity value
        // such as a numeric enum member 0 is not silently skipped.
        if (context?.complexity != null && context.budgetStatus != null) {
            modelDowngrade = this.rightSizer.shouldDowngradeModel(context.complexity, context.budgetStatus, context.constraints);
            if (modelDowngrade.shouldDowngrade && modelDowngrade.recommendedModel) {
                // NOTE(review): this overrides whatever model the request named,
                // even one that may already be cheaper than the recommendation.
                optimizedOptions.model = modelDowngrade.recommendedModel;
                estimatedSavings += modelDowngrade.estimatedSavings || 0;
            }
        }
        this.logger.debug('Request optimized', {
            compressionRatio: compressionResult.ratio.toFixed(3),
            tokensSaved: compressionResult.tokensSaved,
            modelDowngraded: modelDowngrade?.shouldDowngrade,
            estimatedSavings: estimatedSavings.toFixed(4)
        });
        return {
            optimizedOptions,
            compressionResult,
            modelDowngrade,
            estimatedSavings
        };
    }
}
830
+ exports.CostOptimizationManager = CostOptimizationManager;
831
+ //# sourceMappingURL=CostOptimizationStrategies.js.map