tachibot-mcp 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/.env.example +260 -0
  2. package/CHANGELOG.md +54 -0
  3. package/CODE_OF_CONDUCT.md +56 -0
  4. package/CONTRIBUTING.md +54 -0
  5. package/Dockerfile +36 -0
  6. package/LICENSE +644 -0
  7. package/README.md +201 -0
  8. package/SECURITY.md +95 -0
  9. package/dist/personality/komaai-expressions.js +12 -0
  10. package/dist/profiles/balanced.json +33 -0
  11. package/dist/profiles/code_focus.json +33 -0
  12. package/dist/profiles/full.json +33 -0
  13. package/dist/profiles/minimal.json +33 -0
  14. package/dist/profiles/research_power.json +33 -0
  15. package/dist/scripts/build-profiles.js +46 -0
  16. package/dist/src/application/services/focus/FocusModeRegistry.js +46 -0
  17. package/dist/src/application/services/focus/FocusTool.service.js +109 -0
  18. package/dist/src/application/services/focus/ModeRegistry.js +46 -0
  19. package/dist/src/application/services/focus/modes/focus-deep.mode.js +27 -0
  20. package/dist/src/application/services/focus/modes/status.mode.js +50 -0
  21. package/dist/src/application/services/focus/modes/tachibot-status.mode.js +50 -0
  22. package/dist/src/collaborative-orchestrator.js +391 -0
  23. package/dist/src/config/model-constants.js +188 -0
  24. package/dist/src/config/model-defaults.js +57 -0
  25. package/dist/src/config/model-preferences.js +382 -0
  26. package/dist/src/config/timeout-config.js +130 -0
  27. package/dist/src/config.js +173 -0
  28. package/dist/src/domain/interfaces/IFocusMode.js +5 -0
  29. package/dist/src/domain/interfaces/IProvider.js +6 -0
  30. package/dist/src/domain/interfaces/ITool.js +5 -0
  31. package/dist/src/focus-deep.js +245 -0
  32. package/dist/src/infrastructure/ascii/art/robots.ascii.js +16 -0
  33. package/dist/src/mcp-client.js +90 -0
  34. package/dist/src/memory/index.js +17 -0
  35. package/dist/src/memory/memory-config.js +135 -0
  36. package/dist/src/memory/memory-interface.js +174 -0
  37. package/dist/src/memory/memory-manager.js +383 -0
  38. package/dist/src/memory/providers/devlog-provider.js +385 -0
  39. package/dist/src/memory/providers/hybrid-provider.js +399 -0
  40. package/dist/src/memory/providers/local-provider.js +388 -0
  41. package/dist/src/memory/providers/mem0-provider.js +337 -0
  42. package/dist/src/modes/architect.js +477 -0
  43. package/dist/src/modes/auditor.js +362 -0
  44. package/dist/src/modes/challenger.js +841 -0
  45. package/dist/src/modes/code-reviewer.js +382 -0
  46. package/dist/src/modes/commit-guardian.js +424 -0
  47. package/dist/src/modes/documentation-writer.js +572 -0
  48. package/dist/src/modes/scout.js +587 -0
  49. package/dist/src/modes/shared/helpers/challenger-helpers.js +454 -0
  50. package/dist/src/modes/shared/helpers/index.js +17 -0
  51. package/dist/src/modes/shared/helpers/scout-helpers.js +270 -0
  52. package/dist/src/modes/shared/helpers/verifier-helpers.js +332 -0
  53. package/dist/src/modes/test-architect.js +767 -0
  54. package/dist/src/modes/verifier.js +378 -0
  55. package/dist/src/monitoring/performance-monitor.js +435 -0
  56. package/dist/src/optimization/batch-executor.js +121 -0
  57. package/dist/src/optimization/context-pruner.js +196 -0
  58. package/dist/src/optimization/cost-monitor.js +338 -0
  59. package/dist/src/optimization/index.js +65 -0
  60. package/dist/src/optimization/model-router.js +264 -0
  61. package/dist/src/optimization/result-cache.js +114 -0
  62. package/dist/src/optimization/token-optimizer.js +257 -0
  63. package/dist/src/optimization/token-tracker.js +118 -0
  64. package/dist/src/orchestrator-instructions.js +128 -0
  65. package/dist/src/orchestrator-lite.js +139 -0
  66. package/dist/src/orchestrator.js +191 -0
  67. package/dist/src/orchestrators/collaborative/interfaces/IToolExecutionEngine.js +1 -0
  68. package/dist/src/orchestrators/collaborative/interfaces/IToolExecutionStrategy.js +5 -0
  69. package/dist/src/orchestrators/collaborative/interfaces/IVisualizationRenderer.js +1 -0
  70. package/dist/src/orchestrators/collaborative/registries/ModelProviderRegistry.js +95 -0
  71. package/dist/src/orchestrators/collaborative/registries/ToolAdapterRegistry.js +64 -0
  72. package/dist/src/orchestrators/collaborative/services/tool-execution/ToolExecutionService.js +502 -0
  73. package/dist/src/orchestrators/collaborative/services/visualization/VisualizationService.js +206 -0
  74. package/dist/src/orchestrators/collaborative/types/session-types.js +5 -0
  75. package/dist/src/profiles/balanced.js +37 -0
  76. package/dist/src/profiles/code_focus.js +37 -0
  77. package/dist/src/profiles/debug_intensive.js +59 -0
  78. package/dist/src/profiles/full.js +37 -0
  79. package/dist/src/profiles/minimal.js +37 -0
  80. package/dist/src/profiles/research_code.js +59 -0
  81. package/dist/src/profiles/research_power.js +37 -0
  82. package/dist/src/profiles/types.js +5 -0
  83. package/dist/src/profiles/workflow_builder.js +53 -0
  84. package/dist/src/prompt-engineer-lite.js +78 -0
  85. package/dist/src/prompt-engineer.js +399 -0
  86. package/dist/src/reasoning-chain.js +508 -0
  87. package/dist/src/sequential-thinking.js +291 -0
  88. package/dist/src/server-diagnostic.js +74 -0
  89. package/dist/src/server-raw.js +158 -0
  90. package/dist/src/server-simple.js +58 -0
  91. package/dist/src/server.js +514 -0
  92. package/dist/src/session/session-logger.js +617 -0
  93. package/dist/src/session/session-manager.js +571 -0
  94. package/dist/src/session/session-tools.js +400 -0
  95. package/dist/src/tools/advanced-modes.js +200 -0
  96. package/dist/src/tools/claude-integration.js +356 -0
  97. package/dist/src/tools/consolidated/ai-router.js +174 -0
  98. package/dist/src/tools/consolidated/ai-tool.js +48 -0
  99. package/dist/src/tools/consolidated/brainstorm-tool.js +87 -0
  100. package/dist/src/tools/consolidated/environment-detector.js +80 -0
  101. package/dist/src/tools/consolidated/index.js +50 -0
  102. package/dist/src/tools/consolidated/search-tool.js +110 -0
  103. package/dist/src/tools/consolidated/workflow-tool.js +238 -0
  104. package/dist/src/tools/gemini-tools.js +329 -0
  105. package/dist/src/tools/grok-enhanced.js +376 -0
  106. package/dist/src/tools/grok-tools.js +299 -0
  107. package/dist/src/tools/lmstudio-tools.js +223 -0
  108. package/dist/src/tools/openai-tools.js +498 -0
  109. package/dist/src/tools/openrouter-tools.js +317 -0
  110. package/dist/src/tools/optimized-wrapper.js +204 -0
  111. package/dist/src/tools/perplexity-tools.js +294 -0
  112. package/dist/src/tools/pingpong-tool.js +343 -0
  113. package/dist/src/tools/qwen-wrapper.js +74 -0
  114. package/dist/src/tools/tool-router.js +444 -0
  115. package/dist/src/tools/unified-ai-provider.js +260 -0
  116. package/dist/src/tools/workflow-runner.js +425 -0
  117. package/dist/src/tools/workflow-validator-tool.js +107 -0
  118. package/dist/src/types.js +23 -0
  119. package/dist/src/utils/input-validator.js +130 -0
  120. package/dist/src/utils/model-router.js +91 -0
  121. package/dist/src/utils/progress-stream.js +255 -0
  122. package/dist/src/utils/provider-router.js +88 -0
  123. package/dist/src/utils/smart-api-client.js +146 -0
  124. package/dist/src/utils/table-builder.js +218 -0
  125. package/dist/src/utils/timestamp-formatter.js +134 -0
  126. package/dist/src/utils/tool-compressor.js +257 -0
  127. package/dist/src/utils/tool-config.js +201 -0
  128. package/dist/src/validators/dependency-graph-validator.js +147 -0
  129. package/dist/src/validators/interpolation-validator.js +222 -0
  130. package/dist/src/validators/output-usage-validator.js +151 -0
  131. package/dist/src/validators/syntax-validator.js +102 -0
  132. package/dist/src/validators/tool-registry-validator.js +123 -0
  133. package/dist/src/validators/tool-types.js +97 -0
  134. package/dist/src/validators/types.js +8 -0
  135. package/dist/src/validators/workflow-validator.js +134 -0
  136. package/dist/src/visualizer-lite.js +42 -0
  137. package/dist/src/visualizer.js +179 -0
  138. package/dist/src/workflows/circuit-breaker.js +199 -0
  139. package/dist/src/workflows/custom-workflows.js +451 -0
  140. package/dist/src/workflows/engine/AutoSynthesizer.js +97 -0
  141. package/dist/src/workflows/engine/StepParameterResolver.js +74 -0
  142. package/dist/src/workflows/engine/VariableInterpolator.js +123 -0
  143. package/dist/src/workflows/engine/WorkflowDiscovery.js +125 -0
  144. package/dist/src/workflows/engine/WorkflowExecutionEngine.js +485 -0
  145. package/dist/src/workflows/engine/WorkflowExecutor.js +113 -0
  146. package/dist/src/workflows/engine/WorkflowFileManager.js +244 -0
  147. package/dist/src/workflows/engine/WorkflowHelpers.js +114 -0
  148. package/dist/src/workflows/engine/WorkflowOutputFormatter.js +83 -0
  149. package/dist/src/workflows/engine/events/WorkflowEventBus.js +132 -0
  150. package/dist/src/workflows/engine/events/interfaces/IEventBus.js +5 -0
  151. package/dist/src/workflows/engine/handlers/ErrorRecoveryHandler.js +162 -0
  152. package/dist/src/workflows/engine/handlers/PromptEnhancementHandler.js +115 -0
  153. package/dist/src/workflows/engine/handlers/SessionPersistenceHandler.js +167 -0
  154. package/dist/src/workflows/engine/handlers/StepExecutionHandler.js +231 -0
  155. package/dist/src/workflows/engine/handlers/ToolInvocationHandler.js +46 -0
  156. package/dist/src/workflows/engine/interfaces/IAutoSynthesizer.js +5 -0
  157. package/dist/src/workflows/engine/interfaces/IStepParameterResolver.js +5 -0
  158. package/dist/src/workflows/engine/interfaces/IVariableInterpolator.js +5 -0
  159. package/dist/src/workflows/engine/interfaces/IWorkflowDiscovery.js +4 -0
  160. package/dist/src/workflows/engine/interfaces/IWorkflowFileManager.js +5 -0
  161. package/dist/src/workflows/engine/interfaces/IWorkflowOutputFormatter.js +5 -0
  162. package/dist/src/workflows/engine/state/WorkflowStateMachine.js +194 -0
  163. package/dist/src/workflows/engine/state/interfaces/IStateMachine.js +17 -0
  164. package/dist/src/workflows/fallback-strategies.js +373 -0
  165. package/dist/src/workflows/message-queue.js +455 -0
  166. package/dist/src/workflows/model-router.js +189 -0
  167. package/dist/src/workflows/orchestrator-examples.js +174 -0
  168. package/dist/src/workflows/orchestrator-integration.js +200 -0
  169. package/dist/src/workflows/self-healing.js +524 -0
  170. package/dist/src/workflows/tool-mapper.js +407 -0
  171. package/dist/src/workflows/tool-orchestrator.js +796 -0
  172. package/dist/src/workflows/workflow-engine.js +573 -0
  173. package/dist/src/workflows/workflow-parser.js +283 -0
  174. package/dist/src/workflows/workflow-types.js +95 -0
  175. package/dist/src/workflows.js +568 -0
  176. package/dist/test-workflow-file-output.js +93 -0
  177. package/docs/API_KEYS.md +570 -0
  178. package/docs/CLAUDE_CODE_SETUP.md +181 -0
  179. package/docs/CLAUDE_DESKTOP_MANUAL.md +127 -0
  180. package/docs/CONFIGURATION.md +745 -0
  181. package/docs/FOCUS_MODES.md +240 -0
  182. package/docs/INSTALLATION_BOTH.md +145 -0
  183. package/docs/TERMS.md +352 -0
  184. package/docs/TOOLS_REFERENCE.md +1622 -0
  185. package/docs/TOOL_PARAMETERS.md +496 -0
  186. package/docs/TOOL_PROFILES.md +236 -0
  187. package/docs/WORKFLOWS.md +987 -0
  188. package/docs/WORKFLOW_OUTPUT.md +198 -0
  189. package/docs/WORKFLOW_PROGRESS_TRACKING.md +305 -0
  190. package/docs/workflows/design-brainstorm.md +335 -0
  191. package/package.json +97 -0
  192. package/profiles/balanced.json +37 -0
  193. package/profiles/code_focus.json +37 -0
  194. package/profiles/debug_intensive.json +34 -0
  195. package/profiles/full.json +37 -0
  196. package/profiles/minimal.json +37 -0
  197. package/profiles/research_power.json +37 -0
  198. package/profiles/workflow_builder.json +37 -0
  199. package/smithery.yaml +66 -0
  200. package/start.sh +8 -0
  201. package/tools.config.json +81 -0
  202. package/tsconfig.json +18 -0
  203. package/workflows/accessibility-code-audit.yaml +92 -0
  204. package/workflows/code-architecture-review.yaml +202 -0
  205. package/workflows/code-review.yaml +142 -0
  206. package/workflows/core/iterative-problem-solver.yaml +283 -0
  207. package/workflows/creative-brainstorm-yaml.yaml +215 -0
  208. package/workflows/pingpong.yaml +141 -0
  209. package/workflows/system/README.md +412 -0
  210. package/workflows/system/challenger.yaml +175 -0
  211. package/workflows/system/scout.yaml +164 -0
  212. package/workflows/system/verifier.yaml +133 -0
  213. package/workflows/ultra-creative-brainstorm.yaml +318 -0
  214. package/workflows/ux-research-flow.yaml +92 -0
@@ -0,0 +1,264 @@
1
+ /**
2
+ * Smart Model Router - Optimized model selection based on query complexity
3
+ * Part of Phase 1A implementation for cost optimization
4
+ */
5
/**
 * Named routing tiers mapped to concrete model identifiers.
 *
 * Several names intentionally alias the same model id (EFFICIENT/GPT5_MINI
 * and STANDARD/GPT5_FULL); the tier comments describe the intended price
 * band of each name, not a guarantee enforced by code.
 */
export var ModelTier = {
    // Tier 0: GPT-5 Nano - cheapest option
    ULTRA_CHEAP: "gpt-5-nano",
    // Tier 1: ultra fast & cheap (< $0.001 per request)
    ULTRA_EFFICIENT: "gemini-2.5-flash",
    EFFICIENT: "gpt-5-mini",
    // Tier 2: balanced ($0.001-$0.01 per request)
    STANDARD: "gpt-5",
    GPT5_MINI: "gpt-5-mini",
    // Tier 3: advanced ($0.01-$0.05 per request)
    WEB_SEARCH: "perplexity-sonar-pro",
    // Tier 4: premium (use with caution)
    GPT5_FULL: "gpt-5",
};
20
/**
 * Per-model pricing and latency table keyed by model id.
 *
 * `input` / `output` are prices per 1,000 tokens (estimateCost divides the
 * token-weighted sum by 1000). `latency` is presumably a typical response
 * time in milliseconds - TODO confirm against the consumers of this table.
 */
const MODEL_COSTS = (() => {
    // Small constructor so every row has exactly the same shape.
    const row = (input, output, latency) => ({ input, output, latency });
    return {
        // GPT-5 models (Nov 2025 pricing)
        "gpt-5-nano": row(0.00005, 0.0004, 400), // cheapest
        "gpt-5-mini": row(0.00025, 0.002, 800),
        "gpt-5": row(0.00125, 0.01, 2000),
        // Existing models
        "gemini-2.5-flash": row(0.000075, 0.0003, 500),
        "gemini-2.5-pro": row(0.00015, 0.0006, 1000),
        qwencoder: row(0.00015, 0.0006, 1000),
        "perplexity-sonar-pro": row(0.006, 0.006, 2000),
    };
})();
31
/**
 * Keyword/length based router that picks a model tier for a query.
 *
 * All classification is heuristic: it relies on substring matches against
 * the query text and on its word count, not on any real parsing.
 */
export class SmartModelRouter {
    constructor() {
        // Memoized complexity verdicts keyed by the exact query string.
        this.complexityCache = new Map();
        // Upper bound on memoized entries; the cache is reset when it is reached
        // so a long-running process cannot accumulate query strings forever.
        this.maxCacheEntries = 1000;
    }
    /**
     * Classify a query as "simple", "moderate" or "complex".
     *
     * @param {string} query - Raw user query.
     * @returns {"simple"|"moderate"|"complex"} Complexity verdict (memoized per query).
     */
    analyzeComplexity(query) {
        const cached = this.complexityCache.get(query);
        if (cached) {
            return cached;
        }
        // Trim first so leading/trailing whitespace does not add phantom words.
        const trimmed = query.trim();
        const wordCount = trimmed === "" ? 0 : trimmed.split(/\s+/).length;
        const lowerQuery = query.toLowerCase();
        // Code keywords are matched case-sensitively on purpose (they are lowercase in code).
        const hasCode = /```|function|class|import|const|let|var/.test(query);
        const hasMultiStep = /step|first|then|after|finally|additionally/.test(lowerQuery);
        const hasMath = /calculate|solve|equation|formula/.test(lowerQuery);
        const hasAnalysis = /analyze|compare|evaluate|assess/.test(lowerQuery);
        let complexity = "simple";
        if (wordCount > 100 || hasMultiStep || hasMath) {
            complexity = "complex";
        }
        else if (wordCount > 30 || hasCode || hasAnalysis) {
            complexity = "moderate";
        }
        // Keep the memo bounded without requiring callers to invoke clearCache().
        if (this.complexityCache.size >= this.maxCacheEntries) {
            this.complexityCache.clear();
        }
        this.complexityCache.set(query, complexity);
        return complexity;
    }
    /**
     * Guess the kind of task a query represents.
     *
     * Checks run in priority order: code, research, analysis, reasoning, then chat.
     * Matching is case-insensitive for every category.
     *
     * @param {string} query - Raw user query.
     * @returns {"code"|"research"|"analysis"|"reasoning"|"chat"} Detected type.
     */
    detectQueryType(query) {
        const lowerQuery = query.toLowerCase();
        // Use the lowercased text here too, so "Fix ..." / "Debug ..." count as code.
        if (/```|function|class|implement|debug|fix/.test(lowerQuery)) {
            return "code";
        }
        if (/research|find|search|what is|explain/.test(lowerQuery)) {
            return "research";
        }
        if (/analyze|compare|evaluate|assess/.test(lowerQuery)) {
            return "analysis";
        }
        if (/think|reason|solve|calculate|prove/.test(lowerQuery)) {
            return "reasoning";
        }
        return "chat";
    }
    /**
     * Build the routing context for a query.
     *
     * @param {string} query - Raw user query.
     * @param {object} [overrides] - Fields that replace the derived defaults.
     * @returns {object} Context with query, complexity, type, requiresWeb,
     *   requiresReasoning, urgency ("normal" by default) and costSensitive (true by default).
     */
    buildContext(query, overrides) {
        const lowerQuery = query.toLowerCase();
        return {
            query,
            complexity: this.analyzeComplexity(query),
            type: this.detectQueryType(query),
            // Case-insensitive so "Latest", "Today" or "HTTP" are recognised as well.
            requiresWeb: /current|latest|today|news|http|www/.test(lowerQuery),
            requiresReasoning: /why|how|solve|prove|calculate/.test(lowerQuery),
            urgency: "normal",
            costSensitive: true, // Default to cost-sensitive
            ...overrides,
        };
    }
    /**
     * Select a model for the given context.
     *
     * Rules are evaluated top to bottom and the first match wins, so a
     * "simple" cost-sensitive query is routed to the cheapest tier even when
     * it also requires web access.
     *
     * @param {object} context - Context as produced by buildContext.
     * @returns {object} Selection with primary, fallback, estimatedCost,
     *   estimatedLatency, requiresConfirmation and reasoning. The cost and
     *   latency figures are hard-coded estimates per rule.
     */
    selectModel(context) {
        // Rule 1: simple queries -> cheapest tier
        if (context.complexity === "simple" && context.costSensitive !== false) {
            // GPT-5 is considered enabled unless ENABLE_GPT5 is exactly "false".
            const gpt5Enabled = process.env.ENABLE_GPT5 !== "false";
            if (gpt5Enabled) {
                return {
                    primary: ModelTier.ULTRA_CHEAP, // gpt-5-nano
                    fallback: ModelTier.ULTRA_EFFICIENT, // gemini-2.5-flash
                    estimatedCost: 0.000008,
                    estimatedLatency: 400,
                    requiresConfirmation: false,
                    reasoning: "Simple query - using GPT-5 Nano (cheapest option)",
                };
            }
            return {
                primary: ModelTier.ULTRA_EFFICIENT, // gemini-2.5-flash
                fallback: ModelTier.EFFICIENT,
                estimatedCost: 0.00001,
                estimatedLatency: 500,
                requiresConfirmation: false,
                reasoning: "Simple query - using Gemini Flash (GPT-5 disabled)",
            };
        }
        // Rule 2: web search required -> Perplexity
        if (context.requiresWeb) {
            return {
                primary: ModelTier.WEB_SEARCH,
                fallback: ModelTier.STANDARD,
                estimatedCost: 0.006,
                estimatedLatency: 2000,
                requiresConfirmation: false,
                reasoning: "Web search required - using Perplexity",
            };
        }
        // Rule 3: code generation -> GPT-5 family
        if (context.type === "code") {
            if (context.complexity === "complex") {
                return {
                    primary: ModelTier.STANDARD,
                    fallback: ModelTier.EFFICIENT,
                    estimatedCost: 0.005,
                    estimatedLatency: 1500,
                    requiresConfirmation: false,
                    reasoning: "Complex code generation - using GPT-5",
                };
            }
            return {
                primary: ModelTier.EFFICIENT,
                fallback: ModelTier.ULTRA_EFFICIENT,
                estimatedCost: 0.0003,
                estimatedLatency: 1000,
                requiresConfirmation: false,
                reasoning: "Simple code task - using GPT-5-mini",
            };
        }
        // Rule 4: non-trivial reasoning -> GPT-5 models
        if (context.requiresReasoning && context.complexity !== "simple") {
            if (context.urgency === "fast" || context.costSensitive) {
                return {
                    primary: ModelTier.EFFICIENT,
                    fallback: ModelTier.STANDARD,
                    estimatedCost: 0.003,
                    estimatedLatency: 2000,
                    requiresConfirmation: false,
                    reasoning: "Reasoning required - using gpt-5-mini for speed/cost",
                };
            }
            if (context.complexity === "complex") {
                return {
                    primary: ModelTier.STANDARD,
                    fallback: ModelTier.EFFICIENT,
                    estimatedCost: 0.015,
                    estimatedLatency: 3000,
                    requiresConfirmation: true, // Expensive
                    reasoning: "Complex reasoning - using gpt-5 (requires confirmation)",
                };
            }
            // Moderate, not urgent and not cost-sensitive: falls through to Rule 5.
        }
        // Rule 5: moderate complexity -> balanced
        if (context.complexity === "moderate") {
            return {
                primary: ModelTier.EFFICIENT,
                fallback: ModelTier.ULTRA_EFFICIENT,
                estimatedCost: 0.0003,
                estimatedLatency: 1000,
                requiresConfirmation: false,
                reasoning: "Moderate complexity - using GPT-5-mini",
            };
        }
        // Default: cost-efficient
        return {
            primary: ModelTier.EFFICIENT,
            fallback: ModelTier.ULTRA_EFFICIENT,
            estimatedCost: 0.00015,
            estimatedLatency: 1000,
            requiresConfirmation: false,
            reasoning: "Default selection - using cost-efficient model",
        };
    }
    /**
     * Estimate the cost of a request from the MODEL_COSTS table.
     *
     * @param {string} model - Model id to look up.
     * @param {number} estimatedTokens - Total tokens (input + output).
     * @returns {number} Estimated cost, or 0.001 when the model is not in the table.
     */
    estimateCost(model, estimatedTokens) {
        // Own-property check so names such as "constructor" or "toString" do not
        // resolve to Object.prototype members and produce NaN.
        if (!Object.prototype.hasOwnProperty.call(MODEL_COSTS, model)) {
            return 0.001; // Default estimate
        }
        const costs = MODEL_COSTS[model];
        // Assume 40% input, 60% output for typical usage
        const inputTokens = estimatedTokens * 0.4;
        const outputTokens = estimatedTokens * 0.6;
        return (inputTokens * costs.input + outputTokens * costs.output) / 1000;
    }
    /**
     * Produce the optimal selection for a query plus up to three alternatives
     * (cheaper, faster, higher quality) whose primary model differs from it.
     *
     * @param {string} query - Raw user query.
     * @returns {{optimal: object, alternatives: object[], context: object}}
     */
    getRecommendations(query) {
        const context = this.buildContext(query);
        const optimal = this.selectModel(context);
        const alternatives = [];
        // Cheaper alternative: force the cheapest rule.
        const cheaper = this.selectModel({
            ...context,
            costSensitive: true,
            complexity: "simple",
        });
        if (cheaper.primary !== optimal.primary) {
            alternatives.push({ ...cheaper, reasoning: "Cheaper alternative" });
        }
        // Faster alternative: same context with fast urgency.
        const faster = this.selectModel({ ...context, urgency: "fast" });
        if (faster.primary !== optimal.primary &&
            faster.primary !== cheaper.primary) {
            alternatives.push({ ...faster, reasoning: "Faster alternative" });
        }
        // Higher quality alternative: ignore cost and assume a complex task.
        const quality = this.selectModel({
            ...context,
            costSensitive: false,
            complexity: "complex",
        });
        if (quality.primary !== optimal.primary) {
            alternatives.push({
                ...quality,
                reasoning: "Higher quality alternative",
            });
        }
        return {
            optimal,
            alternatives: alternatives.slice(0, 3), // Max 3 alternatives
            context,
        };
    }
    /**
     * Clear the complexity cache once it holds more than 1000 entries.
     *
     * analyzeComplexity already resets the cache when it reaches its bound,
     * so this remains only as a manual hook for existing callers.
     */
    clearCache() {
        if (this.complexityCache.size > 1000) {
            this.complexityCache.clear();
        }
    }
}
// Export singleton instance
export const modelRouter = new SmartModelRouter();
@@ -0,0 +1,114 @@
1
+ import * as crypto from 'crypto';
2
/**
 * In-memory result cache with a TTL and least-recently-accessed eviction.
 *
 * Keys are hashed (truncated SHA-256) before being stored, so the original
 * key text is not retained. A background interval purges expired entries;
 * call dispose() to stop it when the cache is no longer needed.
 */
export class ResultCache {
    /**
     * @param {number} [maxSize] - Maximum number of entries (default 1000; falsy values keep the default).
     * @param {number} [ttl] - Entry lifetime in milliseconds (default 1 hour; falsy values keep the default).
     */
    constructor(maxSize, ttl) {
        this.cache = new Map();
        this.maxSize = 1000;
        this.ttl = 3600000; // 1 hour default
        this.stats = {
            hits: 0,
            misses: 0,
            evictions: 0
        };
        if (maxSize)
            this.maxSize = maxSize;
        if (ttl)
            this.ttl = ttl;
        // Periodic cleanup, every minute. The timer is unref'd so an idle cache
        // never keeps the Node.js process alive on its own.
        this.cleanupTimer = setInterval(() => this.cleanup(), 60000);
        if (typeof this.cleanupTimer.unref === 'function') {
            this.cleanupTimer.unref();
        }
    }
    /**
     * Look up a cached result.
     *
     * @param {string} key - Cache key (hashed internally).
     * @returns {Promise<*>} The stored result, or null on a miss or expired entry.
     */
    async get(key) {
        const hash = this.hashKey(key);
        const entry = this.cache.get(hash);
        if (!entry) {
            this.stats.misses++;
            return null;
        }
        const age = Date.now() - entry.timestamp;
        if (age > this.ttl) {
            // Expired entries are removed lazily and counted as misses.
            this.cache.delete(hash);
            this.stats.misses++;
            return null;
        }
        entry.hits++;
        entry.lastAccessed = Date.now();
        this.stats.hits++;
        return entry.result;
    }
    /**
     * Store a result.
     *
     * @param {string} key - Cache key (hashed internally).
     * @param {{result: *, tokens?: number, timestamp?: number}} value - Payload to cache.
     * @returns {Promise<void>}
     */
    async set(key, value) {
        const hash = this.hashKey(key);
        // Only make room for genuinely new keys: overwriting an existing entry
        // does not grow the map, so evicting here would drop an unrelated entry.
        if (!this.cache.has(hash) && this.cache.size >= this.maxSize) {
            this.evictLRU();
        }
        this.cache.set(hash, {
            key: hash,
            result: value.result,
            tokens: value.tokens || 0,
            timestamp: value.timestamp || Date.now(),
            hits: 0,
            lastAccessed: Date.now()
        });
    }
    /**
     * Summarise cache usage.
     *
     * @returns {{totalEntries: number, totalHits: number, totalMisses: number,
     *   hitRate: number, memorySaved: number, tokensSaved: number}}
     *   memorySaved is the serialized length of each live entry times its hit count.
     */
    getStats() {
        const totalHits = this.stats.hits;
        const totalMisses = this.stats.misses;
        const lookups = totalHits + totalMisses;
        const hitRate = lookups > 0 ? totalHits / lookups : 0;
        let tokensSaved = 0;
        let memorySaved = 0;
        for (const entry of this.cache.values()) {
            if (entry.hits > 0) {
                tokensSaved += entry.tokens * entry.hits;
                // JSON.stringify returns undefined for undefined/function values;
                // count those as zero bytes instead of throwing on `.length`.
                const serialized = JSON.stringify(entry.result);
                const size = serialized === undefined ? 0 : serialized.length;
                memorySaved += size * entry.hits;
            }
        }
        return {
            totalEntries: this.cache.size,
            totalHits,
            totalMisses,
            hitRate,
            memorySaved,
            tokensSaved
        };
    }
    /** Remove every entry and reset the hit/miss/eviction counters. */
    clear() {
        this.cache.clear();
        this.stats = { hits: 0, misses: 0, evictions: 0 };
    }
    /**
     * Hash a key to the 16-hex-character identifier used in the map.
     *
     * @param {string} key - Cache key.
     * @returns {string} First 16 hex characters of the key's SHA-256 digest.
     */
    hashKey(key) {
        return crypto.createHash('sha256').update(key).digest('hex').substring(0, 16);
    }
    /** Evict the entry with the smallest lastAccessed timestamp, if any. */
    evictLRU() {
        let oldest = null;
        let oldestKey = null;
        for (const [key, entry] of this.cache) {
            if (!oldest || entry.lastAccessed < oldest.lastAccessed) {
                oldest = entry;
                oldestKey = key;
            }
        }
        if (oldestKey !== null) {
            this.cache.delete(oldestKey);
            this.stats.evictions++;
        }
    }
    /** Delete every entry whose age exceeds the TTL. */
    cleanup() {
        const now = Date.now();
        for (const [key, entry] of this.cache) {
            if (now - entry.timestamp > this.ttl) {
                // Deleting the current key while iterating a Map is safe.
                this.cache.delete(key);
            }
        }
    }
    /**
     * Change the entry lifetime.
     * @param {number} ttl - New TTL in milliseconds.
     */
    setTTL(ttl) {
        this.ttl = ttl;
    }
    /**
     * Change the capacity, evicting least-recently-accessed entries until the
     * cache fits.
     * @param {number} size - New maximum number of entries.
     */
    setMaxSize(size) {
        this.maxSize = size;
        while (this.cache.size > this.maxSize) {
            this.evictLRU();
        }
    }
    /**
     * Stop the periodic cleanup timer. Safe to call more than once; cached
     * entries are left in place.
     */
    dispose() {
        if (this.cleanupTimer) {
            clearInterval(this.cleanupTimer);
            this.cleanupTimer = null;
        }
    }
}
@@ -0,0 +1,257 @@
1
+ /**
2
+ * Token Optimizer - Caching, compression, and batching for token efficiency
3
+ * Part of Phase 1B implementation for 70% cost reduction
4
+ */
5
+ import { LRUCache } from 'lru-cache';
6
+ import crypto from 'crypto';
7
/**
 * Prompt optimizer: rule-based prompt compression plus request batching.
 *
 * Response caching is currently disabled (see optimizeSingle); the LRU cache
 * and its helpers remain for callers that still use them directly.
 */
export class TokenOptimizer {
    constructor() {
        // Batch queue for request batching: items are { request, resolve, reject }.
        this.batchQueue = [];
        this.batchTimer = null;
        // Configuration
        this.config = {
            cacheMaxSize: 100 * 1024 * 1024, // 100MB
            cacheTTL: 60 * 60 * 1000, // 1 hour
            batchWindow: 100, // ms to wait for batch
            maxBatchSize: 10, // max requests per batch
            compressionThreshold: 2000, // characters
            compressionModel: 'gemini-2.5-flash', // Ultra cheap for compression
        };
        // Metrics. Note: the "tokens*" counters are measured in characters.
        this.metrics = {
            cacheHits: 0,
            cacheMisses: 0,
            tokensCompressed: 0,
            tokensSaved: 0,
            batchesProcessed: 0,
        };
        this.responseCache = new LRUCache({
            max: 1000, // Max 1000 entries
            ttl: this.config.cacheTTL,
            updateAgeOnGet: true,
            updateAgeOnHas: true,
        });
    }
    /**
     * Generate the cache key for a request.
     *
     * @param {{cacheKey?: string, prompt: string, model?: string, temperature?: number}} request
     * @returns {string} request.cacheKey when truthy, otherwise the SHA-256 hex
     *   digest of the prompt, model and temperature (default 0.7).
     */
    generateCacheKey(request) {
        if (request.cacheKey)
            return request.cacheKey;
        const keyData = {
            prompt: request.prompt,
            model: request.model,
            // Only fall back when temperature is absent: 0 is a valid setting
            // and must not share a key with the 0.7 default.
            temperature: request.temperature == null ? 0.7 : request.temperature,
        };
        return crypto
            .createHash('sha256')
            .update(JSON.stringify(keyData))
            .digest('hex');
    }
    /**
     * Look a request up in the response cache and update hit/miss metrics.
     *
     * @param {object} request - Request to look up.
     * @returns {object|null} The cached entry, or null on a miss.
     */
    checkCache(request) {
        const key = this.generateCacheKey(request);
        const cached = this.responseCache.get(key);
        if (cached) {
            this.metrics.cacheHits++;
            cached.hits++;
            return cached;
        }
        this.metrics.cacheMisses++;
        return null;
    }
    /**
     * Shrink a prompt with simple text rules (no model call is made).
     *
     * Prompts shorter than config.compressionThreshold are returned unchanged.
     * Otherwise whitespace is collapsed, exact duplicate sentences are dropped,
     * a few filler words are removed and some wordy phrases are shortened.
     *
     * @param {string} prompt - Prompt text.
     * @returns {Promise<string>} The compressed prompt.
     */
    async compressPrompt(prompt) {
        if (prompt.length < this.config.compressionThreshold) {
            return prompt;
        }
        // 1. Collapse whitespace runs.
        const normalized = prompt.replace(/\s+/g, ' ').trim();
        // 2. Drop duplicate sentences. Split only at whitespace that follows a
        //    terminator so punctuation is preserved and tokens such as "2.5",
        //    "file.js" or URLs are never broken apart.
        const sentences = normalized.split(/(?<=[.!?])\s+/);
        let compressed = [...new Set(sentences)].join(' ');
        // 3. Remove filler words.
        compressed = compressed.replace(/\b(very|really|actually|basically|literally|just)\b/gi, '');
        // 4. Shorten common wordy phrases.
        compressed = compressed
            .replace(/in order to/gi, 'to')
            .replace(/as well as/gi, 'and')
            .replace(/at this point in time/gi, 'now')
            .replace(/due to the fact that/gi, 'because');
        // Removing words leaves doubled spaces behind; tidy them up.
        compressed = compressed.replace(/\s+/g, ' ').trim();
        // Track compression
        this.metrics.tokensCompressed += prompt.length;
        this.metrics.tokensSaved += prompt.length - compressed.length;
        return compressed;
    }
    /**
     * Optimize a single request.
     *
     * Caching is intentionally disabled here: it was storing inputs instead of
     * outputs and was not worth the maintenance at the expected 10-50 runs/day.
     *
     * @param {{prompt: string}} request - Request to optimize.
     * @returns {Promise<object>} The request with the (possibly compressed)
     *   prompt plus optimized, compressed, originalLength and compressedLength.
     */
    async optimizeSingle(request) {
        const originalLength = request.prompt.length;
        let optimizedPrompt = request.prompt;
        if (originalLength > this.config.compressionThreshold) {
            optimizedPrompt = await this.compressPrompt(request.prompt);
        }
        return {
            ...request,
            prompt: optimizedPrompt,
            optimized: true,
            compressed: optimizedPrompt !== request.prompt,
            originalLength,
            compressedLength: optimizedPrompt.length,
        };
    }
    /**
     * Queue a request for batched optimization.
     *
     * The batch is processed immediately once it holds config.maxBatchSize
     * requests, otherwise after config.batchWindow milliseconds.
     *
     * @param {object} request - Request to queue.
     * @returns {Promise<object>} Resolves with the optimized request plus a batchId.
     */
    async addToBatch(request) {
        return new Promise((resolve, reject) => {
            this.batchQueue.push({ request, resolve, reject });
            if (this.batchQueue.length >= this.config.maxBatchSize) {
                // processBatch settles each item itself and never rejects.
                void this.processBatch();
            }
            else if (!this.batchTimer) {
                this.batchTimer = setTimeout(() => {
                    void this.processBatch();
                }, this.config.batchWindow);
            }
        });
    }
    /**
     * Process up to config.maxBatchSize queued requests, settling each
     * request's promise individually.
     *
     * @returns {Promise<void>}
     */
    async processBatch() {
        if (this.batchQueue.length === 0)
            return;
        // Clear timer
        if (this.batchTimer) {
            clearTimeout(this.batchTimer);
            this.batchTimer = null;
        }
        // Extract batch
        const batch = this.batchQueue.splice(0, this.config.maxBatchSize);
        const batchId = crypto.randomBytes(8).toString('hex');
        this.metrics.batchesProcessed++;
        // Requests are optimized one after another; a failure only rejects its own request.
        for (const item of batch) {
            try {
                const optimized = await this.optimizeSingle(item.request);
                item.resolve({
                    ...optimized,
                    batchId,
                });
            }
            catch (error) {
                item.reject(error);
            }
        }
    }
    /**
     * Main optimization entry point.
     *
     * Batchable requests go through the batch queue; everything else is
     * optimized immediately. The disabled response cache is deliberately not
     * consulted, so the cache metrics are not inflated with guaranteed misses.
     *
     * @param {{prompt: string, canBatch?: boolean}} request - Request to optimize.
     * @returns {Promise<object>} The optimized request.
     */
    async optimize(request) {
        if (request.canBatch) {
            return this.addToBatch(request);
        }
        return this.optimizeSingle(request);
    }
    /**
     * Report optimization metrics and recommendations.
     *
     * Cache and compression recommendations are only produced once there is
     * at least one lookup / one compressed prompt to base them on.
     *
     * @returns {{cacheHitRate: number, compressionRatio: number, totalSaved: number,
     *   batchesProcessed: number, recommendations: string[]}}
     */
    getMetrics() {
        const cacheLookups = this.metrics.cacheHits + this.metrics.cacheMisses;
        const cacheHitRate = cacheLookups > 0 ? this.metrics.cacheHits / cacheLookups : 0;
        const compressionRatio = this.metrics.tokensCompressed > 0
            ? this.metrics.tokensSaved / this.metrics.tokensCompressed
            : 0;
        const recommendations = [];
        if (cacheLookups > 0 && cacheHitRate < 0.3) {
            recommendations.push(`⚠️ Low cache hit rate (${(cacheHitRate * 100).toFixed(1)}%). Consider caching more aggressively.`);
        }
        if (this.metrics.tokensCompressed > 0 && compressionRatio < 0.2) {
            recommendations.push('💡 Compression ratio is low. Consider more aggressive compression.');
        }
        if (this.metrics.batchesProcessed < 10) {
            recommendations.push('📊 Low batch usage. Enable batching for parallel requests.');
        }
        if (cacheHitRate > 0.7) {
            recommendations.push('✅ Excellent cache performance!');
        }
        return {
            cacheHitRate,
            compressionRatio,
            totalSaved: this.metrics.tokensSaved,
            batchesProcessed: this.metrics.batchesProcessed,
            recommendations,
        };
    }
    /**
     * Trim the response cache to 80% of its size once it exceeds 900 entries,
     * discarding the least recently used entries first.
     */
    maintainCache() {
        if (this.responseCache.size <= 900) {
            return;
        }
        const toKeep = Math.floor(this.responseCache.size * 0.8);
        while (this.responseCache.size > toKeep) {
            // lru-cache's keys() yields most-recent first; rkeys() yields the
            // least recently used key first, which is the one to discard.
            const lruKey = this.responseCache.rkeys().next().value;
            if (lruKey === undefined || !this.responseCache.delete(lruKey)) {
                break;
            }
        }
    }
    /** Reset all metrics counters to zero. */
    resetMetrics() {
        this.metrics = {
            cacheHits: 0,
            cacheMisses: 0,
            tokensCompressed: 0,
            tokensSaved: 0,
            batchesProcessed: 0,
        };
    }
    /**
     * Shut the optimizer down: cancel the pending batch timer and process
     * every request still queued.
     *
     * @returns {Promise<void>} Resolves once the queue has been fully drained.
     */
    async shutdown() {
        if (this.batchTimer) {
            clearTimeout(this.batchTimer);
            this.batchTimer = null;
        }
        // A single processBatch call handles at most maxBatchSize items, so
        // loop until nothing is left waiting.
        while (this.batchQueue.length > 0) {
            await this.processBatch();
        }
    }
}
256
+ // Export singleton instance
257
+ export const tokenOptimizer = new TokenOptimizer();