tachibot-mcp 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +260 -0
- package/CHANGELOG.md +54 -0
- package/CODE_OF_CONDUCT.md +56 -0
- package/CONTRIBUTING.md +54 -0
- package/Dockerfile +36 -0
- package/LICENSE +644 -0
- package/README.md +201 -0
- package/SECURITY.md +95 -0
- package/dist/personality/komaai-expressions.js +12 -0
- package/dist/profiles/balanced.json +33 -0
- package/dist/profiles/code_focus.json +33 -0
- package/dist/profiles/full.json +33 -0
- package/dist/profiles/minimal.json +33 -0
- package/dist/profiles/research_power.json +33 -0
- package/dist/scripts/build-profiles.js +46 -0
- package/dist/src/application/services/focus/FocusModeRegistry.js +46 -0
- package/dist/src/application/services/focus/FocusTool.service.js +109 -0
- package/dist/src/application/services/focus/ModeRegistry.js +46 -0
- package/dist/src/application/services/focus/modes/focus-deep.mode.js +27 -0
- package/dist/src/application/services/focus/modes/status.mode.js +50 -0
- package/dist/src/application/services/focus/modes/tachibot-status.mode.js +50 -0
- package/dist/src/collaborative-orchestrator.js +391 -0
- package/dist/src/config/model-constants.js +188 -0
- package/dist/src/config/model-defaults.js +57 -0
- package/dist/src/config/model-preferences.js +382 -0
- package/dist/src/config/timeout-config.js +130 -0
- package/dist/src/config.js +173 -0
- package/dist/src/domain/interfaces/IFocusMode.js +5 -0
- package/dist/src/domain/interfaces/IProvider.js +6 -0
- package/dist/src/domain/interfaces/ITool.js +5 -0
- package/dist/src/focus-deep.js +245 -0
- package/dist/src/infrastructure/ascii/art/robots.ascii.js +16 -0
- package/dist/src/mcp-client.js +90 -0
- package/dist/src/memory/index.js +17 -0
- package/dist/src/memory/memory-config.js +135 -0
- package/dist/src/memory/memory-interface.js +174 -0
- package/dist/src/memory/memory-manager.js +383 -0
- package/dist/src/memory/providers/devlog-provider.js +385 -0
- package/dist/src/memory/providers/hybrid-provider.js +399 -0
- package/dist/src/memory/providers/local-provider.js +388 -0
- package/dist/src/memory/providers/mem0-provider.js +337 -0
- package/dist/src/modes/architect.js +477 -0
- package/dist/src/modes/auditor.js +362 -0
- package/dist/src/modes/challenger.js +841 -0
- package/dist/src/modes/code-reviewer.js +382 -0
- package/dist/src/modes/commit-guardian.js +424 -0
- package/dist/src/modes/documentation-writer.js +572 -0
- package/dist/src/modes/scout.js +587 -0
- package/dist/src/modes/shared/helpers/challenger-helpers.js +454 -0
- package/dist/src/modes/shared/helpers/index.js +17 -0
- package/dist/src/modes/shared/helpers/scout-helpers.js +270 -0
- package/dist/src/modes/shared/helpers/verifier-helpers.js +332 -0
- package/dist/src/modes/test-architect.js +767 -0
- package/dist/src/modes/verifier.js +378 -0
- package/dist/src/monitoring/performance-monitor.js +435 -0
- package/dist/src/optimization/batch-executor.js +121 -0
- package/dist/src/optimization/context-pruner.js +196 -0
- package/dist/src/optimization/cost-monitor.js +338 -0
- package/dist/src/optimization/index.js +65 -0
- package/dist/src/optimization/model-router.js +264 -0
- package/dist/src/optimization/result-cache.js +114 -0
- package/dist/src/optimization/token-optimizer.js +257 -0
- package/dist/src/optimization/token-tracker.js +118 -0
- package/dist/src/orchestrator-instructions.js +128 -0
- package/dist/src/orchestrator-lite.js +139 -0
- package/dist/src/orchestrator.js +191 -0
- package/dist/src/orchestrators/collaborative/interfaces/IToolExecutionEngine.js +1 -0
- package/dist/src/orchestrators/collaborative/interfaces/IToolExecutionStrategy.js +5 -0
- package/dist/src/orchestrators/collaborative/interfaces/IVisualizationRenderer.js +1 -0
- package/dist/src/orchestrators/collaborative/registries/ModelProviderRegistry.js +95 -0
- package/dist/src/orchestrators/collaborative/registries/ToolAdapterRegistry.js +64 -0
- package/dist/src/orchestrators/collaborative/services/tool-execution/ToolExecutionService.js +502 -0
- package/dist/src/orchestrators/collaborative/services/visualization/VisualizationService.js +206 -0
- package/dist/src/orchestrators/collaborative/types/session-types.js +5 -0
- package/dist/src/profiles/balanced.js +37 -0
- package/dist/src/profiles/code_focus.js +37 -0
- package/dist/src/profiles/debug_intensive.js +59 -0
- package/dist/src/profiles/full.js +37 -0
- package/dist/src/profiles/minimal.js +37 -0
- package/dist/src/profiles/research_code.js +59 -0
- package/dist/src/profiles/research_power.js +37 -0
- package/dist/src/profiles/types.js +5 -0
- package/dist/src/profiles/workflow_builder.js +53 -0
- package/dist/src/prompt-engineer-lite.js +78 -0
- package/dist/src/prompt-engineer.js +399 -0
- package/dist/src/reasoning-chain.js +508 -0
- package/dist/src/sequential-thinking.js +291 -0
- package/dist/src/server-diagnostic.js +74 -0
- package/dist/src/server-raw.js +158 -0
- package/dist/src/server-simple.js +58 -0
- package/dist/src/server.js +514 -0
- package/dist/src/session/session-logger.js +617 -0
- package/dist/src/session/session-manager.js +571 -0
- package/dist/src/session/session-tools.js +400 -0
- package/dist/src/tools/advanced-modes.js +200 -0
- package/dist/src/tools/claude-integration.js +356 -0
- package/dist/src/tools/consolidated/ai-router.js +174 -0
- package/dist/src/tools/consolidated/ai-tool.js +48 -0
- package/dist/src/tools/consolidated/brainstorm-tool.js +87 -0
- package/dist/src/tools/consolidated/environment-detector.js +80 -0
- package/dist/src/tools/consolidated/index.js +50 -0
- package/dist/src/tools/consolidated/search-tool.js +110 -0
- package/dist/src/tools/consolidated/workflow-tool.js +238 -0
- package/dist/src/tools/gemini-tools.js +329 -0
- package/dist/src/tools/grok-enhanced.js +376 -0
- package/dist/src/tools/grok-tools.js +299 -0
- package/dist/src/tools/lmstudio-tools.js +223 -0
- package/dist/src/tools/openai-tools.js +498 -0
- package/dist/src/tools/openrouter-tools.js +317 -0
- package/dist/src/tools/optimized-wrapper.js +204 -0
- package/dist/src/tools/perplexity-tools.js +294 -0
- package/dist/src/tools/pingpong-tool.js +343 -0
- package/dist/src/tools/qwen-wrapper.js +74 -0
- package/dist/src/tools/tool-router.js +444 -0
- package/dist/src/tools/unified-ai-provider.js +260 -0
- package/dist/src/tools/workflow-runner.js +425 -0
- package/dist/src/tools/workflow-validator-tool.js +107 -0
- package/dist/src/types.js +23 -0
- package/dist/src/utils/input-validator.js +130 -0
- package/dist/src/utils/model-router.js +91 -0
- package/dist/src/utils/progress-stream.js +255 -0
- package/dist/src/utils/provider-router.js +88 -0
- package/dist/src/utils/smart-api-client.js +146 -0
- package/dist/src/utils/table-builder.js +218 -0
- package/dist/src/utils/timestamp-formatter.js +134 -0
- package/dist/src/utils/tool-compressor.js +257 -0
- package/dist/src/utils/tool-config.js +201 -0
- package/dist/src/validators/dependency-graph-validator.js +147 -0
- package/dist/src/validators/interpolation-validator.js +222 -0
- package/dist/src/validators/output-usage-validator.js +151 -0
- package/dist/src/validators/syntax-validator.js +102 -0
- package/dist/src/validators/tool-registry-validator.js +123 -0
- package/dist/src/validators/tool-types.js +97 -0
- package/dist/src/validators/types.js +8 -0
- package/dist/src/validators/workflow-validator.js +134 -0
- package/dist/src/visualizer-lite.js +42 -0
- package/dist/src/visualizer.js +179 -0
- package/dist/src/workflows/circuit-breaker.js +199 -0
- package/dist/src/workflows/custom-workflows.js +451 -0
- package/dist/src/workflows/engine/AutoSynthesizer.js +97 -0
- package/dist/src/workflows/engine/StepParameterResolver.js +74 -0
- package/dist/src/workflows/engine/VariableInterpolator.js +123 -0
- package/dist/src/workflows/engine/WorkflowDiscovery.js +125 -0
- package/dist/src/workflows/engine/WorkflowExecutionEngine.js +485 -0
- package/dist/src/workflows/engine/WorkflowExecutor.js +113 -0
- package/dist/src/workflows/engine/WorkflowFileManager.js +244 -0
- package/dist/src/workflows/engine/WorkflowHelpers.js +114 -0
- package/dist/src/workflows/engine/WorkflowOutputFormatter.js +83 -0
- package/dist/src/workflows/engine/events/WorkflowEventBus.js +132 -0
- package/dist/src/workflows/engine/events/interfaces/IEventBus.js +5 -0
- package/dist/src/workflows/engine/handlers/ErrorRecoveryHandler.js +162 -0
- package/dist/src/workflows/engine/handlers/PromptEnhancementHandler.js +115 -0
- package/dist/src/workflows/engine/handlers/SessionPersistenceHandler.js +167 -0
- package/dist/src/workflows/engine/handlers/StepExecutionHandler.js +231 -0
- package/dist/src/workflows/engine/handlers/ToolInvocationHandler.js +46 -0
- package/dist/src/workflows/engine/interfaces/IAutoSynthesizer.js +5 -0
- package/dist/src/workflows/engine/interfaces/IStepParameterResolver.js +5 -0
- package/dist/src/workflows/engine/interfaces/IVariableInterpolator.js +5 -0
- package/dist/src/workflows/engine/interfaces/IWorkflowDiscovery.js +4 -0
- package/dist/src/workflows/engine/interfaces/IWorkflowFileManager.js +5 -0
- package/dist/src/workflows/engine/interfaces/IWorkflowOutputFormatter.js +5 -0
- package/dist/src/workflows/engine/state/WorkflowStateMachine.js +194 -0
- package/dist/src/workflows/engine/state/interfaces/IStateMachine.js +17 -0
- package/dist/src/workflows/fallback-strategies.js +373 -0
- package/dist/src/workflows/message-queue.js +455 -0
- package/dist/src/workflows/model-router.js +189 -0
- package/dist/src/workflows/orchestrator-examples.js +174 -0
- package/dist/src/workflows/orchestrator-integration.js +200 -0
- package/dist/src/workflows/self-healing.js +524 -0
- package/dist/src/workflows/tool-mapper.js +407 -0
- package/dist/src/workflows/tool-orchestrator.js +796 -0
- package/dist/src/workflows/workflow-engine.js +573 -0
- package/dist/src/workflows/workflow-parser.js +283 -0
- package/dist/src/workflows/workflow-types.js +95 -0
- package/dist/src/workflows.js +568 -0
- package/dist/test-workflow-file-output.js +93 -0
- package/docs/API_KEYS.md +570 -0
- package/docs/CLAUDE_CODE_SETUP.md +181 -0
- package/docs/CLAUDE_DESKTOP_MANUAL.md +127 -0
- package/docs/CONFIGURATION.md +745 -0
- package/docs/FOCUS_MODES.md +240 -0
- package/docs/INSTALLATION_BOTH.md +145 -0
- package/docs/TERMS.md +352 -0
- package/docs/TOOLS_REFERENCE.md +1622 -0
- package/docs/TOOL_PARAMETERS.md +496 -0
- package/docs/TOOL_PROFILES.md +236 -0
- package/docs/WORKFLOWS.md +987 -0
- package/docs/WORKFLOW_OUTPUT.md +198 -0
- package/docs/WORKFLOW_PROGRESS_TRACKING.md +305 -0
- package/docs/workflows/design-brainstorm.md +335 -0
- package/package.json +97 -0
- package/profiles/balanced.json +37 -0
- package/profiles/code_focus.json +37 -0
- package/profiles/debug_intensive.json +34 -0
- package/profiles/full.json +37 -0
- package/profiles/minimal.json +37 -0
- package/profiles/research_power.json +37 -0
- package/profiles/workflow_builder.json +37 -0
- package/smithery.yaml +66 -0
- package/start.sh +8 -0
- package/tools.config.json +81 -0
- package/tsconfig.json +18 -0
- package/workflows/accessibility-code-audit.yaml +92 -0
- package/workflows/code-architecture-review.yaml +202 -0
- package/workflows/code-review.yaml +142 -0
- package/workflows/core/iterative-problem-solver.yaml +283 -0
- package/workflows/creative-brainstorm-yaml.yaml +215 -0
- package/workflows/pingpong.yaml +141 -0
- package/workflows/system/README.md +412 -0
- package/workflows/system/challenger.yaml +175 -0
- package/workflows/system/scout.yaml +164 -0
- package/workflows/system/verifier.yaml +133 -0
- package/workflows/ultra-creative-brainstorm.yaml +318 -0
- package/workflows/ux-research-flow.yaml +92 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart Model Router - Optimized model selection based on query complexity
|
|
3
|
+
* Part of Phase 1A implementation for cost optimization
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Symbolic tier names mapped to the model identifier used for that tier.
 *
 * Several names are aliases for the same model string: EFFICIENT and
 * GPT5_MINI both resolve to "gpt-5-mini", and STANDARD and GPT5_FULL both
 * resolve to "gpt-5".
 */
export var ModelTier = {
    // Tier 0: GPT-5 Nano - cheapest option
    ULTRA_CHEAP: "gpt-5-nano",
    // Tier 1: ultra fast and cheap (< $0.001 per request)
    ULTRA_EFFICIENT: "gemini-2.5-flash",
    EFFICIENT: "gpt-5-mini",
    // Tier 2: balanced ($0.001-$0.01 per request)
    STANDARD: "gpt-5",
    GPT5_MINI: "gpt-5-mini",
    // Tier 3: advanced ($0.01-$0.05 per request)
    WEB_SEARCH: "perplexity-sonar-pro",
    // Tier 4: premium (use with caution)
    GPT5_FULL: "gpt-5",
};
|
|
20
|
+
/**
 * Per-model pricing and latency table, keyed by model identifier.
 *
 * Each row is [model, input rate, output rate, latency]. estimateCost() in
 * this file multiplies the rates by token counts and divides by 1000, i.e.
 * it treats them as per-1000-token rates. Latency values match the
 * millisecond-scale estimates used elsewhere in this file (unit presumed ms).
 */
const MODEL_COSTS = Object.fromEntries([
    // GPT-5 models (Nov 2025 pricing)
    ["gpt-5-nano", 0.00005, 0.0004, 400], // cheapest
    ["gpt-5-mini", 0.00025, 0.002, 800],
    ["gpt-5", 0.00125, 0.01, 2000],
    // Existing models
    ["gemini-2.5-flash", 0.000075, 0.0003, 500],
    ["gemini-2.5-pro", 0.00015, 0.0006, 1000],
    ["qwencoder", 0.00015, 0.0006, 1000],
    ["perplexity-sonar-pro", 0.006, 0.006, 2000],
].map(([model, input, output, latency]) => [model, { input, output, latency }]));
|
|
31
|
+
/**
 * Heuristic model router.
 *
 * Classifies a query with keyword and length heuristics, then maps the
 * resulting context onto a primary/fallback model pair from ModelTier,
 * together with hard-coded cost and latency estimates.
 */
export class SmartModelRouter {
    constructor() {
        // Memoizes analyzeComplexity() results, keyed by the exact query string.
        this.complexityCache = new Map();
    }

    /**
     * Classify a query as "simple", "moderate" or "complex".
     *
     * "complex": more than 100 words, or multi-step / math keywords.
     * "moderate": more than 30 words, or code / analysis keywords.
     * Anything else is "simple". Results are memoized per query string.
     *
     * @param {string} query - Raw user query.
     * @returns {string} "simple" | "moderate" | "complex"
     */
    analyzeComplexity(query) {
        const cached = this.complexityCache.get(query);
        if (cached) {
            return cached;
        }
        const lowerQuery = query.toLowerCase();
        // Trim first so leading/trailing whitespace does not produce empty
        // tokens that inflate the word count.
        const trimmed = query.trim();
        const wordCount = trimmed === "" ? 0 : trimmed.split(/\s+/).length;
        // Code keywords are matched case-sensitively against the raw query.
        const hasCode = /```|function|class|import|const|let|var/.test(query);
        const hasMultiStep = /step|first|then|after|finally|additionally/.test(lowerQuery);
        const hasMath = /calculate|solve|equation|formula/.test(lowerQuery);
        const hasAnalysis = /analyze|compare|evaluate|assess/.test(lowerQuery);
        let complexity = "simple";
        if (wordCount > 100 || hasMultiStep || hasMath) {
            complexity = "complex";
        }
        else if (wordCount > 30 || hasCode || hasAnalysis) {
            complexity = "moderate";
        }
        // Bound the memo table: clearCache() empties it once it exceeds its
        // size limit, so long-running processes do not grow it forever.
        this.clearCache();
        this.complexityCache.set(query, complexity);
        return complexity;
    }

    /**
     * Detect the query type from keywords. Checks run in this order and the
     * first match wins: code, research, analysis, reasoning; otherwise "chat".
     * All keyword checks are case-insensitive.
     *
     * @param {string} query - Raw user query.
     * @returns {string} "code" | "research" | "analysis" | "reasoning" | "chat"
     */
    detectQueryType(query) {
        const lowerQuery = query.toLowerCase();
        // Use the lower-cased query here as well, so "Fix ..." / "Implement ..."
        // are recognised like their lower-case forms.
        if (/```|function|class|implement|debug|fix/.test(lowerQuery)) {
            return "code";
        }
        if (/research|find|search|what is|explain/.test(lowerQuery)) {
            return "research";
        }
        if (/analyze|compare|evaluate|assess/.test(lowerQuery)) {
            return "analysis";
        }
        if (/think|reason|solve|calculate|prove/.test(lowerQuery)) {
            return "reasoning";
        }
        return "chat";
    }

    /**
     * Build a routing context for a query.
     *
     * @param {string} query - Raw user query.
     * @param {object} [overrides] - Fields that replace the derived defaults
     *   (spread last, so they win).
     * @returns {object} Context with query, complexity, type, requiresWeb,
     *   requiresReasoning, urgency ("normal" by default) and costSensitive
     *   (true by default).
     */
    buildContext(query, overrides) {
        const lowerQuery = query.toLowerCase();
        return {
            query,
            complexity: this.analyzeComplexity(query),
            type: this.detectQueryType(query),
            // Case-insensitive so "Latest", "Today", "HTTP://..." also count.
            requiresWeb: /current|latest|today|news|http|www/.test(lowerQuery),
            requiresReasoning: /why|how|solve|prove|calculate/.test(lowerQuery),
            urgency: "normal",
            costSensitive: true, // Default to cost-sensitive
            ...overrides,
        };
    }

    /**
     * Select a model for the given context. Rules are evaluated in order and
     * the first match wins. Rule 1 runs before the web-search rule, so a
     * simple cost-sensitive query is routed to the cheap tier even when
     * requiresWeb is set.
     *
     * @param {object} context - Context as produced by buildContext().
     * @returns {object} Selection with primary, fallback, estimatedCost,
     *   estimatedLatency, requiresConfirmation and reasoning.
     */
    selectModel(context) {
        // Rule 1: simple queries -> cheapest tier.
        if (context.complexity === "simple" && context.costSensitive !== false) {
            // GPT-5 is treated as enabled unless ENABLE_GPT5 is exactly "false".
            const gpt5Enabled = process.env.ENABLE_GPT5 !== "false";
            if (gpt5Enabled) {
                return {
                    primary: ModelTier.ULTRA_CHEAP, // gpt-5-nano
                    fallback: ModelTier.ULTRA_EFFICIENT, // gemini-2.5-flash
                    estimatedCost: 0.000008,
                    estimatedLatency: 400,
                    requiresConfirmation: false,
                    reasoning: "Simple query - using GPT-5 Nano (cheapest option)",
                };
            }
            return {
                primary: ModelTier.ULTRA_EFFICIENT, // gemini-2.5-flash
                fallback: ModelTier.EFFICIENT,
                estimatedCost: 0.00001,
                estimatedLatency: 500,
                requiresConfirmation: false,
                reasoning: "Simple query - using Gemini Flash (GPT-5 disabled)",
            };
        }
        // Rule 2: web search required -> Perplexity.
        if (context.requiresWeb) {
            return {
                primary: ModelTier.WEB_SEARCH,
                fallback: ModelTier.STANDARD,
                estimatedCost: 0.006,
                estimatedLatency: 2000,
                requiresConfirmation: false,
                reasoning: "Web search required - using Perplexity",
            };
        }
        // Rule 3: code tasks -> GPT-5 for complex, GPT-5-mini otherwise.
        if (context.type === "code") {
            if (context.complexity === "complex") {
                return {
                    primary: ModelTier.STANDARD,
                    fallback: ModelTier.EFFICIENT,
                    estimatedCost: 0.005,
                    estimatedLatency: 1500,
                    requiresConfirmation: false,
                    reasoning: "Complex code generation - using GPT-5",
                };
            }
            return {
                primary: ModelTier.EFFICIENT,
                fallback: ModelTier.ULTRA_EFFICIENT,
                estimatedCost: 0.0003,
                estimatedLatency: 1000,
                requiresConfirmation: false,
                reasoning: "Simple code task - using GPT-5-mini",
            };
        }
        // Rule 4: non-simple reasoning -> GPT-5 family.
        if (context.requiresReasoning && context.complexity !== "simple") {
            if (context.urgency === "fast" || context.costSensitive) {
                return {
                    primary: ModelTier.EFFICIENT,
                    fallback: ModelTier.STANDARD,
                    estimatedCost: 0.003,
                    estimatedLatency: 2000,
                    requiresConfirmation: false,
                    reasoning: "Reasoning required - using gpt-5-mini for speed/cost",
                };
            }
            if (context.complexity === "complex") {
                return {
                    primary: ModelTier.STANDARD,
                    fallback: ModelTier.EFFICIENT,
                    estimatedCost: 0.015,
                    estimatedLatency: 3000,
                    requiresConfirmation: true, // Expensive
                    reasoning: "Complex reasoning - using gpt-5 (requires confirmation)",
                };
            }
            // Moderate, not cost-sensitive, not urgent: falls through to Rule 5.
        }
        // Rule 5: moderate complexity -> balanced.
        if (context.complexity === "moderate") {
            return {
                primary: ModelTier.EFFICIENT,
                fallback: ModelTier.ULTRA_EFFICIENT,
                estimatedCost: 0.0003,
                estimatedLatency: 1000,
                requiresConfirmation: false,
                reasoning: "Moderate complexity - using GPT-5-mini",
            };
        }
        // Default: cost-efficient.
        return {
            primary: ModelTier.EFFICIENT,
            fallback: ModelTier.ULTRA_EFFICIENT,
            estimatedCost: 0.00015,
            estimatedLatency: 1000,
            requiresConfirmation: false,
            reasoning: "Default selection - using cost-efficient model",
        };
    }

    /**
     * Estimate the cost of a request from MODEL_COSTS.
     *
     * @param {string} model - Model identifier (a key of MODEL_COSTS).
     * @param {number} estimatedTokens - Total tokens; split 40% input / 60% output.
     * @returns {number} Estimated cost, or 0.001 when the model is unknown.
     */
    estimateCost(model, estimatedTokens) {
        // Own-property check so names such as "constructor" or "toString" are
        // treated as unknown models instead of resolving through the prototype.
        const known = Object.prototype.hasOwnProperty.call(MODEL_COSTS, model);
        if (!known) {
            return 0.001; // Default estimate
        }
        const costs = MODEL_COSTS[model];
        // Assume 40% input, 60% output for typical usage.
        const inputTokens = estimatedTokens * 0.4;
        const outputTokens = estimatedTokens * 0.6;
        return (inputTokens * costs.input + outputTokens * costs.output) / 1000;
    }

    /**
     * Get the optimal selection for a query plus up to three alternatives
     * (cheaper, faster, higher quality). An alternative is only listed when
     * its primary model differs from the optimal one; the faster alternative
     * must also differ from the cheaper one.
     *
     * @param {string} query - Raw user query.
     * @returns {{optimal: object, alternatives: object[], context: object}}
     */
    getRecommendations(query) {
        const context = this.buildContext(query);
        const optimal = this.selectModel(context);
        const alternatives = [];
        // Cheaper alternative: force the cost-sensitive "simple" path.
        const cheaper = this.selectModel({
            ...context,
            costSensitive: true,
            complexity: "simple",
        });
        if (cheaper.primary !== optimal.primary) {
            alternatives.push({ ...cheaper, reasoning: "Cheaper alternative" });
        }
        // Faster alternative.
        const faster = this.selectModel({ ...context, urgency: "fast" });
        if (faster.primary !== optimal.primary &&
            faster.primary !== cheaper.primary) {
            alternatives.push({ ...faster, reasoning: "Faster alternative" });
        }
        // Higher quality alternative: ignore cost, treat as complex.
        const quality = this.selectModel({
            ...context,
            costSensitive: false,
            complexity: "complex",
        });
        if (quality.primary !== optimal.primary) {
            alternatives.push({
                ...quality,
                reasoning: "Higher quality alternative",
            });
        }
        return {
            optimal,
            alternatives: alternatives.slice(0, 3), // Max 3 alternatives
            context,
        };
    }

    /**
     * Empty the complexity cache once it holds more than 1000 entries;
     * otherwise do nothing. Also invoked by analyzeComplexity() before each
     * insert to keep the cache bounded.
     */
    clearCache() {
        if (this.complexityCache.size > 1000) {
            this.complexityCache.clear();
        }
    }
}
|
|
263
|
+
// Shared module-level SmartModelRouter instance, exported for importing modules.
// It carries its own complexityCache, so callers using this export share memoized results.
|
|
264
|
+
export const modelRouter = new SmartModelRouter();
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import * as crypto from 'crypto';
|
|
2
|
+
/**
 * In-memory result cache with TTL expiry and least-recently-accessed eviction.
 *
 * Keys are hashed (SHA-256, first 16 hex characters) before being stored.
 * Expired entries are dropped lazily on get() and by a once-a-minute sweep.
 */
export class ResultCache {
    /**
     * @param {number} [maxSize] - Maximum number of entries (default 1000;
     *   falsy values keep the default).
     * @param {number} [ttl] - Entry lifetime in milliseconds (default
     *   3600000 = 1 hour; falsy values keep the default).
     */
    constructor(maxSize, ttl) {
        this.cache = new Map();
        this.maxSize = 1000;
        this.ttl = 3600000; // 1 hour default
        this.stats = {
            hits: 0,
            misses: 0,
            evictions: 0
        };
        if (maxSize)
            this.maxSize = maxSize;
        if (ttl)
            this.ttl = ttl;
        // Periodic cleanup, every minute. Keep the handle so dispose() can stop
        // it, and unref it so an idle cache does not keep the process alive.
        this.cleanupTimer = setInterval(() => this.cleanup(), 60000);
        if (this.cleanupTimer && typeof this.cleanupTimer.unref === 'function') {
            this.cleanupTimer.unref();
        }
    }

    /**
     * Look up a cached result.
     *
     * @param {string} key - Cache key (hashed internally).
     * @returns {Promise<*>} The stored result, or null on a miss or when the
     *   entry has outlived the TTL (expired entries are deleted).
     */
    async get(key) {
        const hash = this.hashKey(key);
        const entry = this.cache.get(hash);
        if (!entry) {
            this.stats.misses++;
            return null;
        }
        const age = Date.now() - entry.timestamp;
        if (age > this.ttl) {
            this.cache.delete(hash);
            this.stats.misses++;
            return null;
        }
        entry.hits++;
        entry.lastAccessed = Date.now();
        this.stats.hits++;
        return entry.result;
    }

    /**
     * Store a result under a key, replacing any existing entry.
     *
     * @param {string} key - Cache key (hashed internally).
     * @param {{result: *, tokens?: number, timestamp?: number}} value -
     *   Result plus optional token count and creation timestamp.
     * @returns {Promise<void>}
     */
    async set(key, value) {
        const hash = this.hashKey(key);
        // Only make room when a NEW key is inserted; overwriting an existing
        // key does not grow the map and must not evict an unrelated entry.
        if (!this.cache.has(hash) && this.cache.size >= this.maxSize) {
            this.evictLRU();
        }
        this.cache.set(hash, {
            key: hash,
            result: value.result,
            tokens: value.tokens || 0,
            timestamp: value.timestamp || Date.now(),
            hits: 0,
            lastAccessed: Date.now()
        });
    }

    /**
     * Summarize cache effectiveness.
     *
     * @returns {{totalEntries: number, totalHits: number, totalMisses: number,
     *   hitRate: number, memorySaved: number, tokensSaved: number}}
     *   memorySaved is the serialized (JSON) length of each hit entry times its
     *   hit count; tokensSaved is tokens times hit count. Only entries
     *   currently in the cache contribute to those two figures.
     */
    getStats() {
        const totalHits = this.stats.hits;
        const totalMisses = this.stats.misses;
        // 0 / 0 is NaN; report 0 in that case.
        const hitRate = totalHits / (totalHits + totalMisses) || 0;
        let tokensSaved = 0;
        let memorySaved = 0;
        for (const entry of this.cache.values()) {
            if (entry.hits > 0) {
                tokensSaved += entry.tokens * entry.hits;
                // JSON.stringify returns undefined for undefined/functions;
                // count those as zero bytes instead of throwing.
                const serialized = JSON.stringify(entry.result);
                const size = serialized === undefined ? 0 : serialized.length;
                memorySaved += size * entry.hits;
            }
        }
        return {
            totalEntries: this.cache.size,
            totalHits,
            totalMisses,
            hitRate,
            memorySaved,
            tokensSaved
        };
    }

    /** Remove every entry and reset the hit/miss/eviction counters. */
    clear() {
        this.cache.clear();
        this.stats = { hits: 0, misses: 0, evictions: 0 };
    }

    /**
     * Hash a key to the 16-hex-character form used as the map key.
     *
     * @param {string} key
     * @returns {string}
     */
    hashKey(key) {
        return crypto.createHash('sha256').update(key).digest('hex').substring(0, 16);
    }

    /**
     * Evict the entry with the smallest lastAccessed timestamp (ties keep the
     * earliest-inserted one) and count the eviction. No-op on an empty cache.
     */
    evictLRU() {
        let oldest = null;
        let oldestKey = null;
        for (const [key, entry] of this.cache) {
            if (!oldest || entry.lastAccessed < oldest.lastAccessed) {
                oldest = entry;
                oldestKey = key;
            }
        }
        if (oldestKey !== null) {
            this.cache.delete(oldestKey);
            this.stats.evictions++;
        }
    }

    /** Delete every entry whose age exceeds the TTL. */
    cleanup() {
        const now = Date.now();
        const keysToDelete = [];
        for (const [key, entry] of this.cache) {
            if (now - entry.timestamp > this.ttl) {
                keysToDelete.push(key);
            }
        }
        for (const key of keysToDelete) {
            this.cache.delete(key);
        }
    }

    /**
     * Change the entry lifetime.
     *
     * @param {number} ttl - New TTL in milliseconds.
     */
    setTTL(ttl) {
        this.ttl = ttl;
    }

    /**
     * Change the capacity, evicting least-recently-accessed entries until the
     * cache fits.
     *
     * @param {number} size - New maximum number of entries.
     */
    setMaxSize(size) {
        this.maxSize = size;
        // The size > 0 guard stops the loop for a negative limit, where
        // "size > maxSize" stays true even after the cache is empty.
        while (this.cache.size > this.maxSize && this.cache.size > 0) {
            this.evictLRU();
        }
    }

    /**
     * Stop the periodic cleanup timer. Call when the cache is no longer
     * needed; the cache contents are left untouched.
     */
    dispose() {
        if (this.cleanupTimer) {
            clearInterval(this.cleanupTimer);
            this.cleanupTimer = null;
        }
    }
}
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Token Optimizer - Caching, compression, and batching for token efficiency
|
|
3
|
+
* Part of Phase 1B implementation for 70% cost reduction
|
|
4
|
+
*/
|
|
5
|
+
import { LRUCache } from 'lru-cache';
|
|
6
|
+
import crypto from 'crypto';
|
|
7
|
+
/**
 * Prompt optimizer: rule-based prompt compression plus short-window batching.
 *
 * A response cache (LRUCache) is constructed and can be queried through
 * checkCache(), but no method of this class writes to it, and
 * optimizeSingle() deliberately does not consult it.
 */
export class TokenOptimizer {
    constructor() {
        // Batch queue for request batching: items are { request, resolve, reject }.
        this.batchQueue = [];
        this.batchTimer = null;
        // Configuration
        this.config = {
            cacheMaxSize: 100 * 1024 * 1024, // 100MB (not read by any method of this class)
            cacheTTL: 60 * 60 * 1000, // 1 hour
            batchWindow: 100, // ms to wait for batch
            maxBatchSize: 10, // max requests per batch
            compressionThreshold: 2000, // characters
            compressionModel: 'gemini-2.5-flash', // Ultra cheap for compression
        };
        // Metrics. tokensCompressed / tokensSaved are measured in characters
        // (prompt.length), not model tokens.
        this.metrics = {
            cacheHits: 0,
            cacheMisses: 0,
            tokensCompressed: 0,
            tokensSaved: 0,
            batchesProcessed: 0,
        };
        this.responseCache = new LRUCache({
            max: 1000, // Max 1000 entries
            ttl: this.config.cacheTTL,
            updateAgeOnGet: true,
            updateAgeOnHas: true,
        });
    }

    /**
     * Generate the cache key for a request.
     *
     * @param {object} request - Request with prompt, model and optional
     *   temperature; an explicit request.cacheKey is returned unchanged.
     * @returns {string} request.cacheKey, or the SHA-256 hex digest of
     *   { prompt, model, temperature } with temperature defaulting to 0.7.
     */
    generateCacheKey(request) {
        if (request.cacheKey)
            return request.cacheKey;
        // Default only when temperature is absent: 0 is a valid temperature
        // and must not share a key with the 0.7 default.
        const temperature = request.temperature === undefined || request.temperature === null
            ? 0.7
            : request.temperature;
        const keyData = {
            prompt: request.prompt,
            model: request.model,
            temperature,
        };
        return crypto
            .createHash('sha256')
            .update(JSON.stringify(keyData))
            .digest('hex');
    }

    /**
     * Check the response cache for a request and update hit/miss metrics.
     *
     * @param {object} request
     * @returns {object|null} The cached entry (its hits counter incremented),
     *   or null on a miss.
     */
    checkCache(request) {
        const key = this.generateCacheKey(request);
        const cached = this.responseCache.get(key);
        if (cached) {
            this.metrics.cacheHits++;
            cached.hits++;
            return cached;
        }
        this.metrics.cacheMisses++;
        return null;
    }

    /**
     * Compress a prompt with local text rules (no model call is made here).
     * Prompts shorter than config.compressionThreshold are returned unchanged.
     *
     * Steps: collapse whitespace, drop repeated sentences, strip filler words,
     * shorten a few verbose phrases. Sentence detection is heuristic.
     *
     * @param {string} prompt
     * @returns {Promise<string>} The compressed prompt.
     */
    async compressPrompt(prompt) {
        if (prompt.length < this.config.compressionThreshold) {
            return prompt;
        }
        // 1. Collapse all whitespace runs to single spaces.
        let compressed = prompt.replace(/\s+/g, ' ').trim();
        // 2. Remove duplicate sentences. Split only where a terminator is
        //    followed by a space, so "2.5", "index.js" or "obj.method()" stay
        //    intact, and keep each sentence's own punctuation.
        const sentences = compressed.split(/(?<=[.!?]) /);
        compressed = [...new Set(sentences)].join(' ');
        // 3. Remove filler words.
        const fillerWords = /\b(very|really|actually|basically|literally|just)\b/gi;
        compressed = compressed.replace(fillerWords, '');
        // 4. Compress common patterns.
        compressed = compressed
            .replace(/in order to/gi, 'to')
            .replace(/as well as/gi, 'and')
            .replace(/at this point in time/gi, 'now')
            .replace(/due to the fact that/gi, 'because');
        // 5. Tidy the gaps left behind by removed words.
        compressed = compressed.replace(/ {2,}/g, ' ').trim();
        // Track compression (character counts).
        this.metrics.tokensCompressed += prompt.length;
        this.metrics.tokensSaved += prompt.length - compressed.length;
        return compressed;
    }

    /**
     * Optimize a single request.
     *
     * The response cache is intentionally NOT consulted here. Per the original
     * author's note, caching was disabled because it caused bugs (storing
     * inputs instead of outputs) and its maintenance overhead was not
     * justified for 10-50 runs/day (see: ultrathinking analysis 2025-10-18).
     *
     * @param {object} request - Request with a string prompt.
     * @returns {Promise<object>} The request with prompt replaced by the
     *   optimized prompt plus optimized, compressed, originalLength and
     *   compressedLength fields.
     */
    async optimizeSingle(request) {
        const originalLength = request.prompt.length;
        let optimizedPrompt = request.prompt;
        // Compress only prompts longer than the threshold.
        if (originalLength > this.config.compressionThreshold) {
            optimizedPrompt = await this.compressPrompt(request.prompt);
        }
        return {
            ...request,
            prompt: optimizedPrompt,
            optimized: true,
            compressed: optimizedPrompt !== request.prompt,
            originalLength,
            compressedLength: optimizedPrompt.length,
        };
    }

    /**
     * Queue a request for batched optimization.
     *
     * The batch is processed immediately once config.maxBatchSize requests
     * are queued; otherwise a timer fires after config.batchWindow ms.
     *
     * @param {object} request
     * @returns {Promise<object>} Resolves with the optimized request plus batchId.
     */
    async addToBatch(request) {
        return new Promise((resolve, reject) => {
            this.batchQueue.push({ request, resolve, reject });
            if (this.batchQueue.length >= this.config.maxBatchSize) {
                // Batch is full: process right away.
                this.processBatch();
            }
            else if (!this.batchTimer) {
                // Otherwise wait for the batch window (one timer at a time).
                this.batchTimer = setTimeout(() => {
                    this.processBatch();
                }, this.config.batchWindow);
            }
        });
    }

    /**
     * Process queued requests: take up to config.maxBatchSize items, optimize
     * each in order and settle its promise. Items left in the queue with no
     * timer pending are processed in a follow-up batch so none are stranded.
     *
     * @returns {Promise<void>}
     */
    async processBatch() {
        if (this.batchQueue.length === 0)
            return;
        // Clear timer
        if (this.batchTimer) {
            clearTimeout(this.batchTimer);
            this.batchTimer = null;
        }
        // Extract batch
        const batch = this.batchQueue.splice(0, this.config.maxBatchSize);
        const batchId = crypto.randomBytes(8).toString('hex');
        this.metrics.batchesProcessed++;
        // Process each request in the batch; one failure rejects only that item.
        for (const item of batch) {
            try {
                const optimized = await this.optimizeSingle(item.request);
                item.resolve({
                    ...optimized,
                    batchId,
                });
            }
            catch (error) {
                item.reject(error);
            }
        }
        // Requests added while awaiting schedule their own timer; anything
        // still queued without one would otherwise wait forever. The
        // batch.length guard prevents endless recursion if maxBatchSize is 0.
        if (batch.length > 0 && this.batchQueue.length > 0 && !this.batchTimer) {
            await this.processBatch();
        }
    }

    /**
     * Main optimization entry point.
     *
     * Requests flagged canBatch go to the batch queue unless checkCache()
     * reports a hit; everything else is optimized immediately. checkCache()
     * updates the cache hit/miss metrics as a side effect.
     *
     * @param {object} request
     * @returns {Promise<object>} The optimized request.
     */
    async optimize(request) {
        if (request.canBatch && !this.checkCache(request)) {
            return this.addToBatch(request);
        }
        return this.optimizeSingle(request);
    }

    /**
     * Get optimization metrics and advisory recommendations.
     *
     * @returns {{cacheHitRate: number, compressionRatio: number,
     *   totalSaved: number, batchesProcessed: number, recommendations: string[]}}
     */
    getMetrics() {
        const cacheLookups = this.metrics.cacheHits + this.metrics.cacheMisses;
        // 0 / 0 is NaN; report 0 when there are no samples.
        const cacheHitRate = this.metrics.cacheHits / cacheLookups || 0;
        const compressionRatio = this.metrics.tokensSaved / this.metrics.tokensCompressed || 0;
        const recommendations = [];
        // Rate-based advice is only meaningful once there is data to rate.
        if (cacheLookups > 0 && cacheHitRate < 0.3) {
            recommendations.push(`⚠️ Low cache hit rate (${(cacheHitRate * 100).toFixed(1)}%). Consider caching more aggressively.`);
        }
        if (this.metrics.tokensCompressed > 0 && compressionRatio < 0.2) {
            recommendations.push('💡 Compression ratio is low. Consider more aggressive compression.');
        }
        if (this.metrics.batchesProcessed < 10) {
            recommendations.push('📊 Low batch usage. Enable batching for parallel requests.');
        }
        if (cacheHitRate > 0.7) {
            recommendations.push('✅ Excellent cache performance!');
        }
        return {
            cacheHitRate,
            compressionRatio,
            totalSaved: this.metrics.tokensSaved,
            batchesProcessed: this.metrics.batchesProcessed,
            recommendations,
        };
    }

    /**
     * Trim the response cache once it holds more than 900 entries, keeping the
     * most recently used 80%.
     */
    maintainCache() {
        if (this.responseCache.size > 900) {
            const toKeep = Math.floor(this.responseCache.size * 0.8);
            while (this.responseCache.size > toKeep) {
                // lru-cache's keys() yields most-recently-used first; rkeys()
                // yields least-recently-used first, which is what must go.
                const oldestKey = this.responseCache.rkeys().next().value;
                if (oldestKey === undefined) {
                    break;
                }
                this.responseCache.delete(oldestKey);
            }
        }
    }

    /** Reset all metrics to zero. */
    resetMetrics() {
        this.metrics = {
            cacheHits: 0,
            cacheMisses: 0,
            tokensCompressed: 0,
            tokensSaved: 0,
            batchesProcessed: 0,
        };
    }

    /**
     * Shut down the optimizer: cancel the pending batch timer and flush any
     * queued requests.
     *
     * @returns {Promise<void>} Settles once the queued requests are processed,
     *   so callers may await the flush.
     */
    shutdown() {
        if (this.batchTimer) {
            clearTimeout(this.batchTimer);
            this.batchTimer = null;
        }
        return this.processBatch(); // Process any remaining batched requests
    }
}
|
|
256
|
+
// Shared module-level TokenOptimizer instance, exported for importing modules.
// Its batch queue, metrics and response cache are shared by every caller using this export.
|
|
257
|
+
export const tokenOptimizer = new TokenOptimizer();
|