tachibot-mcp 2.19.2 → 2.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/dist/src/collaborative-orchestrator.js +4 -4
- package/dist/src/config/model-constants.js +20 -10
- package/dist/src/config/model-defaults.js +1 -1
- package/dist/src/config/timeout-config.js +4 -4
- package/dist/src/config.js +1 -1
- package/dist/src/modes/architect.js +4 -4
- package/dist/src/modes/scout.js +2 -1
- package/dist/src/optimization/cost-monitor.js +1 -1
- package/dist/src/orchestrators/collaborative/registries/ModelProviderRegistry.js +1 -1
- package/dist/src/server.js +4 -0
- package/dist/src/tools/grok-enhanced.js +15 -29
- package/dist/src/tools/grok-tools.js +46 -31
- package/dist/src/tools/openai-tools.js +16 -3
- package/dist/src/tools/planner-tools.js +7 -1
- package/dist/src/tools/tachi-tool.js +3 -3
- package/dist/src/utils/ansi-renderer.js +2 -2
- package/dist/src/utils/ansi-styles.js +12 -0
- package/dist/src/utils/openrouter-gateway.js +3 -0
- package/dist/src/utils/param-aliases.js +68 -0
- package/dist/src/workflows/model-router.js +29 -2
- package/dist/src/workflows/tool-mapper.js +3 -3
- package/docs/API_KEYS.md +9 -8
- package/docs/TOOL_PARAMETERS.md +1 -1
- package/docs/superpowers/plans/2026-04-10-grok-420-upgrade.md +498 -0
- package/package.json +1 -1
- package/dist/scripts/demo-all-components.js +0 -340
- package/dist/scripts/test-usage-stats.js +0 -6
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,13 @@ All notable changes to TachiBot MCP will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [2.19.3] - 2026-03-21
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- **Section header regex** — now matches mixed case + optional dashes (works across all providers, not just Gemini)
|
|
12
|
+
- **Planner 5/1 bug** — `parsePlanSteps` now matches `### Task [T-ID]:` format (was only matching `### Step N:`)
|
|
13
|
+
- **Planner mismatch warning** — surfaces parse failures instead of masking with `Math.max`
|
|
14
|
+
|
|
8
15
|
## [2.19.2] - 2026-03-21
|
|
9
16
|
|
|
10
17
|
### Added
|
|
@@ -4,6 +4,7 @@ import { sessionLogger } from "./session/session-logger.js";
|
|
|
4
4
|
import { sessionManager } from "./session/session-manager.js";
|
|
5
5
|
import { ToolRouter } from "./tools/tool-router.js";
|
|
6
6
|
import { getMemoryManager } from "./memory/index.js";
|
|
7
|
+
import { GROK_MODELS } from "./config/model-constants.js";
|
|
7
8
|
import { VisualizationService } from "./orchestrators/collaborative/services/visualization/VisualizationService.js";
|
|
8
9
|
import { ToolExecutionService } from "./orchestrators/collaborative/services/tool-execution/ToolExecutionService.js";
|
|
9
10
|
export class CollaborativeOrchestrator {
|
|
@@ -229,12 +230,11 @@ export class CollaborativeOrchestrator {
|
|
|
229
230
|
this.modelPreferences = { ...this.modelPreferences, ...preferences };
|
|
230
231
|
}
|
|
231
232
|
/**
|
|
232
|
-
* Use Grok 4
|
|
233
|
+
* Use Grok 4.20 Multi-Agent for all Grok operations
|
|
233
234
|
*/
|
|
234
235
|
useGrok4Heavy() {
|
|
235
|
-
|
|
236
|
-
this.modelPreferences['grok'] =
|
|
237
|
-
this.modelPreferences['grok-4'] = 'grok-4-0709';
|
|
236
|
+
this.modelPreferences['grok'] = GROK_MODELS._4_20_MULTI_AGENT;
|
|
237
|
+
this.modelPreferences['grok-4'] = GROK_MODELS._4_20_MULTI_AGENT;
|
|
238
238
|
}
|
|
239
239
|
/**
|
|
240
240
|
* Generate session ID
|
|
@@ -59,10 +59,14 @@ export const PERPLEXITY_MODELS = {
|
|
|
59
59
|
SONAR_PRO: "sonar-pro", // Advanced search
|
|
60
60
|
SONAR_REASONING: "sonar-reasoning-pro", // Reasoning model (expensive - avoid)
|
|
61
61
|
};
|
|
62
|
-
// Grok Models (xAI) - Updated
|
|
62
|
+
// Grok Models (xAI) - Updated 2026-04-10 with Grok 4.20 (Mar 2026)
|
|
63
63
|
export const GROK_MODELS = {
|
|
64
|
-
// Grok 4.
|
|
65
|
-
|
|
64
|
+
// Grok 4.20 models (Mar 10, 2026) - FLAGSHIP
|
|
65
|
+
_4_20_REASONING: "grok-4.20-0309-reasoning", // Flagship: 2M context, $2/$6, low hallucination
|
|
66
|
+
_4_20_NON_REASONING: "grok-4.20-0309-non-reasoning", // Standard: 2M context, $2/$6
|
|
67
|
+
_4_20_MULTI_AGENT: "grok-4.20-multi-agent-0309", // Multi-agent: 4-16 agents via reasoning.effort, $2/$6
|
|
68
|
+
// Grok 4.1 fast models (Nov 2025) - BEST VALUE (10x cheaper)
|
|
69
|
+
_4_1_FAST_REASONING: "grok-4-1-fast-reasoning", // Fast reasoning: 2M context, $0.20/$0.50
|
|
66
70
|
_4_1_FAST_NON_REASONING: "grok-4-1-fast-non-reasoning", // Tool-calling optimized: 2M context, $0.20/$0.50
|
|
67
71
|
// Grok 4 fast models (2025) - Still good
|
|
68
72
|
CODE_FAST: "grok-code-fast-1", // Coding specialist: 256K→2M, $0.20/$1.50, 92 tok/sec
|
|
@@ -153,12 +157,12 @@ export const CURRENT_MODELS = {
|
|
|
153
157
|
premium: OPENAI_MODELS.PRO, // Expert mode (gpt-5.4-pro - higher compute)
|
|
154
158
|
},
|
|
155
159
|
grok: {
|
|
156
|
-
reason: GROK_MODELS.
|
|
157
|
-
code: GROK_MODELS.
|
|
158
|
-
debug: GROK_MODELS.
|
|
159
|
-
brainstorm: GROK_MODELS.
|
|
160
|
-
search: GROK_MODELS.
|
|
161
|
-
architect: GROK_MODELS.
|
|
160
|
+
reason: GROK_MODELS._4_20_REASONING, // grok-4.20-0309-reasoning (flagship, low hallucination)
|
|
161
|
+
code: GROK_MODELS._4_20_NON_REASONING, // grok-4.20 non-reasoning (flagship quality, tool-calling)
|
|
162
|
+
debug: GROK_MODELS._4_20_NON_REASONING, // grok-4.20 non-reasoning (low hallucination for debugging)
|
|
163
|
+
brainstorm: GROK_MODELS._4_20_NON_REASONING, // grok-4.20-0309-non-reasoning (2M context)
|
|
164
|
+
search: GROK_MODELS._4_20_REASONING, // grok-4.20 LOW HALLUCINATION - critical for search
|
|
165
|
+
architect: GROK_MODELS._4_20_MULTI_AGENT, // grok-4.20-multi-agent-0309 (4-16 agent swarm)
|
|
162
166
|
},
|
|
163
167
|
gemini: {
|
|
164
168
|
default: GEMINI_MODELS.GEMINI_3_PRO,
|
|
@@ -331,6 +335,9 @@ export const MODEL_DISPLAY_NAMES = {
|
|
|
331
335
|
"gemini-3-flash-preview": "gemini-3-flash",
|
|
332
336
|
"gemini-3.1-flash-lite": "gemini-3.1-flash-lite",
|
|
333
337
|
// Grok (xAI)
|
|
338
|
+
"grok-4.20-0309-reasoning": "grok-4.20",
|
|
339
|
+
"grok-4.20-0309-non-reasoning": "grok-4.20-fast",
|
|
340
|
+
"grok-4.20-multi-agent-0309": "grok-4.20-multi",
|
|
334
341
|
"grok-4-1-fast-reasoning": "grok-4.1",
|
|
335
342
|
"grok-4-1-fast-non-reasoning": "grok-4.1-fast",
|
|
336
343
|
"grok-4-fast-reasoning": "grok-4",
|
|
@@ -370,7 +377,10 @@ export const MODEL_PRICING = {
|
|
|
370
377
|
"gemini-3.1-pro-preview": 0.007, // ($2 + $12) / 2 / 1000
|
|
371
378
|
"gemini-3-flash-preview": 0.00175, // ($0.50 + $3) / 2 / 1000
|
|
372
379
|
"gemini-3.1-flash-lite": 0.001, // Cheapest/fastest in 3.1 series (Mar 2026)
|
|
373
|
-
// Grok
|
|
380
|
+
// Grok
|
|
381
|
+
"grok-4.20-0309-reasoning": 0.004, // ($2 + $6) / 2 / 1000
|
|
382
|
+
"grok-4.20-0309-non-reasoning": 0.004, // ($2 + $6) / 2 / 1000
|
|
383
|
+
"grok-4.20-multi-agent-0309": 0.004, // ($2 + $6) / 2 / 1000
|
|
374
384
|
"grok-4-1-fast-reasoning": 0.00035,
|
|
375
385
|
"grok-4-1-fast-non-reasoning": 0.00035,
|
|
376
386
|
"grok-4-fast-reasoning": 0.00035,
|
|
@@ -21,7 +21,7 @@ const MODELS = {
|
|
|
21
21
|
OPENAI: OPENAI_MODELS.THINKING, // gpt-5.4 (default - most capable)
|
|
22
22
|
OPENAI_REASON: OPENAI_MODELS.THINKING, // gpt-5.4 (deep reasoning)
|
|
23
23
|
// xAI Grok
|
|
24
|
-
GROK: GROK_MODELS.
|
|
24
|
+
GROK: GROK_MODELS._4_20_REASONING, // grok-4.20-0309-reasoning
|
|
25
25
|
// Perplexity
|
|
26
26
|
PERPLEXITY: PERPLEXITY_MODELS.SONAR, // sonar (cheapest)
|
|
27
27
|
PERPLEXITY_REASON: PERPLEXITY_MODELS.SONAR_REASONING, // sonar-reasoning-pro ($2/$8 per M)
|
|
@@ -112,12 +112,12 @@ export const SMART_TIMEOUT_DEFAULTS = {
|
|
|
112
112
|
max: 90000 // 90 seconds
|
|
113
113
|
},
|
|
114
114
|
grok: {
|
|
115
|
-
base: 30000, // 30 seconds
|
|
116
|
-
max:
|
|
115
|
+
base: 30000, // 30 seconds - 4.1 fast models are quick
|
|
116
|
+
max: 120000 // 2 minutes - 4.20 reasoning models need more
|
|
117
117
|
},
|
|
118
118
|
openai: {
|
|
119
|
-
base:
|
|
120
|
-
max:
|
|
119
|
+
base: 60000, // 60 seconds - GPT-5.4 reasoning needs more time
|
|
120
|
+
max: 180000 // 3 minutes - high/xhigh reasoning effort
|
|
121
121
|
},
|
|
122
122
|
anthropic: {
|
|
123
123
|
base: 20000, // 20 seconds
|
package/dist/src/config.js
CHANGED
|
@@ -128,7 +128,7 @@ export function getAvailableModels(config) {
|
|
|
128
128
|
models.push('sonar-pro', 'sonar-reasoning-pro', 'sonar-deep-research');
|
|
129
129
|
}
|
|
130
130
|
if (config.apiKeys.grok) {
|
|
131
|
-
models.push('grok-3', 'grok-
|
|
131
|
+
models.push('grok-3', 'grok-4.20-0309-reasoning', 'grok-4.20-multi-agent-0309');
|
|
132
132
|
}
|
|
133
133
|
if (config.apiKeys.openrouter) {
|
|
134
134
|
models.push('qwen3-coder', 'qwq-32b', 'qwen3-32b');
|
|
@@ -17,8 +17,8 @@ export class Architect {
|
|
|
17
17
|
},
|
|
18
18
|
specialized_verification: {
|
|
19
19
|
models: {
|
|
20
|
-
'syntax_error': 'gpt-4-mini',
|
|
21
|
-
'type_error': 'gpt-4-mini',
|
|
20
|
+
'syntax_error': 'gpt-5.4-mini',
|
|
21
|
+
'type_error': 'gpt-5.4-mini',
|
|
22
22
|
'algorithmic_complexity': 'qwq-32b',
|
|
23
23
|
'performance_issue': 'qwq-32b',
|
|
24
24
|
'architectural_smell': 'claude-opus-4.1',
|
|
@@ -28,7 +28,7 @@ export class Architect {
|
|
|
28
28
|
'design_pattern_violation': 'claude-opus-4.1',
|
|
29
29
|
'memory_leak': 'qwq-32b',
|
|
30
30
|
'race_condition': 'claude-opus-4.1',
|
|
31
|
-
'code_duplication': 'gpt-4-mini',
|
|
31
|
+
'code_duplication': 'gpt-5.4-mini',
|
|
32
32
|
'circular_dependency': 'claude-opus-4.1'
|
|
33
33
|
},
|
|
34
34
|
dynamicTokens: {
|
|
@@ -269,7 +269,7 @@ export class Architect {
|
|
|
269
269
|
'claude-opus-4.1': 10,
|
|
270
270
|
'qwq-32b': 8,
|
|
271
271
|
'perplexity-reasoning': 7,
|
|
272
|
-
'gpt-4-mini': 5
|
|
272
|
+
'gpt-5.4-mini': 5
|
|
273
273
|
};
|
|
274
274
|
return priorities[model] || 5;
|
|
275
275
|
}
|
package/dist/src/modes/scout.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { ModelRouter } from '../workflows/model-router.js';
|
|
2
2
|
import { getScoutModels, getDefaultModels } from '../config/model-defaults.js';
|
|
3
|
+
import { GROK_MODELS } from '../config/model-constants.js';
|
|
3
4
|
import { getGrokApiKey } from '../utils/api-keys.js';
|
|
4
5
|
import { createProgressStream } from '../utils/progress-stream.js';
|
|
5
6
|
import { providerRouter } from '../utils/provider-router.js';
|
|
@@ -530,7 +531,7 @@ export class Scout {
|
|
|
530
531
|
const { callGrokEnhanced } = await import('../tools/grok-enhanced.js');
|
|
531
532
|
const messages = [{ role: 'user', content: query }];
|
|
532
533
|
const result = await callGrokEnhanced(messages, {
|
|
533
|
-
model:
|
|
534
|
+
model: GROK_MODELS._4_20_REASONING,
|
|
534
535
|
maxTokens,
|
|
535
536
|
enableLiveSearch: options?.enableLiveSearch ?? true,
|
|
536
537
|
searchSources: options?.maxSources ?? 100,
|
|
@@ -56,7 +56,7 @@ export class CostMonitor extends EventEmitter {
|
|
|
56
56
|
"claude-3.5-sonnet",
|
|
57
57
|
{ model: "claude-3.5-sonnet", inputCost: 0.003, outputCost: 0.015 },
|
|
58
58
|
],
|
|
59
|
-
["grok-4", { model: "grok-4", inputCost: 0.
|
|
59
|
+
["grok-4.20-0309-reasoning", { model: "grok-4.20-0309-reasoning", inputCost: 0.002, outputCost: 0.006 }],
|
|
60
60
|
]);
|
|
61
61
|
// Clean up old records periodically
|
|
62
62
|
setInterval(() => this.cleanupOldRecords(), 60 * 60 * 1000); // Every hour
|
|
@@ -82,7 +82,7 @@ modelProviderRegistry.registerMany([
|
|
|
82
82
|
{ modelName: "qwq", toolName: "qwq_reason", provider: "openrouter" },
|
|
83
83
|
// Grok models
|
|
84
84
|
{ modelName: "grok", toolName: "grok_reason", provider: "x.ai" },
|
|
85
|
-
{ modelName: "grok-4", toolName: "grok_reason", provider: "x.ai", aliases: ["grok-4-
|
|
85
|
+
{ modelName: "grok-4.20", toolName: "grok_reason", provider: "x.ai", aliases: ["grok-4.20-0309-reasoning", "grok-4.20-multi-agent-0309"] },
|
|
86
86
|
// Claude models
|
|
87
87
|
{ modelName: "claude", toolName: "think", provider: "anthropic", aliases: ["claude-code", "reasoning", "analysis"] },
|
|
88
88
|
// Gemini models (all use gemini-3.1-pro-preview for RAW POWER)
|
package/dist/src/server.js
CHANGED
|
@@ -75,6 +75,7 @@ import { isGeminiAvailable, geminiBrainstormTool, geminiAnalyzeCodeTool } from "
|
|
|
75
75
|
import { isOpenRouterAvailable } from "./tools/openrouter-tools.js";
|
|
76
76
|
import { getTachiTools } from "./tools/tachi-tool.js";
|
|
77
77
|
import { getPromptTechniqueTools } from "./tools/prompt-technique-tools.js";
|
|
78
|
+
import { withParamAliases } from "./utils/param-aliases.js";
|
|
78
79
|
// import { registerGPT5Tools, isGPT5Available } from "./tools/openai-gpt5-fixed.js"; // DISABLED - using regular openai-tools.ts
|
|
79
80
|
import { initializeOptimizations } from "./optimization/index.js";
|
|
80
81
|
import { FocusModeRegistry } from "./application/services/focus/FocusModeRegistry.js";
|
|
@@ -117,6 +118,9 @@ function safeAddTool(tool) {
|
|
|
117
118
|
if (!isToolEnabled(tool.name)) {
|
|
118
119
|
return; // Skip disabled tools silently (logging handled by isToolEnabled)
|
|
119
120
|
}
|
|
121
|
+
// Auto-alias common param names (query/problem/prompt/question/topic)
|
|
122
|
+
// so LLMs can use any synonym and the tool still works
|
|
123
|
+
tool = withParamAliases(tool);
|
|
120
124
|
if (!registeredTools.has(tool.name)) {
|
|
121
125
|
// Wrap execute with usage tracking
|
|
122
126
|
const originalExecute = tool.execute;
|
|
@@ -19,23 +19,9 @@ config({ path: path.resolve(__dirname, '../../../.env') });
|
|
|
19
19
|
const GROK_API_KEY = getGrokApiKey();
|
|
20
20
|
const GROK_API_URL = "https://api.x.ai/v1/chat/completions";
|
|
21
21
|
const GROK_RESPONSES_URL = "https://api.x.ai/v1/responses"; // New Agent Tools API endpoint (Jan 2025)
|
|
22
|
-
//
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
// Grok 4.1 models (Nov 2025) - LATEST & BEST (verified working)
|
|
26
|
-
GrokModel["GROK_4_1_FAST_REASONING"] = "grok-4-1-fast-reasoning";
|
|
27
|
-
GrokModel["GROK_4_1_FAST"] = "grok-4-1-fast-non-reasoning";
|
|
28
|
-
// Grok 4 fast models (2025) - Still good
|
|
29
|
-
GrokModel["CODE_FAST"] = "grok-code-fast-1";
|
|
30
|
-
GrokModel["GROK_4_FAST_REASONING"] = "grok-4-fast-reasoning";
|
|
31
|
-
GrokModel["GROK_4_FAST"] = "grok-4-fast-non-reasoning";
|
|
32
|
-
// Expensive/specialized (use sparingly)
|
|
33
|
-
GrokModel["GROK_4_HEAVY"] = "grok-4-0709";
|
|
34
|
-
GrokModel["GROK_3"] = "grok-3";
|
|
35
|
-
// Beta/experimental (deprecated)
|
|
36
|
-
GrokModel["GROK_BETA"] = "grok-beta";
|
|
37
|
-
GrokModel["GROK_VISION_BETA"] = "grok-vision-beta";
|
|
38
|
-
})(GrokModel || (GrokModel = {}));
|
|
22
|
+
// Unified GrokModel enum - single source of truth in grok-tools.ts
|
|
23
|
+
import { GrokModel } from './grok-tools.js';
|
|
24
|
+
export { GrokModel };
|
|
39
25
|
/**
|
|
40
26
|
* Enhanced Grok API call with live search support
|
|
41
27
|
*/
|
|
@@ -45,7 +31,7 @@ export async function callGrokEnhanced(messages, options = {}) {
|
|
|
45
31
|
content: `[Grok API key not configured. Add GROK_API_KEY or XAI_API_KEY to .env file]`
|
|
46
32
|
};
|
|
47
33
|
}
|
|
48
|
-
const { model = GrokModel.
|
|
34
|
+
const { model = GrokModel.GROK_4_20_REASONING, // Updated: Use Grok 4.20 by default
|
|
49
35
|
temperature = 0.7, maxTokens = options.useHeavy ? 100000 : 4000, enableLiveSearch = false, searchSources = 100, // Default to 100 sources for cost control
|
|
50
36
|
searchDomains = [], structuredOutput = false } = options;
|
|
51
37
|
try {
|
|
@@ -54,7 +40,7 @@ export async function callGrokEnhanced(messages, options = {}) {
|
|
|
54
40
|
// NEW Agent Tools API (Jan 2025) - uses /v1/responses endpoint
|
|
55
41
|
// with 'input' instead of 'messages' and tools array
|
|
56
42
|
const searchRequestBody = {
|
|
57
|
-
model: GrokModel.
|
|
43
|
+
model: GrokModel.GROK_4_20_NON_REASONING, // 4.20 standard is better for tool-calling search
|
|
58
44
|
input: messages.map(m => ({ role: m.role, content: m.content })),
|
|
59
45
|
tools: [
|
|
60
46
|
{ type: "web_search" },
|
|
@@ -187,9 +173,9 @@ ${FORMAT_INSTRUCTION}`
|
|
|
187
173
|
content: query
|
|
188
174
|
}
|
|
189
175
|
];
|
|
190
|
-
log?.info(`Grok Scout: ${variant} research with ${enableLiveSearch ? 'live search' : 'knowledge base'} (using grok-4
|
|
176
|
+
log?.info(`Grok Scout: ${variant} research with ${enableLiveSearch ? 'live search' : 'knowledge base'} (using grok-4.20 reasoning)`);
|
|
191
177
|
const result = await callGrokEnhanced(messages, {
|
|
192
|
-
model: GrokModel.
|
|
178
|
+
model: GrokModel.GROK_4_20_REASONING, // 4.20 for low hallucination research
|
|
193
179
|
enableLiveSearch,
|
|
194
180
|
searchSources,
|
|
195
181
|
searchDomains,
|
|
@@ -255,7 +241,7 @@ ${FORMAT_INSTRUCTION}`
|
|
|
255
241
|
const costInfo = useHeavy ? '$3/$15 (expensive!)' : '$0.20/$0.50 (latest!)';
|
|
256
242
|
log?.info(`Using ${modelName} (${approach}) with ${enableLiveSearch ? 'live search' : 'knowledge base'} - Cost: ${costInfo}`);
|
|
257
243
|
const result = await callGrokEnhanced(messages, {
|
|
258
|
-
model: useHeavy ? GrokModel.
|
|
244
|
+
model: useHeavy ? GrokModel.GROK_4_20_MULTI_AGENT : GrokModel.GROK_4_20_REASONING,
|
|
259
245
|
useHeavy,
|
|
260
246
|
enableLiveSearch,
|
|
261
247
|
searchSources: 50,
|
|
@@ -307,7 +293,7 @@ export const grokFunctionTool = {
|
|
|
307
293
|
];
|
|
308
294
|
// Make request with tools
|
|
309
295
|
const requestBody = {
|
|
310
|
-
model: args.useHeavy ? GrokModel.
|
|
296
|
+
model: args.useHeavy ? GrokModel.GROK_4_20_MULTI_AGENT : GrokModel.GROK_4_20_NON_REASONING,
|
|
311
297
|
messages,
|
|
312
298
|
tools,
|
|
313
299
|
tool_choice: "auto", // Let Grok decide when to call functions
|
|
@@ -373,13 +359,13 @@ ${FORMAT_INSTRUCTION}`
|
|
|
373
359
|
content: `Search for: ${query}`
|
|
374
360
|
}
|
|
375
361
|
];
|
|
376
|
-
log?.info(`Grok Search: ${max_search_results} sources, recency: ${recency} (using grok-4
|
|
362
|
+
log?.info(`Grok Search: ${max_search_results} sources, recency: ${recency} (using grok-4.20 reasoning)`);
|
|
377
363
|
// Extract domains from sources if specified
|
|
378
364
|
const domains = sources
|
|
379
365
|
?.filter((s) => s.allowed_websites)
|
|
380
366
|
?.flatMap((s) => s.allowed_websites) || [];
|
|
381
367
|
const result = await callGrokEnhanced(messages, {
|
|
382
|
-
model: GrokModel.
|
|
368
|
+
model: GrokModel.GROK_4_20_REASONING, // Low hallucination is CRITICAL for search accuracy
|
|
383
369
|
enableLiveSearch: true,
|
|
384
370
|
searchSources: max_search_results,
|
|
385
371
|
searchDomains: domains,
|
|
@@ -417,11 +403,11 @@ export function isGrokAvailable() {
|
|
|
417
403
|
export function getGrokStatus() {
|
|
418
404
|
return {
|
|
419
405
|
available: isGrokAvailable(),
|
|
420
|
-
model:
|
|
406
|
+
model: "grok-4.20-0309-reasoning",
|
|
421
407
|
features: [
|
|
422
|
-
'Grok 4.
|
|
423
|
-
'Grok 4.
|
|
424
|
-
'
|
|
408
|
+
'Grok 4.20 Reasoning (grok-4.20-0309-reasoning): Flagship, low hallucination, 2M context ($2/$6)',
|
|
409
|
+
'Grok 4.20 Non-Reasoning (grok-4.20-0309-non-reasoning): Tool-calling optimized, agentic workflows ($2/$6)',
|
|
410
|
+
'Grok 4.20 Multi-Agent (grok-4.20-multi-agent-0309): 4-16 parallel agents ($2/$6)',
|
|
425
411
|
'Live web search with citations',
|
|
426
412
|
'Function calling',
|
|
427
413
|
'Structured outputs',
|
|
@@ -21,10 +21,14 @@ config({ path: path.resolve(__dirname, '../../../.env') });
|
|
|
21
21
|
// Grok API configuration
|
|
22
22
|
const GROK_API_KEY = getGrokApiKey();
|
|
23
23
|
const GROK_API_URL = "https://api.x.ai/v1/chat/completions";
|
|
24
|
-
// Available Grok models - Updated
|
|
24
|
+
// Available Grok models - Updated 2026-04-10 with Grok 4.20 (Mar 2026)
|
|
25
25
|
export var GrokModel;
|
|
26
26
|
(function (GrokModel) {
|
|
27
|
-
// Grok 4.
|
|
27
|
+
// Grok 4.20 models (Mar 10, 2026) - FLAGSHIP
|
|
28
|
+
GrokModel["GROK_4_20_REASONING"] = "grok-4.20-0309-reasoning";
|
|
29
|
+
GrokModel["GROK_4_20_NON_REASONING"] = "grok-4.20-0309-non-reasoning";
|
|
30
|
+
GrokModel["GROK_4_20_MULTI_AGENT"] = "grok-4.20-multi-agent-0309";
|
|
31
|
+
// Grok 4.1 fast models (Nov 2025) - BEST VALUE (10x cheaper)
|
|
28
32
|
GrokModel["GROK_4_1_FAST_REASONING"] = "grok-4-1-fast-reasoning";
|
|
29
33
|
GrokModel["GROK_4_1_FAST"] = "grok-4-1-fast-non-reasoning";
|
|
30
34
|
// Grok 4 fast models (2025) - Still good
|
|
@@ -42,9 +46,8 @@ export var GrokModel;
|
|
|
42
46
|
* - 'code-analysis': Relaxed for code analysis tools
|
|
43
47
|
* - 'llm-orchestration': Medium for LLM-to-LLM calls
|
|
44
48
|
*/
|
|
45
|
-
export async function callGrok(messages, model = GrokModel.
|
|
46
|
-
|
|
47
|
-
forceVisibleOutput = true, validationContext = 'llm-orchestration') {
|
|
49
|
+
export async function callGrok(messages, model = GrokModel.GROK_4_20_REASONING, temperature = 0.7, maxTokens = 16384, // Increased default for comprehensive responses
|
|
50
|
+
forceVisibleOutput = true, validationContext = 'llm-orchestration', reasoningEffort) {
|
|
48
51
|
// Try OpenRouter gateway first if enabled
|
|
49
52
|
if (isGatewayEnabled()) {
|
|
50
53
|
const gatewayResult = await tryOpenRouterGateway(model, messages, {
|
|
@@ -68,34 +71,43 @@ forceVisibleOutput = true, validationContext = 'llm-orchestration') {
|
|
|
68
71
|
}
|
|
69
72
|
return { ...msg, content: validation.sanitized };
|
|
70
73
|
});
|
|
74
|
+
// Grok 4.x reasoning can take 60-90s; 4.20 and multi-agent can take longer
|
|
75
|
+
const isReasoning = model.includes('reasoning') || model.includes('multi-agent');
|
|
76
|
+
const is420 = model.includes('4.20');
|
|
77
|
+
const timeoutMs = is420 ? 180000 : (isReasoning ? 120000 : 60000);
|
|
71
78
|
try {
|
|
72
|
-
// For Grok 4 models, we need to handle reasoning tokens specially
|
|
73
|
-
const
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
model === GrokModel.GROK_4_FAST ||
|
|
77
|
-
model === GrokModel.GROK_4_HEAVY;
|
|
78
|
-
// Adjust prompt for Grok 4 to ensure visible output
|
|
79
|
-
if (isGrok4 && forceVisibleOutput) {
|
|
79
|
+
// For Grok 4+ models, we need to handle reasoning tokens specially
|
|
80
|
+
const isGrok4Plus = model.includes('grok-4');
|
|
81
|
+
// Adjust prompt for Grok 4+ to ensure visible output
|
|
82
|
+
if (isGrok4Plus && forceVisibleOutput) {
|
|
80
83
|
const lastMessage = validatedMessages[validatedMessages.length - 1];
|
|
81
84
|
if (lastMessage.role === 'user') {
|
|
82
85
|
lastMessage.content += '\n\nProvide a detailed response with your reasoning and conclusion.';
|
|
83
86
|
}
|
|
84
87
|
}
|
|
88
|
+
const controller = new AbortController();
|
|
89
|
+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
90
|
+
const isMultiAgent = model.includes('multi-agent');
|
|
91
|
+
const requestBody = {
|
|
92
|
+
model,
|
|
93
|
+
messages: validatedMessages,
|
|
94
|
+
temperature,
|
|
95
|
+
max_tokens: maxTokens,
|
|
96
|
+
stream: false
|
|
97
|
+
};
|
|
98
|
+
if (isMultiAgent && reasoningEffort) {
|
|
99
|
+
requestBody.reasoning = { effort: reasoningEffort };
|
|
100
|
+
}
|
|
85
101
|
const response = await fetch(GROK_API_URL, {
|
|
86
102
|
method: "POST",
|
|
87
103
|
headers: {
|
|
88
104
|
"Authorization": `Bearer ${GROK_API_KEY}`,
|
|
89
105
|
"Content-Type": "application/json"
|
|
90
106
|
},
|
|
91
|
-
body: JSON.stringify(
|
|
92
|
-
|
|
93
|
-
messages: validatedMessages,
|
|
94
|
-
temperature,
|
|
95
|
-
max_tokens: maxTokens,
|
|
96
|
-
stream: false
|
|
97
|
-
})
|
|
107
|
+
body: JSON.stringify(requestBody),
|
|
108
|
+
signal: controller.signal
|
|
98
109
|
});
|
|
110
|
+
clearTimeout(timeoutId);
|
|
99
111
|
if (!response.ok) {
|
|
100
112
|
const error = await response.text();
|
|
101
113
|
throw new Error(`Grok API error: ${response.statusText} - ${error}`);
|
|
@@ -105,7 +117,7 @@ forceVisibleOutput = true, validationContext = 'llm-orchestration') {
|
|
|
105
117
|
// Handle Grok 4's reasoning tokens
|
|
106
118
|
if (!content && data.usage?.completion_tokens_details?.reasoning_tokens > 0) {
|
|
107
119
|
// If Grok 4 returns no visible content, retry with Grok 3 for visible output
|
|
108
|
-
if (
|
|
120
|
+
if (isGrok4Plus && forceVisibleOutput) {
|
|
109
121
|
console.error(`Grok 4 used ${data.usage.completion_tokens_details.reasoning_tokens} reasoning tokens with no output. Retrying with Grok 3...`);
|
|
110
122
|
// Messages already validated - use same context for retry
|
|
111
123
|
return callGrok(validatedMessages, GrokModel.GROK_3, temperature, maxTokens, false, validationContext);
|
|
@@ -115,6 +127,9 @@ forceVisibleOutput = true, validationContext = 'llm-orchestration') {
|
|
|
115
127
|
return content || "No response from Grok";
|
|
116
128
|
}
|
|
117
129
|
catch (error) {
|
|
130
|
+
if (error instanceof Error && error.name === 'AbortError') {
|
|
131
|
+
return `[Grok timeout: ${model} exceeded ${isReasoning ? '120' : '60'}s limit]`;
|
|
132
|
+
}
|
|
118
133
|
return `[Grok error: ${error instanceof Error ? error.message : String(error)}]`;
|
|
119
134
|
}
|
|
120
135
|
}
|
|
@@ -158,10 +173,10 @@ ${FORMAT_INSTRUCTION}`
|
|
|
158
173
|
content: problem + fileContext
|
|
159
174
|
}
|
|
160
175
|
];
|
|
161
|
-
// Use
|
|
162
|
-
const model = useHeavy ? GrokModel.
|
|
163
|
-
const maxTokens = useHeavy ? 100000 : 16384;
|
|
164
|
-
log?.info(`Using Grok model: ${model} for deep reasoning (max tokens: ${maxTokens}
|
|
176
|
+
// Use 4.20 flagship by default, multi-agent for heavy tasks
|
|
177
|
+
const model = useHeavy ? GrokModel.GROK_4_20_MULTI_AGENT : GrokModel.GROK_4_20_REASONING;
|
|
178
|
+
const maxTokens = useHeavy ? 100000 : 16384;
|
|
179
|
+
log?.info(`Using Grok model: ${model} for deep reasoning (max tokens: ${maxTokens})`);
|
|
165
180
|
// Use heartbeat to prevent MCP timeout during long reasoning operations
|
|
166
181
|
const reportFn = reportProgress ?? (async () => { });
|
|
167
182
|
const result = await withHeartbeat(() => callGrok(messages, model, 0.7, maxTokens, true, 'llm-orchestration'), reportFn);
|
|
@@ -212,7 +227,7 @@ ${FORMAT_INSTRUCTION}`
|
|
|
212
227
|
log?.info(`Using Grok 4.1 Fast Non-Reasoning (2M context, tool-calling optimized, $0.20/$0.50)`);
|
|
213
228
|
// Use heartbeat to prevent MCP timeout
|
|
214
229
|
const reportFn = reportProgress ?? (async () => { });
|
|
215
|
-
const result = await withHeartbeat(() => callGrok(messages, GrokModel.
|
|
230
|
+
const result = await withHeartbeat(() => callGrok(messages, GrokModel.GROK_4_20_NON_REASONING, 0.2, 4000, true, 'code-analysis'), reportFn);
|
|
216
231
|
return stripFormatting(result);
|
|
217
232
|
}
|
|
218
233
|
};
|
|
@@ -264,7 +279,7 @@ ${FORMAT_INSTRUCTION}`
|
|
|
264
279
|
log?.info(`Using Grok 4.1 Fast Non-Reasoning for debugging (tool-calling optimized, $0.20/$0.50)`);
|
|
265
280
|
// Use heartbeat to prevent MCP timeout
|
|
266
281
|
const reportFn = reportProgress ?? (async () => { });
|
|
267
|
-
const result = await withHeartbeat(() => callGrok(messages, GrokModel.
|
|
282
|
+
const result = await withHeartbeat(() => callGrok(messages, GrokModel.GROK_4_20_NON_REASONING, 0.3, 3000, true, 'code-analysis'), reportFn);
|
|
268
283
|
return stripFormatting(result);
|
|
269
284
|
}
|
|
270
285
|
};
|
|
@@ -302,10 +317,10 @@ ${FORMAT_INSTRUCTION}`
|
|
|
302
317
|
content: requirements + fileContext
|
|
303
318
|
}
|
|
304
319
|
];
|
|
305
|
-
log?.info(`Using Grok 4.
|
|
320
|
+
log?.info(`Using Grok 4.20 multi-agent for architecture (16-agent swarm)`);
|
|
306
321
|
// Use heartbeat to prevent MCP timeout
|
|
307
322
|
const reportFn = reportProgress ?? (async () => { });
|
|
308
|
-
const result = await withHeartbeat(() => callGrok(messages, GrokModel.
|
|
323
|
+
const result = await withHeartbeat(() => callGrok(messages, GrokModel.GROK_4_20_MULTI_AGENT, 0.6, 4000, true, 'llm-orchestration', 'high'), reportFn);
|
|
309
324
|
return stripFormatting(result);
|
|
310
325
|
}
|
|
311
326
|
};
|
|
@@ -356,8 +371,8 @@ ${FORMAT_INSTRUCTION}`
|
|
|
356
371
|
content: topic + fileContext
|
|
357
372
|
}
|
|
358
373
|
];
|
|
359
|
-
const model = forceHeavy ? GrokModel.
|
|
360
|
-
log?.info(`Brainstorming with Grok model: ${model} (Heavy: ${forceHeavy}
|
|
374
|
+
const model = forceHeavy ? GrokModel.GROK_4_20_MULTI_AGENT : GrokModel.GROK_4_20_NON_REASONING;
|
|
375
|
+
log?.info(`Brainstorming with Grok model: ${model} (Heavy: ${forceHeavy})`);
|
|
361
376
|
const reportFn = reportProgress ?? (async () => { });
|
|
362
377
|
const result = await withHeartbeat(() => callGrok(messages, model, 0.95, 4000, true, 'llm-orchestration'), reportFn);
|
|
363
378
|
return stripFormatting(result);
|
|
@@ -184,14 +184,20 @@ reasoningEffort = "low", requireConfirmation = false, skipValidation = false) {
|
|
|
184
184
|
};
|
|
185
185
|
}
|
|
186
186
|
console.error(`🔍 TRACE: Using ${isGPT5 ? '/v1/responses' : '/v1/chat/completions'} endpoint for ${currentModel}`);
|
|
187
|
+
// GPT-5.4 with high reasoning effort can take 2+ minutes
|
|
188
|
+
const timeoutMs = (reasoningEffort === 'high' || reasoningEffort === 'xhigh') ? 180000 : 90000;
|
|
189
|
+
const controller = new AbortController();
|
|
190
|
+
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
|
187
191
|
const response = await fetch(endpoint, {
|
|
188
192
|
method: "POST",
|
|
189
193
|
headers: {
|
|
190
194
|
"Authorization": `Bearer ${OPENAI_API_KEY}`,
|
|
191
195
|
"Content-Type": "application/json"
|
|
192
196
|
},
|
|
193
|
-
body: JSON.stringify(requestBody)
|
|
197
|
+
body: JSON.stringify(requestBody),
|
|
198
|
+
signal: controller.signal
|
|
194
199
|
});
|
|
200
|
+
clearTimeout(timeoutId);
|
|
195
201
|
if (!response.ok) {
|
|
196
202
|
const error = await response.text();
|
|
197
203
|
lastError = `${currentModel}: ${response.statusText} - ${error}`;
|
|
@@ -254,8 +260,15 @@ reasoningEffort = "low", requireConfirmation = false, skipValidation = false) {
|
|
|
254
260
|
return stripFormatting(result);
|
|
255
261
|
}
|
|
256
262
|
catch (error) {
|
|
257
|
-
|
|
258
|
-
|
|
263
|
+
// Handle abort/timeout specifically
|
|
264
|
+
if (error instanceof Error && error.name === 'AbortError') {
|
|
265
|
+
lastError = `${currentModel}: Timeout (reasoning_effort=${reasoningEffort})`;
|
|
266
|
+
console.error(`🔍 TRACE: ${currentModel} TIMEOUT - reasoning_effort=${reasoningEffort}`);
|
|
267
|
+
}
|
|
268
|
+
else {
|
|
269
|
+
lastError = `${currentModel}: ${error instanceof Error ? error.message : String(error)}`;
|
|
270
|
+
console.error(`🔍 TRACE: ${currentModel} EXCEPTION - ${lastError}`);
|
|
271
|
+
}
|
|
259
272
|
continue; // Try next model
|
|
260
273
|
}
|
|
261
274
|
}
|
|
@@ -1136,11 +1136,13 @@ function generateProgressBar(current, total) {
|
|
|
1136
1136
|
*/
|
|
1137
1137
|
function parsePlanSteps(plan) {
|
|
1138
1138
|
const steps = [];
|
|
1139
|
-
// Try
|
|
1139
|
+
// Try structured step/task headers (### Step 1:, ### Task T1:, 1., Step 1:, Task T1:)
|
|
1140
1140
|
const stepPatterns = [
|
|
1141
1141
|
/###\s*Step\s*\d+[:\s]+([^\n]+)([\s\S]*?)(?=###\s*Step|\n##[^#]|$)/gi,
|
|
1142
|
+
/###\s*Task\s*[^\n:]+:\s*([^\n]+)([\s\S]*?)(?=###\s*Task|\n##[^#]|$)/gi,
|
|
1142
1143
|
/^\s*(\d+)\.\s*([^\n]+)([\s\S]*?)(?=^\s*\d+\.|$)/gm,
|
|
1143
1144
|
/^Step\s*\d+[:\s]+([^\n]+)([\s\S]*?)(?=^Step\s*\d+|$)/gim,
|
|
1145
|
+
/^Task\s*[^\n:]+:\s*([^\n]+)([\s\S]*?)(?=^Task\s*\S+|$)/gim,
|
|
1144
1146
|
];
|
|
1145
1147
|
for (const pattern of stepPatterns) {
|
|
1146
1148
|
const matches = [...plan.matchAll(pattern)];
|
|
@@ -1253,6 +1255,10 @@ Evidence params (unblind the checkpoints):
|
|
|
1253
1255
|
// Parse plan into steps
|
|
1254
1256
|
const steps = parsePlanSteps(plan);
|
|
1255
1257
|
const totalSteps = steps.length;
|
|
1258
|
+
if (completed.length > totalSteps) {
|
|
1259
|
+
lines.push(`⚠️ Plan parse mismatch: ${completed.length} steps completed but only ${totalSteps} parsed. Plan format may have degraded.`);
|
|
1260
|
+
lines.push("");
|
|
1261
|
+
}
|
|
1256
1262
|
if (mode === "start") {
|
|
1257
1263
|
// ═══════════════════════════════════════════════════════════════
|
|
1258
1264
|
// START: Show parsed plan and devlog hint
|
|
@@ -131,7 +131,7 @@ Focus on recent, accurate information. Provide sources.${FORMAT_INSTRUCTION}`
|
|
|
131
131
|
{ role: "user", content: query }
|
|
132
132
|
];
|
|
133
133
|
const result = await callGrokEnhanced(messages, {
|
|
134
|
-
model: GrokModel.
|
|
134
|
+
model: GrokModel.GROK_4_20_REASONING,
|
|
135
135
|
enableLiveSearch: true,
|
|
136
136
|
searchSources: 20,
|
|
137
137
|
temperature: 0.3,
|
|
@@ -176,7 +176,7 @@ async function solveHandler(query) {
|
|
|
176
176
|
{ role: "system", content: `Search for solutions to this coding problem. Find relevant Stack Overflow, docs, or GitHub issues.${FORMAT_INSTRUCTION}` },
|
|
177
177
|
{ role: "user", content: query }
|
|
178
178
|
], {
|
|
179
|
-
model: GrokModel.
|
|
179
|
+
model: GrokModel.GROK_4_20_REASONING,
|
|
180
180
|
enableLiveSearch: true,
|
|
181
181
|
searchSources: 10,
|
|
182
182
|
temperature: 0.3,
|
|
@@ -269,7 +269,7 @@ async function architectHandler(query) {
|
|
|
269
269
|
{ role: "system", content: `Search for architecture patterns, best practices, and real-world examples for this design decision.${FORMAT_INSTRUCTION}` },
|
|
270
270
|
{ role: "user", content: query }
|
|
271
271
|
], {
|
|
272
|
-
model: GrokModel.
|
|
272
|
+
model: GrokModel.GROK_4_20_REASONING,
|
|
273
273
|
enableLiveSearch: true,
|
|
274
274
|
searchSources: 15,
|
|
275
275
|
temperature: 0.3,
|
|
@@ -452,8 +452,8 @@ export function stripMarkdown(md, options) {
|
|
|
452
452
|
text = text
|
|
453
453
|
// Markdown headers — strip # prefix (or bold if boldHeaders)
|
|
454
454
|
.replace(/^#{1,6}\s+(.+)$/gm, boldHeaders ? '\x1b[1m$1\x1b[0m' : '$1')
|
|
455
|
-
// Emoji section headers — e.g. "🧠 TYPE SAFETY ───" → rotating pastel bg
|
|
456
|
-
.replace(/^(.{1,2})\s+([A-Z][
|
|
455
|
+
// Emoji section headers — e.g. "🧠 TYPE SAFETY ───" or "🧠 Key Activities" → rotating pastel bg
|
|
456
|
+
.replace(/^(.{1,2})\s+([A-Z][\w\s&,()/-]{2,50}?)\s*─*$/gm, (_match, emoji, header) => {
|
|
457
457
|
if (!boldHeaders)
|
|
458
458
|
return `${emoji} ${header}`;
|
|
459
459
|
const pastels = [146, 182, 152, 187, 116, 180]; // lavender, mauve, powder blue, sand, mint, peach
|